eval-protocol 0.2.78__tar.gz → 0.2.80__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (446) hide show
  1. {eval_protocol-0.2.78/eval_protocol.egg-info → eval_protocol-0.2.80}/PKG-INFO +1 -1
  2. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/_version.py +3 -3
  3. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/fireworks_tracing.py +12 -2
  4. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/auth.py +8 -1
  5. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/cli.py +4 -0
  6. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/cli_commands/create_rft.py +82 -0
  7. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/cli_commands/upload.py +66 -15
  8. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/common_utils.py +17 -0
  9. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/evaluation.py +3 -0
  10. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/fireworks_rft.py +17 -3
  11. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/generation/clients.py +3 -0
  12. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/platform_api.py +18 -10
  13. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/handle_persist_flow.py +15 -8
  14. {eval_protocol-0.2.78 → eval_protocol-0.2.80/eval_protocol.egg-info}/PKG-INFO +1 -1
  15. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/LICENSE +0 -0
  16. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/README.md +0 -0
  17. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/development/__init__.py +0 -0
  18. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/development/normalize_sandbox_fusion.py +0 -0
  19. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/development/utils/__init__.py +0 -0
  20. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/development/utils/generate_api_key.py +0 -0
  21. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/development/utils/subprocess_manager.py +0 -0
  22. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/__init__.py +0 -0
  23. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/__main__.py +0 -0
  24. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/__init__.py +0 -0
  25. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/base.py +0 -0
  26. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/bigquery.py +0 -0
  27. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/braintrust.py +0 -0
  28. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/huggingface.py +0 -0
  29. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/langchain.py +0 -0
  30. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/langfuse.py +0 -0
  31. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/langsmith.py +0 -0
  32. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/openai_responses.py +0 -0
  33. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/trl.py +0 -0
  34. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/utils.py +0 -0
  35. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/weave.py +0 -0
  36. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/__init__.py +0 -0
  37. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/models.py +0 -0
  38. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/orchestrator.py +0 -0
  39. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/resource_abc.py +0 -0
  40. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/resource_pool.py +0 -0
  41. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/resources/__init__.py +0 -0
  42. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
  43. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
  44. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
  45. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
  46. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
  47. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/resources/docker_resource.py +0 -0
  48. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
  49. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/resources/python_state_resource.py +0 -0
  50. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/resources/sql_resource.py +0 -0
  51. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/task_manager.py +0 -0
  52. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/tool_registry.py +0 -0
  53. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/benchmarks/__init__.py +0 -0
  54. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
  55. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
  56. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/benchmarks/test_aime25.py +0 -0
  57. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
  58. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/benchmarks/test_gpqa.py +0 -0
  59. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
  60. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
  61. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
  62. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/cli_commands/__init__.py +0 -0
  63. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
  64. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/cli_commands/common.py +0 -0
  65. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/cli_commands/deploy.py +0 -0
  66. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
  67. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/cli_commands/logs.py +0 -0
  68. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/cli_commands/preview.py +0 -0
  69. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
  70. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/config.py +0 -0
  71. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/data_loader/__init__.py +0 -0
  72. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
  73. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/data_loader/factory_data_loader.py +0 -0
  74. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/data_loader/inline_data_loader.py +0 -0
  75. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/data_loader/jsonl_data_loader.py +0 -0
  76. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/data_loader/models.py +0 -0
  77. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/dataset_logger/__init__.py +0 -0
  78. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
  79. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
  80. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
  81. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
  82. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/datasets/__init__.py +0 -0
  83. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/datasets/loader.py +0 -0
  84. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/directory_utils.py +0 -0
  85. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/event_bus/__init__.py +0 -0
  86. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/event_bus/event_bus.py +0 -0
  87. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/event_bus/logger.py +0 -0
  88. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
  89. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
  90. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/exceptions.py +0 -0
  91. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/execution/__init__.py +0 -0
  92. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/execution/pipeline.py +0 -0
  93. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/gcp_tools.py +0 -0
  94. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/generation/cache.py +0 -0
  95. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/generation/clients/base.py +0 -0
  96. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/generic_server.py +0 -0
  97. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/get_pep440_version.py +0 -0
  98. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/human_id/__init__.py +0 -0
  99. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/human_id/dictionary.py +0 -0
  100. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/integrations/__init__.py +0 -0
  101. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/integrations/deepeval.py +0 -0
  102. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/integrations/openeval.py +0 -0
  103. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/integrations/trl.py +0 -0
  104. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/log_utils/__init__.py +0 -0
  105. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
  106. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
  107. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
  108. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/log_utils/fireworks_tracing_http_handler.py +0 -0
  109. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/log_utils/init.py +0 -0
  110. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/log_utils/rollout_context.py +0 -0
  111. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
  112. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/log_utils/util.py +0 -0
  113. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/logging_utils.py +0 -0
  114. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/__init__.py +0 -0
  115. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/adapter.py +0 -0
  116. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/client/__init__.py +0 -0
  117. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/client/connection.py +0 -0
  118. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/clients.py +0 -0
  119. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/execution/__init__.py +0 -0
  120. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/execution/base_policy.py +0 -0
  121. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/execution/manager.py +0 -0
  122. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/execution/policy.py +0 -0
  123. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/grid_renderer.py +0 -0
  124. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/mcp_multi_client.py +0 -0
  125. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/mcpgym.py +0 -0
  126. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/process_manager.py +0 -0
  127. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/session/__init__.py +0 -0
  128. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/session/manager.py +0 -0
  129. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/simple_process_manager.py +0 -0
  130. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/simulation_server.py +0 -0
  131. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_agent/__init__.py +0 -0
  132. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_agent/config.py +0 -0
  133. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_agent/main.py +0 -0
  134. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
  135. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
  136. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
  137. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
  138. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_env.py +0 -0
  139. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/__init__.py +0 -0
  140. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
  141. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
  142. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
  143. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/tau2/README.md +0 -0
  144. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
  145. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
  146. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
  147. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
  148. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/tau2/server.py +0 -0
  149. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
  150. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
  151. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
  152. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
  153. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
  154. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/models.py +0 -0
  155. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/packaging.py +0 -0
  156. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/playback_policy.py +0 -0
  157. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/proxy/__init__.py +0 -0
  158. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/proxy/proxy_core/__init__.py +0 -0
  159. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/proxy/proxy_core/app.py +0 -0
  160. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/proxy/proxy_core/auth.py +0 -0
  161. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/proxy/proxy_core/langfuse.py +0 -0
  162. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/proxy/proxy_core/litellm.py +0 -0
  163. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/proxy/proxy_core/main.py +0 -0
  164. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/proxy/proxy_core/models.py +0 -0
  165. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/proxy/proxy_core/redis_utils.py +0 -0
  166. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/__init__.py +0 -0
  167. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
  168. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
  169. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
  170. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
  171. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
  172. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
  173. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
  174. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
  175. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
  176. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/evaluation_test.py +0 -0
  177. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
  178. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/evaluation_test_utils.py +0 -0
  179. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/exception_config.py +0 -0
  180. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/execution.py +0 -0
  181. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
  182. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/github_action_rollout_processor.py +0 -0
  183. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/parameterize.py +0 -0
  184. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/plugin.py +0 -0
  185. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/remote_rollout_processor.py +0 -0
  186. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/rollout_processor.py +0 -0
  187. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/store_experiment_link.py +0 -0
  188. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/store_results_url.py +0 -0
  189. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/tracing_utils.py +0 -0
  190. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/types.py +0 -0
  191. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/validate_signature.py +0 -0
  192. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/__init__.py +0 -0
  193. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/aha_judge/__init__.py +0 -0
  194. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/aha_judge/llm_judge.py +0 -0
  195. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/aha_judge/llm_judge_braintrust.py +0 -0
  196. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/aha_judge/llm_judge_langfuse.py +0 -0
  197. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/aha_judge/llm_judge_langsmith.py +0 -0
  198. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/aha_judge/llm_judge_openai_responses.py +0 -0
  199. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/aha_judge/utils.py +0 -0
  200. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/llm_judge.py +0 -0
  201. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
  202. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/svg_agent/evaluator/test_svgagent.py +0 -0
  203. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/svg_agent/evaluator/utils.py +0 -0
  204. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +0 -0
  205. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/utils.py +0 -0
  206. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/resources.py +0 -0
  207. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/reward_function.py +0 -0
  208. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/__init__.py +0 -0
  209. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/accuracy.py +0 -0
  210. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/accuracy_length.py +0 -0
  211. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/apps_coding_reward.py +0 -0
  212. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/apps_execution_utils.py +0 -0
  213. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/apps_testing_util.py +0 -0
  214. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/bfcl_reward.py +0 -0
  215. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/code_execution.py +0 -0
  216. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/code_execution_utils.py +0 -0
  217. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/cpp_code.py +0 -0
  218. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/deepcoder_reward.py +0 -0
  219. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/format.py +0 -0
  220. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/function_calling.py +0 -0
  221. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/json_schema.py +0 -0
  222. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/language_consistency.py +0 -0
  223. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/lean_prover.py +0 -0
  224. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/length.py +0 -0
  225. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
  226. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/math.py +0 -0
  227. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
  228. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/reasoning_steps.py +0 -0
  229. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/repetition.py +0 -0
  230. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/tag_count.py +0 -0
  231. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rl_processing.py +0 -0
  232. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/server.py +0 -0
  233. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/stats/__init__.py +0 -0
  234. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/stats/confidence_intervals.py +0 -0
  235. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/typed_interface.py +0 -0
  236. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/types/__init__.py +0 -0
  237. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/types/errors.py +0 -0
  238. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/types/remote_rollout_processor.py +0 -0
  239. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/types/types.py +0 -0
  240. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/__init__.py +0 -0
  241. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/batch_evaluation.py +0 -0
  242. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/batch_transformation.py +0 -0
  243. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/browser_utils.py +0 -0
  244. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/check_server_status.py +0 -0
  245. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/dataset_helpers.py +0 -0
  246. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/evaluation_row_utils.py +0 -0
  247. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/logs_models.py +0 -0
  248. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/logs_server.py +0 -0
  249. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/module_loader.py +0 -0
  250. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/packaging_utils.py +0 -0
  251. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/show_results_url.py +0 -0
  252. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/static_policy.py +0 -0
  253. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/subprocess_utils.py +0 -0
  254. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/vite_server.py +0 -0
  255. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol.egg-info/SOURCES.txt +0 -0
  256. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol.egg-info/dependency_links.txt +0 -0
  257. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol.egg-info/entry_points.txt +0 -0
  258. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol.egg-info/requires.txt +0 -0
  259. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol.egg-info/top_level.txt +0 -0
  260. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/pyproject.toml +0 -0
  261. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/setup.cfg +0 -0
  262. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/setup.py +0 -0
  263. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_accuracy.py +0 -0
  264. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_accuracy_length.py +0 -0
  265. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_adapters_e2e.py +0 -0
  266. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_agent_orchestrator.py +0 -0
  267. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_agent_resources.py +0 -0
  268. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_auth.py +0 -0
  269. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_batch_evaluation.py +0 -0
  270. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_cli.py +0 -0
  271. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_cli_agent.py +0 -0
  272. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_cli_args.py +0 -0
  273. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_code_execution.py +0 -0
  274. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_config.py +0 -0
  275. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_control_plane_separation.py +0 -0
  276. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_cpp_code.py +0 -0
  277. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_data_driven_task_manager.py +0 -0
  278. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_deepcoder_reward.py +0 -0
  279. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_deepeval_integration.py +0 -0
  280. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_deploy_integration.py +0 -0
  281. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_directory_utils.py +0 -0
  282. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_e2b_integration.py +0 -0
  283. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_e2b_js_integration.py +0 -0
  284. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_edge_cases.py +0 -0
  285. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_ep_upload_e2e.py +0 -0
  286. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_eval_protocol_import.py +0 -0
  287. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_evaluation.py +0 -0
  288. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_evaluation_integration.py +0 -0
  289. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_evaluation_postprocess.py +0 -0
  290. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_evaluation_preview_integration.py +0 -0
  291. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_event_bus.py +0 -0
  292. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_event_bus_helper.py +0 -0
  293. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_examples_end_to_end.py +0 -0
  294. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_exceptions.py +0 -0
  295. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_fireworks_api.py +0 -0
  296. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_format.py +0 -0
  297. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_fractional_code.py +0 -0
  298. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_function_calling.py +0 -0
  299. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_gcp_tools.py +0 -0
  300. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_generic_server.py +0 -0
  301. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_human_id.py +0 -0
  302. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_integration.py +0 -0
  303. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_json_schema.py +0 -0
  304. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_kwargs_validation.py +0 -0
  305. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_language_consistency.py +0 -0
  306. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_lean_prover.py +0 -0
  307. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_lean_prover_runner.py +0 -0
  308. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_length.py +0 -0
  309. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_list_comparison_math_reward.py +0 -0
  310. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_logs_server.py +0 -0
  311. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_logs_server_simple.py +0 -0
  312. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_math.py +0 -0
  313. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_message_field_filtering.py +0 -0
  314. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_minimal.py +0 -0
  315. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_models.py +0 -0
  316. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_models_rl.py +0 -0
  317. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_multiple_choice_math_reward.py +0 -0
  318. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_n_variant_batch_integration.py +0 -0
  319. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_n_variant_integration.py +0 -0
  320. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_openai_compatibility.py +0 -0
  321. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_openeval_integration.py +0 -0
  322. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_packaging.py +0 -0
  323. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_parallel_rollouts.py +0 -0
  324. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_platform_api.py +0 -0
  325. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_quickstart_utils.py +0 -0
  326. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_readiness.py +0 -0
  327. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_reasoning_steps.py +0 -0
  328. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_repetition.py +0 -0
  329. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_repetition_debug.py +0 -0
  330. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_retry_mechanism.py +0 -0
  331. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_reward_function.py +0 -0
  332. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_reward_protocol_import.py +0 -0
  333. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_rl_processing.py +0 -0
  334. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_rollout_control_plane_integration.py +0 -0
  335. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_server.py +0 -0
  336. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_show_results_url.py +0 -0
  337. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_status_migration_changes.py +0 -0
  338. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_status_migration_integration.py +0 -0
  339. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_status_model.py +0 -0
  340. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_tag_count.py +0 -0
  341. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_tau_bench_airline_smoke.py +0 -0
  342. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_typed_interface.py +0 -0
  343. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_typed_interface_rl.py +0 -0
  344. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_upload_entrypoint.py +0 -0
  345. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_url_handling.py +0 -0
  346. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_vite_server.py +0 -0
  347. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/__init__.py +0 -0
  348. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/agent/__init__.py +0 -0
  349. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/agent/base.py +0 -0
  350. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/agent/llm_agent.py +0 -0
  351. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/api_service/__init__.py +0 -0
  352. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/api_service/api_config.py +0 -0
  353. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/api_service/data_model.py +0 -0
  354. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/api_service/simulation_service.py +0 -0
  355. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/cli.py +0 -0
  356. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/config.py +0 -0
  357. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data/domains/airline/policy.md +0 -0
  358. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data/domains/mock/policy.md +0 -0
  359. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
  360. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data/domains/retail/policy.md +0 -0
  361. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
  362. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
  363. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
  364. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
  365. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
  366. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
  367. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
  368. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data_model/__init__.py +0 -0
  369. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data_model/message.py +0 -0
  370. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data_model/simulation.py +0 -0
  371. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data_model/tasks.py +0 -0
  372. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/__init__.py +0 -0
  373. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/airline/__init__.py +0 -0
  374. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/airline/data_model.py +0 -0
  375. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/airline/environment.py +0 -0
  376. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/airline/tools.py +0 -0
  377. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/airline/utils.py +0 -0
  378. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/mock/__init__.py +0 -0
  379. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/mock/data_model.py +0 -0
  380. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/mock/environment.py +0 -0
  381. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/mock/tools.py +0 -0
  382. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/mock/utils.py +0 -0
  383. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/retail/__init__.py +0 -0
  384. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/retail/data_model.py +0 -0
  385. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/retail/environment.py +0 -0
  386. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/retail/tools.py +0 -0
  387. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/retail/utils.py +0 -0
  388. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/__init__.py +0 -0
  389. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/data_model.py +0 -0
  390. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/environment.py +0 -0
  391. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
  392. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
  393. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
  394. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
  395. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
  396. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
  397. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
  398. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
  399. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/tools.py +0 -0
  400. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
  401. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/user_tools.py +0 -0
  402. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/utils.py +0 -0
  403. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/environment/__init__.py +0 -0
  404. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/environment/db.py +0 -0
  405. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/environment/environment.py +0 -0
  406. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/environment/server.py +0 -0
  407. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/environment/tool.py +0 -0
  408. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/environment/toolkit.py +0 -0
  409. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/environment/utils/interface_agent.py +0 -0
  410. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/evaluator/__init__.py +0 -0
  411. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/evaluator/evaluator.py +0 -0
  412. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/evaluator/evaluator_action.py +0 -0
  413. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/evaluator/evaluator_base.py +0 -0
  414. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
  415. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/evaluator/evaluator_env.py +0 -0
  416. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
  417. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/metrics/__init__.py +0 -0
  418. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/metrics/agent_metrics.py +0 -0
  419. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/metrics/break_down_metrics.py +0 -0
  420. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/orchestrator/__init__.py +0 -0
  421. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/orchestrator/environment_manager.py +0 -0
  422. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/orchestrator/orchestrator.py +0 -0
  423. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/orchestrator/utils.py +0 -0
  424. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/registry.py +0 -0
  425. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/run.py +0 -0
  426. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/scripts/__init__.py +0 -0
  427. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/scripts/check_data.py +0 -0
  428. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/scripts/show_domain_doc.py +0 -0
  429. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/scripts/start_servers.py +0 -0
  430. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/scripts/view_simulations.py +0 -0
  431. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/user/__init__.py +0 -0
  432. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/user/base.py +0 -0
  433. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/user/user_simulator.py +0 -0
  434. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/utils/__init__.py +0 -0
  435. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/utils/display.py +0 -0
  436. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/utils/io_utils.py +0 -0
  437. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/utils/llm_utils.py +0 -0
  438. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/utils/pydantic_utils.py +0 -0
  439. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/utils/utils.py +0 -0
  440. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/versioneer.py +0 -0
  441. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
  442. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vite-app/dist/assets/index-BGlGI2LH.css +0 -0
  443. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vite-app/dist/assets/index-CnGlFAnP.js +0 -0
  444. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vite-app/dist/assets/index-CnGlFAnP.js.map +0 -0
  445. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
  446. {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vite-app/dist/index.html +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.78
3
+ Version: 0.2.80
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-11-03T11:40:49-0800",
11
+ "date": "2025-11-04T15:41:02-0800",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "fd090d994b7ee9738c2310e1e61427d027bad38d",
15
- "version": "0.2.78"
14
+ "full-revisionid": "9303a224e55ef4f6e47aaa9e9e596ebd1c83cc56",
15
+ "version": "0.2.80"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -273,7 +273,12 @@ class FireworksTracingAdapter(BaseAdapter):
273
273
  if not tags:
274
274
  raise ValueError("At least one tag is required to fetch logs")
275
275
 
276
- headers = {"Authorization": f"Bearer {os.environ.get('FIREWORKS_API_KEY')}"}
276
+ from ..common_utils import get_user_agent
277
+
278
+ headers = {
279
+ "Authorization": f"Bearer {os.environ.get('FIREWORKS_API_KEY')}",
280
+ "User-Agent": get_user_agent(),
281
+ }
277
282
  params: Dict[str, Any] = {"tags": tags, "limit": limit, "hours_back": hours_back, "program": "eval_protocol"}
278
283
 
279
284
  # Try /logs first, fall back to /v1/logs if not found
@@ -398,7 +403,12 @@ class FireworksTracingAdapter(BaseAdapter):
398
403
  else:
399
404
  url = f"{self.base_url}/v1/traces/pointwise"
400
405
 
401
- headers = {"Authorization": f"Bearer {os.environ.get('FIREWORKS_API_KEY')}"}
406
+ from ..common_utils import get_user_agent
407
+
408
+ headers = {
409
+ "Authorization": f"Bearer {os.environ.get('FIREWORKS_API_KEY')}",
410
+ "User-Agent": get_user_agent(),
411
+ }
402
412
 
403
413
  result = None
404
414
  try:
@@ -242,9 +242,16 @@ def verify_api_key_and_get_account_id(
242
242
  if not resolved_key:
243
243
  return None
244
244
  resolved_base = api_base or get_fireworks_api_base()
245
+
246
+ from .common_utils import get_user_agent
247
+
245
248
  url = f"{resolved_base.rstrip('/')}/verifyApiKey"
246
- headers = {"Authorization": f"Bearer {resolved_key}"}
249
+ headers = {
250
+ "Authorization": f"Bearer {resolved_key}",
251
+ "User-Agent": get_user_agent(),
252
+ }
247
253
  resp = requests.get(url, headers=headers, timeout=10)
254
+
248
255
  if resp.status_code != 200:
249
256
  logger.debug("verifyApiKey returned status %s", resp.status_code)
250
257
  return None
@@ -355,6 +355,10 @@ def parse_args(args=None):
355
355
  action="store_true",
356
356
  help="Non-interactive: upload all discovered evaluation tests",
357
357
  )
358
+ upload_parser.add_argument(
359
+ "--env-file",
360
+ help="Path to .env file containing secrets to upload (default: .env in current directory)",
361
+ )
358
362
 
359
363
  # Create command group
360
364
  create_parser = subparsers.add_parser(
@@ -5,12 +5,15 @@ import time
5
5
  import argparse
6
6
  from typing import Any, Dict, Optional
7
7
 
8
+ import requests
9
+
8
10
  from ..auth import (
9
11
  get_fireworks_account_id,
10
12
  get_fireworks_api_base,
11
13
  get_fireworks_api_key,
12
14
  verify_api_key_and_get_account_id,
13
15
  )
16
+ from ..common_utils import get_user_agent
14
17
  from ..fireworks_rft import (
15
18
  _map_api_host_to_app_host,
16
19
  build_default_output_model,
@@ -263,6 +266,67 @@ def _auto_select_evaluator_id(cwd: str) -> Optional[str]:
263
266
  return None
264
267
 
265
268
 
269
+ def _poll_evaluator_status(
270
+ evaluator_resource_name: str, api_key: str, api_base: str, timeout_minutes: int = 5
271
+ ) -> bool:
272
+ """
273
+ Poll evaluator status until it becomes ACTIVE or times out.
274
+
275
+ Args:
276
+ evaluator_resource_name: Full evaluator resource name (e.g., accounts/xxx/evaluators/yyy)
277
+ api_key: Fireworks API key
278
+ api_base: Fireworks API base URL
279
+ timeout_minutes: Maximum time to wait in minutes
280
+
281
+ Returns:
282
+ True if evaluator becomes ACTIVE, False if timeout or BUILD_FAILED
283
+ """
284
+ headers = {
285
+ "Authorization": f"Bearer {api_key}",
286
+ "Content-Type": "application/json",
287
+ "User-Agent": get_user_agent(),
288
+ }
289
+
290
+ check_url = f"{api_base}/v1/{evaluator_resource_name}"
291
+ timeout_seconds = timeout_minutes * 60
292
+ poll_interval = 10 # seconds
293
+ start_time = time.time()
294
+
295
+ print(f"Polling evaluator status (timeout: {timeout_minutes}m, interval: {poll_interval}s)...")
296
+
297
+ while time.time() - start_time < timeout_seconds:
298
+ try:
299
+ response = requests.get(check_url, headers=headers, timeout=30)
300
+ response.raise_for_status()
301
+
302
+ evaluator_data = response.json()
303
+ state = evaluator_data.get("state", "STATE_UNSPECIFIED")
304
+ status = evaluator_data.get("status", "")
305
+
306
+ if state == "ACTIVE":
307
+ print("✅ Evaluator is ACTIVE and ready!")
308
+ return True
309
+ elif state == "BUILD_FAILED":
310
+ print(f"❌ Evaluator build failed. Status: {status}")
311
+ return False
312
+ elif state == "BUILDING":
313
+ elapsed_minutes = (time.time() - start_time) / 60
314
+ print(f"⏳ Evaluator is still building... ({elapsed_minutes:.1f}m elapsed)")
315
+ else:
316
+ print(f"⏳ Evaluator state: {state}, status: {status}")
317
+
318
+ except requests.exceptions.RequestException as e:
319
+ print(f"Warning: Failed to check evaluator status: {e}")
320
+
321
+ # Wait before next poll
322
+ time.sleep(poll_interval)
323
+
324
+ # Timeout reached
325
+ elapsed_minutes = (time.time() - start_time) / 60
326
+ print(f"⏰ Timeout after {elapsed_minutes:.1f}m - evaluator is not yet ACTIVE")
327
+ return False
328
+
329
+
266
330
  def create_rft_command(args) -> int:
267
331
  evaluator_id: Optional[str] = getattr(args, "evaluator_id", None)
268
332
  non_interactive: bool = bool(getattr(args, "yes", False))
@@ -328,10 +392,28 @@ def create_rft_command(args) -> int:
328
392
  description=None,
329
393
  force=False,
330
394
  yes=True,
395
+ env_file=None, # Add the new env_file parameter
331
396
  )
332
397
  rc = upload_command(upload_args)
333
398
  if rc == 0:
334
399
  print(f"✓ Uploaded/ensured evaluator: {evaluator_id}")
400
+
401
+ # Poll for evaluator status
402
+ print(f"Waiting for evaluator '{evaluator_id}' to become ACTIVE...")
403
+ is_active = _poll_evaluator_status(
404
+ evaluator_resource_name=evaluator_resource_name, api_key=api_key, api_base=api_base, timeout_minutes=5
405
+ )
406
+
407
+ if not is_active:
408
+ # Print helpful message with dashboard link
409
+ app_base = _map_api_host_to_app_host(api_base)
410
+ evaluator_slug = _extract_terminal_segment(evaluator_id)
411
+ dashboard_url = f"{app_base}/dashboard/evaluators/{evaluator_slug}"
412
+
413
+ print("\n❌ Evaluator is not ready within the timeout period.")
414
+ print(f"📊 Please check the evaluator status at: {dashboard_url}")
415
+ print(" Wait for it to become ACTIVE, then run 'eval-protocol create rft' again.")
416
+ return 1
335
417
  else:
336
418
  print("Warning: Evaluator upload did not complete successfully; proceeding to RFT creation.")
337
419
  except Exception as e:
@@ -9,7 +9,7 @@ import runpy
9
9
  import sys
10
10
  from dataclasses import dataclass
11
11
  from pathlib import Path
12
- from typing import Any, Callable, Iterable, Optional
12
+ from typing import Any, Dict, Iterable
13
13
 
14
14
  import pytest
15
15
  from eval_protocol.auth import (
@@ -551,6 +551,35 @@ def _prompt_select(tests: list[DiscoveredTest], non_interactive: bool) -> list[D
551
551
  return _prompt_select_interactive(tests)
552
552
 
553
553
 
554
+ def _load_secrets_from_env_file(env_file_path: str) -> Dict[str, str]:
555
+ """
556
+ Load secrets from a .env file that should be uploaded to Fireworks.
557
+
558
+ Returns a dictionary of secret key-value pairs that contain 'API_KEY' in the name.
559
+ """
560
+ if not os.path.exists(env_file_path):
561
+ return {}
562
+
563
+ # Load the .env file into a temporary environment
564
+ env_vars = {}
565
+ with open(env_file_path, "r") as f:
566
+ for line in f:
567
+ line = line.strip()
568
+ if line and not line.startswith("#") and "=" in line:
569
+ key, value = line.split("=", 1)
570
+ key = key.strip()
571
+ value = value.strip().strip('"').strip("'") # Remove quotes
572
+ env_vars[key] = value
573
+
574
+ # Filter for secrets that look like API keys
575
+ secrets = {}
576
+ for key, value in env_vars.items():
577
+ if "API_KEY" in key.upper() and value:
578
+ secrets[key] = value
579
+
580
+ return secrets
581
+
582
+
554
583
  def upload_command(args: argparse.Namespace) -> int:
555
584
  root = os.path.abspath(getattr(args, "path", "."))
556
585
  entries_arg = getattr(args, "entry", None)
@@ -585,11 +614,27 @@ def upload_command(args: argparse.Namespace) -> int:
585
614
  display_name = getattr(args, "display_name", None)
586
615
  description = getattr(args, "description", None)
587
616
  force = bool(getattr(args, "force", False))
617
+ env_file = getattr(args, "env_file", None)
588
618
 
589
- # Ensure FIREWORKS_API_KEY is available to the remote by storing it as a Fireworks secret
619
+ # Load secrets from .env file and ensure they're available on Fireworks
590
620
  try:
591
621
  fw_account_id = get_fireworks_account_id()
622
+
623
+ # Determine .env file path
624
+ if env_file:
625
+ env_file_path = env_file
626
+ else:
627
+ env_file_path = os.path.join(root, ".env")
628
+
629
+ # Load secrets from .env file
630
+ secrets_from_file = _load_secrets_from_env_file(env_file_path)
631
+ secrets_from_env_file = secrets_from_file.copy() # Track what came from .env file
632
+
633
+ # Also ensure FIREWORKS_API_KEY from environment is included
592
634
  fw_api_key_value = get_fireworks_api_key()
635
+ if fw_api_key_value:
636
+ secrets_from_file["FIREWORKS_API_KEY"] = fw_api_key_value
637
+
593
638
  if not fw_account_id and fw_api_key_value:
594
639
  # Attempt to verify and resolve account id from server headers
595
640
  resolved = verify_api_key_and_get_account_id(api_key=fw_api_key_value, api_base=get_fireworks_api_base())
@@ -598,21 +643,27 @@ def upload_command(args: argparse.Namespace) -> int:
598
643
  # Propagate to environment so downstream calls use it if needed
599
644
  os.environ["FIREWORKS_ACCOUNT_ID"] = fw_account_id
600
645
  print(f"Resolved FIREWORKS_ACCOUNT_ID via API verification: {fw_account_id}")
601
- if fw_account_id and fw_api_key_value:
602
- print("Ensuring FIREWORKS_API_KEY is registered as a secret on Fireworks for rollout...")
603
- if create_or_update_fireworks_secret(
604
- account_id=fw_account_id,
605
- key_name="FIREWORKS_API_KEY",
606
- secret_value=fw_api_key_value,
607
- ):
608
- print(" FIREWORKS_API_KEY secret created/updated on Fireworks.")
609
- else:
610
- print("Warning: Failed to create/update FIREWORKS_API_KEY secret on Fireworks.")
646
+
647
+ if fw_account_id and secrets_from_file:
648
+ print(f"Found {len(secrets_from_file)} API keys to upload as Fireworks secrets...")
649
+ if secrets_from_env_file and os.path.exists(env_file_path):
650
+ print(f"Loading secrets from: {env_file_path}")
651
+
652
+ for secret_name, secret_value in secrets_from_file.items():
653
+ print(f"Ensuring {secret_name} is registered as a secret on Fireworks for rollout...")
654
+ if create_or_update_fireworks_secret(
655
+ account_id=fw_account_id,
656
+ key_name=secret_name,
657
+ secret_value=secret_value,
658
+ ):
659
+ print(f"✓ {secret_name} secret created/updated on Fireworks.")
660
+ else:
661
+ print(f"Warning: Failed to create/update {secret_name} secret on Fireworks.")
611
662
  else:
612
663
  if not fw_account_id:
613
- print("Warning: FIREWORKS_ACCOUNT_ID not found; cannot register FIREWORKS_API_KEY secret.")
614
- if not fw_api_key_value:
615
- print("Warning: FIREWORKS_API_KEY not found locally; cannot register secret.")
664
+ print("Warning: FIREWORKS_ACCOUNT_ID not found; cannot register secrets.")
665
+ if not secrets_from_file:
666
+ print("Warning: No API keys found in environment or .env file; no secrets to register.")
616
667
  except Exception as e:
617
668
  print(f"Warning: Skipped Fireworks secret registration due to error: {e}")
618
669
 
@@ -5,6 +5,23 @@ from typing import Any, Dict, List
5
5
  import requests
6
6
 
7
7
 
8
+ def get_user_agent() -> str:
9
+ """
10
+ Returns the user-agent string for eval-protocol CLI requests.
11
+
12
+ Format: eval-protocol-cli/{version}
13
+
14
+ Returns:
15
+ User-agent string identifying the eval-protocol CLI and version.
16
+ """
17
+ try:
18
+ from . import __version__
19
+
20
+ return f"eval-protocol/{__version__}"
21
+ except Exception:
22
+ return "eval-protocol/unknown"
23
+
24
+
8
25
  def load_jsonl(file_path: str) -> List[Dict[str, Any]]:
9
26
  """
10
27
  Reads a JSONL file where each line is a valid JSON object and returns a list of these objects.
@@ -20,6 +20,7 @@ from eval_protocol.auth import (
20
20
  get_fireworks_api_key,
21
21
  verify_api_key_and_get_account_id,
22
22
  )
23
+ from eval_protocol.common_utils import get_user_agent
23
24
  from eval_protocol.typed_interface import EvaluationMode
24
25
 
25
26
  from eval_protocol.get_pep440_version import get_pep440_version
@@ -405,6 +406,7 @@ class Evaluator:
405
406
  headers = {
406
407
  "Authorization": f"Bearer {auth_token}",
407
408
  "Content-Type": "application/json",
409
+ "User-Agent": get_user_agent(),
408
410
  }
409
411
  logger.info(f"Previewing evaluator using API endpoint: {url} with account: {account_id}")
410
412
  logger.debug(f"Preview API Request URL: {url}")
@@ -748,6 +750,7 @@ class Evaluator:
748
750
  headers = {
749
751
  "Authorization": f"Bearer {auth_token}",
750
752
  "Content-Type": "application/json",
753
+ "User-Agent": get_user_agent(),
751
754
  }
752
755
 
753
756
  self._ensure_requirements_present(os.getcwd())
@@ -11,6 +11,7 @@ from typing import Any, Callable, Dict, Iterable, Optional, Tuple
11
11
  import requests
12
12
 
13
13
  from .auth import get_fireworks_account_id, get_fireworks_api_base, get_fireworks_api_key
14
+ from .common_utils import get_user_agent
14
15
 
15
16
 
16
17
  def _map_api_host_to_app_host(api_base: str) -> str:
@@ -157,12 +158,17 @@ def create_dataset_from_jsonl(
157
158
  display_name: Optional[str],
158
159
  jsonl_path: str,
159
160
  ) -> Tuple[str, Dict[str, Any]]:
160
- headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
161
+ headers = {
162
+ "Authorization": f"Bearer {api_key}",
163
+ "Content-Type": "application/json",
164
+ "User-Agent": get_user_agent(),
165
+ }
161
166
  # Count examples quickly
162
167
  example_count = 0
163
168
  with open(jsonl_path, "r", encoding="utf-8") as f:
164
169
  for _ in f:
165
170
  example_count += 1
171
+
166
172
  dataset_url = f"{api_base.rstrip('/')}/v1/accounts/{account_id}/datasets"
167
173
  payload = {
168
174
  "dataset": {
@@ -181,7 +187,10 @@ def create_dataset_from_jsonl(
181
187
  upload_url = f"{api_base.rstrip('/')}/v1/accounts/{account_id}/datasets/{dataset_id}:upload"
182
188
  with open(jsonl_path, "rb") as f:
183
189
  files = {"file": f}
184
- up_headers = {"Authorization": f"Bearer {api_key}"}
190
+ up_headers = {
191
+ "Authorization": f"Bearer {api_key}",
192
+ "User-Agent": get_user_agent(),
193
+ }
185
194
  up_resp = requests.post(upload_url, files=files, headers=up_headers, timeout=600)
186
195
  if up_resp.status_code not in (200, 201):
187
196
  raise RuntimeError(f"Dataset upload failed: {up_resp.status_code} {up_resp.text}")
@@ -195,7 +204,12 @@ def create_reinforcement_fine_tuning_job(
195
204
  body: Dict[str, Any],
196
205
  ) -> Dict[str, Any]:
197
206
  url = f"{api_base.rstrip('/')}/v1/accounts/{account_id}/reinforcementFineTuningJobs"
198
- headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json", "Accept": "application/json"}
207
+ headers = {
208
+ "Authorization": f"Bearer {api_key}",
209
+ "Content-Type": "application/json",
210
+ "Accept": "application/json",
211
+ "User-Agent": get_user_agent(),
212
+ }
199
213
  resp = requests.post(url, json=body, headers=headers, timeout=60)
200
214
  if resp.status_code not in (200, 201):
201
215
  raise RuntimeError(f"RFT job creation failed: {resp.status_code} {resp.text}")
@@ -13,6 +13,8 @@ import aiohttp
13
13
  from omegaconf import DictConfig
14
14
  from pydantic import BaseModel # Added for new models
15
15
 
16
+ from ..common_utils import get_user_agent
17
+
16
18
  logger = logging.getLogger(__name__)
17
19
 
18
20
 
@@ -101,6 +103,7 @@ class FireworksModelClient(ModelClient):
101
103
  "Authorization": f"Bearer {self.api_key}",
102
104
  "Content-Type": "application/json",
103
105
  "Accept": "application/json",
106
+ "User-Agent": get_user_agent(),
104
107
  }
105
108
 
106
109
  debug_payload_log = json.loads(json.dumps(payload))
@@ -11,6 +11,7 @@ from eval_protocol.auth import (
11
11
  get_fireworks_api_base,
12
12
  get_fireworks_api_key,
13
13
  )
14
+ from eval_protocol.common_utils import get_user_agent
14
15
 
15
16
  logger = logging.getLogger(__name__)
16
17
 
@@ -95,6 +96,7 @@ def create_or_update_fireworks_secret(
95
96
  headers = {
96
97
  "Authorization": f"Bearer {resolved_api_key}",
97
98
  "Content-Type": "application/json",
99
+ "User-Agent": get_user_agent(),
98
100
  }
99
101
 
100
102
  # The secret_id for GET/PATCH/DELETE operations is the key_name.
@@ -107,10 +109,10 @@ def create_or_update_fireworks_secret(
107
109
 
108
110
  # Check if secret exists using GET (path uses normalized resource id)
109
111
  resource_id = _normalize_secret_resource_id(key_name)
110
- get_url = f"{resolved_api_base.rstrip('/')}/v1/accounts/{resolved_account_id}/secrets/{resource_id}"
111
112
  secret_exists = False
112
113
  try:
113
- response = requests.get(get_url, headers=headers, timeout=10)
114
+ url = f"{resolved_api_base}/v1/accounts/{resolved_account_id}/secrets/{resource_id}"
115
+ response = requests.get(url, headers=headers, timeout=10)
114
116
  if response.status_code == 200:
115
117
  secret_exists = True
116
118
  logger.info(f"Secret '{key_name}' already exists. Will attempt to update.")
@@ -131,7 +133,6 @@ def create_or_update_fireworks_secret(
131
133
 
132
134
  if secret_exists:
133
135
  # Update existing secret (PATCH)
134
- patch_url = f"{resolved_api_base.rstrip('/')}/v1/accounts/{resolved_account_id}/secrets/{resource_id}"
135
136
  # Body for PATCH requires 'keyName' and 'value'.
136
137
  # Transform key_name for payload: uppercase and underscores
137
138
  payload_key_name = key_name.upper().replace("-", "_")
@@ -146,7 +147,8 @@ def create_or_update_fireworks_secret(
146
147
  payload = {"keyName": payload_key_name, "value": secret_value}
147
148
  try:
148
149
  logger.debug(f"PATCH payload for '{key_name}': {payload}")
149
- response = requests.patch(patch_url, headers=headers, json=payload, timeout=30)
150
+ url = f"{resolved_api_base}/v1/accounts/{resolved_account_id}/secrets/{resource_id}"
151
+ response = requests.patch(url, json=payload, headers=headers, timeout=30)
150
152
  response.raise_for_status()
151
153
  logger.info(f"Successfully updated secret '{key_name}' on Fireworks platform.")
152
154
  return True
@@ -158,7 +160,6 @@ def create_or_update_fireworks_secret(
158
160
  return False
159
161
  else:
160
162
  # Create new secret (POST)
161
- post_url = f"{resolved_api_base.rstrip('/')}/v1/accounts/{resolved_account_id}/secrets"
162
163
  # Body for POST is gatewaySecret. 'name' field in payload is the resource path.
163
164
  # Let's assume for POST, the 'name' in payload can be omitted or is the key_name.
164
165
  # The API should ideally use 'keyName' from URL or a specific 'secretId' in payload for creation if 'name' is server-assigned.
@@ -183,7 +184,8 @@ def create_or_update_fireworks_secret(
183
184
  }
184
185
  try:
185
186
  logger.debug(f"POST payload for '{key_name}': {payload}")
186
- response = requests.post(post_url, headers=headers, json=payload, timeout=30)
187
+ url = f"{resolved_api_base}/v1/accounts/{resolved_account_id}/secrets"
188
+ response = requests.post(url, json=payload, headers=headers, timeout=30)
187
189
  response.raise_for_status()
188
190
  logger.info(
189
191
  f"Successfully created secret '{key_name}' on Fireworks platform. Full name: {response.json().get('name')}"
@@ -217,11 +219,14 @@ def get_fireworks_secret(
217
219
  logger.error("Missing Fireworks API key, base URL, or account ID for getting secret.")
218
220
  return None
219
221
 
220
- headers = {"Authorization": f"Bearer {resolved_api_key}"}
222
+ headers = {
223
+ "Authorization": f"Bearer {resolved_api_key}",
224
+ "User-Agent": get_user_agent(),
225
+ }
221
226
  resource_id = _normalize_secret_resource_id(key_name)
222
- url = f"{resolved_api_base.rstrip('/')}/v1/accounts/{resolved_account_id}/secrets/{resource_id}"
223
227
 
224
228
  try:
229
+ url = f"{resolved_api_base}/v1/accounts/{resolved_account_id}/secrets/{resource_id}"
225
230
  response = requests.get(url, headers=headers, timeout=10)
226
231
  if response.status_code == 200:
227
232
  logger.info(f"Successfully retrieved secret '{key_name}'.")
@@ -254,11 +259,14 @@ def delete_fireworks_secret(
254
259
  logger.error("Missing Fireworks API key, base URL, or account ID for deleting secret.")
255
260
  return False
256
261
 
257
- headers = {"Authorization": f"Bearer {resolved_api_key}"}
262
+ headers = {
263
+ "Authorization": f"Bearer {resolved_api_key}",
264
+ "User-Agent": get_user_agent(),
265
+ }
258
266
  resource_id = _normalize_secret_resource_id(key_name)
259
- url = f"{resolved_api_base.rstrip('/')}/v1/accounts/{resolved_account_id}/secrets/{resource_id}"
260
267
 
261
268
  try:
269
+ url = f"{resolved_api_base}/v1/accounts/{resolved_account_id}/secrets/{resource_id}"
262
270
  response = requests.delete(url, headers=headers, timeout=30)
263
271
  if response.status_code == 200 or response.status_code == 204: # 204 No Content is also success for DELETE
264
272
  logger.info(f"Successfully deleted secret '{key_name}'.")
@@ -7,9 +7,11 @@ import pathlib
7
7
  import re
8
8
  from typing import Any
9
9
 
10
+ from eval_protocol.common_utils import get_user_agent
10
11
  from eval_protocol.directory_utils import find_eval_protocol_dir
11
12
  from eval_protocol.models import EvaluationRow
12
13
  from eval_protocol.pytest.store_experiment_link import store_experiment_link
14
+
13
15
  import requests
14
16
 
15
17
 
@@ -127,10 +129,14 @@ def handle_persist_flow(all_results: list[list[EvaluationRow]], test_func_name:
127
129
  )
128
130
  continue
129
131
 
130
- headers = {"Authorization": f"Bearer {fireworks_api_key}", "Content-Type": "application/json"}
132
+ api_base = "https://api.fireworks.ai"
133
+ headers = {
134
+ "Authorization": f"Bearer {fireworks_api_key}",
135
+ "Content-Type": "application/json",
136
+ "User-Agent": get_user_agent(),
137
+ }
131
138
 
132
139
  # Make dataset first
133
- dataset_url = f"https://api.fireworks.ai/v1/accounts/{fireworks_account_id}/datasets"
134
140
 
135
141
  dataset_payload = { # pyright: ignore[reportUnknownVariableType]
136
142
  "dataset": {
@@ -142,6 +148,7 @@ def handle_persist_flow(all_results: list[list[EvaluationRow]], test_func_name:
142
148
  "datasetId": dataset_name,
143
149
  }
144
150
 
151
+ dataset_url = f"{api_base}/v1/accounts/{fireworks_account_id}/datasets"
145
152
  dataset_response = requests.post(dataset_url, json=dataset_payload, headers=headers) # pyright: ignore[reportUnknownArgumentType]
146
153
 
147
154
  # Skip if dataset creation failed
@@ -157,13 +164,13 @@ def handle_persist_flow(all_results: list[list[EvaluationRow]], test_func_name:
157
164
  dataset_id = dataset_data.get("datasetId", dataset_name) # pyright: ignore[reportAny]
158
165
 
159
166
  # Upload the JSONL file content
160
- upload_url = (
161
- f"https://api.fireworks.ai/v1/accounts/{fireworks_account_id}/datasets/{dataset_id}:upload"
162
- )
163
- upload_headers = {"Authorization": f"Bearer {fireworks_api_key}"}
164
-
167
+ upload_url = f"{api_base}/v1/accounts/{fireworks_account_id}/datasets/{dataset_id}:upload"
165
168
  with open(exp_file, "rb") as f:
166
169
  files = {"file": f}
170
+ upload_headers = {
171
+ "Authorization": f"Bearer {fireworks_api_key}",
172
+ "User-Agent": get_user_agent(),
173
+ }
167
174
  upload_response = requests.post(upload_url, files=files, headers=upload_headers)
168
175
 
169
176
  # Skip if upload failed
@@ -176,7 +183,6 @@ def handle_persist_flow(all_results: list[list[EvaluationRow]], test_func_name:
176
183
  continue
177
184
 
178
185
  # Create evaluation job (optional - don't skip experiment if this fails)
179
- eval_job_url = f"https://api.fireworks.ai/v1/accounts/{fireworks_account_id}/evaluationJobs"
180
186
  # Truncate job ID to fit 63 character limit
181
187
  job_id_base = f"{dataset_name}-job"
182
188
  if len(job_id_base) > 63:
@@ -194,6 +200,7 @@ def handle_persist_flow(all_results: list[list[EvaluationRow]], test_func_name:
194
200
  },
195
201
  }
196
202
 
203
+ eval_job_url = f"{api_base}/v1/accounts/{fireworks_account_id}/evaluationJobs"
197
204
  eval_response = requests.post(eval_job_url, json=eval_job_payload, headers=headers)
198
205
 
199
206
  if eval_response.status_code in [200, 201]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.78
3
+ Version: 0.2.80
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
File without changes
File without changes