eval-protocol 0.2.41__tar.gz → 0.2.43__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (414)
  1. {eval_protocol-0.2.41/eval_protocol.egg-info → eval_protocol-0.2.43}/PKG-INFO +1 -1
  2. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/_version.py +3 -3
  3. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/adapters/fireworks_tracing.py +1 -3
  4. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/auth.py +92 -28
  5. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/cli.py +109 -20
  6. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/cli_commands/upload.py +171 -30
  7. eval_protocol-0.2.43/eval_protocol/directory_utils.py +39 -0
  8. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/evaluation.py +17 -0
  9. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/platform_api.py +20 -9
  10. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/remote_rollout_processor.py +22 -3
  11. {eval_protocol-0.2.41 → eval_protocol-0.2.43/eval_protocol.egg-info}/PKG-INFO +1 -1
  12. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol.egg-info/SOURCES.txt +4 -2
  13. eval_protocol-0.2.43/tests/test_directory_utils.py +95 -0
  14. eval_protocol-0.2.43/tests/test_upload_entrypoint.py +227 -0
  15. eval_protocol-0.2.41/vite-app/dist/assets/index-D3tKqxWU.js → eval_protocol-0.2.43/vite-app/dist/assets/index-C81y9r9l.js +2 -2
  16. eval_protocol-0.2.41/vite-app/dist/assets/index-D3tKqxWU.js.map → eval_protocol-0.2.43/vite-app/dist/assets/index-C81y9r9l.js.map +1 -1
  17. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vite-app/dist/index.html +1 -1
  18. eval_protocol-0.2.41/eval_protocol/directory_utils.py +0 -55
  19. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/LICENSE +0 -0
  20. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/README.md +0 -0
  21. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/development/__init__.py +0 -0
  22. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/development/normalize_sandbox_fusion.py +0 -0
  23. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/development/utils/__init__.py +0 -0
  24. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/development/utils/generate_api_key.py +0 -0
  25. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/development/utils/subprocess_manager.py +0 -0
  26. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/__init__.py +0 -0
  27. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/__main__.py +0 -0
  28. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/adapters/__init__.py +0 -0
  29. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/adapters/base.py +0 -0
  30. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/adapters/bigquery.py +0 -0
  31. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/adapters/braintrust.py +0 -0
  32. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/adapters/huggingface.py +0 -0
  33. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/adapters/langchain.py +0 -0
  34. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/adapters/langfuse.py +0 -0
  35. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/adapters/langsmith.py +0 -0
  36. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/adapters/openai_responses.py +0 -0
  37. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/adapters/trl.py +0 -0
  38. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/adapters/utils.py +0 -0
  39. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/__init__.py +0 -0
  40. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/models.py +0 -0
  41. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/orchestrator.py +0 -0
  42. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/resource_abc.py +0 -0
  43. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/resource_pool.py +0 -0
  44. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/resources/__init__.py +0 -0
  45. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
  46. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
  47. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
  48. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
  49. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
  50. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/resources/docker_resource.py +0 -0
  51. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
  52. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/resources/python_state_resource.py +0 -0
  53. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/resources/sql_resource.py +0 -0
  54. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/task_manager.py +0 -0
  55. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/tool_registry.py +0 -0
  56. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/benchmarks/__init__.py +0 -0
  57. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
  58. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
  59. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/benchmarks/test_aime25.py +0 -0
  60. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
  61. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/benchmarks/test_gpqa.py +0 -0
  62. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
  63. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
  64. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
  65. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/cli_commands/__init__.py +0 -0
  66. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
  67. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/cli_commands/common.py +0 -0
  68. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/cli_commands/deploy.py +0 -0
  69. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
  70. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/cli_commands/logs.py +0 -0
  71. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/cli_commands/preview.py +0 -0
  72. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
  73. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/common_utils.py +0 -0
  74. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/config.py +0 -0
  75. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/data_loader/__init__.py +0 -0
  76. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
  77. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/data_loader/factory_data_loader.py +0 -0
  78. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/data_loader/inline_data_loader.py +0 -0
  79. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/data_loader/models.py +0 -0
  80. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/dataset_logger/__init__.py +0 -0
  81. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
  82. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
  83. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
  84. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
  85. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/datasets/__init__.py +0 -0
  86. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/datasets/loader.py +0 -0
  87. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/event_bus/__init__.py +0 -0
  88. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/event_bus/event_bus.py +0 -0
  89. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/event_bus/logger.py +0 -0
  90. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
  91. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
  92. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/execution/__init__.py +0 -0
  93. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/execution/pipeline.py +0 -0
  94. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/gcp_tools.py +0 -0
  95. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/generation/cache.py +0 -0
  96. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/generation/clients/base.py +0 -0
  97. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/generation/clients.py +0 -0
  98. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/generic_server.py +0 -0
  99. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/get_pep440_version.py +0 -0
  100. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/human_id/__init__.py +0 -0
  101. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/human_id/dictionary.py +0 -0
  102. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/integrations/__init__.py +0 -0
  103. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/integrations/deepeval.py +0 -0
  104. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/integrations/openeval.py +0 -0
  105. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/integrations/trl.py +0 -0
  106. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/log_utils/__init__.py +0 -0
  107. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
  108. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
  109. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
  110. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
  111. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/logging_utils.py +0 -0
  112. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/__init__.py +0 -0
  113. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/adapter.py +0 -0
  114. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/client/__init__.py +0 -0
  115. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/client/connection.py +0 -0
  116. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/clients.py +0 -0
  117. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/execution/__init__.py +0 -0
  118. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/execution/base_policy.py +0 -0
  119. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/execution/manager.py +0 -0
  120. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/execution/policy.py +0 -0
  121. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/grid_renderer.py +0 -0
  122. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/mcp_multi_client.py +0 -0
  123. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/mcpgym.py +0 -0
  124. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/process_manager.py +0 -0
  125. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/session/__init__.py +0 -0
  126. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/session/manager.py +0 -0
  127. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/simple_process_manager.py +0 -0
  128. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/simulation_server.py +0 -0
  129. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_agent/__init__.py +0 -0
  130. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_agent/config.py +0 -0
  131. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_agent/main.py +0 -0
  132. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
  133. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
  134. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
  135. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
  136. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_env.py +0 -0
  137. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/__init__.py +0 -0
  138. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
  139. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
  140. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
  141. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/tau2/README.md +0 -0
  142. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
  143. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
  144. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
  145. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
  146. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/tau2/server.py +0 -0
  147. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
  148. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
  149. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
  150. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
  151. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
  152. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/models.py +0 -0
  153. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/packaging.py +0 -0
  154. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/playback_policy.py +0 -0
  155. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/__init__.py +0 -0
  156. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
  157. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
  158. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
  159. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
  160. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
  161. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
  162. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
  163. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
  164. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
  165. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/evaluation_test.py +0 -0
  166. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
  167. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/exception_config.py +0 -0
  168. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/execution.py +0 -0
  169. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
  170. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/handle_persist_flow.py +0 -0
  171. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/parameterize.py +0 -0
  172. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/plugin.py +0 -0
  173. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/rollout_processor.py +0 -0
  174. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/store_experiment_link.py +0 -0
  175. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/store_results_url.py +0 -0
  176. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/types.py +0 -0
  177. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/utils.py +0 -0
  178. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/validate_signature.py +0 -0
  179. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/quickstart/__init__.py +0 -0
  180. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/quickstart/llm_judge.py +0 -0
  181. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
  182. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/quickstart/llm_judge_langfuse.py +0 -0
  183. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/quickstart/llm_judge_langsmith.py +0 -0
  184. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/quickstart/llm_judge_openai_responses.py +0 -0
  185. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/quickstart/utils.py +0 -0
  186. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/resources.py +0 -0
  187. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/reward_function.py +0 -0
  188. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/__init__.py +0 -0
  189. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/accuracy.py +0 -0
  190. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/accuracy_length.py +0 -0
  191. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/apps_coding_reward.py +0 -0
  192. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/apps_execution_utils.py +0 -0
  193. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/apps_testing_util.py +0 -0
  194. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/bfcl_reward.py +0 -0
  195. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/code_execution.py +0 -0
  196. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/code_execution_utils.py +0 -0
  197. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/cpp_code.py +0 -0
  198. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/deepcoder_reward.py +0 -0
  199. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/format.py +0 -0
  200. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/function_calling.py +0 -0
  201. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/json_schema.py +0 -0
  202. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/language_consistency.py +0 -0
  203. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/lean_prover.py +0 -0
  204. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/length.py +0 -0
  205. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
  206. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/math.py +0 -0
  207. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
  208. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/reasoning_steps.py +0 -0
  209. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/repetition.py +0 -0
  210. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/tag_count.py +0 -0
  211. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rl_processing.py +0 -0
  212. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/server.py +0 -0
  213. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/stats/__init__.py +0 -0
  214. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/stats/confidence_intervals.py +0 -0
  215. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/typed_interface.py +0 -0
  216. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/types/__init__.py +0 -0
  217. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/types/errors.py +0 -0
  218. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/types/remote_rollout_processor.py +0 -0
  219. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/types/types.py +0 -0
  220. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/__init__.py +0 -0
  221. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/batch_evaluation.py +0 -0
  222. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/batch_transformation.py +0 -0
  223. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/check_server_status.py +0 -0
  224. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/dataset_helpers.py +0 -0
  225. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/logs_models.py +0 -0
  226. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/logs_server.py +0 -0
  227. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/module_loader.py +0 -0
  228. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/packaging_utils.py +0 -0
  229. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/show_results_url.py +0 -0
  230. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/static_policy.py +0 -0
  231. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/subprocess_utils.py +0 -0
  232. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/vite_server.py +0 -0
  233. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol.egg-info/dependency_links.txt +0 -0
  234. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol.egg-info/entry_points.txt +0 -0
  235. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol.egg-info/requires.txt +0 -0
  236. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol.egg-info/top_level.txt +0 -0
  237. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/pyproject.toml +0 -0
  238. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/setup.cfg +0 -0
  239. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/setup.py +0 -0
  240. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_accuracy.py +0 -0
  241. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_accuracy_length.py +0 -0
  242. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_adapters_e2e.py +0 -0
  243. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_agent_orchestrator.py +0 -0
  244. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_agent_resources.py +0 -0
  245. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_auth.py +0 -0
  246. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_batch_evaluation.py +0 -0
  247. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_cli.py +0 -0
  248. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_cli_agent.py +0 -0
  249. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_cli_args.py +0 -0
  250. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_code_execution.py +0 -0
  251. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_config.py +0 -0
  252. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_control_plane_separation.py +0 -0
  253. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_cpp_code.py +0 -0
  254. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_data_driven_task_manager.py +0 -0
  255. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_deepcoder_reward.py +0 -0
  256. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_deepeval_integration.py +0 -0
  257. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_deploy_integration.py +0 -0
  258. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_e2b_integration.py +0 -0
  259. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_e2b_js_integration.py +0 -0
  260. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_edge_cases.py +0 -0
  261. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_eval_protocol_import.py +0 -0
  262. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_evaluation.py +0 -0
  263. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_evaluation_integration.py +0 -0
  264. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_evaluation_postprocess.py +0 -0
  265. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_evaluation_preview_integration.py +0 -0
  266. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_event_bus.py +0 -0
  267. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_examples_end_to_end.py +0 -0
  268. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_fireworks_api.py +0 -0
  269. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_format.py +0 -0
  270. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_fractional_code.py +0 -0
  271. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_function_calling.py +0 -0
  272. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_gcp_tools.py +0 -0
  273. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_generic_server.py +0 -0
  274. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_human_id.py +0 -0
  275. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_integration.py +0 -0
  276. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_json_schema.py +0 -0
  277. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_kwargs_validation.py +0 -0
  278. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_language_consistency.py +0 -0
  279. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_lean_prover.py +0 -0
  280. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_lean_prover_runner.py +0 -0
  281. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_length.py +0 -0
  282. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_list_comparison_math_reward.py +0 -0
  283. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_logs_server.py +0 -0
  284. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_logs_server_simple.py +0 -0
  285. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_math.py +0 -0
  286. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_minimal.py +0 -0
  287. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_models.py +0 -0
  288. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_models_rl.py +0 -0
  289. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_multiple_choice_math_reward.py +0 -0
  290. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_n_variant_batch_integration.py +0 -0
  291. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_n_variant_integration.py +0 -0
  292. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_openai_compatibility.py +0 -0
  293. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_openeval_integration.py +0 -0
  294. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_packaging.py +0 -0
  295. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_parallel_rollouts.py +0 -0
  296. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_platform_api.py +0 -0
  297. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_quickstart_utils.py +0 -0
  298. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_readiness.py +0 -0
  299. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_reasoning_steps.py +0 -0
  300. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_repetition.py +0 -0
  301. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_repetition_debug.py +0 -0
  302. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_retry_mechanism.py +0 -0
  303. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_reward_function.py +0 -0
  304. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_reward_protocol_import.py +0 -0
  305. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_rl_processing.py +0 -0
  306. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_rollout_control_plane_integration.py +0 -0
  307. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_server.py +0 -0
  308. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_show_results_url.py +0 -0
  309. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_status_migration_changes.py +0 -0
  310. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_status_migration_integration.py +0 -0
  311. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_status_model.py +0 -0
  312. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_tag_count.py +0 -0
  313. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_tau_bench_airline_smoke.py +0 -0
  314. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_typed_interface.py +0 -0
  315. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_typed_interface_rl.py +0 -0
  316. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_url_handling.py +0 -0
  317. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_vite_server.py +0 -0
  318. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/__init__.py +0 -0
  319. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/agent/__init__.py +0 -0
  320. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/agent/base.py +0 -0
  321. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/agent/llm_agent.py +0 -0
  322. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/api_service/__init__.py +0 -0
  323. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/api_service/api_config.py +0 -0
  324. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/api_service/data_model.py +0 -0
  325. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/api_service/simulation_service.py +0 -0
  326. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/cli.py +0 -0
  327. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/config.py +0 -0
  328. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data/domains/airline/policy.md +0 -0
  329. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data/domains/mock/policy.md +0 -0
  330. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
  331. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data/domains/retail/policy.md +0 -0
  332. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
  333. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
  334. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
  335. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
  336. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
  337. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
  338. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
  339. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data_model/__init__.py +0 -0
  340. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data_model/message.py +0 -0
  341. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data_model/simulation.py +0 -0
  342. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data_model/tasks.py +0 -0
  343. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/__init__.py +0 -0
  344. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/airline/__init__.py +0 -0
  345. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/airline/data_model.py +0 -0
  346. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/airline/environment.py +0 -0
  347. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/airline/tools.py +0 -0
  348. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/airline/utils.py +0 -0
  349. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/mock/__init__.py +0 -0
  350. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/mock/data_model.py +0 -0
  351. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/mock/environment.py +0 -0
  352. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/mock/tools.py +0 -0
  353. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/mock/utils.py +0 -0
  354. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/retail/__init__.py +0 -0
  355. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/retail/data_model.py +0 -0
  356. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/retail/environment.py +0 -0
  357. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/retail/tools.py +0 -0
  358. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/retail/utils.py +0 -0
  359. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/__init__.py +0 -0
  360. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/data_model.py +0 -0
  361. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/environment.py +0 -0
  362. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
  363. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
  364. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
  365. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
  366. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
  367. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
  368. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
  369. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
  370. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/tools.py +0 -0
  371. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
  372. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/user_tools.py +0 -0
  373. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/utils.py +0 -0
  374. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/environment/__init__.py +0 -0
  375. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/environment/db.py +0 -0
  376. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/environment/environment.py +0 -0
  377. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/environment/server.py +0 -0
  378. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/environment/tool.py +0 -0
  379. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/environment/toolkit.py +0 -0
  380. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/environment/utils/interface_agent.py +0 -0
  381. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/evaluator/__init__.py +0 -0
  382. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/evaluator/evaluator.py +0 -0
  383. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/evaluator/evaluator_action.py +0 -0
  384. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/evaluator/evaluator_base.py +0 -0
  385. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
  386. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/evaluator/evaluator_env.py +0 -0
  387. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
  388. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/metrics/__init__.py +0 -0
  389. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/metrics/agent_metrics.py +0 -0
  390. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/metrics/break_down_metrics.py +0 -0
  391. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/orchestrator/__init__.py +0 -0
  392. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/orchestrator/environment_manager.py +0 -0
  393. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/orchestrator/orchestrator.py +0 -0
  394. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/orchestrator/utils.py +0 -0
  395. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/registry.py +0 -0
  396. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/run.py +0 -0
  397. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/scripts/__init__.py +0 -0
  398. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/scripts/check_data.py +0 -0
  399. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/scripts/show_domain_doc.py +0 -0
  400. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/scripts/start_servers.py +0 -0
  401. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/scripts/view_simulations.py +0 -0
  402. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/user/__init__.py +0 -0
  403. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/user/base.py +0 -0
  404. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/user/user_simulator.py +0 -0
  405. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/utils/__init__.py +0 -0
  406. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/utils/display.py +0 -0
  407. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/utils/io_utils.py +0 -0
  408. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/utils/llm_utils.py +0 -0
  409. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/utils/pydantic_utils.py +0 -0
  410. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/utils/utils.py +0 -0
  411. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/versioneer.py +0 -0
  412. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
  413. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vite-app/dist/assets/index-DpYZaoAr.css +0 -0
  414. {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.41
3
+ Version: 0.2.43
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-10-07T15:43:37-0700",
11
+ "date": "2025-10-08T08:52:41-0700",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "289abc56fc56935b45b11da011712fe48d956af1",
15
- "version": "0.2.41"
14
+ "full-revisionid": "535169e7193e6500d8d323e7dbc31c14dca98b96",
15
+ "version": "0.2.43"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -371,9 +371,7 @@ class FireworksTracingAdapter(BaseAdapter):
371
371
  error_msg = error_detail or e.response.text
372
372
 
373
373
  # Retry on 404 if it's due to incomplete/missing traces (backend still indexing)
374
- if e.response.status_code == 404 and (
375
- "Incomplete traces" in error_detail or "No traces found" in error_detail
376
- ):
374
+ if e.response.status_code == 404:
377
375
  should_retry = True
378
376
  except Exception:
379
377
  error_msg = e.response.text
@@ -6,10 +6,48 @@ from typing import Dict, Optional # Added Dict
6
6
 
7
7
  logger = logging.getLogger(__name__)
8
8
 
9
+ # Default locations (used for tests and as fallback). Actual resolution is dynamic via _get_auth_ini_file().
9
10
  FIREWORKS_CONFIG_DIR = Path.home() / ".fireworks"
10
11
  AUTH_INI_FILE = FIREWORKS_CONFIG_DIR / "auth.ini"
11
12
 
12
13
 
14
+ def _get_profile_base_dir() -> Path:
15
+ """
16
+ Resolve the Fireworks configuration base directory following firectl behavior:
17
+ - Default: ~/.fireworks
18
+ - If FIREWORKS_PROFILE is set and non-empty: ~/.fireworks/profiles/<profile>
19
+ """
20
+ profile_name = os.environ.get("FIREWORKS_PROFILE", "").strip()
21
+ base_dir = Path.home() / ".fireworks"
22
+ if profile_name:
23
+ base_dir = base_dir / "profiles" / profile_name
24
+ return base_dir
25
+
26
+
27
+ def _get_auth_ini_file() -> Path:
28
+ """
29
+ Determine the auth.ini file path.
30
+ Priority:
31
+ 1) FIREWORKS_AUTH_FILE env var when set
32
+ 2) ~/.fireworks[/profiles/<profile>]/auth.ini (profile driven)
33
+ """
34
+ auth_file_env = os.environ.get("FIREWORKS_AUTH_FILE")
35
+ if auth_file_env:
36
+ return Path(auth_file_env)
37
+ return _get_profile_base_dir() / "auth.ini"
38
+
39
+
40
+ def _is_profile_active() -> bool:
41
+ """
42
+ Returns True if a specific profile or explicit auth file is active.
43
+ In this case, profile-based credentials should take precedence over env vars.
44
+ """
45
+ if os.environ.get("FIREWORKS_AUTH_FILE"):
46
+ return True
47
+ prof = os.environ.get("FIREWORKS_PROFILE", "").strip()
48
+ return bool(prof)
49
+
50
+
13
51
  def _parse_simple_auth_file(file_path: Path) -> Dict[str, str]:
14
52
  """
15
53
  Parses an auth file with simple key=value lines.
@@ -20,7 +58,7 @@ def _parse_simple_auth_file(file_path: Path) -> Dict[str, str]:
20
58
  if not file_path.exists():
21
59
  return creds
22
60
  try:
23
- with open(file_path, "r") as f:
61
+ with open(file_path, "r", encoding="utf-8") as f:
24
62
  for line in f:
25
63
  line = line.strip()
26
64
  if not line or line.startswith("#") or line.startswith(";"):
@@ -39,7 +77,7 @@ def _parse_simple_auth_file(file_path: Path) -> Dict[str, str]:
39
77
  if key in ["api_key", "account_id"] and value:
40
78
  creds[key] = value
41
79
  except Exception as e:
42
- logger.warning(f"Error during simple parsing of {file_path}: {e}")
80
+ logger.warning("Error during simple parsing of %s: %s", str(file_path), e)
43
81
  return creds
44
82
 
45
83
 
@@ -48,13 +86,14 @@ def _get_credential_from_config_file(key_name: str) -> Optional[str]:
48
86
  Helper to get a specific credential (api_key or account_id) from auth.ini.
49
87
  Tries simple parsing first, then configparser.
50
88
  """
51
- if not AUTH_INI_FILE.exists():
89
+ auth_ini_path = _get_auth_ini_file()
90
+ if not auth_ini_path.exists():
52
91
  return None
53
92
 
54
93
  # 1. Try simple key-value parsing first
55
- simple_creds = _parse_simple_auth_file(AUTH_INI_FILE)
94
+ simple_creds = _parse_simple_auth_file(auth_ini_path)
56
95
  if key_name in simple_creds:
57
- logger.debug(f"Using {key_name} from simple key-value parsing of {AUTH_INI_FILE}.")
96
+ logger.debug("Using %s from simple key-value parsing of %s.", key_name, str(auth_ini_path))
58
97
  return simple_creds[key_name]
59
98
 
60
99
  # 2. Fallback to configparser if not found via simple parsing or if simple parsing failed
@@ -62,30 +101,35 @@ def _get_credential_from_config_file(key_name: str) -> Optional[str]:
62
101
  # but only if simple parsing didn't yield the key.
63
102
  try:
64
103
  config = configparser.ConfigParser()
65
- config.read(AUTH_INI_FILE)
104
+ config.read(auth_ini_path)
66
105
 
67
106
  # Try [fireworks] section
68
107
  if "fireworks" in config and config.has_option("fireworks", key_name):
69
108
  value_from_file = config.get("fireworks", key_name)
70
109
  if value_from_file:
71
- logger.debug(f"Using {key_name} from [fireworks] section in {AUTH_INI_FILE}.")
110
+ logger.debug("Using %s from [fireworks] section in %s.", key_name, str(auth_ini_path))
72
111
  return value_from_file
73
112
 
74
113
  # Try default section (configparser might place items without section header here)
75
114
  if config.has_option(config.default_section, key_name):
76
115
  value_from_default = config.get(config.default_section, key_name)
77
116
  if value_from_default:
78
- logger.debug(f"Using {key_name} from default section [{config.default_section}] in {AUTH_INI_FILE}.")
117
+ logger.debug(
118
+ "Using %s from default section [%s] in %s.",
119
+ key_name,
120
+ config.default_section,
121
+ str(auth_ini_path),
122
+ )
79
123
  return value_from_default
80
124
 
81
125
  except configparser.MissingSectionHeaderError:
82
126
  # This error implies the file is purely key-value, which simple parsing should have handled.
83
127
  # If simple parsing failed to get the key, then it's likely not there or malformed.
84
- logger.debug(f"{AUTH_INI_FILE} has no section headers, and simple parsing did not find {key_name}.")
128
+ logger.debug("%s has no section headers, and simple parsing did not find %s.", str(auth_ini_path), key_name)
85
129
  except configparser.Error as e_config:
86
- logger.warning(f"Configparser error reading {AUTH_INI_FILE} for {key_name}: {e_config}")
130
+ logger.warning("Configparser error reading %s for %s: %s", str(auth_ini_path), key_name, e_config)
87
131
  except Exception as e_general:
88
- logger.warning(f"Unexpected error reading {AUTH_INI_FILE} for {key_name}: {e_general}")
132
+ logger.warning("Unexpected error reading %s for %s: %s", str(auth_ini_path), key_name, e_general)
89
133
 
90
134
  return None
91
135
 
@@ -101,14 +145,24 @@ def get_fireworks_api_key() -> Optional[str]:
101
145
  Returns:
102
146
  The API key if found, otherwise None.
103
147
  """
104
- api_key = os.environ.get("FIREWORKS_API_KEY")
105
- if api_key:
106
- logger.debug("Using FIREWORKS_API_KEY from environment variable.")
107
- return api_key
108
-
109
- api_key_from_file = _get_credential_from_config_file("api_key")
110
- if api_key_from_file:
111
- return api_key_from_file
148
+ # If a profile is active, prefer profile file first, then env
149
+ if _is_profile_active():
150
+ api_key_from_file = _get_credential_from_config_file("api_key")
151
+ if api_key_from_file:
152
+ return api_key_from_file
153
+ api_key = os.environ.get("FIREWORKS_API_KEY")
154
+ if api_key:
155
+ logger.debug("Using FIREWORKS_API_KEY from environment variable (profile active but file missing).")
156
+ return api_key
157
+ else:
158
+ # Default behavior: env overrides file
159
+ api_key = os.environ.get("FIREWORKS_API_KEY")
160
+ if api_key:
161
+ logger.debug("Using FIREWORKS_API_KEY from environment variable.")
162
+ return api_key
163
+ api_key_from_file = _get_credential_from_config_file("api_key")
164
+ if api_key_from_file:
165
+ return api_key_from_file
112
166
 
113
167
  logger.debug("Fireworks API key not found in environment variables or auth.ini.")
114
168
  return None
@@ -125,14 +179,24 @@ def get_fireworks_account_id() -> Optional[str]:
125
179
  Returns:
126
180
  The Account ID if found, otherwise None.
127
181
  """
128
- account_id = os.environ.get("FIREWORKS_ACCOUNT_ID")
129
- if account_id:
130
- logger.debug("Using FIREWORKS_ACCOUNT_ID from environment variable.")
131
- return account_id
132
-
133
- account_id_from_file = _get_credential_from_config_file("account_id")
134
- if account_id_from_file:
135
- return account_id_from_file
182
+ # If a profile is active, prefer profile file first, then env
183
+ if _is_profile_active():
184
+ account_id_from_file = _get_credential_from_config_file("account_id")
185
+ if account_id_from_file:
186
+ return account_id_from_file
187
+ account_id = os.environ.get("FIREWORKS_ACCOUNT_ID")
188
+ if account_id:
189
+ logger.debug("Using FIREWORKS_ACCOUNT_ID from environment variable (profile active but file missing).")
190
+ return account_id
191
+ else:
192
+ # Default behavior: env overrides file
193
+ account_id = os.environ.get("FIREWORKS_ACCOUNT_ID")
194
+ if account_id:
195
+ logger.debug("Using FIREWORKS_ACCOUNT_ID from environment variable.")
196
+ return account_id
197
+ account_id_from_file = _get_credential_from_config_file("account_id")
198
+ if account_id_from_file:
199
+ return account_id_from_file
136
200
 
137
201
  logger.debug("Fireworks Account ID not found in environment variables or auth.ini.")
138
202
  return None
@@ -152,5 +216,5 @@ def get_fireworks_api_base() -> str:
152
216
  if os.environ.get("FIREWORKS_API_BASE"):
153
217
  logger.debug("Using FIREWORKS_API_BASE from environment variable.")
154
218
  else:
155
- logger.debug(f"FIREWORKS_API_BASE not set in environment, defaulting to {api_base}.")
219
+ logger.debug("FIREWORKS_API_BASE not set in environment, defaulting to %s.", api_base)
156
220
  return api_base
@@ -3,32 +3,47 @@ Command-line interface for Eval Protocol.
3
3
  """
4
4
 
5
5
  import argparse
6
- import asyncio
7
- import json
8
6
  import logging
9
7
  import os
10
8
  import sys
11
- import traceback
12
- import uuid
13
9
  from pathlib import Path
10
+ from typing import Any, cast
14
11
 
15
12
  logger = logging.getLogger(__name__)
16
13
 
17
14
 
18
- from .cli_commands.agent_eval_cmd import agent_eval_command
19
15
  from .cli_commands.common import setup_logging
20
- from .cli_commands.deploy import deploy_command
21
- from .cli_commands.deploy_mcp import deploy_mcp_command
22
- from .cli_commands.logs import logs_command
23
- from .cli_commands.preview import preview_command
24
- from .cli_commands.run_eval_cmd import hydra_cli_entry_point
25
- from .cli_commands.upload import upload_command
16
+
17
+ # Re-export deploy_command for backward compatibility with tests importing from eval_protocol.cli
18
+ try: # pragma: no cover - import-time alias for tests
19
+ from .cli_commands import deploy as _deploy_mod
20
+
21
+ deploy_command = _deploy_mod.deploy_command # type: ignore[attr-defined]
22
+ except Exception: # pragma: no cover
23
+ # If import fails in constrained environments, tests that import it will surface the issue
24
+ deploy_command = None # type: ignore[assignment]
25
+
26
+ # Re-export preview_command for backward compatibility with tests importing from eval_protocol.cli
27
+ try: # pragma: no cover - import-time alias for tests
28
+ from .cli_commands import preview as _preview_mod
29
+
30
+ preview_command = _preview_mod.preview_command # type: ignore[attr-defined]
31
+ except Exception: # pragma: no cover
32
+ preview_command = None # type: ignore[assignment]
26
33
 
27
34
 
28
35
  def parse_args(args=None):
29
36
  """Parse command line arguments"""
30
37
  parser = argparse.ArgumentParser(description="eval-protocol: Tools for evaluation and reward modeling")
31
38
  parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging")
39
+ parser.add_argument(
40
+ "--profile",
41
+ help="Fireworks profile to use (reads ~/.fireworks/profiles/<name>/auth.ini and settings.ini)",
42
+ )
43
+ parser.add_argument(
44
+ "--server",
45
+ help="Fireworks API server hostname or URL (e.g., dev.api.fireworks.ai or https://dev.api.fireworks.ai)",
46
+ )
32
47
 
33
48
  subparsers = parser.add_subparsers(dest="command", help="Command to run")
34
49
 
@@ -356,12 +371,68 @@ def main():
356
371
  os.environ["PYTHONPATH"] = f"{current_dir}{os.pathsep}{current_pythonpath}"
357
372
  else:
358
373
  os.environ["PYTHONPATH"] = current_dir
359
- logger.debug(f"Added current directory to PYTHONPATH: {current_dir}")
374
+ logger.debug("Added current directory to PYTHONPATH: %s", current_dir)
360
375
 
361
376
  # Also add to sys.path so it takes effect immediately for the current process
362
377
  if current_dir not in sys.path:
363
378
  sys.path.insert(0, current_dir)
364
379
 
380
+ # Pre-scan raw argv for global flags anywhere (before parsing or imports)
381
+ raw_argv = sys.argv[1:]
382
+
383
+ def _extract_flag_value(argv_list, flag_name):
384
+ # Supports --flag value and --flag=value
385
+ for i, tok in enumerate(argv_list):
386
+ if tok == flag_name:
387
+ if i + 1 < len(argv_list):
388
+ return argv_list[i + 1]
389
+ elif tok.startswith(flag_name + "="):
390
+ return tok.split("=", 1)[1]
391
+ return None
392
+
393
+ pre_profile = _extract_flag_value(raw_argv, "--profile")
394
+ pre_server = _extract_flag_value(raw_argv, "--server")
395
+
396
+ # Handle Fireworks profile selection early so downstream modules see the env
397
+ profile = pre_profile
398
+ if profile:
399
+ try:
400
+ os.environ["FIREWORKS_PROFILE"] = profile
401
+ # Mirror firectl behavior: ~/.fireworks[/profiles/<profile>]
402
+ base_dir = Path.home() / ".fireworks"
403
+ if profile:
404
+ base_dir = base_dir / "profiles" / profile
405
+ os.makedirs(str(base_dir), mode=0o700, exist_ok=True)
406
+
407
+ # Provide helpful env hints for consumers (optional)
408
+ os.environ["FIREWORKS_AUTH_FILE"] = str(base_dir / "auth.ini")
409
+ os.environ["FIREWORKS_SETTINGS_FILE"] = str(base_dir / "settings.ini")
410
+ logger.debug("Using Fireworks profile '%s' at %s", profile, base_dir)
411
+ except OSError as e:
412
+ logger.warning("Failed to initialize Fireworks profile '%s': %s", profile, e)
413
+
414
+ # Proactively resolve and export account_id from the active profile to avoid stale .env overrides
415
+ try:
416
+ from eval_protocol.auth import get_fireworks_account_id as _resolve_account_id
417
+
418
+ resolved_account = _resolve_account_id()
419
+ if resolved_account:
420
+ os.environ["FIREWORKS_ACCOUNT_ID"] = resolved_account
421
+ logger.debug("Resolved account_id from profile '%s': %s", profile, resolved_account)
422
+ except Exception as e: # noqa: B902
423
+ logger.debug("Unable to resolve account_id from profile '%s': %s", profile, e)
424
+
425
+ # Handle Fireworks server selection early
426
+ server = pre_server
427
+ if server:
428
+ # Normalize to full URL if just a hostname is supplied
429
+ normalized = server.strip()
430
+ if not normalized.startswith("http://") and not normalized.startswith("https://"):
431
+ normalized = f"https://{normalized}"
432
+ os.environ["FIREWORKS_API_BASE"] = normalized
433
+ logger.debug("Using Fireworks API base: %s", normalized)
434
+
435
+ # Now parse args normally (so help/commands work), after globals applied
365
436
  # Store original sys.argv[0] because Hydra might manipulate it
366
437
  # and we need it if we're not calling a Hydra app.
367
438
  original_script_name = sys.argv[0]
@@ -370,16 +441,28 @@ def main():
370
441
  setup_logging(args.verbose, getattr(args, "debug", False))
371
442
 
372
443
  if args.command == "preview":
444
+ if preview_command is None:
445
+ raise ImportError("preview_command is unavailable")
373
446
  return preview_command(args)
374
447
  elif args.command == "deploy":
448
+ if deploy_command is None:
449
+ raise ImportError("deploy_command is unavailable")
375
450
  return deploy_command(args)
376
451
  elif args.command == "deploy-mcp":
452
+ from .cli_commands.deploy_mcp import deploy_mcp_command
453
+
377
454
  return deploy_mcp_command(args)
378
455
  elif args.command == "agent-eval":
456
+ from .cli_commands.agent_eval_cmd import agent_eval_command
457
+
379
458
  return agent_eval_command(args)
380
459
  elif args.command == "logs":
460
+ from .cli_commands.logs import logs_command
461
+
381
462
  return logs_command(args)
382
463
  elif args.command == "upload":
464
+ from .cli_commands.upload import upload_command
465
+
383
466
  return upload_command(args)
384
467
  elif args.command == "run":
385
468
  # For the 'run' command, Hydra takes over argument parsing.
@@ -393,7 +476,7 @@ def main():
393
476
  local_conf_dir = os.path.join(current_dir, "conf")
394
477
 
395
478
  if not has_config_path and os.path.isdir(local_conf_dir):
396
- logger.info(f"Auto-detected local conf directory: {local_conf_dir}")
479
+ logger.info("Auto-detected local conf directory: %s", local_conf_dir)
397
480
  hydra_specific_args = [
398
481
  "--config-path",
399
482
  local_conf_dir,
@@ -410,18 +493,21 @@ def main():
410
493
  path_val = hydra_specific_args[i]
411
494
  abs_path = os.path.abspath(path_val)
412
495
  logger.debug(
413
- f"Converting relative --config-path '{path_val}' (space separated) to absolute '{abs_path}'"
496
+ "Converting relative --config-path '%s' (space separated) to absolute '%s'",
497
+ path_val,
498
+ abs_path,
414
499
  )
415
500
  processed_hydra_args.append(abs_path)
416
501
  else:
417
502
  logger.error("--config-path specified without a value.")
418
- pass
419
503
  elif arg.startswith("--config-path="):
420
504
  flag_part, path_val = arg.split("=", 1)
421
505
  processed_hydra_args.append(flag_part)
422
506
  abs_path = os.path.abspath(path_val)
423
507
  logger.debug(
424
- f"Converting relative --config-path '{path_val}' (equals separated) to absolute '{abs_path}'"
508
+ "Converting relative --config-path '%s' (equals separated) to absolute '%s'",
509
+ path_val,
510
+ abs_path,
425
511
  )
426
512
  processed_hydra_args.append(abs_path)
427
513
  else:
@@ -429,14 +515,17 @@ def main():
429
515
  i += 1
430
516
 
431
517
  sys.argv = [sys.argv[0]] + processed_hydra_args
432
- logger.info(f"SYSCALL_ARGV_FOR_HYDRA (after potential abspath conversion): {sys.argv}")
518
+ logger.info("SYSCALL_ARGV_FOR_HYDRA (after potential abspath conversion): %s", sys.argv)
433
519
 
434
520
  try:
435
- hydra_cli_entry_point()
521
+ from .cli_commands.run_eval_cmd import hydra_cli_entry_point
522
+
523
+ hydra_entry = cast(Any, hydra_cli_entry_point)
524
+ hydra_entry() # type: ignore # pylint: disable=no-value-for-parameter
436
525
  return 0
437
- except Exception as e:
526
+ except Exception as e: # pylint: disable=broad-except
438
527
  error_msg = str(e)
439
- logger.error(f"Evaluation failed: {e}")
528
+ logger.error("Evaluation failed: %s", e)
440
529
 
441
530
  # Provide helpful suggestions for common Hydra/config errors
442
531
  if "Cannot find primary config" in error_msg: