eval-protocol 0.2.64.dev2__tar.gz → 0.2.65__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (440)
  1. {eval_protocol-0.2.64.dev2/eval_protocol.egg-info → eval_protocol-0.2.65}/PKG-INFO +1 -5
  2. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/__init__.py +4 -21
  3. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/_version.py +3 -3
  4. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/cli.py +71 -0
  5. eval_protocol-0.2.65/eval_protocol/cli_commands/create_rft.py +254 -0
  6. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/cli_commands/upload.py +18 -0
  7. eval_protocol-0.2.65/eval_protocol/fireworks_rft.py +218 -0
  8. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/proxy/proxy_core/app.py +0 -2
  9. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/proxy/proxy_core/litellm.py +0 -4
  10. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +25 -27
  11. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65/eval_protocol.egg-info}/PKG-INFO +1 -5
  12. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol.egg-info/SOURCES.txt +2 -0
  13. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol.egg-info/requires.txt +0 -5
  14. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/pyproject.toml +0 -6
  15. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/LICENSE +0 -0
  16. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/README.md +0 -0
  17. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/development/__init__.py +0 -0
  18. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/development/normalize_sandbox_fusion.py +0 -0
  19. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/development/utils/__init__.py +0 -0
  20. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/development/utils/generate_api_key.py +0 -0
  21. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/development/utils/subprocess_manager.py +0 -0
  22. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/__main__.py +0 -0
  23. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/__init__.py +0 -0
  24. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/base.py +0 -0
  25. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/bigquery.py +0 -0
  26. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/braintrust.py +0 -0
  27. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/fireworks_tracing.py +0 -0
  28. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/huggingface.py +0 -0
  29. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/langchain.py +0 -0
  30. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/langfuse.py +0 -0
  31. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/langsmith.py +0 -0
  32. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/openai_responses.py +0 -0
  33. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/trl.py +0 -0
  34. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/utils.py +0 -0
  35. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/weave.py +0 -0
  36. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/__init__.py +0 -0
  37. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/models.py +0 -0
  38. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/orchestrator.py +0 -0
  39. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/resource_abc.py +0 -0
  40. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/resource_pool.py +0 -0
  41. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/resources/__init__.py +0 -0
  42. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
  43. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
  44. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
  45. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
  46. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
  47. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/resources/docker_resource.py +0 -0
  48. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
  49. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/resources/python_state_resource.py +0 -0
  50. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/resources/sql_resource.py +0 -0
  51. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/task_manager.py +0 -0
  52. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/tool_registry.py +0 -0
  53. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/auth.py +0 -0
  54. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/benchmarks/__init__.py +0 -0
  55. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
  56. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
  57. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/benchmarks/test_aime25.py +0 -0
  58. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
  59. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/benchmarks/test_gpqa.py +0 -0
  60. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
  61. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
  62. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
  63. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/cli_commands/__init__.py +0 -0
  64. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
  65. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/cli_commands/common.py +0 -0
  66. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/cli_commands/deploy.py +0 -0
  67. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
  68. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/cli_commands/logs.py +0 -0
  69. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/cli_commands/preview.py +0 -0
  70. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
  71. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/common_utils.py +0 -0
  72. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/config.py +0 -0
  73. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/data_loader/__init__.py +0 -0
  74. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
  75. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/data_loader/factory_data_loader.py +0 -0
  76. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/data_loader/inline_data_loader.py +0 -0
  77. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/data_loader/models.py +0 -0
  78. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/dataset_logger/__init__.py +0 -0
  79. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
  80. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
  81. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
  82. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
  83. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/datasets/__init__.py +0 -0
  84. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/datasets/loader.py +0 -0
  85. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/directory_utils.py +0 -0
  86. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/evaluation.py +0 -0
  87. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/event_bus/__init__.py +0 -0
  88. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/event_bus/event_bus.py +0 -0
  89. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/event_bus/logger.py +0 -0
  90. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
  91. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
  92. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/execution/__init__.py +0 -0
  93. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/execution/pipeline.py +0 -0
  94. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/gcp_tools.py +0 -0
  95. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/generation/cache.py +0 -0
  96. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/generation/clients/base.py +0 -0
  97. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/generation/clients.py +0 -0
  98. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/generic_server.py +0 -0
  99. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/get_pep440_version.py +0 -0
  100. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/human_id/__init__.py +0 -0
  101. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/human_id/dictionary.py +0 -0
  102. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/integrations/__init__.py +0 -0
  103. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/integrations/deepeval.py +0 -0
  104. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/integrations/openeval.py +0 -0
  105. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/integrations/trl.py +0 -0
  106. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/log_utils/__init__.py +0 -0
  107. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
  108. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
  109. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
  110. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/log_utils/fireworks_tracing_http_handler.py +0 -0
  111. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/log_utils/init.py +0 -0
  112. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/log_utils/rollout_context.py +0 -0
  113. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
  114. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/log_utils/util.py +0 -0
  115. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/logging_utils.py +0 -0
  116. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/__init__.py +0 -0
  117. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/adapter.py +0 -0
  118. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/client/__init__.py +0 -0
  119. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/client/connection.py +0 -0
  120. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/clients.py +0 -0
  121. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/execution/__init__.py +0 -0
  122. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/execution/base_policy.py +0 -0
  123. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/execution/manager.py +0 -0
  124. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/execution/policy.py +0 -0
  125. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/grid_renderer.py +0 -0
  126. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/mcp_multi_client.py +0 -0
  127. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/mcpgym.py +0 -0
  128. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/process_manager.py +0 -0
  129. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/session/__init__.py +0 -0
  130. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/session/manager.py +0 -0
  131. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/simple_process_manager.py +0 -0
  132. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/simulation_server.py +0 -0
  133. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_agent/__init__.py +0 -0
  134. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_agent/config.py +0 -0
  135. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_agent/main.py +0 -0
  136. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
  137. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
  138. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
  139. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
  140. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_env.py +0 -0
  141. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/__init__.py +0 -0
  142. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
  143. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
  144. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
  145. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/tau2/README.md +0 -0
  146. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
  147. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
  148. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
  149. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
  150. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/tau2/server.py +0 -0
  151. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
  152. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
  153. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
  154. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
  155. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
  156. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/models.py +0 -0
  157. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/packaging.py +0 -0
  158. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/platform_api.py +0 -0
  159. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/playback_policy.py +0 -0
  160. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/proxy/__init__.py +0 -0
  161. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/proxy/proxy_core/__init__.py +0 -0
  162. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/proxy/proxy_core/auth.py +0 -0
  163. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/proxy/proxy_core/langfuse.py +0 -0
  164. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/proxy/proxy_core/main.py +0 -0
  165. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/proxy/proxy_core/models.py +0 -0
  166. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/proxy/proxy_core/redis_utils.py +0 -0
  167. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/__init__.py +0 -0
  168. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
  169. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
  170. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
  171. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
  172. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
  173. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
  174. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
  175. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
  176. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/evaluation_test.py +0 -0
  177. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
  178. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/evaluation_test_utils.py +0 -0
  179. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/exception_config.py +0 -0
  180. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/execution.py +0 -0
  181. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
  182. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/github_action_rollout_processor.py +0 -0
  183. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/handle_persist_flow.py +0 -0
  184. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/parameterize.py +0 -0
  185. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/plugin.py +0 -0
  186. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/remote_rollout_processor.py +0 -0
  187. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/rollout_processor.py +0 -0
  188. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/store_experiment_link.py +0 -0
  189. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/store_results_url.py +0 -0
  190. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/tracing_utils.py +0 -0
  191. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/types.py +0 -0
  192. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/validate_signature.py +0 -0
  193. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/quickstart/__init__.py +0 -0
  194. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/quickstart/aha_judge/__init__.py +0 -0
  195. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/quickstart/aha_judge/llm_judge.py +0 -0
  196. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/quickstart/aha_judge/llm_judge_braintrust.py +0 -0
  197. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/quickstart/aha_judge/llm_judge_langfuse.py +0 -0
  198. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/quickstart/aha_judge/llm_judge_langsmith.py +0 -0
  199. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/quickstart/aha_judge/llm_judge_openai_responses.py +0 -0
  200. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/quickstart/aha_judge/utils.py +0 -0
  201. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/quickstart/llm_judge.py +0 -0
  202. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
  203. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +0 -0
  204. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/quickstart/utils.py +0 -0
  205. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/resources.py +0 -0
  206. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/reward_function.py +0 -0
  207. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/__init__.py +0 -0
  208. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/accuracy.py +0 -0
  209. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/accuracy_length.py +0 -0
  210. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/apps_coding_reward.py +0 -0
  211. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/apps_execution_utils.py +0 -0
  212. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/apps_testing_util.py +0 -0
  213. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/bfcl_reward.py +0 -0
  214. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/code_execution.py +0 -0
  215. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/code_execution_utils.py +0 -0
  216. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/cpp_code.py +0 -0
  217. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/deepcoder_reward.py +0 -0
  218. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/format.py +0 -0
  219. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/function_calling.py +0 -0
  220. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/json_schema.py +0 -0
  221. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/language_consistency.py +0 -0
  222. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/lean_prover.py +0 -0
  223. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/length.py +0 -0
  224. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
  225. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/math.py +0 -0
  226. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
  227. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/reasoning_steps.py +0 -0
  228. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/repetition.py +0 -0
  229. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/tag_count.py +0 -0
  230. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rl_processing.py +0 -0
  231. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/server.py +0 -0
  232. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/stats/__init__.py +0 -0
  233. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/stats/confidence_intervals.py +0 -0
  234. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/typed_interface.py +0 -0
  235. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/types/__init__.py +0 -0
  236. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/types/errors.py +0 -0
  237. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/types/remote_rollout_processor.py +0 -0
  238. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/types/types.py +0 -0
  239. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/__init__.py +0 -0
  240. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/batch_evaluation.py +0 -0
  241. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/batch_transformation.py +0 -0
  242. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/browser_utils.py +0 -0
  243. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/check_server_status.py +0 -0
  244. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/dataset_helpers.py +0 -0
  245. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/evaluation_row_utils.py +0 -0
  246. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/logs_models.py +0 -0
  247. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/logs_server.py +0 -0
  248. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/module_loader.py +0 -0
  249. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/packaging_utils.py +0 -0
  250. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/show_results_url.py +0 -0
  251. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/static_policy.py +0 -0
  252. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/subprocess_utils.py +0 -0
  253. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/vite_server.py +0 -0
  254. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol.egg-info/dependency_links.txt +0 -0
  255. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol.egg-info/entry_points.txt +0 -0
  256. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol.egg-info/top_level.txt +0 -0
  257. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/setup.cfg +0 -0
  258. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/setup.py +0 -0
  259. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_accuracy.py +0 -0
  260. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_accuracy_length.py +0 -0
  261. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_adapters_e2e.py +0 -0
  262. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_agent_orchestrator.py +0 -0
  263. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_agent_resources.py +0 -0
  264. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_auth.py +0 -0
  265. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_batch_evaluation.py +0 -0
  266. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_cli.py +0 -0
  267. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_cli_agent.py +0 -0
  268. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_cli_args.py +0 -0
  269. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_code_execution.py +0 -0
  270. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_config.py +0 -0
  271. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_control_plane_separation.py +0 -0
  272. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_cpp_code.py +0 -0
  273. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_data_driven_task_manager.py +0 -0
  274. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_deepcoder_reward.py +0 -0
  275. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_deepeval_integration.py +0 -0
  276. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_deploy_integration.py +0 -0
  277. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_directory_utils.py +0 -0
  278. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_e2b_integration.py +0 -0
  279. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_e2b_js_integration.py +0 -0
  280. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_edge_cases.py +0 -0
  281. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_ep_upload_e2e.py +0 -0
  282. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_eval_protocol_import.py +0 -0
  283. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_evaluation.py +0 -0
  284. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_evaluation_integration.py +0 -0
  285. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_evaluation_postprocess.py +0 -0
  286. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_evaluation_preview_integration.py +0 -0
  287. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_event_bus.py +0 -0
  288. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_event_bus_helper.py +0 -0
  289. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_examples_end_to_end.py +0 -0
  290. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_fireworks_api.py +0 -0
  291. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_format.py +0 -0
  292. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_fractional_code.py +0 -0
  293. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_function_calling.py +0 -0
  294. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_gcp_tools.py +0 -0
  295. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_generic_server.py +0 -0
  296. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_human_id.py +0 -0
  297. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_integration.py +0 -0
  298. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_json_schema.py +0 -0
  299. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_kwargs_validation.py +0 -0
  300. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_language_consistency.py +0 -0
  301. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_lean_prover.py +0 -0
  302. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_lean_prover_runner.py +0 -0
  303. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_length.py +0 -0
  304. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_list_comparison_math_reward.py +0 -0
  305. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_logs_server.py +0 -0
  306. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_logs_server_simple.py +0 -0
  307. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_math.py +0 -0
  308. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_minimal.py +0 -0
  309. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_models.py +0 -0
  310. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_models_rl.py +0 -0
  311. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_multiple_choice_math_reward.py +0 -0
  312. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_n_variant_batch_integration.py +0 -0
  313. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_n_variant_integration.py +0 -0
  314. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_openai_compatibility.py +0 -0
  315. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_openeval_integration.py +0 -0
  316. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_packaging.py +0 -0
  317. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_parallel_rollouts.py +0 -0
  318. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_platform_api.py +0 -0
  319. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_quickstart_utils.py +0 -0
  320. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_readiness.py +0 -0
  321. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_reasoning_steps.py +0 -0
  322. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_repetition.py +0 -0
  323. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_repetition_debug.py +0 -0
  324. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_retry_mechanism.py +0 -0
  325. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_reward_function.py +0 -0
  326. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_reward_protocol_import.py +0 -0
  327. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_rl_processing.py +0 -0
  328. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_rollout_control_plane_integration.py +0 -0
  329. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_server.py +0 -0
  330. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_show_results_url.py +0 -0
  331. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_status_migration_changes.py +0 -0
  332. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_status_migration_integration.py +0 -0
  333. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_status_model.py +0 -0
  334. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_tag_count.py +0 -0
  335. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_tau_bench_airline_smoke.py +0 -0
  336. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_typed_interface.py +0 -0
  337. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_typed_interface_rl.py +0 -0
  338. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_upload_entrypoint.py +0 -0
  339. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_url_handling.py +0 -0
  340. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_vite_server.py +0 -0
  341. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/__init__.py +0 -0
  342. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/agent/__init__.py +0 -0
  343. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/agent/base.py +0 -0
  344. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/agent/llm_agent.py +0 -0
  345. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/api_service/__init__.py +0 -0
  346. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/api_service/api_config.py +0 -0
  347. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/api_service/data_model.py +0 -0
  348. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/api_service/simulation_service.py +0 -0
  349. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/cli.py +0 -0
  350. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/config.py +0 -0
  351. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data/domains/airline/policy.md +0 -0
  352. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data/domains/mock/policy.md +0 -0
  353. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
  354. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data/domains/retail/policy.md +0 -0
  355. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
  356. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
  357. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
  358. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
  359. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
  360. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
  361. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
  362. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data_model/__init__.py +0 -0
  363. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data_model/message.py +0 -0
  364. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data_model/simulation.py +0 -0
  365. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data_model/tasks.py +0 -0
  366. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/__init__.py +0 -0
  367. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/airline/__init__.py +0 -0
  368. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/airline/data_model.py +0 -0
  369. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/airline/environment.py +0 -0
  370. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/airline/tools.py +0 -0
  371. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/airline/utils.py +0 -0
  372. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/mock/__init__.py +0 -0
  373. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/mock/data_model.py +0 -0
  374. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/mock/environment.py +0 -0
  375. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/mock/tools.py +0 -0
  376. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/mock/utils.py +0 -0
  377. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/retail/__init__.py +0 -0
  378. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/retail/data_model.py +0 -0
  379. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/retail/environment.py +0 -0
  380. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/retail/tools.py +0 -0
  381. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/retail/utils.py +0 -0
  382. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/__init__.py +0 -0
  383. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/data_model.py +0 -0
  384. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/environment.py +0 -0
  385. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
  386. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
  387. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
  388. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
  389. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
  390. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
  391. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
  392. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
  393. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/tools.py +0 -0
  394. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
  395. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/user_tools.py +0 -0
  396. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/utils.py +0 -0
  397. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/environment/__init__.py +0 -0
  398. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/environment/db.py +0 -0
  399. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/environment/environment.py +0 -0
  400. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/environment/server.py +0 -0
  401. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/environment/tool.py +0 -0
  402. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/environment/toolkit.py +0 -0
  403. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/environment/utils/interface_agent.py +0 -0
  404. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/evaluator/__init__.py +0 -0
  405. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/evaluator/evaluator.py +0 -0
  406. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/evaluator/evaluator_action.py +0 -0
  407. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/evaluator/evaluator_base.py +0 -0
  408. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
  409. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/evaluator/evaluator_env.py +0 -0
  410. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
  411. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/metrics/__init__.py +0 -0
  412. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/metrics/agent_metrics.py +0 -0
  413. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/metrics/break_down_metrics.py +0 -0
  414. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/orchestrator/__init__.py +0 -0
  415. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/orchestrator/environment_manager.py +0 -0
  416. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/orchestrator/orchestrator.py +0 -0
  417. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/orchestrator/utils.py +0 -0
  418. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/registry.py +0 -0
  419. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/run.py +0 -0
  420. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/scripts/__init__.py +0 -0
  421. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/scripts/check_data.py +0 -0
  422. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/scripts/show_domain_doc.py +0 -0
  423. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/scripts/start_servers.py +0 -0
  424. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/scripts/view_simulations.py +0 -0
  425. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/user/__init__.py +0 -0
  426. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/user/base.py +0 -0
  427. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/user/user_simulator.py +0 -0
  428. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/utils/__init__.py +0 -0
  429. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/utils/display.py +0 -0
  430. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/utils/io_utils.py +0 -0
  431. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/utils/llm_utils.py +0 -0
  432. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/utils/pydantic_utils.py +0 -0
  433. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/utils/utils.py +0 -0
  434. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/versioneer.py +0 -0
  435. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
  436. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vite-app/dist/assets/index-BnDJont9.css +0 -0
  437. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vite-app/dist/assets/index-Cu9t0G5i.js +0 -0
  438. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vite-app/dist/assets/index-Cu9t0G5i.js.map +0 -0
  439. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
  440. {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vite-app/dist/index.html +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.64.dev2
3
+ Version: 0.2.65
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -107,10 +107,6 @@ Provides-Extra: langgraph-tools
107
107
  Requires-Dist: langgraph>=0.6.7; extra == "langgraph-tools"
108
108
  Requires-Dist: langchain>=0.3.0; extra == "langgraph-tools"
109
109
  Requires-Dist: langchain-fireworks>=0.3.0; extra == "langgraph-tools"
110
- Provides-Extra: proxy
111
- Requires-Dist: redis>=5.0.0; extra == "proxy"
112
- Requires-Dist: langfuse>=2.0.0; extra == "proxy"
113
- Requires-Dist: uuid6>=2025.0.0; extra == "proxy"
114
110
  Dynamic: license-file
115
111
 
116
112
  # Eval Protocol (EP)
@@ -79,28 +79,11 @@ except ImportError:
79
79
  WeaveAdapter = None
80
80
 
81
81
  try:
82
- from .proxy import create_app, AuthProvider, AccountInfo # pyright: ignore[reportAssignmentType]
82
+ from .proxy import create_app, AuthProvider, AccountInfo
83
83
  except ImportError:
84
-
85
- def create_app(*args, **kwargs):
86
- raise ImportError(
87
- "Proxy functionality requires additional dependencies. "
88
- "Please install with: pip install eval-protocol[proxy]"
89
- )
90
-
91
- class AuthProvider:
92
- def __init__(self, *args, **kwargs):
93
- raise ImportError(
94
- "Proxy functionality requires additional dependencies. "
95
- "Please install with: pip install eval-protocol[proxy]"
96
- )
97
-
98
- class AccountInfo:
99
- def __init__(self, *args, **kwargs):
100
- raise ImportError(
101
- "Proxy functionality requires additional dependencies. "
102
- "Please install with: pip install eval-protocol[proxy]"
103
- )
84
+ create_app = None
85
+ AuthProvider = None
86
+ AccountInfo = None
104
87
 
105
88
 
106
89
  warnings.filterwarnings("default", category=DeprecationWarning, module="eval_protocol")
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-10-27T10:41:48-0700",
11
+ "date": "2025-10-27T18:42:49-0700",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "fb1b9a21a32a921cb3af8948f769fdd45148e41a",
15
- "version": "0.2.64-dev2"
14
+ "full-revisionid": "bc7fee952c3a4d4285245a83af0401e25eeb59d8",
15
+ "version": "0.2.65"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -356,6 +356,70 @@ def parse_args(args=None):
356
356
  help="Non-interactive: upload all discovered evaluation tests",
357
357
  )
358
358
 
359
+ # Create command group
360
+ create_parser = subparsers.add_parser(
361
+ "create",
362
+ help="Resource creation commands",
363
+ )
364
+ create_subparsers = create_parser.add_subparsers(dest="create_command")
365
+ rft_parser = create_subparsers.add_parser(
366
+ "rft",
367
+ help="Create a Reinforcement Fine-tuning Job on Fireworks",
368
+ )
369
+ rft_parser.add_argument(
370
+ "--evaluator-id",
371
+ help="Evaluator ID used during upload; if omitted, derive from local traces or a single discovered test",
372
+ )
373
+ # Dataset options
374
+ rft_parser.add_argument(
375
+ "--dataset-id",
376
+ help="Use existing Fireworks dataset id (skip local materialization)",
377
+ )
378
+ rft_parser.add_argument(
379
+ "--dataset-jsonl",
380
+ help="Path to JSONL to upload as a new Fireworks dataset",
381
+ )
382
+ rft_parser.add_argument(
383
+ "--dataset-builder",
384
+ help="Explicit dataset builder spec (module::function or path::function)",
385
+ )
386
+ rft_parser.add_argument(
387
+ "--dataset-display-name",
388
+ help="Display name for dataset on Fireworks (defaults to dataset id)",
389
+ )
390
+ # Training config and evaluator/job settings
391
+ rft_parser.add_argument("--base-model", help="Base model resource id")
392
+ rft_parser.add_argument("--warm-start-from", help="Addon model to warm start from")
393
+ rft_parser.add_argument("--output-model", help="Output model id (defaults from evaluator)")
394
+ rft_parser.add_argument("--epochs", type=int)
395
+ rft_parser.add_argument("--batch-size", type=int)
396
+ rft_parser.add_argument("--learning-rate", type=float)
397
+ rft_parser.add_argument("--max-context-length", type=int)
398
+ rft_parser.add_argument("--lora-rank", type=int)
399
+ rft_parser.add_argument("--accelerator-count", type=int)
400
+ rft_parser.add_argument("--region", help="Fireworks region enum value")
401
+ rft_parser.add_argument("--display-name", help="RFT job display name")
402
+ rft_parser.add_argument("--evaluation-dataset", help="Optional separate eval dataset id")
403
+ rft_parser.add_argument("--eval-auto-carveout", dest="eval_auto_carveout", action="store_true", default=True)
404
+ rft_parser.add_argument("--no-eval-auto-carveout", dest="eval_auto_carveout", action="store_false")
405
+ # Inference params
406
+ rft_parser.add_argument("--temperature", type=float)
407
+ rft_parser.add_argument("--top-p", type=float)
408
+ rft_parser.add_argument("--top-k", type=int)
409
+ rft_parser.add_argument("--max-tokens", type=int)
410
+ rft_parser.add_argument("--n", type=int)
411
+ rft_parser.add_argument("--inference-extra-body", help="JSON string for extra inference params")
412
+ # Wandb
413
+ rft_parser.add_argument("--wandb-enabled", action="store_true")
414
+ rft_parser.add_argument("--wandb-project")
415
+ rft_parser.add_argument("--wandb-entity")
416
+ rft_parser.add_argument("--wandb-run-id")
417
+ rft_parser.add_argument("--wandb-api-key")
418
+ # Misc
419
+ rft_parser.add_argument("--rft-job-id", help="Specify an explicit RFT job id")
420
+ rft_parser.add_argument("--yes", "-y", action="store_true", help="Non-interactive mode")
421
+ rft_parser.add_argument("--dry-run", action="store_true", help="Print planned REST calls without sending")
422
+
359
423
  # Run command (for Hydra-based evaluations)
360
424
  # This subparser intentionally defines no arguments itself.
361
425
  # All arguments after 'run' will be passed to Hydra by parse_known_args.
@@ -481,6 +545,13 @@ def main():
481
545
  from .cli_commands.upload import upload_command
482
546
 
483
547
  return upload_command(args)
548
+ elif args.command == "create":
549
+ if args.create_command == "rft":
550
+ from .cli_commands.create_rft import create_rft_command
551
+
552
+ return create_rft_command(args)
553
+ print("Error: missing subcommand for 'create'. Try: eval-protocol create rft")
554
+ return 1
484
555
  elif args.command == "run":
485
556
  # For the 'run' command, Hydra takes over argument parsing.
486
557
 
@@ -0,0 +1,254 @@
1
+ import json
2
+ import os
3
+ import sys
4
+ from typing import Any, Dict, Optional
5
+
6
+ from ..auth import (
7
+ get_fireworks_account_id,
8
+ get_fireworks_api_base,
9
+ get_fireworks_api_key,
10
+ verify_api_key_and_get_account_id,
11
+ )
12
+ from ..fireworks_rft import (
13
+ _map_api_host_to_app_host,
14
+ build_default_dataset_id,
15
+ build_default_output_model,
16
+ create_dataset_from_jsonl,
17
+ create_reinforcement_fine_tuning_job,
18
+ detect_dataset_builder,
19
+ load_evaluator_trace,
20
+ materialize_dataset_via_builder,
21
+ )
22
+ from .upload import _discover_tests, _normalize_evaluator_id, _resolve_entry_to_qual_and_source
23
+
24
+
25
def _ensure_account_id() -> Optional[str]:
    """Return the Fireworks account id, resolving it from the API key when needed.

    The configured account id is preferred. When it is missing but an API key
    is available, the key is verified against the API base and the resolved
    account id is exported as ``FIREWORKS_ACCOUNT_ID`` for the rest of the
    process before being returned.

    Returns:
        The account id, or the (falsy) configured value when nothing could be
        resolved.
    """
    configured = get_fireworks_account_id()
    key = get_fireworks_api_key()

    # Nothing to resolve: either we already have an id or we have no key to resolve with.
    if configured or not key:
        return configured

    looked_up = verify_api_key_and_get_account_id(api_key=key, api_base=get_fireworks_api_base())
    if not looked_up:
        return configured

    # Cache the resolved id in the environment so later lookups succeed directly.
    os.environ["FIREWORKS_ACCOUNT_ID"] = looked_up
    return looked_up
34
+
35
+
36
+ def _extract_terminal_segment(resource_name: str) -> str:
37
+ """Return the last path segment if a fully-qualified resource name is provided."""
38
+ try:
39
+ return resource_name.strip("/").split("/")[-1]
40
+ except Exception:
41
+ return resource_name
42
+
43
+
44
def _print_links(evaluator_id: str, dataset_id: str, job_name: Optional[str]) -> None:
    """Print Fireworks dashboard URLs for the evaluator, dataset and RFT job.

    Every identifier is reduced to its terminal path segment before being put
    in a URL, so both bare ids and fully-qualified resource names
    (``accounts/<account>/<collection>/<id>``) produce working links.

    Args:
        evaluator_id: Evaluator id or resource name; always printed.
        dataset_id: Dataset id or resource name; skipped when empty.
        job_name: RFT job resource name returned by the API; skipped when
            empty or when no id can be extracted from it.
    """
    api_base = get_fireworks_api_base()
    app_base = _map_api_host_to_app_host(api_base)
    print("\n📊 Dashboard Links:")
    evaluator_slug = _extract_terminal_segment(evaluator_id)
    print(f" Evaluator: {app_base}/dashboard/evaluators/{evaluator_slug}")
    if dataset_id:
        dataset_slug = _extract_terminal_segment(dataset_id)
        print(f" Dataset: {app_base}/dashboard/datasets/{dataset_slug}")
    if job_name:
        # job_name likely like accounts/{account}/reinforcementFineTuningJobs/{id}
        job_id = _extract_terminal_segment(str(job_name).strip())
        if job_id:
            print(f" RFT Job: {app_base}/dashboard/fine-tuning/reinforcement/{job_id}")
59
+
60
+
61
+ def _auto_select_evaluator_id(cwd: str) -> Optional[str]:
62
+ # Try local traces
63
+ traces_dir = os.path.join(cwd, ".eval_protocol", "evaluators")
64
+ if os.path.isdir(traces_dir):
65
+ candidates = [f[:-5] for f in os.listdir(traces_dir) if f.endswith(".json")]
66
+ if len(candidates) == 1:
67
+ return candidates[0]
68
+ # Fall back to discovering a single evaluation_test
69
+ tests = _discover_tests(cwd)
70
+ if len(tests) == 1:
71
+ qualname, source_file_path = tests[0].qualname, tests[0].file_path
72
+ test_func_name = qualname.split(".")[-1]
73
+ source_file_name = os.path.splitext(os.path.basename(source_file_path))[0]
74
+ evaluator_id = _normalize_evaluator_id(f"{source_file_name}-{test_func_name}")
75
+ return evaluator_id
76
+ return None
77
+
78
+
79
def _collect_set_options(args, pairs) -> Dict[str, Any]:
    """Return ``{payload_key: value}`` for each ``(payload_key, arg_name)`` whose CLI value is not None."""
    collected: Dict[str, Any] = {}
    for payload_key, arg_name in pairs:
        value = getattr(args, arg_name, None)
        if value is not None:
            collected[payload_key] = value
    return collected


def _qualify_resource_name(account_id: str, collection: str, resource_id: str) -> str:
    """Prefix *resource_id* with ``accounts/<account>/<collection>/`` unless it is already fully qualified."""
    if resource_id.startswith("accounts/"):
        return resource_id
    return f"accounts/{account_id}/{collection}/{resource_id}"


def _redact_secrets(body: Dict[str, Any]) -> Dict[str, Any]:
    """Return a shallow copy of *body* that is safe to print (W&B API key masked)."""
    shown = dict(body)
    wandb = shown.get("wandbConfig")
    if isinstance(wandb, dict) and wandb.get("apiKey"):
        shown["wandbConfig"] = {**wandb, "apiKey": "***"}
    return shown


def create_rft_command(args) -> int:
    """Create a Reinforcement Fine-tuning (RFT) job on Fireworks from CLI arguments.

    Steps:
        1. Resolve credentials (API key, account id) and the evaluator id.
        2. Validate ``--inference-extra-body`` before any remote call is made.
        3. Use ``--dataset-id`` if given, otherwise create a dataset from
           ``--dataset-jsonl`` (skipped in ``--dry-run``).
        4. Build the job payload and either print it (``--dry-run``) or submit it.

    Args:
        args: Parsed ``argparse`` namespace of the ``create rft`` subcommand.
            Missing attributes are treated as unset.

    Returns:
        ``0`` on success (including dry runs), ``1`` on any error; errors are
        reported on stdout rather than raised.

    Notes:
        * Secrets are never echoed: the dry-run payload masks the W&B API key.
        * NOTE(review): ``--display-name`` and ``--rft-job-id`` are accepted by
          the parser but are not forwarded to the API here — confirm whether
          that is intentional.
        * NOTE(review): the evaluator is sent as the bare evaluator id and
          ``--evaluation-dataset`` is sent verbatim — confirm the API accepts
          unqualified names for these fields.
        * TODO: build the dataset from local evaluator traces / a dataset
          builder when neither ``--dataset-id`` nor ``--dataset-jsonl`` is given.
    """
    evaluator_id: Optional[str] = getattr(args, "evaluator_id", None)
    dry_run: bool = bool(getattr(args, "dry_run", False))

    api_key = get_fireworks_api_key()
    if not api_key:
        print("Error: FIREWORKS_API_KEY not set.")
        return 1

    account_id = _ensure_account_id()
    if not account_id:
        print("Error: FIREWORKS_ACCOUNT_ID not set and could not be resolved.")
        return 1

    api_base = get_fireworks_api_base()

    # Resolve evaluator id if omitted
    if not evaluator_id:
        evaluator_id = _auto_select_evaluator_id(os.getcwd())
        if not evaluator_id:
            print("Error: Could not infer evaluator id. Provide --evaluator-id or run 'eval-protocol upload' first.")
            return 1

    evaluator_resource_name = evaluator_id

    # Fail fast on malformed JSON so we do not upload a dataset for a job that cannot be created.
    inference_extra_body = getattr(args, "inference_extra_body", None)
    if inference_extra_body:
        try:
            json.loads(inference_extra_body)
        except (TypeError, ValueError) as e:
            print(f"Error: --inference-extra-body is not valid JSON: {e}")
            return 1

    # Determine dataset id, creating the dataset from JSONL when no id was given.
    dataset_id = getattr(args, "dataset_id", None)
    if not dataset_id:
        dataset_jsonl = getattr(args, "dataset_jsonl", None)
        if not dataset_jsonl:
            print("Error: Could not determine dataset. Provide --dataset-id or --dataset-jsonl.")
            return 1

        inferred_dataset_id = build_default_dataset_id(evaluator_id)
        if dry_run:
            print("--dry-run: would create dataset and upload JSONL")
            dataset_id = inferred_dataset_id
        else:
            try:
                dataset_id, _ = create_dataset_from_jsonl(
                    account_id=account_id,
                    api_key=api_key,
                    api_base=api_base,
                    dataset_id=inferred_dataset_id,
                    display_name=getattr(args, "dataset_display_name", None) or inferred_dataset_id,
                    jsonl_path=dataset_jsonl,
                )
                print(f"✓ Created and uploaded dataset: {dataset_id}")
            except Exception as e:
                print(f"Error creating/uploading dataset: {e}")
                return 1

    # Build training config
    training_config: Dict[str, Any] = {}
    if getattr(args, "base_model", None):
        training_config["baseModel"] = args.base_model
    if getattr(args, "warm_start_from", None):
        training_config["warmStartFrom"] = args.warm_start_from
    if not training_config:
        # Provide a conservative default if neither is set
        training_config["baseModel"] = "accounts/fireworks/models/llama-v3p1-8b-instruct"

    # Optional hyperparameters: only forwarded when explicitly set on the CLI.
    training_config.update(
        _collect_set_options(
            args,
            [
                ("epochs", "epochs"),
                ("batchSize", "batch_size"),
                ("learningRate", "learning_rate"),
                ("maxContextLength", "max_context_length"),
                ("loraRank", "lora_rank"),
                ("acceleratorCount", "accelerator_count"),
                ("region", "region"),
            ],
        )
    )

    output_model = getattr(args, "output_model", None)
    if output_model:
        # Accept both a bare model id and an already-qualified resource name.
        training_config["outputModel"] = _qualify_resource_name(account_id, "models", output_model)
    else:
        training_config["outputModel"] = build_default_output_model(evaluator_id)

    inference_params = _collect_set_options(
        args,
        [
            ("temperature", "temperature"),
            ("topP", "top_p"),
            ("topK", "top_k"),
            ("maxTokens", "max_tokens"),
            ("n", "n"),
        ],
    )
    if inference_extra_body:
        inference_params["extraBody"] = inference_extra_body

    wandb_config: Optional[Dict[str, Any]] = None
    if getattr(args, "wandb_enabled", False):
        wandb_config = {"enabled": True}
        # Unset W&B options are omitted instead of being sent as explicit nulls.
        wandb_config.update(
            _collect_set_options(
                args,
                [
                    ("apiKey", "wandb_api_key"),
                    ("project", "wandb_project"),
                    ("entity", "wandb_entity"),
                    ("runId", "wandb_run_id"),
                ],
            )
        )

    body: Dict[str, Any] = {
        "dataset": _qualify_resource_name(account_id, "datasets", dataset_id),
        "evaluator": evaluator_resource_name,
        "evalAutoCarveout": bool(getattr(args, "eval_auto_carveout", True)),
        "trainingConfig": training_config,
        "inferenceParameters": inference_params or None,
        "wandbConfig": wandb_config,
    }
    if getattr(args, "evaluation_dataset", None):
        body["evaluationDataset"] = args.evaluation_dataset

    # Clean None fields to avoid noisy payloads
    body = {k: v for k, v in body.items() if v is not None}

    # Dashboard links want the short dataset id even if a full resource name was supplied.
    dataset_slug = _extract_terminal_segment(dataset_id)

    if dry_run:
        print("--dry-run: would create RFT job with body:")
        print(json.dumps(_redact_secrets(body), indent=2))
        _print_links(evaluator_id, dataset_slug, None)
        return 0

    try:
        result = create_reinforcement_fine_tuning_job(
            account_id=account_id, api_key=api_key, api_base=api_base, body=body
        )
        job_name = result.get("name") if isinstance(result, dict) else None
        print("\n✅ Created Reinforcement Fine-tuning Job")
        if job_name:
            print(f" name: {job_name}")
        _print_links(evaluator_id, dataset_slug, job_name)
        return 0
    except Exception as e:
        print(f"Error creating RFT job: {e}")
        return 1
@@ -21,6 +21,7 @@ from eval_protocol.auth import (
21
21
  from eval_protocol.platform_api import create_or_update_fireworks_secret
22
22
 
23
23
  from eval_protocol.evaluation import create_evaluation
24
+ from eval_protocol.fireworks_rft import save_evaluator_trace, detect_dataset_builder
24
25
 
25
26
 
26
27
  @dataclass
@@ -666,6 +667,23 @@ def upload_command(args: argparse.Namespace) -> int:
666
667
  )
667
668
  name = result.get("name", evaluator_id) if isinstance(result, dict) else evaluator_id
668
669
 
670
+ # Persist local evaluator trace for later `create rft`
671
+ try:
672
+ metric_dir = os.path.dirname(source_file_path) if source_file_path else root
673
+ builder_spec = detect_dataset_builder(metric_dir) or None
674
+ trace_payload = {
675
+ "evaluator_id": evaluator_id,
676
+ "evaluator_resource_name": name,
677
+ "entry_point": entry_point,
678
+ "metric_dir": metric_dir,
679
+ "project_root": root,
680
+ "dataset_builder": builder_spec,
681
+ }
682
+ save_evaluator_trace(project_root=root, evaluator_id=evaluator_id, trace=trace_payload)
683
+ except Exception:
684
+ # Non-fatal; continue
685
+ pass
686
+
669
687
  # Print success message with Fireworks dashboard link
670
688
  print(f"\n✅ Successfully uploaded evaluator: {evaluator_id}")
671
689
  print("📊 View in Fireworks Dashboard:")
@@ -0,0 +1,218 @@
1
+ import importlib.util
2
+ import io
3
+ import json
4
+ import os
5
+ import sys
6
+ import tempfile
7
+ import time
8
+ from pathlib import Path
9
+ from typing import Any, Callable, Dict, Iterable, Optional, Tuple
10
+
11
+ import requests
12
+
13
+ from .auth import get_fireworks_account_id, get_fireworks_api_base, get_fireworks_api_key
14
+
15
+
16
+ def _map_api_host_to_app_host(api_base: str) -> str:
17
+ try:
18
+ from urllib.parse import urlparse
19
+
20
+ parsed = urlparse(api_base)
21
+ host = parsed.netloc or parsed.path
22
+ if host.startswith("dev.api.fireworks.ai"):
23
+ return f"{parsed.scheme or 'https'}://dev.fireworks.ai"
24
+ if host.startswith("api."):
25
+ return f"{parsed.scheme or 'https'}://{host.replace('api.', 'app.', 1)}"
26
+ return f"{parsed.scheme or 'https'}://{host}"
27
+ except Exception:
28
+ return "https://app.fireworks.ai"
29
+
30
+
31
def load_evaluator_trace(project_root: str, evaluator_id: str) -> Optional[Dict[str, Any]]:
    """Load the locally persisted trace for an evaluator, if one exists.

    The trace is read from
    ``<project_root>/.eval_protocol/evaluators/<evaluator_id>.json``.

    Args:
        project_root: Directory that contains the ``.eval_protocol`` folder.
        evaluator_id: Identifier used as the trace file's base name.

    Returns:
        The decoded trace dictionary, or ``None`` when the file is missing,
        unreadable, not valid JSON, or does not contain a JSON object.
    """
    trace_path = Path(project_root) / ".eval_protocol" / "evaluators" / f"{evaluator_id}.json"
    try:
        with open(trace_path, "r", encoding="utf-8") as f:
            trace = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # malformed JSON (JSONDecodeError) and undecodable bytes.
        return None
    # Callers are promised a dict; any other JSON payload is treated as absent.
    return trace if isinstance(trace, dict) else None
40
+
41
+
42
def save_evaluator_trace(project_root: str, evaluator_id: str, trace: Dict[str, Any]) -> None:
    """Persist an evaluator trace as pretty-printed JSON under the project root.

    The trace is written to
    ``<project_root>/.eval_protocol/evaluators/<evaluator_id>.json``; missing
    parent directories are created.

    Args:
        project_root: Directory that will contain the ``.eval_protocol`` folder.
        evaluator_id: Identifier used as the trace file's base name.
        trace: JSON-serializable mapping to store.

    Raises:
        TypeError: If ``trace`` contains values that cannot be JSON-encoded.
        OSError: If the directory or file cannot be created or written.
    """
    # Serialize before touching the filesystem: opening the file in "w" mode
    # truncates it, so a serialization failure must not be able to destroy a
    # previously saved trace or leave a half-written file behind.
    serialized = json.dumps(trace, indent=2, ensure_ascii=False)
    base_dir = Path(project_root) / ".eval_protocol" / "evaluators"
    base_dir.mkdir(parents=True, exist_ok=True)
    trace_path = base_dir / f"{evaluator_id}.json"
    with open(trace_path, "w", encoding="utf-8") as f:
        f.write(serialized)
48
+
49
+
50
def detect_dataset_builder(metric_dir: str) -> Optional[str]:
    """Best-effort scan for a dataset builder symbol inside ``metric_dir``.

    Every ``.py`` file below ``metric_dir`` is executed as a module and checked
    for one of the well-known builder names. When several candidates exist the
    most preferred name wins (``build_training_dataset``,
    ``get_training_dataset``, ``get_dataset``, ``dataset``,
    ``DATASET_BUILDER``, in that order); ties are resolved by the sorted
    directory walk order so the result is deterministic.

    NOTE(security): scanning *executes* each Python file found, so this must
    only be pointed at trusted project directories.

    Args:
        metric_dir: Directory to scan recursively.

    Returns:
        A builder spec of the form ``"path/to/module.py::symbol"``, or ``None``
        when nothing was found or the scan itself failed.
    """
    # Recognised symbol names, most preferred first.
    symbol_names = (
        "build_training_dataset",
        "get_training_dataset",
        "get_dataset",
        "dataset",
        "DATASET_BUILDER",
    )
    preference = {symbol: rank for rank, symbol in enumerate(symbol_names)}
    missing = object()
    try:
        candidates: list[Tuple[str, str]] = []
        for root, dirs, files in os.walk(metric_dir):
            dirs.sort()  # in-place sort makes os.walk descend deterministically
            for name in sorted(files):
                if not name.endswith(".py"):
                    continue
                file_path = os.path.join(root, name)
                # Load module via file location
                spec = importlib.util.spec_from_file_location(Path(file_path).stem, file_path)
                if not spec or not spec.loader:
                    continue
                module = importlib.util.module_from_spec(spec)
                previous = sys.modules.get(spec.name, missing)
                try:
                    # Registered only while executing, so code that looks
                    # itself up in sys.modules during import keeps working.
                    sys.modules[spec.name] = module
                    spec.loader.exec_module(module)  # type: ignore[attr-defined]
                except (Exception, SystemExit):
                    # Broken files or scripts calling sys.exit() at import
                    # time must not abort the scan.
                    continue
                finally:
                    # The module is registered under its bare file stem, which
                    # can shadow a real module of the same name; always put
                    # sys.modules back the way it was.
                    if previous is missing:
                        sys.modules.pop(spec.name, None)
                    else:
                        sys.modules[spec.name] = previous
                for symbol in symbol_names:
                    if hasattr(module, symbol):
                        candidates.append((file_path, symbol))
        if not candidates:
            return None
        # min() returns the first best-ranked entry, matching a stable sort.
        best_file, best_symbol = min(candidates, key=lambda item: preference.get(item[1], 99))
        return f"{best_file}::{best_symbol}"
    except Exception:
        # Detection is advisory only; any unexpected failure means "not found".
        return None
99
+
100
+
101
+ def _import_builder(builder_spec: str) -> Callable[[], Iterable[Dict[str, Any]]]:
102
+ target, func = builder_spec.split("::", 1)
103
+ # If target looks like a path, load from file
104
+ if "/" in target or target.endswith(".py") or os.path.exists(target):
105
+ file_path = target if target.endswith(".py") else f"{target}.py"
106
+ if not os.path.isfile(file_path):
107
+ raise ValueError(f"Builder file not found: {file_path}")
108
+ module_name = Path(file_path).stem
109
+ spec = importlib.util.spec_from_file_location(module_name, file_path)
110
+ if not spec or not spec.loader:
111
+ raise ValueError(f"Unable to load builder module: {file_path}")
112
+ module = importlib.util.module_from_spec(spec)
113
+ sys.modules[spec.name] = module
114
+ spec.loader.exec_module(module) # type: ignore[attr-defined]
115
+ else:
116
+ # Treat as module path
117
+ module = importlib.import_module(target)
118
+ if not hasattr(module, func):
119
+ raise ValueError(f"Function '{func}' not found in module '{getattr(module, '__name__', target)}'")
120
+ callable_obj = getattr(module, func)
121
+ if callable(callable_obj):
122
+ return callable_obj # type: ignore[return-value]
123
+ # If symbol is a constant like DATASET_BUILDER, expect it to be callable
124
+ if hasattr(callable_obj, "__call__"):
125
+ return callable_obj # type: ignore[return-value]
126
+ raise ValueError("Dataset builder is not callable")
127
+
128
+
129
def materialize_dataset_via_builder(builder_spec: str, output_path: Optional[str] = None) -> Tuple[str, int]:
    """Run a dataset builder and write its rows to a JSONL file.

    Args:
        builder_spec: Builder spec string resolved through ``_import_builder``.
        output_path: Destination file. When ``None`` a temporary file with the
            prefix ``ep_rft_dataset_`` and suffix ``.jsonl`` is created.

    Returns:
        A ``(path, row_count)`` tuple: the file that was written and the number
        of rows serialized into it.

    Raises:
        Whatever the builder, JSON serialization or file I/O raises. A
        temporary file created by this function is removed again in that case;
        a caller-supplied ``output_path`` is left untouched.
    """
    builder = _import_builder(builder_spec)
    rows_iter = builder()
    created_temp = output_path is None
    if created_temp:
        fd, output_path = tempfile.mkstemp(prefix="ep_rft_dataset_", suffix=".jsonl")
        # Only the path is needed; the file is reopened in text mode below.
        os.close(fd)
    count = 0
    succeeded = False
    try:
        with open(output_path, "w", encoding="utf-8") as f:
            for row in rows_iter:
                f.write(json.dumps(row, ensure_ascii=False) + "\n")
                count += 1
        succeeded = True
    finally:
        # Lazy builders (generators) can fail midway; do not leak the
        # temporary file this function created when that happens.
        if created_temp and not succeeded:
            try:
                os.unlink(output_path)
            except OSError:
                pass
    return output_path, count
142
+
143
+
144
def create_dataset_from_jsonl(
    account_id: str,
    api_key: str,
    api_base: str,
    dataset_id: str,
    display_name: Optional[str],
    jsonl_path: str,
) -> Tuple[str, Dict[str, Any]]:
    """Create a dataset record and upload a JSONL file into it.

    Two requests are issued: a JSON ``POST`` to the account's ``datasets``
    collection that registers the dataset, followed by a multipart ``POST`` of
    the file to the dataset's ``:upload`` endpoint.

    Args:
        account_id: Account that will own the dataset.
        api_key: Bearer token sent with both requests.
        api_base: API base URL; trailing slashes are ignored.
        dataset_id: Identifier for the new dataset.
        display_name: Display name; ``dataset_id`` is used when falsy.
        jsonl_path: Path to the JSONL file. Its line count is sent as
            ``exampleCount``.

    Returns:
        ``(dataset_id, creation_response)`` where ``creation_response`` is the
        decoded JSON body of the dataset creation request.

    Raises:
        RuntimeError: If either request answers with a status other than
            200 or 201.
    """
    accepted_statuses = (200, 201)
    json_headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    # One example per line of the file.
    with open(jsonl_path, "r", encoding="utf-8") as jsonl_file:
        example_count = sum(1 for _ in jsonl_file)

    datasets_url = f"{api_base.rstrip('/')}/v1/accounts/{account_id}/datasets"
    dataset_fields = {
        "displayName": display_name or dataset_id,
        "evalProtocol": {},
        "format": "FORMAT_UNSPECIFIED",
        "exampleCount": str(example_count),
    }
    create_resp = requests.post(
        datasets_url,
        json={"dataset": dataset_fields, "datasetId": dataset_id},
        headers=json_headers,
        timeout=60,
    )
    if create_resp.status_code not in accepted_statuses:
        raise RuntimeError(f"Dataset creation failed: {create_resp.status_code} {create_resp.text}")
    created = create_resp.json()

    # No Content-Type here: requests sets the multipart boundary itself.
    auth_only_headers = {"Authorization": f"Bearer {api_key}"}
    with open(jsonl_path, "rb") as payload_file:
        upload_resp = requests.post(
            f"{datasets_url}/{dataset_id}:upload",
            files={"file": payload_file},
            headers=auth_only_headers,
            timeout=600,
        )
    if upload_resp.status_code not in accepted_statuses:
        raise RuntimeError(f"Dataset upload failed: {upload_resp.status_code} {upload_resp.text}")
    return dataset_id, created
181
+
182
+
183
def create_reinforcement_fine_tuning_job(
    account_id: str,
    api_key: str,
    api_base: str,
    body: Dict[str, Any],
) -> Dict[str, Any]:
    """Submit a reinforcement fine-tuning job creation request.

    Args:
        account_id: Account under which the job is created.
        api_key: Bearer token for the request.
        api_base: API base URL; trailing slashes are ignored.
        body: Request payload, sent as JSON unchanged.

    Returns:
        The decoded JSON response body.

    Raises:
        RuntimeError: If the response status is neither 200 nor 201.
    """
    endpoint = f"{api_base.rstrip('/')}/v1/accounts/{account_id}/reinforcementFineTuningJobs"
    request_headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
    response = requests.post(endpoint, json=body, headers=request_headers, timeout=60)
    if response.status_code in (200, 201):
        return response.json()
    raise RuntimeError(f"RFT job creation failed: {response.status_code} {response.text}")
195
+
196
+
197
def build_default_dataset_id(evaluator_id: str) -> str:
    """Derive a default dataset id from an evaluator id.

    The evaluator id is lower-cased, underscores become hyphens, and
    ``-dataset-<YYYYmmddHHMMSS>`` (current local time) is appended.
    """
    timestamp = time.strftime("%Y%m%d%H%M%S")
    slug = evaluator_id.lower().replace("_", "-")
    return "-".join((slug, "dataset", timestamp))
201
+
202
+
203
def build_default_output_model(evaluator_id: str) -> str:
    """Derive a default output model name from an evaluator id.

    The evaluator id is lower-cased, underscores become hyphens, and the
    suffix ``-rft`` is appended.
    """
    slug = evaluator_id.lower().replace("_", "-")
    return slug + "-rft"
206
+
207
+
208
# Names exported by `from <this module> import *`.
# NOTE(review): the underscore-prefixed `_map_api_host_to_app_host` is listed
# explicitly, which makes a star-import export it despite its private name —
# confirm this is intended.
__all__ = [
    "load_evaluator_trace",
    "save_evaluator_trace",
    "detect_dataset_builder",
    "materialize_dataset_via_builder",
    "create_dataset_from_jsonl",
    "create_reinforcement_fine_tuning_job",
    "build_default_dataset_id",
    "build_default_output_model",
    "_map_api_host_to_app_host",
]
@@ -208,7 +208,6 @@ def create_app(
208
208
  encoded_base_url: Optional[str] = None,
209
209
  config: ProxyConfig = Depends(get_config),
210
210
  redis_client: redis.Redis = Depends(get_redis),
211
- _: None = Depends(require_auth),
212
211
  ):
213
212
  params = ChatParams(
214
213
  project_id=project_id,
@@ -233,7 +232,6 @@ def create_app(
233
232
  request: Request,
234
233
  config: ProxyConfig = Depends(get_config),
235
234
  redis_client: redis.Redis = Depends(get_redis),
236
- _: None = Depends(require_auth),
237
235
  ):
238
236
  params = ChatParams(project_id=project_id)
239
237
  return await handle_chat_completion(