eval-protocol 0.2.57__tar.gz → 0.2.57.dev2__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (439)
  1. {eval_protocol-0.2.57/eval_protocol.egg-info → eval_protocol-0.2.57.dev2}/PKG-INFO +1 -1
  2. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/__init__.py +0 -2
  3. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/_version.py +3 -3
  4. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/adapters/fireworks_tracing.py +49 -0
  5. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/cli.py +10 -0
  6. eval_protocol-0.2.57.dev2/eval_protocol/cli_commands/logs.py +57 -0
  7. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/cli_commands/upload.py +15 -19
  8. eval_protocol-0.2.57.dev2/eval_protocol/event_bus/__init__.py +25 -0
  9. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +11 -0
  10. eval_protocol-0.2.57.dev2/eval_protocol/log_utils/fireworks_tracing_http_handler.py +138 -0
  11. eval_protocol-0.2.57.dev2/eval_protocol/log_utils/init.py +69 -0
  12. eval_protocol-0.2.57.dev2/eval_protocol/log_utils/rollout_context.py +84 -0
  13. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/proxy/proxy_core/redis_utils.py +11 -2
  14. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/evaluation_test.py +48 -14
  15. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/remote_rollout_processor.py +37 -65
  16. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/tracing_utils.py +0 -2
  17. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/utils/logs_server.py +78 -5
  18. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2/eval_protocol.egg-info}/PKG-INFO +1 -1
  19. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol.egg-info/SOURCES.txt +2 -0
  20. eval_protocol-0.2.57/eval_protocol/cli_commands/logs.py +0 -76
  21. eval_protocol-0.2.57/eval_protocol/event_bus/__init__.py +0 -5
  22. eval_protocol-0.2.57/eval_protocol/log_utils/fireworks_tracing_http_handler.py +0 -63
  23. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/LICENSE +0 -0
  24. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/README.md +0 -0
  25. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/development/__init__.py +0 -0
  26. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/development/normalize_sandbox_fusion.py +0 -0
  27. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/development/utils/__init__.py +0 -0
  28. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/development/utils/generate_api_key.py +0 -0
  29. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/development/utils/subprocess_manager.py +0 -0
  30. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/__main__.py +0 -0
  31. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/adapters/__init__.py +0 -0
  32. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/adapters/base.py +0 -0
  33. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/adapters/bigquery.py +0 -0
  34. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/adapters/braintrust.py +0 -0
  35. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/adapters/huggingface.py +0 -0
  36. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/adapters/langchain.py +0 -0
  37. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/adapters/langfuse.py +0 -0
  38. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/adapters/langsmith.py +0 -0
  39. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/adapters/openai_responses.py +0 -0
  40. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/adapters/trl.py +0 -0
  41. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/adapters/utils.py +0 -0
  42. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/adapters/weave.py +0 -0
  43. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/agent/__init__.py +0 -0
  44. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/agent/models.py +0 -0
  45. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/agent/orchestrator.py +0 -0
  46. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/agent/resource_abc.py +0 -0
  47. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/agent/resource_pool.py +0 -0
  48. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/agent/resources/__init__.py +0 -0
  49. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
  50. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
  51. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
  52. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
  53. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
  54. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/agent/resources/docker_resource.py +0 -0
  55. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
  56. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/agent/resources/python_state_resource.py +0 -0
  57. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/agent/resources/sql_resource.py +0 -0
  58. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/agent/task_manager.py +0 -0
  59. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/agent/tool_registry.py +0 -0
  60. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/auth.py +0 -0
  61. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/benchmarks/__init__.py +0 -0
  62. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
  63. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
  64. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/benchmarks/test_aime25.py +0 -0
  65. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
  66. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/benchmarks/test_gpqa.py +0 -0
  67. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
  68. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
  69. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
  70. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/cli_commands/__init__.py +0 -0
  71. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
  72. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/cli_commands/common.py +0 -0
  73. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/cli_commands/deploy.py +0 -0
  74. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
  75. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/cli_commands/preview.py +0 -0
  76. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
  77. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/common_utils.py +0 -0
  78. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/config.py +0 -0
  79. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/data_loader/__init__.py +0 -0
  80. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
  81. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/data_loader/factory_data_loader.py +0 -0
  82. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/data_loader/inline_data_loader.py +0 -0
  83. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/data_loader/models.py +0 -0
  84. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/dataset_logger/__init__.py +0 -0
  85. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
  86. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
  87. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
  88. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
  89. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/datasets/__init__.py +0 -0
  90. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/datasets/loader.py +0 -0
  91. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/directory_utils.py +0 -0
  92. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/evaluation.py +0 -0
  93. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/event_bus/event_bus.py +0 -0
  94. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/event_bus/logger.py +0 -0
  95. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
  96. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
  97. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/execution/__init__.py +0 -0
  98. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/execution/pipeline.py +0 -0
  99. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/gcp_tools.py +0 -0
  100. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/generation/cache.py +0 -0
  101. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/generation/clients/base.py +0 -0
  102. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/generation/clients.py +0 -0
  103. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/generic_server.py +0 -0
  104. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/get_pep440_version.py +0 -0
  105. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/human_id/__init__.py +0 -0
  106. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/human_id/dictionary.py +0 -0
  107. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/integrations/__init__.py +0 -0
  108. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/integrations/deepeval.py +0 -0
  109. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/integrations/openeval.py +0 -0
  110. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/integrations/trl.py +0 -0
  111. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/log_utils/__init__.py +0 -0
  112. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
  113. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
  114. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
  115. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/log_utils/util.py +0 -0
  116. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/logging_utils.py +0 -0
  117. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp/__init__.py +0 -0
  118. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp/adapter.py +0 -0
  119. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp/client/__init__.py +0 -0
  120. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp/client/connection.py +0 -0
  121. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp/clients.py +0 -0
  122. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp/execution/__init__.py +0 -0
  123. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp/execution/base_policy.py +0 -0
  124. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp/execution/manager.py +0 -0
  125. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp/execution/policy.py +0 -0
  126. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp/grid_renderer.py +0 -0
  127. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp/mcp_multi_client.py +0 -0
  128. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp/mcpgym.py +0 -0
  129. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp/process_manager.py +0 -0
  130. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp/session/__init__.py +0 -0
  131. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp/session/manager.py +0 -0
  132. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp/simple_process_manager.py +0 -0
  133. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp/simulation_server.py +0 -0
  134. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp_agent/__init__.py +0 -0
  135. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp_agent/config.py +0 -0
  136. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp_agent/main.py +0 -0
  137. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
  138. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
  139. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
  140. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
  141. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp_env.py +0 -0
  142. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp_servers/__init__.py +0 -0
  143. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
  144. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
  145. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
  146. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp_servers/tau2/README.md +0 -0
  147. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
  148. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
  149. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
  150. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
  151. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp_servers/tau2/server.py +0 -0
  152. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
  153. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
  154. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
  155. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
  156. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
  157. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/models.py +0 -0
  158. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/packaging.py +0 -0
  159. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/platform_api.py +0 -0
  160. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/playback_policy.py +0 -0
  161. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/proxy/__init__.py +0 -0
  162. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/proxy/proxy_core/__init__.py +0 -0
  163. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/proxy/proxy_core/app.py +0 -0
  164. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/proxy/proxy_core/auth.py +0 -0
  165. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/proxy/proxy_core/langfuse.py +0 -0
  166. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/proxy/proxy_core/litellm.py +0 -0
  167. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/proxy/proxy_core/main.py +0 -0
  168. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/proxy/proxy_core/models.py +0 -0
  169. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/__init__.py +0 -0
  170. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
  171. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
  172. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
  173. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
  174. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
  175. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
  176. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
  177. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
  178. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
  179. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
  180. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/evaluation_test_utils.py +0 -0
  181. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/exception_config.py +0 -0
  182. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/execution.py +0 -0
  183. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
  184. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/github_action_rollout_processor.py +0 -0
  185. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/handle_persist_flow.py +0 -0
  186. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/parameterize.py +0 -0
  187. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/plugin.py +0 -0
  188. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/rollout_processor.py +0 -0
  189. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/store_experiment_link.py +0 -0
  190. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/store_results_url.py +0 -0
  191. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/types.py +0 -0
  192. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/pytest/validate_signature.py +0 -0
  193. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/quickstart/__init__.py +0 -0
  194. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/quickstart/aha_judge/__init__.py +0 -0
  195. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/quickstart/aha_judge/llm_judge.py +0 -0
  196. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/quickstart/aha_judge/llm_judge_braintrust.py +0 -0
  197. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/quickstart/aha_judge/llm_judge_langfuse.py +0 -0
  198. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/quickstart/aha_judge/llm_judge_langsmith.py +0 -0
  199. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/quickstart/aha_judge/llm_judge_openai_responses.py +0 -0
  200. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/quickstart/aha_judge/utils.py +0 -0
  201. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/quickstart/llm_judge.py +0 -0
  202. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
  203. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/quickstart/utils.py +0 -0
  204. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/resources.py +0 -0
  205. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/reward_function.py +0 -0
  206. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/rewards/__init__.py +0 -0
  207. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/rewards/accuracy.py +0 -0
  208. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/rewards/accuracy_length.py +0 -0
  209. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/rewards/apps_coding_reward.py +0 -0
  210. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/rewards/apps_execution_utils.py +0 -0
  211. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/rewards/apps_testing_util.py +0 -0
  212. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/rewards/bfcl_reward.py +0 -0
  213. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/rewards/code_execution.py +0 -0
  214. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/rewards/code_execution_utils.py +0 -0
  215. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/rewards/cpp_code.py +0 -0
  216. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/rewards/deepcoder_reward.py +0 -0
  217. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/rewards/format.py +0 -0
  218. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/rewards/function_calling.py +0 -0
  219. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/rewards/json_schema.py +0 -0
  220. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/rewards/language_consistency.py +0 -0
  221. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/rewards/lean_prover.py +0 -0
  222. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/rewards/length.py +0 -0
  223. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
  224. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/rewards/math.py +0 -0
  225. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
  226. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/rewards/reasoning_steps.py +0 -0
  227. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/rewards/repetition.py +0 -0
  228. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/rewards/tag_count.py +0 -0
  229. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/rl_processing.py +0 -0
  230. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/server.py +0 -0
  231. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/stats/__init__.py +0 -0
  232. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/stats/confidence_intervals.py +0 -0
  233. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/typed_interface.py +0 -0
  234. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/types/__init__.py +0 -0
  235. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/types/errors.py +0 -0
  236. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/types/remote_rollout_processor.py +0 -0
  237. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/types/types.py +0 -0
  238. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/utils/__init__.py +0 -0
  239. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/utils/batch_evaluation.py +0 -0
  240. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/utils/batch_transformation.py +0 -0
  241. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/utils/browser_utils.py +0 -0
  242. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/utils/check_server_status.py +0 -0
  243. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/utils/dataset_helpers.py +0 -0
  244. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/utils/evaluation_row_utils.py +0 -0
  245. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/utils/logs_models.py +0 -0
  246. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/utils/module_loader.py +0 -0
  247. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/utils/packaging_utils.py +0 -0
  248. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/utils/show_results_url.py +0 -0
  249. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/utils/static_policy.py +0 -0
  250. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/utils/subprocess_utils.py +0 -0
  251. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol/utils/vite_server.py +0 -0
  252. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol.egg-info/dependency_links.txt +0 -0
  253. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol.egg-info/entry_points.txt +0 -0
  254. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol.egg-info/requires.txt +0 -0
  255. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/eval_protocol.egg-info/top_level.txt +0 -0
  256. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/pyproject.toml +0 -0
  257. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/setup.cfg +0 -0
  258. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/setup.py +0 -0
  259. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_accuracy.py +0 -0
  260. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_accuracy_length.py +0 -0
  261. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_adapters_e2e.py +0 -0
  262. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_agent_orchestrator.py +0 -0
  263. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_agent_resources.py +0 -0
  264. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_auth.py +0 -0
  265. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_batch_evaluation.py +0 -0
  266. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_cli.py +0 -0
  267. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_cli_agent.py +0 -0
  268. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_cli_args.py +0 -0
  269. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_code_execution.py +0 -0
  270. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_config.py +0 -0
  271. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_control_plane_separation.py +0 -0
  272. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_cpp_code.py +0 -0
  273. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_data_driven_task_manager.py +0 -0
  274. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_deepcoder_reward.py +0 -0
  275. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_deepeval_integration.py +0 -0
  276. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_deploy_integration.py +0 -0
  277. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_directory_utils.py +0 -0
  278. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_e2b_integration.py +0 -0
  279. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_e2b_js_integration.py +0 -0
  280. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_edge_cases.py +0 -0
  281. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_eval_protocol_import.py +0 -0
  282. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_evaluation.py +0 -0
  283. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_evaluation_integration.py +0 -0
  284. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_evaluation_postprocess.py +0 -0
  285. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_evaluation_preview_integration.py +0 -0
  286. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_event_bus.py +0 -0
  287. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_event_bus_helper.py +0 -0
  288. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_examples_end_to_end.py +0 -0
  289. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_fireworks_api.py +0 -0
  290. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_format.py +0 -0
  291. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_fractional_code.py +0 -0
  292. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_function_calling.py +0 -0
  293. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_gcp_tools.py +0 -0
  294. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_generic_server.py +0 -0
  295. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_human_id.py +0 -0
  296. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_integration.py +0 -0
  297. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_json_schema.py +0 -0
  298. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_kwargs_validation.py +0 -0
  299. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_language_consistency.py +0 -0
  300. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_lean_prover.py +0 -0
  301. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_lean_prover_runner.py +0 -0
  302. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_length.py +0 -0
  303. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_list_comparison_math_reward.py +0 -0
  304. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_logs_server.py +0 -0
  305. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_logs_server_simple.py +0 -0
  306. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_math.py +0 -0
  307. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_minimal.py +0 -0
  308. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_models.py +0 -0
  309. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_models_rl.py +0 -0
  310. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_multiple_choice_math_reward.py +0 -0
  311. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_n_variant_batch_integration.py +0 -0
  312. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_n_variant_integration.py +0 -0
  313. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_openai_compatibility.py +0 -0
  314. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_openeval_integration.py +0 -0
  315. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_packaging.py +0 -0
  316. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_parallel_rollouts.py +0 -0
  317. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_platform_api.py +0 -0
  318. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_quickstart_utils.py +0 -0
  319. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_readiness.py +0 -0
  320. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_reasoning_steps.py +0 -0
  321. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_repetition.py +0 -0
  322. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_repetition_debug.py +0 -0
  323. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_retry_mechanism.py +0 -0
  324. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_reward_function.py +0 -0
  325. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_reward_protocol_import.py +0 -0
  326. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_rl_processing.py +0 -0
  327. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_rollout_control_plane_integration.py +0 -0
  328. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_server.py +0 -0
  329. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_show_results_url.py +0 -0
  330. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_status_migration_changes.py +0 -0
  331. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_status_migration_integration.py +0 -0
  332. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_status_model.py +0 -0
  333. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_tag_count.py +0 -0
  334. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_tau_bench_airline_smoke.py +0 -0
  335. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_typed_interface.py +0 -0
  336. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_typed_interface_rl.py +0 -0
  337. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_upload_entrypoint.py +0 -0
  338. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_url_handling.py +0 -0
  339. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/tests/test_vite_server.py +0 -0
  340. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/__init__.py +0 -0
  341. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/agent/__init__.py +0 -0
  342. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/agent/base.py +0 -0
  343. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/agent/llm_agent.py +0 -0
  344. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/api_service/__init__.py +0 -0
  345. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/api_service/api_config.py +0 -0
  346. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/api_service/data_model.py +0 -0
  347. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/api_service/simulation_service.py +0 -0
  348. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/cli.py +0 -0
  349. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/config.py +0 -0
  350. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/data/domains/airline/policy.md +0 -0
  351. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/data/domains/mock/policy.md +0 -0
  352. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
  353. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/data/domains/retail/policy.md +0 -0
  354. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
  355. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
  356. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
  357. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
  358. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
  359. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
  360. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
  361. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/data_model/__init__.py +0 -0
  362. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/data_model/message.py +0 -0
  363. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/data_model/simulation.py +0 -0
  364. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/data_model/tasks.py +0 -0
  365. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/__init__.py +0 -0
  366. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/airline/__init__.py +0 -0
  367. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/airline/data_model.py +0 -0
  368. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/airline/environment.py +0 -0
  369. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/airline/tools.py +0 -0
  370. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/airline/utils.py +0 -0
  371. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/mock/__init__.py +0 -0
  372. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/mock/data_model.py +0 -0
  373. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/mock/environment.py +0 -0
  374. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/mock/tools.py +0 -0
  375. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/mock/utils.py +0 -0
  376. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/retail/__init__.py +0 -0
  377. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/retail/data_model.py +0 -0
  378. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/retail/environment.py +0 -0
  379. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/retail/tools.py +0 -0
  380. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/retail/utils.py +0 -0
  381. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/telecom/__init__.py +0 -0
  382. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/telecom/data_model.py +0 -0
  383. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/telecom/environment.py +0 -0
  384. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
  385. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
  386. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
  387. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
  388. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
  389. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
  390. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
  391. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
  392. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/telecom/tools.py +0 -0
  393. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
  394. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/telecom/user_tools.py +0 -0
  395. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/domains/telecom/utils.py +0 -0
  396. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/environment/__init__.py +0 -0
  397. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/environment/db.py +0 -0
  398. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/environment/environment.py +0 -0
  399. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/environment/server.py +0 -0
  400. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/environment/tool.py +0 -0
  401. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/environment/toolkit.py +0 -0
  402. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/environment/utils/interface_agent.py +0 -0
  403. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/evaluator/__init__.py +0 -0
  404. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/evaluator/evaluator.py +0 -0
  405. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/evaluator/evaluator_action.py +0 -0
  406. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/evaluator/evaluator_base.py +0 -0
  407. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
  408. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/evaluator/evaluator_env.py +0 -0
  409. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
  410. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/metrics/__init__.py +0 -0
  411. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/metrics/agent_metrics.py +0 -0
  412. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/metrics/break_down_metrics.py +0 -0
  413. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/orchestrator/__init__.py +0 -0
  414. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/orchestrator/environment_manager.py +0 -0
  415. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/orchestrator/orchestrator.py +0 -0
  416. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/orchestrator/utils.py +0 -0
  417. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/registry.py +0 -0
  418. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/run.py +0 -0
  419. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/scripts/__init__.py +0 -0
  420. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/scripts/check_data.py +0 -0
  421. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/scripts/show_domain_doc.py +0 -0
  422. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/scripts/start_servers.py +0 -0
  423. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/scripts/view_simulations.py +0 -0
  424. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/user/__init__.py +0 -0
  425. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/user/base.py +0 -0
  426. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/user/user_simulator.py +0 -0
  427. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/utils/__init__.py +0 -0
  428. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/utils/display.py +0 -0
  429. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/utils/io_utils.py +0 -0
  430. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/utils/llm_utils.py +0 -0
  431. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/utils/pydantic_utils.py +0 -0
  432. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vendor/tau2/utils/utils.py +0 -0
  433. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/versioneer.py +0 -0
  434. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
  435. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vite-app/dist/assets/index-BnDJont9.css +0 -0
  436. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vite-app/dist/assets/index-Cu9t0G5i.js +0 -0
  437. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vite-app/dist/assets/index-Cu9t0G5i.js.map +0 -0
  438. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
  439. {eval_protocol-0.2.57 → eval_protocol-0.2.57.dev2}/vite-app/dist/index.html +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.57
3
+ Version: 0.2.57.dev2
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -36,7 +36,6 @@ from .utils.evaluation_row_utils import (
36
36
  filter_longest_conversation,
37
37
  )
38
38
  from .pytest import evaluation_test, SingleTurnRolloutProcessor, RemoteRolloutProcessor, GithubActionRolloutProcessor
39
- from .pytest.remote_rollout_processor import create_elasticsearch_config_from_env
40
39
  from .pytest.parameterize import DefaultParameterIdGenerator
41
40
  from .log_utils.elasticsearch_direct_http_handler import ElasticsearchDirectHttpHandler
42
41
  from .log_utils.rollout_id_filter import RolloutIdFilter
@@ -90,7 +89,6 @@ except ImportError:
90
89
  warnings.filterwarnings("default", category=DeprecationWarning, module="eval_protocol")
91
90
 
92
91
  __all__ = [
93
- "create_elasticsearch_config_from_env",
94
92
  "ElasticsearchConfig",
95
93
  "ElasticsearchDirectHttpHandler",
96
94
  "RolloutIdFilter",
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-10-15T12:11:42-0700",
11
+ "date": "2025-10-21T14:44:45-0700",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "e3a7e386b88f6bc402fb82042664cbb4ded44eca",
15
- "version": "0.2.57"
14
+ "full-revisionid": "5a0eb89e557f1362bc17acd8a02c25a072dc3092",
15
+ "version": "0.2.57-dev2"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -265,6 +265,55 @@ class FireworksTracingAdapter(BaseAdapter):
265
265
  self.base_url = base_url.rstrip("/")
266
266
  self.timeout = timeout
267
267
 
268
+ def search_logs(self, tags: List[str], limit: int = 100, hours_back: int = 24) -> List[Dict[str, Any]]:
269
+ """Fetch logs from Fireworks tracing gateway /logs endpoint.
270
+
271
+ Returns entries with keys: timestamp, message, severity, tags.
272
+ """
273
+ if not tags:
274
+ raise ValueError("At least one tag is required to fetch logs")
275
+
276
+ headers = {"Authorization": f"Bearer {os.environ.get('FIREWORKS_API_KEY')}"}
277
+ params: Dict[str, Any] = {"tags": tags, "limit": limit, "hours_back": hours_back, "program": "eval_protocol"}
278
+
279
+ # Try /logs first, fall back to /v1/logs if not found
280
+ urls_to_try = [f"{self.base_url}/logs", f"{self.base_url}/v1/logs"]
281
+ data: Dict[str, Any] = {}
282
+ last_error: Optional[str] = None
283
+ for url in urls_to_try:
284
+ try:
285
+ response = requests.get(url, params=params, timeout=self.timeout, headers=headers)
286
+ if response.status_code == 404:
287
+ # Try next variant
288
+ last_error = f"404 for {url}"
289
+ continue
290
+ response.raise_for_status()
291
+ data = response.json() or {}
292
+ break
293
+ except requests.exceptions.RequestException as e:
294
+ last_error = str(e)
295
+ continue
296
+ else:
297
+ # All attempts failed
298
+ if last_error:
299
+ logger.error("Failed to fetch logs from Fireworks (tried %s): %s", urls_to_try, last_error)
300
+ return []
301
+
302
+ entries: List[Dict[str, Any]] = data.get("entries", []) or []
303
+ # Normalize minimal shape
304
+ results: List[Dict[str, Any]] = []
305
+ for e in entries:
306
+ results.append(
307
+ {
308
+ "timestamp": e.get("timestamp"),
309
+ "message": e.get("message"),
310
+ "severity": e.get("severity", "INFO"),
311
+ "tags": e.get("tags", []),
312
+ "status": e.get("status"),
313
+ }
314
+ )
315
+ return results
316
+
268
317
  def get_evaluation_rows(
269
318
  self,
270
319
  tags: List[str],
@@ -307,6 +307,16 @@ def parse_args(args=None):
307
307
  action="store_true",
308
308
  help="Use env vars for Elasticsearch config (requires ELASTICSEARCH_URL, ELASTICSEARCH_API_KEY, ELASTICSEARCH_INDEX_NAME)",
309
309
  )
310
+ logs_parser.add_argument(
311
+ "--use-fireworks",
312
+ action="store_true",
313
+ help="Force Fireworks tracing backend for logs UI (overrides env auto-detection)",
314
+ )
315
+ logs_parser.add_argument(
316
+ "--use-elasticsearch",
317
+ action="store_true",
318
+ help="Force Elasticsearch backend for logs UI (overrides env auto-detection)",
319
+ )
310
320
 
311
321
  # Upload command
312
322
  upload_parser = subparsers.add_parser(
@@ -0,0 +1,57 @@
1
+ """
2
+ CLI command for serving logs with file watching and real-time updates.
3
+ """
4
+
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import os
9
+ from ..utils.logs_server import serve_logs
10
+
11
+
12
+ def logs_command(args):
13
+ """Serve logs with file watching and real-time updates"""
14
+
15
+ port = args.port
16
+ print("🚀 Starting Eval Protocol Logs Server")
17
+ print(f"🌐 URL: http://localhost:{port}")
18
+ print(f"🔌 WebSocket: ws://localhost:{port}/ws")
19
+ print(f"👀 Watching paths: {['current directory']}")
20
+ print(f"🔍 Debug mode: {args.debug}")
21
+ print("Press Ctrl+C to stop the server")
22
+ print("-" * 50)
23
+
24
+ # Backend selection: Fireworks first when API key present, unless overridden
25
+ use_fireworks = False
26
+ if getattr(args, "use_fireworks", False):
27
+ use_fireworks = True
28
+ elif getattr(args, "use_elasticsearch", False):
29
+ use_fireworks = False
30
+ else:
31
+ use_fireworks = bool(os.environ.get("FIREWORKS_API_KEY"))
32
+
33
+ # Setup backend configs
34
+ elasticsearch_config = None
35
+ # Prefer explicit FW_TRACING_GATEWAY_BASE_URL, then GATEWAY_URL from env (remote validation),
36
+ # finally default to public tracing.fireworks.ai
37
+ fireworks_base_url = (
38
+ os.environ.get("FW_TRACING_GATEWAY_BASE_URL")
39
+ or os.environ.get("GATEWAY_URL")
40
+ or "https://tracing.fireworks.ai"
41
+ )
42
+
43
+ try:
44
+ serve_logs(
45
+ port=args.port,
46
+ elasticsearch_config=elasticsearch_config,
47
+ debug=args.debug,
48
+ backend="fireworks" if use_fireworks else "elasticsearch",
49
+ fireworks_base_url=fireworks_base_url if use_fireworks else None,
50
+ )
51
+ return 0
52
+ except KeyboardInterrupt:
53
+ print("\n🛑 Server stopped by user")
54
+ return 0
55
+ except Exception as e:
56
+ print(f"❌ Error starting server: {e}")
57
+ return 1
@@ -267,32 +267,29 @@ def _parse_entry(entry: str, cwd: str) -> tuple[str, str]:
267
267
  def _resolve_entry_to_qual_and_source(entry: str, cwd: str) -> tuple[str, str]:
268
268
  target, func = _parse_entry(entry, cwd)
269
269
 
270
- # Check if target looks like a file path
270
+ # Determine the file path to load
271
271
  if "/" in target or "\\" in target or os.path.exists(target):
272
- # It's a file path - convert to absolute and load as module
272
+ # It's a file path - convert to absolute
273
273
  if not os.path.isabs(target):
274
274
  target = os.path.abspath(os.path.join(cwd, target))
275
-
276
275
  if not target.endswith(".py"):
277
276
  target = target + ".py"
278
-
279
277
  if not os.path.isfile(target):
280
278
  raise ValueError(f"File not found: {target}")
281
-
282
- # Import module from file path
283
- spec = importlib.util.spec_from_file_location(Path(target).stem, target)
284
- if not spec or not spec.loader:
285
- raise ValueError(f"Unable to load module from path: {target}")
286
- module = importlib.util.module_from_spec(spec)
287
- sys.modules[spec.name] = module
288
- spec.loader.exec_module(module) # type: ignore[attr-defined]
289
- module_name = spec.name
290
279
  source_file_path = target
291
280
  else:
292
- # Treat as module path (e.g., "my_package.my_module")
293
- module_name = target
294
- module = importlib.import_module(module_name)
295
- source_file_path = getattr(module, "__file__", "") or ""
281
+ # Treat dotted name as a file path
282
+ dotted_as_path = target.replace(".", "/") + ".py"
283
+ source_file_path = os.path.join(cwd, dotted_as_path)
284
+
285
+ # Load the module from the file path
286
+ spec = importlib.util.spec_from_file_location(Path(source_file_path).stem, source_file_path)
287
+ if not spec or not spec.loader:
288
+ raise ValueError(f"Unable to load module from path: {source_file_path}")
289
+ module = importlib.util.module_from_spec(spec)
290
+ sys.modules[spec.name] = module
291
+ spec.loader.exec_module(module) # type: ignore[attr-defined]
292
+ module_name = spec.name
296
293
 
297
294
  if not hasattr(module, func):
298
295
  raise ValueError(f"Function '{func}' not found in module '{module_name}'")
@@ -591,8 +588,7 @@ def upload_command(args: argparse.Namespace) -> int:
591
588
 
592
589
  print(f"\nUploading evaluator '{evaluator_id}' for {qualname.split('.')[-1]}...")
593
590
  try:
594
- # Always treat as a single evaluator (single-metric) even if folder has helper modules
595
- test_dir = os.path.dirname(source_file_path) if source_file_path else root
591
+ test_dir = root
596
592
  metric_name = os.path.basename(test_dir) or "metric"
597
593
  result = create_evaluation(
598
594
  evaluator_id=evaluator_id,
@@ -0,0 +1,25 @@
1
+ # Global event bus instance - uses SqliteEventBus for cross-process functionality
2
+ from eval_protocol.event_bus.event_bus import EventBus
3
+
4
+
5
+ def _get_default_event_bus():
6
+ from eval_protocol.event_bus.sqlite_event_bus import SqliteEventBus
7
+
8
+ return SqliteEventBus()
9
+
10
+
11
+ # Lazy property that creates the event bus only when accessed
12
+ class _LazyEventBus(EventBus):
13
+ def __init__(self):
14
+ self._event_bus: EventBus | None = None
15
+
16
+ def _get_event_bus(self):
17
+ if self._event_bus is None:
18
+ self._event_bus = _get_default_event_bus()
19
+ return self._event_bus
20
+
21
+ def __getattr__(self, name):
22
+ return getattr(self._get_event_bus(), name)
23
+
24
+
25
+ event_bus: EventBus = _LazyEventBus()
@@ -60,6 +60,17 @@ class ElasticsearchDirectHttpHandler(logging.Handler):
60
60
  if status_info:
61
61
  data.update(status_info)
62
62
 
63
+ # Optional correlation enrichment
64
+ experiment_id = getattr(record, "experiment_id", None)
65
+ if experiment_id is not None:
66
+ data["experiment_id"] = experiment_id
67
+ run_id = getattr(record, "run_id", None)
68
+ if run_id is not None:
69
+ data["run_id"] = run_id
70
+ rollout_ids = getattr(record, "rollout_ids", None)
71
+ if rollout_ids is not None:
72
+ data["rollout_ids"] = rollout_ids
73
+
63
74
  # Schedule the HTTP request to run asynchronously
64
75
  self._schedule_async_send(data, record)
65
76
  except Exception as e:
@@ -0,0 +1,138 @@
1
+ import logging
2
+ import os
3
+ import threading
4
+ from datetime import datetime, timezone
5
+ from typing import Optional, Any, Dict, List, cast
6
+
7
+ import requests
8
+
9
+
10
+ class FireworksTracingHttpHandler(logging.Handler):
11
+ """Logging handler that posts structured logs to tracing.fireworks gateway /logs endpoint."""
12
+
13
+ def __init__(self, gateway_base_url: Optional[str] = None, rollout_id_env: str = "EP_ROLLOUT_ID") -> None:
14
+ super().__init__()
15
+ self.gateway_base_url = (
16
+ gateway_base_url or os.getenv("FW_TRACING_GATEWAY_BASE_URL") or "https://tracing.fireworks.ai"
17
+ )
18
+ self.rollout_id_env = rollout_id_env
19
+ self._session = requests.Session()
20
+ self._lock = threading.Lock()
21
+ # Include Authorization header if FIREWORKS_API_KEY is available
22
+ api_key = os.environ.get("FIREWORKS_API_KEY")
23
+ if api_key:
24
+ try:
25
+ self._session.headers.update({"Authorization": f"Bearer {api_key}"})
26
+ except Exception:
27
+ pass
28
+
29
+ def emit(self, record: logging.LogRecord) -> None:
30
+ try:
31
+ if not self.gateway_base_url:
32
+ return
33
+ rollout_id = self._get_rollout_id(record)
34
+ if not rollout_id:
35
+ return
36
+ payload = self._build_payload(record, rollout_id)
37
+ base = self.gateway_base_url.rstrip("/")
38
+ url = f"{base}/logs"
39
+ # Optional debug prints to aid local diagnostics
40
+ if os.environ.get("EP_DEBUG") == "true":
41
+ try:
42
+ tags_val = payload.get("tags")
43
+ tags_len = len(tags_val) if isinstance(tags_val, list) else 0
44
+ msg_val = payload.get("message")
45
+ msg_preview = msg_val[:80] if isinstance(msg_val, str) else msg_val
46
+ print(f"[FW_LOG] POST {url} rollout_id={rollout_id} tags={tags_len} msg={msg_preview}")
47
+ except Exception:
48
+ pass
49
+ with self._lock:
50
+ resp = self._session.post(url, json=payload, timeout=5)
51
+ if os.environ.get("EP_DEBUG") == "true":
52
+ try:
53
+ print(f"[FW_LOG] resp={resp.status_code}")
54
+ except Exception:
55
+ pass
56
+ # Fallback to /v1/logs if /logs is not found
57
+ if resp is not None and getattr(resp, "status_code", None) == 404:
58
+ alt = f"{base}/v1/logs"
59
+ if os.environ.get("EP_DEBUG") == "true":
60
+ try:
61
+ tags_val = payload.get("tags")
62
+ tags_len = len(tags_val) if isinstance(tags_val, list) else 0
63
+ print(f"[FW_LOG] RETRY POST {alt} rollout_id={rollout_id} tags={tags_len}")
64
+ except Exception:
65
+ pass
66
+ with self._lock:
67
+ resp2 = self._session.post(alt, json=payload, timeout=5)
68
+ if os.environ.get("EP_DEBUG") == "true":
69
+ try:
70
+ print(f"[FW_LOG] retry resp={resp2.status_code}")
71
+ except Exception:
72
+ pass
73
+ except Exception:
74
+ # Avoid raising exceptions from logging
75
+ self.handleError(record)
76
+
77
+ def _get_rollout_id(self, record: logging.LogRecord) -> Optional[str]:
78
+ if hasattr(record, "rollout_id") and cast(Any, getattr(record, "rollout_id")) is not None:
79
+ return str(cast(Any, getattr(record, "rollout_id")))
80
+ return os.getenv(self.rollout_id_env)
81
+
82
+ def _get_status_info(self, record: logging.LogRecord) -> Optional[Dict[str, Any]]:
83
+ """Extract status information from the log record's extra data."""
84
+ # Check if 'status' is in the extra data (passed via extra parameter)
85
+ if hasattr(record, "status") and record.status is not None: # type: ignore
86
+ status = record.status # type: ignore
87
+
88
+ # Handle Status class instances (Pydantic BaseModel)
89
+ if hasattr(status, "code") and hasattr(status, "message"):
90
+ # Status object - extract code and message
91
+ status_code = status.code
92
+ # Handle both enum values and direct integer values
93
+ if hasattr(status_code, "value"):
94
+ status_code = status_code.value
95
+
96
+ return {
97
+ "code": status_code,
98
+ "message": status.message,
99
+ "details": getattr(status, "details", []),
100
+ }
101
+ elif isinstance(status, dict):
102
+ # Dictionary representation of status
103
+ return {
104
+ "code": status.get("code"),
105
+ "message": status.get("message"),
106
+ "details": status.get("details", []),
107
+ }
108
+ return None
109
+
110
+ def _build_payload(self, record: logging.LogRecord, rollout_id: str) -> Dict[str, Any]:
111
+ timestamp = datetime.fromtimestamp(record.created, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
112
+ message = record.getMessage()
113
+ tags: List[str] = [f"rollout_id:{rollout_id}"]
114
+ # Optional additional tags
115
+ if hasattr(record, "experiment_id") and cast(Any, getattr(record, "experiment_id")):
116
+ tags.append(f"experiment_id:{cast(Any, getattr(record, 'experiment_id'))}")
117
+ if hasattr(record, "run_id") and cast(Any, getattr(record, "run_id")):
118
+ tags.append(f"run_id:{cast(Any, getattr(record, 'run_id'))}")
119
+ # Groupwise list of rollout_ids
120
+ if hasattr(record, "rollout_ids") and cast(Any, getattr(record, "rollout_ids")):
121
+ try:
122
+ for rid in cast(List[str], getattr(record, "rollout_ids")):
123
+ tags.append(f"rollout_id:{rid}")
124
+ except Exception:
125
+ pass
126
+ program = cast(Optional[str], getattr(record, "program", None)) or "eval_protocol"
127
+
128
+ return {
129
+ "program": program,
130
+ "status": self._get_status_info(record),
131
+ "message": message,
132
+ "tags": tags,
133
+ "extras": {
134
+ "logger_name": record.name,
135
+ "level": record.levelname,
136
+ "timestamp": timestamp,
137
+ },
138
+ }
@@ -0,0 +1,69 @@
1
+ import logging
2
+ import os
3
+ from typing import Optional
4
+
5
+ from eval_protocol.log_utils.fireworks_tracing_http_handler import (
6
+ FireworksTracingHttpHandler,
7
+ )
8
+ from eval_protocol.log_utils.elasticsearch_direct_http_handler import (
9
+ ElasticsearchDirectHttpHandler,
10
+ )
11
+ from eval_protocol.log_utils.rollout_context import ContextRolloutIdFilter
12
+ from eval_protocol.types.remote_rollout_processor import ElasticsearchConfig
13
+
14
+
15
+ _INITIALIZED = False
16
+
17
+
18
+ def _get_env(name: str) -> Optional[str]:
19
+ val = os.getenv(name)
20
+ return val if val and val.strip() else None
21
+
22
+
23
+ def init_external_logging_from_env() -> None:
24
+ """
25
+ Initialize external logging sinks (Fireworks tracing, optional Elasticsearch) from env vars.
26
+
27
+ Idempotent: safe to call multiple times.
28
+
29
+ Environment variables:
30
+ - FW_TRACING_GATEWAY_BASE_URL: enable Fireworks tracing handler when set
31
+ - EP_ELASTICSEARCH_URL, EP_ELASTICSEARCH_API_KEY, EP_ELASTICSEARCH_INDEX: enable ES when all set
32
+ """
33
+ global _INITIALIZED
34
+ if _INITIALIZED:
35
+ return
36
+
37
+ root_logger = logging.getLogger()
38
+
39
+ # Ensure we do not add duplicate handlers if already present
40
+ existing_handler_types = {type(h).__name__ for h in root_logger.handlers}
41
+
42
+ # Fireworks tracing: prefer if FIREWORKS_API_KEY is present; default base URL if not provided
43
+ fw_key = _get_env("FIREWORKS_API_KEY")
44
+ # Allow remote validation gateway to act as tracing base when provided
45
+ fw_url = _get_env("FW_TRACING_GATEWAY_BASE_URL") or _get_env("GATEWAY_URL") or "https://tracing.fireworks.ai"
46
+ if fw_key and "FireworksTracingHttpHandler" not in existing_handler_types:
47
+ fw_handler = FireworksTracingHttpHandler(gateway_base_url=fw_url)
48
+ fw_handler.setLevel(logging.INFO)
49
+ fw_handler.addFilter(ContextRolloutIdFilter())
50
+ root_logger.addHandler(fw_handler)
51
+
52
+ # Elasticsearch
53
+ es_url = _get_env("EP_ELASTICSEARCH_URL")
54
+ es_api_key = _get_env("EP_ELASTICSEARCH_API_KEY")
55
+ es_index = _get_env("EP_ELASTICSEARCH_INDEX")
56
+ if (
57
+ not fw_key
58
+ and es_url
59
+ and es_api_key
60
+ and es_index
61
+ and "ElasticsearchDirectHttpHandler" not in existing_handler_types
62
+ ):
63
+ es_config = ElasticsearchConfig(url=es_url, api_key=es_api_key, index_name=es_index)
64
+ es_handler = ElasticsearchDirectHttpHandler(elasticsearch_config=es_config)
65
+ es_handler.setLevel(logging.INFO)
66
+ es_handler.addFilter(ContextRolloutIdFilter())
67
+ root_logger.addHandler(es_handler)
68
+
69
+ _INITIALIZED = True
@@ -0,0 +1,84 @@
1
+ import logging
2
+ import os
3
+ from contextlib import asynccontextmanager
4
+ from typing import List, Optional
5
+
6
+ import contextvars
7
+
8
+
9
+ # Context variables used to correlate log records with rollouts under concurrency; read by ContextRolloutIdFilter and set/reset by rollout_logging_context. All default to None.
10
+ current_rollout_id: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar("ep_rollout_id", default=None)  # primary rollout ID for the current context
11
+ current_rollout_ids: contextvars.ContextVar[Optional[List[str]]] = contextvars.ContextVar(  # optional list of related rollout IDs (e.g., groupwise mode)
12
+ "ep_rollout_ids", default=None
13
+ )
14
+ current_experiment_id: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar("ep_experiment_id", default=None)  # optional experiment tag copied onto records when set
15
+ current_run_id: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar("ep_run_id", default=None)  # optional run tag copied onto records when set
16
+
17
+
18
class ContextRolloutIdFilter(logging.Filter):
    """
    Stamp rollout correlation fields onto log records, rejecting uncorrelated ones.

    Meant to be attached to external sink handlers only (e.g., Fireworks or
    Elasticsearch). A record with no resolvable rollout ID is rejected for that
    handler so that uncorrelated logs are never shipped.
    """

    def filter(self, record: logging.LogRecord) -> bool:  # type: ignore[override]
        # Resolution order: active context, then an explicit attribute on the
        # record, then the EP_ROLLOUT_ID environment fallback.
        correlation_id = (
            current_rollout_id.get()
            or getattr(record, "rollout_id", None)
            or os.getenv("EP_ROLLOUT_ID")
        )
        if not correlation_id:
            # No correlation context → do not emit to external sink
            return False

        # Primary correlation field is always written.
        record.rollout_id = correlation_id

        # Secondary fields are copied only when their context variable holds a
        # truthy value; otherwise the record is left untouched for that field.
        secondary_fields = (
            ("rollout_ids", current_rollout_ids),
            ("experiment_id", current_experiment_id),
            ("run_id", current_run_id),
        )
        for field_name, context_var in secondary_fields:
            field_value = context_var.get()
            if field_value:
                setattr(record, field_name, field_value)

        return True
52
+
53
+
54
@asynccontextmanager
async def rollout_logging_context(
    rollout_id: str,
    *,
    experiment_id: Optional[str] = None,
    run_id: Optional[str] = None,
    rollout_ids: Optional[List[str]] = None,
):
    """
    Bind log-correlation ContextVars for the duration of an ``async with`` block.

    The primary rollout ID is always bound. Each optional field is bound only
    when a non-None value is passed; an omitted field keeps whatever value the
    surrounding context already holds. On exit, every variable bound here is
    reset to its previous value.

    Args:
        rollout_id: Primary rollout identifier for correlation.
        experiment_id: Optional experiment ID for tagging.
        run_id: Optional run ID for tagging.
        rollout_ids: Optional list of related rollout IDs (e.g., groupwise mode).
    """
    # The mandatory binding comes first, followed by the optional ones that were supplied.
    requested = [(current_rollout_id, rollout_id)]
    for context_var, value in (
        (current_rollout_ids, rollout_ids),
        (current_experiment_id, experiment_id),
        (current_run_id, run_id),
    ):
        if value is not None:
            requested.append((context_var, value))

    # Keep each variable with the token from set() so it can be restored on exit.
    applied = [(context_var, context_var.set(value)) for context_var, value in requested]
    try:
        yield
    finally:
        for context_var, token in applied:
            context_var.reset(token)
@@ -3,7 +3,7 @@ Redis utilities for tracking chat completions via insertion IDs.
3
3
  """
4
4
 
5
5
  import logging
6
- from typing import Set
6
+ from typing import Set, cast
7
7
  import redis
8
8
 
9
9
  logger = logging.getLogger(__name__)
@@ -40,7 +40,16 @@ def get_insertion_ids(redis_client: redis.Redis, rollout_id: str) -> Set[str]:
40
40
  Set of insertion_id strings, empty set if none found or on error
41
41
  """
42
42
  try:
43
- insertion_ids = redis_client.smembers(rollout_id)
43
+ raw = redis_client.smembers(rollout_id)
44
+ # Typing in redis stubs may be Awaitable[Set[Any]] | Set[Any]; at runtime this is a Set[bytes]
45
+ raw_ids = cast(Set[object], raw)
46
+ # Normalize to set[str]
47
+ insertion_ids: Set[str] = set()
48
+ for b in raw_ids:
49
+ try:
50
+ insertion_ids.add(b.decode("utf-8") if isinstance(b, (bytes, bytearray)) else cast(str, b))
51
+ except Exception:
52
+ continue
44
53
  logger.debug(f"Found {len(insertion_ids)} expected insertion_ids for rollout {rollout_id}")
45
54
  return insertion_ids
46
55
  except Exception as e: