eval-protocol 0.2.44__tar.gz → 0.2.45.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (424)
  1. {eval_protocol-0.2.44/eval_protocol.egg-info → eval_protocol-0.2.45.dev0}/PKG-INFO +1 -1
  2. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/__init__.py +8 -0
  3. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/_version.py +3 -3
  4. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/adapters/__init__.py +7 -0
  5. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/adapters/fireworks_tracing.py +32 -51
  6. eval_protocol-0.2.45.dev0/eval_protocol/adapters/weave.py +130 -0
  7. eval_protocol-0.2.45.dev0/eval_protocol/log_utils/util.py +22 -0
  8. eval_protocol-0.2.45.dev0/eval_protocol/proxy/proxy_core/__init__.py +10 -0
  9. eval_protocol-0.2.45.dev0/eval_protocol/proxy/proxy_core/app.py +259 -0
  10. eval_protocol-0.2.45.dev0/eval_protocol/proxy/proxy_core/auth.py +12 -0
  11. eval_protocol-0.2.45.dev0/eval_protocol/proxy/proxy_core/langfuse.py +358 -0
  12. eval_protocol-0.2.45.dev0/eval_protocol/proxy/proxy_core/litellm.py +168 -0
  13. eval_protocol-0.2.45.dev0/eval_protocol/proxy/proxy_core/main.py +10 -0
  14. eval_protocol-0.2.45.dev0/eval_protocol/proxy/proxy_core/models.py +51 -0
  15. eval_protocol-0.2.45.dev0/eval_protocol/proxy/proxy_core/redis_utils.py +48 -0
  16. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/remote_rollout_processor.py +1 -1
  17. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0/eval_protocol.egg-info}/PKG-INFO +1 -1
  18. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol.egg-info/SOURCES.txt +10 -0
  19. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/LICENSE +0 -0
  20. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/README.md +0 -0
  21. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/development/__init__.py +0 -0
  22. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/development/normalize_sandbox_fusion.py +0 -0
  23. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/development/utils/__init__.py +0 -0
  24. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/development/utils/generate_api_key.py +0 -0
  25. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/development/utils/subprocess_manager.py +0 -0
  26. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/__main__.py +0 -0
  27. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/adapters/base.py +0 -0
  28. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/adapters/bigquery.py +0 -0
  29. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/adapters/braintrust.py +0 -0
  30. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/adapters/huggingface.py +0 -0
  31. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/adapters/langchain.py +0 -0
  32. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/adapters/langfuse.py +0 -0
  33. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/adapters/langsmith.py +0 -0
  34. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/adapters/openai_responses.py +0 -0
  35. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/adapters/trl.py +0 -0
  36. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/adapters/utils.py +0 -0
  37. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/__init__.py +0 -0
  38. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/models.py +0 -0
  39. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/orchestrator.py +0 -0
  40. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/resource_abc.py +0 -0
  41. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/resource_pool.py +0 -0
  42. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/resources/__init__.py +0 -0
  43. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
  44. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
  45. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
  46. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
  47. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
  48. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/resources/docker_resource.py +0 -0
  49. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
  50. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/resources/python_state_resource.py +0 -0
  51. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/resources/sql_resource.py +0 -0
  52. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/task_manager.py +0 -0
  53. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/tool_registry.py +0 -0
  54. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/auth.py +0 -0
  55. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/benchmarks/__init__.py +0 -0
  56. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
  57. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
  58. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/benchmarks/test_aime25.py +0 -0
  59. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
  60. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/benchmarks/test_gpqa.py +0 -0
  61. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
  62. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
  63. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
  64. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/cli.py +0 -0
  65. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/cli_commands/__init__.py +0 -0
  66. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
  67. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/cli_commands/common.py +0 -0
  68. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/cli_commands/deploy.py +0 -0
  69. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
  70. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/cli_commands/logs.py +0 -0
  71. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/cli_commands/preview.py +0 -0
  72. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
  73. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/cli_commands/upload.py +0 -0
  74. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/common_utils.py +0 -0
  75. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/config.py +0 -0
  76. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/data_loader/__init__.py +0 -0
  77. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
  78. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/data_loader/factory_data_loader.py +0 -0
  79. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/data_loader/inline_data_loader.py +0 -0
  80. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/data_loader/models.py +0 -0
  81. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/dataset_logger/__init__.py +0 -0
  82. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
  83. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
  84. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
  85. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
  86. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/datasets/__init__.py +0 -0
  87. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/datasets/loader.py +0 -0
  88. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/directory_utils.py +0 -0
  89. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/evaluation.py +0 -0
  90. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/event_bus/__init__.py +0 -0
  91. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/event_bus/event_bus.py +0 -0
  92. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/event_bus/logger.py +0 -0
  93. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
  94. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
  95. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/execution/__init__.py +0 -0
  96. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/execution/pipeline.py +0 -0
  97. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/gcp_tools.py +0 -0
  98. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/generation/cache.py +0 -0
  99. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/generation/clients/base.py +0 -0
  100. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/generation/clients.py +0 -0
  101. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/generic_server.py +0 -0
  102. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/get_pep440_version.py +0 -0
  103. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/human_id/__init__.py +0 -0
  104. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/human_id/dictionary.py +0 -0
  105. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/integrations/__init__.py +0 -0
  106. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/integrations/deepeval.py +0 -0
  107. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/integrations/openeval.py +0 -0
  108. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/integrations/trl.py +0 -0
  109. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/log_utils/__init__.py +0 -0
  110. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
  111. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
  112. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
  113. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
  114. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/logging_utils.py +0 -0
  115. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/__init__.py +0 -0
  116. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/adapter.py +0 -0
  117. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/client/__init__.py +0 -0
  118. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/client/connection.py +0 -0
  119. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/clients.py +0 -0
  120. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/execution/__init__.py +0 -0
  121. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/execution/base_policy.py +0 -0
  122. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/execution/manager.py +0 -0
  123. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/execution/policy.py +0 -0
  124. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/grid_renderer.py +0 -0
  125. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/mcp_multi_client.py +0 -0
  126. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/mcpgym.py +0 -0
  127. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/process_manager.py +0 -0
  128. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/session/__init__.py +0 -0
  129. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/session/manager.py +0 -0
  130. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/simple_process_manager.py +0 -0
  131. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/simulation_server.py +0 -0
  132. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_agent/__init__.py +0 -0
  133. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_agent/config.py +0 -0
  134. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_agent/main.py +0 -0
  135. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
  136. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
  137. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
  138. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
  139. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_env.py +0 -0
  140. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/__init__.py +0 -0
  141. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
  142. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
  143. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
  144. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/tau2/README.md +0 -0
  145. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
  146. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
  147. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
  148. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
  149. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/tau2/server.py +0 -0
  150. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
  151. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
  152. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
  153. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
  154. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
  155. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/models.py +0 -0
  156. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/packaging.py +0 -0
  157. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/platform_api.py +0 -0
  158. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/playback_policy.py +0 -0
  159. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/__init__.py +0 -0
  160. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
  161. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
  162. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
  163. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
  164. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
  165. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
  166. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
  167. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
  168. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
  169. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/evaluation_test.py +0 -0
  170. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
  171. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/exception_config.py +0 -0
  172. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/execution.py +0 -0
  173. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
  174. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/handle_persist_flow.py +0 -0
  175. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/parameterize.py +0 -0
  176. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/plugin.py +0 -0
  177. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/rollout_processor.py +0 -0
  178. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/store_experiment_link.py +0 -0
  179. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/store_results_url.py +0 -0
  180. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/types.py +0 -0
  181. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/utils.py +0 -0
  182. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/validate_signature.py +0 -0
  183. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/quickstart/__init__.py +0 -0
  184. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/quickstart/llm_judge.py +0 -0
  185. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
  186. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/quickstart/llm_judge_langfuse.py +0 -0
  187. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/quickstart/llm_judge_langsmith.py +0 -0
  188. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/quickstart/llm_judge_openai_responses.py +0 -0
  189. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/quickstart/utils.py +0 -0
  190. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/resources.py +0 -0
  191. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/reward_function.py +0 -0
  192. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/__init__.py +0 -0
  193. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/accuracy.py +0 -0
  194. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/accuracy_length.py +0 -0
  195. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/apps_coding_reward.py +0 -0
  196. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/apps_execution_utils.py +0 -0
  197. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/apps_testing_util.py +0 -0
  198. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/bfcl_reward.py +0 -0
  199. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/code_execution.py +0 -0
  200. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/code_execution_utils.py +0 -0
  201. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/cpp_code.py +0 -0
  202. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/deepcoder_reward.py +0 -0
  203. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/format.py +0 -0
  204. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/function_calling.py +0 -0
  205. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/json_schema.py +0 -0
  206. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/language_consistency.py +0 -0
  207. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/lean_prover.py +0 -0
  208. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/length.py +0 -0
  209. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
  210. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/math.py +0 -0
  211. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
  212. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/reasoning_steps.py +0 -0
  213. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/repetition.py +0 -0
  214. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/tag_count.py +0 -0
  215. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/rl_processing.py +0 -0
  216. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/server.py +0 -0
  217. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/stats/__init__.py +0 -0
  218. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/stats/confidence_intervals.py +0 -0
  219. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/typed_interface.py +0 -0
  220. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/types/__init__.py +0 -0
  221. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/types/errors.py +0 -0
  222. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/types/remote_rollout_processor.py +0 -0
  223. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/types/types.py +0 -0
  224. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/__init__.py +0 -0
  225. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/batch_evaluation.py +0 -0
  226. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/batch_transformation.py +0 -0
  227. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/check_server_status.py +0 -0
  228. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/dataset_helpers.py +0 -0
  229. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/logs_models.py +0 -0
  230. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/logs_server.py +0 -0
  231. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/module_loader.py +0 -0
  232. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/packaging_utils.py +0 -0
  233. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/show_results_url.py +0 -0
  234. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/static_policy.py +0 -0
  235. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/subprocess_utils.py +0 -0
  236. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/vite_server.py +0 -0
  237. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol.egg-info/dependency_links.txt +0 -0
  238. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol.egg-info/entry_points.txt +0 -0
  239. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol.egg-info/requires.txt +0 -0
  240. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/eval_protocol.egg-info/top_level.txt +0 -0
  241. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/pyproject.toml +0 -0
  242. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/setup.cfg +0 -0
  243. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/setup.py +0 -0
  244. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_accuracy.py +0 -0
  245. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_accuracy_length.py +0 -0
  246. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_adapters_e2e.py +0 -0
  247. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_agent_orchestrator.py +0 -0
  248. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_agent_resources.py +0 -0
  249. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_auth.py +0 -0
  250. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_batch_evaluation.py +0 -0
  251. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_cli.py +0 -0
  252. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_cli_agent.py +0 -0
  253. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_cli_args.py +0 -0
  254. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_code_execution.py +0 -0
  255. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_config.py +0 -0
  256. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_control_plane_separation.py +0 -0
  257. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_cpp_code.py +0 -0
  258. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_data_driven_task_manager.py +0 -0
  259. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_deepcoder_reward.py +0 -0
  260. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_deepeval_integration.py +0 -0
  261. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_deploy_integration.py +0 -0
  262. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_directory_utils.py +0 -0
  263. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_e2b_integration.py +0 -0
  264. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_e2b_js_integration.py +0 -0
  265. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_edge_cases.py +0 -0
  266. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_eval_protocol_import.py +0 -0
  267. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_evaluation.py +0 -0
  268. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_evaluation_integration.py +0 -0
  269. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_evaluation_postprocess.py +0 -0
  270. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_evaluation_preview_integration.py +0 -0
  271. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_event_bus.py +0 -0
  272. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_event_bus_helper.py +0 -0
  273. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_examples_end_to_end.py +0 -0
  274. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_fireworks_api.py +0 -0
  275. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_format.py +0 -0
  276. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_fractional_code.py +0 -0
  277. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_function_calling.py +0 -0
  278. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_gcp_tools.py +0 -0
  279. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_generic_server.py +0 -0
  280. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_human_id.py +0 -0
  281. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_integration.py +0 -0
  282. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_json_schema.py +0 -0
  283. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_kwargs_validation.py +0 -0
  284. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_language_consistency.py +0 -0
  285. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_lean_prover.py +0 -0
  286. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_lean_prover_runner.py +0 -0
  287. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_length.py +0 -0
  288. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_list_comparison_math_reward.py +0 -0
  289. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_logs_server.py +0 -0
  290. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_logs_server_simple.py +0 -0
  291. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_math.py +0 -0
  292. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_minimal.py +0 -0
  293. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_models.py +0 -0
  294. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_models_rl.py +0 -0
  295. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_multiple_choice_math_reward.py +0 -0
  296. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_n_variant_batch_integration.py +0 -0
  297. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_n_variant_integration.py +0 -0
  298. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_openai_compatibility.py +0 -0
  299. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_openeval_integration.py +0 -0
  300. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_packaging.py +0 -0
  301. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_parallel_rollouts.py +0 -0
  302. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_platform_api.py +0 -0
  303. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_quickstart_utils.py +0 -0
  304. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_readiness.py +0 -0
  305. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_reasoning_steps.py +0 -0
  306. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_repetition.py +0 -0
  307. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_repetition_debug.py +0 -0
  308. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_retry_mechanism.py +0 -0
  309. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_reward_function.py +0 -0
  310. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_reward_protocol_import.py +0 -0
  311. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_rl_processing.py +0 -0
  312. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_rollout_control_plane_integration.py +0 -0
  313. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_server.py +0 -0
  314. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_show_results_url.py +0 -0
  315. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_status_migration_changes.py +0 -0
  316. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_status_migration_integration.py +0 -0
  317. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_status_model.py +0 -0
  318. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_tag_count.py +0 -0
  319. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_tau_bench_airline_smoke.py +0 -0
  320. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_typed_interface.py +0 -0
  321. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_typed_interface_rl.py +0 -0
  322. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_upload_entrypoint.py +0 -0
  323. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_url_handling.py +0 -0
  324. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/tests/test_vite_server.py +0 -0
  325. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/__init__.py +0 -0
  326. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/agent/__init__.py +0 -0
  327. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/agent/base.py +0 -0
  328. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/agent/llm_agent.py +0 -0
  329. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/api_service/__init__.py +0 -0
  330. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/api_service/api_config.py +0 -0
  331. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/api_service/data_model.py +0 -0
  332. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/api_service/simulation_service.py +0 -0
  333. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/cli.py +0 -0
  334. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/config.py +0 -0
  335. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/data/domains/airline/policy.md +0 -0
  336. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/data/domains/mock/policy.md +0 -0
  337. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
  338. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/data/domains/retail/policy.md +0 -0
  339. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
  340. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
  341. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
  342. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
  343. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
  344. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
  345. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
  346. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/data_model/__init__.py +0 -0
  347. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/data_model/message.py +0 -0
  348. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/data_model/simulation.py +0 -0
  349. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/data_model/tasks.py +0 -0
  350. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/__init__.py +0 -0
  351. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/airline/__init__.py +0 -0
  352. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/airline/data_model.py +0 -0
  353. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/airline/environment.py +0 -0
  354. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/airline/tools.py +0 -0
  355. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/airline/utils.py +0 -0
  356. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/mock/__init__.py +0 -0
  357. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/mock/data_model.py +0 -0
  358. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/mock/environment.py +0 -0
  359. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/mock/tools.py +0 -0
  360. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/mock/utils.py +0 -0
  361. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/retail/__init__.py +0 -0
  362. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/retail/data_model.py +0 -0
  363. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/retail/environment.py +0 -0
  364. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/retail/tools.py +0 -0
  365. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/retail/utils.py +0 -0
  366. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/__init__.py +0 -0
  367. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/data_model.py +0 -0
  368. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/environment.py +0 -0
  369. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
  370. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
  371. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
  372. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
  373. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
  374. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
  375. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
  376. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
  377. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/tools.py +0 -0
  378. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
  379. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/user_tools.py +0 -0
  380. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/utils.py +0 -0
  381. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/environment/__init__.py +0 -0
  382. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/environment/db.py +0 -0
  383. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/environment/environment.py +0 -0
  384. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/environment/server.py +0 -0
  385. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/environment/tool.py +0 -0
  386. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/environment/toolkit.py +0 -0
  387. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/environment/utils/interface_agent.py +0 -0
  388. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/evaluator/__init__.py +0 -0
  389. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/evaluator/evaluator.py +0 -0
  390. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/evaluator/evaluator_action.py +0 -0
  391. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/evaluator/evaluator_base.py +0 -0
  392. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
  393. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/evaluator/evaluator_env.py +0 -0
  394. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
  395. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/metrics/__init__.py +0 -0
  396. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/metrics/agent_metrics.py +0 -0
  397. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/metrics/break_down_metrics.py +0 -0
  398. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/orchestrator/__init__.py +0 -0
  399. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/orchestrator/environment_manager.py +0 -0
  400. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/orchestrator/orchestrator.py +0 -0
  401. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/orchestrator/utils.py +0 -0
  402. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/registry.py +0 -0
  403. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/run.py +0 -0
  404. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/scripts/__init__.py +0 -0
  405. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/scripts/check_data.py +0 -0
  406. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/scripts/show_domain_doc.py +0 -0
  407. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/scripts/start_servers.py +0 -0
  408. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/scripts/view_simulations.py +0 -0
  409. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/user/__init__.py +0 -0
  410. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/user/base.py +0 -0
  411. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/user/user_simulator.py +0 -0
  412. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/utils/__init__.py +0 -0
  413. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/utils/display.py +0 -0
  414. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/utils/io_utils.py +0 -0
  415. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/utils/llm_utils.py +0 -0
  416. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/utils/pydantic_utils.py +0 -0
  417. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vendor/tau2/utils/utils.py +0 -0
  418. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/versioneer.py +0 -0
  419. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
  420. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vite-app/dist/assets/index-C81y9r9l.js +0 -0
  421. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vite-app/dist/assets/index-C81y9r9l.js.map +0 -0
  422. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vite-app/dist/assets/index-DpYZaoAr.css +0 -0
  423. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
  424. {eval_protocol-0.2.44 → eval_protocol-0.2.45.dev0}/vite-app/dist/index.html +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.44
3
+ Version: 0.2.45.dev0
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -34,6 +34,7 @@ from .pytest import evaluation_test, SingleTurnRolloutProcessor, RemoteRolloutPr
34
34
  from .pytest.parameterize import DefaultParameterIdGenerator
35
35
  from .log_utils.elasticsearch_direct_http_handler import ElasticsearchDirectHttpHandler
36
36
  from .log_utils.rollout_id_filter import RolloutIdFilter
37
+ from .log_utils.util import setup_rollout_logging_for_elasticsearch_handler
37
38
 
38
39
  from .types.remote_rollout_processor import (
39
40
  InitRequest,
@@ -63,11 +64,18 @@ try:
63
64
  except ImportError:
64
65
  LangSmithAdapter = None
65
66
 
67
+
68
+ try:
69
+ from .adapters import WeaveAdapter
70
+ except ImportError:
71
+ WeaveAdapter = None
72
+
66
73
  warnings.filterwarnings("default", category=DeprecationWarning, module="eval_protocol")
67
74
 
68
75
  __all__ = [
69
76
  "ElasticsearchDirectHttpHandler",
70
77
  "RolloutIdFilter",
78
+ "setup_rollout_logging_for_elasticsearch_handler",
71
79
  "DataLoaderConfig",
72
80
  "Status",
73
81
  "RemoteRolloutProcessor",
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-10-08T11:55:20-0700",
11
+ "date": "2025-10-09T01:23:30-0700",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "e5883aeb569de1af057de3eae81aaf7790f468f1",
15
- "version": "0.2.44"
14
+ "full-revisionid": "c2ec0c8bb3f927b3c7f77c8a0e4fb955c7685ea6",
15
+ "version": "0.2.45-dev"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -92,3 +92,10 @@ try:
92
92
  __all__.extend(["LangSmithAdapter"])
93
93
  except ImportError:
94
94
  pass
95
+
96
+ try:
97
+ from .weave import WeaveAdapter
98
+
99
+ __all__.extend(["WeaveAdapter"])
100
+ except ImportError:
101
+ pass
@@ -7,9 +7,9 @@ to pull data from Langfuse deployments with simplified retry logic handling.
7
7
  from __future__ import annotations
8
8
  import logging
9
9
  import requests
10
- import time
11
10
  from datetime import datetime
12
11
  from typing import Any, Dict, List, Optional, Protocol
12
+ import os
13
13
 
14
14
  from eval_protocol.models import EvaluationRow, InputMetadata, ExecutionMetadata, Message
15
15
  from .base import BaseAdapter
@@ -281,9 +281,8 @@ class FireworksTracingAdapter(BaseAdapter):
281
281
  from_timestamp: Optional[datetime] = None,
282
282
  to_timestamp: Optional[datetime] = None,
283
283
  include_tool_calls: bool = True,
284
- backend_sleep_between_gets: float = 0.1,
285
- backend_max_retries: int = 3,
286
- proxy_max_retries: int = 3,
284
+ sleep_between_gets: float = 0.1,
285
+ max_retries: int = 3,
287
286
  span_name: Optional[str] = None,
288
287
  converter: Optional[TraceDictConverter] = None,
289
288
  ) -> List[EvaluationRow]:
@@ -305,10 +304,8 @@ class FireworksTracingAdapter(BaseAdapter):
305
304
  from_timestamp: Explicit start time (ISO format)
306
305
  to_timestamp: Explicit end time (ISO format)
307
306
  include_tool_calls: Whether to include tool calling traces
308
- backend_sleep_between_gets: Sleep time between backend trace fetches (passed to proxy)
309
- backend_max_retries: Maximum retries for backend operations (passed to proxy)
310
- proxy_max_retries: Maximum retries when proxy returns 404 (client-side retries with exponential backoff)
311
- span_name: If provided, extract messages from generations within this named span
307
+ sleep_between_gets: Sleep time between polling attempts (default: 2.5s)
308
+ max_retries: Max retry attempts used by proxy (default: 3)
312
309
  converter: Optional custom converter implementing TraceDictConverter protocol.
313
310
  If provided, this will be used instead of the default conversion logic.
314
311
 
@@ -318,9 +315,9 @@ class FireworksTracingAdapter(BaseAdapter):
318
315
  Raises:
319
316
  ValueError: If tags list is empty
320
317
  """
321
- # Validate that tags are provided (security requirement)
318
+ # Validate that tags are provided
322
319
  if not tags or len(tags) == 0:
323
- raise ValueError("At least one tag is required to fetch traces (security: prevents fetching all traces)")
320
+ raise ValueError("At least one tag is required to fetch traces")
324
321
 
325
322
  eval_rows = []
326
323
 
@@ -339,58 +336,42 @@ class FireworksTracingAdapter(BaseAdapter):
339
336
  "hours_back": hours_back,
340
337
  "from_timestamp": from_timestamp.isoformat() if from_timestamp else None,
341
338
  "to_timestamp": to_timestamp.isoformat() if to_timestamp else None,
342
- "sleep_between_gets": backend_sleep_between_gets,
343
- "max_retries": backend_max_retries,
339
+ "sleep_between_gets": sleep_between_gets,
340
+ "max_retries": max_retries,
344
341
  }
345
342
 
346
343
  # Remove None values
347
344
  params = {k: v for k, v in params.items() if v is not None}
348
345
 
349
- # Make request to proxy with retry logic
346
+ # Make request to proxy
350
347
  if self.project_id:
351
348
  url = f"{self.base_url}/v1/project_id/{self.project_id}/traces"
352
349
  else:
353
350
  url = f"{self.base_url}/v1/traces"
354
351
 
355
- # Retry loop for handling backend indexing delays (proxy returns 404)
352
+ headers = {"Authorization": f"Bearer {os.environ.get('FIREWORKS_API_KEY')}"}
353
+
356
354
  result = None
357
- for attempt in range(proxy_max_retries):
358
- try:
359
- response = requests.get(url, params=params, timeout=self.timeout)
360
- response.raise_for_status()
361
- result = response.json()
362
- break # Success, exit retry loop
363
- except requests.exceptions.HTTPError as e:
364
- error_msg = str(e)
365
- should_retry = False
366
-
367
- # Try to extract detail message from response
368
- if e.response is not None:
369
- try:
370
- error_detail = e.response.json().get("detail", "")
371
- error_msg = error_detail or e.response.text
372
-
373
- # Retry on 404 if it's due to incomplete/missing traces (backend still indexing)
374
- if e.response.status_code == 404:
375
- should_retry = True
376
- except Exception:
377
- error_msg = e.response.text
378
-
379
- if should_retry and attempt < proxy_max_retries - 1:
380
- sleep_time = 2 ** (attempt + 1)
381
- logger.warning(error_msg)
382
- time.sleep(sleep_time)
383
- else:
384
- # Final retry or non-retryable error
385
- logger.error("Failed to fetch traces from proxy: %s", error_msg)
386
- return eval_rows
387
- except requests.exceptions.RequestException as e:
388
- # Non-HTTP errors (network issues, timeouts, etc.)
389
- logger.error("Failed to fetch traces from proxy: %s", str(e))
390
- return eval_rows
391
-
392
- if result is None:
393
- logger.error("Failed to fetch traces after %d retries", proxy_max_retries)
355
+ try:
356
+ response = requests.get(url, params=params, timeout=self.timeout, headers=headers)
357
+ response.raise_for_status()
358
+ result = response.json()
359
+ except requests.exceptions.HTTPError as e:
360
+ error_msg = str(e)
361
+
362
+ # Try to extract detail message from response
363
+ if e.response is not None:
364
+ try:
365
+ error_detail = e.response.json().get("detail", {})
366
+ error_msg = error_detail or e.response.text
367
+ except Exception: # In case e.response.json() fails
368
+ error_msg = f"Proxy error: {e.response.text}"
369
+
370
+ logger.error("Failed to fetch traces from proxy: %s", error_msg)
371
+ return eval_rows
372
+ except requests.exceptions.RequestException as e:
373
+ # Non-HTTP errors (network issues, timeouts, etc.)
374
+ logger.error("Failed to fetch traces from proxy: %s", str(e))
394
375
  return eval_rows
395
376
 
396
377
  # Extract traces from response
@@ -0,0 +1,130 @@
1
+ """Weave (Weights & Biases) adapter for Eval Protocol.
2
+
3
+ This adapter fetches recent root traces from Weave Trace API and converts them
4
+ to `EvaluationRow` format for use in evaluation pipelines. It is intentionally
5
+ minimal and depends only on requests.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Any, Dict, List, Optional
11
+ import os
12
+ import requests
13
+
14
+ from eval_protocol.models import EvaluationRow, InputMetadata, Message, ExecutionMetadata
15
+ from .base import BaseAdapter
16
+
17
+
18
+ def _extract_messages_from_trace(trace: Dict[str, Any], include_tool_calls: bool = True) -> List[Message]:
19
+ messages: List[Message] = []
20
+
21
+ # Prefer explicit output messages if provided
22
+ output = trace.get("output") or {}
23
+ out_msgs = output.get("messages")
24
+ if isinstance(out_msgs, list):
25
+ for m in out_msgs:
26
+ messages.append(
27
+ Message(
28
+ role=m.get("role"),
29
+ content=m.get("content"),
30
+ tool_calls=m.get("tool_calls") if include_tool_calls else None,
31
+ tool_call_id=m.get("tool_call_id"),
32
+ name=m.get("name"),
33
+ )
34
+ )
35
+
36
+ # If no explicit output messages, fall back to final bubble from choices
37
+ if not messages:
38
+ choices = output.get("choices")
39
+ if isinstance(choices, list) and choices:
40
+ msg = (choices[0] or {}).get("message", {})
41
+ if msg:
42
+ messages.append(Message(role=msg.get("role"), content=msg.get("content")))
43
+
44
+ # Prepend input messages if present and not already contained
45
+ inputs = trace.get("inputs") or {}
46
+ in_msgs = inputs.get("messages")
47
+ if isinstance(in_msgs, list):
48
+ prefixed = [Message(role=m.get("role"), content=m.get("content")) for m in in_msgs]
49
+ messages = prefixed + messages
50
+
51
+ return messages
52
+
53
+
54
+ def _convert_trace_to_evaluation_row(
55
+ trace: Dict[str, Any], include_tool_calls: bool = True
56
+ ) -> Optional[EvaluationRow]:
57
+ messages = _extract_messages_from_trace(trace, include_tool_calls=include_tool_calls)
58
+ if not messages:
59
+ return None
60
+
61
+ # Provider-native IDs for UI joinability
62
+ session_data = {
63
+ "weave_trace_id": trace.get("id"),
64
+ "weave_project_id": trace.get("project_id"),
65
+ }
66
+
67
+ # Optional EP identifiers (if present in provider payload)
68
+ meta_in = (trace.get("inputs") or {}).get("metadata") or {}
69
+ meta_out = (trace.get("output") or {}).get("metadata") or {}
70
+ metadata = {**meta_in, **meta_out}
71
+
72
+ input_metadata = InputMetadata(row_id=metadata.get("row_id"), session_data=session_data)
73
+
74
+ # Preserve default factory behavior by only setting provided fields
75
+ exec_kwargs: Dict[str, Any] = {}
76
+ for k in ("invocation_id", "experiment_id", "rollout_id", "run_id"):
77
+ if metadata.get(k) is not None:
78
+ exec_kwargs[k] = metadata[k]
79
+ execution_metadata = ExecutionMetadata(**exec_kwargs)
80
+
81
+ # Capture tools if provider exposes them (prefer inputs)
82
+ tools = None
83
+ inputs = trace.get("inputs") or {}
84
+ if include_tool_calls and isinstance(inputs, dict) and "tools" in inputs:
85
+ tools = inputs.get("tools")
86
+
87
+ return EvaluationRow(
88
+ messages=messages, tools=tools, input_metadata=input_metadata, execution_metadata=execution_metadata
89
+ )
90
+
91
+
92
+ class WeaveAdapter(BaseAdapter):
93
+ """Adapter to pull data from Weave Trace API and convert to EvaluationRow format."""
94
+
95
+ def __init__(
96
+ self, base_url: Optional[str] = None, api_token: Optional[str] = None, project_id: Optional[str] = None
97
+ ):
98
+ self.base_url = base_url or os.getenv("WEAVE_TRACE_BASE_URL", "https://trace.wandb.ai")
99
+ self.api_token = api_token or os.getenv("WANDB_API_KEY")
100
+ # project_id is in form "<entity>/<project>"
101
+ self.project_id = project_id or (f"{os.getenv('WANDB_ENTITY')}/{os.getenv('WANDB_PROJECT')}")
102
+ if not self.api_token or not self.project_id or "/" not in self.project_id:
103
+ raise ValueError("Missing Weave credentials or project (WANDB_API_KEY and WANDB_ENTITY/WANDB_PROJECT)")
104
+
105
+ def _fetch_traces(self, limit: int = 100) -> List[Dict[str, Any]]:
106
+ url = f"{self.base_url}/calls/stream_query"
107
+ payload = {
108
+ "project_id": self.project_id,
109
+ "filter": {"trace_roots_only": True},
110
+ "limit": limit,
111
+ "offset": 0,
112
+ "sort_by": [{"field": "started_at", "direction": "desc"}],
113
+ "include_feedback": False,
114
+ }
115
+ headers = {"Authorization": f"Bearer {self.api_token}", "Content-Type": "application/json"}
116
+ resp = requests.post(url, json=payload, headers=headers, timeout=30)
117
+ resp.raise_for_status()
118
+ body = resp.json() or {}
119
+ return body.get("data", [])
120
+
121
+ def get_evaluation_rows(self, *args, **kwargs) -> List[EvaluationRow]:
122
+ limit = kwargs.get("limit", 100)
123
+ include_tool_calls = kwargs.get("include_tool_calls", True)
124
+ traces = self._fetch_traces(limit=limit)
125
+ rows: List[EvaluationRow] = []
126
+ for tr in traces:
127
+ row = _convert_trace_to_evaluation_row(tr, include_tool_calls=include_tool_calls)
128
+ if row:
129
+ rows.append(row)
130
+ return rows
@@ -0,0 +1,22 @@
1
+ import os
2
+ from eval_protocol.types.remote_rollout_processor import ElasticsearchConfig
3
+ from .elasticsearch_direct_http_handler import ElasticsearchDirectHttpHandler
4
+
5
+
6
+ def setup_rollout_logging_for_elasticsearch_handler(
7
+ handler: ElasticsearchDirectHttpHandler, rollout_id: str, elastic_search_config: ElasticsearchConfig
8
+ ) -> None:
9
+ """
10
+ Whenever a new subprocess is created, we need to setup the rollout context
11
+ for the subprocess. This is useful when implementing your own remote server
12
+ for rollout processing.
13
+
14
+ 1. Set the EP_ROLLOUT_ID environment variable
15
+ 2. Configure the Elasticsearch handler with the Elasticsearch config
16
+ """
17
+
18
+ # this should only affect this subprocess so logs from this subprocess can
19
+ # be correlated to the rollout
20
+ os.environ["EP_ROLLOUT_ID"] = rollout_id
21
+
22
+ handler.configure(elasticsearch_config=elastic_search_config)
@@ -0,0 +1,10 @@
1
+ from .models import ProxyConfig
2
+ from .app import create_app
3
+ from .auth import AuthProvider, NoAuthProvider
4
+
5
+ __all__ = [
6
+ "ProxyConfig",
7
+ "create_app",
8
+ "AuthProvider",
9
+ "NoAuthProvider",
10
+ ]
@@ -0,0 +1,259 @@
1
+ """
2
+ Metadata Extraction Gateway
3
+ A FastAPI service that sits in front of LiteLLM and extracts metadata from URL paths.
4
+ """
5
+
6
+ from fastapi import FastAPI, Depends, HTTPException, Request, Query
7
+ from typing import Optional, List
8
+ import os
9
+ import redis
10
+ import logging
11
+ import json
12
+ from pathlib import Path
13
+ import sys
14
+ from contextlib import asynccontextmanager
15
+
16
+ from .models import ProxyConfig, LangfuseTracesResponse
17
+ from .auth import AuthProvider, NoAuthProvider
18
+ from .litellm import handle_chat_completion, proxy_to_litellm
19
+ from .langfuse import fetch_langfuse_traces
20
+
21
+ # Configure logging before any other imports (so all modules inherit this config)
22
+ log_level = os.getenv("LOG_LEVEL", "INFO").upper()
23
+ logging.basicConfig(
24
+ level=getattr(logging, log_level),
25
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
26
+ handlers=[logging.StreamHandler(sys.stdout)],
27
+ )
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
+ def build_proxy_config() -> ProxyConfig:
33
+ """Load environment and secrets, and build ProxyConfig (no Redis)."""
34
+ # Env
35
+ litellm_url = os.getenv("LITELLM_URL")
36
+ if not litellm_url:
37
+ raise ValueError("LITELLM_URL environment variable must be set")
38
+ request_timeout = float(os.getenv("REQUEST_TIMEOUT", "300.0"))
39
+
40
+ # Secrets - use SECRETS_PATH env var if set, otherwise default to proxy/secrets.json
41
+ secrets_path_str = os.getenv("SECRETS_PATH")
42
+ if secrets_path_str:
43
+ secrets_path = Path(secrets_path_str)
44
+ else:
45
+ secrets_path = Path(__file__).parent / "secrets.json"
46
+ if not secrets_path.exists():
47
+ raise ValueError(
48
+ "secrets.json not found! Please create it from secrets.json.example:\n"
49
+ " cp litellm_proxy_config/proxy/secrets.json.example litellm_proxy_config/proxy/secrets.json\n"
50
+ "Then add your Langfuse API keys to secrets.json"
51
+ )
52
+ try:
53
+ with open(secrets_path, "r") as f:
54
+ secrets_config = json.load(f)
55
+ langfuse_keys = secrets_config["langfuse_keys"]
56
+ default_project_id = secrets_config["default_project_id"]
57
+ logger.info(f"Loaded {len(langfuse_keys)} Langfuse project(s) from secrets.json")
58
+ except KeyError as e:
59
+ raise ValueError(f"Missing required key in secrets.json: {e}")
60
+ except json.JSONDecodeError as e:
61
+ raise ValueError(f"Invalid JSON in secrets.json: {e}")
62
+
63
+ return ProxyConfig(
64
+ litellm_url=litellm_url,
65
+ request_timeout=request_timeout,
66
+ langfuse_keys=langfuse_keys,
67
+ default_project_id=default_project_id,
68
+ )
69
+
70
+
71
+ def init_redis() -> redis.Redis:
72
+ """Initialize and return a Redis client from environment variables."""
73
+ redis_host = os.getenv("REDIS_HOST")
74
+ if not redis_host:
75
+ raise ValueError("REDIS_HOST environment variable must be set")
76
+ redis_port = int(os.getenv("REDIS_PORT", "6379"))
77
+ redis_password = os.getenv("REDIS_PASSWORD")
78
+
79
+ try:
80
+ client = redis.Redis(
81
+ host=redis_host,
82
+ port=redis_port,
83
+ password=redis_password if redis_password else None,
84
+ decode_responses=True,
85
+ socket_connect_timeout=5,
86
+ socket_timeout=5,
87
+ retry_on_timeout=True,
88
+ )
89
+ client.ping()
90
+ logger.info(f"Connected to Redis at {redis_host}:{redis_port}")
91
+ return client
92
+ except Exception as e:
93
+ raise ConnectionError(f"Failed to connect to Redis at {redis_host}:{redis_port}: {e}")
94
+
95
+
96
+ def create_app(
97
+ auth_provider: AuthProvider = NoAuthProvider(),
98
+ ) -> FastAPI:
99
+ @asynccontextmanager
100
+ async def lifespan(app: FastAPI):
101
+ # Build runtime on startup
102
+ app.state.config = build_proxy_config()
103
+ app.state.redis = init_redis()
104
+ try:
105
+ yield
106
+ finally:
107
+ try:
108
+ app.state.redis.close()
109
+ except Exception:
110
+ pass
111
+
112
+ app = FastAPI(title="LiteLLM Metadata Proxy", lifespan=lifespan)
113
+
114
+ def get_config(request: Request) -> ProxyConfig:
115
+ return request.app.state.config
116
+
117
+ def get_redis(request: Request) -> redis.Redis:
118
+ return request.app.state.redis
119
+
120
+ async def require_auth(request: Request) -> None:
121
+ auth_header = request.headers.get("authorization", "")
122
+ api_key = None
123
+ if auth_header.startswith("Bearer "):
124
+ api_key = auth_header.replace("Bearer ", "").strip()
125
+
126
+ auth_provider.validate(api_key)
127
+ return None
128
+
129
+ # =====================
130
+ # Chat completion routes
131
+ # =====================
132
+ @app.post(
133
+ "/project_id/{project_id}/rollout_id/{rollout_id}/invocation_id/{invocation_id}/experiment_id/{experiment_id}/run_id/{run_id}/row_id/{row_id}/chat/completions"
134
+ )
135
+ @app.post(
136
+ "/v1/project_id/{project_id}/rollout_id/{rollout_id}/invocation_id/{invocation_id}/experiment_id/{experiment_id}/run_id/{run_id}/row_id/{row_id}/chat/completions"
137
+ )
138
+ @app.post(
139
+ "/rollout_id/{rollout_id}/invocation_id/{invocation_id}/experiment_id/{experiment_id}/run_id/{run_id}/row_id/{row_id}/chat/completions"
140
+ )
141
+ @app.post(
142
+ "/v1/rollout_id/{rollout_id}/invocation_id/{invocation_id}/experiment_id/{experiment_id}/run_id/{run_id}/row_id/{row_id}/chat/completions"
143
+ )
144
+ @app.post(
145
+ "/project_id/{project_id}/rollout_id/{rollout_id}/invocation_id/{invocation_id}/experiment_id/{experiment_id}/run_id/{run_id}/row_id/{row_id}/encoded_base_url/{encoded_base_url}/chat/completions"
146
+ )
147
+ @app.post(
148
+ "/v1/project_id/{project_id}/rollout_id/{rollout_id}/invocation_id/{invocation_id}/experiment_id/{experiment_id}/run_id/{run_id}/row_id/{row_id}/encoded_base_url/{encoded_base_url}/chat/completions"
149
+ )
150
+ @app.post(
151
+ "/rollout_id/{rollout_id}/invocation_id/{invocation_id}/experiment_id/{experiment_id}/run_id/{run_id}/row_id/{row_id}/encoded_base_url/{encoded_base_url}/chat/completions"
152
+ )
153
+ @app.post(
154
+ "/v1/rollout_id/{rollout_id}/invocation_id/{invocation_id}/experiment_id/{experiment_id}/run_id/{run_id}/row_id/{row_id}/encoded_base_url/{encoded_base_url}/chat/completions"
155
+ )
156
+ async def chat_completion_with_full_metadata(
157
+ rollout_id: str,
158
+ invocation_id: str,
159
+ experiment_id: str,
160
+ run_id: str,
161
+ row_id: str,
162
+ request: Request,
163
+ project_id: Optional[str] = None,
164
+ encoded_base_url: Optional[str] = None,
165
+ config: ProxyConfig = Depends(get_config),
166
+ redis_client: redis.Redis = Depends(get_redis),
167
+ ):
168
+ return await handle_chat_completion(
169
+ config=config,
170
+ redis_client=redis_client,
171
+ request=request,
172
+ project_id=project_id,
173
+ rollout_id=rollout_id,
174
+ invocation_id=invocation_id,
175
+ experiment_id=experiment_id,
176
+ run_id=run_id,
177
+ row_id=row_id,
178
+ encoded_base_url=encoded_base_url,
179
+ )
180
+
181
@app.post("/project_id/{project_id}/chat/completions")
@app.post("/v1/project_id/{project_id}/chat/completions")
async def chat_completion_with_project_only(
    project_id: str,
    request: Request,
    config: ProxyConfig = Depends(get_config),
    redis_client: redis.Redis = Depends(get_redis),
):
    # Chat-completions route for URLs that carry only a project id, served
    # both with and without the "/v1" prefix.
    #
    # Only the project id is forwarded: no rollout, invocation, experiment,
    # run or row identifiers and no encoded base URL are passed to the shared
    # handler from this route.
    #
    # `config` and `redis_client` are resolved by FastAPI through `Depends`.
    # Plain comments are used here (not a docstring) so the route description
    # generated from the handler is left untouched.
    completion = await handle_chat_completion(
        request=request,
        project_id=project_id,
        config=config,
        redis_client=redis_client,
    )
    return completion
195
+
196
+ # ===============
197
+ # Traces routes
198
+ # ===============
199
@app.get("/traces", response_model=LangfuseTracesResponse)
@app.get("/v1/traces", response_model=LangfuseTracesResponse)
@app.get("/project_id/{project_id}/traces", response_model=LangfuseTracesResponse)
@app.get("/v1/project_id/{project_id}/traces", response_model=LangfuseTracesResponse)
async def get_langfuse_traces(
    tags: List[str] = Query(...),  # REQUIRED query param
    project_id: Optional[str] = None,
    limit: int = 100,
    sample_size: Optional[int] = None,
    user_id: Optional[str] = None,
    session_id: Optional[str] = None,
    name: Optional[str] = None,
    environment: Optional[str] = None,
    version: Optional[str] = None,
    release: Optional[str] = None,
    fields: Optional[str] = None,
    hours_back: Optional[int] = None,
    from_timestamp: Optional[str] = None,
    to_timestamp: Optional[str] = None,
    sleep_between_gets: float = 2.5,
    max_retries: int = 3,
    config: ProxyConfig = Depends(get_config),
    redis_client: redis.Redis = Depends(get_redis),
    _: None = Depends(require_auth),
) -> LangfuseTracesResponse:
    # Traces lookup route, exposed with and without the "/v1" prefix and with
    # an optional "/project_id/{project_id}" path segment (project_id stays
    # None on the routes that do not declare it).
    #
    # `tags` is the only mandatory input; every other filter and tuning knob
    # falls back to the default declared in the signature. The `_` parameter
    # exists only so that the `require_auth` dependency runs for this route;
    # its value is never read.
    # NOTE(review): presumably `require_auth` rejects unauthenticated
    # callers -- confirm against its definition.
    #
    # All request-derived arguments are gathered in one mapping and passed
    # through unchanged; this handler does no validation or transformation.
    forwarded_arguments = {
        "tags": tags,
        "project_id": project_id,
        "limit": limit,
        "sample_size": sample_size,
        "user_id": user_id,
        "session_id": session_id,
        "name": name,
        "environment": environment,
        "version": version,
        "release": release,
        "fields": fields,
        "hours_back": hours_back,
        "from_timestamp": from_timestamp,
        "to_timestamp": to_timestamp,
        "sleep_between_gets": sleep_between_gets,
        "max_retries": max_retries,
    }
    return await fetch_langfuse_traces(
        config=config,
        redis_client=redis_client,
        **forwarded_arguments,
    )
244
+
245
+ # Health
246
@app.get("/health")
async def health():
    # Health probe: answers with a fixed payload and touches neither the
    # proxy configuration nor Redis.
    status_payload = {"status": "healthy", "service": "metadata-proxy"}
    return status_payload
249
+
250
+ # Catch-all
251
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH"])
async def catch_all_proxy(
    path: str,
    request: Request,
    config: ProxyConfig = Depends(get_config),
):
    # Fallback route: accepts GET/POST/PUT/DELETE/PATCH on any path and hands
    # the captured path plus the raw request to `proxy_to_litellm`, returning
    # whatever that helper produces.
    # NOTE(review): this relies on being registered after the specific routes
    # so that they take precedence -- confirm route ordering when adding
    # new endpoints.
    upstream_response = await proxy_to_litellm(config, path, request)
    return upstream_response
258
+
259
+ return app
@@ -0,0 +1,12 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Optional
3
+
4
+
5
class AuthProvider(ABC):
    """Abstract interface for pluggable API-key validation.

    Concrete providers must override :meth:`validate`; the class itself
    cannot be instantiated.
    """

    @abstractmethod
    def validate(self, api_key: Optional[str]) -> Optional[str]:
        """Validate ``api_key`` and return an optional string result.

        ``api_key`` may be ``None``. The meaning of the returned string (and
        of a ``None`` result) is defined by the concrete provider.
        NOTE(review): presumably the result identifies the authenticated
        caller -- confirm against the call sites.
        """
8
+
9
+
10
class NoAuthProvider(AuthProvider):
    """Provider that performs no key checking at all."""

    def validate(self, api_key: Optional[str]) -> Optional[str]:
        """Ignore ``api_key`` and always return ``None``.

        Nothing is inspected and nothing is raised, whatever key is
        supplied (including ``None``).
        """
        return None