eval-protocol 0.2.45__tar.gz → 0.2.45.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (424)
  1. {eval_protocol-0.2.45/eval_protocol.egg-info → eval_protocol-0.2.45.dev0}/PKG-INFO +1 -1
  2. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/__init__.py +6 -0
  3. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/_version.py +3 -3
  4. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/adapters/__init__.py +7 -0
  5. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/adapters/fireworks_tracing.py +4 -2
  6. eval_protocol-0.2.45.dev0/eval_protocol/adapters/weave.py +130 -0
  7. eval_protocol-0.2.45.dev0/eval_protocol/proxy/proxy_core/__init__.py +10 -0
  8. eval_protocol-0.2.45.dev0/eval_protocol/proxy/proxy_core/app.py +259 -0
  9. eval_protocol-0.2.45.dev0/eval_protocol/proxy/proxy_core/auth.py +12 -0
  10. eval_protocol-0.2.45.dev0/eval_protocol/proxy/proxy_core/langfuse.py +358 -0
  11. eval_protocol-0.2.45.dev0/eval_protocol/proxy/proxy_core/litellm.py +168 -0
  12. eval_protocol-0.2.45.dev0/eval_protocol/proxy/proxy_core/main.py +10 -0
  13. eval_protocol-0.2.45.dev0/eval_protocol/proxy/proxy_core/models.py +51 -0
  14. eval_protocol-0.2.45.dev0/eval_protocol/proxy/proxy_core/redis_utils.py +48 -0
  15. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0/eval_protocol.egg-info}/PKG-INFO +1 -1
  16. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol.egg-info/SOURCES.txt +9 -0
  17. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/LICENSE +0 -0
  18. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/README.md +0 -0
  19. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/development/__init__.py +0 -0
  20. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/development/normalize_sandbox_fusion.py +0 -0
  21. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/development/utils/__init__.py +0 -0
  22. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/development/utils/generate_api_key.py +0 -0
  23. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/development/utils/subprocess_manager.py +0 -0
  24. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/__main__.py +0 -0
  25. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/adapters/base.py +0 -0
  26. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/adapters/bigquery.py +0 -0
  27. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/adapters/braintrust.py +0 -0
  28. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/adapters/huggingface.py +0 -0
  29. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/adapters/langchain.py +0 -0
  30. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/adapters/langfuse.py +0 -0
  31. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/adapters/langsmith.py +0 -0
  32. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/adapters/openai_responses.py +0 -0
  33. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/adapters/trl.py +0 -0
  34. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/adapters/utils.py +0 -0
  35. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/__init__.py +0 -0
  36. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/models.py +0 -0
  37. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/orchestrator.py +0 -0
  38. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/resource_abc.py +0 -0
  39. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/resource_pool.py +0 -0
  40. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/resources/__init__.py +0 -0
  41. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
  42. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
  43. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
  44. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
  45. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
  46. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/resources/docker_resource.py +0 -0
  47. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
  48. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/resources/python_state_resource.py +0 -0
  49. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/resources/sql_resource.py +0 -0
  50. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/task_manager.py +0 -0
  51. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/agent/tool_registry.py +0 -0
  52. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/auth.py +0 -0
  53. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/benchmarks/__init__.py +0 -0
  54. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
  55. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
  56. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/benchmarks/test_aime25.py +0 -0
  57. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
  58. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/benchmarks/test_gpqa.py +0 -0
  59. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
  60. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
  61. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
  62. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/cli.py +0 -0
  63. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/cli_commands/__init__.py +0 -0
  64. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
  65. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/cli_commands/common.py +0 -0
  66. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/cli_commands/deploy.py +0 -0
  67. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
  68. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/cli_commands/logs.py +0 -0
  69. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/cli_commands/preview.py +0 -0
  70. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
  71. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/cli_commands/upload.py +0 -0
  72. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/common_utils.py +0 -0
  73. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/config.py +0 -0
  74. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/data_loader/__init__.py +0 -0
  75. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
  76. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/data_loader/factory_data_loader.py +0 -0
  77. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/data_loader/inline_data_loader.py +0 -0
  78. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/data_loader/models.py +0 -0
  79. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/dataset_logger/__init__.py +0 -0
  80. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
  81. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
  82. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
  83. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
  84. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/datasets/__init__.py +0 -0
  85. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/datasets/loader.py +0 -0
  86. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/directory_utils.py +0 -0
  87. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/evaluation.py +0 -0
  88. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/event_bus/__init__.py +0 -0
  89. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/event_bus/event_bus.py +0 -0
  90. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/event_bus/logger.py +0 -0
  91. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
  92. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
  93. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/execution/__init__.py +0 -0
  94. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/execution/pipeline.py +0 -0
  95. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/gcp_tools.py +0 -0
  96. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/generation/cache.py +0 -0
  97. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/generation/clients/base.py +0 -0
  98. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/generation/clients.py +0 -0
  99. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/generic_server.py +0 -0
  100. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/get_pep440_version.py +0 -0
  101. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/human_id/__init__.py +0 -0
  102. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/human_id/dictionary.py +0 -0
  103. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/integrations/__init__.py +0 -0
  104. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/integrations/deepeval.py +0 -0
  105. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/integrations/openeval.py +0 -0
  106. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/integrations/trl.py +0 -0
  107. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/log_utils/__init__.py +0 -0
  108. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
  109. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
  110. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
  111. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
  112. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/log_utils/util.py +0 -0
  113. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/logging_utils.py +0 -0
  114. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/__init__.py +0 -0
  115. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/adapter.py +0 -0
  116. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/client/__init__.py +0 -0
  117. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/client/connection.py +0 -0
  118. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/clients.py +0 -0
  119. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/execution/__init__.py +0 -0
  120. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/execution/base_policy.py +0 -0
  121. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/execution/manager.py +0 -0
  122. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/execution/policy.py +0 -0
  123. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/grid_renderer.py +0 -0
  124. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/mcp_multi_client.py +0 -0
  125. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/mcpgym.py +0 -0
  126. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/process_manager.py +0 -0
  127. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/session/__init__.py +0 -0
  128. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/session/manager.py +0 -0
  129. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/simple_process_manager.py +0 -0
  130. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp/simulation_server.py +0 -0
  131. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_agent/__init__.py +0 -0
  132. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_agent/config.py +0 -0
  133. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_agent/main.py +0 -0
  134. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
  135. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
  136. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
  137. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
  138. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_env.py +0 -0
  139. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/__init__.py +0 -0
  140. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
  141. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
  142. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
  143. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/tau2/README.md +0 -0
  144. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
  145. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
  146. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
  147. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
  148. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/tau2/server.py +0 -0
  149. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
  150. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
  151. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
  152. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
  153. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
  154. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/models.py +0 -0
  155. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/packaging.py +0 -0
  156. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/platform_api.py +0 -0
  157. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/playback_policy.py +0 -0
  158. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/__init__.py +0 -0
  159. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
  160. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
  161. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
  162. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
  163. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
  164. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
  165. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
  166. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
  167. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
  168. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/evaluation_test.py +0 -0
  169. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
  170. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/exception_config.py +0 -0
  171. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/execution.py +0 -0
  172. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
  173. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/handle_persist_flow.py +0 -0
  174. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/parameterize.py +0 -0
  175. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/plugin.py +0 -0
  176. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/remote_rollout_processor.py +0 -0
  177. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/rollout_processor.py +0 -0
  178. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/store_experiment_link.py +0 -0
  179. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/store_results_url.py +0 -0
  180. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/types.py +0 -0
  181. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/utils.py +0 -0
  182. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/pytest/validate_signature.py +0 -0
  183. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/quickstart/__init__.py +0 -0
  184. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/quickstart/llm_judge.py +0 -0
  185. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
  186. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/quickstart/llm_judge_langfuse.py +0 -0
  187. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/quickstart/llm_judge_langsmith.py +0 -0
  188. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/quickstart/llm_judge_openai_responses.py +0 -0
  189. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/quickstart/utils.py +0 -0
  190. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/resources.py +0 -0
  191. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/reward_function.py +0 -0
  192. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/__init__.py +0 -0
  193. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/accuracy.py +0 -0
  194. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/accuracy_length.py +0 -0
  195. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/apps_coding_reward.py +0 -0
  196. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/apps_execution_utils.py +0 -0
  197. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/apps_testing_util.py +0 -0
  198. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/bfcl_reward.py +0 -0
  199. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/code_execution.py +0 -0
  200. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/code_execution_utils.py +0 -0
  201. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/cpp_code.py +0 -0
  202. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/deepcoder_reward.py +0 -0
  203. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/format.py +0 -0
  204. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/function_calling.py +0 -0
  205. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/json_schema.py +0 -0
  206. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/language_consistency.py +0 -0
  207. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/lean_prover.py +0 -0
  208. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/length.py +0 -0
  209. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
  210. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/math.py +0 -0
  211. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
  212. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/reasoning_steps.py +0 -0
  213. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/repetition.py +0 -0
  214. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/rewards/tag_count.py +0 -0
  215. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/rl_processing.py +0 -0
  216. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/server.py +0 -0
  217. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/stats/__init__.py +0 -0
  218. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/stats/confidence_intervals.py +0 -0
  219. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/typed_interface.py +0 -0
  220. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/types/__init__.py +0 -0
  221. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/types/errors.py +0 -0
  222. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/types/remote_rollout_processor.py +0 -0
  223. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/types/types.py +0 -0
  224. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/__init__.py +0 -0
  225. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/batch_evaluation.py +0 -0
  226. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/batch_transformation.py +0 -0
  227. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/check_server_status.py +0 -0
  228. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/dataset_helpers.py +0 -0
  229. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/logs_models.py +0 -0
  230. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/logs_server.py +0 -0
  231. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/module_loader.py +0 -0
  232. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/packaging_utils.py +0 -0
  233. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/show_results_url.py +0 -0
  234. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/static_policy.py +0 -0
  235. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/subprocess_utils.py +0 -0
  236. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol/utils/vite_server.py +0 -0
  237. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol.egg-info/dependency_links.txt +0 -0
  238. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol.egg-info/entry_points.txt +0 -0
  239. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol.egg-info/requires.txt +0 -0
  240. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/eval_protocol.egg-info/top_level.txt +0 -0
  241. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/pyproject.toml +0 -0
  242. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/setup.cfg +0 -0
  243. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/setup.py +0 -0
  244. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_accuracy.py +0 -0
  245. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_accuracy_length.py +0 -0
  246. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_adapters_e2e.py +0 -0
  247. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_agent_orchestrator.py +0 -0
  248. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_agent_resources.py +0 -0
  249. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_auth.py +0 -0
  250. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_batch_evaluation.py +0 -0
  251. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_cli.py +0 -0
  252. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_cli_agent.py +0 -0
  253. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_cli_args.py +0 -0
  254. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_code_execution.py +0 -0
  255. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_config.py +0 -0
  256. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_control_plane_separation.py +0 -0
  257. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_cpp_code.py +0 -0
  258. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_data_driven_task_manager.py +0 -0
  259. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_deepcoder_reward.py +0 -0
  260. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_deepeval_integration.py +0 -0
  261. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_deploy_integration.py +0 -0
  262. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_directory_utils.py +0 -0
  263. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_e2b_integration.py +0 -0
  264. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_e2b_js_integration.py +0 -0
  265. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_edge_cases.py +0 -0
  266. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_eval_protocol_import.py +0 -0
  267. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_evaluation.py +0 -0
  268. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_evaluation_integration.py +0 -0
  269. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_evaluation_postprocess.py +0 -0
  270. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_evaluation_preview_integration.py +0 -0
  271. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_event_bus.py +0 -0
  272. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_event_bus_helper.py +0 -0
  273. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_examples_end_to_end.py +0 -0
  274. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_fireworks_api.py +0 -0
  275. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_format.py +0 -0
  276. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_fractional_code.py +0 -0
  277. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_function_calling.py +0 -0
  278. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_gcp_tools.py +0 -0
  279. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_generic_server.py +0 -0
  280. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_human_id.py +0 -0
  281. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_integration.py +0 -0
  282. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_json_schema.py +0 -0
  283. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_kwargs_validation.py +0 -0
  284. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_language_consistency.py +0 -0
  285. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_lean_prover.py +0 -0
  286. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_lean_prover_runner.py +0 -0
  287. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_length.py +0 -0
  288. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_list_comparison_math_reward.py +0 -0
  289. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_logs_server.py +0 -0
  290. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_logs_server_simple.py +0 -0
  291. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_math.py +0 -0
  292. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_minimal.py +0 -0
  293. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_models.py +0 -0
  294. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_models_rl.py +0 -0
  295. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_multiple_choice_math_reward.py +0 -0
  296. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_n_variant_batch_integration.py +0 -0
  297. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_n_variant_integration.py +0 -0
  298. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_openai_compatibility.py +0 -0
  299. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_openeval_integration.py +0 -0
  300. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_packaging.py +0 -0
  301. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_parallel_rollouts.py +0 -0
  302. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_platform_api.py +0 -0
  303. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_quickstart_utils.py +0 -0
  304. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_readiness.py +0 -0
  305. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_reasoning_steps.py +0 -0
  306. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_repetition.py +0 -0
  307. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_repetition_debug.py +0 -0
  308. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_retry_mechanism.py +0 -0
  309. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_reward_function.py +0 -0
  310. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_reward_protocol_import.py +0 -0
  311. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_rl_processing.py +0 -0
  312. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_rollout_control_plane_integration.py +0 -0
  313. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_server.py +0 -0
  314. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_show_results_url.py +0 -0
  315. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_status_migration_changes.py +0 -0
  316. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_status_migration_integration.py +0 -0
  317. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_status_model.py +0 -0
  318. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_tag_count.py +0 -0
  319. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_tau_bench_airline_smoke.py +0 -0
  320. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_typed_interface.py +0 -0
  321. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_typed_interface_rl.py +0 -0
  322. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_upload_entrypoint.py +0 -0
  323. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_url_handling.py +0 -0
  324. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/tests/test_vite_server.py +0 -0
  325. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/__init__.py +0 -0
  326. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/agent/__init__.py +0 -0
  327. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/agent/base.py +0 -0
  328. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/agent/llm_agent.py +0 -0
  329. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/api_service/__init__.py +0 -0
  330. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/api_service/api_config.py +0 -0
  331. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/api_service/data_model.py +0 -0
  332. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/api_service/simulation_service.py +0 -0
  333. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/cli.py +0 -0
  334. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/config.py +0 -0
  335. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/data/domains/airline/policy.md +0 -0
  336. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/data/domains/mock/policy.md +0 -0
  337. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
  338. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/data/domains/retail/policy.md +0 -0
  339. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
  340. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
  341. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
  342. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
  343. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
  344. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
  345. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
  346. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/data_model/__init__.py +0 -0
  347. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/data_model/message.py +0 -0
  348. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/data_model/simulation.py +0 -0
  349. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/data_model/tasks.py +0 -0
  350. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/__init__.py +0 -0
  351. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/airline/__init__.py +0 -0
  352. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/airline/data_model.py +0 -0
  353. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/airline/environment.py +0 -0
  354. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/airline/tools.py +0 -0
  355. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/airline/utils.py +0 -0
  356. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/mock/__init__.py +0 -0
  357. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/mock/data_model.py +0 -0
  358. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/mock/environment.py +0 -0
  359. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/mock/tools.py +0 -0
  360. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/mock/utils.py +0 -0
  361. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/retail/__init__.py +0 -0
  362. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/retail/data_model.py +0 -0
  363. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/retail/environment.py +0 -0
  364. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/retail/tools.py +0 -0
  365. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/retail/utils.py +0 -0
  366. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/__init__.py +0 -0
  367. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/data_model.py +0 -0
  368. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/environment.py +0 -0
  369. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
  370. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
  371. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
  372. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
  373. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
  374. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
  375. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
  376. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
  377. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/tools.py +0 -0
  378. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
  379. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/user_tools.py +0 -0
  380. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/domains/telecom/utils.py +0 -0
  381. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/environment/__init__.py +0 -0
  382. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/environment/db.py +0 -0
  383. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/environment/environment.py +0 -0
  384. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/environment/server.py +0 -0
  385. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/environment/tool.py +0 -0
  386. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/environment/toolkit.py +0 -0
  387. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/environment/utils/interface_agent.py +0 -0
  388. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/evaluator/__init__.py +0 -0
  389. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/evaluator/evaluator.py +0 -0
  390. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/evaluator/evaluator_action.py +0 -0
  391. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/evaluator/evaluator_base.py +0 -0
  392. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
  393. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/evaluator/evaluator_env.py +0 -0
  394. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
  395. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/metrics/__init__.py +0 -0
  396. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/metrics/agent_metrics.py +0 -0
  397. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/metrics/break_down_metrics.py +0 -0
  398. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/orchestrator/__init__.py +0 -0
  399. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/orchestrator/environment_manager.py +0 -0
  400. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/orchestrator/orchestrator.py +0 -0
  401. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/orchestrator/utils.py +0 -0
  402. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/registry.py +0 -0
  403. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/run.py +0 -0
  404. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/scripts/__init__.py +0 -0
  405. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/scripts/check_data.py +0 -0
  406. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/scripts/show_domain_doc.py +0 -0
  407. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/scripts/start_servers.py +0 -0
  408. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/scripts/view_simulations.py +0 -0
  409. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/user/__init__.py +0 -0
  410. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/user/base.py +0 -0
  411. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/user/user_simulator.py +0 -0
  412. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/utils/__init__.py +0 -0
  413. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/utils/display.py +0 -0
  414. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/utils/io_utils.py +0 -0
  415. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/utils/llm_utils.py +0 -0
  416. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/utils/pydantic_utils.py +0 -0
  417. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vendor/tau2/utils/utils.py +0 -0
  418. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/versioneer.py +0 -0
  419. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
  420. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vite-app/dist/assets/index-C81y9r9l.js +0 -0
  421. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vite-app/dist/assets/index-C81y9r9l.js.map +0 -0
  422. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vite-app/dist/assets/index-DpYZaoAr.css +0 -0
  423. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
  424. {eval_protocol-0.2.45 → eval_protocol-0.2.45.dev0}/vite-app/dist/index.html +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.45
3
+ Version: 0.2.45.dev0
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -64,6 +64,12 @@ try:
64
64
  except ImportError:
65
65
  LangSmithAdapter = None
66
66
 
67
+
68
+ try:
69
+ from .adapters import WeaveAdapter
70
+ except ImportError:
71
+ WeaveAdapter = None
72
+
67
73
  warnings.filterwarnings("default", category=DeprecationWarning, module="eval_protocol")
68
74
 
69
75
  __all__ = [
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-10-08T14:59:37-0700",
11
+ "date": "2025-10-09T01:23:30-0700",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "b120611112b84df8476cefcc02660c542e61b2a9",
15
- "version": "0.2.45"
14
+ "full-revisionid": "c2ec0c8bb3f927b3c7f77c8a0e4fb955c7685ea6",
15
+ "version": "0.2.45-dev"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -92,3 +92,10 @@ try:
92
92
  __all__.extend(["LangSmithAdapter"])
93
93
  except ImportError:
94
94
  pass
95
+
96
+ try:
97
+ from .weave import WeaveAdapter
98
+
99
+ __all__.extend(["WeaveAdapter"])
100
+ except ImportError:
101
+ pass
@@ -7,9 +7,9 @@ to pull data from Langfuse deployments with simplified retry logic handling.
7
7
  from __future__ import annotations
8
8
  import logging
9
9
  import requests
10
- import time
11
10
  from datetime import datetime
12
11
  from typing import Any, Dict, List, Optional, Protocol
12
+ import os
13
13
 
14
14
  from eval_protocol.models import EvaluationRow, InputMetadata, ExecutionMetadata, Message
15
15
  from .base import BaseAdapter
@@ -349,9 +349,11 @@ class FireworksTracingAdapter(BaseAdapter):
349
349
  else:
350
350
  url = f"{self.base_url}/v1/traces"
351
351
 
352
+ headers = {"Authorization": f"Bearer {os.environ.get('FIREWORKS_API_KEY')}"}
353
+
352
354
  result = None
353
355
  try:
354
- response = requests.get(url, params=params, timeout=self.timeout)
356
+ response = requests.get(url, params=params, timeout=self.timeout, headers=headers)
355
357
  response.raise_for_status()
356
358
  result = response.json()
357
359
  except requests.exceptions.HTTPError as e:
@@ -0,0 +1,130 @@
1
+ """Weave (Weights & Biases) adapter for Eval Protocol.
2
+
3
+ This adapter fetches recent root traces from Weave Trace API and converts them
4
+ to `EvaluationRow` format for use in evaluation pipelines. It is intentionally
5
+ minimal and depends only on requests.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Any, Dict, List, Optional
11
+ import os
12
+ import requests
13
+
14
+ from eval_protocol.models import EvaluationRow, InputMetadata, Message, ExecutionMetadata
15
+ from .base import BaseAdapter
16
+
17
+
18
+ def _extract_messages_from_trace(trace: Dict[str, Any], include_tool_calls: bool = True) -> List[Message]:
19
+ messages: List[Message] = []
20
+
21
+ # Prefer explicit output messages if provided
22
+ output = trace.get("output") or {}
23
+ out_msgs = output.get("messages")
24
+ if isinstance(out_msgs, list):
25
+ for m in out_msgs:
26
+ messages.append(
27
+ Message(
28
+ role=m.get("role"),
29
+ content=m.get("content"),
30
+ tool_calls=m.get("tool_calls") if include_tool_calls else None,
31
+ tool_call_id=m.get("tool_call_id"),
32
+ name=m.get("name"),
33
+ )
34
+ )
35
+
36
+ # If no explicit output messages, fall back to final bubble from choices
37
+ if not messages:
38
+ choices = output.get("choices")
39
+ if isinstance(choices, list) and choices:
40
+ msg = (choices[0] or {}).get("message", {})
41
+ if msg:
42
+ messages.append(Message(role=msg.get("role"), content=msg.get("content")))
43
+
44
+ # Prepend input messages if present and not already contained
45
+ inputs = trace.get("inputs") or {}
46
+ in_msgs = inputs.get("messages")
47
+ if isinstance(in_msgs, list):
48
+ prefixed = [Message(role=m.get("role"), content=m.get("content")) for m in in_msgs]
49
+ messages = prefixed + messages
50
+
51
+ return messages
52
+
53
+
54
+ def _convert_trace_to_evaluation_row(
55
+ trace: Dict[str, Any], include_tool_calls: bool = True
56
+ ) -> Optional[EvaluationRow]:
57
+ messages = _extract_messages_from_trace(trace, include_tool_calls=include_tool_calls)
58
+ if not messages:
59
+ return None
60
+
61
+ # Provider-native IDs for UI joinability
62
+ session_data = {
63
+ "weave_trace_id": trace.get("id"),
64
+ "weave_project_id": trace.get("project_id"),
65
+ }
66
+
67
+ # Optional EP identifiers (if present in provider payload)
68
+ meta_in = (trace.get("inputs") or {}).get("metadata") or {}
69
+ meta_out = (trace.get("output") or {}).get("metadata") or {}
70
+ metadata = {**meta_in, **meta_out}
71
+
72
+ input_metadata = InputMetadata(row_id=metadata.get("row_id"), session_data=session_data)
73
+
74
+ # Preserve default factory behavior by only setting provided fields
75
+ exec_kwargs: Dict[str, Any] = {}
76
+ for k in ("invocation_id", "experiment_id", "rollout_id", "run_id"):
77
+ if metadata.get(k) is not None:
78
+ exec_kwargs[k] = metadata[k]
79
+ execution_metadata = ExecutionMetadata(**exec_kwargs)
80
+
81
+ # Capture tools if provider exposes them (prefer inputs)
82
+ tools = None
83
+ inputs = trace.get("inputs") or {}
84
+ if include_tool_calls and isinstance(inputs, dict) and "tools" in inputs:
85
+ tools = inputs.get("tools")
86
+
87
+ return EvaluationRow(
88
+ messages=messages, tools=tools, input_metadata=input_metadata, execution_metadata=execution_metadata
89
+ )
90
+
91
+
92
+ class WeaveAdapter(BaseAdapter):
93
+ """Adapter to pull data from Weave Trace API and convert to EvaluationRow format."""
94
+
95
+ def __init__(
96
+ self, base_url: Optional[str] = None, api_token: Optional[str] = None, project_id: Optional[str] = None
97
+ ):
98
+ self.base_url = base_url or os.getenv("WEAVE_TRACE_BASE_URL", "https://trace.wandb.ai")
99
+ self.api_token = api_token or os.getenv("WANDB_API_KEY")
100
+ # project_id is in form "<entity>/<project>"
101
+ self.project_id = project_id or (f"{os.getenv('WANDB_ENTITY')}/{os.getenv('WANDB_PROJECT')}")
102
+ if not self.api_token or not self.project_id or "/" not in self.project_id:
103
+ raise ValueError("Missing Weave credentials or project (WANDB_API_KEY and WANDB_ENTITY/WANDB_PROJECT)")
104
+
105
+ def _fetch_traces(self, limit: int = 100) -> List[Dict[str, Any]]:
106
+ url = f"{self.base_url}/calls/stream_query"
107
+ payload = {
108
+ "project_id": self.project_id,
109
+ "filter": {"trace_roots_only": True},
110
+ "limit": limit,
111
+ "offset": 0,
112
+ "sort_by": [{"field": "started_at", "direction": "desc"}],
113
+ "include_feedback": False,
114
+ }
115
+ headers = {"Authorization": f"Bearer {self.api_token}", "Content-Type": "application/json"}
116
+ resp = requests.post(url, json=payload, headers=headers, timeout=30)
117
+ resp.raise_for_status()
118
+ body = resp.json() or {}
119
+ return body.get("data", [])
120
+
121
+ def get_evaluation_rows(self, *args, **kwargs) -> List[EvaluationRow]:
122
+ limit = kwargs.get("limit", 100)
123
+ include_tool_calls = kwargs.get("include_tool_calls", True)
124
+ traces = self._fetch_traces(limit=limit)
125
+ rows: List[EvaluationRow] = []
126
+ for tr in traces:
127
+ row = _convert_trace_to_evaluation_row(tr, include_tool_calls=include_tool_calls)
128
+ if row:
129
+ rows.append(row)
130
+ return rows
@@ -0,0 +1,10 @@
1
+ from .models import ProxyConfig
2
+ from .app import create_app
3
+ from .auth import AuthProvider, NoAuthProvider
4
+
5
+ __all__ = [
6
+ "ProxyConfig",
7
+ "create_app",
8
+ "AuthProvider",
9
+ "NoAuthProvider",
10
+ ]
@@ -0,0 +1,259 @@
1
+ """
2
+ Metadata Extraction Gateway
3
+ A FastAPI service that sits in front of LiteLLM and extracts metadata from URL paths.
4
+ """
5
+
6
+ from fastapi import FastAPI, Depends, HTTPException, Request, Query
7
+ from typing import Optional, List
8
+ import os
9
+ import redis
10
+ import logging
11
+ import json
12
+ from pathlib import Path
13
+ import sys
14
+ from contextlib import asynccontextmanager
15
+
16
+ from .models import ProxyConfig, LangfuseTracesResponse
17
+ from .auth import AuthProvider, NoAuthProvider
18
+ from .litellm import handle_chat_completion, proxy_to_litellm
19
+ from .langfuse import fetch_langfuse_traces
20
+
21
+ # Configure logging before any other imports (so all modules inherit this config)
22
+ log_level = os.getenv("LOG_LEVEL", "INFO").upper()
23
+ logging.basicConfig(
24
+ level=getattr(logging, log_level),
25
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
26
+ handlers=[logging.StreamHandler(sys.stdout)],
27
+ )
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
+ def build_proxy_config() -> ProxyConfig:
33
+ """Load environment and secrets, and build ProxyConfig (no Redis)."""
34
+ # Env
35
+ litellm_url = os.getenv("LITELLM_URL")
36
+ if not litellm_url:
37
+ raise ValueError("LITELLM_URL environment variable must be set")
38
+ request_timeout = float(os.getenv("REQUEST_TIMEOUT", "300.0"))
39
+
40
+ # Secrets - use SECRETS_PATH env var if set, otherwise default to proxy/secrets.json
41
+ secrets_path_str = os.getenv("SECRETS_PATH")
42
+ if secrets_path_str:
43
+ secrets_path = Path(secrets_path_str)
44
+ else:
45
+ secrets_path = Path(__file__).parent / "secrets.json"
46
+ if not secrets_path.exists():
47
+ raise ValueError(
48
+ "secrets.json not found! Please create it from secrets.json.example:\n"
49
+ " cp litellm_proxy_config/proxy/secrets.json.example litellm_proxy_config/proxy/secrets.json\n"
50
+ "Then add your Langfuse API keys to secrets.json"
51
+ )
52
+ try:
53
+ with open(secrets_path, "r") as f:
54
+ secrets_config = json.load(f)
55
+ langfuse_keys = secrets_config["langfuse_keys"]
56
+ default_project_id = secrets_config["default_project_id"]
57
+ logger.info(f"Loaded {len(langfuse_keys)} Langfuse project(s) from secrets.json")
58
+ except KeyError as e:
59
+ raise ValueError(f"Missing required key in secrets.json: {e}")
60
+ except json.JSONDecodeError as e:
61
+ raise ValueError(f"Invalid JSON in secrets.json: {e}")
62
+
63
+ return ProxyConfig(
64
+ litellm_url=litellm_url,
65
+ request_timeout=request_timeout,
66
+ langfuse_keys=langfuse_keys,
67
+ default_project_id=default_project_id,
68
+ )
69
+
70
+
71
+ def init_redis() -> redis.Redis:
72
+ """Initialize and return a Redis client from environment variables."""
73
+ redis_host = os.getenv("REDIS_HOST")
74
+ if not redis_host:
75
+ raise ValueError("REDIS_HOST environment variable must be set")
76
+ redis_port = int(os.getenv("REDIS_PORT", "6379"))
77
+ redis_password = os.getenv("REDIS_PASSWORD")
78
+
79
+ try:
80
+ client = redis.Redis(
81
+ host=redis_host,
82
+ port=redis_port,
83
+ password=redis_password if redis_password else None,
84
+ decode_responses=True,
85
+ socket_connect_timeout=5,
86
+ socket_timeout=5,
87
+ retry_on_timeout=True,
88
+ )
89
+ client.ping()
90
+ logger.info(f"Connected to Redis at {redis_host}:{redis_port}")
91
+ return client
92
+ except Exception as e:
93
+ raise ConnectionError(f"Failed to connect to Redis at {redis_host}:{redis_port}: {e}")
94
+
95
+
96
+ def create_app(
97
+ auth_provider: AuthProvider = NoAuthProvider(),
98
+ ) -> FastAPI:
99
+ @asynccontextmanager
100
+ async def lifespan(app: FastAPI):
101
+ # Build runtime on startup
102
+ app.state.config = build_proxy_config()
103
+ app.state.redis = init_redis()
104
+ try:
105
+ yield
106
+ finally:
107
+ try:
108
+ app.state.redis.close()
109
+ except Exception:
110
+ pass
111
+
112
+ app = FastAPI(title="LiteLLM Metadata Proxy", lifespan=lifespan)
113
+
114
+ def get_config(request: Request) -> ProxyConfig:
115
+ return request.app.state.config
116
+
117
+ def get_redis(request: Request) -> redis.Redis:
118
+ return request.app.state.redis
119
+
120
+ async def require_auth(request: Request) -> None:
121
+ auth_header = request.headers.get("authorization", "")
122
+ api_key = None
123
+ if auth_header.startswith("Bearer "):
124
+ api_key = auth_header.replace("Bearer ", "").strip()
125
+
126
+ auth_provider.validate(api_key)
127
+ return None
128
+
129
+ # =====================
130
+ # Chat completion routes
131
+ # =====================
132
+ @app.post(
133
+ "/project_id/{project_id}/rollout_id/{rollout_id}/invocation_id/{invocation_id}/experiment_id/{experiment_id}/run_id/{run_id}/row_id/{row_id}/chat/completions"
134
+ )
135
+ @app.post(
136
+ "/v1/project_id/{project_id}/rollout_id/{rollout_id}/invocation_id/{invocation_id}/experiment_id/{experiment_id}/run_id/{run_id}/row_id/{row_id}/chat/completions"
137
+ )
138
+ @app.post(
139
+ "/rollout_id/{rollout_id}/invocation_id/{invocation_id}/experiment_id/{experiment_id}/run_id/{run_id}/row_id/{row_id}/chat/completions"
140
+ )
141
+ @app.post(
142
+ "/v1/rollout_id/{rollout_id}/invocation_id/{invocation_id}/experiment_id/{experiment_id}/run_id/{run_id}/row_id/{row_id}/chat/completions"
143
+ )
144
+ @app.post(
145
+ "/project_id/{project_id}/rollout_id/{rollout_id}/invocation_id/{invocation_id}/experiment_id/{experiment_id}/run_id/{run_id}/row_id/{row_id}/encoded_base_url/{encoded_base_url}/chat/completions"
146
+ )
147
+ @app.post(
148
+ "/v1/project_id/{project_id}/rollout_id/{rollout_id}/invocation_id/{invocation_id}/experiment_id/{experiment_id}/run_id/{run_id}/row_id/{row_id}/encoded_base_url/{encoded_base_url}/chat/completions"
149
+ )
150
+ @app.post(
151
+ "/rollout_id/{rollout_id}/invocation_id/{invocation_id}/experiment_id/{experiment_id}/run_id/{run_id}/row_id/{row_id}/encoded_base_url/{encoded_base_url}/chat/completions"
152
+ )
153
+ @app.post(
154
+ "/v1/rollout_id/{rollout_id}/invocation_id/{invocation_id}/experiment_id/{experiment_id}/run_id/{run_id}/row_id/{row_id}/encoded_base_url/{encoded_base_url}/chat/completions"
155
+ )
156
+ async def chat_completion_with_full_metadata(
157
+ rollout_id: str,
158
+ invocation_id: str,
159
+ experiment_id: str,
160
+ run_id: str,
161
+ row_id: str,
162
+ request: Request,
163
+ project_id: Optional[str] = None,
164
+ encoded_base_url: Optional[str] = None,
165
+ config: ProxyConfig = Depends(get_config),
166
+ redis_client: redis.Redis = Depends(get_redis),
167
+ ):
168
+ return await handle_chat_completion(
169
+ config=config,
170
+ redis_client=redis_client,
171
+ request=request,
172
+ project_id=project_id,
173
+ rollout_id=rollout_id,
174
+ invocation_id=invocation_id,
175
+ experiment_id=experiment_id,
176
+ run_id=run_id,
177
+ row_id=row_id,
178
+ encoded_base_url=encoded_base_url,
179
+ )
180
+
181
+ @app.post("/project_id/{project_id}/chat/completions")
182
+ @app.post("/v1/project_id/{project_id}/chat/completions")
183
+ async def chat_completion_with_project_only(
184
+ project_id: str,
185
+ request: Request,
186
+ config: ProxyConfig = Depends(get_config),
187
+ redis_client: redis.Redis = Depends(get_redis),
188
+ ):
189
+ return await handle_chat_completion(
190
+ config=config,
191
+ redis_client=redis_client,
192
+ request=request,
193
+ project_id=project_id,
194
+ )
195
+
196
+ # ===============
197
+ # Traces routes
198
+ # ===============
199
+ @app.get("/traces", response_model=LangfuseTracesResponse)
200
+ @app.get("/v1/traces", response_model=LangfuseTracesResponse)
201
+ @app.get("/project_id/{project_id}/traces", response_model=LangfuseTracesResponse)
202
+ @app.get("/v1/project_id/{project_id}/traces", response_model=LangfuseTracesResponse)
203
+ async def get_langfuse_traces(
204
+ tags: List[str] = Query(...), # REQUIRED query param
205
+ project_id: Optional[str] = None,
206
+ limit: int = 100,
207
+ sample_size: Optional[int] = None,
208
+ user_id: Optional[str] = None,
209
+ session_id: Optional[str] = None,
210
+ name: Optional[str] = None,
211
+ environment: Optional[str] = None,
212
+ version: Optional[str] = None,
213
+ release: Optional[str] = None,
214
+ fields: Optional[str] = None,
215
+ hours_back: Optional[int] = None,
216
+ from_timestamp: Optional[str] = None,
217
+ to_timestamp: Optional[str] = None,
218
+ sleep_between_gets: float = 2.5,
219
+ max_retries: int = 3,
220
+ config: ProxyConfig = Depends(get_config),
221
+ redis_client: redis.Redis = Depends(get_redis),
222
+ _: None = Depends(require_auth),
223
+ ) -> LangfuseTracesResponse:
224
+ return await fetch_langfuse_traces(
225
+ config=config,
226
+ redis_client=redis_client,
227
+ tags=tags,
228
+ project_id=project_id,
229
+ limit=limit,
230
+ sample_size=sample_size,
231
+ user_id=user_id,
232
+ session_id=session_id,
233
+ name=name,
234
+ environment=environment,
235
+ version=version,
236
+ release=release,
237
+ fields=fields,
238
+ hours_back=hours_back,
239
+ from_timestamp=from_timestamp,
240
+ to_timestamp=to_timestamp,
241
+ sleep_between_gets=sleep_between_gets,
242
+ max_retries=max_retries,
243
+ )
244
+
245
+ # Health
246
+ @app.get("/health")
247
+ async def health():
248
+ return {"status": "healthy", "service": "metadata-proxy"}
249
+
250
+ # Catch-all
251
+ @app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH"])
252
+ async def catch_all_proxy(
253
+ path: str,
254
+ request: Request,
255
+ config: ProxyConfig = Depends(get_config),
256
+ ):
257
+ return await proxy_to_litellm(config, path, request)
258
+
259
+ return app
@@ -0,0 +1,12 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Optional
3
+
4
+
5
+ class AuthProvider(ABC):
6
+ @abstractmethod
7
+ def validate(self, api_key: Optional[str]) -> Optional[str]: ...
8
+
9
+
10
+ class NoAuthProvider(AuthProvider):
11
+ def validate(self, api_key: Optional[str]) -> Optional[str]:
12
+ return None