eval-protocol 0.2.45.dev1__tar.gz → 0.2.46.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (427):
  1. {eval_protocol-0.2.45.dev1/eval_protocol.egg-info → eval_protocol-0.2.46.dev0}/PKG-INFO +1 -1
  2. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/_version.py +3 -3
  3. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/proxy/__init__.py +3 -1
  4. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/proxy/proxy_core/__init__.py +3 -1
  5. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/proxy/proxy_core/app.py +88 -63
  6. eval_protocol-0.2.46.dev0/eval_protocol/proxy/proxy_core/auth.py +18 -0
  7. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/proxy/proxy_core/langfuse.py +30 -24
  8. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/proxy/proxy_core/litellm.py +18 -15
  9. eval_protocol-0.2.46.dev0/eval_protocol/proxy/proxy_core/models.py +92 -0
  10. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0/eval_protocol.egg-info}/PKG-INFO +1 -1
  11. eval_protocol-0.2.45.dev1/eval_protocol/proxy/proxy_core/auth.py +0 -12
  12. eval_protocol-0.2.45.dev1/eval_protocol/proxy/proxy_core/models.py +0 -51
  13. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/LICENSE +0 -0
  14. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/README.md +0 -0
  15. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/development/__init__.py +0 -0
  16. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/development/normalize_sandbox_fusion.py +0 -0
  17. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/development/utils/__init__.py +0 -0
  18. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/development/utils/generate_api_key.py +0 -0
  19. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/development/utils/subprocess_manager.py +0 -0
  20. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/__init__.py +0 -0
  21. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/__main__.py +0 -0
  22. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/adapters/__init__.py +0 -0
  23. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/adapters/base.py +0 -0
  24. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/adapters/bigquery.py +0 -0
  25. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/adapters/braintrust.py +0 -0
  26. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/adapters/fireworks_tracing.py +0 -0
  27. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/adapters/huggingface.py +0 -0
  28. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/adapters/langchain.py +0 -0
  29. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/adapters/langfuse.py +0 -0
  30. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/adapters/langsmith.py +0 -0
  31. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/adapters/openai_responses.py +0 -0
  32. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/adapters/trl.py +0 -0
  33. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/adapters/utils.py +0 -0
  34. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/adapters/weave.py +0 -0
  35. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/agent/__init__.py +0 -0
  36. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/agent/models.py +0 -0
  37. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/agent/orchestrator.py +0 -0
  38. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/agent/resource_abc.py +0 -0
  39. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/agent/resource_pool.py +0 -0
  40. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/agent/resources/__init__.py +0 -0
  41. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
  42. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
  43. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
  44. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
  45. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
  46. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/agent/resources/docker_resource.py +0 -0
  47. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
  48. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/agent/resources/python_state_resource.py +0 -0
  49. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/agent/resources/sql_resource.py +0 -0
  50. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/agent/task_manager.py +0 -0
  51. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/agent/tool_registry.py +0 -0
  52. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/auth.py +0 -0
  53. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/benchmarks/__init__.py +0 -0
  54. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
  55. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
  56. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/benchmarks/test_aime25.py +0 -0
  57. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
  58. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/benchmarks/test_gpqa.py +0 -0
  59. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
  60. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
  61. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
  62. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/cli.py +0 -0
  63. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/cli_commands/__init__.py +0 -0
  64. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
  65. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/cli_commands/common.py +0 -0
  66. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/cli_commands/deploy.py +0 -0
  67. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
  68. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/cli_commands/logs.py +0 -0
  69. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/cli_commands/preview.py +0 -0
  70. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
  71. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/cli_commands/upload.py +0 -0
  72. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/common_utils.py +0 -0
  73. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/config.py +0 -0
  74. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/data_loader/__init__.py +0 -0
  75. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
  76. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/data_loader/factory_data_loader.py +0 -0
  77. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/data_loader/inline_data_loader.py +0 -0
  78. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/data_loader/models.py +0 -0
  79. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/dataset_logger/__init__.py +0 -0
  80. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
  81. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
  82. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
  83. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
  84. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/datasets/__init__.py +0 -0
  85. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/datasets/loader.py +0 -0
  86. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/directory_utils.py +0 -0
  87. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/evaluation.py +0 -0
  88. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/event_bus/__init__.py +0 -0
  89. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/event_bus/event_bus.py +0 -0
  90. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/event_bus/logger.py +0 -0
  91. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
  92. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
  93. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/execution/__init__.py +0 -0
  94. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/execution/pipeline.py +0 -0
  95. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/gcp_tools.py +0 -0
  96. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/generation/cache.py +0 -0
  97. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/generation/clients/base.py +0 -0
  98. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/generation/clients.py +0 -0
  99. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/generic_server.py +0 -0
  100. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/get_pep440_version.py +0 -0
  101. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/human_id/__init__.py +0 -0
  102. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/human_id/dictionary.py +0 -0
  103. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/integrations/__init__.py +0 -0
  104. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/integrations/deepeval.py +0 -0
  105. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/integrations/openeval.py +0 -0
  106. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/integrations/trl.py +0 -0
  107. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/log_utils/__init__.py +0 -0
  108. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
  109. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
  110. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
  111. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
  112. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/log_utils/util.py +0 -0
  113. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/logging_utils.py +0 -0
  114. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp/__init__.py +0 -0
  115. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp/adapter.py +0 -0
  116. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp/client/__init__.py +0 -0
  117. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp/client/connection.py +0 -0
  118. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp/clients.py +0 -0
  119. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp/execution/__init__.py +0 -0
  120. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp/execution/base_policy.py +0 -0
  121. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp/execution/manager.py +0 -0
  122. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp/execution/policy.py +0 -0
  123. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp/grid_renderer.py +0 -0
  124. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp/mcp_multi_client.py +0 -0
  125. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp/mcpgym.py +0 -0
  126. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp/process_manager.py +0 -0
  127. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp/session/__init__.py +0 -0
  128. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp/session/manager.py +0 -0
  129. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp/simple_process_manager.py +0 -0
  130. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp/simulation_server.py +0 -0
  131. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp_agent/__init__.py +0 -0
  132. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp_agent/config.py +0 -0
  133. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp_agent/main.py +0 -0
  134. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
  135. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
  136. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
  137. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
  138. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp_env.py +0 -0
  139. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp_servers/__init__.py +0 -0
  140. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
  141. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
  142. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
  143. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp_servers/tau2/README.md +0 -0
  144. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
  145. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
  146. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
  147. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
  148. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp_servers/tau2/server.py +0 -0
  149. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
  150. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
  151. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
  152. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
  153. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
  154. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/models.py +0 -0
  155. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/packaging.py +0 -0
  156. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/platform_api.py +0 -0
  157. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/playback_policy.py +0 -0
  158. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/proxy/proxy_core/main.py +0 -0
  159. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/proxy/proxy_core/redis_utils.py +0 -0
  160. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/__init__.py +0 -0
  161. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
  162. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
  163. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
  164. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
  165. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
  166. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
  167. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
  168. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
  169. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
  170. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/evaluation_test.py +0 -0
  171. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
  172. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/exception_config.py +0 -0
  173. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/execution.py +0 -0
  174. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
  175. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/handle_persist_flow.py +0 -0
  176. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/parameterize.py +0 -0
  177. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/plugin.py +0 -0
  178. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/remote_rollout_processor.py +0 -0
  179. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/rollout_processor.py +0 -0
  180. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/store_experiment_link.py +0 -0
  181. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/store_results_url.py +0 -0
  182. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/types.py +0 -0
  183. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/utils.py +0 -0
  184. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/pytest/validate_signature.py +0 -0
  185. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/quickstart/__init__.py +0 -0
  186. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/quickstart/llm_judge.py +0 -0
  187. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
  188. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/quickstart/llm_judge_langfuse.py +0 -0
  189. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/quickstart/llm_judge_langsmith.py +0 -0
  190. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/quickstart/llm_judge_openai_responses.py +0 -0
  191. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/quickstart/utils.py +0 -0
  192. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/resources.py +0 -0
  193. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/reward_function.py +0 -0
  194. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/rewards/__init__.py +0 -0
  195. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/rewards/accuracy.py +0 -0
  196. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/rewards/accuracy_length.py +0 -0
  197. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/rewards/apps_coding_reward.py +0 -0
  198. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/rewards/apps_execution_utils.py +0 -0
  199. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/rewards/apps_testing_util.py +0 -0
  200. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/rewards/bfcl_reward.py +0 -0
  201. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/rewards/code_execution.py +0 -0
  202. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/rewards/code_execution_utils.py +0 -0
  203. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/rewards/cpp_code.py +0 -0
  204. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/rewards/deepcoder_reward.py +0 -0
  205. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/rewards/format.py +0 -0
  206. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/rewards/function_calling.py +0 -0
  207. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/rewards/json_schema.py +0 -0
  208. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/rewards/language_consistency.py +0 -0
  209. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/rewards/lean_prover.py +0 -0
  210. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/rewards/length.py +0 -0
  211. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
  212. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/rewards/math.py +0 -0
  213. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
  214. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/rewards/reasoning_steps.py +0 -0
  215. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/rewards/repetition.py +0 -0
  216. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/rewards/tag_count.py +0 -0
  217. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/rl_processing.py +0 -0
  218. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/server.py +0 -0
  219. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/stats/__init__.py +0 -0
  220. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/stats/confidence_intervals.py +0 -0
  221. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/typed_interface.py +0 -0
  222. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/types/__init__.py +0 -0
  223. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/types/errors.py +0 -0
  224. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/types/remote_rollout_processor.py +0 -0
  225. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/types/types.py +0 -0
  226. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/utils/__init__.py +0 -0
  227. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/utils/batch_evaluation.py +0 -0
  228. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/utils/batch_transformation.py +0 -0
  229. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/utils/check_server_status.py +0 -0
  230. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/utils/dataset_helpers.py +0 -0
  231. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/utils/logs_models.py +0 -0
  232. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/utils/logs_server.py +0 -0
  233. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/utils/module_loader.py +0 -0
  234. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/utils/packaging_utils.py +0 -0
  235. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/utils/show_results_url.py +0 -0
  236. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/utils/static_policy.py +0 -0
  237. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/utils/subprocess_utils.py +0 -0
  238. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol/utils/vite_server.py +0 -0
  239. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol.egg-info/SOURCES.txt +0 -0
  240. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol.egg-info/dependency_links.txt +0 -0
  241. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol.egg-info/entry_points.txt +0 -0
  242. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol.egg-info/requires.txt +0 -0
  243. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/eval_protocol.egg-info/top_level.txt +0 -0
  244. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/pyproject.toml +0 -0
  245. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/setup.cfg +0 -0
  246. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/setup.py +0 -0
  247. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_accuracy.py +0 -0
  248. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_accuracy_length.py +0 -0
  249. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_adapters_e2e.py +0 -0
  250. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_agent_orchestrator.py +0 -0
  251. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_agent_resources.py +0 -0
  252. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_auth.py +0 -0
  253. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_batch_evaluation.py +0 -0
  254. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_cli.py +0 -0
  255. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_cli_agent.py +0 -0
  256. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_cli_args.py +0 -0
  257. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_code_execution.py +0 -0
  258. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_config.py +0 -0
  259. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_control_plane_separation.py +0 -0
  260. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_cpp_code.py +0 -0
  261. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_data_driven_task_manager.py +0 -0
  262. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_deepcoder_reward.py +0 -0
  263. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_deepeval_integration.py +0 -0
  264. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_deploy_integration.py +0 -0
  265. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_directory_utils.py +0 -0
  266. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_e2b_integration.py +0 -0
  267. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_e2b_js_integration.py +0 -0
  268. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_edge_cases.py +0 -0
  269. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_eval_protocol_import.py +0 -0
  270. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_evaluation.py +0 -0
  271. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_evaluation_integration.py +0 -0
  272. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_evaluation_postprocess.py +0 -0
  273. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_evaluation_preview_integration.py +0 -0
  274. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_event_bus.py +0 -0
  275. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_event_bus_helper.py +0 -0
  276. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_examples_end_to_end.py +0 -0
  277. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_fireworks_api.py +0 -0
  278. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_format.py +0 -0
  279. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_fractional_code.py +0 -0
  280. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_function_calling.py +0 -0
  281. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_gcp_tools.py +0 -0
  282. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_generic_server.py +0 -0
  283. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_human_id.py +0 -0
  284. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_integration.py +0 -0
  285. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_json_schema.py +0 -0
  286. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_kwargs_validation.py +0 -0
  287. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_language_consistency.py +0 -0
  288. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_lean_prover.py +0 -0
  289. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_lean_prover_runner.py +0 -0
  290. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_length.py +0 -0
  291. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_list_comparison_math_reward.py +0 -0
  292. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_logs_server.py +0 -0
  293. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_logs_server_simple.py +0 -0
  294. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_math.py +0 -0
  295. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_minimal.py +0 -0
  296. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_models.py +0 -0
  297. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_models_rl.py +0 -0
  298. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_multiple_choice_math_reward.py +0 -0
  299. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_n_variant_batch_integration.py +0 -0
  300. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_n_variant_integration.py +0 -0
  301. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_openai_compatibility.py +0 -0
  302. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_openeval_integration.py +0 -0
  303. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_packaging.py +0 -0
  304. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_parallel_rollouts.py +0 -0
  305. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_platform_api.py +0 -0
  306. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_quickstart_utils.py +0 -0
  307. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_readiness.py +0 -0
  308. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_reasoning_steps.py +0 -0
  309. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_repetition.py +0 -0
  310. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_repetition_debug.py +0 -0
  311. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_retry_mechanism.py +0 -0
  312. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_reward_function.py +0 -0
  313. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_reward_protocol_import.py +0 -0
  314. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_rl_processing.py +0 -0
  315. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_rollout_control_plane_integration.py +0 -0
  316. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_server.py +0 -0
  317. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_show_results_url.py +0 -0
  318. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_status_migration_changes.py +0 -0
  319. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_status_migration_integration.py +0 -0
  320. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_status_model.py +0 -0
  321. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_tag_count.py +0 -0
  322. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_tau_bench_airline_smoke.py +0 -0
  323. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_typed_interface.py +0 -0
  324. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_typed_interface_rl.py +0 -0
  325. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_upload_entrypoint.py +0 -0
  326. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_url_handling.py +0 -0
  327. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/tests/test_vite_server.py +0 -0
  328. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/__init__.py +0 -0
  329. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/agent/__init__.py +0 -0
  330. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/agent/base.py +0 -0
  331. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/agent/llm_agent.py +0 -0
  332. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/api_service/__init__.py +0 -0
  333. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/api_service/api_config.py +0 -0
  334. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/api_service/data_model.py +0 -0
  335. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/api_service/simulation_service.py +0 -0
  336. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/cli.py +0 -0
  337. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/config.py +0 -0
  338. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/data/domains/airline/policy.md +0 -0
  339. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/data/domains/mock/policy.md +0 -0
  340. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
  341. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/data/domains/retail/policy.md +0 -0
  342. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
  343. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
  344. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
  345. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
  346. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
  347. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
  348. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
  349. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/data_model/__init__.py +0 -0
  350. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/data_model/message.py +0 -0
  351. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/data_model/simulation.py +0 -0
  352. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/data_model/tasks.py +0 -0
  353. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/__init__.py +0 -0
  354. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/airline/__init__.py +0 -0
  355. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/airline/data_model.py +0 -0
  356. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/airline/environment.py +0 -0
  357. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/airline/tools.py +0 -0
  358. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/airline/utils.py +0 -0
  359. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/mock/__init__.py +0 -0
  360. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/mock/data_model.py +0 -0
  361. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/mock/environment.py +0 -0
  362. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/mock/tools.py +0 -0
  363. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/mock/utils.py +0 -0
  364. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/retail/__init__.py +0 -0
  365. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/retail/data_model.py +0 -0
  366. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/retail/environment.py +0 -0
  367. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/retail/tools.py +0 -0
  368. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/retail/utils.py +0 -0
  369. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/telecom/__init__.py +0 -0
  370. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/telecom/data_model.py +0 -0
  371. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/telecom/environment.py +0 -0
  372. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
  373. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
  374. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
  375. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
  376. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
  377. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
  378. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
  379. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
  380. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/telecom/tools.py +0 -0
  381. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
  382. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/telecom/user_tools.py +0 -0
  383. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/domains/telecom/utils.py +0 -0
  384. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/environment/__init__.py +0 -0
  385. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/environment/db.py +0 -0
  386. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/environment/environment.py +0 -0
  387. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/environment/server.py +0 -0
  388. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/environment/tool.py +0 -0
  389. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/environment/toolkit.py +0 -0
  390. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/environment/utils/interface_agent.py +0 -0
  391. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/evaluator/__init__.py +0 -0
  392. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/evaluator/evaluator.py +0 -0
  393. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/evaluator/evaluator_action.py +0 -0
  394. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/evaluator/evaluator_base.py +0 -0
  395. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
  396. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/evaluator/evaluator_env.py +0 -0
  397. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
  398. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/metrics/__init__.py +0 -0
  399. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/metrics/agent_metrics.py +0 -0
  400. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/metrics/break_down_metrics.py +0 -0
  401. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/orchestrator/__init__.py +0 -0
  402. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/orchestrator/environment_manager.py +0 -0
  403. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/orchestrator/orchestrator.py +0 -0
  404. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/orchestrator/utils.py +0 -0
  405. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/registry.py +0 -0
  406. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/run.py +0 -0
  407. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/scripts/__init__.py +0 -0
  408. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/scripts/check_data.py +0 -0
  409. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/scripts/show_domain_doc.py +0 -0
  410. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/scripts/start_servers.py +0 -0
  411. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/scripts/view_simulations.py +0 -0
  412. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/user/__init__.py +0 -0
  413. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/user/base.py +0 -0
  414. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/user/user_simulator.py +0 -0
  415. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/utils/__init__.py +0 -0
  416. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/utils/display.py +0 -0
  417. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/utils/io_utils.py +0 -0
  418. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/utils/llm_utils.py +0 -0
  419. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/utils/pydantic_utils.py +0 -0
  420. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vendor/tau2/utils/utils.py +0 -0
  421. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/versioneer.py +0 -0
  422. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
  423. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vite-app/dist/assets/index-C81y9r9l.js +0 -0
  424. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vite-app/dist/assets/index-C81y9r9l.js.map +0 -0
  425. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vite-app/dist/assets/index-DpYZaoAr.css +0 -0
  426. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
  427. {eval_protocol-0.2.45.dev1 → eval_protocol-0.2.46.dev0}/vite-app/dist/index.html +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.45.dev1
3
+ Version: 0.2.46.dev0
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-10-09T01:30:59-0700",
11
+ "date": "2025-10-10T00:24:47-0700",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "1eef32fcf8f230ad2ee2b46f90a4f62c77fd89a7",
15
- "version": "0.2.45-dev1"
14
+ "full-revisionid": "98d81a75608e700f91e697f88c149641328d9507",
15
+ "version": "0.2.46-dev"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -5,11 +5,13 @@ A proxy service for extracting evaluation metadata from URL paths and managing
5
5
  Langfuse tracing for distributed evaluation workflows.
6
6
  """
7
7
 
8
- from .proxy_core import create_app, AuthProvider, NoAuthProvider, ProxyConfig
8
+ from .proxy_core import create_app, AuthProvider, NoAuthProvider, ProxyConfig, ChatParams, TracesParams
9
9
 
10
10
  __all__ = [
11
11
  "create_app",
12
12
  "AuthProvider",
13
13
  "NoAuthProvider",
14
14
  "ProxyConfig",
15
+ "ChatParams",
16
+ "TracesParams",
15
17
  ]
@@ -1,9 +1,11 @@
1
- from .models import ProxyConfig
1
+ from .models import ProxyConfig, ChatParams, TracesParams
2
2
  from .app import create_app
3
3
  from .auth import AuthProvider, NoAuthProvider
4
4
 
5
5
  __all__ = [
6
6
  "ProxyConfig",
7
+ "ChatParams",
8
+ "TracesParams",
7
9
  "create_app",
8
10
  "AuthProvider",
9
11
  "NoAuthProvider",
@@ -4,67 +4,75 @@ A FastAPI service that sits in front of LiteLLM and extracts metadata from URL p
4
4
  """
5
5
 
6
6
  from fastapi import FastAPI, Depends, HTTPException, Request, Query
7
- from typing import Optional, List
7
+ from typing import Optional, Callable, Dict, Any, List
8
8
  import os
9
9
  import redis
10
10
  import logging
11
- import json
11
+ import yaml
12
12
  from pathlib import Path
13
13
  import sys
14
14
  from contextlib import asynccontextmanager
15
15
 
16
- from .models import ProxyConfig, LangfuseTracesResponse
16
+ from .models import ProxyConfig, LangfuseTracesResponse, TracesParams, ChatParams, ChatRequestHook, TracesRequestHook
17
17
  from .auth import AuthProvider, NoAuthProvider
18
18
  from .litellm import handle_chat_completion, proxy_to_litellm
19
19
  from .langfuse import fetch_langfuse_traces
20
20
 
21
21
  # Configure logging before any other imports (so all modules inherit this config)
22
22
  log_level = os.getenv("LOG_LEVEL", "INFO").upper()
23
- logging.basicConfig(
24
- level=getattr(logging, log_level),
25
- format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
26
- handlers=[logging.StreamHandler(sys.stdout)],
27
- )
23
+ if not logging.getLogger().hasHandlers():
24
+ logging.basicConfig(
25
+ level=getattr(logging, log_level),
26
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
27
+ handlers=[logging.StreamHandler(sys.stdout)],
28
+ )
28
29
 
29
30
  logger = logging.getLogger(__name__)
30
31
 
31
32
 
32
- def build_proxy_config() -> ProxyConfig:
33
- """Load environment and secrets, and build ProxyConfig (no Redis)."""
33
+ def build_proxy_config(
34
+ preprocess_chat_request: Optional[ChatRequestHook] = None,
35
+ preprocess_traces_request: Optional[TracesRequestHook] = None,
36
+ ) -> ProxyConfig:
37
+ """Load environment and secrets, and build ProxyConfig"""
34
38
  # Env
35
39
  litellm_url = os.getenv("LITELLM_URL")
36
40
  if not litellm_url:
37
41
  raise ValueError("LITELLM_URL environment variable must be set")
38
42
  request_timeout = float(os.getenv("REQUEST_TIMEOUT", "300.0"))
43
+ langfuse_host = os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com")
39
44
 
40
- # Secrets - use SECRETS_PATH env var if set, otherwise default to proxy/secrets.json
45
+ # Secrets - use SECRETS_PATH env var if set, otherwise default to proxy/secrets.yaml
41
46
  secrets_path_str = os.getenv("SECRETS_PATH")
42
47
  if secrets_path_str:
43
48
  secrets_path = Path(secrets_path_str)
44
49
  else:
45
- secrets_path = Path(__file__).parent / "secrets.json"
50
+ secrets_path = Path(__file__).parent / "secrets.yaml"
46
51
  if not secrets_path.exists():
47
52
  raise ValueError(
48
- "secrets.json not found! Please create it from secrets.json.example:\n"
49
- " cp litellm_proxy_config/proxy/secrets.json.example litellm_proxy_config/proxy/secrets.json\n"
50
- "Then add your Langfuse API keys to secrets.json"
53
+ "Secrets file not found! Please create it from secrets.yaml.example:\n"
54
+ " cp eval_protocol/proxy/proxy_core/secrets.yaml.example eval_protocol/proxy/proxy_core/secrets.yaml\n"
55
+ "Then add your Langfuse API keys to the secrets file"
51
56
  )
52
57
  try:
53
58
  with open(secrets_path, "r") as f:
54
- secrets_config = json.load(f)
59
+ secrets_config = yaml.safe_load(f)
55
60
  langfuse_keys = secrets_config["langfuse_keys"]
56
61
  default_project_id = secrets_config["default_project_id"]
57
- logger.info(f"Loaded {len(langfuse_keys)} Langfuse project(s) from secrets.json")
62
+ logger.info(f"Loaded {len(langfuse_keys)} Langfuse project(s) from {secrets_path.name}")
58
63
  except KeyError as e:
59
- raise ValueError(f"Missing required key in secrets.json: {e}")
60
- except json.JSONDecodeError as e:
61
- raise ValueError(f"Invalid JSON in secrets.json: {e}")
64
+ raise ValueError(f"Missing required key in secrets file: {e}")
65
+ except yaml.YAMLError as e:
66
+ raise ValueError(f"Invalid format in secrets file {secrets_path.name}: {e}")
62
67
 
63
68
  return ProxyConfig(
64
69
  litellm_url=litellm_url,
65
70
  request_timeout=request_timeout,
71
+ langfuse_host=langfuse_host,
66
72
  langfuse_keys=langfuse_keys,
67
73
  default_project_id=default_project_id,
74
+ preprocess_chat_request=preprocess_chat_request,
75
+ preprocess_traces_request=preprocess_traces_request,
68
76
  )
69
77
 
70
78
 
@@ -95,12 +103,15 @@ def init_redis() -> redis.Redis:
95
103
 
96
104
  def create_app(
97
105
  auth_provider: AuthProvider = NoAuthProvider(),
106
+ preprocess_chat_request: Optional[ChatRequestHook] = None,
107
+ preprocess_traces_request: Optional[TracesRequestHook] = None,
98
108
  ) -> FastAPI:
99
109
  @asynccontextmanager
100
110
  async def lifespan(app: FastAPI):
101
111
  # Build runtime on startup
102
- app.state.config = build_proxy_config()
112
+ app.state.config = build_proxy_config(preprocess_chat_request, preprocess_traces_request)
103
113
  app.state.redis = init_redis()
114
+
104
115
  try:
105
116
  yield
106
117
  finally:
@@ -117,13 +128,46 @@ def create_app(
117
128
  def get_redis(request: Request) -> redis.Redis:
118
129
  return request.app.state.redis
119
130
 
120
- async def require_auth(request: Request) -> None:
121
- auth_header = request.headers.get("authorization", "")
122
- api_key = None
123
- if auth_header.startswith("Bearer "):
124
- api_key = auth_header.replace("Bearer ", "").strip()
131
+ def get_traces_params(
132
+ tags: Optional[List[str]] = Query(default=None),
133
+ project_id: Optional[str] = None,
134
+ limit: int = 100,
135
+ sample_size: Optional[int] = None,
136
+ user_id: Optional[str] = None,
137
+ session_id: Optional[str] = None,
138
+ name: Optional[str] = None,
139
+ environment: Optional[str] = None,
140
+ version: Optional[str] = None,
141
+ release: Optional[str] = None,
142
+ fields: Optional[str] = None,
143
+ hours_back: Optional[int] = None,
144
+ from_timestamp: Optional[str] = None,
145
+ to_timestamp: Optional[str] = None,
146
+ sleep_between_gets: float = 2.5,
147
+ max_retries: int = 3,
148
+ ) -> TracesParams:
149
+ return TracesParams(
150
+ tags=tags,
151
+ project_id=project_id,
152
+ limit=limit,
153
+ sample_size=sample_size,
154
+ user_id=user_id,
155
+ session_id=session_id,
156
+ name=name,
157
+ environment=environment,
158
+ version=version,
159
+ release=release,
160
+ fields=fields,
161
+ hours_back=hours_back,
162
+ from_timestamp=from_timestamp,
163
+ to_timestamp=to_timestamp,
164
+ sleep_between_gets=sleep_between_gets,
165
+ max_retries=max_retries,
166
+ )
125
167
 
126
- auth_provider.validate(api_key)
168
+ async def require_auth(request: Request) -> None:
169
+ account_id = auth_provider.validate_and_return_account_id(request)
170
+ request.state.account_id = account_id
127
171
  return None
128
172
 
129
173
  # =====================
@@ -164,11 +208,9 @@ def create_app(
164
208
  encoded_base_url: Optional[str] = None,
165
209
  config: ProxyConfig = Depends(get_config),
166
210
  redis_client: redis.Redis = Depends(get_redis),
211
+ _: None = Depends(require_auth),
167
212
  ):
168
- return await handle_chat_completion(
169
- config=config,
170
- redis_client=redis_client,
171
- request=request,
213
+ params = ChatParams(
172
214
  project_id=project_id,
173
215
  rollout_id=rollout_id,
174
216
  invocation_id=invocation_id,
@@ -177,6 +219,12 @@ def create_app(
177
219
  row_id=row_id,
178
220
  encoded_base_url=encoded_base_url,
179
221
  )
222
+ return await handle_chat_completion(
223
+ config=config,
224
+ redis_client=redis_client,
225
+ request=request,
226
+ params=params,
227
+ )
180
228
 
181
229
  @app.post("/project_id/{project_id}/chat/completions")
182
230
  @app.post("/v1/project_id/{project_id}/chat/completions")
@@ -185,12 +233,14 @@ def create_app(
185
233
  request: Request,
186
234
  config: ProxyConfig = Depends(get_config),
187
235
  redis_client: redis.Redis = Depends(get_redis),
236
+ _: None = Depends(require_auth),
188
237
  ):
238
+ params = ChatParams(project_id=project_id)
189
239
  return await handle_chat_completion(
190
240
  config=config,
191
241
  redis_client=redis_client,
192
242
  request=request,
193
- project_id=project_id,
243
+ params=params,
194
244
  )
195
245
 
196
246
  # ===============
@@ -201,45 +251,20 @@ def create_app(
201
251
  @app.get("/project_id/{project_id}/traces", response_model=LangfuseTracesResponse)
202
252
  @app.get("/v1/project_id/{project_id}/traces", response_model=LangfuseTracesResponse)
203
253
  async def get_langfuse_traces(
204
- tags: List[str] = Query(...), # REQUIRED query param
254
+ request: Request,
255
+ params: TracesParams = Depends(get_traces_params),
205
256
  project_id: Optional[str] = None,
206
- limit: int = 100,
207
- sample_size: Optional[int] = None,
208
- user_id: Optional[str] = None,
209
- session_id: Optional[str] = None,
210
- name: Optional[str] = None,
211
- environment: Optional[str] = None,
212
- version: Optional[str] = None,
213
- release: Optional[str] = None,
214
- fields: Optional[str] = None,
215
- hours_back: Optional[int] = None,
216
- from_timestamp: Optional[str] = None,
217
- to_timestamp: Optional[str] = None,
218
- sleep_between_gets: float = 2.5,
219
- max_retries: int = 3,
220
257
  config: ProxyConfig = Depends(get_config),
221
258
  redis_client: redis.Redis = Depends(get_redis),
222
259
  _: None = Depends(require_auth),
223
260
  ) -> LangfuseTracesResponse:
261
+ if project_id is not None:
262
+ params.project_id = project_id
224
263
  return await fetch_langfuse_traces(
225
264
  config=config,
226
265
  redis_client=redis_client,
227
- tags=tags,
228
- project_id=project_id,
229
- limit=limit,
230
- sample_size=sample_size,
231
- user_id=user_id,
232
- session_id=session_id,
233
- name=name,
234
- environment=environment,
235
- version=version,
236
- release=release,
237
- fields=fields,
238
- hours_back=hours_back,
239
- from_timestamp=from_timestamp,
240
- to_timestamp=to_timestamp,
241
- sleep_between_gets=sleep_between_gets,
242
- max_retries=max_retries,
266
+ request=request,
267
+ params=params,
243
268
  )
244
269
 
245
270
  # Health
@@ -0,0 +1,18 @@
1
+ from abc import ABC, abstractmethod
2
+ import logging
3
+ from fastapi import Request
4
+ from fastapi import HTTPException
5
+ import httpx
6
+ from typing import Optional
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
+ class AuthProvider(ABC):
12
+ @abstractmethod
13
+ def validate_and_return_account_id(self, request: Request) -> Optional[str]: ...
14
+
15
+
16
+ class NoAuthProvider(AuthProvider):
17
+ def validate_and_return_account_id(self, request: Request) -> Optional[str]:
18
+ return None
@@ -8,16 +8,18 @@ import logging
8
8
  import asyncio
9
9
  from typing import List, Optional, Dict, Any, Set
10
10
  from datetime import datetime, timedelta
11
- from fastapi import HTTPException
11
+ from fastapi import HTTPException, Request
12
12
  import redis
13
13
  from .redis_utils import get_insertion_ids
14
- from .models import ProxyConfig, LangfuseTracesResponse, TraceResponse
14
+ from .models import ProxyConfig, LangfuseTracesResponse, TraceResponse, TracesParams
15
15
 
16
16
  logger = logging.getLogger(__name__)
17
17
 
18
18
 
19
- def _extract_tag_value(tags: List[str], prefix: str) -> Optional[str]:
19
+ def _extract_tag_value(tags: Optional[List[str]], prefix: str) -> Optional[str]:
20
20
  """Extract value from a tag with the given prefix (e.g., 'rollout_id:' or 'insertion_id:')."""
21
+ if not tags:
22
+ return None
21
23
  for tag in tags:
22
24
  if tag.startswith(prefix):
23
25
  return tag.split(":", 1)[1]
@@ -60,7 +62,7 @@ async def _fetch_trace_list_with_retry(
60
62
  langfuse_client: Any,
61
63
  page: int,
62
64
  limit: int,
63
- tags: List[str],
65
+ tags: Optional[List[str]],
64
66
  user_id: Optional[str],
65
67
  session_id: Optional[str],
66
68
  name: Optional[str],
@@ -152,22 +154,8 @@ async def _fetch_trace_detail_with_retry(
152
154
  async def fetch_langfuse_traces(
153
155
  config: ProxyConfig,
154
156
  redis_client: redis.Redis,
155
- tags: List[str],
156
- project_id: Optional[str] = None,
157
- limit: int = 100,
158
- sample_size: Optional[int] = None,
159
- user_id: Optional[str] = None,
160
- session_id: Optional[str] = None,
161
- name: Optional[str] = None,
162
- environment: Optional[str] = None,
163
- version: Optional[str] = None,
164
- release: Optional[str] = None,
165
- fields: Optional[str] = None,
166
- hours_back: Optional[int] = None,
167
- from_timestamp: Optional[str] = None,
168
- to_timestamp: Optional[str] = None,
169
- sleep_between_gets: float = 2.5,
170
- max_retries: int = 3,
157
+ request: Request,
158
+ params: TracesParams,
171
159
  ):
172
160
  """
173
161
  Fetch full traces from Langfuse for the specified project.
@@ -184,9 +172,27 @@ async def fetch_langfuse_traces(
184
172
 
185
173
  Returns a list of full trace objects (including observations) in JSON format.
186
174
  """
187
- # Validate tags
188
- if not tags or not any(tag.startswith("rollout_id:") for tag in tags):
189
- raise HTTPException(status_code=422, detail="Tags must include at least one 'rollout_id:*' tag")
175
+
176
+ # Preprocess traces request
177
+ if config.preprocess_traces_request:
178
+ params = config.preprocess_traces_request(request, params)
179
+
180
+ tags = params.tags
181
+ project_id = params.project_id
182
+ limit = params.limit
183
+ sample_size = params.sample_size
184
+ user_id = params.user_id
185
+ session_id = params.session_id
186
+ name = params.name
187
+ environment = params.environment
188
+ version = params.version
189
+ release = params.release
190
+ fields = params.fields
191
+ hours_back = params.hours_back
192
+ from_timestamp = params.from_timestamp
193
+ to_timestamp = params.to_timestamp
194
+ sleep_between_gets = params.sleep_between_gets
195
+ max_retries = params.max_retries
190
196
 
191
197
  # Use default project if not specified
192
198
  if project_id is None:
@@ -210,7 +216,7 @@ async def fetch_langfuse_traces(
210
216
  langfuse_client = Langfuse(
211
217
  public_key=config.langfuse_keys[project_id]["public_key"],
212
218
  secret_key=config.langfuse_keys[project_id]["secret_key"],
213
- host="https://langfuse.fireworks.ai",
219
+ host=config.langfuse_host,
214
220
  )
215
221
 
216
222
  # Parse datetime strings if provided
@@ -11,7 +11,7 @@ from fastapi import Request, Response, HTTPException
11
11
  from typing import Optional
12
12
  import redis
13
13
  from .redis_utils import register_insertion_id
14
- from .models import ProxyConfig
14
+ from .models import ProxyConfig, ChatParams
15
15
 
16
16
  logger = logging.getLogger(__name__)
17
17
 
@@ -20,13 +20,7 @@ async def handle_chat_completion(
20
20
  config: ProxyConfig,
21
21
  redis_client: redis.Redis,
22
22
  request: Request,
23
- project_id: Optional[str] = None,
24
- rollout_id: Optional[str] = None,
25
- invocation_id: Optional[str] = None,
26
- experiment_id: Optional[str] = None,
27
- run_id: Optional[str] = None,
28
- row_id: Optional[str] = None,
29
- encoded_base_url: Optional[str] = None,
23
+ params: ChatParams,
30
24
  ) -> Response:
31
25
  """
32
26
  Handle chat completion requests and forward to LiteLLM.
@@ -36,14 +30,24 @@ async def handle_chat_completion(
36
30
 
37
31
  If encoded_base_url is provided, it will be decoded and added to the request.
38
32
  """
33
+ body = await request.body()
34
+ data = json.loads(body) if body else {}
35
+
36
+ if config.preprocess_chat_request:
37
+ data, params = config.preprocess_chat_request(data, request, params)
38
+
39
+ project_id = params.project_id
40
+ rollout_id = params.rollout_id
41
+ invocation_id = params.invocation_id
42
+ experiment_id = params.experiment_id
43
+ run_id = params.run_id
44
+ row_id = params.row_id
45
+ encoded_base_url = params.encoded_base_url
46
+
39
47
  # Use default project if not specified
40
48
  if project_id is None:
41
49
  project_id = config.default_project_id
42
50
 
43
- # Read the original request body
44
- body = await request.body()
45
- data = json.loads(body) if body else {}
46
-
47
51
  # Decode and add base_url if provided
48
52
  if encoded_base_url:
49
53
  try:
@@ -87,7 +91,7 @@ async def handle_chat_completion(
87
91
  # Add Langfuse configuration
88
92
  data["langfuse_public_key"] = config.langfuse_keys[project_id]["public_key"]
89
93
  data["langfuse_secret_key"] = config.langfuse_keys[project_id]["secret_key"]
90
- data["langfuse_host"] = "https://langfuse.fireworks.ai"
94
+ data["langfuse_host"] = config.langfuse_host
91
95
 
92
96
  # Forward to LiteLLM's standard /chat/completions endpoint
93
97
  # Set longer timeout for LLM API calls (LLMs can be slow)
@@ -135,12 +139,11 @@ async def proxy_to_litellm(config: ProxyConfig, path: str, request: Request) ->
135
139
  # Get body
136
140
  body = await request.body()
137
141
 
138
- # For POST/PUT/PATCH with JSON, extract API key from header
142
+ # Pass through API key from Authorization header
139
143
  if request.method in ["POST", "PUT", "PATCH"] and body:
140
144
  try:
141
145
  data = json.loads(body)
142
146
 
143
- # Extract API key from Authorization header
144
147
  auth_header = request.headers.get("authorization", "")
145
148
  if auth_header.startswith("Bearer "):
146
149
  api_key = auth_header.replace("Bearer ", "").strip()
@@ -0,0 +1,92 @@
1
+ """
2
+ Models for the LiteLLM Metadata Proxy.
3
+ """
4
+
5
+ from pydantic import BaseModel
6
+ from typing import Optional, List, Any, Dict, Callable, TypeAlias
7
+ from fastapi import Request, Query
8
+
9
+
10
+ ChatRequestHook: TypeAlias = Callable[[Dict[str, Any], Request, "ChatParams"], tuple[Dict[str, Any], "ChatParams"]]
11
+ TracesRequestHook: TypeAlias = Callable[[Request, "TracesParams"], "TracesParams"]
12
+
13
+
14
+ class ChatParams(BaseModel):
15
+ """Typed container for chat completion URL path parameters."""
16
+
17
+ project_id: Optional[str] = None
18
+ rollout_id: Optional[str] = None
19
+ invocation_id: Optional[str] = None
20
+ experiment_id: Optional[str] = None
21
+ run_id: Optional[str] = None
22
+ row_id: Optional[str] = None
23
+ encoded_base_url: Optional[str] = None
24
+
25
+
26
+ class TracesParams(BaseModel):
27
+ """Typed container for traces query parameters and controls."""
28
+
29
+ tags: Optional[List[str]] = None
30
+ project_id: Optional[str] = None
31
+ limit: int = 100
32
+ sample_size: Optional[int] = None
33
+ user_id: Optional[str] = None
34
+ session_id: Optional[str] = None
35
+ name: Optional[str] = None
36
+ environment: Optional[str] = None
37
+ version: Optional[str] = None
38
+ release: Optional[str] = None
39
+ fields: Optional[str] = None
40
+ hours_back: Optional[int] = None
41
+ from_timestamp: Optional[str] = None
42
+ to_timestamp: Optional[str] = None
43
+ sleep_between_gets: float = 2.5
44
+ max_retries: int = 3
45
+
46
+
47
+ class ProxyConfig(BaseModel):
48
+ """Configuration model for the LiteLLM Metadata Proxy"""
49
+
50
+ litellm_url: str
51
+ request_timeout: float = 300.0
52
+ langfuse_host: str
53
+ langfuse_keys: Dict[str, Dict[str, str]]
54
+ default_project_id: str
55
+ preprocess_chat_request: Optional[ChatRequestHook] = None
56
+ preprocess_traces_request: Optional[TracesRequestHook] = None
57
+
58
+
59
+ class ObservationResponse(BaseModel):
60
+ """Response model for a single observation within a trace"""
61
+
62
+ id: str
63
+ type: Optional[str] = None
64
+ name: Optional[str] = None
65
+ start_time: Optional[str] = None
66
+ end_time: Optional[str] = None
67
+ input: Optional[Any] = None
68
+ output: Optional[Any] = None
69
+ parent_observation_id: Optional[str] = None
70
+
71
+
72
+ class TraceResponse(BaseModel):
73
+ """Response model for a single trace"""
74
+
75
+ id: str
76
+ name: Optional[str] = None
77
+ user_id: Optional[str] = None
78
+ session_id: Optional[str] = None
79
+ tags: List[str] = []
80
+ timestamp: Optional[str] = None
81
+ input: Optional[Any] = None
82
+ output: Optional[Any] = None
83
+ metadata: Optional[Any] = None
84
+ observations: List[ObservationResponse] = []
85
+
86
+
87
+ class LangfuseTracesResponse(BaseModel):
88
+ """Response model for the /traces endpoint"""
89
+
90
+ project_id: str
91
+ total_traces: int
92
+ traces: List[TraceResponse]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.45.dev1
3
+ Version: 0.2.46.dev0
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -1,12 +0,0 @@
1
- from abc import ABC, abstractmethod
2
- from typing import Optional
3
-
4
-
5
- class AuthProvider(ABC):
6
- @abstractmethod
7
- def validate(self, api_key: Optional[str]) -> Optional[str]: ...
8
-
9
-
10
- class NoAuthProvider(AuthProvider):
11
- def validate(self, api_key: Optional[str]) -> Optional[str]:
12
- return None
@@ -1,51 +0,0 @@
1
- """
2
- Models for the LiteLLM Metadata Proxy.
3
- """
4
-
5
- from pydantic import BaseModel
6
- from typing import Optional, List, Any, Dict
7
-
8
-
9
- class ProxyConfig(BaseModel):
10
- """Configuration model for the LiteLLM Metadata Proxy"""
11
-
12
- litellm_url: str
13
- request_timeout: float = 300.0
14
- langfuse_keys: Dict[str, Dict[str, str]]
15
- default_project_id: str
16
-
17
-
18
- class ObservationResponse(BaseModel):
19
- """Response model for a single observation within a trace"""
20
-
21
- id: str
22
- type: Optional[str] = None
23
- name: Optional[str] = None
24
- start_time: Optional[str] = None
25
- end_time: Optional[str] = None
26
- input: Optional[Any] = None
27
- output: Optional[Any] = None
28
- parent_observation_id: Optional[str] = None
29
-
30
-
31
- class TraceResponse(BaseModel):
32
- """Response model for a single trace"""
33
-
34
- id: str
35
- name: Optional[str] = None
36
- user_id: Optional[str] = None
37
- session_id: Optional[str] = None
38
- tags: List[str] = []
39
- timestamp: Optional[str] = None
40
- input: Optional[Any] = None
41
- output: Optional[Any] = None
42
- metadata: Optional[Any] = None
43
- observations: List[ObservationResponse] = []
44
-
45
-
46
- class LangfuseTracesResponse(BaseModel):
47
- """Response model for the /traces endpoint"""
48
-
49
- project_id: str
50
- total_traces: int
51
- traces: List[TraceResponse]