eval-protocol 0.3.22__tar.gz → 0.3.24__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (474)
  1. {eval_protocol-0.3.22/eval_protocol.egg-info → eval_protocol-0.3.24}/PKG-INFO +4 -7
  2. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/_version.py +3 -3
  3. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/adapters/fireworks_tracing.py +3 -55
  4. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/proxy/proxy_core/app.py +15 -11
  5. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/proxy/proxy_core/langfuse.py +0 -1
  6. eval_protocol-0.3.24/eval_protocol/proxy/proxy_core/litellm.py +173 -0
  7. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/proxy/proxy_core/models.py +1 -1
  8. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/reward_function.py +1 -0
  9. {eval_protocol-0.3.22 → eval_protocol-0.3.24/eval_protocol.egg-info}/PKG-INFO +4 -7
  10. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol.egg-info/requires.txt +3 -6
  11. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/pyproject.toml +3 -7
  12. eval_protocol-0.3.22/eval_protocol/proxy/proxy_core/litellm.py +0 -154
  13. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/LICENSE +0 -0
  14. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/README.md +0 -0
  15. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/development/__init__.py +0 -0
  16. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/development/normalize_sandbox_fusion.py +0 -0
  17. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/development/utils/__init__.py +0 -0
  18. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/development/utils/generate_api_key.py +0 -0
  19. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/development/utils/subprocess_manager.py +0 -0
  20. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/__init__.py +0 -0
  21. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/__main__.py +0 -0
  22. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/adapters/__init__.py +0 -0
  23. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/adapters/base.py +0 -0
  24. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/adapters/bigquery.py +0 -0
  25. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/adapters/braintrust.py +0 -0
  26. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/adapters/dataframe.py +0 -0
  27. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/adapters/huggingface.py +0 -0
  28. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/adapters/langchain.py +0 -0
  29. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/adapters/langfuse.py +0 -0
  30. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/adapters/langsmith.py +0 -0
  31. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/adapters/openai_responses.py +0 -0
  32. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/adapters/trl.py +0 -0
  33. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/adapters/utils.py +0 -0
  34. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/adapters/weave.py +0 -0
  35. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/agent/__init__.py +0 -0
  36. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/agent/models.py +0 -0
  37. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/agent/orchestrator.py +0 -0
  38. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/agent/resource_abc.py +0 -0
  39. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/agent/resource_pool.py +0 -0
  40. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/agent/resources/__init__.py +0 -0
  41. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
  42. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
  43. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
  44. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
  45. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
  46. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/agent/resources/docker_resource.py +0 -0
  47. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
  48. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/agent/resources/python_state_resource.py +0 -0
  49. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/agent/resources/sql_resource.py +0 -0
  50. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/agent/task_manager.py +0 -0
  51. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/agent/tool_registry.py +0 -0
  52. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/auth.py +0 -0
  53. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/benchmarks/__init__.py +0 -0
  54. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
  55. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
  56. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/benchmarks/test_aime25.py +0 -0
  57. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
  58. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/benchmarks/test_glm_streaming_compliance.py +0 -0
  59. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/benchmarks/test_gpqa.py +0 -0
  60. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
  61. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
  62. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
  63. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/cli.py +0 -0
  64. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/cli_commands/__init__.py +0 -0
  65. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
  66. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/cli_commands/common.py +0 -0
  67. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/cli_commands/create_rft.py +0 -0
  68. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/cli_commands/export_docs.py +0 -0
  69. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/cli_commands/local_test.py +0 -0
  70. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/cli_commands/logs.py +0 -0
  71. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
  72. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/cli_commands/upload.py +0 -0
  73. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/cli_commands/utils.py +0 -0
  74. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/common_utils.py +0 -0
  75. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/config.py +0 -0
  76. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/data_loader/__init__.py +0 -0
  77. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
  78. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/data_loader/factory_data_loader.py +0 -0
  79. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/data_loader/inline_data_loader.py +0 -0
  80. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/data_loader/jsonl_data_loader.py +0 -0
  81. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/data_loader/models.py +0 -0
  82. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/dataset_logger/__init__.py +0 -0
  83. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
  84. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
  85. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
  86. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
  87. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/datasets/__init__.py +0 -0
  88. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/datasets/loader.py +0 -0
  89. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/directory_utils.py +0 -0
  90. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/evaluation.py +0 -0
  91. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/event_bus/__init__.py +0 -0
  92. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/event_bus/event_bus.py +0 -0
  93. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/event_bus/logger.py +0 -0
  94. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
  95. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
  96. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/exceptions.py +0 -0
  97. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/execution/__init__.py +0 -0
  98. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/execution/pipeline.py +0 -0
  99. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/fireworks_rft.py +0 -0
  100. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/gcp_tools.py +0 -0
  101. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/generation/cache.py +0 -0
  102. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/generation/clients/base.py +0 -0
  103. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/generation/clients.py +0 -0
  104. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/generic_server.py +0 -0
  105. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/get_pep440_version.py +0 -0
  106. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/human_id/__init__.py +0 -0
  107. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/human_id/dictionary.py +0 -0
  108. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/integrations/__init__.py +0 -0
  109. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/integrations/deepeval.py +0 -0
  110. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/integrations/fireworks_v1_completions_client.py +0 -0
  111. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/integrations/openai_rft.py +0 -0
  112. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/integrations/openeval.py +0 -0
  113. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/integrations/tinker_cookbook.py +0 -0
  114. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/integrations/tinker_rollout_processor.py +0 -0
  115. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/integrations/trl.py +0 -0
  116. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/log_utils/__init__.py +0 -0
  117. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
  118. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
  119. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
  120. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/log_utils/fireworks_tracing_http_handler.py +0 -0
  121. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/log_utils/init.py +0 -0
  122. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/log_utils/rollout_context.py +0 -0
  123. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
  124. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/log_utils/util.py +0 -0
  125. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/logging_utils.py +0 -0
  126. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp/__init__.py +0 -0
  127. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp/adapter.py +0 -0
  128. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp/client/__init__.py +0 -0
  129. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp/client/connection.py +0 -0
  130. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp/clients.py +0 -0
  131. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp/execution/__init__.py +0 -0
  132. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp/execution/base_policy.py +0 -0
  133. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp/execution/manager.py +0 -0
  134. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp/execution/policy.py +0 -0
  135. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp/execution/vllm_policy.py +0 -0
  136. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp/grid_renderer.py +0 -0
  137. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp/mcp_multi_client.py +0 -0
  138. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp/mcpgym.py +0 -0
  139. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp/process_manager.py +0 -0
  140. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp/session/__init__.py +0 -0
  141. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp/session/manager.py +0 -0
  142. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp/simple_process_manager.py +0 -0
  143. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp/simulation_server.py +0 -0
  144. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp_agent/__init__.py +0 -0
  145. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp_agent/config.py +0 -0
  146. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp_agent/main.py +0 -0
  147. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
  148. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
  149. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
  150. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
  151. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp_env.py +0 -0
  152. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp_servers/__init__.py +0 -0
  153. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
  154. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
  155. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
  156. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp_servers/tau2/README.md +0 -0
  157. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
  158. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
  159. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
  160. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
  161. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp_servers/tau2/server.py +0 -0
  162. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
  163. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
  164. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
  165. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
  166. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
  167. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/models.py +0 -0
  168. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/packaging.py +0 -0
  169. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/platform_api.py +0 -0
  170. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/playback_policy.py +0 -0
  171. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/proxy/__init__.py +0 -0
  172. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/proxy/proxy_core/__init__.py +0 -0
  173. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/proxy/proxy_core/auth.py +0 -0
  174. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/proxy/proxy_core/main.py +0 -0
  175. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/proxy/proxy_core/redis_utils.py +0 -0
  176. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/__init__.py +0 -0
  177. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/buffer.py +0 -0
  178. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
  179. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
  180. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/default_klavis_sandbox_rollout_processor.py +0 -0
  181. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
  182. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
  183. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
  184. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
  185. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
  186. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
  187. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
  188. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/evaluation_test.py +0 -0
  189. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
  190. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/evaluation_test_utils.py +0 -0
  191. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/exception_config.py +0 -0
  192. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/execution.py +0 -0
  193. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
  194. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/github_action_rollout_processor.py +0 -0
  195. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/handle_persist_flow.py +0 -0
  196. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/integrations/openenv_trl_vllm.py +0 -0
  197. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/openenv_rollout_processor.py +0 -0
  198. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/parameterize.py +0 -0
  199. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/plugin.py +0 -0
  200. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/priority_scheduler.py +0 -0
  201. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/remote_rollout_processor.py +0 -0
  202. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/rollout_processor.py +0 -0
  203. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/rollout_result_post_processor.py +0 -0
  204. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/store_experiment_link.py +0 -0
  205. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/store_results_url.py +0 -0
  206. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/tracing_utils.py +0 -0
  207. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/types.py +0 -0
  208. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/utils.py +0 -0
  209. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/pytest/validate_signature.py +0 -0
  210. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/quickstart/__init__.py +0 -0
  211. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/quickstart/aha_judge/__init__.py +0 -0
  212. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/quickstart/aha_judge/llm_judge.py +0 -0
  213. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/quickstart/aha_judge/llm_judge_braintrust.py +0 -0
  214. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/quickstart/aha_judge/llm_judge_langfuse.py +0 -0
  215. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/quickstart/aha_judge/llm_judge_langsmith.py +0 -0
  216. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/quickstart/aha_judge/llm_judge_openai_responses.py +0 -0
  217. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/quickstart/aha_judge/utils.py +0 -0
  218. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/quickstart/llm_judge.py +0 -0
  219. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
  220. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/quickstart/svg_agent/evaluator/test_svgagent.py +0 -0
  221. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/quickstart/svg_agent/evaluator/utils.py +0 -0
  222. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +0 -0
  223. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/quickstart/utils.py +0 -0
  224. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/resources.py +0 -0
  225. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/rewards/__init__.py +0 -0
  226. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/rewards/accuracy.py +0 -0
  227. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/rewards/accuracy_length.py +0 -0
  228. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/rewards/apps_coding_reward.py +0 -0
  229. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/rewards/apps_execution_utils.py +0 -0
  230. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/rewards/apps_testing_util.py +0 -0
  231. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/rewards/bfcl_reward.py +0 -0
  232. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/rewards/code_execution.py +0 -0
  233. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/rewards/code_execution_utils.py +0 -0
  234. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/rewards/cpp_code.py +0 -0
  235. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/rewards/deepcoder_reward.py +0 -0
  236. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/rewards/format.py +0 -0
  237. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/rewards/function_calling.py +0 -0
  238. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/rewards/json_schema.py +0 -0
  239. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/rewards/language_consistency.py +0 -0
  240. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/rewards/lean_prover.py +0 -0
  241. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/rewards/length.py +0 -0
  242. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
  243. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/rewards/math.py +0 -0
  244. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
  245. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/rewards/reasoning_steps.py +0 -0
  246. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/rewards/repetition.py +0 -0
  247. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/rewards/tag_count.py +0 -0
  248. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/rl_processing.py +0 -0
  249. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/server.py +0 -0
  250. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/stats/__init__.py +0 -0
  251. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/stats/confidence_intervals.py +0 -0
  252. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/training/__init__.py +0 -0
  253. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/training/gepa_trainer.py +0 -0
  254. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/training/gepa_utils.py +0 -0
  255. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/training/trainer.py +0 -0
  256. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/training/utils.py +0 -0
  257. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/typed_interface.py +0 -0
  258. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/types/__init__.py +0 -0
  259. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/types/errors.py +0 -0
  260. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/types/remote_rollout_processor.py +0 -0
  261. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/types/types.py +0 -0
  262. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/utils/__init__.py +0 -0
  263. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/utils/batch_evaluation.py +0 -0
  264. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/utils/batch_transformation.py +0 -0
  265. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/utils/browser_utils.py +0 -0
  266. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/utils/check_server_status.py +0 -0
  267. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/utils/dataset_helpers.py +0 -0
  268. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/utils/evaluation_row_utils.py +0 -0
  269. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/utils/logs_models.py +0 -0
  270. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/utils/logs_server.py +0 -0
  271. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/utils/module_loader.py +0 -0
  272. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/utils/packaging_utils.py +0 -0
  273. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/utils/show_results_url.py +0 -0
  274. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/utils/static_policy.py +0 -0
  275. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/utils/subprocess_utils.py +0 -0
  276. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol/utils/vite_server.py +0 -0
  277. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol.egg-info/SOURCES.txt +0 -0
  278. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol.egg-info/dependency_links.txt +0 -0
  279. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol.egg-info/entry_points.txt +0 -0
  280. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/eval_protocol.egg-info/top_level.txt +0 -0
  281. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/setup.cfg +0 -0
  282. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/setup.py +0 -0
  283. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_accuracy.py +0 -0
  284. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_accuracy_length.py +0 -0
  285. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_adapters_e2e.py +0 -0
  286. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_agent_orchestrator.py +0 -0
  287. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_agent_resources.py +0 -0
  288. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_auth.py +0 -0
  289. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_batch_evaluation.py +0 -0
  290. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_cli_agent.py +0 -0
  291. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_cli_args.py +0 -0
  292. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_cli_create_rft.py +0 -0
  293. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_cli_local_test.py +0 -0
  294. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_cli_startup_benchmark.py +0 -0
  295. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_code_execution.py +0 -0
  296. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_config.py +0 -0
  297. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_control_plane_separation.py +0 -0
  298. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_cpp_code.py +0 -0
  299. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_data_driven_task_manager.py +0 -0
  300. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_deepcoder_reward.py +0 -0
  301. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_deepeval_integration.py +0 -0
  302. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_directory_utils.py +0 -0
  303. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_e2b_integration.py +0 -0
  304. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_e2b_js_integration.py +0 -0
  305. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_edge_cases.py +0 -0
  306. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_ep_upload_e2e.py +0 -0
  307. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_eval_protocol_import.py +0 -0
  308. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_evaluation.py +0 -0
  309. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_evaluation_postprocess.py +0 -0
  310. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_event_bus.py +0 -0
  311. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_event_bus_helper.py +0 -0
  312. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_examples_end_to_end.py +0 -0
  313. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_exception_config.py +0 -0
  314. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_exceptions.py +0 -0
  315. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_fireworks_api.py +0 -0
  316. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_fireworks_v1_completions_client.py +0 -0
  317. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_format.py +0 -0
  318. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_fractional_code.py +0 -0
  319. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_function_calling.py +0 -0
  320. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_gcp_tools.py +0 -0
  321. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_generic_server.py +0 -0
  322. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_human_id.py +0 -0
  323. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_integration.py +0 -0
  324. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_json_schema.py +0 -0
  325. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_kwargs_validation.py +0 -0
  326. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_language_consistency.py +0 -0
  327. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_lean_prover.py +0 -0
  328. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_lean_prover_runner.py +0 -0
  329. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_length.py +0 -0
  330. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_list_comparison_math_reward.py +0 -0
  331. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_litellm_policy_provider_fields.py +0 -0
  332. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_logs_server.py +0 -0
  333. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_logs_server_simple.py +0 -0
  334. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_math.py +0 -0
  335. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_message_field_filtering.py +0 -0
  336. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_minimal.py +0 -0
  337. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_models.py +0 -0
  338. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_models_rl.py +0 -0
  339. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_multiple_choice_math_reward.py +0 -0
  340. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_n_variant_batch_integration.py +0 -0
  341. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_n_variant_integration.py +0 -0
  342. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_no_implicit_dotenv.py +0 -0
  343. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_openai_compatibility.py +0 -0
  344. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_openai_rft_integration.py +0 -0
  345. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_openeval_integration.py +0 -0
  346. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_packaging.py +0 -0
  347. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_parallel_rollouts.py +0 -0
  348. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_platform_api.py +0 -0
  349. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_priority_scheduler.py +0 -0
  350. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_quickstart_utils.py +0 -0
  351. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_readiness.py +0 -0
  352. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_reasoning_steps.py +0 -0
  353. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_repetition.py +0 -0
  354. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_repetition_debug.py +0 -0
  355. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_retry_mechanism.py +0 -0
  356. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_reward_function.py +0 -0
  357. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_reward_protocol_import.py +0 -0
  358. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_rl_processing.py +0 -0
  359. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_rollout_control_plane_integration.py +0 -0
  360. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_rollout_logprobs.py +0 -0
  361. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_server.py +0 -0
  362. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_show_results_url.py +0 -0
  363. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_sqlite_hardening.py +0 -0
  364. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_status_migration_changes.py +0 -0
  365. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_status_migration_integration.py +0 -0
  366. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_status_model.py +0 -0
  367. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_tag_count.py +0 -0
  368. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_tau_bench_airline_smoke.py +0 -0
  369. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_training_utils.py +0 -0
  370. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_typed_interface.py +0 -0
  371. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_typed_interface_rl.py +0 -0
  372. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_upload_entrypoint.py +0 -0
  373. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_url_handling.py +0 -0
  374. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/tests/test_vite_server.py +0 -0
  375. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/__init__.py +0 -0
  376. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/agent/__init__.py +0 -0
  377. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/agent/base.py +0 -0
  378. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/agent/llm_agent.py +0 -0
  379. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/api_service/__init__.py +0 -0
  380. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/api_service/api_config.py +0 -0
  381. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/api_service/data_model.py +0 -0
  382. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/api_service/simulation_service.py +0 -0
  383. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/cli.py +0 -0
  384. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/config.py +0 -0
  385. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/data/domains/airline/policy.md +0 -0
  386. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/data/domains/mock/policy.md +0 -0
  387. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
  388. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/data/domains/retail/policy.md +0 -0
  389. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
  390. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
  391. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
  392. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
  393. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
  394. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
  395. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
  396. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/data_model/__init__.py +0 -0
  397. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/data_model/message.py +0 -0
  398. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/data_model/simulation.py +0 -0
  399. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/data_model/tasks.py +0 -0
  400. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/__init__.py +0 -0
  401. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/airline/__init__.py +0 -0
  402. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/airline/data_model.py +0 -0
  403. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/airline/environment.py +0 -0
  404. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/airline/tools.py +0 -0
  405. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/airline/utils.py +0 -0
  406. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/mock/__init__.py +0 -0
  407. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/mock/data_model.py +0 -0
  408. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/mock/environment.py +0 -0
  409. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/mock/tools.py +0 -0
  410. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/mock/utils.py +0 -0
  411. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/retail/__init__.py +0 -0
  412. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/retail/data_model.py +0 -0
  413. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/retail/environment.py +0 -0
  414. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/retail/tools.py +0 -0
  415. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/retail/utils.py +0 -0
  416. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/telecom/__init__.py +0 -0
  417. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/telecom/data_model.py +0 -0
  418. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/telecom/environment.py +0 -0
  419. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
  420. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
  421. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
  422. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
  423. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
  424. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
  425. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
  426. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
  427. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/telecom/tools.py +0 -0
  428. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
  429. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/telecom/user_tools.py +0 -0
  430. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/domains/telecom/utils.py +0 -0
  431. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/environment/__init__.py +0 -0
  432. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/environment/db.py +0 -0
  433. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/environment/environment.py +0 -0
  434. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/environment/server.py +0 -0
  435. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/environment/tool.py +0 -0
  436. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/environment/toolkit.py +0 -0
  437. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/environment/utils/interface_agent.py +0 -0
  438. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/evaluator/__init__.py +0 -0
  439. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/evaluator/evaluator.py +0 -0
  440. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/evaluator/evaluator_action.py +0 -0
  441. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/evaluator/evaluator_base.py +0 -0
  442. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
  443. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/evaluator/evaluator_env.py +0 -0
  444. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
  445. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/metrics/__init__.py +0 -0
  446. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/metrics/agent_metrics.py +0 -0
  447. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/metrics/break_down_metrics.py +0 -0
  448. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/orchestrator/__init__.py +0 -0
  449. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/orchestrator/environment_manager.py +0 -0
  450. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/orchestrator/orchestrator.py +0 -0
  451. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/orchestrator/utils.py +0 -0
  452. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/registry.py +0 -0
  453. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/run.py +0 -0
  454. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/scripts/__init__.py +0 -0
  455. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/scripts/check_data.py +0 -0
  456. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/scripts/show_domain_doc.py +0 -0
  457. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/scripts/start_servers.py +0 -0
  458. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/scripts/view_simulations.py +0 -0
  459. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/user/__init__.py +0 -0
  460. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/user/base.py +0 -0
  461. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/user/user_simulator.py +0 -0
  462. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/utils/__init__.py +0 -0
  463. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/utils/display.py +0 -0
  464. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/utils/io_utils.py +0 -0
  465. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/utils/llm_utils.py +0 -0
  466. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/utils/pydantic_utils.py +0 -0
  467. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vendor/tau2/utils/utils.py +0 -0
  468. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/versioneer.py +0 -0
  469. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
  470. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vite-app/dist/assets/index-DFeF7AG_.js +0 -0
  471. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vite-app/dist/assets/index-DFeF7AG_.js.map +0 -0
  472. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vite-app/dist/assets/index-DvKW7FQL.css +0 -0
  473. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
  474. {eval_protocol-0.3.22 → eval_protocol-0.3.24}/vite-app/dist/index.html +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.3.22
3
+ Version: 0.3.24
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -24,12 +24,12 @@ Requires-Dist: hydra-core>=1.3.2
24
24
  Requires-Dist: omegaconf>=2.3.0
25
25
  Requires-Dist: httpx>=0.24.0
26
26
  Requires-Dist: anthropic>=0.59.0
27
- Requires-Dist: litellm<1.82.0,>=1.81.0
27
+ Requires-Dist: litellm<1.75.0
28
28
  Requires-Dist: pytest>=6.0.0
29
29
  Requires-Dist: pytest-asyncio>=0.21.0
30
30
  Requires-Dist: peewee>=3.18.2
31
31
  Requires-Dist: backoff>=2.2.0
32
- Requires-Dist: fireworks-ai==1.0.0a20
32
+ Requires-Dist: fireworks-ai<2,>=1.0.0a20
33
33
  Requires-Dist: questionary>=2.0.0
34
34
  Requires-Dist: toml>=0.10.0
35
35
  Requires-Dist: loguru>=0.6.0
@@ -111,14 +111,11 @@ Requires-Dist: langchain-core>=0.3.75; extra == "langgraph"
111
111
  Provides-Extra: langgraph-tools
112
112
  Requires-Dist: langgraph>=0.6.7; extra == "langgraph-tools"
113
113
  Requires-Dist: langchain>=0.3.0; extra == "langgraph-tools"
114
+ Requires-Dist: langchain-fireworks>=0.3.0; extra == "langgraph-tools"
114
115
  Provides-Extra: proxy
115
116
  Requires-Dist: redis>=5.0.0; extra == "proxy"
116
117
  Requires-Dist: langfuse>=2.0.0; extra == "proxy"
117
118
  Requires-Dist: uuid6>=2025.0.0; extra == "proxy"
118
- Requires-Dist: litellm<1.82.0,>=1.81.0; extra == "proxy"
119
- Requires-Dist: opentelemetry-api>=1.29.0; extra == "proxy"
120
- Requires-Dist: opentelemetry-sdk>=1.29.0; extra == "proxy"
121
- Requires-Dist: opentelemetry-exporter-otlp>=1.29.0; extra == "proxy"
122
119
  Dynamic: license-file
123
120
 
124
121
  # Eval Protocol
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2026-03-06T17:12:43-0800",
11
+ "date": "2026-03-10T03:58:26-0700",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "9f0f5e4d564aa8bdbf2868848f91bb7e325183d4",
15
- "version": "0.3.22"
14
+ "full-revisionid": "0f3c47172cc248d6a9ebbec809ee870f59b03698",
15
+ "version": "0.3.24"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -8,10 +8,8 @@ from __future__ import annotations
8
8
  import logging
9
9
  import requests
10
10
  from datetime import datetime
11
- import ast
12
- import json
13
- import os
14
11
  from typing import Any, Dict, List, Optional, Protocol
12
+ import os
15
13
 
16
14
  from eval_protocol.models import EvaluationRow, InputMetadata, ExecutionMetadata, Message
17
15
  from .base import BaseAdapter
@@ -46,43 +44,6 @@ class TraceDictConverter(Protocol):
46
44
  ...
47
45
 
48
46
 
49
- def extract_otel_attributes(observations: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
50
- """Attempt to extract and parse attributes from raw_gen_ai_request observation. This only works when stored in OTEL format.
51
-
52
- Args:
53
- observations: List of observation dictionaries from the trace
54
-
55
- Returns:
56
- Dict with all attributes parsed. Or None if not found.
57
- """
58
- for obs in observations:
59
- if obs.get("name") == "raw_gen_ai_request" and obs.get("type") == "SPAN":
60
- metadata = obs.get("metadata") or {}
61
- attributes = metadata.get("attributes") or {}
62
-
63
- result: Dict[str, Any] = {}
64
-
65
- for key, value in attributes.items():
66
- # Try to parse stringified objects (could be Python repr or JSON)
67
- if isinstance(value, str) and value.startswith(("[", "{")):
68
- try:
69
- result[key] = ast.literal_eval(value)
70
- except Exception as e:
71
- logger.debug("Failed to parse %s with ast.literal_eval: %s", key, e)
72
- try:
73
- result[key] = json.loads(value)
74
- except Exception as e:
75
- logger.debug("Failed to parse %s with json.loads: %s", key, e)
76
- result[key] = value
77
- else:
78
- result[key] = value
79
-
80
- if result:
81
- return result
82
-
83
- return None
84
-
85
-
86
47
  def convert_trace_dict_to_evaluation_row(
87
48
  trace: Dict[str, Any], include_tool_calls: bool = True, span_name: Optional[str] = None
88
49
  ) -> Optional[EvaluationRow]:
@@ -135,19 +96,6 @@ def convert_trace_dict_to_evaluation_row(
135
96
  ):
136
97
  break # Break early if we've found all the metadata we need
137
98
 
138
- observations = trace.get("observations") or []
139
- # We can only extract when stored in OTEL format.
140
- otel_attributes = extract_otel_attributes(observations)
141
- if otel_attributes:
142
- # Find choices from any provider (llm.*.choices pattern)
143
- choices = None
144
- for key, value in otel_attributes.items():
145
- if key.endswith(".choices") and isinstance(value, list):
146
- choices = value
147
- break
148
- if choices and len(choices) > 0:
149
- execution_metadata.finish_reason = choices[0].get("finish_reason")
150
-
151
99
  return EvaluationRow(
152
100
  messages=messages,
153
101
  tools=tools,
@@ -212,7 +160,7 @@ def extract_messages_from_trace_dict(
212
160
  # Fallback: use the last GENERATION observation which typically contains full chat history
213
161
  if not messages:
214
162
  try:
215
- all_observations = trace.get("observations") or []
163
+ all_observations = trace.get("observations", [])
216
164
  gens = [obs for obs in all_observations if obs.get("type") == "GENERATION"]
217
165
  if gens:
218
166
  gens.sort(key=lambda x: x.get("start_time", ""))
@@ -238,7 +186,7 @@ def get_final_generation_in_span_dict(trace: Dict[str, Any], span_name: str) ->
238
186
  The final generation dictionary, or None if not found
239
187
  """
240
188
  # Get all observations from the trace
241
- all_observations = trace.get("observations") or []
189
+ all_observations = trace.get("observations", [])
242
190
 
243
191
  # Find a span with the given name that has generation children
244
192
  parent_span = None
@@ -15,7 +15,7 @@ from contextlib import asynccontextmanager
15
15
 
16
16
  from .models import ProxyConfig, LangfuseTracesResponse, TracesParams, ChatParams, ChatRequestHook, TracesRequestHook
17
17
  from .auth import AuthProvider, NoAuthProvider
18
- from .litellm import handle_chat_completion
18
+ from .litellm import handle_chat_completion, proxy_to_litellm
19
19
  from .langfuse import fetch_langfuse_traces, pointwise_fetch_langfuse_trace
20
20
 
21
21
  # Configure logging before any other imports (so all modules inherit this config)
@@ -35,6 +35,10 @@ def build_proxy_config(
35
35
  preprocess_traces_request: Optional[TracesRequestHook] = None,
36
36
  ) -> ProxyConfig:
37
37
  """Load environment and secrets, and build ProxyConfig"""
38
+ # Env
39
+ litellm_url = os.getenv("LITELLM_URL")
40
+ if not litellm_url:
41
+ raise ValueError("LITELLM_URL environment variable must be set")
38
42
  request_timeout = float(os.getenv("REQUEST_TIMEOUT", "300.0"))
39
43
  langfuse_host = os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com")
40
44
 
@@ -62,6 +66,7 @@ def build_proxy_config(
62
66
  raise ValueError(f"Invalid format in secrets file {secrets_path.name}: {e}")
63
67
 
64
68
  return ProxyConfig(
69
+ litellm_url=litellm_url,
65
70
  request_timeout=request_timeout,
66
71
  langfuse_host=langfuse_host,
67
72
  langfuse_keys=langfuse_keys,
@@ -108,16 +113,6 @@ def create_app(
108
113
  app.state.config = build_proxy_config(preprocess_chat_request, preprocess_traces_request)
109
114
  app.state.redis = init_redis()
110
115
 
111
- config = app.state.config
112
- default_keys = config.langfuse_keys[config.default_project_id]
113
- os.environ["LANGFUSE_PUBLIC_KEY"] = default_keys["public_key"]
114
- os.environ["LANGFUSE_SECRET_KEY"] = default_keys["secret_key"]
115
- os.environ.setdefault("LANGFUSE_HOST", config.langfuse_host)
116
-
117
- import litellm
118
-
119
- litellm.callbacks = ["langfuse_otel"]
120
-
121
116
  try:
122
117
  yield
123
118
  finally:
@@ -302,4 +297,13 @@ def create_app(
302
297
  async def health():
303
298
  return {"status": "healthy", "service": "metadata-proxy"}
304
299
 
300
+ # Catch-all
301
+ @app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH"])
302
+ async def catch_all_proxy(
303
+ path: str,
304
+ request: Request,
305
+ config: ProxyConfig = Depends(get_config),
306
+ ):
307
+ return await proxy_to_litellm(config, path, request)
308
+
305
309
  return app
@@ -50,7 +50,6 @@ def _serialize_trace_to_dict(trace_full: Any) -> Dict[str, Any]:
50
50
  "input": getattr(obs, "input", None),
51
51
  "output": getattr(obs, "output", None),
52
52
  "parent_observation_id": getattr(obs, "parent_observation_id", None),
53
- "metadata": getattr(obs, "metadata", None),
54
53
  }
55
54
  for obs in getattr(trace_full, "observations", [])
56
55
  ]
@@ -0,0 +1,173 @@
1
+ """
2
+ LiteLLM client - handles all communication with LiteLLM service.
3
+ """
4
+
5
+ import json
6
+ import base64
7
+ import httpx
8
+ import logging
9
+ from uuid6 import uuid7
10
+ from fastapi import Request, Response, HTTPException
11
+ import redis
12
+ from .redis_utils import register_insertion_id
13
+ from .models import ProxyConfig, ChatParams
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ async def handle_chat_completion(
19
+ config: ProxyConfig,
20
+ redis_client: redis.Redis,
21
+ request: Request,
22
+ params: ChatParams,
23
+ ) -> Response:
24
+ """
25
+ Handle chat completion requests and forward to LiteLLM.
26
+
27
+ If metadata IDs (rollout_id, etc.) are provided, they'll be added as tags
28
+ and the assistant message count will be tracked in Redis.
29
+
30
+ If encoded_base_url is provided, it will be decoded and added to the request.
31
+ """
32
+ body = await request.body()
33
+ data = json.loads(body) if body else {}
34
+
35
+ if config.preprocess_chat_request:
36
+ data, params = config.preprocess_chat_request(data, request, params)
37
+
38
+ project_id = params.project_id
39
+ rollout_id = params.rollout_id
40
+ invocation_id = params.invocation_id
41
+ experiment_id = params.experiment_id
42
+ run_id = params.run_id
43
+ row_id = params.row_id
44
+ encoded_base_url = params.encoded_base_url
45
+
46
+ # Use default project if not specified
47
+ if project_id is None:
48
+ project_id = config.default_project_id
49
+
50
+ # Decode and add base_url if provided
51
+ if encoded_base_url:
52
+ try:
53
+ # Decode from URL-safe base64
54
+ decoded_bytes = base64.urlsafe_b64decode(encoded_base_url)
55
+ base_url = decoded_bytes.decode("utf-8")
56
+ data["base_url"] = base_url
57
+ logger.debug(f"Decoded base_url: {base_url}")
58
+ except Exception as e:
59
+ logger.error(f"Failed to decode base_url: {e}")
60
+ raise HTTPException(status_code=400, detail=f"Invalid encoded_base_url: {str(e)}")
61
+
62
+ # Extract API key from Authorization header and inject into request body
63
+ auth_header = request.headers.get("authorization", "")
64
+ if auth_header.startswith("Bearer "):
65
+ api_key = auth_header.replace("Bearer ", "").strip()
66
+ # Only inject API key if model is a Fireworks model
67
+ model = data.get("model")
68
+ if model and isinstance(model, str) and model.startswith("fireworks_ai"):
69
+ data["api_key"] = api_key
70
+
71
+ # If metadata IDs are provided, add them as tags
72
+ insertion_id = None
73
+ if rollout_id is not None:
74
+ insertion_id = str(uuid7())
75
+
76
+ if "metadata" not in data:
77
+ data["metadata"] = {}
78
+ if "tags" not in data["metadata"]:
79
+ data["metadata"]["tags"] = []
80
+
81
+ # Add extracted IDs as tags
82
+ data["metadata"]["tags"].extend(
83
+ [
84
+ f"rollout_id:{rollout_id}",
85
+ f"insertion_id:{insertion_id}",
86
+ f"invocation_id:{invocation_id}",
87
+ f"experiment_id:{experiment_id}",
88
+ f"run_id:{run_id}",
89
+ f"row_id:{row_id}",
90
+ ]
91
+ )
92
+
93
+ # Add Langfuse configuration
94
+ data["langfuse_public_key"] = config.langfuse_keys[project_id]["public_key"]
95
+ data["langfuse_secret_key"] = config.langfuse_keys[project_id]["secret_key"]
96
+ data["langfuse_host"] = config.langfuse_host
97
+
98
+ # Forward to LiteLLM's standard /chat/completions endpoint
99
+ # Set longer timeout for LLM API calls (LLMs can be slow)
100
+ timeout = httpx.Timeout(config.request_timeout)
101
+ async with httpx.AsyncClient(timeout=timeout) as client:
102
+ # Copy headers from original request but exclude content-length (httpx will set it correctly)
103
+ headers = dict(request.headers)
104
+ headers.pop("host", None)
105
+ headers.pop("content-length", None) # Let httpx calculate the correct length
106
+ headers["content-type"] = "application/json"
107
+
108
+ # Forward to LiteLLM
109
+ litellm_url = f"{config.litellm_url}/chat/completions"
110
+
111
+ response = await client.post(
112
+ litellm_url,
113
+ json=data, # httpx will serialize and set correct Content-Length
114
+ headers=headers,
115
+ )
116
+
117
+ # Register insertion_id in Redis only on successful response
118
+ if response.status_code == 200 and insertion_id is not None and rollout_id is not None:
119
+ register_insertion_id(redis_client, rollout_id, insertion_id)
120
+
121
+ # Return the response
122
+ return Response(
123
+ content=response.content,
124
+ status_code=response.status_code,
125
+ headers=dict(response.headers),
126
+ )
127
+
128
+
129
+ async def proxy_to_litellm(config: ProxyConfig, path: str, request: Request) -> Response:
130
+ """
131
+ Catch-all proxy: Forward any request to LiteLLM, extracting API key from Authorization header.
132
+ """
133
+ # Set longer timeout for LLM API calls (LLMs can be slow)
134
+ timeout = httpx.Timeout(config.request_timeout)
135
+ async with httpx.AsyncClient(timeout=timeout) as client:
136
+ # Copy headers
137
+ headers = dict(request.headers)
138
+ headers.pop("host", None)
139
+ headers.pop("content-length", None)
140
+
141
+ # Get body
142
+ body = await request.body()
143
+
144
+ # Pass through API key from Authorization header
145
+ if request.method in ["POST", "PUT", "PATCH"] and body:
146
+ try:
147
+ data = json.loads(body)
148
+
149
+ auth_header = request.headers.get("authorization", "")
150
+ if auth_header.startswith("Bearer "):
151
+ api_key = auth_header.replace("Bearer ", "").strip()
152
+ data["api_key"] = api_key
153
+
154
+ # Re-serialize
155
+ body = json.dumps(data).encode()
156
+ except json.JSONDecodeError:
157
+ pass
158
+
159
+ # Forward to LiteLLM
160
+ litellm_url = f"{config.litellm_url}/{path}"
161
+
162
+ response = await client.request(
163
+ method=request.method,
164
+ url=litellm_url,
165
+ headers=headers,
166
+ content=body,
167
+ )
168
+
169
+ return Response(
170
+ content=response.content,
171
+ status_code=response.status_code,
172
+ headers=dict(response.headers),
173
+ )
@@ -53,6 +53,7 @@ class TracesParams(BaseModel):
53
53
  class ProxyConfig(BaseModel):
54
54
  """Configuration model for the LiteLLM Metadata Proxy"""
55
55
 
56
+ litellm_url: str
56
57
  request_timeout: float = 300.0
57
58
  langfuse_host: str
58
59
  langfuse_keys: Dict[str, Dict[str, str]]
@@ -72,7 +73,6 @@ class ObservationResponse(BaseModel):
72
73
  input: Optional[Any] = None
73
74
  output: Optional[Any] = None
74
75
  parent_observation_id: Optional[str] = None
75
- metadata: Optional[Dict[str, Any]] = None
76
76
 
77
77
 
78
78
  class TraceResponse(BaseModel):
@@ -12,6 +12,7 @@ import requests
12
12
  from .models import EvaluateResult, MetricResult
13
13
  from .typed_interface import reward_function
14
14
 
15
+ logging.basicConfig(level=logging.INFO)
15
16
  logger = logging.getLogger(__name__)
16
17
 
17
18
  T = TypeVar("T", bound=Callable[..., EvaluateResult])
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.3.22
3
+ Version: 0.3.24
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -24,12 +24,12 @@ Requires-Dist: hydra-core>=1.3.2
24
24
  Requires-Dist: omegaconf>=2.3.0
25
25
  Requires-Dist: httpx>=0.24.0
26
26
  Requires-Dist: anthropic>=0.59.0
27
- Requires-Dist: litellm<1.82.0,>=1.81.0
27
+ Requires-Dist: litellm<1.75.0
28
28
  Requires-Dist: pytest>=6.0.0
29
29
  Requires-Dist: pytest-asyncio>=0.21.0
30
30
  Requires-Dist: peewee>=3.18.2
31
31
  Requires-Dist: backoff>=2.2.0
32
- Requires-Dist: fireworks-ai==1.0.0a20
32
+ Requires-Dist: fireworks-ai<2,>=1.0.0a20
33
33
  Requires-Dist: questionary>=2.0.0
34
34
  Requires-Dist: toml>=0.10.0
35
35
  Requires-Dist: loguru>=0.6.0
@@ -111,14 +111,11 @@ Requires-Dist: langchain-core>=0.3.75; extra == "langgraph"
111
111
  Provides-Extra: langgraph-tools
112
112
  Requires-Dist: langgraph>=0.6.7; extra == "langgraph-tools"
113
113
  Requires-Dist: langchain>=0.3.0; extra == "langgraph-tools"
114
+ Requires-Dist: langchain-fireworks>=0.3.0; extra == "langgraph-tools"
114
115
  Provides-Extra: proxy
115
116
  Requires-Dist: redis>=5.0.0; extra == "proxy"
116
117
  Requires-Dist: langfuse>=2.0.0; extra == "proxy"
117
118
  Requires-Dist: uuid6>=2025.0.0; extra == "proxy"
118
- Requires-Dist: litellm<1.82.0,>=1.81.0; extra == "proxy"
119
- Requires-Dist: opentelemetry-api>=1.29.0; extra == "proxy"
120
- Requires-Dist: opentelemetry-sdk>=1.29.0; extra == "proxy"
121
- Requires-Dist: opentelemetry-exporter-otlp>=1.29.0; extra == "proxy"
122
119
  Dynamic: license-file
123
120
 
124
121
  # Eval Protocol
@@ -12,12 +12,12 @@ hydra-core>=1.3.2
12
12
  omegaconf>=2.3.0
13
13
  httpx>=0.24.0
14
14
  anthropic>=0.59.0
15
- litellm<1.82.0,>=1.81.0
15
+ litellm<1.75.0
16
16
  pytest>=6.0.0
17
17
  pytest-asyncio>=0.21.0
18
18
  peewee>=3.18.2
19
19
  backoff>=2.2.0
20
- fireworks-ai==1.0.0a20
20
+ fireworks-ai<2,>=1.0.0a20
21
21
  questionary>=2.0.0
22
22
  toml>=0.10.0
23
23
  loguru>=0.6.0
@@ -93,6 +93,7 @@ langchain-core>=0.3.75
93
93
  [langgraph_tools]
94
94
  langgraph>=0.6.7
95
95
  langchain>=0.3.0
96
+ langchain-fireworks>=0.3.0
96
97
 
97
98
  [langsmith]
98
99
  langsmith>=0.1.86
@@ -107,10 +108,6 @@ openevals>=0.1.0
107
108
  redis>=5.0.0
108
109
  langfuse>=2.0.0
109
110
  uuid6>=2025.0.0
110
- litellm<1.82.0,>=1.81.0
111
- opentelemetry-api>=1.29.0
112
- opentelemetry-sdk>=1.29.0
113
- opentelemetry-exporter-otlp>=1.29.0
114
111
 
115
112
  [pydantic]
116
113
  pydantic-ai>=1.0.2
@@ -31,12 +31,12 @@ dependencies = [
31
31
  "omegaconf>=2.3.0",
32
32
  "httpx>=0.24.0",
33
33
  "anthropic>=0.59.0",
34
- "litellm>=1.81.0,<1.82.0",
34
+ "litellm<1.75.0",
35
35
  "pytest>=6.0.0",
36
36
  "pytest-asyncio>=0.21.0",
37
37
  "peewee>=3.18.2",
38
38
  "backoff>=2.2.0",
39
- "fireworks-ai==1.0.0a20",
39
+ "fireworks-ai>=1.0.0a20,<2",
40
40
  "questionary>=2.0.0",
41
41
  # Dependencies for vendored tau2 package
42
42
  "toml>=0.10.0",
@@ -146,17 +146,13 @@ langgraph = [
146
146
  langgraph_tools = [
147
147
  "langgraph>=0.6.7",
148
148
  "langchain>=0.3.0",
149
- # langchain-fireworks removed: incompatible with fireworks-ai>=1.0.0
149
+ "langchain-fireworks>=0.3.0",
150
150
  ]
151
151
 
152
152
  proxy = [
153
153
  "redis>=5.0.0",
154
154
  "langfuse>=2.0.0",
155
155
  "uuid6>=2025.0.0",
156
- "litellm>=1.81.0,<1.82.0",
157
- "opentelemetry-api>=1.29.0",
158
- "opentelemetry-sdk>=1.29.0",
159
- "opentelemetry-exporter-otlp>=1.29.0",
160
156
  ]
161
157
 
162
158
  [project.scripts]
@@ -1,154 +0,0 @@
1
- """
2
- LiteLLM client - handles all LLM calls directly via LiteLLM SDK with Langfuse OTEL integration.
3
- """
4
-
5
- import json
6
- import base64
7
- import logging
8
- from uuid6 import uuid7
9
- from fastapi import Request, Response, HTTPException
10
- from fastapi.responses import StreamingResponse
11
- import redis
12
- import openai
13
- from litellm import acompletion
14
-
15
- from .redis_utils import register_insertion_id
16
- from .models import ProxyConfig, ChatParams
17
-
18
- logger = logging.getLogger(__name__)
19
-
20
-
21
- async def handle_chat_completion(
22
- config: ProxyConfig,
23
- redis_client: redis.Redis,
24
- request: Request,
25
- params: ChatParams,
26
- ) -> Response:
27
- """
28
- Handle chat completion requests using LiteLLM SDK directly with Langfuse OTEL.
29
-
30
- If metadata IDs (rollout_id, etc.) are provided, they'll be added as tags
31
- and the assistant message count will be tracked in Redis.
32
-
33
- If encoded_base_url is provided, it will be decoded and used as api_base.
34
- """
35
- body = await request.body()
36
- data = json.loads(body) if body else {}
37
-
38
- if config.preprocess_chat_request:
39
- data, params = config.preprocess_chat_request(data, request, params)
40
-
41
- project_id = params.project_id
42
- rollout_id = params.rollout_id
43
- invocation_id = params.invocation_id
44
- experiment_id = params.experiment_id
45
- run_id = params.run_id
46
- row_id = params.row_id
47
- encoded_base_url = params.encoded_base_url
48
-
49
- # Use default project if not specified
50
- if project_id is None:
51
- project_id = config.default_project_id
52
-
53
- # Decode and add base_url if provided
54
- if encoded_base_url:
55
- try:
56
- decoded_bytes = base64.urlsafe_b64decode(encoded_base_url)
57
- data["base_url"] = decoded_bytes.decode("utf-8")
58
- logger.debug(f"Decoded base_url: {data['base_url']}")
59
- except Exception as e:
60
- logger.error(f"Failed to decode base_url: {e}")
61
- raise HTTPException(status_code=400, detail=f"Invalid encoded_base_url: {str(e)}")
62
-
63
- # Extract API key from Authorization header and add to data
64
- auth_header = request.headers.get("authorization", "")
65
- if auth_header.startswith("Bearer "):
66
- data["api_key"] = auth_header.replace("Bearer ", "").strip()
67
-
68
- # Build metadata with tags for Langfuse
69
- insertion_id = None
70
- metadata = data.pop("metadata", {}) or {}
71
- tags = list(metadata.pop("tags", []) or [])
72
-
73
- if rollout_id is not None:
74
- insertion_id = str(uuid7())
75
- tags.extend(
76
- [
77
- f"rollout_id:{rollout_id}",
78
- f"insertion_id:{insertion_id}",
79
- f"invocation_id:{invocation_id}",
80
- f"experiment_id:{experiment_id}",
81
- f"run_id:{run_id}",
82
- f"row_id:{row_id}",
83
- ]
84
- )
85
-
86
- # Build Langfuse metadata (tags + user if present)
87
- # Convert user_id (from preprocess hook) to trace_user_id for Langfuse
88
- user_id = metadata.pop("user_id", None) or data.get("user")
89
- litellm_metadata = {"tags": tags, **metadata}
90
- if user_id:
91
- litellm_metadata["trace_user_id"] = user_id
92
-
93
- langfuse_keys = config.langfuse_keys[project_id]
94
-
95
- # Check if streaming is requested
96
- is_streaming = data.get("stream", False)
97
-
98
- # Pop fields that we pass explicitly to avoid duplicate kwarg errors
99
- request_timeout = data.pop("timeout", None) or config.request_timeout
100
- data.pop("langfuse_public_key", None)
101
- data.pop("langfuse_secret_key", None)
102
-
103
- try:
104
- # Make the completion call - pass all params through
105
- # Note: langfuse_host is set via LANGFUSE_HOST env var at startup; OTEL doesn't support per-request host override
106
- response = await acompletion(
107
- **data,
108
- metadata=litellm_metadata,
109
- timeout=request_timeout,
110
- langfuse_public_key=langfuse_keys["public_key"],
111
- langfuse_secret_key=langfuse_keys["secret_key"],
112
- )
113
-
114
- if is_streaming:
115
- # For streaming, return a StreamingResponse with SSE format
116
- # Register insertion_id only after stream completes successfully
117
- async def stream_generator():
118
- async for chunk in response: # type: ignore[union-attr]
119
- yield f"data: {chunk.model_dump_json()}\n\n"
120
- yield "data: [DONE]\n\n"
121
- # Stream completed successfully - now register
122
- if insertion_id is not None and rollout_id is not None:
123
- register_insertion_id(redis_client, rollout_id, insertion_id)
124
-
125
- return StreamingResponse(
126
- stream_generator(),
127
- media_type="text/event-stream",
128
- headers={
129
- "Cache-Control": "no-cache",
130
- "Connection": "keep-alive",
131
- },
132
- )
133
- else:
134
- # Non-streaming: register insertion_id on success
135
- if insertion_id is not None and rollout_id is not None:
136
- register_insertion_id(redis_client, rollout_id, insertion_id)
137
-
138
- return Response(
139
- content=response.model_dump_json(),
140
- status_code=200,
141
- media_type="application/json",
142
- )
143
-
144
- except HTTPException:
145
- raise
146
- except openai.APIError as e:
147
- # Convert to HTTPException and let FastAPI handle it
148
- raise HTTPException(
149
- status_code=getattr(e, "status_code", 500),
150
- detail=str(e),
151
- )
152
- except Exception as e:
153
- logger.error(f"Unexpected error: {e}", exc_info=True)
154
- raise HTTPException(status_code=500, detail=str(e))
File without changes
File without changes