eval-protocol 0.2.88__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (449) hide show
  1. eval_protocol-0.2.88/LICENSE +21 -0
  2. eval_protocol-0.2.88/PKG-INFO +154 -0
  3. eval_protocol-0.2.88/README.md +39 -0
  4. eval_protocol-0.2.88/development/__init__.py +1 -0
  5. eval_protocol-0.2.88/development/normalize_sandbox_fusion.py +522 -0
  6. eval_protocol-0.2.88/development/utils/__init__.py +1 -0
  7. eval_protocol-0.2.88/development/utils/generate_api_key.py +31 -0
  8. eval_protocol-0.2.88/development/utils/subprocess_manager.py +435 -0
  9. eval_protocol-0.2.88/eval_protocol/__init__.py +178 -0
  10. eval_protocol-0.2.88/eval_protocol/__main__.py +10 -0
  11. eval_protocol-0.2.88/eval_protocol/_version.py +21 -0
  12. eval_protocol-0.2.88/eval_protocol/adapters/__init__.py +101 -0
  13. eval_protocol-0.2.88/eval_protocol/adapters/base.py +25 -0
  14. eval_protocol-0.2.88/eval_protocol/adapters/bigquery.py +304 -0
  15. eval_protocol-0.2.88/eval_protocol/adapters/braintrust.py +315 -0
  16. eval_protocol-0.2.88/eval_protocol/adapters/fireworks_tracing.py +453 -0
  17. eval_protocol-0.2.88/eval_protocol/adapters/huggingface.py +435 -0
  18. eval_protocol-0.2.88/eval_protocol/adapters/langchain.py +214 -0
  19. eval_protocol-0.2.88/eval_protocol/adapters/langfuse.py +552 -0
  20. eval_protocol-0.2.88/eval_protocol/adapters/langsmith.py +413 -0
  21. eval_protocol-0.2.88/eval_protocol/adapters/openai_responses.py +216 -0
  22. eval_protocol-0.2.88/eval_protocol/adapters/trl.py +8 -0
  23. eval_protocol-0.2.88/eval_protocol/adapters/utils.py +98 -0
  24. eval_protocol-0.2.88/eval_protocol/adapters/weave.py +130 -0
  25. eval_protocol-0.2.88/eval_protocol/agent/__init__.py +29 -0
  26. eval_protocol-0.2.88/eval_protocol/agent/models.py +69 -0
  27. eval_protocol-0.2.88/eval_protocol/agent/orchestrator.py +891 -0
  28. eval_protocol-0.2.88/eval_protocol/agent/resource_abc.py +89 -0
  29. eval_protocol-0.2.88/eval_protocol/agent/resource_pool.py +184 -0
  30. eval_protocol-0.2.88/eval_protocol/agent/resources/__init__.py +19 -0
  31. eval_protocol-0.2.88/eval_protocol/agent/resources/bfcl_envs/__init__.py +1 -0
  32. eval_protocol-0.2.88/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +342 -0
  33. eval_protocol-0.2.88/eval_protocol/agent/resources/bfcl_envs/math_api.py +40 -0
  34. eval_protocol-0.2.88/eval_protocol/agent/resources/bfcl_envs/posting_api.py +157 -0
  35. eval_protocol-0.2.88/eval_protocol/agent/resources/bfcl_sim_api_resource.py +313 -0
  36. eval_protocol-0.2.88/eval_protocol/agent/resources/docker_resource.py +476 -0
  37. eval_protocol-0.2.88/eval_protocol/agent/resources/filesystem_resource.py +371 -0
  38. eval_protocol-0.2.88/eval_protocol/agent/resources/python_state_resource.py +170 -0
  39. eval_protocol-0.2.88/eval_protocol/agent/resources/sql_resource.py +271 -0
  40. eval_protocol-0.2.88/eval_protocol/agent/task_manager.py +1073 -0
  41. eval_protocol-0.2.88/eval_protocol/agent/tool_registry.py +111 -0
  42. eval_protocol-0.2.88/eval_protocol/auth.py +331 -0
  43. eval_protocol-0.2.88/eval_protocol/benchmarks/__init__.py +0 -0
  44. eval_protocol-0.2.88/eval_protocol/benchmarks/data/airline_dataset.jsonl +50 -0
  45. eval_protocol-0.2.88/eval_protocol/benchmarks/data/retail_dataset.jsonl +114 -0
  46. eval_protocol-0.2.88/eval_protocol/benchmarks/test_aime25.py +130 -0
  47. eval_protocol-0.2.88/eval_protocol/benchmarks/test_frozen_lake.py +76 -0
  48. eval_protocol-0.2.88/eval_protocol/benchmarks/test_gpqa.py +154 -0
  49. eval_protocol-0.2.88/eval_protocol/benchmarks/test_livebench_data_analysis.py +549 -0
  50. eval_protocol-0.2.88/eval_protocol/benchmarks/test_tau_bench_airline.py +304 -0
  51. eval_protocol-0.2.88/eval_protocol/benchmarks/test_tau_bench_retail.py +294 -0
  52. eval_protocol-0.2.88/eval_protocol/cli.py +694 -0
  53. eval_protocol-0.2.88/eval_protocol/cli_commands/__init__.py +1 -0
  54. eval_protocol-0.2.88/eval_protocol/cli_commands/agent_eval_cmd.py +260 -0
  55. eval_protocol-0.2.88/eval_protocol/cli_commands/common.py +242 -0
  56. eval_protocol-0.2.88/eval_protocol/cli_commands/create_rft.py +734 -0
  57. eval_protocol-0.2.88/eval_protocol/cli_commands/deploy.py +509 -0
  58. eval_protocol-0.2.88/eval_protocol/cli_commands/deploy_mcp.py +290 -0
  59. eval_protocol-0.2.88/eval_protocol/cli_commands/local_test.py +175 -0
  60. eval_protocol-0.2.88/eval_protocol/cli_commands/logs.py +57 -0
  61. eval_protocol-0.2.88/eval_protocol/cli_commands/preview.py +186 -0
  62. eval_protocol-0.2.88/eval_protocol/cli_commands/run_eval_cmd.py +203 -0
  63. eval_protocol-0.2.88/eval_protocol/cli_commands/upload.py +743 -0
  64. eval_protocol-0.2.88/eval_protocol/common_utils.py +72 -0
  65. eval_protocol-0.2.88/eval_protocol/config.py +180 -0
  66. eval_protocol-0.2.88/eval_protocol/data_loader/__init__.py +5 -0
  67. eval_protocol-0.2.88/eval_protocol/data_loader/dynamic_data_loader.py +38 -0
  68. eval_protocol-0.2.88/eval_protocol/data_loader/factory_data_loader.py +38 -0
  69. eval_protocol-0.2.88/eval_protocol/data_loader/inline_data_loader.py +68 -0
  70. eval_protocol-0.2.88/eval_protocol/data_loader/jsonl_data_loader.py +42 -0
  71. eval_protocol-0.2.88/eval_protocol/data_loader/models.py +128 -0
  72. eval_protocol-0.2.88/eval_protocol/dataset_logger/__init__.py +40 -0
  73. eval_protocol-0.2.88/eval_protocol/dataset_logger/dataset_logger.py +37 -0
  74. eval_protocol-0.2.88/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +98 -0
  75. eval_protocol-0.2.88/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +45 -0
  76. eval_protocol-0.2.88/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +63 -0
  77. eval_protocol-0.2.88/eval_protocol/datasets/__init__.py +1 -0
  78. eval_protocol-0.2.88/eval_protocol/datasets/loader.py +519 -0
  79. eval_protocol-0.2.88/eval_protocol/directory_utils.py +39 -0
  80. eval_protocol-0.2.88/eval_protocol/evaluation.py +1471 -0
  81. eval_protocol-0.2.88/eval_protocol/event_bus/__init__.py +38 -0
  82. eval_protocol-0.2.88/eval_protocol/event_bus/event_bus.py +50 -0
  83. eval_protocol-0.2.88/eval_protocol/event_bus/logger.py +3 -0
  84. eval_protocol-0.2.88/eval_protocol/event_bus/sqlite_event_bus.py +126 -0
  85. eval_protocol-0.2.88/eval_protocol/event_bus/sqlite_event_bus_database.py +93 -0
  86. eval_protocol-0.2.88/eval_protocol/exceptions.py +177 -0
  87. eval_protocol-0.2.88/eval_protocol/execution/__init__.py +1 -0
  88. eval_protocol-0.2.88/eval_protocol/execution/pipeline.py +954 -0
  89. eval_protocol-0.2.88/eval_protocol/fireworks_rft.py +230 -0
  90. eval_protocol-0.2.88/eval_protocol/gcp_tools.py +484 -0
  91. eval_protocol-0.2.88/eval_protocol/generation/cache.py +141 -0
  92. eval_protocol-0.2.88/eval_protocol/generation/clients/base.py +67 -0
  93. eval_protocol-0.2.88/eval_protocol/generation/clients.py +254 -0
  94. eval_protocol-0.2.88/eval_protocol/generic_server.py +165 -0
  95. eval_protocol-0.2.88/eval_protocol/get_pep440_version.py +141 -0
  96. eval_protocol-0.2.88/eval_protocol/human_id/__init__.py +77 -0
  97. eval_protocol-0.2.88/eval_protocol/human_id/dictionary.py +507 -0
  98. eval_protocol-0.2.88/eval_protocol/integrations/__init__.py +9 -0
  99. eval_protocol-0.2.88/eval_protocol/integrations/deepeval.py +115 -0
  100. eval_protocol-0.2.88/eval_protocol/integrations/openeval.py +40 -0
  101. eval_protocol-0.2.88/eval_protocol/integrations/trl.py +187 -0
  102. eval_protocol-0.2.88/eval_protocol/log_utils/__init__.py +0 -0
  103. eval_protocol-0.2.88/eval_protocol/log_utils/elasticsearch_client.py +338 -0
  104. eval_protocol-0.2.88/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +160 -0
  105. eval_protocol-0.2.88/eval_protocol/log_utils/elasticsearch_index_manager.py +168 -0
  106. eval_protocol-0.2.88/eval_protocol/log_utils/fireworks_tracing_http_handler.py +138 -0
  107. eval_protocol-0.2.88/eval_protocol/log_utils/init.py +69 -0
  108. eval_protocol-0.2.88/eval_protocol/log_utils/rollout_context.py +84 -0
  109. eval_protocol-0.2.88/eval_protocol/log_utils/rollout_id_filter.py +28 -0
  110. eval_protocol-0.2.88/eval_protocol/log_utils/util.py +22 -0
  111. eval_protocol-0.2.88/eval_protocol/logging_utils.py +175 -0
  112. eval_protocol-0.2.88/eval_protocol/mcp/__init__.py +49 -0
  113. eval_protocol-0.2.88/eval_protocol/mcp/adapter.py +131 -0
  114. eval_protocol-0.2.88/eval_protocol/mcp/client/__init__.py +12 -0
  115. eval_protocol-0.2.88/eval_protocol/mcp/client/connection.py +565 -0
  116. eval_protocol-0.2.88/eval_protocol/mcp/clients.py +197 -0
  117. eval_protocol-0.2.88/eval_protocol/mcp/execution/__init__.py +23 -0
  118. eval_protocol-0.2.88/eval_protocol/mcp/execution/base_policy.py +236 -0
  119. eval_protocol-0.2.88/eval_protocol/mcp/execution/manager.py +618 -0
  120. eval_protocol-0.2.88/eval_protocol/mcp/execution/policy.py +327 -0
  121. eval_protocol-0.2.88/eval_protocol/mcp/grid_renderer.py +54 -0
  122. eval_protocol-0.2.88/eval_protocol/mcp/mcp_multi_client.py +211 -0
  123. eval_protocol-0.2.88/eval_protocol/mcp/mcpgym.py +664 -0
  124. eval_protocol-0.2.88/eval_protocol/mcp/process_manager.py +177 -0
  125. eval_protocol-0.2.88/eval_protocol/mcp/session/__init__.py +11 -0
  126. eval_protocol-0.2.88/eval_protocol/mcp/session/manager.py +229 -0
  127. eval_protocol-0.2.88/eval_protocol/mcp/simple_process_manager.py +291 -0
  128. eval_protocol-0.2.88/eval_protocol/mcp/simulation_server.py +480 -0
  129. eval_protocol-0.2.88/eval_protocol/mcp_agent/__init__.py +1 -0
  130. eval_protocol-0.2.88/eval_protocol/mcp_agent/config.py +147 -0
  131. eval_protocol-0.2.88/eval_protocol/mcp_agent/main.py +18 -0
  132. eval_protocol-0.2.88/eval_protocol/mcp_agent/orchestration/__init__.py +1 -0
  133. eval_protocol-0.2.88/eval_protocol/mcp_agent/orchestration/base_client.py +132 -0
  134. eval_protocol-0.2.88/eval_protocol/mcp_agent/orchestration/local_docker_client.py +711 -0
  135. eval_protocol-0.2.88/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +3 -0
  136. eval_protocol-0.2.88/eval_protocol/mcp_env.py +393 -0
  137. eval_protocol-0.2.88/eval_protocol/mcp_servers/__init__.py +0 -0
  138. eval_protocol-0.2.88/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +160 -0
  139. eval_protocol-0.2.88/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +102 -0
  140. eval_protocol-0.2.88/eval_protocol/mcp_servers/frozen_lake/server.py +57 -0
  141. eval_protocol-0.2.88/eval_protocol/mcp_servers/tau2/README.md +250 -0
  142. eval_protocol-0.2.88/eval_protocol/mcp_servers/tau2/__init__.py +61 -0
  143. eval_protocol-0.2.88/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +107 -0
  144. eval_protocol-0.2.88/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +100 -0
  145. eval_protocol-0.2.88/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +112 -0
  146. eval_protocol-0.2.88/eval_protocol/mcp_servers/tau2/server.py +83 -0
  147. eval_protocol-0.2.88/eval_protocol/mcp_servers/tau2/tau2_mcp.py +767 -0
  148. eval_protocol-0.2.88/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +178 -0
  149. eval_protocol-0.2.88/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +18 -0
  150. eval_protocol-0.2.88/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +147 -0
  151. eval_protocol-0.2.88/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +1689 -0
  152. eval_protocol-0.2.88/eval_protocol/models.py +1131 -0
  153. eval_protocol-0.2.88/eval_protocol/packaging.py +219 -0
  154. eval_protocol-0.2.88/eval_protocol/platform_api.py +379 -0
  155. eval_protocol-0.2.88/eval_protocol/playback_policy.py +374 -0
  156. eval_protocol-0.2.88/eval_protocol/proxy/__init__.py +18 -0
  157. eval_protocol-0.2.88/eval_protocol/proxy/proxy_core/__init__.py +13 -0
  158. eval_protocol-0.2.88/eval_protocol/proxy/proxy_core/app.py +305 -0
  159. eval_protocol-0.2.88/eval_protocol/proxy/proxy_core/auth.py +17 -0
  160. eval_protocol-0.2.88/eval_protocol/proxy/proxy_core/langfuse.py +528 -0
  161. eval_protocol-0.2.88/eval_protocol/proxy/proxy_core/litellm.py +173 -0
  162. eval_protocol-0.2.88/eval_protocol/proxy/proxy_core/main.py +10 -0
  163. eval_protocol-0.2.88/eval_protocol/proxy/proxy_core/models.py +98 -0
  164. eval_protocol-0.2.88/eval_protocol/proxy/proxy_core/redis_utils.py +57 -0
  165. eval_protocol-0.2.88/eval_protocol/pytest/__init__.py +52 -0
  166. eval_protocol-0.2.88/eval_protocol/pytest/default_agent_rollout_processor.py +279 -0
  167. eval_protocol-0.2.88/eval_protocol/pytest/default_dataset_adapter.py +9 -0
  168. eval_protocol-0.2.88/eval_protocol/pytest/default_langchain_rollout_processor.py +159 -0
  169. eval_protocol-0.2.88/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +332 -0
  170. eval_protocol-0.2.88/eval_protocol/pytest/default_no_op_rollout_processor.py +27 -0
  171. eval_protocol-0.2.88/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +163 -0
  172. eval_protocol-0.2.88/eval_protocol/pytest/default_single_turn_rollout_process.py +166 -0
  173. eval_protocol-0.2.88/eval_protocol/pytest/dual_mode_wrapper.py +78 -0
  174. eval_protocol-0.2.88/eval_protocol/pytest/elasticsearch_setup.py +167 -0
  175. eval_protocol-0.2.88/eval_protocol/pytest/evaluation_test.py +708 -0
  176. eval_protocol-0.2.88/eval_protocol/pytest/evaluation_test_postprocess.py +208 -0
  177. eval_protocol-0.2.88/eval_protocol/pytest/evaluation_test_utils.py +594 -0
  178. eval_protocol-0.2.88/eval_protocol/pytest/exception_config.py +144 -0
  179. eval_protocol-0.2.88/eval_protocol/pytest/execution.py +111 -0
  180. eval_protocol-0.2.88/eval_protocol/pytest/generate_parameter_combinations.py +145 -0
  181. eval_protocol-0.2.88/eval_protocol/pytest/github_action_rollout_processor.py +225 -0
  182. eval_protocol-0.2.88/eval_protocol/pytest/handle_persist_flow.py +225 -0
  183. eval_protocol-0.2.88/eval_protocol/pytest/parameterize.py +424 -0
  184. eval_protocol-0.2.88/eval_protocol/pytest/plugin.py +413 -0
  185. eval_protocol-0.2.88/eval_protocol/pytest/remote_rollout_processor.py +207 -0
  186. eval_protocol-0.2.88/eval_protocol/pytest/rollout_processor.py +24 -0
  187. eval_protocol-0.2.88/eval_protocol/pytest/store_experiment_link.py +41 -0
  188. eval_protocol-0.2.88/eval_protocol/pytest/store_results_url.py +49 -0
  189. eval_protocol-0.2.88/eval_protocol/pytest/tracing_utils.py +177 -0
  190. eval_protocol-0.2.88/eval_protocol/pytest/types.py +77 -0
  191. eval_protocol-0.2.88/eval_protocol/pytest/validate_signature.py +71 -0
  192. eval_protocol-0.2.88/eval_protocol/quickstart/__init__.py +8 -0
  193. eval_protocol-0.2.88/eval_protocol/quickstart/aha_judge/__init__.py +4 -0
  194. eval_protocol-0.2.88/eval_protocol/quickstart/aha_judge/llm_judge.py +90 -0
  195. eval_protocol-0.2.88/eval_protocol/quickstart/aha_judge/llm_judge_braintrust.py +63 -0
  196. eval_protocol-0.2.88/eval_protocol/quickstart/aha_judge/llm_judge_langfuse.py +58 -0
  197. eval_protocol-0.2.88/eval_protocol/quickstart/aha_judge/llm_judge_langsmith.py +82 -0
  198. eval_protocol-0.2.88/eval_protocol/quickstart/aha_judge/llm_judge_openai_responses.py +66 -0
  199. eval_protocol-0.2.88/eval_protocol/quickstart/aha_judge/utils.py +133 -0
  200. eval_protocol-0.2.88/eval_protocol/quickstart/llm_judge.py +90 -0
  201. eval_protocol-0.2.88/eval_protocol/quickstart/llm_judge_braintrust.py +63 -0
  202. eval_protocol-0.2.88/eval_protocol/quickstart/svg_agent/evaluator/test_svgagent.py +223 -0
  203. eval_protocol-0.2.88/eval_protocol/quickstart/svg_agent/evaluator/utils.py +523 -0
  204. eval_protocol-0.2.88/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +202 -0
  205. eval_protocol-0.2.88/eval_protocol/quickstart/utils.py +251 -0
  206. eval_protocol-0.2.88/eval_protocol/resources.py +128 -0
  207. eval_protocol-0.2.88/eval_protocol/reward_function.py +410 -0
  208. eval_protocol-0.2.88/eval_protocol/rewards/__init__.py +90 -0
  209. eval_protocol-0.2.88/eval_protocol/rewards/accuracy.py +469 -0
  210. eval_protocol-0.2.88/eval_protocol/rewards/accuracy_length.py +186 -0
  211. eval_protocol-0.2.88/eval_protocol/rewards/apps_coding_reward.py +331 -0
  212. eval_protocol-0.2.88/eval_protocol/rewards/apps_execution_utils.py +149 -0
  213. eval_protocol-0.2.88/eval_protocol/rewards/apps_testing_util.py +564 -0
  214. eval_protocol-0.2.88/eval_protocol/rewards/bfcl_reward.py +314 -0
  215. eval_protocol-0.2.88/eval_protocol/rewards/code_execution.py +1634 -0
  216. eval_protocol-0.2.88/eval_protocol/rewards/code_execution_utils.py +72 -0
  217. eval_protocol-0.2.88/eval_protocol/rewards/cpp_code.py +861 -0
  218. eval_protocol-0.2.88/eval_protocol/rewards/deepcoder_reward.py +166 -0
  219. eval_protocol-0.2.88/eval_protocol/rewards/format.py +132 -0
  220. eval_protocol-0.2.88/eval_protocol/rewards/function_calling.py +543 -0
  221. eval_protocol-0.2.88/eval_protocol/rewards/json_schema.py +444 -0
  222. eval_protocol-0.2.88/eval_protocol/rewards/language_consistency.py +705 -0
  223. eval_protocol-0.2.88/eval_protocol/rewards/lean_prover.py +482 -0
  224. eval_protocol-0.2.88/eval_protocol/rewards/length.py +377 -0
  225. eval_protocol-0.2.88/eval_protocol/rewards/list_comparison_math_reward.py +226 -0
  226. eval_protocol-0.2.88/eval_protocol/rewards/math.py +772 -0
  227. eval_protocol-0.2.88/eval_protocol/rewards/multiple_choice_math_reward.py +242 -0
  228. eval_protocol-0.2.88/eval_protocol/rewards/reasoning_steps.py +249 -0
  229. eval_protocol-0.2.88/eval_protocol/rewards/repetition.py +356 -0
  230. eval_protocol-0.2.88/eval_protocol/rewards/tag_count.py +175 -0
  231. eval_protocol-0.2.88/eval_protocol/rl_processing.py +82 -0
  232. eval_protocol-0.2.88/eval_protocol/server.py +271 -0
  233. eval_protocol-0.2.88/eval_protocol/stats/__init__.py +3 -0
  234. eval_protocol-0.2.88/eval_protocol/stats/confidence_intervals.py +114 -0
  235. eval_protocol-0.2.88/eval_protocol/typed_interface.py +306 -0
  236. eval_protocol-0.2.88/eval_protocol/types/__init__.py +4 -0
  237. eval_protocol-0.2.88/eval_protocol/types/errors.py +11 -0
  238. eval_protocol-0.2.88/eval_protocol/types/remote_rollout_processor.py +87 -0
  239. eval_protocol-0.2.88/eval_protocol/types/types.py +107 -0
  240. eval_protocol-0.2.88/eval_protocol/utils/__init__.py +13 -0
  241. eval_protocol-0.2.88/eval_protocol/utils/batch_evaluation.py +217 -0
  242. eval_protocol-0.2.88/eval_protocol/utils/batch_transformation.py +205 -0
  243. eval_protocol-0.2.88/eval_protocol/utils/browser_utils.py +114 -0
  244. eval_protocol-0.2.88/eval_protocol/utils/check_server_status.py +77 -0
  245. eval_protocol-0.2.88/eval_protocol/utils/dataset_helpers.py +112 -0
  246. eval_protocol-0.2.88/eval_protocol/utils/evaluation_row_utils.py +158 -0
  247. eval_protocol-0.2.88/eval_protocol/utils/logs_models.py +45 -0
  248. eval_protocol-0.2.88/eval_protocol/utils/logs_server.py +720 -0
  249. eval_protocol-0.2.88/eval_protocol/utils/module_loader.py +56 -0
  250. eval_protocol-0.2.88/eval_protocol/utils/packaging_utils.py +108 -0
  251. eval_protocol-0.2.88/eval_protocol/utils/show_results_url.py +74 -0
  252. eval_protocol-0.2.88/eval_protocol/utils/static_policy.py +309 -0
  253. eval_protocol-0.2.88/eval_protocol/utils/subprocess_utils.py +118 -0
  254. eval_protocol-0.2.88/eval_protocol/utils/vite_server.py +143 -0
  255. eval_protocol-0.2.88/eval_protocol.egg-info/PKG-INFO +154 -0
  256. eval_protocol-0.2.88/eval_protocol.egg-info/SOURCES.txt +448 -0
  257. eval_protocol-0.2.88/eval_protocol.egg-info/dependency_links.txt +1 -0
  258. eval_protocol-0.2.88/eval_protocol.egg-info/entry_points.txt +7 -0
  259. eval_protocol-0.2.88/eval_protocol.egg-info/requires.txt +119 -0
  260. eval_protocol-0.2.88/eval_protocol.egg-info/top_level.txt +3 -0
  261. eval_protocol-0.2.88/pyproject.toml +225 -0
  262. eval_protocol-0.2.88/setup.cfg +16 -0
  263. eval_protocol-0.2.88/setup.py +8 -0
  264. eval_protocol-0.2.88/tests/test_accuracy.py +344 -0
  265. eval_protocol-0.2.88/tests/test_accuracy_length.py +286 -0
  266. eval_protocol-0.2.88/tests/test_adapters_e2e.py +765 -0
  267. eval_protocol-0.2.88/tests/test_agent_orchestrator.py +507 -0
  268. eval_protocol-0.2.88/tests/test_agent_resources.py +426 -0
  269. eval_protocol-0.2.88/tests/test_auth.py +396 -0
  270. eval_protocol-0.2.88/tests/test_batch_evaluation.py +1202 -0
  271. eval_protocol-0.2.88/tests/test_cli.py +170 -0
  272. eval_protocol-0.2.88/tests/test_cli_agent.py +217 -0
  273. eval_protocol-0.2.88/tests/test_cli_args.py +156 -0
  274. eval_protocol-0.2.88/tests/test_cli_create_rft_infer.py +1038 -0
  275. eval_protocol-0.2.88/tests/test_cli_local_test.py +256 -0
  276. eval_protocol-0.2.88/tests/test_code_execution.py +572 -0
  277. eval_protocol-0.2.88/tests/test_config.py +219 -0
  278. eval_protocol-0.2.88/tests/test_control_plane_separation.py +284 -0
  279. eval_protocol-0.2.88/tests/test_cpp_code.py +833 -0
  280. eval_protocol-0.2.88/tests/test_data_driven_task_manager.py +483 -0
  281. eval_protocol-0.2.88/tests/test_deepcoder_reward.py +334 -0
  282. eval_protocol-0.2.88/tests/test_deepeval_integration.py +377 -0
  283. eval_protocol-0.2.88/tests/test_deploy_integration.py +214 -0
  284. eval_protocol-0.2.88/tests/test_directory_utils.py +95 -0
  285. eval_protocol-0.2.88/tests/test_e2b_integration.py +74 -0
  286. eval_protocol-0.2.88/tests/test_e2b_js_integration.py +80 -0
  287. eval_protocol-0.2.88/tests/test_edge_cases.py +160 -0
  288. eval_protocol-0.2.88/tests/test_ep_upload_e2e.py +646 -0
  289. eval_protocol-0.2.88/tests/test_eval_protocol_import.py +275 -0
  290. eval_protocol-0.2.88/tests/test_evaluation.py +431 -0
  291. eval_protocol-0.2.88/tests/test_evaluation_integration.py +365 -0
  292. eval_protocol-0.2.88/tests/test_evaluation_postprocess.py +467 -0
  293. eval_protocol-0.2.88/tests/test_evaluation_preview_integration.py +470 -0
  294. eval_protocol-0.2.88/tests/test_event_bus.py +301 -0
  295. eval_protocol-0.2.88/tests/test_event_bus_helper.py +73 -0
  296. eval_protocol-0.2.88/tests/test_examples_end_to_end.py +962 -0
  297. eval_protocol-0.2.88/tests/test_exceptions.py +371 -0
  298. eval_protocol-0.2.88/tests/test_fireworks_api.py +68 -0
  299. eval_protocol-0.2.88/tests/test_format.py +227 -0
  300. eval_protocol-0.2.88/tests/test_fractional_code.py +312 -0
  301. eval_protocol-0.2.88/tests/test_function_calling.py +1152 -0
  302. eval_protocol-0.2.88/tests/test_gcp_tools.py +578 -0
  303. eval_protocol-0.2.88/tests/test_generic_server.py +207 -0
  304. eval_protocol-0.2.88/tests/test_human_id.py +94 -0
  305. eval_protocol-0.2.88/tests/test_integration.py +159 -0
  306. eval_protocol-0.2.88/tests/test_json_schema.py +425 -0
  307. eval_protocol-0.2.88/tests/test_kwargs_validation.py +178 -0
  308. eval_protocol-0.2.88/tests/test_language_consistency.py +232 -0
  309. eval_protocol-0.2.88/tests/test_lean_prover.py +165 -0
  310. eval_protocol-0.2.88/tests/test_lean_prover_runner.py +127 -0
  311. eval_protocol-0.2.88/tests/test_length.py +379 -0
  312. eval_protocol-0.2.88/tests/test_list_comparison_math_reward.py +207 -0
  313. eval_protocol-0.2.88/tests/test_logs_server.py +596 -0
  314. eval_protocol-0.2.88/tests/test_logs_server_simple.py +88 -0
  315. eval_protocol-0.2.88/tests/test_math.py +540 -0
  316. eval_protocol-0.2.88/tests/test_message_field_filtering.py +64 -0
  317. eval_protocol-0.2.88/tests/test_minimal.py +113 -0
  318. eval_protocol-0.2.88/tests/test_models.py +723 -0
  319. eval_protocol-0.2.88/tests/test_models_rl.py +158 -0
  320. eval_protocol-0.2.88/tests/test_multiple_choice_math_reward.py +230 -0
  321. eval_protocol-0.2.88/tests/test_n_variant_batch_integration.py +407 -0
  322. eval_protocol-0.2.88/tests/test_n_variant_integration.py +205 -0
  323. eval_protocol-0.2.88/tests/test_openai_compatibility.py +82 -0
  324. eval_protocol-0.2.88/tests/test_openeval_integration.py +44 -0
  325. eval_protocol-0.2.88/tests/test_packaging.py +119 -0
  326. eval_protocol-0.2.88/tests/test_parallel_rollouts.py +379 -0
  327. eval_protocol-0.2.88/tests/test_platform_api.py +166 -0
  328. eval_protocol-0.2.88/tests/test_quickstart_utils.py +388 -0
  329. eval_protocol-0.2.88/tests/test_readiness.py +332 -0
  330. eval_protocol-0.2.88/tests/test_reasoning_steps.py +359 -0
  331. eval_protocol-0.2.88/tests/test_repetition.py +285 -0
  332. eval_protocol-0.2.88/tests/test_repetition_debug.py +21 -0
  333. eval_protocol-0.2.88/tests/test_retry_mechanism.py +399 -0
  334. eval_protocol-0.2.88/tests/test_reward_function.py +236 -0
  335. eval_protocol-0.2.88/tests/test_reward_protocol_import.py +274 -0
  336. eval_protocol-0.2.88/tests/test_rl_processing.py +167 -0
  337. eval_protocol-0.2.88/tests/test_rollout_control_plane_integration.py +630 -0
  338. eval_protocol-0.2.88/tests/test_server.py +99 -0
  339. eval_protocol-0.2.88/tests/test_show_results_url.py +336 -0
  340. eval_protocol-0.2.88/tests/test_status_migration_changes.py +440 -0
  341. eval_protocol-0.2.88/tests/test_status_migration_integration.py +388 -0
  342. eval_protocol-0.2.88/tests/test_status_model.py +360 -0
  343. eval_protocol-0.2.88/tests/test_tag_count.py +274 -0
  344. eval_protocol-0.2.88/tests/test_tau_bench_airline_smoke.py +241 -0
  345. eval_protocol-0.2.88/tests/test_typed_interface.py +262 -0
  346. eval_protocol-0.2.88/tests/test_typed_interface_rl.py +211 -0
  347. eval_protocol-0.2.88/tests/test_upload_entrypoint.py +227 -0
  348. eval_protocol-0.2.88/tests/test_url_handling.py +68 -0
  349. eval_protocol-0.2.88/tests/test_vite_server.py +224 -0
  350. eval_protocol-0.2.88/vendor/tau2/__init__.py +21 -0
  351. eval_protocol-0.2.88/vendor/tau2/agent/__init__.py +0 -0
  352. eval_protocol-0.2.88/vendor/tau2/agent/base.py +91 -0
  353. eval_protocol-0.2.88/vendor/tau2/agent/llm_agent.py +462 -0
  354. eval_protocol-0.2.88/vendor/tau2/api_service/__init__.py +1 -0
  355. eval_protocol-0.2.88/vendor/tau2/api_service/api_config.py +30 -0
  356. eval_protocol-0.2.88/vendor/tau2/api_service/data_model.py +19 -0
  357. eval_protocol-0.2.88/vendor/tau2/api_service/simulation_service.py +56 -0
  358. eval_protocol-0.2.88/vendor/tau2/cli.py +236 -0
  359. eval_protocol-0.2.88/vendor/tau2/config.py +45 -0
  360. eval_protocol-0.2.88/vendor/tau2/data/domains/airline/policy.md +167 -0
  361. eval_protocol-0.2.88/vendor/tau2/data/domains/mock/policy.md +7 -0
  362. eval_protocol-0.2.88/vendor/tau2/data/domains/mock/policy_solo.md +6 -0
  363. eval_protocol-0.2.88/vendor/tau2/data/domains/retail/policy.md +136 -0
  364. eval_protocol-0.2.88/vendor/tau2/data/domains/telecom/main_policy.md +159 -0
  365. eval_protocol-0.2.88/vendor/tau2/data/domains/telecom/main_policy_solo.md +155 -0
  366. eval_protocol-0.2.88/vendor/tau2/data/domains/telecom/tech_support_manual.md +206 -0
  367. eval_protocol-0.2.88/vendor/tau2/data/domains/telecom/tech_support_workflow.md +303 -0
  368. eval_protocol-0.2.88/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +299 -0
  369. eval_protocol-0.2.88/vendor/tau2/data/user_simulator/simulation_guidelines.md +18 -0
  370. eval_protocol-0.2.88/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +30 -0
  371. eval_protocol-0.2.88/vendor/tau2/data_model/__init__.py +0 -0
  372. eval_protocol-0.2.88/vendor/tau2/data_model/message.py +203 -0
  373. eval_protocol-0.2.88/vendor/tau2/data_model/simulation.py +408 -0
  374. eval_protocol-0.2.88/vendor/tau2/data_model/tasks.py +443 -0
  375. eval_protocol-0.2.88/vendor/tau2/domains/__init__.py +0 -0
  376. eval_protocol-0.2.88/vendor/tau2/domains/airline/__init__.py +1 -0
  377. eval_protocol-0.2.88/vendor/tau2/domains/airline/data_model.py +240 -0
  378. eval_protocol-0.2.88/vendor/tau2/domains/airline/environment.py +37 -0
  379. eval_protocol-0.2.88/vendor/tau2/domains/airline/tools.py +701 -0
  380. eval_protocol-0.2.88/vendor/tau2/domains/airline/utils.py +6 -0
  381. eval_protocol-0.2.88/vendor/tau2/domains/mock/__init__.py +1 -0
  382. eval_protocol-0.2.88/vendor/tau2/domains/mock/data_model.py +32 -0
  383. eval_protocol-0.2.88/vendor/tau2/domains/mock/environment.py +39 -0
  384. eval_protocol-0.2.88/vendor/tau2/domains/mock/tools.py +121 -0
  385. eval_protocol-0.2.88/vendor/tau2/domains/mock/utils.py +7 -0
  386. eval_protocol-0.2.88/vendor/tau2/domains/retail/__init__.py +1 -0
  387. eval_protocol-0.2.88/vendor/tau2/domains/retail/data_model.py +195 -0
  388. eval_protocol-0.2.88/vendor/tau2/domains/retail/environment.py +37 -0
  389. eval_protocol-0.2.88/vendor/tau2/domains/retail/tools.py +701 -0
  390. eval_protocol-0.2.88/vendor/tau2/domains/retail/utils.py +6 -0
  391. eval_protocol-0.2.88/vendor/tau2/domains/telecom/__init__.py +1 -0
  392. eval_protocol-0.2.88/vendor/tau2/domains/telecom/data_model.py +206 -0
  393. eval_protocol-0.2.88/vendor/tau2/domains/telecom/environment.py +172 -0
  394. eval_protocol-0.2.88/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
  395. eval_protocol-0.2.88/vendor/tau2/domains/telecom/tasks/const.py +26 -0
  396. eval_protocol-0.2.88/vendor/tau2/domains/telecom/tasks/create_tasks.py +92 -0
  397. eval_protocol-0.2.88/vendor/tau2/domains/telecom/tasks/manager.py +209 -0
  398. eval_protocol-0.2.88/vendor/tau2/domains/telecom/tasks/mms_issues.py +325 -0
  399. eval_protocol-0.2.88/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +541 -0
  400. eval_protocol-0.2.88/vendor/tau2/domains/telecom/tasks/service_issues.py +452 -0
  401. eval_protocol-0.2.88/vendor/tau2/domains/telecom/tasks/utils.py +124 -0
  402. eval_protocol-0.2.88/vendor/tau2/domains/telecom/tools.py +752 -0
  403. eval_protocol-0.2.88/vendor/tau2/domains/telecom/user_data_model.py +392 -0
  404. eval_protocol-0.2.88/vendor/tau2/domains/telecom/user_tools.py +1109 -0
  405. eval_protocol-0.2.88/vendor/tau2/domains/telecom/utils.py +26 -0
  406. eval_protocol-0.2.88/vendor/tau2/environment/__init__.py +0 -0
  407. eval_protocol-0.2.88/vendor/tau2/environment/db.py +41 -0
  408. eval_protocol-0.2.88/vendor/tau2/environment/environment.py +391 -0
  409. eval_protocol-0.2.88/vendor/tau2/environment/server.py +223 -0
  410. eval_protocol-0.2.88/vendor/tau2/environment/tool.py +216 -0
  411. eval_protocol-0.2.88/vendor/tau2/environment/toolkit.py +206 -0
  412. eval_protocol-0.2.88/vendor/tau2/environment/utils/interface_agent.py +255 -0
  413. eval_protocol-0.2.88/vendor/tau2/evaluator/__init__.py +0 -0
  414. eval_protocol-0.2.88/vendor/tau2/evaluator/evaluator.py +129 -0
  415. eval_protocol-0.2.88/vendor/tau2/evaluator/evaluator_action.py +86 -0
  416. eval_protocol-0.2.88/vendor/tau2/evaluator/evaluator_base.py +26 -0
  417. eval_protocol-0.2.88/vendor/tau2/evaluator/evaluator_communicate.py +83 -0
  418. eval_protocol-0.2.88/vendor/tau2/evaluator/evaluator_env.py +140 -0
  419. eval_protocol-0.2.88/vendor/tau2/evaluator/evaluator_nl_assertions.py +145 -0
  420. eval_protocol-0.2.88/vendor/tau2/metrics/__init__.py +0 -0
  421. eval_protocol-0.2.88/vendor/tau2/metrics/agent_metrics.py +139 -0
  422. eval_protocol-0.2.88/vendor/tau2/metrics/break_down_metrics.py +124 -0
  423. eval_protocol-0.2.88/vendor/tau2/orchestrator/__init__.py +0 -0
  424. eval_protocol-0.2.88/vendor/tau2/orchestrator/environment_manager.py +259 -0
  425. eval_protocol-0.2.88/vendor/tau2/orchestrator/orchestrator.py +390 -0
  426. eval_protocol-0.2.88/vendor/tau2/orchestrator/utils.py +8 -0
  427. eval_protocol-0.2.88/vendor/tau2/registry.py +192 -0
  428. eval_protocol-0.2.88/vendor/tau2/run.py +508 -0
  429. eval_protocol-0.2.88/vendor/tau2/scripts/__init__.py +0 -0
  430. eval_protocol-0.2.88/vendor/tau2/scripts/check_data.py +32 -0
  431. eval_protocol-0.2.88/vendor/tau2/scripts/show_domain_doc.py +77 -0
  432. eval_protocol-0.2.88/vendor/tau2/scripts/start_servers.py +97 -0
  433. eval_protocol-0.2.88/vendor/tau2/scripts/view_simulations.py +268 -0
  434. eval_protocol-0.2.88/vendor/tau2/user/__init__.py +0 -0
  435. eval_protocol-0.2.88/vendor/tau2/user/base.py +144 -0
  436. eval_protocol-0.2.88/vendor/tau2/user/user_simulator.py +200 -0
  437. eval_protocol-0.2.88/vendor/tau2/utils/__init__.py +3 -0
  438. eval_protocol-0.2.88/vendor/tau2/utils/display.py +490 -0
  439. eval_protocol-0.2.88/vendor/tau2/utils/io_utils.py +75 -0
  440. eval_protocol-0.2.88/vendor/tau2/utils/llm_utils.py +305 -0
  441. eval_protocol-0.2.88/vendor/tau2/utils/pydantic_utils.py +32 -0
  442. eval_protocol-0.2.88/vendor/tau2/utils/utils.py +77 -0
  443. eval_protocol-0.2.88/versioneer.py +2305 -0
  444. eval_protocol-0.2.88/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
  445. eval_protocol-0.2.88/vite-app/dist/assets/index-BIhepl19.css +1 -0
  446. eval_protocol-0.2.88/vite-app/dist/assets/index-DaovgarD.js +137 -0
  447. eval_protocol-0.2.88/vite-app/dist/assets/index-DaovgarD.js.map +1 -0
  448. eval_protocol-0.2.88/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
  449. eval_protocol-0.2.88/vite-app/dist/index.html +14 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Fireworks AI, Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,154 @@
1
+ Metadata-Version: 2.4
2
+ Name: eval-protocol
3
+ Version: 0.2.88
4
+ Summary: The official Python SDK for Eval Protocol (EP). EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
+ Author-email: Fireworks AI <info@fireworks.ai>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/fireworks-ai/eval-protocol
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Operating System :: OS Independent
10
+ Requires-Python: >=3.10
11
+ Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Requires-Dist: requests>=2.25.0
14
+ Requires-Dist: pydantic>=2.0.0
15
+ Requires-Dist: dataclasses-json>=0.5.7
16
+ Requires-Dist: uvicorn>=0.15.0
17
+ Requires-Dist: python-dotenv>=0.19.0
18
+ Requires-Dist: openai>=1.78.1
19
+ Requires-Dist: aiosqlite
20
+ Requires-Dist: aiohttp
21
+ Requires-Dist: mcp>=1.9.2
22
+ Requires-Dist: PyYAML>=5.0
23
+ Requires-Dist: hydra-core>=1.3.2
24
+ Requires-Dist: omegaconf>=2.3.0
25
+ Requires-Dist: httpx>=0.24.0
26
+ Requires-Dist: anthropic>=0.59.0
27
+ Requires-Dist: litellm<1.75.0
28
+ Requires-Dist: pytest>=6.0.0
29
+ Requires-Dist: pytest-asyncio>=0.21.0
30
+ Requires-Dist: peewee>=3.18.2
31
+ Requires-Dist: backoff>=2.2.0
32
+ Requires-Dist: questionary>=2.0.0
33
+ Requires-Dist: toml>=0.10.0
34
+ Requires-Dist: loguru>=0.6.0
35
+ Requires-Dist: docstring-parser>=0.15
36
+ Requires-Dist: rich>=12.0.0
37
+ Requires-Dist: psutil>=5.8.0
38
+ Requires-Dist: addict>=2.4.0
39
+ Requires-Dist: deepdiff>=6.0.0
40
+ Requires-Dist: websockets>=15.0.1
41
+ Requires-Dist: fastapi>=0.116.1
42
+ Provides-Extra: dev
43
+ Requires-Dist: build; extra == "dev"
44
+ Requires-Dist: twine; extra == "dev"
45
+ Requires-Dist: pytest-httpserver; extra == "dev"
46
+ Requires-Dist: werkzeug>=2.0.0; extra == "dev"
47
+ Requires-Dist: ruff>=0.5.0; extra == "dev"
48
+ Requires-Dist: transformers>=4.0.0; extra == "dev"
49
+ Requires-Dist: pandas>=1.5.0; extra == "dev"
50
+ Requires-Dist: types-setuptools; extra == "dev"
51
+ Requires-Dist: types-requests; extra == "dev"
52
+ Requires-Dist: types-PyYAML; extra == "dev"
53
+ Requires-Dist: types-docker; extra == "dev"
54
+ Requires-Dist: versioneer>=0.20; extra == "dev"
55
+ Requires-Dist: openai>=1.78.1; extra == "dev"
56
+ Requires-Dist: pre-commit; extra == "dev"
57
+ Requires-Dist: e2b; extra == "dev"
58
+ Requires-Dist: pytest-cov; extra == "dev"
59
+ Requires-Dist: pytest-xdist; extra == "dev"
60
+ Requires-Dist: docker==7.1.0; extra == "dev"
61
+ Requires-Dist: ipykernel>=6.30.0; extra == "dev"
62
+ Requires-Dist: jupyter>=1.1.1; extra == "dev"
63
+ Requires-Dist: pip>=25.1.1; extra == "dev"
64
+ Requires-Dist: haikus==0.3.8; extra == "dev"
65
+ Requires-Dist: syrupy>=4.0.0; extra == "dev"
66
+ Requires-Dist: gymnasium>=1.2.0; extra == "dev"
67
+ Provides-Extra: trl
68
+ Requires-Dist: torch>=1.9; extra == "trl"
69
+ Requires-Dist: trl>=0.7.0; extra == "trl"
70
+ Requires-Dist: peft>=0.7.0; extra == "trl"
71
+ Requires-Dist: transformers>=4.0.0; extra == "trl"
72
+ Requires-Dist: accelerate>=0.28.0; extra == "trl"
73
+ Provides-Extra: openevals
74
+ Requires-Dist: openevals>=0.1.0; extra == "openevals"
75
+ Provides-Extra: fireworks
76
+ Requires-Dist: fireworks-ai>=0.19.19; extra == "fireworks"
77
+ Provides-Extra: box2d
78
+ Requires-Dist: swig; extra == "box2d"
79
+ Requires-Dist: gymnasium[box2d]>=0.29.0; extra == "box2d"
80
+ Requires-Dist: Pillow; extra == "box2d"
81
+ Provides-Extra: langfuse
82
+ Requires-Dist: langfuse>=2.0.0; extra == "langfuse"
83
+ Provides-Extra: huggingface
84
+ Requires-Dist: datasets>=3.0.0; extra == "huggingface"
85
+ Requires-Dist: transformers>=4.0.0; extra == "huggingface"
86
+ Provides-Extra: langsmith
87
+ Requires-Dist: langsmith>=0.1.86; extra == "langsmith"
88
+ Provides-Extra: bigquery
89
+ Requires-Dist: google-cloud-bigquery>=3.0.0; extra == "bigquery"
90
+ Requires-Dist: google-auth>=2.0.0; extra == "bigquery"
91
+ Provides-Extra: svgbench
92
+ Requires-Dist: selenium>=4.0.0; extra == "svgbench"
93
+ Provides-Extra: pydantic
94
+ Requires-Dist: pydantic-ai>=1.0.2; extra == "pydantic"
95
+ Provides-Extra: supabase
96
+ Requires-Dist: supabase>=2.18.1; extra == "supabase"
97
+ Provides-Extra: chinook
98
+ Requires-Dist: psycopg2-binary>=2.9.10; extra == "chinook"
99
+ Provides-Extra: langchain
100
+ Requires-Dist: langchain-core>=0.3.0; extra == "langchain"
101
+ Provides-Extra: braintrust
102
+ Requires-Dist: braintrust[otel]; extra == "braintrust"
103
+ Provides-Extra: langgraph
104
+ Requires-Dist: langgraph>=0.6.7; extra == "langgraph"
105
+ Requires-Dist: langchain-core>=0.3.75; extra == "langgraph"
106
+ Provides-Extra: langgraph-tools
107
+ Requires-Dist: langgraph>=0.6.7; extra == "langgraph-tools"
108
+ Requires-Dist: langchain>=0.3.0; extra == "langgraph-tools"
109
+ Requires-Dist: langchain-fireworks>=0.3.0; extra == "langgraph-tools"
110
+ Provides-Extra: proxy
111
+ Requires-Dist: redis>=5.0.0; extra == "proxy"
112
+ Requires-Dist: langfuse>=2.0.0; extra == "proxy"
113
+ Requires-Dist: uuid6>=2025.0.0; extra == "proxy"
114
+ Dynamic: license-file
115
+
116
+ # Eval Protocol
117
+
118
+ [![PyPI - Version](https://img.shields.io/pypi/v/eval-protocol)](https://pypi.org/project/eval-protocol/)
119
+ [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/eval-protocol/python-sdk)
120
+
121
+ **Eval Protocol (EP) is an open solution for doing reinforcement learning fine-tuning on existing agents — across any language, container, or framework.**
122
+
123
+ ![Eval Protocol overview](./docs/intro.png)
124
+
125
+ Most teams already have complex agents running in production — often across remote services with heavy dependencies, Docker containers, or TypeScript backends deployed on Vercel. When they try to train or fine-tune these agents with reinforcement learning, connecting them to a trainer quickly becomes painful.
126
+
127
+ Eval Protocol makes connecting an existing agent to a trainer straightforward in two ways:
128
+
129
+ 1. **Expose your agent through a simple API**
130
+ Wrap your existing agent (Python, TypeScript, Docker, etc.) in a simple HTTP service using EP’s rollout interface. EP handles the rollout orchestration, metadata passing, and trace storage automatically.
131
+ 2. **Connect with any trainer**
132
+ Once your agent speaks the EP standard, it can be fine-tuned or evaluated with any supported trainer — Fireworks RFT, TRL, Unsloth, or your own — with no environment rewrites.
133
+
134
+ The result: RL that works out of the box for existing production agents.
135
+
136
+ ## Who This Is For
137
+
138
+ - **Applied AI teams** adding RL to existing production agents.
139
+ - **Research engineers** experimenting with fine-tuning complex, multi-turn or tool-using agents.
140
+ - **MLOps teams** building reproducible, language-agnostic rollout pipelines.
141
+
142
+ ## Quickstart
143
+
144
+ - See the Quickstart repository: [eval-protocol/quickstart](https://github.com/eval-protocol/quickstart/tree/main)
145
+
146
+ ## Resources
147
+
148
+ - **[Documentation](https://evalprotocol.io)** – Guides and API reference
149
+ - **[Discord](https://discord.com/channels/1137072072808472616/1400975572405850155)** – Community
150
+ - **[GitHub](https://github.com/eval-protocol/python-sdk)** – Source and examples
151
+
152
+ ## License
153
+
154
+ [MIT](LICENSE)
@@ -0,0 +1,39 @@
1
+ # Eval Protocol
2
+
3
+ [![PyPI - Version](https://img.shields.io/pypi/v/eval-protocol)](https://pypi.org/project/eval-protocol/)
4
+ [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/eval-protocol/python-sdk)
5
+
6
+ **Eval Protocol (EP) is an open solution for doing reinforcement learning fine-tuning on existing agents — across any language, container, or framework.**
7
+
8
+ ![Eval Protocol overview](./docs/intro.png)
9
+
10
+ Most teams already have complex agents running in production — often across remote services with heavy dependencies, Docker containers, or TypeScript backends deployed on Vercel. When they try to train or fine-tune these agents with reinforcement learning, connecting them to a trainer quickly becomes painful.
11
+
12
+ Eval Protocol makes connecting an existing agent to a trainer straightforward in two ways:
13
+
14
+ 1. **Expose your agent through a simple API**
15
+ Wrap your existing agent (Python, TypeScript, Docker, etc.) in a simple HTTP service using EP’s rollout interface. EP handles the rollout orchestration, metadata passing, and trace storage automatically.
16
+ 2. **Connect with any trainer**
17
+ Once your agent speaks the EP standard, it can be fine-tuned or evaluated with any supported trainer — Fireworks RFT, TRL, Unsloth, or your own — with no environment rewrites.
18
+
19
+ The result: RL that works out of the box for existing production agents.
20
+
21
+ ## Who This Is For
22
+
23
+ - **Applied AI teams** adding RL to existing production agents.
24
+ - **Research engineers** experimenting with fine-tuning complex, multi-turn or tool-using agents.
25
+ - **MLOps teams** building reproducible, language-agnostic rollout pipelines.
26
+
27
+ ## Quickstart
28
+
29
+ - See the Quickstart repository: [eval-protocol/quickstart](https://github.com/eval-protocol/quickstart/tree/main)
30
+
31
+ ## Resources
32
+
33
+ - **[Documentation](https://evalprotocol.io)** – Guides and API reference
34
+ - **[Discord](https://discord.com/channels/1137072072808472616/1400975572405850155)** – Community
35
+ - **[GitHub](https://github.com/eval-protocol/python-sdk)** – Source and examples
36
+
37
+ ## License
38
+
39
+ [MIT](LICENSE)
@@ -0,0 +1 @@
1
+ # This file makes the 'development' directory a Python package.