eval-protocol 0.2.98.dev1__tar.gz → 0.3.10.dev2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (489)
  1. {eval_protocol-0.2.98.dev1/eval_protocol.egg-info → eval_protocol-0.3.10.dev2}/PKG-INFO +6 -3
  2. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/_version.py +3 -3
  3. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/__init__.py +16 -0
  4. eval_protocol-0.3.10.dev2/eval_protocol/adapters/dataframe.py +66 -0
  5. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/fireworks_tracing.py +11 -3
  6. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/sql_resource.py +60 -5
  7. eval_protocol-0.3.10.dev2/eval_protocol/auth.py +176 -0
  8. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/test_glm_streaming_compliance.py +255 -130
  9. eval_protocol-0.3.10.dev2/eval_protocol/cli.py +369 -0
  10. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/create_rft.py +196 -275
  11. eval_protocol-0.3.10.dev2/eval_protocol/cli_commands/export_docs.py +300 -0
  12. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/local_test.py +9 -12
  13. eval_protocol-0.3.10.dev2/eval_protocol/cli_commands/logs.py +146 -0
  14. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/upload.py +141 -48
  15. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/utils.py +286 -43
  16. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +1 -1
  17. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +23 -4
  18. eval_protocol-0.3.10.dev2/eval_protocol/evaluation.py +408 -0
  19. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/event_bus/__init__.py +6 -0
  20. eval_protocol-0.3.10.dev2/eval_protocol/event_bus/sqlite_event_bus_database.py +255 -0
  21. eval_protocol-0.3.10.dev2/eval_protocol/fireworks_client.py +132 -0
  22. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/fireworks_rft.py +4 -32
  23. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/integrations/tinker_rollout_processor.py +1 -1
  24. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/fireworks_tracing_http_handler.py +7 -5
  25. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/execution/manager.py +1 -1
  26. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/models.py +105 -3
  27. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/platform_api.py +79 -142
  28. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/proxy_core/app.py +5 -1
  29. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/proxy_core/redis_utils.py +9 -2
  30. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/__init__.py +13 -0
  31. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/default_agent_rollout_processor.py +1 -1
  32. eval_protocol-0.3.10.dev2/eval_protocol/pytest/default_klavis_sandbox_rollout_processor.py +174 -0
  33. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +1 -1
  34. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/default_single_turn_rollout_process.py +51 -3
  35. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/evaluation_test.py +66 -28
  36. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/evaluation_test_utils.py +31 -4
  37. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/github_action_rollout_processor.py +7 -10
  38. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/integrations/openenv_trl_vllm.py +7 -4
  39. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/openenv_rollout_processor.py +3 -3
  40. eval_protocol-0.3.10.dev2/eval_protocol/pytest/priority_scheduler.py +515 -0
  41. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/remote_rollout_processor.py +60 -51
  42. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/rollout_processor.py +4 -0
  43. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/tracing_utils.py +18 -3
  44. eval_protocol-0.3.10.dev2/eval_protocol/training/__init__.py +45 -0
  45. eval_protocol-0.3.10.dev2/eval_protocol/training/gepa_trainer.py +522 -0
  46. eval_protocol-0.3.10.dev2/eval_protocol/training/gepa_utils.py +489 -0
  47. eval_protocol-0.3.10.dev2/eval_protocol/training/trainer.py +19 -0
  48. eval_protocol-0.3.10.dev2/eval_protocol/training/utils.py +19 -0
  49. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2/eval_protocol.egg-info}/PKG-INFO +6 -3
  50. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol.egg-info/SOURCES.txt +16 -10
  51. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol.egg-info/requires.txt +6 -2
  52. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/pyproject.toml +9 -5
  53. eval_protocol-0.3.10.dev2/tests/test_auth.py +73 -0
  54. eval_protocol-0.3.10.dev2/tests/test_cli_args.py +43 -0
  55. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_cli_create_rft.py +304 -281
  56. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_ep_upload_e2e.py +147 -192
  57. eval_protocol-0.3.10.dev2/tests/test_evaluation.py +133 -0
  58. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_examples_end_to_end.py +1 -1
  59. eval_protocol-0.3.10.dev2/tests/test_fireworks_client.py +143 -0
  60. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_minimal.py +2 -0
  61. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_models.py +32 -0
  62. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_priority_scheduler.py +18 -10
  63. eval_protocol-0.3.10.dev2/tests/test_rollout_logprobs.py +58 -0
  64. eval_protocol-0.3.10.dev2/tests/test_sqlite_hardening.py +474 -0
  65. eval_protocol-0.3.10.dev2/tests/test_training_utils.py +32 -0
  66. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_upload_entrypoint.py +10 -12
  67. eval_protocol-0.3.10.dev2/vite-app/dist/assets/index-10cZ11iB.js +137 -0
  68. eval_protocol-0.3.10.dev2/vite-app/dist/assets/index-10cZ11iB.js.map +1 -0
  69. eval_protocol-0.3.10.dev2/vite-app/dist/assets/index-DOD73Wyg.css +1 -0
  70. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vite-app/dist/index.html +2 -2
  71. eval_protocol-0.2.98.dev1/eval_protocol/auth.py +0 -331
  72. eval_protocol-0.2.98.dev1/eval_protocol/cli.py +0 -714
  73. eval_protocol-0.2.98.dev1/eval_protocol/cli_commands/deploy.py +0 -509
  74. eval_protocol-0.2.98.dev1/eval_protocol/cli_commands/deploy_mcp.py +0 -290
  75. eval_protocol-0.2.98.dev1/eval_protocol/cli_commands/logs.py +0 -57
  76. eval_protocol-0.2.98.dev1/eval_protocol/cli_commands/preview.py +0 -186
  77. eval_protocol-0.2.98.dev1/eval_protocol/evaluation.py +0 -1471
  78. eval_protocol-0.2.98.dev1/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -93
  79. eval_protocol-0.2.98.dev1/eval_protocol/pytest/priority_scheduler.py +0 -348
  80. eval_protocol-0.2.98.dev1/tests/test_auth.py +0 -396
  81. eval_protocol-0.2.98.dev1/tests/test_cli.py +0 -170
  82. eval_protocol-0.2.98.dev1/tests/test_cli_args.py +0 -156
  83. eval_protocol-0.2.98.dev1/tests/test_deploy_integration.py +0 -214
  84. eval_protocol-0.2.98.dev1/tests/test_evaluation.py +0 -431
  85. eval_protocol-0.2.98.dev1/tests/test_evaluation_integration.py +0 -365
  86. eval_protocol-0.2.98.dev1/tests/test_evaluation_preview_integration.py +0 -470
  87. eval_protocol-0.2.98.dev1/vite-app/dist/assets/index-CuQbfdPD.js +0 -46
  88. eval_protocol-0.2.98.dev1/vite-app/dist/assets/index-CuQbfdPD.js.map +0 -1
  89. eval_protocol-0.2.98.dev1/vite-app/dist/assets/index-iZp_HgyW.css +0 -1
  90. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/LICENSE +0 -0
  91. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/README.md +0 -0
  92. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/development/__init__.py +0 -0
  93. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/development/normalize_sandbox_fusion.py +0 -0
  94. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/development/utils/__init__.py +0 -0
  95. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/development/utils/generate_api_key.py +0 -0
  96. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/development/utils/subprocess_manager.py +0 -0
  97. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/__init__.py +0 -0
  98. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/__main__.py +0 -0
  99. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/base.py +0 -0
  100. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/bigquery.py +0 -0
  101. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/braintrust.py +0 -0
  102. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/huggingface.py +0 -0
  103. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/langchain.py +0 -0
  104. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/langfuse.py +0 -0
  105. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/langsmith.py +0 -0
  106. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/openai_responses.py +0 -0
  107. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/trl.py +0 -0
  108. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/utils.py +0 -0
  109. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/weave.py +0 -0
  110. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/__init__.py +0 -0
  111. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/models.py +0 -0
  112. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/orchestrator.py +0 -0
  113. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resource_abc.py +0 -0
  114. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resource_pool.py +0 -0
  115. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/__init__.py +0 -0
  116. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
  117. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
  118. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
  119. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
  120. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
  121. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/docker_resource.py +0 -0
  122. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
  123. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/python_state_resource.py +0 -0
  124. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/task_manager.py +0 -0
  125. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/tool_registry.py +0 -0
  126. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/__init__.py +0 -0
  127. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
  128. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
  129. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/test_aime25.py +0 -0
  130. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
  131. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/test_gpqa.py +0 -0
  132. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
  133. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
  134. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
  135. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/__init__.py +0 -0
  136. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
  137. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/common.py +0 -0
  138. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
  139. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/common_utils.py +0 -0
  140. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/config.py +0 -0
  141. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/data_loader/__init__.py +0 -0
  142. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
  143. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/data_loader/factory_data_loader.py +0 -0
  144. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/data_loader/inline_data_loader.py +0 -0
  145. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/data_loader/jsonl_data_loader.py +0 -0
  146. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/data_loader/models.py +0 -0
  147. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/dataset_logger/__init__.py +0 -0
  148. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
  149. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
  150. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/datasets/__init__.py +0 -0
  151. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/datasets/loader.py +0 -0
  152. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/directory_utils.py +0 -0
  153. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/event_bus/event_bus.py +0 -0
  154. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/event_bus/logger.py +0 -0
  155. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
  156. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/exceptions.py +0 -0
  157. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/execution/__init__.py +0 -0
  158. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/execution/pipeline.py +0 -0
  159. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/gcp_tools.py +0 -0
  160. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/generation/cache.py +0 -0
  161. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/generation/clients/base.py +0 -0
  162. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/generation/clients.py +0 -0
  163. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/generic_server.py +0 -0
  164. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/get_pep440_version.py +0 -0
  165. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/human_id/__init__.py +0 -0
  166. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/human_id/dictionary.py +0 -0
  167. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/integrations/__init__.py +0 -0
  168. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/integrations/deepeval.py +0 -0
  169. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/integrations/openai_rft.py +0 -0
  170. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/integrations/openeval.py +0 -0
  171. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/integrations/tinker_cookbook.py +0 -0
  172. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/integrations/trl.py +0 -0
  173. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/__init__.py +0 -0
  174. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
  175. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
  176. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
  177. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/init.py +0 -0
  178. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/rollout_context.py +0 -0
  179. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
  180. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/util.py +0 -0
  181. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/logging_utils.py +0 -0
  182. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/__init__.py +0 -0
  183. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/adapter.py +0 -0
  184. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/client/__init__.py +0 -0
  185. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/client/connection.py +0 -0
  186. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/clients.py +0 -0
  187. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/execution/__init__.py +0 -0
  188. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/execution/base_policy.py +0 -0
  189. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/execution/policy.py +0 -0
  190. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/execution/vllm_policy.py +0 -0
  191. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/grid_renderer.py +0 -0
  192. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/mcp_multi_client.py +0 -0
  193. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/mcpgym.py +0 -0
  194. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/process_manager.py +0 -0
  195. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/session/__init__.py +0 -0
  196. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/session/manager.py +0 -0
  197. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/simple_process_manager.py +0 -0
  198. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/simulation_server.py +0 -0
  199. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_agent/__init__.py +0 -0
  200. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_agent/config.py +0 -0
  201. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_agent/main.py +0 -0
  202. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
  203. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
  204. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
  205. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
  206. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_env.py +0 -0
  207. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/__init__.py +0 -0
  208. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
  209. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
  210. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
  211. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/README.md +0 -0
  212. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
  213. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
  214. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
  215. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
  216. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/server.py +0 -0
  217. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
  218. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
  219. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
  220. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
  221. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
  222. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/packaging.py +0 -0
  223. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/playback_policy.py +0 -0
  224. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/__init__.py +0 -0
  225. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/proxy_core/__init__.py +0 -0
  226. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/proxy_core/auth.py +0 -0
  227. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/proxy_core/langfuse.py +0 -0
  228. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/proxy_core/litellm.py +0 -0
  229. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/proxy_core/main.py +0 -0
  230. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/proxy_core/models.py +0 -0
  231. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/buffer.py +0 -0
  232. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
  233. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
  234. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
  235. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
  236. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
  237. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
  238. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
  239. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/exception_config.py +0 -0
  240. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/execution.py +0 -0
  241. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
  242. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/handle_persist_flow.py +0 -0
  243. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/parameterize.py +0 -0
  244. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/plugin.py +0 -0
  245. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/rollout_result_post_processor.py +0 -0
  246. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/store_experiment_link.py +0 -0
  247. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/store_results_url.py +0 -0
  248. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/types.py +0 -0
  249. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/validate_signature.py +0 -0
  250. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/__init__.py +0 -0
  251. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/aha_judge/__init__.py +0 -0
  252. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/aha_judge/llm_judge.py +0 -0
  253. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/aha_judge/llm_judge_braintrust.py +0 -0
  254. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/aha_judge/llm_judge_langfuse.py +0 -0
  255. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/aha_judge/llm_judge_langsmith.py +0 -0
  256. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/aha_judge/llm_judge_openai_responses.py +0 -0
  257. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/aha_judge/utils.py +0 -0
  258. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/llm_judge.py +0 -0
  259. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
  260. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/svg_agent/evaluator/test_svgagent.py +0 -0
  261. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/svg_agent/evaluator/utils.py +0 -0
  262. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +0 -0
  263. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/utils.py +0 -0
  264. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/resources.py +0 -0
  265. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/reward_function.py +0 -0
  266. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/__init__.py +0 -0
  267. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/accuracy.py +0 -0
  268. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/accuracy_length.py +0 -0
  269. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/apps_coding_reward.py +0 -0
  270. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/apps_execution_utils.py +0 -0
  271. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/apps_testing_util.py +0 -0
  272. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/bfcl_reward.py +0 -0
  273. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/code_execution.py +0 -0
  274. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/code_execution_utils.py +0 -0
  275. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/cpp_code.py +0 -0
  276. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/deepcoder_reward.py +0 -0
  277. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/format.py +0 -0
  278. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/function_calling.py +0 -0
  279. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/json_schema.py +0 -0
  280. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/language_consistency.py +0 -0
  281. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/lean_prover.py +0 -0
  282. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/length.py +0 -0
  283. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
  284. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/math.py +0 -0
  285. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
  286. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/reasoning_steps.py +0 -0
  287. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/repetition.py +0 -0
  288. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/tag_count.py +0 -0
  289. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rl_processing.py +0 -0
  290. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/server.py +0 -0
  291. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/stats/__init__.py +0 -0
  292. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/stats/confidence_intervals.py +0 -0
  293. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/typed_interface.py +0 -0
  294. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/types/__init__.py +0 -0
  295. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/types/errors.py +0 -0
  296. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/types/remote_rollout_processor.py +0 -0
  297. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/types/types.py +0 -0
  298. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/__init__.py +0 -0
  299. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/batch_evaluation.py +0 -0
  300. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/batch_transformation.py +0 -0
  301. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/browser_utils.py +0 -0
  302. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/check_server_status.py +0 -0
  303. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/dataset_helpers.py +0 -0
  304. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/evaluation_row_utils.py +0 -0
  305. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/logs_models.py +0 -0
  306. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/logs_server.py +0 -0
  307. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/module_loader.py +0 -0
  308. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/packaging_utils.py +0 -0
  309. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/show_results_url.py +0 -0
  310. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/static_policy.py +0 -0
  311. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/subprocess_utils.py +0 -0
  312. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/vite_server.py +0 -0
  313. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol.egg-info/dependency_links.txt +0 -0
  314. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol.egg-info/entry_points.txt +0 -0
  315. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol.egg-info/top_level.txt +0 -0
  316. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/setup.cfg +0 -0
  317. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/setup.py +0 -0
  318. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_accuracy.py +0 -0
  319. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_accuracy_length.py +0 -0
  320. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_adapters_e2e.py +0 -0
  321. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_agent_orchestrator.py +0 -0
  322. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_agent_resources.py +0 -0
  323. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_batch_evaluation.py +0 -0
  324. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_cli_agent.py +0 -0
  325. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_cli_local_test.py +0 -0
  326. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_code_execution.py +0 -0
  327. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_config.py +0 -0
  328. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_control_plane_separation.py +0 -0
  329. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_cpp_code.py +0 -0
  330. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_data_driven_task_manager.py +0 -0
  331. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_deepcoder_reward.py +0 -0
  332. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_deepeval_integration.py +0 -0
  333. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_directory_utils.py +0 -0
  334. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_e2b_integration.py +0 -0
  335. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_e2b_js_integration.py +0 -0
  336. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_edge_cases.py +0 -0
  337. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_eval_protocol_import.py +0 -0
  338. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_evaluation_postprocess.py +0 -0
  339. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_event_bus.py +0 -0
  340. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_event_bus_helper.py +0 -0
  341. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_exception_config.py +0 -0
  342. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_exceptions.py +0 -0
  343. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_fireworks_api.py +0 -0
  344. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_format.py +0 -0
  345. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_fractional_code.py +0 -0
  346. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_function_calling.py +0 -0
  347. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_gcp_tools.py +0 -0
  348. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_generic_server.py +0 -0
  349. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_human_id.py +0 -0
  350. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_integration.py +0 -0
  351. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_json_schema.py +0 -0
  352. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_kwargs_validation.py +0 -0
  353. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_language_consistency.py +0 -0
  354. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_lean_prover.py +0 -0
  355. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_lean_prover_runner.py +0 -0
  356. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_length.py +0 -0
  357. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_list_comparison_math_reward.py +0 -0
  358. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_litellm_policy_provider_fields.py +0 -0
  359. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_logs_server.py +0 -0
  360. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_logs_server_simple.py +0 -0
  361. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_math.py +0 -0
  362. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_message_field_filtering.py +0 -0
  363. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_models_rl.py +0 -0
  364. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_multiple_choice_math_reward.py +0 -0
  365. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_n_variant_batch_integration.py +0 -0
  366. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_n_variant_integration.py +0 -0
  367. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_openai_compatibility.py +0 -0
  368. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_openai_rft_integration.py +0 -0
  369. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_openeval_integration.py +0 -0
  370. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_packaging.py +0 -0
  371. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_parallel_rollouts.py +0 -0
  372. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_platform_api.py +0 -0
  373. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_quickstart_utils.py +0 -0
  374. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_readiness.py +0 -0
  375. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_reasoning_steps.py +0 -0
  376. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_repetition.py +0 -0
  377. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_repetition_debug.py +0 -0
  378. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_retry_mechanism.py +0 -0
  379. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_reward_function.py +0 -0
  380. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_reward_protocol_import.py +0 -0
  381. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_rl_processing.py +0 -0
  382. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_rollout_control_plane_integration.py +0 -0
  383. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_server.py +0 -0
  384. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_show_results_url.py +0 -0
  385. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_status_migration_changes.py +0 -0
  386. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_status_migration_integration.py +0 -0
  387. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_status_model.py +0 -0
  388. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_tag_count.py +0 -0
  389. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_tau_bench_airline_smoke.py +0 -0
  390. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_typed_interface.py +0 -0
  391. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_typed_interface_rl.py +0 -0
  392. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_url_handling.py +0 -0
  393. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/tests/test_vite_server.py +0 -0
  394. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/__init__.py +0 -0
  395. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/agent/__init__.py +0 -0
  396. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/agent/base.py +0 -0
  397. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/agent/llm_agent.py +0 -0
  398. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/api_service/__init__.py +0 -0
  399. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/api_service/api_config.py +0 -0
  400. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/api_service/data_model.py +0 -0
  401. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/api_service/simulation_service.py +0 -0
  402. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/cli.py +0 -0
  403. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/config.py +0 -0
  404. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/airline/policy.md +0 -0
  405. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/mock/policy.md +0 -0
  406. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
  407. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/retail/policy.md +0 -0
  408. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
  409. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
  410. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
  411. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
  412. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
  413. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
  414. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
  415. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data_model/__init__.py +0 -0
  416. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data_model/message.py +0 -0
  417. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data_model/simulation.py +0 -0
  418. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data_model/tasks.py +0 -0
  419. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/__init__.py +0 -0
  420. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/airline/__init__.py +0 -0
  421. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/airline/data_model.py +0 -0
  422. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/airline/environment.py +0 -0
  423. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/airline/tools.py +0 -0
  424. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/airline/utils.py +0 -0
  425. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/mock/__init__.py +0 -0
  426. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/mock/data_model.py +0 -0
  427. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/mock/environment.py +0 -0
  428. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/mock/tools.py +0 -0
  429. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/mock/utils.py +0 -0
  430. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/retail/__init__.py +0 -0
  431. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/retail/data_model.py +0 -0
  432. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/retail/environment.py +0 -0
  433. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/retail/tools.py +0 -0
  434. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/retail/utils.py +0 -0
  435. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/__init__.py +0 -0
  436. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/data_model.py +0 -0
  437. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/environment.py +0 -0
  438. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
  439. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
  440. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
  441. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
  442. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
  443. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
  444. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
  445. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
  446. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tools.py +0 -0
  447. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
  448. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/user_tools.py +0 -0
  449. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/utils.py +0 -0
  450. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/environment/__init__.py +0 -0
  451. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/environment/db.py +0 -0
  452. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/environment/environment.py +0 -0
  453. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/environment/server.py +0 -0
  454. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/environment/tool.py +0 -0
  455. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/environment/toolkit.py +0 -0
  456. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/environment/utils/interface_agent.py +0 -0
  457. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/evaluator/__init__.py +0 -0
  458. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/evaluator/evaluator.py +0 -0
  459. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/evaluator/evaluator_action.py +0 -0
  460. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/evaluator/evaluator_base.py +0 -0
  461. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
  462. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/evaluator/evaluator_env.py +0 -0
  463. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
  464. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/metrics/__init__.py +0 -0
  465. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/metrics/agent_metrics.py +0 -0
  466. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/metrics/break_down_metrics.py +0 -0
  467. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/orchestrator/__init__.py +0 -0
  468. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/orchestrator/environment_manager.py +0 -0
  469. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/orchestrator/orchestrator.py +0 -0
  470. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/orchestrator/utils.py +0 -0
  471. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/registry.py +0 -0
  472. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/run.py +0 -0
  473. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/scripts/__init__.py +0 -0
  474. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/scripts/check_data.py +0 -0
  475. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/scripts/show_domain_doc.py +0 -0
  476. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/scripts/start_servers.py +0 -0
  477. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/scripts/view_simulations.py +0 -0
  478. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/user/__init__.py +0 -0
  479. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/user/base.py +0 -0
  480. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/user/user_simulator.py +0 -0
  481. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/utils/__init__.py +0 -0
  482. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/utils/display.py +0 -0
  483. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/utils/io_utils.py +0 -0
  484. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/utils/llm_utils.py +0 -0
  485. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/utils/pydantic_utils.py +0 -0
  486. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/utils/utils.py +0 -0
  487. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/versioneer.py +0 -0
  488. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
  489. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.10.dev2}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.98.dev1
3
+ Version: 0.3.10.dev2
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -29,6 +29,7 @@ Requires-Dist: pytest>=6.0.0
29
29
  Requires-Dist: pytest-asyncio>=0.21.0
30
30
  Requires-Dist: peewee>=3.18.2
31
31
  Requires-Dist: backoff>=2.2.0
32
+ Requires-Dist: fireworks-ai==1.0.0a22
32
33
  Requires-Dist: questionary>=2.0.0
33
34
  Requires-Dist: toml>=0.10.0
34
35
  Requires-Dist: loguru>=0.6.0
@@ -72,8 +73,6 @@ Requires-Dist: transformers>=4.0.0; extra == "trl"
72
73
  Requires-Dist: accelerate>=0.28.0; extra == "trl"
73
74
  Provides-Extra: openevals
74
75
  Requires-Dist: openevals>=0.1.0; extra == "openevals"
75
- Provides-Extra: fireworks
76
- Requires-Dist: fireworks-ai>=0.19.19; extra == "fireworks"
77
76
  Provides-Extra: box2d
78
77
  Requires-Dist: swig; extra == "box2d"
79
78
  Requires-Dist: gymnasium[box2d]>=0.29.0; extra == "box2d"
@@ -102,6 +101,10 @@ Provides-Extra: braintrust
102
101
  Requires-Dist: braintrust[otel]; extra == "braintrust"
103
102
  Provides-Extra: openenv
104
103
  Requires-Dist: openenv-core; extra == "openenv"
104
+ Provides-Extra: dspy
105
+ Requires-Dist: dspy>=3.0.0; extra == "dspy"
106
+ Provides-Extra: klavis
107
+ Requires-Dist: klavis>=2.18.0; extra == "klavis"
105
108
  Provides-Extra: langgraph
106
109
  Requires-Dist: langgraph>=0.6.7; extra == "langgraph"
107
110
  Requires-Dist: langchain-core>=0.3.75; extra == "langgraph"
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-12-15T16:40:32-0800",
11
+ "date": "2026-01-13T16:25:00-0800",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "438a49431d16626a8e883cfb04afecfb188eb9dc",
15
- "version": "0.2.98.dev.1"
14
+ "full-revisionid": "66f191a09db5364b9cd9bb21230e1f48e50be724",
15
+ "version": "0.3.10.dev.2"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -99,3 +99,19 @@ try:
99
99
  __all__.extend(["WeaveAdapter"])
100
100
  except ImportError:
101
101
  pass
102
+
103
+ # DataFrame adapter (pandas integration for Lilac, etc.)
104
+ try:
105
+ from .dataframe import (
106
+ evaluation_rows_to_dataframe,
107
+ dataframe_to_evaluation_rows,
108
+ )
109
+
110
+ __all__.extend(
111
+ [
112
+ "evaluation_rows_to_dataframe",
113
+ "dataframe_to_evaluation_rows",
114
+ ]
115
+ )
116
+ except ImportError:
117
+ pass
@@ -0,0 +1,66 @@
1
+ """
2
+ Pandas DataFrame adapter for Eval Protocol.
3
+
4
+ This module provides utilities for converting between EvaluationRow format
5
+ and pandas DataFrame format, enabling integration with data curation tools
6
+ such as Lilac, Great Expectations, or any pandas-based workflow.
7
+
8
+ Example usage:
9
+ >>> from eval_protocol.adapters.dataframe import (
10
+ ... evaluation_rows_to_dataframe,
11
+ ... dataframe_to_evaluation_rows,
12
+ ... )
13
+ >>>
14
+ >>> # Convert EvaluationRows to DataFrame
15
+ >>> df = evaluation_rows_to_dataframe(rows)
16
+ >>>
17
+ >>> # Convert back to EvaluationRows
18
+ >>> rows = dataframe_to_evaluation_rows(df)
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import logging
24
+
25
+ import pandas as pd
26
+
27
+ from ..models import EvaluationRow
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
+ def evaluation_rows_to_dataframe(rows: list[EvaluationRow]) -> pd.DataFrame:
33
+ """Convert EvaluationRows to a pandas DataFrame.
34
+
35
+ Uses EvaluationRow.to_dict() for serialization.
36
+
37
+ Args:
38
+ rows: List of EvaluationRow objects
39
+
40
+ Returns:
41
+ DataFrame with 'data_json' containing serialized rows plus convenience fields
42
+ """
43
+ records = [row.to_dict() for row in rows]
44
+ return pd.DataFrame(records)
45
+
46
+
47
+ def dataframe_to_evaluation_rows(df: pd.DataFrame) -> list[EvaluationRow]:
48
+ """Convert a pandas DataFrame back to EvaluationRows.
49
+
50
+ Uses EvaluationRow.from_dict() for deserialization.
51
+
52
+ Args:
53
+ df: DataFrame with 'data_json' column containing serialized EvaluationRows
54
+
55
+ Returns:
56
+ List of EvaluationRow objects
57
+ """
58
+ rows = []
59
+ for _, row_data in df.iterrows():
60
+ try:
61
+ row = EvaluationRow.from_dict(row_data.to_dict())
62
+ rows.append(row)
63
+ except Exception as e:
64
+ logger.warning(f"Failed to convert row: {e}")
65
+ continue
66
+ return rows
@@ -253,6 +253,7 @@ class FireworksTracingAdapter(BaseAdapter):
253
253
  project_id: Optional[str] = None,
254
254
  base_url: str = "https://tracing.fireworks.ai",
255
255
  timeout: int = 300,
256
+ api_key: Optional[str] = None,
256
257
  ):
257
258
  """Initialize the Fireworks Tracing adapter.
258
259
 
@@ -260,15 +261,21 @@ class FireworksTracingAdapter(BaseAdapter):
260
261
  project_id: Optional project ID. If not provided, uses the default project configured on the server.
261
262
  base_url: The base URL of the tracing proxy (default: https://tracing.fireworks.ai)
262
263
  timeout: Request timeout in seconds (default: 300)
264
+ api_key: Optional API key. If not provided, falls back to FIREWORKS_API_KEY environment variable.
263
265
  """
264
266
  self.project_id = project_id
265
267
  self.base_url = base_url.rstrip("/")
266
268
  self.timeout = timeout
269
+ self._api_key = api_key
270
+
271
+ def _get_api_key(self) -> Optional[str]:
272
+ """Get the API key, preferring instance-level key over environment variable."""
273
+ return self._api_key or os.environ.get("FIREWORKS_API_KEY")
267
274
 
268
275
  def search_logs(self, tags: List[str], limit: int = 100, hours_back: int = 24) -> List[Dict[str, Any]]:
269
276
  """Fetch logs from Fireworks tracing gateway /logs endpoint.
270
277
 
271
- Returns entries with keys: timestamp, message, severity, tags.
278
+ Returns entries with keys: timestamp, message, severity, tags, status, extras.
272
279
  """
273
280
  if not tags:
274
281
  raise ValueError("At least one tag is required to fetch logs")
@@ -276,7 +283,7 @@ class FireworksTracingAdapter(BaseAdapter):
276
283
  from ..common_utils import get_user_agent
277
284
 
278
285
  headers = {
279
- "Authorization": f"Bearer {os.environ.get('FIREWORKS_API_KEY')}",
286
+ "Authorization": f"Bearer {self._get_api_key()}",
280
287
  "User-Agent": get_user_agent(),
281
288
  }
282
289
  params: Dict[str, Any] = {"tags": tags, "limit": limit, "hours_back": hours_back, "program": "eval_protocol"}
@@ -315,6 +322,7 @@ class FireworksTracingAdapter(BaseAdapter):
315
322
  "severity": e.get("severity", "INFO"),
316
323
  "tags": e.get("tags", []),
317
324
  "status": e.get("status"),
325
+ "extras": e.get("extras"),
318
326
  }
319
327
  )
320
328
  return results
@@ -406,7 +414,7 @@ class FireworksTracingAdapter(BaseAdapter):
406
414
  from ..common_utils import get_user_agent
407
415
 
408
416
  headers = {
409
- "Authorization": f"Bearer {os.environ.get('FIREWORKS_API_KEY')}",
417
+ "Authorization": f"Bearer {self._get_api_key()}",
410
418
  "User-Agent": get_user_agent(),
411
419
  }
412
420
 
@@ -12,6 +12,50 @@ from typing import Any, Dict, List, Optional
12
12
  from ..resource_abc import ForkableResource
13
13
 
14
14
 
15
+ # SQLite connection settings for hardened concurrency safety
16
+ SQLITE_CONNECTION_TIMEOUT = 30 # 30 seconds
17
+
18
+
19
+ def _apply_hardened_pragmas(conn: sqlite3.Connection) -> None:
20
+ """Apply hardened SQLite pragmas for concurrency safety."""
21
+ conn.execute("PRAGMA journal_mode=WAL") # Write-Ahead Logging
22
+ conn.execute("PRAGMA synchronous=NORMAL") # Balance safety and performance
23
+ conn.execute("PRAGMA busy_timeout=30000") # 30 second timeout
24
+ conn.execute("PRAGMA wal_autocheckpoint=1000") # Checkpoint every 1000 pages
25
+ conn.execute("PRAGMA cache_size=-64000") # 64MB cache
26
+ conn.execute("PRAGMA foreign_keys=ON") # Enable foreign key constraints
27
+ conn.execute("PRAGMA temp_store=MEMORY") # Store temp tables in memory
28
+
29
+
30
+ def _checkpoint_and_copy_database(
31
+ source_path: Path, dest_path: Path, timeout: int = SQLITE_CONNECTION_TIMEOUT
32
+ ) -> None:
33
+ """
34
+ Safely copy a SQLite database by checkpointing WAL first.
35
+
36
+ In WAL mode, data may exist in the -wal file that hasn't been written
37
+ to the main database file. This function performs a TRUNCATE checkpoint
38
+ to flush all WAL data to the main file before copying, ensuring a
39
+ complete and consistent copy.
40
+
41
+ Args:
42
+ source_path: Path to the source database file.
43
+ dest_path: Path where the copy should be created.
44
+ timeout: Connection timeout in seconds.
45
+ """
46
+ # First, checkpoint the WAL to ensure all data is in the main file
47
+ conn = sqlite3.connect(str(source_path), timeout=timeout)
48
+ try:
49
+ # TRUNCATE mode: checkpoint and truncate the WAL file to zero bytes
50
+ # This ensures all data is flushed to the main database file
51
+ conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
52
+ finally:
53
+ conn.close()
54
+
55
+ # Now safely copy just the main database file
56
+ shutil.copyfile(str(source_path), str(dest_path))
57
+
58
+
15
59
  class SQLResource(ForkableResource):
16
60
  """
17
61
  A ForkableResource for managing SQL database states, primarily SQLite.
@@ -20,6 +64,8 @@ class SQLResource(ForkableResource):
20
64
  and seed data, forked (by copying the DB file), checkpointed (by copying),
21
65
  and restored.
22
66
 
67
+ Uses hardened SQLite settings for concurrency safety.
68
+
23
69
  Attributes:
24
70
  _config (Dict[str, Any]): Configuration for the resource.
25
71
  _db_path (Optional[Path]): Path to the current SQLite database file.
@@ -38,8 +84,14 @@ class SQLResource(ForkableResource):
38
84
  def _get_db_connection(self) -> sqlite3.Connection:
39
85
  if not self._db_path:
40
86
  raise ConnectionError("Database path not set. Call setup() or fork() first.")
41
- # Set timeout to prevent indefinite hangs
42
- return sqlite3.connect(str(self._db_path), timeout=10)
87
+ # Set timeout to prevent indefinite hangs with hardened settings
88
+ conn = sqlite3.connect(
89
+ str(self._db_path),
90
+ timeout=SQLITE_CONNECTION_TIMEOUT,
91
+ isolation_level="DEFERRED", # Better for concurrent access
92
+ )
93
+ _apply_hardened_pragmas(conn)
94
+ return conn
43
95
 
44
96
  async def setup(self, config: Dict[str, Any]) -> None:
45
97
  """
@@ -111,7 +163,8 @@ class SQLResource(ForkableResource):
111
163
  forked_db_name = f"fork_{uuid.uuid4().hex}.sqlite"
112
164
  forked_resource._db_path = self._temp_dir / forked_db_name
113
165
 
114
- shutil.copyfile(str(self._db_path), str(forked_resource._db_path))
166
+ # Use checkpoint-and-copy to ensure WAL data is flushed before copying
167
+ _checkpoint_and_copy_database(self._db_path, forked_resource._db_path)
115
168
  return forked_resource
116
169
 
117
170
  async def checkpoint(self) -> Dict[str, Any]:
@@ -125,7 +178,8 @@ class SQLResource(ForkableResource):
125
178
 
126
179
  checkpoint_name = f"checkpoint_{self._db_path.stem}_{uuid.uuid4().hex}.sqlite"
127
180
  checkpoint_path = self._temp_dir / checkpoint_name
128
- shutil.copyfile(str(self._db_path), str(checkpoint_path))
181
+ # Use checkpoint-and-copy to ensure WAL data is flushed before copying
182
+ _checkpoint_and_copy_database(self._db_path, checkpoint_path)
129
183
  return {"db_type": "sqlite", "checkpoint_path": str(checkpoint_path)}
130
184
 
131
185
  async def restore(self, state_data: Dict[str, Any]) -> None:
@@ -147,7 +201,8 @@ class SQLResource(ForkableResource):
147
201
  if not self._db_path:
148
202
  self._db_path = self._temp_dir / f"restored_{uuid.uuid4().hex}.sqlite"
149
203
 
150
- shutil.copyfile(str(checkpoint_path), str(self._db_path))
204
+ # Use checkpoint-and-copy to ensure WAL data is flushed before copying
205
+ _checkpoint_and_copy_database(checkpoint_path, self._db_path)
151
206
  self._base_db_path = self._db_path # The restored state becomes the new base for future forks
152
207
 
153
208
  async def step(self, action_name: str, action_params: Dict[str, Any]) -> Any:
@@ -0,0 +1,176 @@
1
+ import logging
2
+ import os
3
+ from typing import Dict, Optional
4
+
5
+ import requests
6
+ from dotenv import dotenv_values, find_dotenv, load_dotenv
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
def find_dotenv_path(search_path: Optional[str] = None) -> Optional[str]:
    """
    Locate the dotenv file to use, preferring ``.env.dev`` over ``.env``.

    Args:
        search_path: Directory to inspect. When provided (and non-empty), only
            that directory is checked. Otherwise ``find_dotenv`` is used with
            ``usecwd=True`` to search starting from the current working directory.

    Returns:
        Path of the first matching file, or None when neither file is found.
    """
    candidate_names = (".env.dev", ".env")

    if search_path:
        # Explicit directory: check it directly and never fall back to find_dotenv.
        for name in candidate_names:
            candidate = os.path.join(search_path, name)
            if os.path.isfile(candidate):
                return candidate
        return None

    # No directory given: let find_dotenv do the lookup for each candidate name.
    for name in candidate_names:
        located = find_dotenv(filename=name, raise_error_if_not_found=False, usecwd=True)
        if located:
            return located
    return None
39
+
40
+
41
def get_dotenv_values(search_path: Optional[str] = None) -> Dict[str, Optional[str]]:
    """
    Read the key-value pairs from the dotenv file chosen by find_dotenv_path().

    Args:
        search_path: Directory forwarded unchanged to find_dotenv_path().

    Returns:
        Mapping of variable names to values as parsed by ``dotenv_values``;
        an empty dict when no dotenv file was located.
    """
    located = find_dotenv_path(search_path)
    return dotenv_values(located) if located else {}
55
+
56
+
57
# --- Load .env files ---
# Runs once, when this module is imported.
# find_dotenv_path() prefers .env.dev and falls back to .env.
# load_dotenv is called with override=False so that variables already present
# in the process environment (e.g., set in the shell) are NOT replaced by
# values from the file.
_DOTENV_PATH = find_dotenv_path()
if not _DOTENV_PATH:
    logger.debug(
        "eval_protocol.auth: No .env.dev or .env file found. Relying on shell/existing environment variables."
    )
else:
    load_dotenv(dotenv_path=_DOTENV_PATH, override=False)
    logger.debug(f"eval_protocol.auth: Loaded environment variables from: {_DOTENV_PATH}")
# --- End .env loading ---
71
+
72
+
73
def get_fireworks_api_key() -> Optional[str]:
    """
    Return the Fireworks API key taken from the FIREWORKS_API_KEY environment variable.

    Returns:
        The key with surrounding whitespace removed, or None when the variable
        is unset, empty, or contains only whitespace.
    """
    raw_value = os.environ.get("FIREWORKS_API_KEY")
    cleaned = raw_value.strip() if raw_value else ""
    if not cleaned:
        logger.debug("Fireworks API key not found in environment variables.")
        return None
    logger.debug("Using FIREWORKS_API_KEY from environment variable.")
    return cleaned
86
+
87
+
88
def get_fireworks_account_id() -> Optional[str]:
    """
    Resolve the Fireworks Account ID from the configured API key.

    The account id is not read from configuration; it is obtained by passing the
    API key (and the configured API base) to verify_api_key_and_get_account_id().

    Returns:
        The Account ID when an API key is available and verification yields one,
        otherwise None. Exceptions raised during resolution are logged at debug
        level and result in None.
    """
    try:
        key = get_fireworks_api_key()
        # Without a key there is nothing to verify, so skip the call entirely.
        account_id = (
            verify_api_key_and_get_account_id(api_key=key, api_base=get_fireworks_api_base()) if key else None
        )
        if account_id:
            logger.debug("Resolved account id via verifyApiKey: %s", account_id)
            return account_id
    except Exception as e:
        logger.debug("Failed to resolve account id via verifyApiKey: %s", e)

    logger.debug("Fireworks Account ID not found via verifyApiKey.")
    return None
108
+
109
+
110
def get_fireworks_api_base() -> str:
    """
    Retrieves the Fireworks API base URL.

    The base URL is sourced from the FIREWORKS_API_BASE environment variable.
    If the variable is unset, empty, or contains only whitespace, it defaults
    to "https://api.fireworks.ai".

    Returns:
        The API base URL, with surrounding whitespace removed.
    """
    default_base = "https://api.fireworks.ai"
    configured = os.environ.get("FIREWORKS_API_BASE")
    # A blank value is treated as "not set": os.environ.get(name, default) would
    # otherwise hand back the empty string instead of the default.
    if configured and configured.strip():
        logger.debug("Using FIREWORKS_API_BASE from environment variable.")
        return configured.strip()
    logger.debug("FIREWORKS_API_BASE not set in environment, defaulting to %s.", default_base)
    return default_base
126
+
127
+
128
def verify_api_key_and_get_account_id(
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
) -> Optional[str]:
    """
    Call the Fireworks ``verifyApiKey`` endpoint and return the account id found
    in the response headers.

    Args:
        api_key: Optional explicit API key. When falsy, get_fireworks_api_key() is used.
        api_base: Optional explicit API base. When falsy, get_fireworks_api_base() is used.
            A base whose text contains "api.fireworks.ai" (a substring check) is used
            as given; any other base is replaced by "https://dev.api.fireworks.ai"
            for the verification call.

    Returns:
        The account id with surrounding whitespace removed when the endpoint answers
        with status 200 and the account-id header is non-blank; otherwise None.
        Any exception is logged at debug level and also yields None.
    """
    try:
        token = api_key or get_fireworks_api_key()
        if not token:
            return None

        candidate_base = api_base or get_fireworks_api_base()
        # Use the given base when it mentions api.fireworks.ai, otherwise fall back to dev
        verify_base = candidate_base if "api.fireworks.ai" in candidate_base else "https://dev.api.fireworks.ai"

        from .common_utils import get_user_agent

        response = requests.get(
            f"{verify_base.rstrip('/')}/verifyApiKey",
            headers={
                "Authorization": f"Bearer {token}",
                "User-Agent": get_user_agent(),
            },
            timeout=10,
        )
        if response.status_code != 200:
            logger.debug("verifyApiKey returned status %s", response.status_code)
            return None

        # Both spellings are tried, although requests exposes headers case-insensitively
        header_value = response.headers.get("x-fireworks-account-id") or response.headers.get(
            "X-Fireworks-Account-Id"
        )
        if not (header_value and header_value.strip()):
            return None
        logger.debug("Resolved account id via verifyApiKey: %s", header_value)
        return header_value.strip()
    except Exception as e:
        logger.debug("Failed to verify API key for account id resolution: %s", e)
        return None