eval-protocol 0.2.98.dev1__tar.gz → 0.3.9.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (484)
  1. {eval_protocol-0.2.98.dev1/eval_protocol.egg-info → eval_protocol-0.3.9.dev1}/PKG-INFO +6 -3
  2. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/_version.py +3 -3
  3. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/__init__.py +16 -0
  4. eval_protocol-0.3.9.dev1/eval_protocol/adapters/dataframe.py +66 -0
  5. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/fireworks_tracing.py +2 -1
  6. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resources/sql_resource.py +60 -5
  7. eval_protocol-0.3.9.dev1/eval_protocol/auth.py +106 -0
  8. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/benchmarks/test_glm_streaming_compliance.py +255 -130
  9. eval_protocol-0.3.9.dev1/eval_protocol/cli.py +367 -0
  10. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/cli_commands/create_rft.py +135 -180
  11. eval_protocol-0.3.9.dev1/eval_protocol/cli_commands/export_docs.py +300 -0
  12. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/cli_commands/local_test.py +2 -12
  13. eval_protocol-0.3.9.dev1/eval_protocol/cli_commands/logs.py +146 -0
  14. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/cli_commands/upload.py +138 -45
  15. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/cli_commands/utils.py +286 -43
  16. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +1 -1
  17. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +23 -4
  18. eval_protocol-0.3.9.dev1/eval_protocol/evaluation.py +387 -0
  19. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/event_bus/__init__.py +6 -0
  20. eval_protocol-0.3.9.dev1/eval_protocol/event_bus/sqlite_event_bus_database.py +255 -0
  21. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/fireworks_rft.py +4 -32
  22. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/integrations/tinker_rollout_processor.py +1 -1
  23. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/log_utils/fireworks_tracing_http_handler.py +7 -5
  24. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/execution/manager.py +1 -1
  25. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/models.py +105 -3
  26. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/platform_api.py +66 -119
  27. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/proxy/proxy_core/app.py +5 -1
  28. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/proxy/proxy_core/redis_utils.py +9 -2
  29. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/__init__.py +13 -0
  30. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/default_agent_rollout_processor.py +1 -1
  31. eval_protocol-0.3.9.dev1/eval_protocol/pytest/default_klavis_sandbox_rollout_processor.py +174 -0
  32. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +1 -1
  33. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/default_single_turn_rollout_process.py +51 -3
  34. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/evaluation_test.py +64 -26
  35. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/evaluation_test_utils.py +31 -4
  36. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/github_action_rollout_processor.py +7 -10
  37. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/integrations/openenv_trl_vllm.py +7 -4
  38. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/openenv_rollout_processor.py +3 -3
  39. eval_protocol-0.3.9.dev1/eval_protocol/pytest/priority_scheduler.py +515 -0
  40. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/remote_rollout_processor.py +44 -51
  41. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/rollout_processor.py +4 -0
  42. eval_protocol-0.3.9.dev1/eval_protocol/training/__init__.py +45 -0
  43. eval_protocol-0.3.9.dev1/eval_protocol/training/gepa_trainer.py +522 -0
  44. eval_protocol-0.3.9.dev1/eval_protocol/training/gepa_utils.py +489 -0
  45. eval_protocol-0.3.9.dev1/eval_protocol/training/trainer.py +19 -0
  46. eval_protocol-0.3.9.dev1/eval_protocol/training/utils.py +19 -0
  47. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1/eval_protocol.egg-info}/PKG-INFO +6 -3
  48. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol.egg-info/SOURCES.txt +11 -7
  49. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol.egg-info/requires.txt +6 -2
  50. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/pyproject.toml +9 -5
  51. eval_protocol-0.3.9.dev1/tests/test_auth.py +73 -0
  52. eval_protocol-0.3.9.dev1/tests/test_cli_args.py +43 -0
  53. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_cli_create_rft.py +291 -224
  54. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_ep_upload_e2e.py +197 -153
  55. eval_protocol-0.3.9.dev1/tests/test_evaluation.py +118 -0
  56. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_examples_end_to_end.py +1 -1
  57. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_minimal.py +2 -0
  58. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_models.py +32 -0
  59. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_priority_scheduler.py +18 -10
  60. eval_protocol-0.3.9.dev1/tests/test_rollout_logprobs.py +58 -0
  61. eval_protocol-0.3.9.dev1/tests/test_sqlite_hardening.py +474 -0
  62. eval_protocol-0.3.9.dev1/tests/test_training_utils.py +32 -0
  63. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vite-app/dist/assets/index-CuQbfdPD.js +1 -1
  64. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vite-app/dist/assets/index-CuQbfdPD.js.map +1 -1
  65. eval_protocol-0.2.98.dev1/eval_protocol/auth.py +0 -331
  66. eval_protocol-0.2.98.dev1/eval_protocol/cli.py +0 -714
  67. eval_protocol-0.2.98.dev1/eval_protocol/cli_commands/deploy.py +0 -509
  68. eval_protocol-0.2.98.dev1/eval_protocol/cli_commands/deploy_mcp.py +0 -290
  69. eval_protocol-0.2.98.dev1/eval_protocol/cli_commands/logs.py +0 -57
  70. eval_protocol-0.2.98.dev1/eval_protocol/cli_commands/preview.py +0 -186
  71. eval_protocol-0.2.98.dev1/eval_protocol/evaluation.py +0 -1471
  72. eval_protocol-0.2.98.dev1/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -93
  73. eval_protocol-0.2.98.dev1/eval_protocol/pytest/priority_scheduler.py +0 -348
  74. eval_protocol-0.2.98.dev1/tests/test_auth.py +0 -396
  75. eval_protocol-0.2.98.dev1/tests/test_cli.py +0 -170
  76. eval_protocol-0.2.98.dev1/tests/test_cli_args.py +0 -156
  77. eval_protocol-0.2.98.dev1/tests/test_deploy_integration.py +0 -214
  78. eval_protocol-0.2.98.dev1/tests/test_evaluation.py +0 -431
  79. eval_protocol-0.2.98.dev1/tests/test_evaluation_integration.py +0 -365
  80. eval_protocol-0.2.98.dev1/tests/test_evaluation_preview_integration.py +0 -470
  81. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/LICENSE +0 -0
  82. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/README.md +0 -0
  83. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/development/__init__.py +0 -0
  84. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/development/normalize_sandbox_fusion.py +0 -0
  85. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/development/utils/__init__.py +0 -0
  86. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/development/utils/generate_api_key.py +0 -0
  87. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/development/utils/subprocess_manager.py +0 -0
  88. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/__init__.py +0 -0
  89. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/__main__.py +0 -0
  90. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/base.py +0 -0
  91. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/bigquery.py +0 -0
  92. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/braintrust.py +0 -0
  93. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/huggingface.py +0 -0
  94. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/langchain.py +0 -0
  95. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/langfuse.py +0 -0
  96. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/langsmith.py +0 -0
  97. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/openai_responses.py +0 -0
  98. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/trl.py +0 -0
  99. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/utils.py +0 -0
  100. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/weave.py +0 -0
  101. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/__init__.py +0 -0
  102. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/models.py +0 -0
  103. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/orchestrator.py +0 -0
  104. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resource_abc.py +0 -0
  105. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resource_pool.py +0 -0
  106. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resources/__init__.py +0 -0
  107. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
  108. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
  109. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
  110. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
  111. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
  112. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resources/docker_resource.py +0 -0
  113. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
  114. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resources/python_state_resource.py +0 -0
  115. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/task_manager.py +0 -0
  116. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/tool_registry.py +0 -0
  117. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/benchmarks/__init__.py +0 -0
  118. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
  119. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
  120. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/benchmarks/test_aime25.py +0 -0
  121. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
  122. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/benchmarks/test_gpqa.py +0 -0
  123. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
  124. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
  125. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
  126. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/cli_commands/__init__.py +0 -0
  127. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
  128. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/cli_commands/common.py +0 -0
  129. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
  130. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/common_utils.py +0 -0
  131. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/config.py +0 -0
  132. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/data_loader/__init__.py +0 -0
  133. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
  134. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/data_loader/factory_data_loader.py +0 -0
  135. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/data_loader/inline_data_loader.py +0 -0
  136. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/data_loader/jsonl_data_loader.py +0 -0
  137. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/data_loader/models.py +0 -0
  138. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/dataset_logger/__init__.py +0 -0
  139. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
  140. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
  141. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/datasets/__init__.py +0 -0
  142. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/datasets/loader.py +0 -0
  143. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/directory_utils.py +0 -0
  144. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/event_bus/event_bus.py +0 -0
  145. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/event_bus/logger.py +0 -0
  146. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
  147. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/exceptions.py +0 -0
  148. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/execution/__init__.py +0 -0
  149. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/execution/pipeline.py +0 -0
  150. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/gcp_tools.py +0 -0
  151. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/generation/cache.py +0 -0
  152. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/generation/clients/base.py +0 -0
  153. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/generation/clients.py +0 -0
  154. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/generic_server.py +0 -0
  155. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/get_pep440_version.py +0 -0
  156. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/human_id/__init__.py +0 -0
  157. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/human_id/dictionary.py +0 -0
  158. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/integrations/__init__.py +0 -0
  159. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/integrations/deepeval.py +0 -0
  160. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/integrations/openai_rft.py +0 -0
  161. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/integrations/openeval.py +0 -0
  162. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/integrations/tinker_cookbook.py +0 -0
  163. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/integrations/trl.py +0 -0
  164. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/log_utils/__init__.py +0 -0
  165. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
  166. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
  167. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
  168. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/log_utils/init.py +0 -0
  169. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/log_utils/rollout_context.py +0 -0
  170. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
  171. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/log_utils/util.py +0 -0
  172. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/logging_utils.py +0 -0
  173. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/__init__.py +0 -0
  174. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/adapter.py +0 -0
  175. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/client/__init__.py +0 -0
  176. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/client/connection.py +0 -0
  177. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/clients.py +0 -0
  178. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/execution/__init__.py +0 -0
  179. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/execution/base_policy.py +0 -0
  180. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/execution/policy.py +0 -0
  181. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/execution/vllm_policy.py +0 -0
  182. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/grid_renderer.py +0 -0
  183. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/mcp_multi_client.py +0 -0
  184. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/mcpgym.py +0 -0
  185. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/process_manager.py +0 -0
  186. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/session/__init__.py +0 -0
  187. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/session/manager.py +0 -0
  188. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/simple_process_manager.py +0 -0
  189. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/simulation_server.py +0 -0
  190. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_agent/__init__.py +0 -0
  191. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_agent/config.py +0 -0
  192. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_agent/main.py +0 -0
  193. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
  194. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
  195. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
  196. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
  197. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_env.py +0 -0
  198. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/__init__.py +0 -0
  199. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
  200. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
  201. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
  202. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/tau2/README.md +0 -0
  203. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
  204. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
  205. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
  206. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
  207. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/tau2/server.py +0 -0
  208. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
  209. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
  210. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
  211. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
  212. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
  213. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/packaging.py +0 -0
  214. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/playback_policy.py +0 -0
  215. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/proxy/__init__.py +0 -0
  216. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/proxy/proxy_core/__init__.py +0 -0
  217. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/proxy/proxy_core/auth.py +0 -0
  218. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/proxy/proxy_core/langfuse.py +0 -0
  219. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/proxy/proxy_core/litellm.py +0 -0
  220. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/proxy/proxy_core/main.py +0 -0
  221. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/proxy/proxy_core/models.py +0 -0
  222. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/buffer.py +0 -0
  223. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
  224. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
  225. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
  226. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
  227. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
  228. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
  229. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
  230. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/exception_config.py +0 -0
  231. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/execution.py +0 -0
  232. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
  233. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/handle_persist_flow.py +0 -0
  234. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/parameterize.py +0 -0
  235. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/plugin.py +0 -0
  236. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/rollout_result_post_processor.py +0 -0
  237. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/store_experiment_link.py +0 -0
  238. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/store_results_url.py +0 -0
  239. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/tracing_utils.py +0 -0
  240. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/types.py +0 -0
  241. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/validate_signature.py +0 -0
  242. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/__init__.py +0 -0
  243. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/aha_judge/__init__.py +0 -0
  244. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/aha_judge/llm_judge.py +0 -0
  245. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/aha_judge/llm_judge_braintrust.py +0 -0
  246. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/aha_judge/llm_judge_langfuse.py +0 -0
  247. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/aha_judge/llm_judge_langsmith.py +0 -0
  248. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/aha_judge/llm_judge_openai_responses.py +0 -0
  249. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/aha_judge/utils.py +0 -0
  250. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/llm_judge.py +0 -0
  251. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
  252. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/svg_agent/evaluator/test_svgagent.py +0 -0
  253. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/svg_agent/evaluator/utils.py +0 -0
  254. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +0 -0
  255. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/utils.py +0 -0
  256. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/resources.py +0 -0
  257. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/reward_function.py +0 -0
  258. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/__init__.py +0 -0
  259. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/accuracy.py +0 -0
  260. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/accuracy_length.py +0 -0
  261. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/apps_coding_reward.py +0 -0
  262. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/apps_execution_utils.py +0 -0
  263. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/apps_testing_util.py +0 -0
  264. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/bfcl_reward.py +0 -0
  265. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/code_execution.py +0 -0
  266. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/code_execution_utils.py +0 -0
  267. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/cpp_code.py +0 -0
  268. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/deepcoder_reward.py +0 -0
  269. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/format.py +0 -0
  270. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/function_calling.py +0 -0
  271. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/json_schema.py +0 -0
  272. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/language_consistency.py +0 -0
  273. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/lean_prover.py +0 -0
  274. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/length.py +0 -0
  275. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
  276. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/math.py +0 -0
  277. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
  278. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/reasoning_steps.py +0 -0
  279. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/repetition.py +0 -0
  280. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/tag_count.py +0 -0
  281. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rl_processing.py +0 -0
  282. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/server.py +0 -0
  283. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/stats/__init__.py +0 -0
  284. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/stats/confidence_intervals.py +0 -0
  285. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/typed_interface.py +0 -0
  286. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/types/__init__.py +0 -0
  287. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/types/errors.py +0 -0
  288. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/types/remote_rollout_processor.py +0 -0
  289. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/types/types.py +0 -0
  290. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/__init__.py +0 -0
  291. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/batch_evaluation.py +0 -0
  292. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/batch_transformation.py +0 -0
  293. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/browser_utils.py +0 -0
  294. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/check_server_status.py +0 -0
  295. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/dataset_helpers.py +0 -0
  296. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/evaluation_row_utils.py +0 -0
  297. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/logs_models.py +0 -0
  298. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/logs_server.py +0 -0
  299. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/module_loader.py +0 -0
  300. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/packaging_utils.py +0 -0
  301. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/show_results_url.py +0 -0
  302. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/static_policy.py +0 -0
  303. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/subprocess_utils.py +0 -0
  304. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/vite_server.py +0 -0
  305. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol.egg-info/dependency_links.txt +0 -0
  306. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol.egg-info/entry_points.txt +0 -0
  307. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol.egg-info/top_level.txt +0 -0
  308. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/setup.cfg +0 -0
  309. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/setup.py +0 -0
  310. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_accuracy.py +0 -0
  311. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_accuracy_length.py +0 -0
  312. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_adapters_e2e.py +0 -0
  313. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_agent_orchestrator.py +0 -0
  314. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_agent_resources.py +0 -0
  315. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_batch_evaluation.py +0 -0
  316. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_cli_agent.py +0 -0
  317. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_cli_local_test.py +0 -0
  318. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_code_execution.py +0 -0
  319. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_config.py +0 -0
  320. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_control_plane_separation.py +0 -0
  321. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_cpp_code.py +0 -0
  322. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_data_driven_task_manager.py +0 -0
  323. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_deepcoder_reward.py +0 -0
  324. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_deepeval_integration.py +0 -0
  325. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_directory_utils.py +0 -0
  326. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_e2b_integration.py +0 -0
  327. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_e2b_js_integration.py +0 -0
  328. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_edge_cases.py +0 -0
  329. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_eval_protocol_import.py +0 -0
  330. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_evaluation_postprocess.py +0 -0
  331. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_event_bus.py +0 -0
  332. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_event_bus_helper.py +0 -0
  333. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_exception_config.py +0 -0
  334. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_exceptions.py +0 -0
  335. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_fireworks_api.py +0 -0
  336. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_format.py +0 -0
  337. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_fractional_code.py +0 -0
  338. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_function_calling.py +0 -0
  339. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_gcp_tools.py +0 -0
  340. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_generic_server.py +0 -0
  341. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_human_id.py +0 -0
  342. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_integration.py +0 -0
  343. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_json_schema.py +0 -0
  344. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_kwargs_validation.py +0 -0
  345. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_language_consistency.py +0 -0
  346. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_lean_prover.py +0 -0
  347. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_lean_prover_runner.py +0 -0
  348. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_length.py +0 -0
  349. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_list_comparison_math_reward.py +0 -0
  350. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_litellm_policy_provider_fields.py +0 -0
  351. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_logs_server.py +0 -0
  352. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_logs_server_simple.py +0 -0
  353. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_math.py +0 -0
  354. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_message_field_filtering.py +0 -0
  355. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_models_rl.py +0 -0
  356. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_multiple_choice_math_reward.py +0 -0
  357. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_n_variant_batch_integration.py +0 -0
  358. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_n_variant_integration.py +0 -0
  359. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_openai_compatibility.py +0 -0
  360. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_openai_rft_integration.py +0 -0
  361. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_openeval_integration.py +0 -0
  362. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_packaging.py +0 -0
  363. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_parallel_rollouts.py +0 -0
  364. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_platform_api.py +0 -0
  365. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_quickstart_utils.py +0 -0
  366. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_readiness.py +0 -0
  367. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_reasoning_steps.py +0 -0
  368. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_repetition.py +0 -0
  369. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_repetition_debug.py +0 -0
  370. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_retry_mechanism.py +0 -0
  371. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_reward_function.py +0 -0
  372. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_reward_protocol_import.py +0 -0
  373. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_rl_processing.py +0 -0
  374. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_rollout_control_plane_integration.py +0 -0
  375. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_server.py +0 -0
  376. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_show_results_url.py +0 -0
  377. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_status_migration_changes.py +0 -0
  378. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_status_migration_integration.py +0 -0
  379. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_status_model.py +0 -0
  380. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_tag_count.py +0 -0
  381. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_tau_bench_airline_smoke.py +0 -0
  382. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_typed_interface.py +0 -0
  383. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_typed_interface_rl.py +0 -0
  384. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_upload_entrypoint.py +0 -0
  385. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_url_handling.py +0 -0
  386. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_vite_server.py +0 -0
  387. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/__init__.py +0 -0
  388. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/agent/__init__.py +0 -0
  389. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/agent/base.py +0 -0
  390. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/agent/llm_agent.py +0 -0
  391. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/api_service/__init__.py +0 -0
  392. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/api_service/api_config.py +0 -0
  393. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/api_service/data_model.py +0 -0
  394. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/api_service/simulation_service.py +0 -0
  395. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/cli.py +0 -0
  396. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/config.py +0 -0
  397. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data/domains/airline/policy.md +0 -0
  398. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data/domains/mock/policy.md +0 -0
  399. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
  400. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data/domains/retail/policy.md +0 -0
  401. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
  402. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
  403. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
  404. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
  405. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
  406. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
  407. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
  408. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data_model/__init__.py +0 -0
  409. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data_model/message.py +0 -0
  410. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data_model/simulation.py +0 -0
  411. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data_model/tasks.py +0 -0
  412. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/__init__.py +0 -0
  413. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/airline/__init__.py +0 -0
  414. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/airline/data_model.py +0 -0
  415. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/airline/environment.py +0 -0
  416. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/airline/tools.py +0 -0
  417. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/airline/utils.py +0 -0
  418. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/mock/__init__.py +0 -0
  419. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/mock/data_model.py +0 -0
  420. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/mock/environment.py +0 -0
  421. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/mock/tools.py +0 -0
  422. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/mock/utils.py +0 -0
  423. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/retail/__init__.py +0 -0
  424. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/retail/data_model.py +0 -0
  425. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/retail/environment.py +0 -0
  426. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/retail/tools.py +0 -0
  427. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/retail/utils.py +0 -0
  428. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/__init__.py +0 -0
  429. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/data_model.py +0 -0
  430. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/environment.py +0 -0
  431. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
  432. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
  433. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
  434. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
  435. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
  436. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
  437. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
  438. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
  439. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/tools.py +0 -0
  440. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
  441. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/user_tools.py +0 -0
  442. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/utils.py +0 -0
  443. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/environment/__init__.py +0 -0
  444. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/environment/db.py +0 -0
  445. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/environment/environment.py +0 -0
  446. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/environment/server.py +0 -0
  447. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/environment/tool.py +0 -0
  448. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/environment/toolkit.py +0 -0
  449. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/environment/utils/interface_agent.py +0 -0
  450. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/evaluator/__init__.py +0 -0
  451. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/evaluator/evaluator.py +0 -0
  452. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/evaluator/evaluator_action.py +0 -0
  453. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/evaluator/evaluator_base.py +0 -0
  454. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
  455. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/evaluator/evaluator_env.py +0 -0
  456. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
  457. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/metrics/__init__.py +0 -0
  458. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/metrics/agent_metrics.py +0 -0
  459. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/metrics/break_down_metrics.py +0 -0
  460. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/orchestrator/__init__.py +0 -0
  461. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/orchestrator/environment_manager.py +0 -0
  462. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/orchestrator/orchestrator.py +0 -0
  463. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/orchestrator/utils.py +0 -0
  464. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/registry.py +0 -0
  465. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/run.py +0 -0
  466. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/scripts/__init__.py +0 -0
  467. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/scripts/check_data.py +0 -0
  468. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/scripts/show_domain_doc.py +0 -0
  469. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/scripts/start_servers.py +0 -0
  470. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/scripts/view_simulations.py +0 -0
  471. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/user/__init__.py +0 -0
  472. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/user/base.py +0 -0
  473. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/user/user_simulator.py +0 -0
  474. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/utils/__init__.py +0 -0
  475. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/utils/display.py +0 -0
  476. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/utils/io_utils.py +0 -0
  477. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/utils/llm_utils.py +0 -0
  478. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/utils/pydantic_utils.py +0 -0
  479. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/utils/utils.py +0 -0
  480. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/versioneer.py +0 -0
  481. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
  482. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vite-app/dist/assets/index-iZp_HgyW.css +0 -0
  483. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
  484. {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vite-app/dist/index.html +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.98.dev1
3
+ Version: 0.3.9.dev1
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -29,6 +29,7 @@ Requires-Dist: pytest>=6.0.0
29
29
  Requires-Dist: pytest-asyncio>=0.21.0
30
30
  Requires-Dist: peewee>=3.18.2
31
31
  Requires-Dist: backoff>=2.2.0
32
+ Requires-Dist: fireworks-ai==1.0.0a20
32
33
  Requires-Dist: questionary>=2.0.0
33
34
  Requires-Dist: toml>=0.10.0
34
35
  Requires-Dist: loguru>=0.6.0
@@ -72,8 +73,6 @@ Requires-Dist: transformers>=4.0.0; extra == "trl"
72
73
  Requires-Dist: accelerate>=0.28.0; extra == "trl"
73
74
  Provides-Extra: openevals
74
75
  Requires-Dist: openevals>=0.1.0; extra == "openevals"
75
- Provides-Extra: fireworks
76
- Requires-Dist: fireworks-ai>=0.19.19; extra == "fireworks"
77
76
  Provides-Extra: box2d
78
77
  Requires-Dist: swig; extra == "box2d"
79
78
  Requires-Dist: gymnasium[box2d]>=0.29.0; extra == "box2d"
@@ -102,6 +101,10 @@ Provides-Extra: braintrust
102
101
  Requires-Dist: braintrust[otel]; extra == "braintrust"
103
102
  Provides-Extra: openenv
104
103
  Requires-Dist: openenv-core; extra == "openenv"
104
+ Provides-Extra: dspy
105
+ Requires-Dist: dspy>=3.0.0; extra == "dspy"
106
+ Provides-Extra: klavis
107
+ Requires-Dist: klavis>=2.18.0; extra == "klavis"
105
108
  Provides-Extra: langgraph
106
109
  Requires-Dist: langgraph>=0.6.7; extra == "langgraph"
107
110
  Requires-Dist: langchain-core>=0.3.75; extra == "langgraph"
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-12-15T16:40:32-0800",
11
+ "date": "2026-01-08T13:29:17-0800",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "438a49431d16626a8e883cfb04afecfb188eb9dc",
15
- "version": "0.2.98.dev.1"
14
+ "full-revisionid": "764ac4f132c35fe01c354b4150cbc19c7eedea12",
15
+ "version": "0.3.9.dev.1"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -99,3 +99,19 @@ try:
99
99
  __all__.extend(["WeaveAdapter"])
100
100
  except ImportError:
101
101
  pass
102
+
103
+ # DataFrame adapter (pandas integration for Lilac, etc.)
104
+ try:
105
+ from .dataframe import (
106
+ evaluation_rows_to_dataframe,
107
+ dataframe_to_evaluation_rows,
108
+ )
109
+
110
+ __all__.extend(
111
+ [
112
+ "evaluation_rows_to_dataframe",
113
+ "dataframe_to_evaluation_rows",
114
+ ]
115
+ )
116
+ except ImportError:
117
+ pass
@@ -0,0 +1,66 @@
1
+ """
2
+ Pandas DataFrame adapter for Eval Protocol.
3
+
4
+ This module provides utilities for converting between EvaluationRow format
5
+ and pandas DataFrame format, enabling integration with data curation tools
6
+ such as Lilac, Great Expectations, or any pandas-based workflow.
7
+
8
+ Example usage:
9
+ >>> from eval_protocol.adapters.dataframe import (
10
+ ... evaluation_rows_to_dataframe,
11
+ ... dataframe_to_evaluation_rows,
12
+ ... )
13
+ >>>
14
+ >>> # Convert EvaluationRows to DataFrame
15
+ >>> df = evaluation_rows_to_dataframe(rows)
16
+ >>>
17
+ >>> # Convert back to EvaluationRows
18
+ >>> rows = dataframe_to_evaluation_rows(df)
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import logging
24
+
25
+ import pandas as pd
26
+
27
+ from ..models import EvaluationRow
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
+ def evaluation_rows_to_dataframe(rows: list[EvaluationRow]) -> pd.DataFrame:
33
+ """Convert EvaluationRows to a pandas DataFrame.
34
+
35
+ Uses EvaluationRow.to_dict() for serialization.
36
+
37
+ Args:
38
+ rows: List of EvaluationRow objects
39
+
40
+ Returns:
41
+ DataFrame with one row per EvaluationRow and one column per key returned by EvaluationRow.to_dict()
42
+ """
43
+ records = [row.to_dict() for row in rows]
44
+ return pd.DataFrame(records)
45
+
46
+
47
+ def dataframe_to_evaluation_rows(df: pd.DataFrame) -> list[EvaluationRow]:
48
+ """Convert a pandas DataFrame back to EvaluationRows.
49
+
50
+ Uses EvaluationRow.from_dict() for deserialization.
51
+
52
+ Args:
53
+ df: DataFrame in which each row, converted to a dict, is passed to EvaluationRow.from_dict()
54
+
55
+ Returns:
56
+ List of EvaluationRow objects; rows that fail to convert are logged as warnings and skipped
57
+ """
58
+ rows = []
59
+ for _, row_data in df.iterrows():
60
+ try:
61
+ row = EvaluationRow.from_dict(row_data.to_dict())
62
+ rows.append(row)
63
+ except Exception as e:
64
+ logger.warning(f"Failed to convert row: {e}")
65
+ continue
66
+ return rows
@@ -268,7 +268,7 @@ class FireworksTracingAdapter(BaseAdapter):
268
268
  def search_logs(self, tags: List[str], limit: int = 100, hours_back: int = 24) -> List[Dict[str, Any]]:
269
269
  """Fetch logs from Fireworks tracing gateway /logs endpoint.
270
270
 
271
- Returns entries with keys: timestamp, message, severity, tags.
271
+ Returns entries with keys: timestamp, message, severity, tags, status, extras.
272
272
  """
273
273
  if not tags:
274
274
  raise ValueError("At least one tag is required to fetch logs")
@@ -315,6 +315,7 @@ class FireworksTracingAdapter(BaseAdapter):
315
315
  "severity": e.get("severity", "INFO"),
316
316
  "tags": e.get("tags", []),
317
317
  "status": e.get("status"),
318
+ "extras": e.get("extras"),
318
319
  }
319
320
  )
320
321
  return results
@@ -12,6 +12,50 @@ from typing import Any, Dict, List, Optional
12
12
  from ..resource_abc import ForkableResource
13
13
 
14
14
 
15
+ # SQLite connection settings for hardened concurrency safety
16
+ SQLITE_CONNECTION_TIMEOUT = 30 # 30 seconds
17
+
18
+
19
+ def _apply_hardened_pragmas(conn: sqlite3.Connection) -> None:
20
+ """Apply hardened SQLite pragmas for concurrency safety."""
21
+ conn.execute("PRAGMA journal_mode=WAL") # Write-Ahead Logging
22
+ conn.execute("PRAGMA synchronous=NORMAL") # Balance safety and performance
23
+ conn.execute("PRAGMA busy_timeout=30000") # 30 second timeout
24
+ conn.execute("PRAGMA wal_autocheckpoint=1000") # Checkpoint every 1000 pages
25
+ conn.execute("PRAGMA cache_size=-64000") # 64MB cache
26
+ conn.execute("PRAGMA foreign_keys=ON") # Enable foreign key constraints
27
+ conn.execute("PRAGMA temp_store=MEMORY") # Store temp tables in memory
28
+
29
+
30
+ def _checkpoint_and_copy_database(
31
+ source_path: Path, dest_path: Path, timeout: int = SQLITE_CONNECTION_TIMEOUT
32
+ ) -> None:
33
+ """
34
+ Safely copy a SQLite database by checkpointing WAL first.
35
+
36
+ In WAL mode, data may exist in the -wal file that hasn't been written
37
+ to the main database file. This function performs a TRUNCATE checkpoint
38
+ to flush all WAL data to the main file before copying, ensuring a
39
+ complete and consistent copy.
40
+
41
+ Args:
42
+ source_path: Path to the source database file.
43
+ dest_path: Path where the copy should be created.
44
+ timeout: Connection timeout in seconds.
45
+ """
46
+ # First, checkpoint the WAL to ensure all data is in the main file
47
+ conn = sqlite3.connect(str(source_path), timeout=timeout)
48
+ try:
49
+ # TRUNCATE mode: checkpoint and truncate the WAL file to zero bytes
50
+ # This ensures all data is flushed to the main database file
51
+ conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
52
+ finally:
53
+ conn.close()
54
+
55
+ # Now safely copy just the main database file
56
+ shutil.copyfile(str(source_path), str(dest_path))
57
+
58
+
15
59
  class SQLResource(ForkableResource):
16
60
  """
17
61
  A ForkableResource for managing SQL database states, primarily SQLite.
@@ -20,6 +64,8 @@ class SQLResource(ForkableResource):
20
64
  and seed data, forked (by copying the DB file), checkpointed (by copying),
21
65
  and restored.
22
66
 
67
+ Uses hardened SQLite settings for concurrency safety.
68
+
23
69
  Attributes:
24
70
  _config (Dict[str, Any]): Configuration for the resource.
25
71
  _db_path (Optional[Path]): Path to the current SQLite database file.
@@ -38,8 +84,14 @@ class SQLResource(ForkableResource):
38
84
  def _get_db_connection(self) -> sqlite3.Connection:
39
85
  if not self._db_path:
40
86
  raise ConnectionError("Database path not set. Call setup() or fork() first.")
41
- # Set timeout to prevent indefinite hangs
42
- return sqlite3.connect(str(self._db_path), timeout=10)
87
+ # Set a timeout to prevent indefinite hangs, then apply the hardened pragmas
88
+ conn = sqlite3.connect(
89
+ str(self._db_path),
90
+ timeout=SQLITE_CONNECTION_TIMEOUT,
91
+ isolation_level="DEFERRED", # Better for concurrent access
92
+ )
93
+ _apply_hardened_pragmas(conn)
94
+ return conn
43
95
 
44
96
  async def setup(self, config: Dict[str, Any]) -> None:
45
97
  """
@@ -111,7 +163,8 @@ class SQLResource(ForkableResource):
111
163
  forked_db_name = f"fork_{uuid.uuid4().hex}.sqlite"
112
164
  forked_resource._db_path = self._temp_dir / forked_db_name
113
165
 
114
- shutil.copyfile(str(self._db_path), str(forked_resource._db_path))
166
+ # Use checkpoint-and-copy to ensure WAL data is flushed before copying
167
+ _checkpoint_and_copy_database(self._db_path, forked_resource._db_path)
115
168
  return forked_resource
116
169
 
117
170
  async def checkpoint(self) -> Dict[str, Any]:
@@ -125,7 +178,8 @@ class SQLResource(ForkableResource):
125
178
 
126
179
  checkpoint_name = f"checkpoint_{self._db_path.stem}_{uuid.uuid4().hex}.sqlite"
127
180
  checkpoint_path = self._temp_dir / checkpoint_name
128
- shutil.copyfile(str(self._db_path), str(checkpoint_path))
181
+ # Use checkpoint-and-copy to ensure WAL data is flushed before copying
182
+ _checkpoint_and_copy_database(self._db_path, checkpoint_path)
129
183
  return {"db_type": "sqlite", "checkpoint_path": str(checkpoint_path)}
130
184
 
131
185
  async def restore(self, state_data: Dict[str, Any]) -> None:
@@ -147,7 +201,8 @@ class SQLResource(ForkableResource):
147
201
  if not self._db_path:
148
202
  self._db_path = self._temp_dir / f"restored_{uuid.uuid4().hex}.sqlite"
149
203
 
150
- shutil.copyfile(str(checkpoint_path), str(self._db_path))
204
+ # Use checkpoint-and-copy to ensure WAL data is flushed before copying
205
+ _checkpoint_and_copy_database(checkpoint_path, self._db_path)
151
206
  self._base_db_path = self._db_path # The restored state becomes the new base for future forks
152
207
 
153
208
  async def step(self, action_name: str, action_params: Dict[str, Any]) -> Any:
@@ -0,0 +1,106 @@
1
+ import logging
2
+ import os
3
+ from typing import Optional
4
+
5
+ import requests
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
+ def get_fireworks_api_key() -> Optional[str]:
11
+ """
12
+ Retrieves the Fireworks API key from the FIREWORKS_API_KEY environment variable.
13
+
14
+ Returns:
15
+ The API key if found, otherwise None.
16
+ """
17
+ api_key = os.environ.get("FIREWORKS_API_KEY")
18
+ if api_key and api_key.strip():
19
+ logger.debug("Using FIREWORKS_API_KEY from environment variable.")
20
+ return api_key.strip()
21
+ logger.debug("Fireworks API key not found in environment variables.")
22
+ return None
23
+
24
+
25
+ def get_fireworks_account_id() -> Optional[str]:
26
+ """
27
+ Retrieves the Fireworks Account ID by verifying the API key via verify_api_key_and_get_account_id().
28
+
29
+ Returns:
30
+ The Account ID if found, otherwise None.
31
+ """
32
+ # Account id is derived from the API key (single source of truth).
33
+ try:
34
+ api_key_for_verify = get_fireworks_api_key()
35
+ if api_key_for_verify:
36
+ resolved = verify_api_key_and_get_account_id(api_key=api_key_for_verify, api_base=get_fireworks_api_base())
37
+ if resolved:
38
+ logger.debug("Resolved account id via verifyApiKey: %s", resolved)
39
+ return resolved
40
+ except Exception as e:
41
+ logger.debug("Failed to resolve account id via verifyApiKey: %s", e)
42
+
43
+ logger.debug("Fireworks Account ID not found via verifyApiKey.")
44
+ return None
45
+
46
+
47
+ def get_fireworks_api_base() -> str:
48
+ """
49
+ Retrieves the Fireworks API base URL.
50
+
51
+ The base URL is sourced from the FIREWORKS_API_BASE environment variable.
52
+ If not set, it defaults to "https://api.fireworks.ai".
53
+
54
+ Returns:
55
+ The API base URL.
56
+ """
57
+ api_base = os.environ.get("FIREWORKS_API_BASE", "https://api.fireworks.ai")
58
+ if os.environ.get("FIREWORKS_API_BASE"):
59
+ logger.debug("Using FIREWORKS_API_BASE from environment variable.")
60
+ else:
61
+ logger.debug("FIREWORKS_API_BASE not set in environment, defaulting to %s.", api_base)
62
+ return api_base
63
+
64
+
65
+ def verify_api_key_and_get_account_id(
66
+ api_key: Optional[str] = None,
67
+ api_base: Optional[str] = None,
68
+ ) -> Optional[str]:
69
+ """
70
+ Calls the Fireworks API verify endpoint to validate the API key and returns the
71
+ account id from response headers when available.
72
+
73
+ Args:
74
+ api_key: Optional explicit API key. When None, resolves via get_fireworks_api_key().
75
+ api_base: Optional explicit API base. When None, resolves via get_fireworks_api_base().
76
+
77
+ Returns:
78
+ The resolved account id if verification succeeds and the header is present; otherwise None.
79
+ """
80
+ try:
81
+ resolved_key = api_key or get_fireworks_api_key()
82
+ if not resolved_key:
83
+ return None
84
+ resolved_base = api_base or get_fireworks_api_base()
85
+
86
+ from .common_utils import get_user_agent
87
+
88
+ url = f"{resolved_base.rstrip('/')}/verifyApiKey"
89
+ headers = {
90
+ "Authorization": f"Bearer {resolved_key}",
91
+ "User-Agent": get_user_agent(),
92
+ }
93
+ resp = requests.get(url, headers=headers, timeout=10)
94
+
95
+ if resp.status_code != 200:
96
+ logger.debug("verifyApiKey returned status %s", resp.status_code)
97
+ return None
98
+ # Header keys could vary in case; requests provides case-insensitive dict
99
+ account_id = resp.headers.get("x-fireworks-account-id") or resp.headers.get("X-Fireworks-Account-Id")
100
+ if account_id and account_id.strip():
101
+ logger.debug("Resolved account id via verifyApiKey: %s", account_id)
102
+ return account_id.strip()
103
+ return None
104
+ except Exception as e:
105
+ logger.debug("Failed to verify API key for account id resolution: %s", e)
106
+ return None