benchflow 0.5.2.dev875__tar.gz → 0.5.3.dev879__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (397)
  1. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/CHANGELOG.md +10 -0
  2. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/PKG-INFO +4 -4
  3. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/README.md +3 -3
  4. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/pyproject.toml +1 -1
  5. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/.gitignore +0 -0
  6. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/LICENSE +0 -0
  7. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/__init__.py +0 -0
  8. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_dotenv.py +0 -0
  9. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_paths.py +0 -0
  10. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_run.py +0 -0
  11. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_types.py +0 -0
  12. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/__init__.py +0 -0
  13. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/benchmark_repos.py +0 -0
  14. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/config.py +0 -0
  15. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/evaluation_results.py +0 -0
  16. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/json_safe.py +0 -0
  17. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/learner_memory.py +0 -0
  18. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/result_metadata.py +0 -0
  19. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/reward_events.py +0 -0
  20. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/scoring.py +0 -0
  21. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/source_provenance.py +0 -0
  22. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/task_authoring.py +0 -0
  23. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/yaml_loader.py +0 -0
  24. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/acp/__init__.py +0 -0
  25. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/acp/client.py +0 -0
  26. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/acp/container_transport.py +0 -0
  27. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/acp/runtime.py +0 -0
  28. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/acp/session.py +0 -0
  29. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/acp/transport.py +0 -0
  30. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/acp/types.py +0 -0
  31. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/adapters/__init__.py +0 -0
  32. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/adapters/harbor.py +0 -0
  33. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/adapters/inbound.py +0 -0
  34. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/adapters/inspect_ai.py +0 -0
  35. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/adapters/ors.py +0 -0
  36. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/adapters/terminal_bench.py +0 -0
  37. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/__init__.py +0 -0
  38. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/codex_config.py +0 -0
  39. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/credentials.py +0 -0
  40. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/env.py +0 -0
  41. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/errors.py +0 -0
  42. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/harvey_lab_acp_shim.py +0 -0
  43. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/install.py +0 -0
  44. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/openclaw_acp_shim.py +0 -0
  45. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/pi_acp_launcher.py +0 -0
  46. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/protocol.py +0 -0
  47. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/providers.py +0 -0
  48. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/registry.py +0 -0
  49. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/branch.py +0 -0
  50. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/cli/__init__.py +0 -0
  51. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/cli/main.py +0 -0
  52. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/cli/trace_import.py +0 -0
  53. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/compat/__init__.py +0 -0
  54. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/compat/harbor_registry.py +0 -0
  55. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/contracts/__init__.py +0 -0
  56. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/contracts/planes.py +0 -0
  57. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/contracts/user.py +0 -0
  58. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/demo_task/environment/Dockerfile +0 -0
  59. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/demo_task/instruction.md +0 -0
  60. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/demo_task/task.toml +0 -0
  61. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/demo_task/tests/test.sh +0 -0
  62. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/diagnostics.py +0 -0
  63. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/environment/__init__.py +0 -0
  64. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/environment/manifest.py +0 -0
  65. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/environment/manifest_env.py +0 -0
  66. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/environment/protocol.py +0 -0
  67. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/environment/readiness.py +0 -0
  68. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/eval_sharding.py +0 -0
  69. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/eval_worker.py +0 -0
  70. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/evaluation.py +0 -0
  71. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/experimental/__init__.py +0 -0
  72. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/experimental/mcp/__init__.py +0 -0
  73. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/experimental/mcp/hooks.py +0 -0
  74. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/experimental/mcp/reviewer_server.py +0 -0
  75. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/hosted_env.py +0 -0
  76. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/learner_skills.py +0 -0
  77. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/learner_store.py +0 -0
  78. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/metrics.py +0 -0
  79. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/models.py +0 -0
  80. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/monitor.py +0 -0
  81. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/providers/__init__.py +0 -0
  82. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/providers/litellm_bedrock_patch.py +0 -0
  83. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/providers/litellm_config.py +0 -0
  84. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/providers/litellm_logging.py +0 -0
  85. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/providers/litellm_runtime.py +0 -0
  86. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/providers/runtime.py +0 -0
  87. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/py.typed +0 -0
  88. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rewards/README.md +0 -0
  89. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rewards/__init__.py +0 -0
  90. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rewards/builtins.py +0 -0
  91. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rewards/events.py +0 -0
  92. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rewards/file_readers.py +0 -0
  93. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rewards/llm.py +0 -0
  94. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rewards/memory_scorer.py +0 -0
  95. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rewards/node.py +0 -0
  96. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rewards/protocol.py +0 -0
  97. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rewards/rubric.py +0 -0
  98. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rewards/rubric_config.py +0 -0
  99. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rewards/validation.py +0 -0
  100. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rollout.py +0 -0
  101. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rollout_branch.py +0 -0
  102. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rollout_planes.py +0 -0
  103. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/runtime.py +0 -0
  104. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/__init__.py +0 -0
  105. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/_base.py +0 -0
  106. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/_compose.py +0 -0
  107. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/_compose_files/docker-compose-base.yaml +0 -0
  108. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/_compose_files/docker-compose-build.yaml +0 -0
  109. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/_compose_files/docker-compose-no-network.yaml +0 -0
  110. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/_compose_files/docker-compose-prebuilt.yaml +0 -0
  111. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/_sdk_ops.py +0 -0
  112. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/daytona.py +0 -0
  113. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/docker.py +0 -0
  114. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/lockdown.py +0 -0
  115. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/metadata.py +0 -0
  116. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/modal_impl.py +0 -0
  117. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/process.py +0 -0
  118. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/protocol.py +0 -0
  119. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/services.py +0 -0
  120. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/setup.py +0 -0
  121. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/snapshot.py +0 -0
  122. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/user.py +0 -0
  123. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/scenes.py +0 -0
  124. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sdk.py +0 -0
  125. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/self_gen.py +0 -0
  126. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/skill_eval/__init__.py +0 -0
  127. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/skill_eval/_core.py +0 -0
  128. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/skill_eval/gepa_export.py +0 -0
  129. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/skill_eval/schema.py +0 -0
  130. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/skill_policy.py +0 -0
  131. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/skills.py +0 -0
  132. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/task/__init__.py +0 -0
  133. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/task/config.py +0 -0
  134. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/task/env.py +0 -0
  135. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/task/paths.py +0 -0
  136. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/task/task.py +0 -0
  137. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/task/verifier.py +0 -0
  138. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/templates/__init__.py +0 -0
  139. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/templates/judge.py.tmpl +0 -0
  140. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/templates/test.sh.tmpl +0 -0
  141. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/traces/__init__.py +0 -0
  142. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/traces/huggingface.py +0 -0
  143. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/traces/local.py +0 -0
  144. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/traces/models.py +0 -0
  145. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/traces/parsers.py +0 -0
  146. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/traces/task_gen.py +0 -0
  147. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/trajectories/__init__.py +0 -0
  148. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/trajectories/_capture.py +0 -0
  149. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/trajectories/export.py +0 -0
  150. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/trajectories/metrics.py +0 -0
  151. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/trajectories/otel.py +0 -0
  152. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/trajectories/tree.py +0 -0
  153. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/trajectories/types.py +0 -0
  154. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/trajectories/viewer.py +0 -0
  155. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/usage_tracking.py +0 -0
  156. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/__init__.py +0 -0
  157. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/agents/__init__.py +0 -0
  158. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/agents/test_protocol.py +0 -0
  159. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/README.md +0 -0
  160. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/acp_smoke/environment/Dockerfile +0 -0
  161. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/acp_smoke/environment/docker-compose.yaml +0 -0
  162. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/acp_smoke/environment/skills/conformance-writer/SKILL.md +0 -0
  163. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/acp_smoke/instruction.md +0 -0
  164. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/acp_smoke/solution/solve.sh +0 -0
  165. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/acp_smoke/task.toml +0 -0
  166. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/acp_smoke/tests/test.sh +0 -0
  167. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/conformance-results.json +0 -0
  168. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/proof_multi_agent.py +0 -0
  169. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/proof_snapshot.py +0 -0
  170. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/run_conformance.py +0 -0
  171. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/self_gen_smoke_skills/skill-creator/SKILL.md +0 -0
  172. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conftest.py +0 -0
  173. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/environment/__init__.py +0 -0
  174. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/environment/test_chibench_manifest.py +0 -0
  175. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/environment/test_clawsbench_manifest.py +0 -0
  176. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/environment/test_manifest.py +0 -0
  177. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/environment/test_manifest_env.py +0 -0
  178. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/environment/test_protocol.py +0 -0
  179. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/environment/test_readiness.py +0 -0
  180. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/hello-world-task/environment/Dockerfile +0 -0
  181. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/hello-world-task/instruction.md +0 -0
  182. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/hello-world-task/solution/solve.sh +0 -0
  183. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/hello-world-task/task.toml +0 -0
  184. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/hello-world-task/tests/test.sh +0 -0
  185. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/terminal-bench-smoke-task/environment/Dockerfile +0 -0
  186. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/terminal-bench-smoke-task/instruction.md +0 -0
  187. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/terminal-bench-smoke-task/solution/solve.sh +0 -0
  188. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/terminal-bench-smoke-task/task.toml +0 -0
  189. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/terminal-bench-smoke-task/tests/test.sh +0 -0
  190. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/terminal-bench-smoke-task/tests/test_state.py +0 -0
  191. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/test_claude.sh +0 -0
  192. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/test_codex.sh +0 -0
  193. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/test_codex_custom_provider.sh +0 -0
  194. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/test_gemini.sh +0 -0
  195. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/test_openclaw.sh +0 -0
  196. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/traces/minimal-claude.jsonl +0 -0
  197. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/traces/minimal-opentraces.jsonl +0 -0
  198. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/fixtures/mock_acp_agent.py +0 -0
  199. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/fixtures/mock_acp_agent_interleaved.py +0 -0
  200. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/fixtures/mock_acp_agent_multi_turn.py +0 -0
  201. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/fixtures/mock_openai_responses_server.py +0 -0
  202. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/check_adapter_evidence.py +0 -0
  203. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/check_hosted_env_evidence.py +0 -0
  204. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/check_results.py +0 -0
  205. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/check_skillsbench_harbor_parity.py +0 -0
  206. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/check_trace_to_task_evidence.py +0 -0
  207. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/configs/claude-agent-acp.yaml +0 -0
  208. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/configs/codex-acp.yaml +0 -0
  209. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/configs/gemini.yaml +0 -0
  210. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/configs/harvey-lab-harness.yaml +0 -0
  211. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/configs/openclaw.yaml +0 -0
  212. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/configs/opencode.yaml +0 -0
  213. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/configs/openhands.yaml +0 -0
  214. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/configs/pi-acp.yaml +0 -0
  215. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/run.sh +0 -0
  216. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/run_suite.py +0 -0
  217. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/suites/release.yaml +0 -0
  218. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_acp.py +0 -0
  219. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_acp_capability_advertising.py +0 -0
  220. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_acp_model_config_dispatch.py +0 -0
  221. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_acp_pinned_protocol_guard.py +0 -0
  222. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_acp_setup_failure_propagation.py +0 -0
  223. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_adapter_scripts.py +0 -0
  224. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_adapters.py +0 -0
  225. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_agent_cli.py +0 -0
  226. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_agent_env_resolution.py +0 -0
  227. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_agent_gemini_defaults.py +0 -0
  228. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_agent_idle_timeout_cli.py +0 -0
  229. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_agent_model_decouple.py +0 -0
  230. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_agent_registry.py +0 -0
  231. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_agent_setup.py +0 -0
  232. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_agent_spec.py +0 -0
  233. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_base_install_imports.py +0 -0
  234. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_bedrock_thinking.py +0 -0
  235. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_branch.py +0 -0
  236. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_capture_trajectory.py +0 -0
  237. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_clawsbench_slice.py +0 -0
  238. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_cli_daytona.py +0 -0
  239. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_cli_docs_drift.py +0 -0
  240. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_cli_misc.py +0 -0
  241. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_compat_harbor_registry.py +0 -0
  242. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_config_redaction.py +0 -0
  243. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_connect_as_env.py +0 -0
  244. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_continuallearningbench_adapter.py +0 -0
  245. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_dashboard_credential_env_scrub.py +0 -0
  246. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_dashboard_daytona_key.py +0 -0
  247. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_dashboard_no_host_paths.py +0 -0
  248. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_dashboard_release_evidence.py +0 -0
  249. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_dashboard_roadmap.py +0 -0
  250. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_dashboard_symlink_ingestion.py +0 -0
  251. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_dashboard_sync.py +0 -0
  252. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_daytona_command_polling.py +0 -0
  253. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_daytona_litellm_runtime.py +0 -0
  254. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_daytona_status.py +0 -0
  255. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_docker_prune_scoping.py +0 -0
  256. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_docker_uploads.py +0 -0
  257. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_docs_examples.py +0 -0
  258. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_eng50_capabilities.py +0 -0
  259. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_env_setup.py +0 -0
  260. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_environment_manifest_controls.py +0 -0
  261. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_eval_filters_applied.py +0 -0
  262. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_eval_sharding.py +0 -0
  263. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_eval_single_task_summary.py +0 -0
  264. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_eval_source_provenance.py +0 -0
  265. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_eval_worker_retry.py +0 -0
  266. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_eval_zero_task_guard.py +0 -0
  267. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_evaluation_environment_manifest.py +0 -0
  268. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_exclude_tasks.py +0 -0
  269. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_experiments_status.py +0 -0
  270. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_harvey_lab_shim.py +0 -0
  271. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_hilbench_adapter.py +0 -0
  272. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_hosted_env.py +0 -0
  273. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_hosted_env_rollout_contract.py +0 -0
  274. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_inbound_adapter_manifest.py +0 -0
  275. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_inbound_adapters.py +0 -0
  276. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_integration_check_results.py +0 -0
  277. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_integration_run_suite.py +0 -0
  278. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_internet_policy.py +0 -0
  279. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_job.py +0 -0
  280. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_job_sequential_shared.py +0 -0
  281. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_job_sequential_shared_resume.py +0 -0
  282. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_judge_symlink_ingestion.py +0 -0
  283. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_learner_skills.py +0 -0
  284. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_learner_skills_traversal.py +0 -0
  285. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_learner_store.py +0 -0
  286. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_learner_store_persistence.py +0 -0
  287. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_litellm_config.py +0 -0
  288. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_litellm_hardening.py +0 -0
  289. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_litellm_logging.py +0 -0
  290. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_litellm_runtime.py +0 -0
  291. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_litellm_smoke.py +0 -0
  292. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_llm_judge.py +0 -0
  293. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_llm_judge_event_tags.py +0 -0
  294. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_llm_judge_verifier.py +0 -0
  295. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_memory_scorer.py +0 -0
  296. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_metrics.py +0 -0
  297. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_mock_openai_responses_server.py +0 -0
  298. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_monitor_scaffold.py +0 -0
  299. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_native_acp_usage.py +0 -0
  300. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_no_cross_provider_fallback.py +0 -0
  301. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_notification_order_real.py +0 -0
  302. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_oracle.py +0 -0
  303. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_oracle_chokepoint.py +0 -0
  304. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_paths_safe.py +0 -0
  305. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_paths_symlink_helpers.py +0 -0
  306. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_pi_acp_launcher.py +0 -0
  307. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_process.py +0 -0
  308. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_provider_auth_detection.py +0 -0
  309. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_providers.py +0 -0
  310. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_reexport.py +0 -0
  311. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_registry_invariants.py +0 -0
  312. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_release_version.py +0 -0
  313. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_resolve_env_helpers.py +0 -0
  314. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_reward_node.py +0 -0
  315. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_reward_unified_contract.py +0 -0
  316. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_rewards.py +0 -0
  317. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_rewards_jsonl.py +0 -0
  318. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_rollout_architecture.py +0 -0
  319. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_rollout_branch.py +0 -0
  320. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_rollout_config_path_coercion.py +0 -0
  321. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_rollout_environment.py +0 -0
  322. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_rollout_import_no_side_effects.py +0 -0
  323. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_rollout_on_ask_user_wiring.py +0 -0
  324. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_rollout_probe_sandbox_health.py +0 -0
  325. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_rollout_upload.py +0 -0
  326. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_rubric_config.py +0 -0
  327. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_runtime.py +0 -0
  328. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_runtime_config_wired.py +0 -0
  329. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_runtime_live_sandbox.py +0 -0
  330. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_sandbox.py +0 -0
  331. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_sandbox_exec_secret_handling.py +0 -0
  332. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_sandbox_hardening.py +0 -0
  333. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_sandbox_isolation_copy_traversal.py +0 -0
  334. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_sandbox_multi_service.py +0 -0
  335. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_sandbox_protocol.py +0 -0
  336. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_sandbox_setup.py +0 -0
  337. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_sandbox_snapshot_contract.py +0 -0
  338. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_sandbox_upload_symlink.py +0 -0
  339. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_sandbox_verifier_workspace.py +0 -0
  340. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_scene.py +0 -0
  341. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_scene_outbox_trial.py +0 -0
  342. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_scene_parallel_group.py +0 -0
  343. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_scene_result_aggregation.py +0 -0
  344. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_scoring.py +0 -0
  345. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_sdk_internals.py +0 -0
  346. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_sdk_lockdown.py +0 -0
  347. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_self_gen_cli.py +0 -0
  348. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_self_gen_export_error_channel.py +0 -0
  349. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_self_gen_export_failures.py +0 -0
  350. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_self_gen_orchestration.py +0 -0
  351. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_session_request_permission_dispatch.py +0 -0
  352. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_skill_eval.py +0 -0
  353. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_skill_eval_dryrun.py +0 -0
  354. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_skill_eval_integration.py +0 -0
  355. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_skill_eval_sweep.py +0 -0
  356. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_skill_eval_traversal.py +0 -0
  357. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_skill_invocation_artifacts.py +0 -0
  358. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_skill_policy.py +0 -0
  359. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_skills.py +0 -0
  360. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_skills_dir_agent_home_link.py +0 -0
  361. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_skillsbench_harbor_parity.py +0 -0
  362. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_skillsbench_harbor_run_suite.py +0 -0
  363. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_smoke.py +0 -0
  364. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_subscription_auth.py +0 -0
  365. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_task_check_eval_consistency.py +0 -0
  366. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_task_config.py +0 -0
  367. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_task_download.py +0 -0
  368. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_tasks.py +0 -0
  369. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_token_usage_normalization.py +0 -0
  370. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_trace_import_cli.py +0 -0
  371. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_trace_task_gen_traversal.py +0 -0
  372. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_trace_to_task_evidence.py +0 -0
  373. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_traces_huggingface.py +0 -0
  374. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_traces_parsers.py +0 -0
  375. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_traces_task_gen.py +0 -0
  376. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_train_mode_artifact_emission.py +0 -0
  377. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_trajectory_integration.py +0 -0
  378. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_trajectory_streaming.py +0 -0
  379. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_trial_agent_timeout_verify.py +0 -0
  380. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_trial_install_agent_timeout.py +0 -0
  381. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_trial_litellm_runtime.py +0 -0
  382. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_usage_litellm.py +0 -0
  383. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_usage_required.py +0 -0
  384. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_usage_tracking.py +0 -0
  385. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_user.py +0 -0
  386. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_verifier_multi_container.py +0 -0
  387. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_verifier_output.py +0 -0
  388. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_verifier_output_freshness.py +0 -0
  389. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_verify.py +0 -0
  390. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_workflow_action_pinning.py +0 -0
  391. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_yaml_config.py +0 -0
  392. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/trajectories/__init__.py +0 -0
  393. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/trajectories/test_export.py +0 -0
  394. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/trajectories/test_export_nan_handling.py +0 -0
  395. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/trajectories/test_redaction.py +0 -0
  396. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/trajectories/test_step_granularity.py +0 -0
  397. {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/trajectories/test_tree.py +0 -0
@@ -2,6 +2,16 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## 0.5.2 — 2026-06-05
6
+
7
+ ### Changed
8
+
9
+ - **PyPI project README badge** — replace the dynamic PyPI version badge with
10
+ a stable package badge so the rendered project description cannot show a
11
+ stale external version image after a public release.
12
+ - **Release documentation refresh** — update public install snippets,
13
+ release-channel docs, examples, and citation metadata to `0.5.2`.
14
+
5
15
  ## 0.5.1 — 2026-06-05
6
16
 
7
17
  ### Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: benchflow
3
- Version: 0.5.2.dev875
3
+ Version: 0.5.3.dev879
4
4
  Summary: Multi-turn agent benchmarking with ACP — run any agent, any model, any provider.
5
5
  Project-URL: Homepage, https://github.com/benchflow-ai/benchflow
6
6
  Project-URL: Repository, https://github.com/benchflow-ai/benchflow
@@ -52,7 +52,7 @@ Description-Content-Type: text/markdown
52
52
  <h1>BenchFlow</h1>
53
53
  <p>Multi-turn agent benchmarking — Scene-based lifecycle for any ACP agent</p>
54
54
  <a href="https://pypi.org/project/benchflow/" target="_blank">
55
- <img src="https://img.shields.io/pypi/v/benchflow?style=for-the-badge&logo=pypi" alt="PyPI">
55
+ <img src="https://img.shields.io/badge/PyPI-benchflow-3775A9?style=for-the-badge&logo=pypi&logoColor=white" alt="PyPI package">
56
56
  </a>
57
57
  <a href="https://discord.gg/mZ9Rc8q8W3" target="_blank">
58
58
  <img src="https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord">
@@ -70,7 +70,7 @@ BenchFlow runs AI agents against benchmark tasks in sandboxed environments. Sing
70
70
 
71
71
  ## Install
72
72
 
73
- BenchFlow's current public release is `0.5.1`:
73
+ BenchFlow's current public release is `0.5.2`:
74
74
 
75
75
  ```bash
76
76
  pip install --upgrade benchflow
@@ -79,7 +79,7 @@ pip install --upgrade benchflow
79
79
  For a `uv`-managed CLI install of the public release:
80
80
 
81
81
  ```bash
82
- uv tool install --prerelease allow 'benchflow==0.5.1'
82
+ uv tool install --prerelease allow 'benchflow==0.5.2'
83
83
  ```
84
84
 
85
85
  Requires Python 3.12+ and [uv](https://docs.astral.sh/uv/). Set `DAYTONA_API_KEY` for Daytona runs or configure Modal auth for Modal runs; export the relevant agent API key (`GEMINI_API_KEY`, `ANTHROPIC_API_KEY`, etc.) or run `claude login` / `codex --login` for subscription auth. Provider-prefixed models may use provider-specific credentials; Azure Foundry models use `AZURE_API_KEY` plus `AZURE_API_ENDPOINT`.
@@ -2,7 +2,7 @@
2
2
  <h1>BenchFlow</h1>
3
3
  <p>Multi-turn agent benchmarking — Scene-based lifecycle for any ACP agent</p>
4
4
  <a href="https://pypi.org/project/benchflow/" target="_blank">
5
- <img src="https://img.shields.io/pypi/v/benchflow?style=for-the-badge&logo=pypi" alt="PyPI">
5
+ <img src="https://img.shields.io/badge/PyPI-benchflow-3775A9?style=for-the-badge&logo=pypi&logoColor=white" alt="PyPI package">
6
6
  </a>
7
7
  <a href="https://discord.gg/mZ9Rc8q8W3" target="_blank">
8
8
  <img src="https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord">
@@ -20,7 +20,7 @@ BenchFlow runs AI agents against benchmark tasks in sandboxed environments. Sing
20
20
 
21
21
  ## Install
22
22
 
23
- BenchFlow's current public release is `0.5.1`:
23
+ BenchFlow's current public release is `0.5.2`:
24
24
 
25
25
  ```bash
26
26
  pip install --upgrade benchflow
@@ -29,7 +29,7 @@ pip install --upgrade benchflow
29
29
  For a `uv`-managed CLI install of the public release:
30
30
 
31
31
  ```bash
32
- uv tool install --prerelease allow 'benchflow==0.5.1'
32
+ uv tool install --prerelease allow 'benchflow==0.5.2'
33
33
  ```
34
34
 
35
35
  Requires Python 3.12+ and [uv](https://docs.astral.sh/uv/). Set `DAYTONA_API_KEY` for Daytona runs or configure Modal auth for Modal runs; export the relevant agent API key (`GEMINI_API_KEY`, `ANTHROPIC_API_KEY`, etc.) or run `claude login` / `codex --login` for subscription auth. Provider-prefixed models may use provider-specific credentials; Azure Foundry models use `AZURE_API_KEY` plus `AZURE_API_ENDPOINT`.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "benchflow"
3
- version = "0.5.2.dev875"
3
+ version = "0.5.3.dev879"
4
4
  description = "Multi-turn agent benchmarking with ACP — run any agent, any model, any provider."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"