benchflow 0.5.2.dev875__tar.gz → 0.5.3.dev879__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/CHANGELOG.md +10 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/PKG-INFO +4 -4
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/README.md +3 -3
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/pyproject.toml +1 -1
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/.gitignore +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/LICENSE +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/__init__.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_dotenv.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_paths.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_run.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_types.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/__init__.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/benchmark_repos.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/config.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/evaluation_results.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/json_safe.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/learner_memory.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/result_metadata.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/reward_events.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/scoring.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/source_provenance.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/task_authoring.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/yaml_loader.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/acp/__init__.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/acp/client.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/acp/container_transport.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/acp/runtime.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/acp/session.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/acp/transport.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/acp/types.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/adapters/__init__.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/adapters/harbor.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/adapters/inbound.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/adapters/inspect_ai.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/adapters/ors.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/adapters/terminal_bench.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/__init__.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/codex_config.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/credentials.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/env.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/errors.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/harvey_lab_acp_shim.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/install.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/openclaw_acp_shim.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/pi_acp_launcher.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/protocol.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/providers.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/registry.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/branch.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/cli/__init__.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/cli/main.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/cli/trace_import.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/compat/__init__.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/compat/harbor_registry.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/contracts/__init__.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/contracts/planes.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/contracts/user.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/demo_task/environment/Dockerfile +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/demo_task/instruction.md +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/demo_task/task.toml +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/demo_task/tests/test.sh +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/diagnostics.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/environment/__init__.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/environment/manifest.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/environment/manifest_env.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/environment/protocol.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/environment/readiness.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/eval_sharding.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/eval_worker.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/evaluation.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/experimental/__init__.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/experimental/mcp/__init__.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/experimental/mcp/hooks.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/experimental/mcp/reviewer_server.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/hosted_env.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/learner_skills.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/learner_store.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/metrics.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/models.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/monitor.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/providers/__init__.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/providers/litellm_bedrock_patch.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/providers/litellm_config.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/providers/litellm_logging.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/providers/litellm_runtime.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/providers/runtime.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/py.typed +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rewards/README.md +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rewards/__init__.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rewards/builtins.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rewards/events.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rewards/file_readers.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rewards/llm.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rewards/memory_scorer.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rewards/node.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rewards/protocol.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rewards/rubric.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rewards/rubric_config.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rewards/validation.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rollout.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rollout_branch.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/rollout_planes.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/runtime.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/__init__.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/_base.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/_compose.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/_compose_files/docker-compose-base.yaml +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/_compose_files/docker-compose-build.yaml +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/_compose_files/docker-compose-no-network.yaml +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/_compose_files/docker-compose-prebuilt.yaml +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/_sdk_ops.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/daytona.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/docker.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/lockdown.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/metadata.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/modal_impl.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/process.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/protocol.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/services.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/setup.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/snapshot.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sandbox/user.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/scenes.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/sdk.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/self_gen.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/skill_eval/__init__.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/skill_eval/_core.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/skill_eval/gepa_export.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/skill_eval/schema.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/skill_policy.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/skills.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/task/__init__.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/task/config.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/task/env.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/task/paths.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/task/task.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/task/verifier.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/templates/__init__.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/templates/judge.py.tmpl +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/templates/test.sh.tmpl +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/traces/__init__.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/traces/huggingface.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/traces/local.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/traces/models.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/traces/parsers.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/traces/task_gen.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/trajectories/__init__.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/trajectories/_capture.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/trajectories/export.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/trajectories/metrics.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/trajectories/otel.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/trajectories/tree.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/trajectories/types.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/trajectories/viewer.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/usage_tracking.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/__init__.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/agents/__init__.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/agents/test_protocol.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/README.md +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/acp_smoke/environment/Dockerfile +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/acp_smoke/environment/docker-compose.yaml +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/acp_smoke/environment/skills/conformance-writer/SKILL.md +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/acp_smoke/instruction.md +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/acp_smoke/solution/solve.sh +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/acp_smoke/task.toml +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/acp_smoke/tests/test.sh +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/conformance-results.json +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/proof_multi_agent.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/proof_snapshot.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/run_conformance.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/self_gen_smoke_skills/skill-creator/SKILL.md +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conftest.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/environment/__init__.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/environment/test_chibench_manifest.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/environment/test_clawsbench_manifest.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/environment/test_manifest.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/environment/test_manifest_env.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/environment/test_protocol.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/environment/test_readiness.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/hello-world-task/environment/Dockerfile +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/hello-world-task/instruction.md +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/hello-world-task/solution/solve.sh +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/hello-world-task/task.toml +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/hello-world-task/tests/test.sh +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/terminal-bench-smoke-task/environment/Dockerfile +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/terminal-bench-smoke-task/instruction.md +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/terminal-bench-smoke-task/solution/solve.sh +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/terminal-bench-smoke-task/task.toml +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/terminal-bench-smoke-task/tests/test.sh +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/terminal-bench-smoke-task/tests/test_state.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/test_claude.sh +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/test_codex.sh +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/test_codex_custom_provider.sh +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/test_gemini.sh +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/test_openclaw.sh +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/traces/minimal-claude.jsonl +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/traces/minimal-opentraces.jsonl +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/fixtures/mock_acp_agent.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/fixtures/mock_acp_agent_interleaved.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/fixtures/mock_acp_agent_multi_turn.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/fixtures/mock_openai_responses_server.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/check_adapter_evidence.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/check_hosted_env_evidence.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/check_results.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/check_skillsbench_harbor_parity.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/check_trace_to_task_evidence.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/configs/claude-agent-acp.yaml +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/configs/codex-acp.yaml +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/configs/gemini.yaml +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/configs/harvey-lab-harness.yaml +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/configs/openclaw.yaml +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/configs/opencode.yaml +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/configs/openhands.yaml +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/configs/pi-acp.yaml +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/run.sh +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/run_suite.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/integration/suites/release.yaml +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_acp.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_acp_capability_advertising.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_acp_model_config_dispatch.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_acp_pinned_protocol_guard.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_acp_setup_failure_propagation.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_adapter_scripts.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_adapters.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_agent_cli.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_agent_env_resolution.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_agent_gemini_defaults.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_agent_idle_timeout_cli.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_agent_model_decouple.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_agent_registry.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_agent_setup.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_agent_spec.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_base_install_imports.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_bedrock_thinking.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_branch.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_capture_trajectory.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_clawsbench_slice.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_cli_daytona.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_cli_docs_drift.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_cli_misc.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_compat_harbor_registry.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_config_redaction.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_connect_as_env.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_continuallearningbench_adapter.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_dashboard_credential_env_scrub.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_dashboard_daytona_key.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_dashboard_no_host_paths.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_dashboard_release_evidence.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_dashboard_roadmap.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_dashboard_symlink_ingestion.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_dashboard_sync.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_daytona_command_polling.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_daytona_litellm_runtime.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_daytona_status.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_docker_prune_scoping.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_docker_uploads.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_docs_examples.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_eng50_capabilities.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_env_setup.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_environment_manifest_controls.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_eval_filters_applied.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_eval_sharding.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_eval_single_task_summary.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_eval_source_provenance.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_eval_worker_retry.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_eval_zero_task_guard.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_evaluation_environment_manifest.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_exclude_tasks.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_experiments_status.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_harvey_lab_shim.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_hilbench_adapter.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_hosted_env.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_hosted_env_rollout_contract.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_inbound_adapter_manifest.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_inbound_adapters.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_integration_check_results.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_integration_run_suite.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_internet_policy.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_job.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_job_sequential_shared.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_job_sequential_shared_resume.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_judge_symlink_ingestion.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_learner_skills.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_learner_skills_traversal.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_learner_store.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_learner_store_persistence.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_litellm_config.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_litellm_hardening.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_litellm_logging.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_litellm_runtime.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_litellm_smoke.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_llm_judge.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_llm_judge_event_tags.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_llm_judge_verifier.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_memory_scorer.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_metrics.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_mock_openai_responses_server.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_monitor_scaffold.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_native_acp_usage.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_no_cross_provider_fallback.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_notification_order_real.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_oracle.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_oracle_chokepoint.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_paths_safe.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_paths_symlink_helpers.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_pi_acp_launcher.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_process.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_provider_auth_detection.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_providers.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_reexport.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_registry_invariants.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_release_version.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_resolve_env_helpers.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_reward_node.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_reward_unified_contract.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_rewards.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_rewards_jsonl.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_rollout_architecture.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_rollout_branch.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_rollout_config_path_coercion.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_rollout_environment.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_rollout_import_no_side_effects.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_rollout_on_ask_user_wiring.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_rollout_probe_sandbox_health.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_rollout_upload.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_rubric_config.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_runtime.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_runtime_config_wired.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_runtime_live_sandbox.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_sandbox.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_sandbox_exec_secret_handling.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_sandbox_hardening.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_sandbox_isolation_copy_traversal.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_sandbox_multi_service.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_sandbox_protocol.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_sandbox_setup.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_sandbox_snapshot_contract.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_sandbox_upload_symlink.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_sandbox_verifier_workspace.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_scene.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_scene_outbox_trial.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_scene_parallel_group.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_scene_result_aggregation.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_scoring.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_sdk_internals.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_sdk_lockdown.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_self_gen_cli.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_self_gen_export_error_channel.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_self_gen_export_failures.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_self_gen_orchestration.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_session_request_permission_dispatch.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_skill_eval.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_skill_eval_dryrun.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_skill_eval_integration.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_skill_eval_sweep.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_skill_eval_traversal.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_skill_invocation_artifacts.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_skill_policy.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_skills.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_skills_dir_agent_home_link.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_skillsbench_harbor_parity.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_skillsbench_harbor_run_suite.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_smoke.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_subscription_auth.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_task_check_eval_consistency.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_task_config.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_task_download.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_tasks.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_token_usage_normalization.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_trace_import_cli.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_trace_task_gen_traversal.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_trace_to_task_evidence.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_traces_huggingface.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_traces_parsers.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_traces_task_gen.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_train_mode_artifact_emission.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_trajectory_integration.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_trajectory_streaming.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_trial_agent_timeout_verify.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_trial_install_agent_timeout.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_trial_litellm_runtime.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_usage_litellm.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_usage_required.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_usage_tracking.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_user.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_verifier_multi_container.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_verifier_output.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_verifier_output_freshness.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_verify.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_workflow_action_pinning.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/test_yaml_config.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/trajectories/__init__.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/trajectories/test_export.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/trajectories/test_export_nan_handling.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/trajectories/test_redaction.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/trajectories/test_step_granularity.py +0 -0
- {benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/trajectories/test_tree.py +0 -0
|
@@ -2,6 +2,16 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## 0.5.2 — 2026-06-05
|
|
6
|
+
|
|
7
|
+
### Changed
|
|
8
|
+
|
|
9
|
+
- **PyPI project README badge** — replace the dynamic PyPI version badge with
|
|
10
|
+
a stable package badge so the rendered project description cannot show a
|
|
11
|
+
stale external version image after a public release.
|
|
12
|
+
- **Release documentation refresh** — update public install snippets,
|
|
13
|
+
release-channel docs, examples, and citation metadata to `0.5.2`.
|
|
14
|
+
|
|
5
15
|
## 0.5.1 — 2026-06-05
|
|
6
16
|
|
|
7
17
|
### Added
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: benchflow
|
|
3
|
-
Version: 0.5.2.dev875
|
|
3
|
+
Version: 0.5.3.dev879
|
|
4
4
|
Summary: Multi-turn agent benchmarking with ACP — run any agent, any model, any provider.
|
|
5
5
|
Project-URL: Homepage, https://github.com/benchflow-ai/benchflow
|
|
6
6
|
Project-URL: Repository, https://github.com/benchflow-ai/benchflow
|
|
@@ -52,7 +52,7 @@ Description-Content-Type: text/markdown
|
|
|
52
52
|
<h1>BenchFlow</h1>
|
|
53
53
|
<p>Multi-turn agent benchmarking — Scene-based lifecycle for any ACP agent</p>
|
|
54
54
|
<a href="https://pypi.org/project/benchflow/" target="_blank">
|
|
55
|
-
<img src="https://img.shields.io/
|
|
55
|
+
<img src="https://img.shields.io/badge/PyPI-benchflow-3775A9?style=for-the-badge&logo=pypi&logoColor=white" alt="PyPI package">
|
|
56
56
|
</a>
|
|
57
57
|
<a href="https://discord.gg/mZ9Rc8q8W3" target="_blank">
|
|
58
58
|
<img src="https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord">
|
|
@@ -70,7 +70,7 @@ BenchFlow runs AI agents against benchmark tasks in sandboxed environments. Sing
|
|
|
70
70
|
|
|
71
71
|
## Install
|
|
72
72
|
|
|
73
|
-
BenchFlow's current public release is `0.5.1`:
|
|
73
|
+
BenchFlow's current public release is `0.5.2`:
|
|
74
74
|
|
|
75
75
|
```bash
|
|
76
76
|
pip install --upgrade benchflow
|
|
@@ -79,7 +79,7 @@ pip install --upgrade benchflow
|
|
|
79
79
|
For a `uv`-managed CLI install of the public release:
|
|
80
80
|
|
|
81
81
|
```bash
|
|
82
|
-
uv tool install --prerelease allow 'benchflow==0.5.1'
|
|
82
|
+
uv tool install --prerelease allow 'benchflow==0.5.2'
|
|
83
83
|
```
|
|
84
84
|
|
|
85
85
|
Requires Python 3.12+ and [uv](https://docs.astral.sh/uv/). Set `DAYTONA_API_KEY` for Daytona runs or configure Modal auth for Modal runs; export the relevant agent API key (`GEMINI_API_KEY`, `ANTHROPIC_API_KEY`, etc.) or run `claude login` / `codex --login` for subscription auth. Provider-prefixed models may use provider-specific credentials; Azure Foundry models use `AZURE_API_KEY` plus `AZURE_API_ENDPOINT`.
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
<h1>BenchFlow</h1>
|
|
3
3
|
<p>Multi-turn agent benchmarking — Scene-based lifecycle for any ACP agent</p>
|
|
4
4
|
<a href="https://pypi.org/project/benchflow/" target="_blank">
|
|
5
|
-
<img src="https://img.shields.io/
|
|
5
|
+
<img src="https://img.shields.io/badge/PyPI-benchflow-3775A9?style=for-the-badge&logo=pypi&logoColor=white" alt="PyPI package">
|
|
6
6
|
</a>
|
|
7
7
|
<a href="https://discord.gg/mZ9Rc8q8W3" target="_blank">
|
|
8
8
|
<img src="https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord">
|
|
@@ -20,7 +20,7 @@ BenchFlow runs AI agents against benchmark tasks in sandboxed environments. Sing
|
|
|
20
20
|
|
|
21
21
|
## Install
|
|
22
22
|
|
|
23
|
-
BenchFlow's current public release is `0.5.1`:
|
|
23
|
+
BenchFlow's current public release is `0.5.2`:
|
|
24
24
|
|
|
25
25
|
```bash
|
|
26
26
|
pip install --upgrade benchflow
|
|
@@ -29,7 +29,7 @@ pip install --upgrade benchflow
|
|
|
29
29
|
For a `uv`-managed CLI install of the public release:
|
|
30
30
|
|
|
31
31
|
```bash
|
|
32
|
-
uv tool install --prerelease allow 'benchflow==0.5.1'
|
|
32
|
+
uv tool install --prerelease allow 'benchflow==0.5.2'
|
|
33
33
|
```
|
|
34
34
|
|
|
35
35
|
Requires Python 3.12+ and [uv](https://docs.astral.sh/uv/). Set `DAYTONA_API_KEY` for Daytona runs or configure Modal auth for Modal runs; export the relevant agent API key (`GEMINI_API_KEY`, `ANTHROPIC_API_KEY`, etc.) or run `claude login` / `codex --login` for subscription auth. Provider-prefixed models may use provider-specific credentials; Azure Foundry models use `AZURE_API_KEY` plus `AZURE_API_ENDPOINT`.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/_utils/evaluation_results.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/agents/harvey_lab_acp_shim.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/demo_task/environment/Dockerfile
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/experimental/mcp/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/experimental/mcp/reviewer_server.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/providers/litellm_bedrock_patch.py
RENAMED
|
File without changes
|
|
File without changes
|
{benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/providers/litellm_logging.py
RENAMED
|
File without changes
|
{benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/src/benchflow/providers/litellm_runtime.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/acp_smoke/environment/Dockerfile
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/acp_smoke/instruction.md
RENAMED
|
File without changes
|
{benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/acp_smoke/solution/solve.sh
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/conformance/conformance-results.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/environment/test_chibench_manifest.py
RENAMED
|
File without changes
|
{benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/environment/test_clawsbench_manifest.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/hello-world-task/instruction.md
RENAMED
|
File without changes
|
{benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/hello-world-task/solution/solve.sh
RENAMED
|
File without changes
|
|
File without changes
|
{benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/hello-world-task/tests/test.sh
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/terminal-bench-smoke-task/task.toml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.2.dev875 → benchflow-0.5.3.dev879}/tests/examples/test_codex_custom_provider.sh
RENAMED
|
File without changes
|
|
File without changes
|