benchflow 0.5.1.dev869__tar.gz → 0.5.2.dev875__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/CHANGELOG.md +23 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/PKG-INFO +10 -2
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/README.md +9 -1
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/pyproject.toml +1 -1
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/.gitignore +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/LICENSE +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/__init__.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/_dotenv.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/_paths.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/_run.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/_types.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/_utils/__init__.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/_utils/benchmark_repos.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/_utils/config.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/_utils/evaluation_results.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/_utils/json_safe.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/_utils/learner_memory.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/_utils/result_metadata.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/_utils/reward_events.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/_utils/scoring.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/_utils/source_provenance.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/_utils/task_authoring.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/_utils/yaml_loader.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/acp/__init__.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/acp/client.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/acp/container_transport.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/acp/runtime.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/acp/session.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/acp/transport.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/acp/types.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/adapters/__init__.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/adapters/harbor.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/adapters/inbound.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/adapters/inspect_ai.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/adapters/ors.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/adapters/terminal_bench.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/agents/__init__.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/agents/codex_config.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/agents/credentials.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/agents/env.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/agents/errors.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/agents/harvey_lab_acp_shim.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/agents/install.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/agents/openclaw_acp_shim.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/agents/pi_acp_launcher.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/agents/protocol.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/agents/providers.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/agents/registry.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/branch.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/cli/__init__.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/cli/main.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/cli/trace_import.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/compat/__init__.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/compat/harbor_registry.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/contracts/__init__.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/contracts/planes.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/contracts/user.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/demo_task/environment/Dockerfile +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/demo_task/instruction.md +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/demo_task/task.toml +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/demo_task/tests/test.sh +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/diagnostics.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/environment/__init__.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/environment/manifest.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/environment/manifest_env.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/environment/protocol.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/environment/readiness.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/eval_sharding.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/eval_worker.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/evaluation.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/experimental/__init__.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/experimental/mcp/__init__.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/experimental/mcp/hooks.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/experimental/mcp/reviewer_server.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/hosted_env.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/learner_skills.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/learner_store.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/metrics.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/models.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/monitor.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/providers/__init__.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/providers/litellm_bedrock_patch.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/providers/litellm_config.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/providers/litellm_logging.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/providers/litellm_runtime.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/providers/runtime.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/py.typed +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/rewards/README.md +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/rewards/__init__.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/rewards/builtins.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/rewards/events.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/rewards/file_readers.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/rewards/llm.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/rewards/memory_scorer.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/rewards/node.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/rewards/protocol.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/rewards/rubric.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/rewards/rubric_config.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/rewards/validation.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/rollout.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/rollout_branch.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/rollout_planes.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/runtime.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/__init__.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/_base.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/_compose.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/_compose_files/docker-compose-base.yaml +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/_compose_files/docker-compose-build.yaml +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/_compose_files/docker-compose-no-network.yaml +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/_compose_files/docker-compose-prebuilt.yaml +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/_sdk_ops.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/daytona.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/docker.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/lockdown.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/metadata.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/modal_impl.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/process.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/protocol.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/services.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/setup.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/snapshot.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/user.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/scenes.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/sdk.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/self_gen.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/skill_eval/__init__.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/skill_eval/_core.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/skill_eval/gepa_export.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/skill_eval/schema.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/skill_policy.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/skills.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/task/__init__.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/task/config.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/task/env.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/task/paths.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/task/task.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/task/verifier.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/templates/__init__.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/templates/judge.py.tmpl +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/templates/test.sh.tmpl +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/traces/__init__.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/traces/huggingface.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/traces/local.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/traces/models.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/traces/parsers.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/traces/task_gen.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/trajectories/__init__.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/trajectories/_capture.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/trajectories/export.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/trajectories/metrics.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/trajectories/otel.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/trajectories/tree.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/trajectories/types.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/trajectories/viewer.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/usage_tracking.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/__init__.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/agents/__init__.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/agents/test_protocol.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/conformance/README.md +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/conformance/acp_smoke/environment/Dockerfile +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/conformance/acp_smoke/environment/docker-compose.yaml +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/conformance/acp_smoke/environment/skills/conformance-writer/SKILL.md +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/conformance/acp_smoke/instruction.md +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/conformance/acp_smoke/solution/solve.sh +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/conformance/acp_smoke/task.toml +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/conformance/acp_smoke/tests/test.sh +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/conformance/conformance-results.json +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/conformance/proof_multi_agent.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/conformance/proof_snapshot.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/conformance/run_conformance.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/conformance/self_gen_smoke_skills/skill-creator/SKILL.md +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/conftest.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/environment/__init__.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/environment/test_chibench_manifest.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/environment/test_clawsbench_manifest.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/environment/test_manifest.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/environment/test_manifest_env.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/environment/test_protocol.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/environment/test_readiness.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/examples/hello-world-task/environment/Dockerfile +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/examples/hello-world-task/instruction.md +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/examples/hello-world-task/solution/solve.sh +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/examples/hello-world-task/task.toml +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/examples/hello-world-task/tests/test.sh +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/examples/terminal-bench-smoke-task/environment/Dockerfile +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/examples/terminal-bench-smoke-task/instruction.md +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/examples/terminal-bench-smoke-task/solution/solve.sh +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/examples/terminal-bench-smoke-task/task.toml +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/examples/terminal-bench-smoke-task/tests/test.sh +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/examples/terminal-bench-smoke-task/tests/test_state.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/examples/test_claude.sh +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/examples/test_codex.sh +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/examples/test_codex_custom_provider.sh +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/examples/test_gemini.sh +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/examples/test_openclaw.sh +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/examples/traces/minimal-claude.jsonl +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/examples/traces/minimal-opentraces.jsonl +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/fixtures/mock_acp_agent.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/fixtures/mock_acp_agent_interleaved.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/fixtures/mock_acp_agent_multi_turn.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/fixtures/mock_openai_responses_server.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/integration/check_adapter_evidence.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/integration/check_hosted_env_evidence.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/integration/check_results.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/integration/check_skillsbench_harbor_parity.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/integration/check_trace_to_task_evidence.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/integration/configs/claude-agent-acp.yaml +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/integration/configs/codex-acp.yaml +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/integration/configs/gemini.yaml +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/integration/configs/harvey-lab-harness.yaml +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/integration/configs/openclaw.yaml +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/integration/configs/opencode.yaml +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/integration/configs/openhands.yaml +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/integration/configs/pi-acp.yaml +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/integration/run.sh +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/integration/run_suite.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/integration/suites/release.yaml +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_acp.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_acp_capability_advertising.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_acp_model_config_dispatch.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_acp_pinned_protocol_guard.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_acp_setup_failure_propagation.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_adapter_scripts.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_adapters.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_agent_cli.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_agent_env_resolution.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_agent_gemini_defaults.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_agent_idle_timeout_cli.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_agent_model_decouple.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_agent_registry.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_agent_setup.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_agent_spec.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_base_install_imports.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_bedrock_thinking.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_branch.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_capture_trajectory.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_clawsbench_slice.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_cli_daytona.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_cli_docs_drift.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_cli_misc.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_compat_harbor_registry.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_config_redaction.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_connect_as_env.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_continuallearningbench_adapter.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_dashboard_credential_env_scrub.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_dashboard_daytona_key.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_dashboard_no_host_paths.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_dashboard_release_evidence.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_dashboard_roadmap.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_dashboard_symlink_ingestion.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_dashboard_sync.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_daytona_command_polling.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_daytona_litellm_runtime.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_daytona_status.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_docker_prune_scoping.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_docker_uploads.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_docs_examples.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_eng50_capabilities.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_env_setup.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_environment_manifest_controls.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_eval_filters_applied.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_eval_sharding.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_eval_single_task_summary.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_eval_source_provenance.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_eval_worker_retry.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_eval_zero_task_guard.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_evaluation_environment_manifest.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_exclude_tasks.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_experiments_status.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_harvey_lab_shim.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_hilbench_adapter.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_hosted_env.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_hosted_env_rollout_contract.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_inbound_adapter_manifest.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_inbound_adapters.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_integration_check_results.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_integration_run_suite.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_internet_policy.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_job.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_job_sequential_shared.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_job_sequential_shared_resume.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_judge_symlink_ingestion.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_learner_skills.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_learner_skills_traversal.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_learner_store.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_learner_store_persistence.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_litellm_config.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_litellm_hardening.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_litellm_logging.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_litellm_runtime.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_litellm_smoke.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_llm_judge.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_llm_judge_event_tags.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_llm_judge_verifier.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_memory_scorer.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_metrics.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_mock_openai_responses_server.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_monitor_scaffold.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_native_acp_usage.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_no_cross_provider_fallback.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_notification_order_real.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_oracle.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_oracle_chokepoint.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_paths_safe.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_paths_symlink_helpers.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_pi_acp_launcher.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_process.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_provider_auth_detection.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_providers.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_reexport.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_registry_invariants.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_release_version.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_resolve_env_helpers.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_reward_node.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_reward_unified_contract.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_rewards.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_rewards_jsonl.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_rollout_architecture.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_rollout_branch.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_rollout_config_path_coercion.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_rollout_environment.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_rollout_import_no_side_effects.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_rollout_on_ask_user_wiring.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_rollout_probe_sandbox_health.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_rollout_upload.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_rubric_config.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_runtime.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_runtime_config_wired.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_runtime_live_sandbox.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_sandbox.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_sandbox_exec_secret_handling.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_sandbox_hardening.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_sandbox_isolation_copy_traversal.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_sandbox_multi_service.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_sandbox_protocol.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_sandbox_setup.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_sandbox_snapshot_contract.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_sandbox_upload_symlink.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_sandbox_verifier_workspace.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_scene.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_scene_outbox_trial.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_scene_parallel_group.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_scene_result_aggregation.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_scoring.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_sdk_internals.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_sdk_lockdown.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_self_gen_cli.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_self_gen_export_error_channel.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_self_gen_export_failures.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_self_gen_orchestration.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_session_request_permission_dispatch.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_skill_eval.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_skill_eval_dryrun.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_skill_eval_integration.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_skill_eval_sweep.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_skill_eval_traversal.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_skill_invocation_artifacts.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_skill_policy.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_skills.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_skills_dir_agent_home_link.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_skillsbench_harbor_parity.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_skillsbench_harbor_run_suite.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_smoke.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_subscription_auth.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_task_check_eval_consistency.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_task_config.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_task_download.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_tasks.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_token_usage_normalization.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_trace_import_cli.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_trace_task_gen_traversal.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_trace_to_task_evidence.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_traces_huggingface.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_traces_parsers.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_traces_task_gen.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_train_mode_artifact_emission.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_trajectory_integration.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_trajectory_streaming.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_trial_agent_timeout_verify.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_trial_install_agent_timeout.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_trial_litellm_runtime.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_usage_litellm.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_usage_required.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_usage_tracking.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_user.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_verifier_multi_container.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_verifier_output.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_verifier_output_freshness.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_verify.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_workflow_action_pinning.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/test_yaml_config.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/trajectories/__init__.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/trajectories/test_export.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/trajectories/test_export_nan_handling.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/trajectories/test_redaction.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/trajectories/test_step_granularity.py +0 -0
- {benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/trajectories/test_tree.py +0 -0
|
@@ -2,16 +2,39 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## 0.5.1 — 2026-06-05
|
|
6
|
+
|
|
5
7
|
### Added
|
|
6
8
|
|
|
7
9
|
- **Daytona usage telemetry by default** — Daytona runs now start a sandbox-local provider usage proxy so token/cost telemetry works without an external tunnel; use `--usage-tracking off` to bypass proxying when needed.
|
|
8
10
|
- **Azure AI Foundry providers** — new `azure-foundry-openai/` and `azure-foundry-anthropic/` prefixes routing through Foundry's unified resource. Export `AZURE_API_KEY` plus `AZURE_API_ENDPOINT` (e.g. `https://<resource>.openai.azure.com/`); benchflow derives the resource name from the endpoint host, builds the per-surface base URL, and maps the key onto the agent-native auth env automatically. Missing/unrecognized endpoints and unsupported agent/provider protocol pairings fail fast with clear errors instead of falling through to the wrong endpoint.
|
|
9
11
|
- **Azure Foundry auth guidance** — agent discovery output and docs now call out that provider-prefixed models can use provider-specific credentials instead of the agent's native/default API key.
|
|
10
12
|
|
|
13
|
+
### Changed
|
|
14
|
+
|
|
15
|
+
- **PyPI project documentation refresh** — the public package README, install snippets, release-channel docs, examples, and citation metadata now point at `0.5.1`.
|
|
16
|
+
|
|
11
17
|
### Fixed
|
|
12
18
|
|
|
13
19
|
- Inherit `BENCHFLOW_PROVIDER_BASE_URL` / `BENCHFLOW_PROVIDER_API_KEY` from the host environment so self-hosted / OpenAI-compatible endpoints route correctly instead of falling back to `api.openai.com`; empty or whitespace-only host values are skipped so they cannot shadow the resolved provider URL (benchflow-ai/skillsbench#817).
|
|
14
20
|
|
|
21
|
+
## 0.5.0 — 2026-06-04
|
|
22
|
+
|
|
23
|
+
### Added
|
|
24
|
+
|
|
25
|
+
- **Public/internal preview release channels** — tag-driven public releases publish stable PyPI packages and GitHub Releases; merges to `main` publish internal preview `.devN` packages after CI passes.
|
|
26
|
+
- **v0.5 integration evidence** — release validation docs now cover urgent blocker closure, SkillsBench infra-fix validation, adapter evidence, trace-to-task evidence, hosted env compatibility, and diagnostic fields.
|
|
27
|
+
- **Release automation guardrails** — public release tags must point at commits contained in `main`, version tags must match `pyproject.toml`, and PyPI publishing uses Trusted Publishing/OIDC instead of stored tokens.
|
|
28
|
+
|
|
29
|
+
### Changed
|
|
30
|
+
|
|
31
|
+
- `main` now tracks the next public version as `0.5.1.dev0`; the published public SDK is `0.5.0`, and internal previews are emitted as `0.5.1.dev<N>`.
|
|
32
|
+
- Documentation now directs downstream users to depend on public PyPI releases by default and use prerelease-enabled internal previews only for validation before the next public cut.
|
|
33
|
+
|
|
34
|
+
### Fixed
|
|
35
|
+
|
|
36
|
+
- Closed the v0.5 release blocker set covering structured sandbox/verifier diagnostics, Daytona startup/export retries, verifier dependency classification, CTRF path consistency, and SkillsBench task compatibility evidence.
|
|
37
|
+
|
|
15
38
|
## 0.3.3 — 2026-05-15
|
|
16
39
|
|
|
17
40
|
### Added
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: benchflow
|
|
3
|
-
Version: 0.5.1.dev869
|
|
3
|
+
Version: 0.5.2.dev875
|
|
4
4
|
Summary: Multi-turn agent benchmarking with ACP — run any agent, any model, any provider.
|
|
5
5
|
Project-URL: Homepage, https://github.com/benchflow-ai/benchflow
|
|
6
6
|
Project-URL: Repository, https://github.com/benchflow-ai/benchflow
|
|
@@ -70,8 +70,16 @@ BenchFlow runs AI agents against benchmark tasks in sandboxed environments. Sing
|
|
|
70
70
|
|
|
71
71
|
## Install
|
|
72
72
|
|
|
73
|
+
BenchFlow's current public release is `0.5.1`:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
pip install --upgrade benchflow
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
For a `uv`-managed CLI install of the public release:
|
|
80
|
+
|
|
73
81
|
```bash
|
|
74
|
-
uv tool install benchflow
|
|
82
|
+
uv tool install --prerelease allow 'benchflow==0.5.1'
|
|
75
83
|
```
|
|
76
84
|
|
|
77
85
|
Requires Python 3.12+ and [uv](https://docs.astral.sh/uv/). Set `DAYTONA_API_KEY` for Daytona runs or configure Modal auth for Modal runs; export the relevant agent API key (`GEMINI_API_KEY`, `ANTHROPIC_API_KEY`, etc.) or run `claude login` / `codex --login` for subscription auth. Provider-prefixed models may use provider-specific credentials; Azure Foundry models use `AZURE_API_KEY` plus `AZURE_API_ENDPOINT`.
|
|
@@ -20,8 +20,16 @@ BenchFlow runs AI agents against benchmark tasks in sandboxed environments. Sing
|
|
|
20
20
|
|
|
21
21
|
## Install
|
|
22
22
|
|
|
23
|
+
BenchFlow's current public release is `0.5.1`:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install --upgrade benchflow
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
For a `uv`-managed CLI install of the public release:
|
|
30
|
+
|
|
23
31
|
```bash
|
|
24
|
-
uv tool install benchflow
|
|
32
|
+
uv tool install --prerelease allow 'benchflow==0.5.1'
|
|
25
33
|
```
|
|
26
34
|
|
|
27
35
|
Requires Python 3.12+ and [uv](https://docs.astral.sh/uv/). Set `DAYTONA_API_KEY` for Daytona runs or configure Modal auth for Modal runs; export the relevant agent API key (`GEMINI_API_KEY`, `ANTHROPIC_API_KEY`, etc.) or run `claude login` / `codex --login` for subscription auth. Provider-prefixed models may use provider-specific credentials; Azure Foundry models use `AZURE_API_KEY` plus `AZURE_API_ENDPOINT`.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/_utils/evaluation_results.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/agents/harvey_lab_acp_shim.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/demo_task/environment/Dockerfile
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/experimental/mcp/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/experimental/mcp/reviewer_server.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/providers/litellm_bedrock_patch.py
RENAMED
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/providers/litellm_logging.py
RENAMED
|
File without changes
|
{benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/src/benchflow/providers/litellm_runtime.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/conformance/acp_smoke/environment/Dockerfile
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/conformance/acp_smoke/instruction.md
RENAMED
|
File without changes
|
{benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/conformance/acp_smoke/solution/solve.sh
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/conformance/conformance-results.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/environment/test_chibench_manifest.py
RENAMED
|
File without changes
|
{benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/environment/test_clawsbench_manifest.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/examples/hello-world-task/instruction.md
RENAMED
|
File without changes
|
{benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/examples/hello-world-task/solution/solve.sh
RENAMED
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/examples/hello-world-task/tests/test.sh
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev869 → benchflow-0.5.2.dev875}/tests/examples/terminal-bench-smoke-task/task.toml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|