benchflow 0.5.1.dev871__tar.gz → 0.5.2.dev875__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/CHANGELOG.md +6 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/PKG-INFO +3 -3
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/README.md +2 -2
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/pyproject.toml +1 -1
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/.gitignore +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/LICENSE +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/__init__.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/_dotenv.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/_paths.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/_run.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/_types.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/_utils/__init__.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/_utils/benchmark_repos.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/_utils/config.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/_utils/evaluation_results.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/_utils/json_safe.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/_utils/learner_memory.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/_utils/result_metadata.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/_utils/reward_events.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/_utils/scoring.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/_utils/source_provenance.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/_utils/task_authoring.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/_utils/yaml_loader.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/acp/__init__.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/acp/client.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/acp/container_transport.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/acp/runtime.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/acp/session.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/acp/transport.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/acp/types.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/adapters/__init__.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/adapters/harbor.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/adapters/inbound.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/adapters/inspect_ai.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/adapters/ors.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/adapters/terminal_bench.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/agents/__init__.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/agents/codex_config.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/agents/credentials.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/agents/env.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/agents/errors.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/agents/harvey_lab_acp_shim.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/agents/install.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/agents/openclaw_acp_shim.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/agents/pi_acp_launcher.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/agents/protocol.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/agents/providers.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/agents/registry.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/branch.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/cli/__init__.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/cli/main.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/cli/trace_import.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/compat/__init__.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/compat/harbor_registry.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/contracts/__init__.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/contracts/planes.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/contracts/user.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/demo_task/environment/Dockerfile +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/demo_task/instruction.md +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/demo_task/task.toml +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/demo_task/tests/test.sh +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/diagnostics.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/environment/__init__.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/environment/manifest.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/environment/manifest_env.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/environment/protocol.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/environment/readiness.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/eval_sharding.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/eval_worker.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/evaluation.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/experimental/__init__.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/experimental/mcp/__init__.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/experimental/mcp/hooks.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/experimental/mcp/reviewer_server.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/hosted_env.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/learner_skills.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/learner_store.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/metrics.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/models.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/monitor.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/providers/__init__.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/providers/litellm_bedrock_patch.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/providers/litellm_config.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/providers/litellm_logging.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/providers/litellm_runtime.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/providers/runtime.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/py.typed +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/rewards/README.md +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/rewards/__init__.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/rewards/builtins.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/rewards/events.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/rewards/file_readers.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/rewards/llm.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/rewards/memory_scorer.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/rewards/node.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/rewards/protocol.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/rewards/rubric.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/rewards/rubric_config.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/rewards/validation.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/rollout.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/rollout_branch.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/rollout_planes.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/runtime.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/__init__.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/_base.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/_compose.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/_compose_files/docker-compose-base.yaml +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/_compose_files/docker-compose-build.yaml +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/_compose_files/docker-compose-no-network.yaml +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/_compose_files/docker-compose-prebuilt.yaml +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/_sdk_ops.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/daytona.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/docker.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/lockdown.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/metadata.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/modal_impl.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/process.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/protocol.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/services.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/setup.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/snapshot.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/sandbox/user.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/scenes.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/sdk.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/self_gen.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/skill_eval/__init__.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/skill_eval/_core.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/skill_eval/gepa_export.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/skill_eval/schema.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/skill_policy.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/skills.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/task/__init__.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/task/config.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/task/env.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/task/paths.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/task/task.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/task/verifier.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/templates/__init__.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/templates/judge.py.tmpl +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/templates/test.sh.tmpl +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/traces/__init__.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/traces/huggingface.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/traces/local.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/traces/models.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/traces/parsers.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/traces/task_gen.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/trajectories/__init__.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/trajectories/_capture.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/trajectories/export.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/trajectories/metrics.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/trajectories/otel.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/trajectories/tree.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/trajectories/types.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/trajectories/viewer.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/usage_tracking.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/__init__.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/agents/__init__.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/agents/test_protocol.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/conformance/README.md +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/conformance/acp_smoke/environment/Dockerfile +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/conformance/acp_smoke/environment/docker-compose.yaml +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/conformance/acp_smoke/environment/skills/conformance-writer/SKILL.md +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/conformance/acp_smoke/instruction.md +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/conformance/acp_smoke/solution/solve.sh +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/conformance/acp_smoke/task.toml +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/conformance/acp_smoke/tests/test.sh +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/conformance/conformance-results.json +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/conformance/proof_multi_agent.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/conformance/proof_snapshot.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/conformance/run_conformance.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/conformance/self_gen_smoke_skills/skill-creator/SKILL.md +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/conftest.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/environment/__init__.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/environment/test_chibench_manifest.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/environment/test_clawsbench_manifest.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/environment/test_manifest.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/environment/test_manifest_env.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/environment/test_protocol.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/environment/test_readiness.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/hello-world-task/environment/Dockerfile +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/hello-world-task/instruction.md +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/hello-world-task/solution/solve.sh +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/hello-world-task/task.toml +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/hello-world-task/tests/test.sh +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/terminal-bench-smoke-task/environment/Dockerfile +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/terminal-bench-smoke-task/instruction.md +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/terminal-bench-smoke-task/solution/solve.sh +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/terminal-bench-smoke-task/task.toml +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/terminal-bench-smoke-task/tests/test.sh +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/terminal-bench-smoke-task/tests/test_state.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/test_claude.sh +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/test_codex.sh +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/test_codex_custom_provider.sh +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/test_gemini.sh +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/test_openclaw.sh +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/traces/minimal-claude.jsonl +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/traces/minimal-opentraces.jsonl +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/fixtures/mock_acp_agent.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/fixtures/mock_acp_agent_interleaved.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/fixtures/mock_acp_agent_multi_turn.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/fixtures/mock_openai_responses_server.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/integration/check_adapter_evidence.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/integration/check_hosted_env_evidence.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/integration/check_results.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/integration/check_skillsbench_harbor_parity.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/integration/check_trace_to_task_evidence.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/integration/configs/claude-agent-acp.yaml +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/integration/configs/codex-acp.yaml +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/integration/configs/gemini.yaml +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/integration/configs/harvey-lab-harness.yaml +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/integration/configs/openclaw.yaml +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/integration/configs/opencode.yaml +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/integration/configs/openhands.yaml +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/integration/configs/pi-acp.yaml +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/integration/run.sh +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/integration/run_suite.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/integration/suites/release.yaml +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_acp.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_acp_capability_advertising.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_acp_model_config_dispatch.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_acp_pinned_protocol_guard.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_acp_setup_failure_propagation.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_adapter_scripts.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_adapters.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_agent_cli.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_agent_env_resolution.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_agent_gemini_defaults.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_agent_idle_timeout_cli.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_agent_model_decouple.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_agent_registry.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_agent_setup.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_agent_spec.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_base_install_imports.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_bedrock_thinking.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_branch.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_capture_trajectory.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_clawsbench_slice.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_cli_daytona.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_cli_docs_drift.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_cli_misc.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_compat_harbor_registry.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_config_redaction.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_connect_as_env.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_continuallearningbench_adapter.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_dashboard_credential_env_scrub.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_dashboard_daytona_key.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_dashboard_no_host_paths.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_dashboard_release_evidence.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_dashboard_roadmap.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_dashboard_symlink_ingestion.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_dashboard_sync.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_daytona_command_polling.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_daytona_litellm_runtime.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_daytona_status.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_docker_prune_scoping.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_docker_uploads.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_docs_examples.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_eng50_capabilities.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_env_setup.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_environment_manifest_controls.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_eval_filters_applied.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_eval_sharding.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_eval_single_task_summary.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_eval_source_provenance.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_eval_worker_retry.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_eval_zero_task_guard.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_evaluation_environment_manifest.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_exclude_tasks.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_experiments_status.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_harvey_lab_shim.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_hilbench_adapter.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_hosted_env.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_hosted_env_rollout_contract.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_inbound_adapter_manifest.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_inbound_adapters.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_integration_check_results.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_integration_run_suite.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_internet_policy.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_job.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_job_sequential_shared.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_job_sequential_shared_resume.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_judge_symlink_ingestion.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_learner_skills.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_learner_skills_traversal.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_learner_store.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_learner_store_persistence.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_litellm_config.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_litellm_hardening.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_litellm_logging.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_litellm_runtime.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_litellm_smoke.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_llm_judge.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_llm_judge_event_tags.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_llm_judge_verifier.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_memory_scorer.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_metrics.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_mock_openai_responses_server.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_monitor_scaffold.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_native_acp_usage.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_no_cross_provider_fallback.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_notification_order_real.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_oracle.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_oracle_chokepoint.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_paths_safe.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_paths_symlink_helpers.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_pi_acp_launcher.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_process.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_provider_auth_detection.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_providers.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_reexport.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_registry_invariants.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_release_version.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_resolve_env_helpers.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_reward_node.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_reward_unified_contract.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_rewards.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_rewards_jsonl.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_rollout_architecture.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_rollout_branch.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_rollout_config_path_coercion.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_rollout_environment.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_rollout_import_no_side_effects.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_rollout_on_ask_user_wiring.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_rollout_probe_sandbox_health.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_rollout_upload.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_rubric_config.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_runtime.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_runtime_config_wired.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_runtime_live_sandbox.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_sandbox.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_sandbox_exec_secret_handling.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_sandbox_hardening.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_sandbox_isolation_copy_traversal.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_sandbox_multi_service.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_sandbox_protocol.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_sandbox_setup.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_sandbox_snapshot_contract.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_sandbox_upload_symlink.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_sandbox_verifier_workspace.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_scene.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_scene_outbox_trial.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_scene_parallel_group.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_scene_result_aggregation.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_scoring.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_sdk_internals.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_sdk_lockdown.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_self_gen_cli.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_self_gen_export_error_channel.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_self_gen_export_failures.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_self_gen_orchestration.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_session_request_permission_dispatch.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_skill_eval.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_skill_eval_dryrun.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_skill_eval_integration.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_skill_eval_sweep.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_skill_eval_traversal.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_skill_invocation_artifacts.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_skill_policy.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_skills.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_skills_dir_agent_home_link.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_skillsbench_harbor_parity.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_skillsbench_harbor_run_suite.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_smoke.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_subscription_auth.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_task_check_eval_consistency.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_task_config.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_task_download.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_tasks.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_token_usage_normalization.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_trace_import_cli.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_trace_task_gen_traversal.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_trace_to_task_evidence.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_traces_huggingface.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_traces_parsers.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_traces_task_gen.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_train_mode_artifact_emission.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_trajectory_integration.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_trajectory_streaming.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_trial_agent_timeout_verify.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_trial_install_agent_timeout.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_trial_litellm_runtime.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_usage_litellm.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_usage_required.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_usage_tracking.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_user.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_verifier_multi_container.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_verifier_output.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_verifier_output_freshness.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_verify.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_workflow_action_pinning.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/test_yaml_config.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/trajectories/__init__.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/trajectories/test_export.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/trajectories/test_export_nan_handling.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/trajectories/test_redaction.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/trajectories/test_step_granularity.py +0 -0
- {benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/trajectories/test_tree.py +0 -0
|
@@ -2,12 +2,18 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## 0.5.1 — 2026-06-05
|
|
6
|
+
|
|
5
7
|
### Added
|
|
6
8
|
|
|
7
9
|
- **Daytona usage telemetry by default** — Daytona runs now start a sandbox-local provider usage proxy so token/cost telemetry works without an external tunnel; use `--usage-tracking off` to bypass proxying when needed.
|
|
8
10
|
- **Azure AI Foundry providers** — new `azure-foundry-openai/` and `azure-foundry-anthropic/` prefixes routing through Foundry's unified resource. Export `AZURE_API_KEY` plus `AZURE_API_ENDPOINT` (e.g. `https://<resource>.openai.azure.com/`); benchflow derives the resource name from the endpoint host, builds the per-surface base URL, and maps the key onto the agent-native auth env automatically. Missing/unrecognized endpoints and unsupported agent/provider protocol pairings fail fast with clear errors instead of falling through to the wrong endpoint.
|
|
9
11
|
- **Azure Foundry auth guidance** — agent discovery output and docs now call out that provider-prefixed models can use provider-specific credentials instead of the agent's native/default API key.
|
|
10
12
|
|
|
13
|
+
### Changed
|
|
14
|
+
|
|
15
|
+
- **PyPI project documentation refresh** — the public package README, install snippets, release-channel docs, examples, and citation metadata now point at `0.5.1`.
|
|
16
|
+
|
|
11
17
|
### Fixed
|
|
12
18
|
|
|
13
19
|
- Inherit `BENCHFLOW_PROVIDER_BASE_URL` / `BENCHFLOW_PROVIDER_API_KEY` from the host environment so self-hosted / OpenAI-compatible endpoints route correctly instead of falling back to `api.openai.com`; empty or whitespace-only host values are skipped so they cannot shadow the resolved provider URL (benchflow-ai/skillsbench#817).
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: benchflow
|
|
3
|
-
Version: 0.5.1.dev871
|
|
3
|
+
Version: 0.5.2.dev875
|
|
4
4
|
Summary: Multi-turn agent benchmarking with ACP — run any agent, any model, any provider.
|
|
5
5
|
Project-URL: Homepage, https://github.com/benchflow-ai/benchflow
|
|
6
6
|
Project-URL: Repository, https://github.com/benchflow-ai/benchflow
|
|
@@ -70,7 +70,7 @@ BenchFlow runs AI agents against benchmark tasks in sandboxed environments. Sing
|
|
|
70
70
|
|
|
71
71
|
## Install
|
|
72
72
|
|
|
73
|
-
BenchFlow's current public release is `0.5.
|
|
73
|
+
BenchFlow's current public release is `0.5.1`:
|
|
74
74
|
|
|
75
75
|
```bash
|
|
76
76
|
pip install --upgrade benchflow
|
|
@@ -79,7 +79,7 @@ pip install --upgrade benchflow
|
|
|
79
79
|
For a `uv`-managed CLI install of the public release:
|
|
80
80
|
|
|
81
81
|
```bash
|
|
82
|
-
uv tool install --prerelease allow 'benchflow==0.5.
|
|
82
|
+
uv tool install --prerelease allow 'benchflow==0.5.1'
|
|
83
83
|
```
|
|
84
84
|
|
|
85
85
|
Requires Python 3.12+ and [uv](https://docs.astral.sh/uv/). Set `DAYTONA_API_KEY` for Daytona runs or configure Modal auth for Modal runs; export the relevant agent API key (`GEMINI_API_KEY`, `ANTHROPIC_API_KEY`, etc.) or run `claude login` / `codex --login` for subscription auth. Provider-prefixed models may use provider-specific credentials; Azure Foundry models use `AZURE_API_KEY` plus `AZURE_API_ENDPOINT`.
|
|
@@ -20,7 +20,7 @@ BenchFlow runs AI agents against benchmark tasks in sandboxed environments. Sing
|
|
|
20
20
|
|
|
21
21
|
## Install
|
|
22
22
|
|
|
23
|
-
BenchFlow's current public release is `0.5.
|
|
23
|
+
BenchFlow's current public release is `0.5.1`:
|
|
24
24
|
|
|
25
25
|
```bash
|
|
26
26
|
pip install --upgrade benchflow
|
|
@@ -29,7 +29,7 @@ pip install --upgrade benchflow
|
|
|
29
29
|
For a `uv`-managed CLI install of the public release:
|
|
30
30
|
|
|
31
31
|
```bash
|
|
32
|
-
uv tool install --prerelease allow 'benchflow==0.5.
|
|
32
|
+
uv tool install --prerelease allow 'benchflow==0.5.1'
|
|
33
33
|
```
|
|
34
34
|
|
|
35
35
|
Requires Python 3.12+ and [uv](https://docs.astral.sh/uv/). Set `DAYTONA_API_KEY` for Daytona runs or configure Modal auth for Modal runs; export the relevant agent API key (`GEMINI_API_KEY`, `ANTHROPIC_API_KEY`, etc.) or run `claude login` / `codex --login` for subscription auth. Provider-prefixed models may use provider-specific credentials; Azure Foundry models use `AZURE_API_KEY` plus `AZURE_API_ENDPOINT`.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/_utils/evaluation_results.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/agents/harvey_lab_acp_shim.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/demo_task/environment/Dockerfile
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/experimental/mcp/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/experimental/mcp/reviewer_server.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/providers/litellm_bedrock_patch.py
RENAMED
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/providers/litellm_logging.py
RENAMED
|
File without changes
|
{benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/src/benchflow/providers/litellm_runtime.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/conformance/acp_smoke/environment/Dockerfile
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/conformance/acp_smoke/instruction.md
RENAMED
|
File without changes
|
{benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/conformance/acp_smoke/solution/solve.sh
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/conformance/conformance-results.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/environment/test_chibench_manifest.py
RENAMED
|
File without changes
|
{benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/environment/test_clawsbench_manifest.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/hello-world-task/instruction.md
RENAMED
|
File without changes
|
{benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/hello-world-task/solution/solve.sh
RENAMED
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/hello-world-task/tests/test.sh
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/terminal-bench-smoke-task/task.toml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/test_codex_custom_provider.sh
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/traces/minimal-claude.jsonl
RENAMED
|
File without changes
|
{benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/examples/traces/minimal-opentraces.jsonl
RENAMED
|
File without changes
|
|
File without changes
|
{benchflow-0.5.1.dev871 → benchflow-0.5.2.dev875}/tests/fixtures/mock_acp_agent_interleaved.py
RENAMED
|
File without changes
|