mcpbr 0.5.0__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mcpbr-0.5.0 → mcpbr-0.6.0}/.claude-plugin/marketplace.json +2 -2
- {mcpbr-0.5.0 → mcpbr-0.6.0}/.claude-plugin/package.json +1 -1
- {mcpbr-0.5.0 → mcpbr-0.6.0}/.claude-plugin/plugin.json +1 -1
- mcpbr-0.6.0/.dockerignore +55 -0
- mcpbr-0.6.0/.gitattributes +2 -0
- mcpbr-0.6.0/.github/workflows/build-binaries.yml +83 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/.gitignore +1 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/.pre-commit-config.yaml +1 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/CHANGELOG.md +33 -0
- mcpbr-0.6.0/Dockerfile.app +61 -0
- mcpbr-0.6.0/Formula/mcpbr.rb +51 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/PKG-INFO +8 -1
- mcpbr-0.6.0/action/README.md +46 -0
- mcpbr-0.6.0/action/action.yml +90 -0
- mcpbr-0.6.0/action/examples/basic.yml +25 -0
- mcpbr-0.6.0/action/examples/matrix.yml +62 -0
- mcpbr-0.6.0/ci-templates/circleci/README.md +56 -0
- mcpbr-0.6.0/ci-templates/circleci/orb.yml +134 -0
- mcpbr-0.6.0/ci-templates/gitlab/.gitlab-ci-mcpbr.yml +69 -0
- mcpbr-0.6.0/ci-templates/gitlab/README.md +44 -0
- mcpbr-0.6.0/conda/README.md +54 -0
- mcpbr-0.6.0/conda/bld.bat +3 -0
- mcpbr-0.6.0/conda/build.sh +5 -0
- mcpbr-0.6.0/conda/meta.yaml +59 -0
- mcpbr-0.6.0/docker/README.md +59 -0
- mcpbr-0.6.0/docker/docker-compose.yml +29 -0
- mcpbr-0.6.0/docker/entrypoint.sh +16 -0
- mcpbr-0.6.0/homebrew/README.md +39 -0
- mcpbr-0.6.0/mcpbr.spec +83 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/package.json +1 -1
- {mcpbr-0.5.0 → mcpbr-0.6.0}/pyproject.toml +4 -1
- mcpbr-0.6.0/src/mcpbr/__init__.py +25 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/config.py +37 -1
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/docker_env.py +2 -1
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/docker_prewarm.py +2 -1
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/dry_run.py +2 -1
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/gpu_support.py +2 -1
- mcpbr-0.6.0/src/mcpbr/graceful_degradation.py +277 -0
- mcpbr-0.6.0/src/mcpbr/languages.py +228 -0
- mcpbr-0.6.0/src/mcpbr/logging_config.py +207 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/models.py +66 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/preflight.py +2 -1
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/pricing.py +72 -0
- mcpbr-0.6.0/src/mcpbr/providers.py +549 -0
- mcpbr-0.6.0/src/mcpbr/sdk.py +264 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/smoke_test.py +2 -1
- mcpbr-0.6.0/tests/test_graceful_degradation.py +621 -0
- mcpbr-0.6.0/tests/test_logging_config.py +595 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_models.py +2 -2
- mcpbr-0.6.0/tests/test_multi_language.py +454 -0
- mcpbr-0.6.0/tests/test_multi_provider.py +625 -0
- mcpbr-0.6.0/tests/test_platform_files.py +706 -0
- mcpbr-0.6.0/tests/test_sdk.py +515 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/uv.lock +305 -5
- mcpbr-0.5.0/src/mcpbr/__init__.py +0 -6
- mcpbr-0.5.0/src/mcpbr/providers.py +0 -236
- {mcpbr-0.5.0 → mcpbr-0.6.0}/.claude/settings.json +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/.claude-plugin/README.md +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/.claude-plugin/skills/README.md +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/.claude-plugin/skills/benchmark-swe-lite/SKILL.md +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/.claude-plugin/skills/mcpbr-config/SKILL.md +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/.claude-plugin/skills/mcpbr-eval/SKILL.md +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/.github/dependabot.yml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/.github/release-drafter.yml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/.github/workflows/ci.yml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/.github/workflows/post-release-bump.yml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/.github/workflows/publish-npm.yml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/.github/workflows/publish.yml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/.github/workflows/release-drafter.yml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/AGENTS.md +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/CLAUDE.md +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/CODE_OF_CONDUCT.md +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/CONTRIBUTING.md +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/Dockerfile +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/HUMANEVAL_FIX_SUMMARY.md +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/LICENSE +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/Makefile +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/PR_SUMMARY.md +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/README.md +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/SECURITY.md +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/assets/mcpbr-demo.gif +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/assets/mcpbr-eval-results.png +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/assets/mcpbr-logo.jpg +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/bin/mcpbr.js +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/config/example.yaml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/config/humaneval.yaml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/config/supermodel.yaml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/examples/azure-config-example.yaml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/examples/custom-benchmark.yaml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/examples/env-vars-example.yaml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/examples/inheritance/README.md +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/examples/inheritance/base-config.yaml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/examples/inheritance/dev-config.yaml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/examples/inheritance/multi-extend-config.yaml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/examples/inheritance/production-config.yaml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/examples/inheritance/shared-mcp-settings.yaml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/examples/local-config-example.yaml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/examples/quick-start/gsm8k-math-reasoning.yaml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/examples/quick-start/test-your-mcp-server.yaml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/install.sh +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/requirements.txt +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/scripts/sync_version.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/scripts/validate_plugin_manifests.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/__main__.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/agent.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/__init__.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/adversarial.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/agentbench.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/aider_polyglot.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/apps.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/arc.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/base.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/bigbench_hard.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/bigcodebench.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/codecontests.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/codereval.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/custom.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/cybergym.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/gaia.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/gsm8k.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/hellaswag.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/humaneval.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/intercode.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/leetcode.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/longbench.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/math_benchmark.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/mbpp.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/mcptoolbench.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/mlagentbench.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/mmmu.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/repoqa.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/swebench.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/terminalbench.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/toolbench.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/truthfulqa.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/benchmarks/webarena.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/cache.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/cli.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/config_inheritance.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/config_migration.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/config_validator.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/config_wizard.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/custom_metrics.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/dashboard.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/data/templates/brave-search.yaml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/data/templates/filesystem.yaml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/data/templates/github.yaml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/data/templates/google-maps.yaml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/data/templates/postgres.yaml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/data/templates/slack.yaml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/data/templates/sqlite.yaml +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/dataset_streaming.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/dataset_versioning.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/docker_cache.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/env_expansion.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/evaluation.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/failure_analysis.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/few_shot.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/formatting.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/harness.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/harnesses.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/incremental_save.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/infrastructure/__init__.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/infrastructure/azure.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/infrastructure/azure_health.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/infrastructure/base.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/infrastructure/local.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/infrastructure/manager.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/junit_reporter.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/latency_metrics.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/log_formatter.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/output_validator.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/profiler.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/regression.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/reporting.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/resource_limits.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/result_streaming.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/sampling.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/schema.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/state_tracker.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/statistics.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/streaming.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/swebench_test_specs.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/task_batching.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/task_scheduler.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/src/mcpbr/templates.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/__init__.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/infrastructure/__init__.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/infrastructure/test_azure.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/infrastructure/test_azure_health.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/infrastructure/test_base.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/infrastructure/test_cli_infrastructure.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/infrastructure/test_config.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/infrastructure/test_local.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/infrastructure/test_manager.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_adversarial_benchmark.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_agent.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_benchmark_filtering.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_benchmark_integration.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_benchmarks.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_build_test_command.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_cache.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_claude_plugin.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_cli_templates.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_comparison_aggregation.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_comparison_config.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_comparison_integration.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_comparison_reporting.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_config.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_config_env_vars.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_config_inheritance.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_config_migration.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_config_validator.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_config_validator_inheritance.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_config_wizard.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_cost_calculation.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_custom_benchmark.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_custom_metrics.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_dashboard.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_dataset_streaming.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_dataset_versioning.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_default_logging.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_docker_cache.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_docker_cleanup.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_docker_label_fix.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_docker_prewarm.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_docker_retry.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_dry_run.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_env_expansion.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_error_messages.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_evaluation.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_exit_codes.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_export.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_failure_analysis.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_few_shot.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_formatting.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_git_diff_new_files.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_gpu_support.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_incremental_save.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_integration.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_junit_reporter.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_latency_metrics.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_log_formatter_read_tool.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_longbench_benchmark.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_mcp_health_check.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_mcp_logging.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_mcptoolbench_benchmark.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_mmmu_benchmark.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_output_validator.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_parse_errors.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_preflight.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_pricing.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_profiler.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_regression.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_reporting.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_resource_limits.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_result_streaming.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_runtime_tracking.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_sampling.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_schema.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_smoke_test.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_state_tracker.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_statistics.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_statistics_integration.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_streaming.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_string_concat_bug.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_task_batching.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_task_scheduler.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_templates.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_thinking_budget.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_timeout_tracking.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_tool_failure_tracking.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_trial_mode.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_type_safety.py +0 -0
- {mcpbr-0.5.0 → mcpbr-0.6.0}/tests/test_xml_export.py +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"$schema": "https://anthropic.com/claude-code/marketplace.schema.json",
|
|
3
3
|
"name": "mcpbr",
|
|
4
|
-
"version": "0.5.0",
|
|
4
|
+
"version": "0.6.0",
|
|
5
5
|
"description": "mcpbr - MCP Benchmark Runner plugin marketplace",
|
|
6
6
|
"owner": {
|
|
7
7
|
"name": "mcpbr Contributors",
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
{
|
|
12
12
|
"name": "mcpbr",
|
|
13
13
|
"description": "Expert benchmark runner for MCP servers using mcpbr. Handles Docker checks, config generation, and result parsing.",
|
|
14
|
-
"version": "0.5.0",
|
|
14
|
+
"version": "0.6.0",
|
|
15
15
|
"author": {
|
|
16
16
|
"name": "mcpbr Contributors"
|
|
17
17
|
},
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# Git
|
|
2
|
+
.git
|
|
3
|
+
.gitignore
|
|
4
|
+
|
|
5
|
+
# Python
|
|
6
|
+
__pycache__
|
|
7
|
+
*.pyc
|
|
8
|
+
*.pyo
|
|
9
|
+
*.egg-info
|
|
10
|
+
dist/
|
|
11
|
+
build/
|
|
12
|
+
.eggs/
|
|
13
|
+
*.egg
|
|
14
|
+
|
|
15
|
+
# Virtual environments
|
|
16
|
+
.venv
|
|
17
|
+
venv
|
|
18
|
+
env
|
|
19
|
+
|
|
20
|
+
# IDE
|
|
21
|
+
.vscode
|
|
22
|
+
.idea
|
|
23
|
+
*.swp
|
|
24
|
+
*.swo
|
|
25
|
+
|
|
26
|
+
# Testing
|
|
27
|
+
.pytest_cache
|
|
28
|
+
.coverage
|
|
29
|
+
htmlcov/
|
|
30
|
+
.tox
|
|
31
|
+
|
|
32
|
+
# Documentation
|
|
33
|
+
docs/
|
|
34
|
+
site/
|
|
35
|
+
mkdocs.yml
|
|
36
|
+
|
|
37
|
+
# CI/CD
|
|
38
|
+
.github/
|
|
39
|
+
ci-templates/
|
|
40
|
+
|
|
41
|
+
# Docker (avoid recursive copies)
|
|
42
|
+
docker/results/
|
|
43
|
+
|
|
44
|
+
# Node
|
|
45
|
+
node_modules/
|
|
46
|
+
npm-debug.log
|
|
47
|
+
|
|
48
|
+
# OS
|
|
49
|
+
.DS_Store
|
|
50
|
+
Thumbs.db
|
|
51
|
+
|
|
52
|
+
# Secrets
|
|
53
|
+
.env
|
|
54
|
+
.env.*
|
|
55
|
+
credentials.json
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
name: Build Binaries
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
workflow_dispatch:
|
|
7
|
+
inputs:
|
|
8
|
+
tag:
|
|
9
|
+
description: "Release tag to build (e.g., v0.6.0)"
|
|
10
|
+
required: false
|
|
11
|
+
type: string
|
|
12
|
+
|
|
13
|
+
permissions:
|
|
14
|
+
contents: write
|
|
15
|
+
|
|
16
|
+
jobs:
|
|
17
|
+
build:
|
|
18
|
+
runs-on: ${{ matrix.os }}
|
|
19
|
+
strategy:
|
|
20
|
+
fail-fast: false
|
|
21
|
+
matrix:
|
|
22
|
+
include:
|
|
23
|
+
- os: ubuntu-latest
|
|
24
|
+
platform: linux
|
|
25
|
+
arch: x86_64
|
|
26
|
+
artifact-name: mcpbr-linux-x86_64
|
|
27
|
+
- os: macos-latest
|
|
28
|
+
platform: macos
|
|
29
|
+
arch: arm64
|
|
30
|
+
artifact-name: mcpbr-macos-arm64
|
|
31
|
+
- os: macos-13-large
|
|
32
|
+
platform: macos
|
|
33
|
+
arch: x86_64
|
|
34
|
+
artifact-name: mcpbr-macos-x86_64
|
|
35
|
+
- os: windows-latest
|
|
36
|
+
platform: windows
|
|
37
|
+
arch: x86_64
|
|
38
|
+
artifact-name: mcpbr-windows-x86_64
|
|
39
|
+
|
|
40
|
+
steps:
|
|
41
|
+
- uses: actions/checkout@v4
|
|
42
|
+
with:
|
|
43
|
+
ref: ${{ github.event.inputs.tag || github.ref }}
|
|
44
|
+
|
|
45
|
+
- name: Set up Python
|
|
46
|
+
uses: actions/setup-python@v5
|
|
47
|
+
with:
|
|
48
|
+
python-version: "3.11"
|
|
49
|
+
|
|
50
|
+
- name: Install dependencies
|
|
51
|
+
run: |
|
|
52
|
+
python -m pip install --upgrade pip
|
|
53
|
+
pip install pyinstaller
|
|
54
|
+
pip install -e "."
|
|
55
|
+
|
|
56
|
+
- name: Build binary (Unix)
|
|
57
|
+
if: matrix.platform != 'windows'
|
|
58
|
+
run: |
|
|
59
|
+
pyinstaller mcpbr.spec
|
|
60
|
+
cd dist
|
|
61
|
+
tar -czf ${{ matrix.artifact-name }}.tar.gz mcpbr
|
|
62
|
+
cd ..
|
|
63
|
+
|
|
64
|
+
- name: Build binary (Windows)
|
|
65
|
+
if: matrix.platform == 'windows'
|
|
66
|
+
run: |
|
|
67
|
+
pyinstaller mcpbr.spec
|
|
68
|
+
cd dist
|
|
69
|
+
Compress-Archive -Path mcpbr.exe -DestinationPath ${{ matrix.artifact-name }}.zip
|
|
70
|
+
cd ..
|
|
71
|
+
shell: pwsh
|
|
72
|
+
|
|
73
|
+
- name: Upload artifact
|
|
74
|
+
uses: actions/upload-artifact@v4
|
|
75
|
+
with:
|
|
76
|
+
name: ${{ matrix.artifact-name }}
|
|
77
|
+
path: dist/mcpbr-*.*
|
|
78
|
+
|
|
79
|
+
- name: Upload to release
|
|
80
|
+
if: github.event_name == 'release'
|
|
81
|
+
uses: softprops/action-gh-release@v2
|
|
82
|
+
with:
|
|
83
|
+
files: dist/mcpbr-*.*
|
|
@@ -7,6 +7,37 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.6.0] - 2026-02-05
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- **Graceful degradation** (#70): Fault-tolerant task execution with failure isolation, classification (transient/permanent/unknown), configurable `continue_on_error` and `max_failures` policies, execution checkpointing for crash recovery, and partial report generation
|
|
15
|
+
- New config fields: `continue_on_error`, `max_failures`, `checkpoint_interval`, `resume_from_checkpoint`
|
|
16
|
+
- **Multi-provider support** (#229): Added OpenAI, Google Gemini, and Alibaba Qwen as model providers alongside Anthropic
|
|
17
|
+
- `OpenAIProvider` for GPT-4o, GPT-4 Turbo, and GPT-4o Mini models
|
|
18
|
+
- `GeminiProvider` for Gemini 2.0 Flash, Gemini 1.5 Pro, and Gemini 1.5 Flash models
|
|
19
|
+
- `QwenProvider` for Qwen Plus, Qwen Turbo, and Qwen Max models via DashScope API
|
|
20
|
+
- New optional dependencies: `openai`, `gemini`, `all-providers` extras
|
|
21
|
+
- Pricing data for all 9 new models
|
|
22
|
+
- Model registry entries with context window and tool support metadata
|
|
23
|
+
- **Multi-language support** (#230): Cross-language benchmark execution for Python, JavaScript, TypeScript, Java, and Go
|
|
24
|
+
- Per-language Docker images, run/compile commands, and test framework configs
|
|
25
|
+
- Automatic language detection from filenames and code patterns
|
|
26
|
+
- Cross-language metrics aggregation
|
|
27
|
+
- **Structured logging** (#231): JSON and human-readable log formatters with contextual metadata
|
|
28
|
+
- File rotation, configurable log levels via `MCPBR_LOG_LEVEL` env var
|
|
29
|
+
- `LogContext` context manager for injecting task/benchmark fields into log records
|
|
30
|
+
- **Public Python SDK** (#232): Programmatic API for configuring and running benchmarks
|
|
31
|
+
- `MCPBenchmark` class with config from dict, YAML, or `HarnessConfig`
|
|
32
|
+
- `list_benchmarks()`, `list_providers()`, `list_models()`, `get_version()` helpers
|
|
33
|
+
- Exported in top-level `mcpbr` package for `from mcpbr import MCPBenchmark`
|
|
34
|
+
- **Platform distribution files**: Docker, Conda, Homebrew, GitHub Actions, and CI templates
|
|
35
|
+
- `Dockerfile.app` multi-stage build for container deployment
|
|
36
|
+
- `docker/docker-compose.yml` for multi-container orchestration
|
|
37
|
+
- `conda/meta.yaml` recipe for Conda packaging
|
|
38
|
+
- `action/action.yml` GitHub Action with basic and matrix examples
|
|
39
|
+
- `ci-templates/` for GitLab CI and CircleCI integration
|
|
40
|
+
|
|
10
41
|
## [0.5.0] - 2026-02-05
|
|
11
42
|
|
|
12
43
|
### Added
|
|
@@ -751,6 +782,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
751
782
|
[0.3.14]: https://github.com/greynewell/mcpbr/releases/tag/v0.3.14
|
|
752
783
|
[0.3.13]: https://github.com/greynewell/mcpbr/releases/tag/v0.3.13
|
|
753
784
|
[0.3.12]: https://github.com/greynewell/mcpbr/releases/tag/v0.3.12
|
|
785
|
+
[0.6.0]: https://github.com/greynewell/mcpbr/releases/tag/v0.6.0
|
|
786
|
+
[0.5.0]: https://github.com/greynewell/mcpbr/releases/tag/v0.5.0
|
|
754
787
|
[0.4.16]: https://github.com/greynewell/mcpbr/releases/tag/v0.4.16
|
|
755
788
|
[0.3.11]: https://github.com/greynewell/mcpbr/releases/tag/v0.3.11
|
|
756
789
|
[0.3.10]: https://github.com/greynewell/mcpbr/releases/tag/v0.3.10
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Multi-stage Dockerfile for running mcpbr CLI
|
|
2
|
+
# This is NOT the task environment Dockerfile - see Dockerfile for that.
|
|
3
|
+
# This image packages the mcpbr benchmark runner itself.
|
|
4
|
+
|
|
5
|
+
# Stage 1: Build
|
|
6
|
+
FROM python:3.11-slim AS builder
|
|
7
|
+
|
|
8
|
+
WORKDIR /build
|
|
9
|
+
|
|
10
|
+
# Install build dependencies
|
|
11
|
+
RUN pip install --no-cache-dir build hatchling
|
|
12
|
+
|
|
13
|
+
# Copy project files
|
|
14
|
+
COPY pyproject.toml README.md LICENSE ./
|
|
15
|
+
COPY src/ src/
|
|
16
|
+
|
|
17
|
+
# Build the wheel
|
|
18
|
+
RUN python -m build --wheel --outdir /build/dist
|
|
19
|
+
|
|
20
|
+
# Stage 2: Runtime
|
|
21
|
+
FROM python:3.11-slim
|
|
22
|
+
|
|
23
|
+
LABEL maintainer="mcpbr Contributors"
|
|
24
|
+
LABEL org.opencontainers.image.source="https://github.com/greynewell/mcpbr"
|
|
25
|
+
LABEL org.opencontainers.image.description="MCP Benchmark Runner - evaluate MCP servers against software engineering benchmarks"
|
|
26
|
+
LABEL org.opencontainers.image.licenses="MIT"
|
|
27
|
+
|
|
28
|
+
# Install runtime system dependencies
|
|
29
|
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
30
|
+
git \
|
|
31
|
+
curl \
|
|
32
|
+
ca-certificates \
|
|
33
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
34
|
+
|
|
35
|
+
# Create non-root user
|
|
36
|
+
RUN groupadd --gid 1000 mcpbr && \
|
|
37
|
+
useradd --uid 1000 --gid mcpbr --shell /bin/bash --create-home mcpbr
|
|
38
|
+
|
|
39
|
+
WORKDIR /home/mcpbr
|
|
40
|
+
|
|
41
|
+
# Install the built wheel from the builder stage
|
|
42
|
+
COPY --from=builder /build/dist/*.whl /tmp/
|
|
43
|
+
RUN pip install --no-cache-dir /tmp/*.whl && \
|
|
44
|
+
rm -f /tmp/*.whl
|
|
45
|
+
|
|
46
|
+
# Copy entrypoint
|
|
47
|
+
COPY docker/entrypoint.sh /usr/local/bin/entrypoint.sh
|
|
48
|
+
RUN chmod +x /usr/local/bin/entrypoint.sh
|
|
49
|
+
|
|
50
|
+
# Create directories for configs and results
|
|
51
|
+
RUN mkdir -p /home/mcpbr/configs /home/mcpbr/results && \
|
|
52
|
+
chown -R mcpbr:mcpbr /home/mcpbr
|
|
53
|
+
|
|
54
|
+
# Switch to non-root user
|
|
55
|
+
USER mcpbr
|
|
56
|
+
|
|
57
|
+
# Mount points for user configs and results
|
|
58
|
+
VOLUME ["/home/mcpbr/configs", "/home/mcpbr/results"]
|
|
59
|
+
|
|
60
|
+
ENTRYPOINT ["entrypoint.sh"]
|
|
61
|
+
CMD ["--help"]
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
class Mcpbr < Formula
|
|
2
|
+
include Language::Python::Virtualenv
|
|
3
|
+
|
|
4
|
+
desc "Model Context Protocol Benchmark Runner - evaluate MCP servers against software engineering benchmarks"
|
|
5
|
+
homepage "https://github.com/greynewell/mcpbr"
|
|
6
|
+
# NOTE: Update URL and sha256 when publishing a release.
|
|
7
|
+
# Run: curl -sL <url> | shasum -a 256
|
|
8
|
+
url "https://files.pythonhosted.org/packages/source/m/mcpbr/mcpbr-0.6.0.tar.gz"
|
|
9
|
+
sha256 "PLACEHOLDER_SHA256_REPLACE_ON_RELEASE"
|
|
10
|
+
license "MIT"
|
|
11
|
+
|
|
12
|
+
depends_on "python@3.11"
|
|
13
|
+
|
|
14
|
+
resource "anthropic" do
|
|
15
|
+
url "https://files.pythonhosted.org/packages/source/a/anthropic/anthropic-0.40.0.tar.gz"
|
|
16
|
+
sha256 "PLACEHOLDER_SHA256"
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
resource "click" do
|
|
20
|
+
url "https://files.pythonhosted.org/packages/source/c/click/click-8.1.7.tar.gz"
|
|
21
|
+
sha256 "PLACEHOLDER_SHA256"
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
resource "docker" do
|
|
25
|
+
url "https://files.pythonhosted.org/packages/source/d/docker/docker-7.0.0.tar.gz"
|
|
26
|
+
sha256 "PLACEHOLDER_SHA256"
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
resource "pydantic" do
|
|
30
|
+
url "https://files.pythonhosted.org/packages/source/p/pydantic/pydantic-2.0.0.tar.gz"
|
|
31
|
+
sha256 "PLACEHOLDER_SHA256"
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
resource "pyyaml" do
|
|
35
|
+
url "https://files.pythonhosted.org/packages/source/p/PyYAML/PyYAML-6.0.1.tar.gz"
|
|
36
|
+
sha256 "PLACEHOLDER_SHA256"
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
resource "rich" do
|
|
40
|
+
url "https://files.pythonhosted.org/packages/source/r/rich/rich-13.0.0.tar.gz"
|
|
41
|
+
sha256 "PLACEHOLDER_SHA256"
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def install
|
|
45
|
+
virtualenv_install_with_resources
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
test do
|
|
49
|
+
assert_match "mcpbr", shell_output("#{bin}/mcpbr --version")
|
|
50
|
+
end
|
|
51
|
+
end
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mcpbr
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: Model Context Protocol Benchmark Runner - evaluate MCP servers against software engineering benchmarks
|
|
5
5
|
Project-URL: Homepage, https://github.com/greynewell/mcpbr
|
|
6
6
|
Project-URL: Repository, https://github.com/greynewell/mcpbr
|
|
@@ -30,6 +30,9 @@ Requires-Dist: pydantic>=2.0.0
|
|
|
30
30
|
Requires-Dist: pyyaml>=6.0.0
|
|
31
31
|
Requires-Dist: requests>=2.31.0
|
|
32
32
|
Requires-Dist: rich>=13.0.0
|
|
33
|
+
Provides-Extra: all-providers
|
|
34
|
+
Requires-Dist: google-generativeai>=0.3.0; extra == 'all-providers'
|
|
35
|
+
Requires-Dist: openai>=1.0.0; extra == 'all-providers'
|
|
33
36
|
Provides-Extra: dev
|
|
34
37
|
Requires-Dist: pre-commit>=3.0.0; extra == 'dev'
|
|
35
38
|
Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
|
|
@@ -40,6 +43,10 @@ Requires-Dist: mkdocs-material>=9.5.0; extra == 'docs'
|
|
|
40
43
|
Requires-Dist: mkdocs-minify-plugin>=0.7.0; extra == 'docs'
|
|
41
44
|
Requires-Dist: mkdocs>=1.5.0; extra == 'docs'
|
|
42
45
|
Requires-Dist: mkdocstrings[python]>=0.24.0; extra == 'docs'
|
|
46
|
+
Provides-Extra: gemini
|
|
47
|
+
Requires-Dist: google-generativeai>=0.3.0; extra == 'gemini'
|
|
48
|
+
Provides-Extra: openai
|
|
49
|
+
Requires-Dist: openai>=1.0.0; extra == 'openai'
|
|
43
50
|
Description-Content-Type: text/markdown
|
|
44
51
|
|
|
45
52
|
# mcpbr
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# mcpbr GitHub Action
|
|
2
|
+
|
|
3
|
+
A composite GitHub Action to run MCP server benchmarks in your CI/CD pipeline.
|
|
4
|
+
|
|
5
|
+
## Usage
|
|
6
|
+
|
|
7
|
+
```yaml
|
|
8
|
+
- uses: greynewell/mcpbr@main
|
|
9
|
+
with:
|
|
10
|
+
config: benchmarks/config.yaml
|
|
11
|
+
anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }}
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Inputs
|
|
15
|
+
|
|
16
|
+
| Input | Description | Required | Default |
|
|
17
|
+
|---|---|---|---|
|
|
18
|
+
| `config` | Path to benchmark configuration YAML | Yes | - |
|
|
19
|
+
| `version` | mcpbr version to install | No | `latest` |
|
|
20
|
+
| `python-version` | Python version to use | No | `3.11` |
|
|
21
|
+
| `output-dir` | Directory for results | No | `results` |
|
|
22
|
+
| `extra-args` | Additional CLI arguments | No | `""` |
|
|
23
|
+
| `anthropic-api-key` | Anthropic API key | No | - |
|
|
24
|
+
| `openai-api-key` | OpenAI API key | No | - |
|
|
25
|
+
|
|
26
|
+
## Outputs
|
|
27
|
+
|
|
28
|
+
| Output | Description |
|
|
29
|
+
|---|---|
|
|
30
|
+
| `results-path` | Path to the results directory |
|
|
31
|
+
| `exit-code` | Exit code from the benchmark run |
|
|
32
|
+
|
|
33
|
+
## Examples
|
|
34
|
+
|
|
35
|
+
See the [examples](examples/) directory for:
|
|
36
|
+
|
|
37
|
+
- [basic.yml](examples/basic.yml) - Simple benchmark run on push
|
|
38
|
+
- [matrix.yml](examples/matrix.yml) - Matrix builds across benchmarks and Python versions
|
|
39
|
+
|
|
40
|
+
## Security
|
|
41
|
+
|
|
42
|
+
Always use GitHub Secrets for API keys. Never hardcode them in workflow files.
|
|
43
|
+
|
|
44
|
+
```yaml
|
|
45
|
+
anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }}
|
|
46
|
+
```
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
name: "mcpbr Benchmark Runner"
|
|
2
|
+
description: "Run MCP server benchmarks in your CI/CD pipeline using mcpbr"
|
|
3
|
+
author: "mcpbr Contributors"
|
|
4
|
+
|
|
5
|
+
branding:
|
|
6
|
+
icon: "bar-chart-2"
|
|
7
|
+
color: "blue"
|
|
8
|
+
|
|
9
|
+
inputs:
|
|
10
|
+
config:
|
|
11
|
+
description: "Path to benchmark configuration YAML file"
|
|
12
|
+
required: true
|
|
13
|
+
version:
|
|
14
|
+
description: "Version of mcpbr to install (e.g., '0.6.0' or 'latest')"
|
|
15
|
+
required: false
|
|
16
|
+
default: "latest"
|
|
17
|
+
python-version:
|
|
18
|
+
description: "Python version to use"
|
|
19
|
+
required: false
|
|
20
|
+
default: "3.11"
|
|
21
|
+
output-dir:
|
|
22
|
+
description: "Directory for benchmark results"
|
|
23
|
+
required: false
|
|
24
|
+
default: "results"
|
|
25
|
+
extra-args:
|
|
26
|
+
description: "Additional arguments to pass to mcpbr run"
|
|
27
|
+
required: false
|
|
28
|
+
default: ""
|
|
29
|
+
anthropic-api-key:
|
|
30
|
+
description: "Anthropic API key (prefer using secrets)"
|
|
31
|
+
required: false
|
|
32
|
+
openai-api-key:
|
|
33
|
+
description: "OpenAI API key (prefer using secrets)"
|
|
34
|
+
required: false
|
|
35
|
+
|
|
36
|
+
outputs:
|
|
37
|
+
results-path:
|
|
38
|
+
description: "Path to the results directory"
|
|
39
|
+
value: ${{ steps.run-benchmark.outputs.results-path }}
|
|
40
|
+
exit-code:
|
|
41
|
+
description: "Exit code from mcpbr run"
|
|
42
|
+
value: ${{ steps.run-benchmark.outputs.exit-code }}
|
|
43
|
+
|
|
44
|
+
runs:
|
|
45
|
+
using: "composite"
|
|
46
|
+
steps:
|
|
47
|
+
- name: Set up Python
|
|
48
|
+
uses: actions/setup-python@v5
|
|
49
|
+
with:
|
|
50
|
+
python-version: ${{ inputs.python-version }}
|
|
51
|
+
|
|
52
|
+
- name: Install mcpbr
|
|
53
|
+
shell: bash
|
|
54
|
+
run: |
|
|
55
|
+
python -m pip install --upgrade pip
|
|
56
|
+
if [ "${{ inputs.version }}" = "latest" ]; then
|
|
57
|
+
pip install mcpbr
|
|
58
|
+
else
|
|
59
|
+
pip install "mcpbr==${{ inputs.version }}"
|
|
60
|
+
fi
|
|
61
|
+
|
|
62
|
+
- name: Create output directory
|
|
63
|
+
shell: bash
|
|
64
|
+
run: mkdir -p "${{ inputs.output-dir }}"
|
|
65
|
+
|
|
66
|
+
- name: Run benchmarks
|
|
67
|
+
id: run-benchmark
|
|
68
|
+
shell: bash
|
|
69
|
+
env:
|
|
70
|
+
ANTHROPIC_API_KEY: ${{ inputs.anthropic-api-key }}
|
|
71
|
+
OPENAI_API_KEY: ${{ inputs.openai-api-key }}
|
|
72
|
+
run: |
|
|
73
|
+
set +e
|
|
74
|
+
mcpbr run \
|
|
75
|
+
--config "${{ inputs.config }}" \
|
|
76
|
+
--output-dir "${{ inputs.output-dir }}" \
|
|
77
|
+
${{ inputs.extra-args }}
|
|
78
|
+
EXIT_CODE=$?
|
|
79
|
+
set -e
|
|
80
|
+
echo "exit-code=${EXIT_CODE}" >> "$GITHUB_OUTPUT"
|
|
81
|
+
echo "results-path=${{ inputs.output-dir }}" >> "$GITHUB_OUTPUT"
|
|
82
|
+
exit ${EXIT_CODE}
|
|
83
|
+
|
|
84
|
+
- name: Upload results artifact
|
|
85
|
+
if: always()
|
|
86
|
+
uses: actions/upload-artifact@v4
|
|
87
|
+
with:
|
|
88
|
+
name: mcpbr-results
|
|
89
|
+
path: ${{ inputs.output-dir }}
|
|
90
|
+
retention-days: 30
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Basic example: Run mcpbr benchmarks on push to main
|
|
2
|
+
name: MCP Benchmark
|
|
3
|
+
|
|
4
|
+
on:
|
|
5
|
+
push:
|
|
6
|
+
branches: [main]
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
benchmark:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
|
|
15
|
+
- name: Run MCP benchmarks
|
|
16
|
+
uses: greynewell/mcpbr@main
|
|
17
|
+
with:
|
|
18
|
+
config: benchmarks/config.yaml
|
|
19
|
+
anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }}
|
|
20
|
+
output-dir: results
|
|
21
|
+
|
|
22
|
+
- name: Check results
|
|
23
|
+
run: |
|
|
24
|
+
echo "Benchmark results saved to results/"
|
|
25
|
+
ls -la results/
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Matrix example: Run benchmarks across multiple configurations
|
|
2
|
+
name: MCP Benchmark Matrix
|
|
3
|
+
|
|
4
|
+
on:
|
|
5
|
+
pull_request:
|
|
6
|
+
branches: [main]
|
|
7
|
+
schedule:
|
|
8
|
+
- cron: "0 6 * * 1" # Weekly on Monday at 6am UTC
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
benchmark:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
strategy:
|
|
14
|
+
fail-fast: false
|
|
15
|
+
matrix:
|
|
16
|
+
benchmark:
|
|
17
|
+
- swebench
|
|
18
|
+
- humaneval
|
|
19
|
+
- mbpp
|
|
20
|
+
python-version:
|
|
21
|
+
- "3.11"
|
|
22
|
+
- "3.12"
|
|
23
|
+
|
|
24
|
+
steps:
|
|
25
|
+
- uses: actions/checkout@v4
|
|
26
|
+
|
|
27
|
+
- name: Run ${{ matrix.benchmark }} benchmark
|
|
28
|
+
uses: greynewell/mcpbr@main
|
|
29
|
+
with:
|
|
30
|
+
config: benchmarks/${{ matrix.benchmark }}.yaml
|
|
31
|
+
python-version: ${{ matrix.python-version }}
|
|
32
|
+
anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }}
|
|
33
|
+
output-dir: results-${{ matrix.benchmark }}-py${{ matrix.python-version }}
|
|
34
|
+
|
|
35
|
+
- name: Upload individual results
|
|
36
|
+
uses: actions/upload-artifact@v4
|
|
37
|
+
with:
|
|
38
|
+
name: results-${{ matrix.benchmark }}-py${{ matrix.python-version }}
|
|
39
|
+
path: results-${{ matrix.benchmark }}-py${{ matrix.python-version }}
|
|
40
|
+
|
|
41
|
+
aggregate:
|
|
42
|
+
needs: benchmark
|
|
43
|
+
runs-on: ubuntu-latest
|
|
44
|
+
if: always()
|
|
45
|
+
steps:
|
|
46
|
+
- name: Download all results
|
|
47
|
+
uses: actions/download-artifact@v4
|
|
48
|
+
with:
|
|
49
|
+
path: all-results
|
|
50
|
+
|
|
51
|
+
- name: Summary
|
|
52
|
+
run: |
|
|
53
|
+
echo "## Benchmark Results" >> $GITHUB_STEP_SUMMARY
|
|
54
|
+
for dir in all-results/results-*; do
|
|
55
|
+
benchmark=$(basename "$dir")
|
|
56
|
+
echo "### ${benchmark}" >> $GITHUB_STEP_SUMMARY
|
|
57
|
+
for json_file in "${dir}"/*.json; do
|
|
58
|
+
if [ -f "$json_file" ]; then
|
|
59
|
+
cat "$json_file" >> $GITHUB_STEP_SUMMARY
|
|
60
|
+
fi
|
|
61
|
+
done
|
|
62
|
+
done
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# CircleCI Orb for mcpbr
|
|
2
|
+
|
|
3
|
+
Run MCP server benchmarks in your CircleCI pipeline.
|
|
4
|
+
|
|
5
|
+
## Quick Start
|
|
6
|
+
|
|
7
|
+
```yaml
|
|
8
|
+
version: 2.1
|
|
9
|
+
|
|
10
|
+
orbs:
|
|
11
|
+
mcpbr: greynewell/mcpbr@1.0
|
|
12
|
+
|
|
13
|
+
workflows:
|
|
14
|
+
benchmark:
|
|
15
|
+
jobs:
|
|
16
|
+
- mcpbr/benchmark:
|
|
17
|
+
config: benchmarks/config.yaml
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Commands
|
|
21
|
+
|
|
22
|
+
| Command | Description |
|
|
23
|
+
|---|---|
|
|
24
|
+
| `install` | Install mcpbr (with optional version) |
|
|
25
|
+
| `run-benchmark` | Run benchmarks with a config file |
|
|
26
|
+
| `smoke-test` | Quick verification that mcpbr works |
|
|
27
|
+
|
|
28
|
+
## Jobs
|
|
29
|
+
|
|
30
|
+
| Job | Description |
|
|
31
|
+
|---|---|
|
|
32
|
+
| `benchmark` | Full benchmark run with install + execute |
|
|
33
|
+
| `smoke-test` | Quick smoke test |
|
|
34
|
+
|
|
35
|
+
## Parameters
|
|
36
|
+
|
|
37
|
+
### `install`
|
|
38
|
+
|
|
39
|
+
| Parameter | Type | Default | Description |
|
|
40
|
+
|---|---|---|---|
|
|
41
|
+
| `version` | string | `latest` | mcpbr version to install |
|
|
42
|
+
|
|
43
|
+
### `run-benchmark`
|
|
44
|
+
|
|
45
|
+
| Parameter | Type | Default | Description |
|
|
46
|
+
|---|---|---|---|
|
|
47
|
+
| `config` | string | (required) | Path to config YAML |
|
|
48
|
+
| `output-dir` | string | `results` | Results directory |
|
|
49
|
+
| `extra-args` | string | `""` | Additional CLI args |
|
|
50
|
+
|
|
51
|
+
## Environment Variables
|
|
52
|
+
|
|
53
|
+
Set these in your CircleCI project settings:
|
|
54
|
+
|
|
55
|
+
- `ANTHROPIC_API_KEY`
|
|
56
|
+
- `OPENAI_API_KEY`
|