mcpbr 0.4.12__tar.gz → 0.4.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. {mcpbr-0.4.12 → mcpbr-0.4.13}/.claude-plugin/marketplace.json +2 -2
  2. {mcpbr-0.4.12 → mcpbr-0.4.13}/.claude-plugin/package.json +1 -1
  3. {mcpbr-0.4.12 → mcpbr-0.4.13}/.claude-plugin/plugin.json +1 -1
  4. {mcpbr-0.4.12 → mcpbr-0.4.13}/PKG-INFO +1 -1
  5. {mcpbr-0.4.12 → mcpbr-0.4.13}/package.json +1 -1
  6. {mcpbr-0.4.12 → mcpbr-0.4.13}/pyproject.toml +1 -1
  7. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/harnesses.py +61 -98
  8. {mcpbr-0.4.12 → mcpbr-0.4.13}/.claude/settings.json +0 -0
  9. {mcpbr-0.4.12 → mcpbr-0.4.13}/.claude-plugin/README.md +0 -0
  10. {mcpbr-0.4.12 → mcpbr-0.4.13}/.claude-plugin/skills/README.md +0 -0
  11. {mcpbr-0.4.12 → mcpbr-0.4.13}/.claude-plugin/skills/benchmark-swe-lite/SKILL.md +0 -0
  12. {mcpbr-0.4.12 → mcpbr-0.4.13}/.claude-plugin/skills/mcpbr-config/SKILL.md +0 -0
  13. {mcpbr-0.4.12 → mcpbr-0.4.13}/.claude-plugin/skills/mcpbr-eval/SKILL.md +0 -0
  14. {mcpbr-0.4.12 → mcpbr-0.4.13}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  15. {mcpbr-0.4.12 → mcpbr-0.4.13}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  16. {mcpbr-0.4.12 → mcpbr-0.4.13}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  17. {mcpbr-0.4.12 → mcpbr-0.4.13}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  18. {mcpbr-0.4.12 → mcpbr-0.4.13}/.github/dependabot.yml +0 -0
  19. {mcpbr-0.4.12 → mcpbr-0.4.13}/.github/release-drafter.yml +0 -0
  20. {mcpbr-0.4.12 → mcpbr-0.4.13}/.github/workflows/ci.yml +0 -0
  21. {mcpbr-0.4.12 → mcpbr-0.4.13}/.github/workflows/post-release-bump.yml +0 -0
  22. {mcpbr-0.4.12 → mcpbr-0.4.13}/.github/workflows/publish-npm.yml +0 -0
  23. {mcpbr-0.4.12 → mcpbr-0.4.13}/.github/workflows/publish.yml +0 -0
  24. {mcpbr-0.4.12 → mcpbr-0.4.13}/.github/workflows/release-drafter.yml +0 -0
  25. {mcpbr-0.4.12 → mcpbr-0.4.13}/.gitignore +0 -0
  26. {mcpbr-0.4.12 → mcpbr-0.4.13}/.pre-commit-config.yaml +0 -0
  27. {mcpbr-0.4.12 → mcpbr-0.4.13}/AGENTS.md +0 -0
  28. {mcpbr-0.4.12 → mcpbr-0.4.13}/CHANGELOG.md +0 -0
  29. {mcpbr-0.4.12 → mcpbr-0.4.13}/CLAUDE.md +0 -0
  30. {mcpbr-0.4.12 → mcpbr-0.4.13}/CODE_OF_CONDUCT.md +0 -0
  31. {mcpbr-0.4.12 → mcpbr-0.4.13}/CONTRIBUTING.md +0 -0
  32. {mcpbr-0.4.12 → mcpbr-0.4.13}/Dockerfile +0 -0
  33. {mcpbr-0.4.12 → mcpbr-0.4.13}/HUMANEVAL_FIX_SUMMARY.md +0 -0
  34. {mcpbr-0.4.12 → mcpbr-0.4.13}/LICENSE +0 -0
  35. {mcpbr-0.4.12 → mcpbr-0.4.13}/Makefile +0 -0
  36. {mcpbr-0.4.12 → mcpbr-0.4.13}/PR_SUMMARY.md +0 -0
  37. {mcpbr-0.4.12 → mcpbr-0.4.13}/README.md +0 -0
  38. {mcpbr-0.4.12 → mcpbr-0.4.13}/SECURITY.md +0 -0
  39. {mcpbr-0.4.12 → mcpbr-0.4.13}/assets/mcpbr-demo.gif +0 -0
  40. {mcpbr-0.4.12 → mcpbr-0.4.13}/assets/mcpbr-eval-results.png +0 -0
  41. {mcpbr-0.4.12 → mcpbr-0.4.13}/assets/mcpbr-logo.jpg +0 -0
  42. {mcpbr-0.4.12 → mcpbr-0.4.13}/bin/mcpbr.js +0 -0
  43. {mcpbr-0.4.12 → mcpbr-0.4.13}/config/example.yaml +0 -0
  44. {mcpbr-0.4.12 → mcpbr-0.4.13}/config/humaneval.yaml +0 -0
  45. {mcpbr-0.4.12 → mcpbr-0.4.13}/config/supermodel.yaml +0 -0
  46. {mcpbr-0.4.12 → mcpbr-0.4.13}/examples/azure-config-example.yaml +0 -0
  47. {mcpbr-0.4.12 → mcpbr-0.4.13}/examples/env-vars-example.yaml +0 -0
  48. {mcpbr-0.4.12 → mcpbr-0.4.13}/examples/inheritance/README.md +0 -0
  49. {mcpbr-0.4.12 → mcpbr-0.4.13}/examples/inheritance/base-config.yaml +0 -0
  50. {mcpbr-0.4.12 → mcpbr-0.4.13}/examples/inheritance/dev-config.yaml +0 -0
  51. {mcpbr-0.4.12 → mcpbr-0.4.13}/examples/inheritance/multi-extend-config.yaml +0 -0
  52. {mcpbr-0.4.12 → mcpbr-0.4.13}/examples/inheritance/production-config.yaml +0 -0
  53. {mcpbr-0.4.12 → mcpbr-0.4.13}/examples/inheritance/shared-mcp-settings.yaml +0 -0
  54. {mcpbr-0.4.12 → mcpbr-0.4.13}/examples/local-config-example.yaml +0 -0
  55. {mcpbr-0.4.12 → mcpbr-0.4.13}/examples/quick-start/gsm8k-math-reasoning.yaml +0 -0
  56. {mcpbr-0.4.12 → mcpbr-0.4.13}/examples/quick-start/test-your-mcp-server.yaml +0 -0
  57. {mcpbr-0.4.12 → mcpbr-0.4.13}/install.sh +0 -0
  58. {mcpbr-0.4.12 → mcpbr-0.4.13}/requirements.txt +0 -0
  59. {mcpbr-0.4.12 → mcpbr-0.4.13}/scripts/sync_version.py +0 -0
  60. {mcpbr-0.4.12 → mcpbr-0.4.13}/scripts/validate_plugin_manifests.py +0 -0
  61. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/__init__.py +0 -0
  62. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/__main__.py +0 -0
  63. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/agent.py +0 -0
  64. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/__init__.py +0 -0
  65. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/agentbench.py +0 -0
  66. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/aider_polyglot.py +0 -0
  67. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/apps.py +0 -0
  68. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/arc.py +0 -0
  69. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/base.py +0 -0
  70. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/bigbench_hard.py +0 -0
  71. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/bigcodebench.py +0 -0
  72. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/codecontests.py +0 -0
  73. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/codereval.py +0 -0
  74. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/cybergym.py +0 -0
  75. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/gaia.py +0 -0
  76. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/gsm8k.py +0 -0
  77. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/hellaswag.py +0 -0
  78. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/humaneval.py +0 -0
  79. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/intercode.py +0 -0
  80. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/leetcode.py +0 -0
  81. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/math_benchmark.py +0 -0
  82. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/mbpp.py +0 -0
  83. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/mcptoolbench.py +0 -0
  84. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/mlagentbench.py +0 -0
  85. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/repoqa.py +0 -0
  86. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/swebench.py +0 -0
  87. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/terminalbench.py +0 -0
  88. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/toolbench.py +0 -0
  89. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/truthfulqa.py +0 -0
  90. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/benchmarks/webarena.py +0 -0
  91. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/cache.py +0 -0
  92. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/cli.py +0 -0
  93. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/config.py +0 -0
  94. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/config_inheritance.py +0 -0
  95. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/config_validator.py +0 -0
  96. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/data/templates/brave-search.yaml +0 -0
  97. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/data/templates/filesystem.yaml +0 -0
  98. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/data/templates/github.yaml +0 -0
  99. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/data/templates/google-maps.yaml +0 -0
  100. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/data/templates/postgres.yaml +0 -0
  101. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/data/templates/slack.yaml +0 -0
  102. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/data/templates/sqlite.yaml +0 -0
  103. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/docker_env.py +0 -0
  104. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/env_expansion.py +0 -0
  105. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/evaluation.py +0 -0
  106. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/harness.py +0 -0
  107. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/incremental_save.py +0 -0
  108. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/infrastructure/__init__.py +0 -0
  109. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/infrastructure/azure.py +0 -0
  110. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/infrastructure/azure_health.py +0 -0
  111. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/infrastructure/base.py +0 -0
  112. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/infrastructure/local.py +0 -0
  113. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/infrastructure/manager.py +0 -0
  114. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/junit_reporter.py +0 -0
  115. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/log_formatter.py +0 -0
  116. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/models.py +0 -0
  117. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/output_validator.py +0 -0
  118. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/preflight.py +0 -0
  119. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/pricing.py +0 -0
  120. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/profiler.py +0 -0
  121. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/providers.py +0 -0
  122. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/regression.py +0 -0
  123. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/reporting.py +0 -0
  124. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/schema.py +0 -0
  125. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/smoke_test.py +0 -0
  126. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/state_tracker.py +0 -0
  127. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/statistics.py +0 -0
  128. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/streaming.py +0 -0
  129. {mcpbr-0.4.12 → mcpbr-0.4.13}/src/mcpbr/templates.py +0 -0
  130. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/__init__.py +0 -0
  131. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/infrastructure/__init__.py +0 -0
  132. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/infrastructure/test_azure.py +0 -0
  133. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/infrastructure/test_azure_health.py +0 -0
  134. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/infrastructure/test_base.py +0 -0
  135. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/infrastructure/test_cli_infrastructure.py +0 -0
  136. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/infrastructure/test_config.py +0 -0
  137. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/infrastructure/test_local.py +0 -0
  138. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/infrastructure/test_manager.py +0 -0
  139. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_agent.py +0 -0
  140. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_benchmark_filtering.py +0 -0
  141. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_benchmark_integration.py +0 -0
  142. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_benchmarks.py +0 -0
  143. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_cache.py +0 -0
  144. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_claude_plugin.py +0 -0
  145. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_cli_templates.py +0 -0
  146. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_comparison_aggregation.py +0 -0
  147. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_comparison_config.py +0 -0
  148. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_comparison_integration.py +0 -0
  149. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_comparison_reporting.py +0 -0
  150. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_config.py +0 -0
  151. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_config_env_vars.py +0 -0
  152. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_config_inheritance.py +0 -0
  153. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_config_validator.py +0 -0
  154. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_config_validator_inheritance.py +0 -0
  155. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_cost_calculation.py +0 -0
  156. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_default_logging.py +0 -0
  157. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_django_runner.py +0 -0
  158. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_docker_cleanup.py +0 -0
  159. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_docker_label_fix.py +0 -0
  160. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_docker_retry.py +0 -0
  161. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_env_expansion.py +0 -0
  162. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_error_messages.py +0 -0
  163. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_evaluation.py +0 -0
  164. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_exit_codes.py +0 -0
  165. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_export.py +0 -0
  166. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_git_diff_new_files.py +0 -0
  167. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_incremental_save.py +0 -0
  168. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_integration.py +0 -0
  169. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_junit_reporter.py +0 -0
  170. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_log_formatter_read_tool.py +0 -0
  171. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_mcp_health_check.py +0 -0
  172. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_mcp_logging.py +0 -0
  173. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_models.py +0 -0
  174. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_output_validator.py +0 -0
  175. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_parse_errors.py +0 -0
  176. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_preflight.py +0 -0
  177. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_pricing.py +0 -0
  178. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_profiler.py +0 -0
  179. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_regression.py +0 -0
  180. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_reporting.py +0 -0
  181. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_runtime_tracking.py +0 -0
  182. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_schema.py +0 -0
  183. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_smoke_test.py +0 -0
  184. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_state_tracker.py +0 -0
  185. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_statistics.py +0 -0
  186. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_statistics_integration.py +0 -0
  187. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_streaming.py +0 -0
  188. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_string_concat_bug.py +0 -0
  189. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_templates.py +0 -0
  190. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_thinking_budget.py +0 -0
  191. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_timeout_tracking.py +0 -0
  192. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_tool_failure_tracking.py +0 -0
  193. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_trial_mode.py +0 -0
  194. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_type_safety.py +0 -0
  195. {mcpbr-0.4.12 → mcpbr-0.4.13}/tests/test_xml_export.py +0 -0
  196. {mcpbr-0.4.12 → mcpbr-0.4.13}/uv.lock +0 -0
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "$schema": "https://anthropic.com/claude-code/marketplace.schema.json",
3
3
  "name": "mcpbr",
4
- "version": "0.4.12",
4
+ "version": "0.4.13",
5
5
  "description": "mcpbr - MCP Benchmark Runner plugin marketplace",
6
6
  "owner": {
7
7
  "name": "mcpbr Contributors",
@@ -11,7 +11,7 @@
11
11
  {
12
12
  "name": "mcpbr",
13
13
  "description": "Expert benchmark runner for MCP servers using mcpbr. Handles Docker checks, config generation, and result parsing.",
14
- "version": "0.4.12",
14
+ "version": "0.4.13",
15
15
  "author": {
16
16
  "name": "mcpbr Contributors"
17
17
  },
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@greynewell/mcpbr-claude-plugin",
3
- "version": "0.4.12",
3
+ "version": "0.4.13",
4
4
  "description": "Claude Code plugin for mcpbr - Expert benchmark runner for MCP servers with specialized skills",
5
5
  "keywords": [
6
6
  "claude-code",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mcpbr",
3
- "version": "0.4.12",
3
+ "version": "0.4.13",
4
4
  "description": "Expert benchmark runner for MCP servers using mcpbr. Handles Docker checks, config generation, and result parsing.",
5
5
  "schema_version": "1.0"
6
6
  }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mcpbr
3
- Version: 0.4.12
3
+ Version: 0.4.13
4
4
  Summary: Model Context Protocol Benchmark Runner - evaluate MCP servers against software engineering benchmarks
5
5
  Project-URL: Homepage, https://github.com/greynewell/mcpbr
6
6
  Project-URL: Repository, https://github.com/greynewell/mcpbr
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@greynewell/mcpbr",
3
- "version": "0.4.12",
3
+ "version": "0.4.13",
4
4
  "description": "Model Context Protocol Benchmark Runner - CLI tool for evaluating MCP servers",
5
5
  "keywords": [
6
6
  "mcpbr",
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "mcpbr"
7
- version = "0.4.12"
7
+ version = "0.4.13"
8
8
  description = "Model Context Protocol Benchmark Runner - evaluate MCP servers against software engineering benchmarks"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -452,9 +452,10 @@ DEFAULT_PROMPT = (
452
452
  )
453
453
 
454
454
  MCP_PROMPT_SUFFIX = (
455
- "\n\nYou have access to an MCP server with additional tools. "
456
- "Consider using the MCP tools (prefixed with mcp__) when they would "
457
- "help you understand or navigate the codebase more effectively."
455
+ "\n\nYou have access to an MCP server with additional tools for codebase analysis. "
456
+ "Use these tools to understand the codebase structure, find definitions, trace call chains, "
457
+ "and navigate dependencies before making changes. The MCP tools are especially useful for "
458
+ "understanding how code is connected across files."
458
459
  )
459
460
 
460
461
 
@@ -594,25 +595,27 @@ class ClaudeCodeHarness:
594
595
  instance_id = task_id or task.get("instance_id", "unknown")
595
596
 
596
597
  mcp_server_name = None
598
+ mcp_json_path = None
597
599
  if self.mcp_server:
598
600
  mcp_server_name = self.mcp_server.name
599
601
  args = self.mcp_server.get_args_for_workdir(workdir)
600
602
  mcp_env = self.mcp_server.get_expanded_env()
601
- add_cmd = [
602
- "claude",
603
- "mcp",
604
- "add",
605
- mcp_server_name,
606
- "--",
607
- self.mcp_server.command,
608
- ] + args
609
- exit_code, stdout, stderr = await _run_cli_command(
610
- add_cmd, workdir, timeout=30, env=mcp_env
611
- )
612
- if exit_code != 0:
613
- self._console.print(
614
- f"[yellow]Warning: MCP server add failed (exit {exit_code}): {stderr or stdout}[/yellow]"
615
- )
603
+
604
+ # Write .mcp.json file for Claude Code to discover MCP tools.
605
+ # This is more reliable than `claude mcp add` which can create broken
606
+ # tool registrations where the server connects but tools aren't routable.
607
+ mcp_config = {
608
+ "mcpServers": {
609
+ mcp_server_name: {
610
+ "type": "stdio",
611
+ "command": self.mcp_server.command,
612
+ "args": args,
613
+ "env": mcp_env,
614
+ }
615
+ }
616
+ }
617
+ mcp_json_path = os.path.join(workdir, ".mcp.json")
618
+ Path(mcp_json_path).write_text(json.dumps(mcp_config, indent=2))
616
619
 
617
620
  try:
618
621
  command = [
@@ -683,12 +686,8 @@ class ClaudeCodeHarness:
683
686
 
684
687
  if exit_code != 0:
685
688
  error_msg = stderr or "Unknown error"
686
- if mcp_server_name:
687
- await _run_cli_command(
688
- ["claude", "mcp", "remove", mcp_server_name],
689
- workdir,
690
- timeout=10,
691
- )
689
+ if mcp_json_path and os.path.exists(mcp_json_path):
690
+ os.remove(mcp_json_path)
692
691
  return AgentResult(
693
692
  patch="",
694
693
  success=False,
@@ -705,12 +704,8 @@ class ClaudeCodeHarness:
705
704
  cost_usd=cost_usd,
706
705
  )
707
706
 
708
- if mcp_server_name:
709
- await _run_cli_command(
710
- ["claude", "mcp", "remove", mcp_server_name],
711
- workdir,
712
- timeout=10,
713
- )
707
+ if mcp_json_path and os.path.exists(mcp_json_path):
708
+ os.remove(mcp_json_path)
714
709
 
715
710
  # Check git status to understand what happened
716
711
  git_exit, git_status, git_stderr = await _run_cli_command(
@@ -747,12 +742,8 @@ class ClaudeCodeHarness:
747
742
  cost_usd=cost_usd,
748
743
  )
749
744
  except Exception:
750
- if mcp_server_name:
751
- await _run_cli_command(
752
- ["claude", "mcp", "remove", mcp_server_name],
753
- workdir,
754
- timeout=10,
755
- )
745
+ if mcp_json_path and os.path.exists(mcp_json_path):
746
+ os.remove(mcp_json_path)
756
747
  raise
757
748
 
758
749
  async def _solve_in_docker(
@@ -846,37 +837,36 @@ class ClaudeCodeHarness:
846
837
  self._console.print(f"[cyan]Registering MCP server: {mcp_server_name}[/cyan]")
847
838
  self._console.print(f"[dim] Command: {self.mcp_server.command} {args_str}[/dim]")
848
839
 
849
- # Register MCP server separately with its own timeout
850
- # Use shlex.quote() to prevent shell injection and handle spaces/special characters
851
- quoted_workdir = shlex.quote(env.workdir)
852
- quoted_env_file = shlex.quote(env_file)
853
- quoted_server_name = shlex.quote(mcp_server_name)
854
- quoted_command = shlex.quote(self.mcp_server.command)
855
- quoted_args = " ".join(shlex.quote(arg) for arg in args)
856
-
857
- mcp_add_cmd = [
858
- "/bin/bash",
859
- "-c",
860
- f"cd {quoted_workdir} && su mcpbr -c 'source {quoted_env_file} && cd {quoted_workdir} && claude mcp add {quoted_server_name} -- {quoted_command} {quoted_args}'",
861
- ]
840
+ # Write .mcp.json to workdir for Claude Code to discover MCP tools.
841
+ # File-based config is more reliable than `claude mcp add` which can create
842
+ # broken tool registrations where the server connects but tools aren't routable.
843
+ mcp_config = {
844
+ "mcpServers": {
845
+ mcp_server_name: {
846
+ "type": "stdio",
847
+ "command": self.mcp_server.command,
848
+ "args": args,
849
+ "env": self.mcp_server.get_expanded_env(),
850
+ }
851
+ }
852
+ }
853
+ mcp_json_content = json.dumps(mcp_config, indent=2)
854
+ mcp_json_path = f"{env.workdir}/.mcp.json"
862
855
 
863
856
  try:
864
857
  mcp_exit_code, mcp_stdout, mcp_stderr = await env.exec_command(
865
- mcp_add_cmd,
866
- timeout=60, # Separate 60s timeout for MCP registration
867
- environment=docker_env,
858
+ f"cat > {mcp_json_path} << 'MCP_JSON_EOF'\n{mcp_json_content}\nMCP_JSON_EOF",
859
+ timeout=10,
868
860
  )
861
+ await env.exec_command(f"chown mcpbr:mcpbr {mcp_json_path}", timeout=5)
869
862
 
870
863
  if mcp_exit_code != 0:
871
- error_msg = f"MCP server registration failed (exit {mcp_exit_code})"
864
+ error_msg = f"MCP config write failed (exit {mcp_exit_code})"
872
865
  if mcp_stderr:
873
866
  error_msg += f": {mcp_stderr}"
874
- if mcp_stdout:
875
- error_msg += f"\nStdout: {mcp_stdout}"
876
867
  if verbose:
877
868
  self._console.print(f"[red]✗ {error_msg}[/red]")
878
869
 
879
- # Clean up temp files before early return
880
870
  await env.exec_command(f"rm -f {prompt_file} {env_file}", timeout=5)
881
871
 
882
872
  return AgentResult(
@@ -889,16 +879,13 @@ class ClaudeCodeHarness:
889
879
  )
890
880
 
891
881
  if verbose:
892
- self._console.print("[green]✓ MCP server registered successfully[/green]")
893
- if mcp_stdout.strip():
894
- self._console.print(f"[dim]{mcp_stdout.strip()}[/dim]")
882
+ self._console.print("[green]✓ MCP server configured via .mcp.json[/green]")
895
883
 
896
884
  except asyncio.TimeoutError:
897
- error_msg = "MCP server registration timed out after 60s. The MCP server may have failed to start or is hanging during initialization."
885
+ error_msg = "Failed to write MCP configuration file."
898
886
  if verbose:
899
887
  self._console.print(f"[red]✗ {error_msg}[/red]")
900
888
 
901
- # Clean up temp files before early return
902
889
  await env.exec_command(f"rm -f {prompt_file} {env_file}", timeout=5)
903
890
 
904
891
  return AgentResult(
@@ -1039,16 +1026,9 @@ class ClaudeCodeHarness:
1039
1026
  error_msg += f"\nMCP server logs saved to: {mcp_log_path}"
1040
1027
 
1041
1028
  if mcp_server_name:
1042
- # Use shlex.quote() for MCP removal command
1043
- quoted_env_file = shlex.quote(env_file)
1044
- quoted_server_name = shlex.quote(mcp_server_name)
1045
- remove_cmd = (
1046
- f"source {quoted_env_file} && claude mcp remove {quoted_server_name}"
1047
- )
1048
1029
  await env.exec_command(
1049
- f"su mcpbr -c {shlex.quote(remove_cmd)}",
1050
- timeout=10,
1051
- environment=docker_env,
1030
+ f"rm -f {env.workdir}/.mcp.json",
1031
+ timeout=5,
1052
1032
  )
1053
1033
 
1054
1034
  return AgentResult(
@@ -1068,14 +1048,9 @@ class ClaudeCodeHarness:
1068
1048
  )
1069
1049
 
1070
1050
  if mcp_server_name:
1071
- # Use shlex.quote() for MCP removal command
1072
- quoted_env_file = shlex.quote(env_file)
1073
- quoted_server_name = shlex.quote(mcp_server_name)
1074
- remove_cmd = f"source {quoted_env_file} && claude mcp remove {quoted_server_name}"
1075
1051
  await env.exec_command(
1076
- f"su mcpbr -c {shlex.quote(remove_cmd)}",
1077
- timeout=10,
1078
- environment=docker_env,
1052
+ f"rm -f {env.workdir}/.mcp.json",
1053
+ timeout=5,
1079
1054
  )
1080
1055
 
1081
1056
  _, git_status, git_stderr = await env.exec_command(
@@ -1160,20 +1135,13 @@ class ClaudeCodeHarness:
1160
1135
 
1161
1136
  if mcp_server_name:
1162
1137
  try:
1163
- # Use shlex.quote() for MCP removal command
1164
- quoted_env_file = shlex.quote(env_file)
1165
- quoted_server_name = shlex.quote(mcp_server_name)
1166
- remove_cmd = (
1167
- f"source {quoted_env_file} && claude mcp remove {quoted_server_name}"
1168
- )
1169
1138
  await env.exec_command(
1170
- f"su mcpbr -c {shlex.quote(remove_cmd)}",
1171
- timeout=10,
1172
- environment=docker_env,
1139
+ f"rm -f {env.workdir}/.mcp.json",
1140
+ timeout=5,
1173
1141
  )
1174
1142
  except Exception as e:
1175
1143
  if verbose:
1176
- self._console.print(f"[dim red]Failed to remove MCP server: {e}[/dim red]")
1144
+ self._console.print(f"[dim red]Failed to clean up .mcp.json: {e}[/dim red]")
1177
1145
 
1178
1146
  error_msg = f"Task execution timed out after {timeout}s."
1179
1147
  if self.mcp_server:
@@ -1204,20 +1172,13 @@ class ClaudeCodeHarness:
1204
1172
  except Exception:
1205
1173
  if mcp_server_name:
1206
1174
  try:
1207
- # Use shlex.quote() for MCP removal command
1208
- quoted_env_file = shlex.quote(env_file)
1209
- quoted_server_name = shlex.quote(mcp_server_name)
1210
- remove_cmd = (
1211
- f"source {quoted_env_file} && claude mcp remove {quoted_server_name}"
1212
- )
1213
1175
  await env.exec_command(
1214
- f"su mcpbr -c {shlex.quote(remove_cmd)}",
1215
- timeout=10,
1216
- environment=docker_env,
1176
+ f"rm -f {env.workdir}/.mcp.json",
1177
+ timeout=5,
1217
1178
  )
1218
1179
  except Exception as e:
1219
1180
  if verbose:
1220
- self._console.print(f"[dim red]Failed to remove MCP server: {e}[/dim red]")
1181
+ self._console.print(f"[dim red]Failed to clean up .mcp.json: {e}[/dim red]")
1221
1182
  raise
1222
1183
  finally:
1223
1184
  # Close MCP log file if it was opened
@@ -1230,7 +1191,9 @@ class ClaudeCodeHarness:
1230
1191
  if verbose:
1231
1192
  self._console.print(f"[dim red]Failed to close MCP log file: {e}[/dim red]")
1232
1193
 
1233
- await env.exec_command(f"rm -f {prompt_file} {env_file}", timeout=5)
1194
+ await env.exec_command(
1195
+ f"rm -f {prompt_file} {env_file} {env.workdir}/.mcp.json", timeout=5
1196
+ )
1234
1197
 
1235
1198
 
1236
1199
  HARNESS_REGISTRY: dict[str, type] = {
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes