PyPI - mcpbr - Versions diffs - 0.4.12__py3-none-any.whl → 0.4.14__py3-none-any.whl - Mend

mcpbr 0.4.12-py3-none-any.whl → 0.4.14-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

mcpbr/config.py CHANGED Viewed

@@ -109,6 +109,16 @@ class MCPServerConfig(BaseModel):
         default=900000,
         description="Timeout in milliseconds for MCP tool execution (default: 15 min for long-running tools)",
     )
+    setup_command: str | None = Field(
+        default=None,
+        description="Shell command to run inside the container BEFORE the agent starts. "
+        "Runs outside the task timer (does not count against timeout_seconds). "
+        "Use {workdir} as placeholder. Useful for pre-computing caches.",
+    )
+    setup_timeout_ms: int = Field(
+        default=900000,
+        description="Timeout in milliseconds for the setup_command (default: 15 min)",
+    )
     def get_args_for_workdir(self, workdir: str) -> list[str]:
         """Replace {workdir} placeholder in args with actual path."""
@@ -117,6 +127,12 @@ class MCPServerConfig(BaseModel):
             result.append(arg.replace("{workdir}", workdir))
         return result
+    def get_setup_command_for_workdir(self, workdir: str) -> str | None:
+        """Replace {workdir} placeholder in setup_command with actual path."""
+        if self.setup_command is None:
+            return None
+        return self.setup_command.replace("{workdir}", workdir)
     def get_expanded_env(self) -> dict[str, str]:
         """Expand ${VAR} references in env values using os.environ.
@@ -400,6 +416,12 @@ class HarnessConfig(BaseModel):
         description="Enable comprehensive performance profiling (tool latency, memory, overhead)",
     )
+    volumes: dict[str, str] = Field(
+        default_factory=dict,
+        description="Additional volume mounts (read-write) for Docker containers (host_path: container_path). "
+        "Mounted into every container, persists across tasks. Useful for pre-computed caches.",
+    )
     infrastructure: InfrastructureConfig = Field(
         default_factory=InfrastructureConfig,
         description="Infrastructure configuration (local or azure)",

mcpbr/docker_env.py CHANGED Viewed

@@ -314,14 +314,18 @@ class DockerEnvironmentManager:
     FALLBACK_IMAGE = "mcpbr-env"
     DOCKERFILE_PATH = Path(__file__).parent.parent.parent / "Dockerfile"
-    def __init__(self, use_prebuilt: bool = True) -> None:
+    def __init__(
+        self, use_prebuilt: bool = True, extra_volumes: dict[str, str] | None = None
+    ) -> None:
         """Initialize the Docker environment manager.
         Args:
             use_prebuilt: If True, try to use pre-built SWE-bench images first.
+            extra_volumes: Additional volume mounts (read-write) (host_path -> container_path).
         """
         self.client = docker.from_env()
         self.use_prebuilt = use_prebuilt
+        self._extra_volumes = extra_volumes or {}
         self._fallback_image_built = False
         self._temp_dirs: list[tempfile.TemporaryDirectory[str]] = []
         self._containers: list[Container] = []
@@ -488,6 +492,15 @@ CMD ["/bin/bash"]
             for attempt in range(max_retries + 1):
                 try:
+                    volumes_dict: dict[str, dict[str, str]] = {
+                        host_workdir: {"bind": "/workspace", "mode": "rw"},
+                    }
+                    for host_path, container_path in self._extra_volumes.items():
+                        volumes_dict[os.path.abspath(host_path)] = {
+                            "bind": container_path,
+                            "mode": "rw",
+                        }
                     container = self.client.containers.run(
                         image_name,
                         command="tail -f /dev/null",
@@ -495,9 +508,7 @@ CMD ["/bin/bash"]
                         detach=True,
                         platform="linux/amd64" if uses_prebuilt else None,
                         network_mode="bridge",  # Enable network for API calls
-                        volumes={
-                            host_workdir: {"bind": "/workspace", "mode": "rw"},
-                        },
+                        volumes=volumes_dict,
                         working_dir=container_workdir,
                         remove=False,
                         labels={

mcpbr/evaluation.py CHANGED Viewed

@@ -137,6 +137,7 @@ async def run_tests(
     timeout: int = 120,
     uses_prebuilt: bool = False,
     workdir: str | None = None,
+    repo: str | None = None,
 ) -> TestResults:
     """Run a list of tests and return results.
@@ -146,6 +147,7 @@ async def run_tests(
         timeout: Timeout per test in seconds.
         uses_prebuilt: Whether a pre-built SWE-bench image is being used.
         workdir: Working directory to run tests from. Defaults to env.workdir.
+        repo: Repository identifier for looking up the correct test runner.
     Returns:
         TestResults with pass/fail counts.
@@ -157,7 +159,7 @@ async def run_tests(
     passed = 0
     for test in tests:
-        test_cmd = _build_test_command(test, uses_prebuilt)
+        test_cmd = _build_test_command(test, uses_prebuilt, repo=repo)
         try:
             exit_code, stdout, stderr = await env.exec_command(
@@ -198,7 +200,7 @@ async def run_tests(
     )
-def _build_test_command(test: str, uses_prebuilt: bool = False) -> str:
+def _build_test_command(test: str, uses_prebuilt: bool = False, repo: str | None = None) -> str:
     """Build a test command for the given test identifier.
     Args:
@@ -206,18 +208,29 @@ def _build_test_command(test: str, uses_prebuilt: bool = False) -> str:
             - pytest: "tests/test_file.py::test_func" or "tests/test_file.py"
             - Django: "test_method (module.TestClass)" or "module.tests.TestClass.test_method"
         uses_prebuilt: If True, activate the testbed conda environment first.
+        repo: Repository identifier (e.g., "sympy/sympy") for looking up
+            the correct test runner from upstream SWE-bench specs.
     Returns:
         Shell command string to run the test.
     """
     import re
+    from .swebench_test_specs import get_repo_test_command
     # Pre-built SWE-bench images use a conda environment called 'testbed'
     if uses_prebuilt:
         activate = "source /opt/miniconda3/etc/profile.d/conda.sh && conda activate testbed && "
     else:
         activate = ""
+    # Check upstream SWE-bench test command mapping for non-pytest runners
+    if repo:
+        upstream_cmd = get_repo_test_command(repo)
+        if upstream_cmd and "runtests.py" not in upstream_cmd and "pytest" not in upstream_cmd:
+            # Non-pytest, non-Django project (e.g., sympy uses bin/test)
+            return f"{activate}{upstream_cmd} {test}"
     # Detect Django test format: "test_method (module.TestClass)"
     if "(" in test and ")" in test and "." in test:
         # Extract module path from parentheses
@@ -344,12 +357,15 @@ async def evaluate_patch(
     if not env.uses_prebuilt:
         await _install_dependencies(env)
+    repo = task.get("repo")
     fail_to_pass_results = await run_tests(
         env,
         fail_to_pass_tests,
         timeout=test_timeout,
         uses_prebuilt=env.uses_prebuilt,
         workdir=eval_workdir,
+        repo=repo,
     )
     pass_to_pass_results = await run_tests(
@@ -358,6 +374,7 @@ async def evaluate_patch(
         timeout=test_timeout,
         uses_prebuilt=env.uses_prebuilt,
         workdir=eval_workdir,
+        repo=repo,
     )
     resolved = (

mcpbr/harness.py CHANGED Viewed

@@ -962,7 +962,10 @@ async def run_evaluation(
                 "args": config.mcp_server.args if config.mcp_server else [],
             }
-    docker_manager = DockerEnvironmentManager(use_prebuilt=config.use_prebuilt_images)
+    docker_manager = DockerEnvironmentManager(
+        use_prebuilt=config.use_prebuilt_images,
+        extra_volumes=config.volumes,
+    )
     results: list[TaskResult] = []
     # Add cached results if using state tracker

mcpbr/harnesses.py CHANGED Viewed

@@ -452,9 +452,10 @@ DEFAULT_PROMPT = (
 )
 MCP_PROMPT_SUFFIX = (
-    "\n\nYou have access to an MCP server with additional tools. "
-    "Consider using the MCP tools (prefixed with mcp__) when they would "
-    "help you understand or navigate the codebase more effectively."
+    "\n\nYou have access to an MCP server with additional tools for codebase analysis. "
+    "Use these tools to understand the codebase structure, find definitions, trace call chains, "
+    "and navigate dependencies before making changes. The MCP tools are especially useful for "
+    "understanding how code is connected across files."
 )
@@ -594,25 +595,27 @@ class ClaudeCodeHarness:
         instance_id = task_id or task.get("instance_id", "unknown")
         mcp_server_name = None
+        mcp_json_path = None
         if self.mcp_server:
             mcp_server_name = self.mcp_server.name
             args = self.mcp_server.get_args_for_workdir(workdir)
             mcp_env = self.mcp_server.get_expanded_env()
-            add_cmd = [
-                "claude",
-                "mcp",
-                "add",
-                mcp_server_name,
-                "--",
-                self.mcp_server.command,
-            ] + args
-            exit_code, stdout, stderr = await _run_cli_command(
-                add_cmd, workdir, timeout=30, env=mcp_env
-            )
-            if exit_code != 0:
-                self._console.print(
-                    f"[yellow]Warning: MCP server add failed (exit {exit_code}): {stderr or stdout}[/yellow]"
-                )
+            # Write .mcp.json file for Claude Code to discover MCP tools.
+            # This is more reliable than `claude mcp add` which can create broken
+            # tool registrations where the server connects but tools aren't routable.
+            mcp_config = {
+                "mcpServers": {
+                    mcp_server_name: {
+                        "type": "stdio",
+                        "command": self.mcp_server.command,
+                        "args": args,
+                        "env": mcp_env,
+                    }
+                }
+            }
+            mcp_json_path = os.path.join(workdir, ".mcp.json")
+            Path(mcp_json_path).write_text(json.dumps(mcp_config, indent=2))
         try:
             command = [
@@ -683,12 +686,8 @@ class ClaudeCodeHarness:
             if exit_code != 0:
                 error_msg = stderr or "Unknown error"
-                if mcp_server_name:
-                    await _run_cli_command(
-                        ["claude", "mcp", "remove", mcp_server_name],
-                        workdir,
-                        timeout=10,
-                    )
+                if mcp_json_path and os.path.exists(mcp_json_path):
+                    os.remove(mcp_json_path)
                 return AgentResult(
                     patch="",
                     success=False,
@@ -705,12 +704,8 @@ class ClaudeCodeHarness:
                     cost_usd=cost_usd,
                 )
-            if mcp_server_name:
-                await _run_cli_command(
-                    ["claude", "mcp", "remove", mcp_server_name],
-                    workdir,
-                    timeout=10,
-                )
+            if mcp_json_path and os.path.exists(mcp_json_path):
+                os.remove(mcp_json_path)
             # Check git status to understand what happened
             git_exit, git_status, git_stderr = await _run_cli_command(
@@ -747,12 +742,8 @@ class ClaudeCodeHarness:
                 cost_usd=cost_usd,
             )
         except Exception:
-            if mcp_server_name:
-                await _run_cli_command(
-                    ["claude", "mcp", "remove", mcp_server_name],
-                    workdir,
-                    timeout=10,
-                )
+            if mcp_json_path and os.path.exists(mcp_json_path):
+                os.remove(mcp_json_path)
             raise
     async def _solve_in_docker(
@@ -846,37 +837,36 @@ class ClaudeCodeHarness:
                 self._console.print(f"[cyan]Registering MCP server: {mcp_server_name}[/cyan]")
                 self._console.print(f"[dim]  Command: {self.mcp_server.command} {args_str}[/dim]")
-            # Register MCP server separately with its own timeout
-            # Use shlex.quote() to prevent shell injection and handle spaces/special characters
-            quoted_workdir = shlex.quote(env.workdir)
-            quoted_env_file = shlex.quote(env_file)
-            quoted_server_name = shlex.quote(mcp_server_name)
-            quoted_command = shlex.quote(self.mcp_server.command)
-            quoted_args = " ".join(shlex.quote(arg) for arg in args)
-            mcp_add_cmd = [
-                "/bin/bash",
-                "-c",
-                f"cd {quoted_workdir} && su mcpbr -c 'source {quoted_env_file} && cd {quoted_workdir} && claude mcp add {quoted_server_name} -- {quoted_command} {quoted_args}'",
-            ]
+            # Write .mcp.json to workdir for Claude Code to discover MCP tools.
+            # File-based config is more reliable than `claude mcp add` which can create
+            # broken tool registrations where the server connects but tools aren't routable.
+            mcp_config = {
+                "mcpServers": {
+                    mcp_server_name: {
+                        "type": "stdio",
+                        "command": self.mcp_server.command,
+                        "args": args,
+                        "env": self.mcp_server.get_expanded_env(),
+                    }
+                }
+            }
+            mcp_json_content = json.dumps(mcp_config, indent=2)
+            mcp_json_path = f"{env.workdir}/.mcp.json"
             try:
                 mcp_exit_code, mcp_stdout, mcp_stderr = await env.exec_command(
-                    mcp_add_cmd,
-                    timeout=60,  # Separate 60s timeout for MCP registration
-                    environment=docker_env,
+                    f"cat > {mcp_json_path} << 'MCP_JSON_EOF'\n{mcp_json_content}\nMCP_JSON_EOF",
+                    timeout=10,
                 )
+                await env.exec_command(f"chown mcpbr:mcpbr {mcp_json_path}", timeout=5)
                 if mcp_exit_code != 0:
-                    error_msg = f"MCP server registration failed (exit {mcp_exit_code})"
+                    error_msg = f"MCP config write failed (exit {mcp_exit_code})"
                     if mcp_stderr:
                         error_msg += f": {mcp_stderr}"
-                    if mcp_stdout:
-                        error_msg += f"\nStdout: {mcp_stdout}"
                     if verbose:
                         self._console.print(f"[red]✗ {error_msg}[/red]")
-                    # Clean up temp files before early return
                     await env.exec_command(f"rm -f {prompt_file} {env_file}", timeout=5)
                     return AgentResult(
@@ -889,16 +879,13 @@ class ClaudeCodeHarness:
                     )
                 if verbose:
-                    self._console.print("[green]✓ MCP server registered successfully[/green]")
-                    if mcp_stdout.strip():
-                        self._console.print(f"[dim]{mcp_stdout.strip()}[/dim]")
+                    self._console.print("[green]✓ MCP server configured via .mcp.json[/green]")
             except asyncio.TimeoutError:
-                error_msg = "MCP server registration timed out after 60s. The MCP server may have failed to start or is hanging during initialization."
+                error_msg = "Failed to write MCP configuration file."
                 if verbose:
                     self._console.print(f"[red]✗ {error_msg}[/red]")
-                # Clean up temp files before early return
                 await env.exec_command(f"rm -f {prompt_file} {env_file}", timeout=5)
                 return AgentResult(
@@ -908,6 +895,35 @@ class ClaudeCodeHarness:
                     cost_usd=None,
                 )
+        # Run setup_command if configured (BEFORE agent, OUTSIDE task timer).
+        # This is the right place for expensive one-time operations like
+        # pre-computing caches that should not count against timeout_seconds.
+        if self.mcp_server and self.mcp_server.setup_command:
+            setup_cmd = self.mcp_server.get_setup_command_for_workdir(env.workdir)
+            setup_timeout = int(self.mcp_server.setup_timeout_ms / 1000)
+            if verbose:
+                self._console.print(
+                    f"[cyan]Running setup command (timeout: {setup_timeout:.0f}s)...[/cyan]"
+                )
+            setup_full_cmd = f"source {shlex.quote(env_file)} && {setup_cmd}"
+            setup_exit, _setup_stdout, setup_stderr = await env.exec_command(
+                ["/bin/bash", "-c", setup_full_cmd],
+                timeout=setup_timeout,
+            )
+            if setup_exit != 0:
+                if verbose:
+                    self._console.print(
+                        f"[yellow]⚠ Setup command exited with code {setup_exit}[/yellow]"
+                    )
+                    if setup_stderr:
+                        self._console.print(f"[dim]{setup_stderr[:500]}[/dim]")
+                # Non-fatal: continue with agent even if setup fails
+            elif verbose:
+                self._console.print("[green]✓ Setup command completed[/green]")
         try:
             claude_args = [
                 "--print",
@@ -1039,16 +1055,9 @@ class ClaudeCodeHarness:
                             error_msg += f"\nMCP server logs saved to: {mcp_log_path}"
                 if mcp_server_name:
-                    # Use shlex.quote() for MCP removal command
-                    quoted_env_file = shlex.quote(env_file)
-                    quoted_server_name = shlex.quote(mcp_server_name)
-                    remove_cmd = (
-                        f"source {quoted_env_file} && claude mcp remove {quoted_server_name}"
-                    )
                     await env.exec_command(
-                        f"su mcpbr -c {shlex.quote(remove_cmd)}",
-                        timeout=10,
-                        environment=docker_env,
+                        f"rm -f {env.workdir}/.mcp.json",
+                        timeout=5,
                     )
                 return AgentResult(
@@ -1068,14 +1077,9 @@ class ClaudeCodeHarness:
                 )
             if mcp_server_name:
-                # Use shlex.quote() for MCP removal command
-                quoted_env_file = shlex.quote(env_file)
-                quoted_server_name = shlex.quote(mcp_server_name)
-                remove_cmd = f"source {quoted_env_file} && claude mcp remove {quoted_server_name}"
                 await env.exec_command(
-                    f"su mcpbr -c {shlex.quote(remove_cmd)}",
-                    timeout=10,
-                    environment=docker_env,
+                    f"rm -f {env.workdir}/.mcp.json",
+                    timeout=5,
                 )
             _, git_status, git_stderr = await env.exec_command(
@@ -1160,20 +1164,13 @@ class ClaudeCodeHarness:
             if mcp_server_name:
                 try:
-                    # Use shlex.quote() for MCP removal command
-                    quoted_env_file = shlex.quote(env_file)
-                    quoted_server_name = shlex.quote(mcp_server_name)
-                    remove_cmd = (
-                        f"source {quoted_env_file} && claude mcp remove {quoted_server_name}"
-                    )
                     await env.exec_command(
-                        f"su mcpbr -c {shlex.quote(remove_cmd)}",
-                        timeout=10,
-                        environment=docker_env,
+                        f"rm -f {env.workdir}/.mcp.json",
+                        timeout=5,
                     )
                 except Exception as e:
                     if verbose:
-                        self._console.print(f"[dim red]Failed to remove MCP server: {e}[/dim red]")
+                        self._console.print(f"[dim red]Failed to clean up .mcp.json: {e}[/dim red]")
             error_msg = f"Task execution timed out after {timeout}s."
             if self.mcp_server:
@@ -1204,20 +1201,13 @@ class ClaudeCodeHarness:
         except Exception:
             if mcp_server_name:
                 try:
-                    # Use shlex.quote() for MCP removal command
-                    quoted_env_file = shlex.quote(env_file)
-                    quoted_server_name = shlex.quote(mcp_server_name)
-                    remove_cmd = (
-                        f"source {quoted_env_file} && claude mcp remove {quoted_server_name}"
-                    )
                     await env.exec_command(
-                        f"su mcpbr -c {shlex.quote(remove_cmd)}",
-                        timeout=10,
-                        environment=docker_env,
+                        f"rm -f {env.workdir}/.mcp.json",
+                        timeout=5,
                     )
                 except Exception as e:
                     if verbose:
-                        self._console.print(f"[dim red]Failed to remove MCP server: {e}[/dim red]")
+                        self._console.print(f"[dim red]Failed to clean up .mcp.json: {e}[/dim red]")
             raise
         finally:
             # Close MCP log file if it was opened
@@ -1230,7 +1220,9 @@ class ClaudeCodeHarness:
                     if verbose:
                         self._console.print(f"[dim red]Failed to close MCP log file: {e}[/dim red]")
-            await env.exec_command(f"rm -f {prompt_file} {env_file}", timeout=5)
+            await env.exec_command(
+                f"rm -f {prompt_file} {env_file} {env.workdir}/.mcp.json", timeout=5
+            )
 HARNESS_REGISTRY: dict[str, type] = {

mcpbr/swebench_test_specs.py ADDED Viewed

@@ -0,0 +1,33 @@
+"""Test command specs from upstream SWE-bench harness.
+Maps repositories to their correct test commands. mcpbr defaults to pytest
+for all non-Django projects, but some projects (e.g., sympy) use custom test
+runners that aren't pytest-compatible.
+Source: https://github.com/SWE-bench/SWE-bench/blob/main/swebench/harness/constants/python.py
+"""
+# Base test commands per framework (from upstream constants/python.py)
+TEST_PYTEST = "pytest --no-header -rA --tb=no -p no:cacheprovider"
+TEST_DJANGO = "./tests/runtests.py --verbosity 2 --settings=test_sqlite --parallel 1"
+TEST_SYMPY = "PYTHONWARNINGS='ignore::UserWarning,ignore::SyntaxWarning' bin/test -C --verbose"
+TEST_SPHINX = "tox --current-env -epy39 -v --"
+TEST_ASTROPY = "pytest -rA -vv -o console_output_style=classic --tb=no"
+TEST_SEABORN = "pytest --no-header -rA"
+# Repo → test command mapping
+# Only non-pytest entries need to be here — pytest is the default fallback.
+# Django is included for documentation but its existing handler takes precedence.
+REPO_TO_TEST_CMD: dict[str, str] = {
+    "sympy/sympy": TEST_SYMPY,
+    "django/django": TEST_DJANGO,
+    "sphinx-doc/sphinx": TEST_SPHINX,
+}
+def get_repo_test_command(repo: str) -> str | None:
+    """Look up the upstream test command for a repo.
+    Returns None if repo uses standard pytest (handled by existing logic).
+    """
+    return REPO_TO_TEST_CMD.get(repo)

{mcpbr-0.4.12.dist-info → mcpbr-0.4.14.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mcpbr
-Version: 0.4.12
+Version: 0.4.14
 Summary: Model Context Protocol Benchmark Runner - evaluate MCP servers against software engineering benchmarks
 Project-URL: Homepage, https://github.com/greynewell/mcpbr
 Project-URL: Repository, https://github.com/greynewell/mcpbr

{mcpbr-0.4.12.dist-info → mcpbr-0.4.14.dist-info}/RECORD RENAMED Viewed

@@ -3,14 +3,14 @@ mcpbr/__main__.py,sha256=WmeQsAqtW_9tMTNKArH1m76DPBokZpXuy6dMZp13gXA,132
 mcpbr/agent.py,sha256=aSFH2S3ExKZfdVfMbzk6D1nRhpKt4JmpRzmF4Vi6Gmo,5795
 mcpbr/cache.py,sha256=YiP13omwMbXLb6NhNocJvL58enXEx9J8OrvTZnWUkw4,13254
 mcpbr/cli.py,sha256=xvh7gpJx0LzjV3g-Te4FF7BfHubGzDxOiYQsSeQnCEc,68276
-mcpbr/config.py,sha256=quB2KPKsFY7Y86wTZr9GjlZRYsh13MngNikdOTBKEvY,17864
+mcpbr/config.py,sha256=E9Icedjk_VFONnnEZbWW5WN7El5RaJD5pGi-JQlrlV0,18890
 mcpbr/config_inheritance.py,sha256=0EV9Tv62UFNgZoc8mY7yYjHEbnMM_R5EAhSeuK7ajAA,6617
 mcpbr/config_validator.py,sha256=ZMEIeK4y6fSwyY46Xv5dK5v3jM4HDKcYkosnIcn7iyI,20488
-mcpbr/docker_env.py,sha256=GKjQULslYANGSkyY8ZLaAEy9WWl0MYqS1LZ0VavmhXc,31085
+mcpbr/docker_env.py,sha256=vpbjL227L9qLjrS7CzXevxzo9393qmOrrxWG7lP1s44,31629
 mcpbr/env_expansion.py,sha256=Rkhth-tWV8CptQlSSk9exuMsUaSTTW9hj69z4snZd_U,6122
-mcpbr/evaluation.py,sha256=LQXSLn_4yIkZ0jwZ85AaKku2dHcPirmj5c7-nhpPMfY,11994
-mcpbr/harness.py,sha256=6_p_MFrs8RulosXToVtB9-P4Ej8XzR6ZzCKDP4mUeGY,51026
-mcpbr/harnesses.py,sha256=y2M2Warbj2eWpF5LwAPfdkIDLMGdd4hw9Rw-Ko_OCzU,47814
+mcpbr/evaluation.py,sha256=EjPREWv7hBRqhBhNan0ERh2imqMBegT0Y2cgZlTxRGk,12765
+mcpbr/harness.py,sha256=sEMP2PnrQP_BKK-4yixz05qXcY-0OsJNJ5e5JU2Rtsc,51079
+mcpbr/harnesses.py,sha256=h9iDp4qkPABNwO9OXbJ61qcD4n0oAUTU7AQksxRKLcg,47335
 mcpbr/incremental_save.py,sha256=1dm3pGiEIhP8cVk_Y6XF_cAdo3B_vyRc6CO8Wt-MyIA,4830
 mcpbr/junit_reporter.py,sha256=M_02zJbFbA3VoIYG5oR7VDecqWHEpIee-JOUShWNuLU,9261
 mcpbr/log_formatter.py,sha256=d2jWH7z4IRSbr8-PbnEt3TmLAqk8vgdPT38uTnTCN5c,21488
@@ -27,6 +27,7 @@ mcpbr/smoke_test.py,sha256=srYGOn_auspRbt_a6ebYDDDq_nujA_iZGman5nU1ikU,14925
 mcpbr/state_tracker.py,sha256=rIP9LIHtQg6oBsLIxnwRjE865Kw6U7DMO_GzzuMRC0E,10790
 mcpbr/statistics.py,sha256=Ny8TMdBrIpS4KfKCJcuFfTeaGuTmEkS1G_uHBlboYdA,19134
 mcpbr/streaming.py,sha256=XPhkXO1R1EsWtkoPvCpyy4TehEom7hkuOeP-00joX3o,13853
+mcpbr/swebench_test_specs.py,sha256=Mh_BPjcexkgDT3p4zT2p31925b8w5tgsxxRpYZQZalM,1390
 mcpbr/templates.py,sha256=dqwboVB-yfE06w2rgDOvuWJB4Hx5duH_W-jvLBqmlKg,10683
 mcpbr/benchmarks/__init__.py,sha256=RK0TxNTSqhUX_WtGs0CcV1MX2uiCBTUWkEHYpo_7T5M,4099
 mcpbr/benchmarks/agentbench.py,sha256=jQ8OG_5cn-PvOZizXivysLTw9xvtA8c_MWfw3jXq0TQ,6512
@@ -68,15 +69,15 @@ mcpbr/infrastructure/azure_health.py,sha256=xITmIa9IfYIwxcVhY0sJ81a-6WNKiT8kSQTd
 mcpbr/infrastructure/base.py,sha256=Olj6uiNBeGoUqltZI1NHZfa26kzT-6jfp8YIXSykFKM,3037
 mcpbr/infrastructure/local.py,sha256=VK6UAg7Dzvb9v1LAJgNGA_s0blQKrHAQEXBAC75zAL8,4237
 mcpbr/infrastructure/manager.py,sha256=j0T7U1Tbajmfve4SNfhYKikvL9kgSVT01fYKMC-sH-s,4796
-mcpbr-0.4.12.data/data/mcpbr/data/templates/brave-search.yaml,sha256=PYHXJOaDqYKoqdJc3JV1WbaL-BacrdkQPck1eKGbMPo,1098
-mcpbr-0.4.12.data/data/mcpbr/data/templates/filesystem.yaml,sha256=1p6Z6ChViFYHAODYD71JFst6gdhR5y5rnWNf7Pp5zOY,1091
-mcpbr-0.4.12.data/data/mcpbr/data/templates/github.yaml,sha256=uzPwq5_loFegvH6RNov1MQclbBiFBgYWzpiKLfEN9H4,1133
-mcpbr-0.4.12.data/data/mcpbr/data/templates/google-maps.yaml,sha256=ldR7E9UmuAA-3nJZ1SShD7PhG0_AwDJOSYuy19hQ6cI,1116
-mcpbr-0.4.12.data/data/mcpbr/data/templates/postgres.yaml,sha256=r6R1069BhV4ADQGPZ-T9r6xMNwbr2yrNh8-IHPb4XiI,1178
-mcpbr-0.4.12.data/data/mcpbr/data/templates/slack.yaml,sha256=dBn_YqlFJMJai_55sRDb4hXClgxRpcyYTlWl4LBkpuo,1072
-mcpbr-0.4.12.data/data/mcpbr/data/templates/sqlite.yaml,sha256=UR5yN9f8v_BC6oskny2xMldHWzZrB9b_PpFSmv5eccg,1080
-mcpbr-0.4.12.dist-info/METADATA,sha256=G1WBoJD0EzwXw6HtSeabkbBugGWOVBYdyQe5A4syqP0,54809
-mcpbr-0.4.12.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-mcpbr-0.4.12.dist-info/entry_points.txt,sha256=lLL8icujqBF36V9bF4gfaB2at4cFKCiv2IdJ1i5hT9U,41
-mcpbr-0.4.12.dist-info/licenses/LICENSE,sha256=mcXLPreEXzD-816yLKmocCPr9_k3gFFo62TjrSuKkIQ,1075
-mcpbr-0.4.12.dist-info/RECORD,,
+mcpbr-0.4.14.data/data/mcpbr/data/templates/brave-search.yaml,sha256=PYHXJOaDqYKoqdJc3JV1WbaL-BacrdkQPck1eKGbMPo,1098
+mcpbr-0.4.14.data/data/mcpbr/data/templates/filesystem.yaml,sha256=1p6Z6ChViFYHAODYD71JFst6gdhR5y5rnWNf7Pp5zOY,1091
+mcpbr-0.4.14.data/data/mcpbr/data/templates/github.yaml,sha256=uzPwq5_loFegvH6RNov1MQclbBiFBgYWzpiKLfEN9H4,1133
+mcpbr-0.4.14.data/data/mcpbr/data/templates/google-maps.yaml,sha256=ldR7E9UmuAA-3nJZ1SShD7PhG0_AwDJOSYuy19hQ6cI,1116
+mcpbr-0.4.14.data/data/mcpbr/data/templates/postgres.yaml,sha256=r6R1069BhV4ADQGPZ-T9r6xMNwbr2yrNh8-IHPb4XiI,1178
+mcpbr-0.4.14.data/data/mcpbr/data/templates/slack.yaml,sha256=dBn_YqlFJMJai_55sRDb4hXClgxRpcyYTlWl4LBkpuo,1072
+mcpbr-0.4.14.data/data/mcpbr/data/templates/sqlite.yaml,sha256=UR5yN9f8v_BC6oskny2xMldHWzZrB9b_PpFSmv5eccg,1080
+mcpbr-0.4.14.dist-info/METADATA,sha256=f2PEinjR_XbBOmFtDAZxoDHdBLwKxLX4V9kjYqh_UtA,54809
+mcpbr-0.4.14.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+mcpbr-0.4.14.dist-info/entry_points.txt,sha256=lLL8icujqBF36V9bF4gfaB2at4cFKCiv2IdJ1i5hT9U,41
+mcpbr-0.4.14.dist-info/licenses/LICENSE,sha256=mcXLPreEXzD-816yLKmocCPr9_k3gFFo62TjrSuKkIQ,1075
+mcpbr-0.4.14.dist-info/RECORD,,