mcpbr 0.4.12__py3-none-any.whl → 0.4.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mcpbr/config.py CHANGED
@@ -109,6 +109,16 @@ class MCPServerConfig(BaseModel):
109
109
  default=900000,
110
110
  description="Timeout in milliseconds for MCP tool execution (default: 15 min for long-running tools)",
111
111
  )
112
+ setup_command: str | None = Field(
113
+ default=None,
114
+ description="Shell command to run inside the container BEFORE the agent starts. "
115
+ "Runs outside the task timer (does not count against timeout_seconds). "
116
+ "Use {workdir} as placeholder. Useful for pre-computing caches.",
117
+ )
118
+ setup_timeout_ms: int = Field(
119
+ default=900000,
120
+ description="Timeout in milliseconds for the setup_command (default: 15 min)",
121
+ )
112
122
 
113
123
  def get_args_for_workdir(self, workdir: str) -> list[str]:
114
124
  """Replace {workdir} placeholder in args with actual path."""
@@ -117,6 +127,12 @@ class MCPServerConfig(BaseModel):
117
127
  result.append(arg.replace("{workdir}", workdir))
118
128
  return result
119
129
 
130
+ def get_setup_command_for_workdir(self, workdir: str) -> str | None:
131
+ """Replace {workdir} placeholder in setup_command with actual path."""
132
+ if self.setup_command is None:
133
+ return None
134
+ return self.setup_command.replace("{workdir}", workdir)
135
+
120
136
  def get_expanded_env(self) -> dict[str, str]:
121
137
  """Expand ${VAR} references in env values using os.environ.
122
138
 
@@ -400,6 +416,12 @@ class HarnessConfig(BaseModel):
400
416
  description="Enable comprehensive performance profiling (tool latency, memory, overhead)",
401
417
  )
402
418
 
419
+ volumes: dict[str, str] = Field(
420
+ default_factory=dict,
421
+ description="Additional volume mounts (read-write) for Docker containers (host_path: container_path). "
422
+ "Mounted into every container, persists across tasks. Useful for pre-computed caches.",
423
+ )
424
+
403
425
  infrastructure: InfrastructureConfig = Field(
404
426
  default_factory=InfrastructureConfig,
405
427
  description="Infrastructure configuration (local or azure)",
mcpbr/docker_env.py CHANGED
@@ -314,14 +314,18 @@ class DockerEnvironmentManager:
314
314
  FALLBACK_IMAGE = "mcpbr-env"
315
315
  DOCKERFILE_PATH = Path(__file__).parent.parent.parent / "Dockerfile"
316
316
 
317
- def __init__(self, use_prebuilt: bool = True) -> None:
317
+ def __init__(
318
+ self, use_prebuilt: bool = True, extra_volumes: dict[str, str] | None = None
319
+ ) -> None:
318
320
  """Initialize the Docker environment manager.
319
321
 
320
322
  Args:
321
323
  use_prebuilt: If True, try to use pre-built SWE-bench images first.
324
+ extra_volumes: Additional volume mounts (read-write) (host_path -> container_path).
322
325
  """
323
326
  self.client = docker.from_env()
324
327
  self.use_prebuilt = use_prebuilt
328
+ self._extra_volumes = extra_volumes or {}
325
329
  self._fallback_image_built = False
326
330
  self._temp_dirs: list[tempfile.TemporaryDirectory[str]] = []
327
331
  self._containers: list[Container] = []
@@ -488,6 +492,15 @@ CMD ["/bin/bash"]
488
492
 
489
493
  for attempt in range(max_retries + 1):
490
494
  try:
495
+ volumes_dict: dict[str, dict[str, str]] = {
496
+ host_workdir: {"bind": "/workspace", "mode": "rw"},
497
+ }
498
+ for host_path, container_path in self._extra_volumes.items():
499
+ volumes_dict[os.path.abspath(host_path)] = {
500
+ "bind": container_path,
501
+ "mode": "rw",
502
+ }
503
+
491
504
  container = self.client.containers.run(
492
505
  image_name,
493
506
  command="tail -f /dev/null",
@@ -495,9 +508,7 @@ CMD ["/bin/bash"]
495
508
  detach=True,
496
509
  platform="linux/amd64" if uses_prebuilt else None,
497
510
  network_mode="bridge", # Enable network for API calls
498
- volumes={
499
- host_workdir: {"bind": "/workspace", "mode": "rw"},
500
- },
511
+ volumes=volumes_dict,
501
512
  working_dir=container_workdir,
502
513
  remove=False,
503
514
  labels={
mcpbr/evaluation.py CHANGED
@@ -137,6 +137,7 @@ async def run_tests(
137
137
  timeout: int = 120,
138
138
  uses_prebuilt: bool = False,
139
139
  workdir: str | None = None,
140
+ repo: str | None = None,
140
141
  ) -> TestResults:
141
142
  """Run a list of tests and return results.
142
143
 
@@ -146,6 +147,7 @@ async def run_tests(
146
147
  timeout: Timeout per test in seconds.
147
148
  uses_prebuilt: Whether a pre-built SWE-bench image is being used.
148
149
  workdir: Working directory to run tests from. Defaults to env.workdir.
150
+ repo: Repository identifier for looking up the correct test runner.
149
151
 
150
152
  Returns:
151
153
  TestResults with pass/fail counts.
@@ -157,7 +159,7 @@ async def run_tests(
157
159
  passed = 0
158
160
 
159
161
  for test in tests:
160
- test_cmd = _build_test_command(test, uses_prebuilt)
162
+ test_cmd = _build_test_command(test, uses_prebuilt, repo=repo)
161
163
 
162
164
  try:
163
165
  exit_code, stdout, stderr = await env.exec_command(
@@ -198,7 +200,7 @@ async def run_tests(
198
200
  )
199
201
 
200
202
 
201
- def _build_test_command(test: str, uses_prebuilt: bool = False) -> str:
203
+ def _build_test_command(test: str, uses_prebuilt: bool = False, repo: str | None = None) -> str:
202
204
  """Build a test command for the given test identifier.
203
205
 
204
206
  Args:
@@ -206,18 +208,29 @@ def _build_test_command(test: str, uses_prebuilt: bool = False) -> str:
206
208
  - pytest: "tests/test_file.py::test_func" or "tests/test_file.py"
207
209
  - Django: "test_method (module.TestClass)" or "module.tests.TestClass.test_method"
208
210
  uses_prebuilt: If True, activate the testbed conda environment first.
211
+ repo: Repository identifier (e.g., "sympy/sympy") for looking up
212
+ the correct test runner from upstream SWE-bench specs.
209
213
 
210
214
  Returns:
211
215
  Shell command string to run the test.
212
216
  """
213
217
  import re
214
218
 
219
+ from .swebench_test_specs import get_repo_test_command
220
+
215
221
  # Pre-built SWE-bench images use a conda environment called 'testbed'
216
222
  if uses_prebuilt:
217
223
  activate = "source /opt/miniconda3/etc/profile.d/conda.sh && conda activate testbed && "
218
224
  else:
219
225
  activate = ""
220
226
 
227
+ # Check upstream SWE-bench test command mapping for non-pytest runners
228
+ if repo:
229
+ upstream_cmd = get_repo_test_command(repo)
230
+ if upstream_cmd and "runtests.py" not in upstream_cmd and "pytest" not in upstream_cmd:
231
+ # Non-pytest, non-Django project (e.g., sympy uses bin/test)
232
+ return f"{activate}{upstream_cmd} {test}"
233
+
221
234
  # Detect Django test format: "test_method (module.TestClass)"
222
235
  if "(" in test and ")" in test and "." in test:
223
236
  # Extract module path from parentheses
@@ -344,12 +357,15 @@ async def evaluate_patch(
344
357
  if not env.uses_prebuilt:
345
358
  await _install_dependencies(env)
346
359
 
360
+ repo = task.get("repo")
361
+
347
362
  fail_to_pass_results = await run_tests(
348
363
  env,
349
364
  fail_to_pass_tests,
350
365
  timeout=test_timeout,
351
366
  uses_prebuilt=env.uses_prebuilt,
352
367
  workdir=eval_workdir,
368
+ repo=repo,
353
369
  )
354
370
 
355
371
  pass_to_pass_results = await run_tests(
@@ -358,6 +374,7 @@ async def evaluate_patch(
358
374
  timeout=test_timeout,
359
375
  uses_prebuilt=env.uses_prebuilt,
360
376
  workdir=eval_workdir,
377
+ repo=repo,
361
378
  )
362
379
 
363
380
  resolved = (
mcpbr/harness.py CHANGED
@@ -962,7 +962,10 @@ async def run_evaluation(
962
962
  "args": config.mcp_server.args if config.mcp_server else [],
963
963
  }
964
964
 
965
- docker_manager = DockerEnvironmentManager(use_prebuilt=config.use_prebuilt_images)
965
+ docker_manager = DockerEnvironmentManager(
966
+ use_prebuilt=config.use_prebuilt_images,
967
+ extra_volumes=config.volumes,
968
+ )
966
969
 
967
970
  results: list[TaskResult] = []
968
971
  # Add cached results if using state tracker
mcpbr/harnesses.py CHANGED
@@ -452,9 +452,10 @@ DEFAULT_PROMPT = (
452
452
  )
453
453
 
454
454
  MCP_PROMPT_SUFFIX = (
455
- "\n\nYou have access to an MCP server with additional tools. "
456
- "Consider using the MCP tools (prefixed with mcp__) when they would "
457
- "help you understand or navigate the codebase more effectively."
455
+ "\n\nYou have access to an MCP server with additional tools for codebase analysis. "
456
+ "Use these tools to understand the codebase structure, find definitions, trace call chains, "
457
+ "and navigate dependencies before making changes. The MCP tools are especially useful for "
458
+ "understanding how code is connected across files."
458
459
  )
459
460
 
460
461
 
@@ -594,25 +595,27 @@ class ClaudeCodeHarness:
594
595
  instance_id = task_id or task.get("instance_id", "unknown")
595
596
 
596
597
  mcp_server_name = None
598
+ mcp_json_path = None
597
599
  if self.mcp_server:
598
600
  mcp_server_name = self.mcp_server.name
599
601
  args = self.mcp_server.get_args_for_workdir(workdir)
600
602
  mcp_env = self.mcp_server.get_expanded_env()
601
- add_cmd = [
602
- "claude",
603
- "mcp",
604
- "add",
605
- mcp_server_name,
606
- "--",
607
- self.mcp_server.command,
608
- ] + args
609
- exit_code, stdout, stderr = await _run_cli_command(
610
- add_cmd, workdir, timeout=30, env=mcp_env
611
- )
612
- if exit_code != 0:
613
- self._console.print(
614
- f"[yellow]Warning: MCP server add failed (exit {exit_code}): {stderr or stdout}[/yellow]"
615
- )
603
+
604
+ # Write .mcp.json file for Claude Code to discover MCP tools.
605
+ # This is more reliable than `claude mcp add` which can create broken
606
+ # tool registrations where the server connects but tools aren't routable.
607
+ mcp_config = {
608
+ "mcpServers": {
609
+ mcp_server_name: {
610
+ "type": "stdio",
611
+ "command": self.mcp_server.command,
612
+ "args": args,
613
+ "env": mcp_env,
614
+ }
615
+ }
616
+ }
617
+ mcp_json_path = os.path.join(workdir, ".mcp.json")
618
+ Path(mcp_json_path).write_text(json.dumps(mcp_config, indent=2))
616
619
 
617
620
  try:
618
621
  command = [
@@ -683,12 +686,8 @@ class ClaudeCodeHarness:
683
686
 
684
687
  if exit_code != 0:
685
688
  error_msg = stderr or "Unknown error"
686
- if mcp_server_name:
687
- await _run_cli_command(
688
- ["claude", "mcp", "remove", mcp_server_name],
689
- workdir,
690
- timeout=10,
691
- )
689
+ if mcp_json_path and os.path.exists(mcp_json_path):
690
+ os.remove(mcp_json_path)
692
691
  return AgentResult(
693
692
  patch="",
694
693
  success=False,
@@ -705,12 +704,8 @@ class ClaudeCodeHarness:
705
704
  cost_usd=cost_usd,
706
705
  )
707
706
 
708
- if mcp_server_name:
709
- await _run_cli_command(
710
- ["claude", "mcp", "remove", mcp_server_name],
711
- workdir,
712
- timeout=10,
713
- )
707
+ if mcp_json_path and os.path.exists(mcp_json_path):
708
+ os.remove(mcp_json_path)
714
709
 
715
710
  # Check git status to understand what happened
716
711
  git_exit, git_status, git_stderr = await _run_cli_command(
@@ -747,12 +742,8 @@ class ClaudeCodeHarness:
747
742
  cost_usd=cost_usd,
748
743
  )
749
744
  except Exception:
750
- if mcp_server_name:
751
- await _run_cli_command(
752
- ["claude", "mcp", "remove", mcp_server_name],
753
- workdir,
754
- timeout=10,
755
- )
745
+ if mcp_json_path and os.path.exists(mcp_json_path):
746
+ os.remove(mcp_json_path)
756
747
  raise
757
748
 
758
749
  async def _solve_in_docker(
@@ -846,37 +837,36 @@ class ClaudeCodeHarness:
846
837
  self._console.print(f"[cyan]Registering MCP server: {mcp_server_name}[/cyan]")
847
838
  self._console.print(f"[dim] Command: {self.mcp_server.command} {args_str}[/dim]")
848
839
 
849
- # Register MCP server separately with its own timeout
850
- # Use shlex.quote() to prevent shell injection and handle spaces/special characters
851
- quoted_workdir = shlex.quote(env.workdir)
852
- quoted_env_file = shlex.quote(env_file)
853
- quoted_server_name = shlex.quote(mcp_server_name)
854
- quoted_command = shlex.quote(self.mcp_server.command)
855
- quoted_args = " ".join(shlex.quote(arg) for arg in args)
856
-
857
- mcp_add_cmd = [
858
- "/bin/bash",
859
- "-c",
860
- f"cd {quoted_workdir} && su mcpbr -c 'source {quoted_env_file} && cd {quoted_workdir} && claude mcp add {quoted_server_name} -- {quoted_command} {quoted_args}'",
861
- ]
840
+ # Write .mcp.json to workdir for Claude Code to discover MCP tools.
841
+ # File-based config is more reliable than `claude mcp add` which can create
842
+ # broken tool registrations where the server connects but tools aren't routable.
843
+ mcp_config = {
844
+ "mcpServers": {
845
+ mcp_server_name: {
846
+ "type": "stdio",
847
+ "command": self.mcp_server.command,
848
+ "args": args,
849
+ "env": self.mcp_server.get_expanded_env(),
850
+ }
851
+ }
852
+ }
853
+ mcp_json_content = json.dumps(mcp_config, indent=2)
854
+ mcp_json_path = f"{env.workdir}/.mcp.json"
862
855
 
863
856
  try:
864
857
  mcp_exit_code, mcp_stdout, mcp_stderr = await env.exec_command(
865
- mcp_add_cmd,
866
- timeout=60, # Separate 60s timeout for MCP registration
867
- environment=docker_env,
858
+ f"cat > {mcp_json_path} << 'MCP_JSON_EOF'\n{mcp_json_content}\nMCP_JSON_EOF",
859
+ timeout=10,
868
860
  )
861
+ await env.exec_command(f"chown mcpbr:mcpbr {mcp_json_path}", timeout=5)
869
862
 
870
863
  if mcp_exit_code != 0:
871
- error_msg = f"MCP server registration failed (exit {mcp_exit_code})"
864
+ error_msg = f"MCP config write failed (exit {mcp_exit_code})"
872
865
  if mcp_stderr:
873
866
  error_msg += f": {mcp_stderr}"
874
- if mcp_stdout:
875
- error_msg += f"\nStdout: {mcp_stdout}"
876
867
  if verbose:
877
868
  self._console.print(f"[red]✗ {error_msg}[/red]")
878
869
 
879
- # Clean up temp files before early return
880
870
  await env.exec_command(f"rm -f {prompt_file} {env_file}", timeout=5)
881
871
 
882
872
  return AgentResult(
@@ -889,16 +879,13 @@ class ClaudeCodeHarness:
889
879
  )
890
880
 
891
881
  if verbose:
892
- self._console.print("[green]✓ MCP server registered successfully[/green]")
893
- if mcp_stdout.strip():
894
- self._console.print(f"[dim]{mcp_stdout.strip()}[/dim]")
882
+ self._console.print("[green]✓ MCP server configured via .mcp.json[/green]")
895
883
 
896
884
  except asyncio.TimeoutError:
897
- error_msg = "MCP server registration timed out after 60s. The MCP server may have failed to start or is hanging during initialization."
885
+ error_msg = "Failed to write MCP configuration file."
898
886
  if verbose:
899
887
  self._console.print(f"[red]✗ {error_msg}[/red]")
900
888
 
901
- # Clean up temp files before early return
902
889
  await env.exec_command(f"rm -f {prompt_file} {env_file}", timeout=5)
903
890
 
904
891
  return AgentResult(
@@ -908,6 +895,35 @@ class ClaudeCodeHarness:
908
895
  cost_usd=None,
909
896
  )
910
897
 
898
+ # Run setup_command if configured (BEFORE agent, OUTSIDE task timer).
899
+ # This is the right place for expensive one-time operations like
900
+ # pre-computing caches that should not count against timeout_seconds.
901
+ if self.mcp_server and self.mcp_server.setup_command:
902
+ setup_cmd = self.mcp_server.get_setup_command_for_workdir(env.workdir)
903
+ setup_timeout = int(self.mcp_server.setup_timeout_ms / 1000)
904
+
905
+ if verbose:
906
+ self._console.print(
907
+ f"[cyan]Running setup command (timeout: {setup_timeout:.0f}s)...[/cyan]"
908
+ )
909
+
910
+ setup_full_cmd = f"source {shlex.quote(env_file)} && {setup_cmd}"
911
+ setup_exit, _setup_stdout, setup_stderr = await env.exec_command(
912
+ ["/bin/bash", "-c", setup_full_cmd],
913
+ timeout=setup_timeout,
914
+ )
915
+
916
+ if setup_exit != 0:
917
+ if verbose:
918
+ self._console.print(
919
+ f"[yellow]⚠ Setup command exited with code {setup_exit}[/yellow]"
920
+ )
921
+ if setup_stderr:
922
+ self._console.print(f"[dim]{setup_stderr[:500]}[/dim]")
923
+ # Non-fatal: continue with agent even if setup fails
924
+ elif verbose:
925
+ self._console.print("[green]✓ Setup command completed[/green]")
926
+
911
927
  try:
912
928
  claude_args = [
913
929
  "--print",
@@ -1039,16 +1055,9 @@ class ClaudeCodeHarness:
1039
1055
  error_msg += f"\nMCP server logs saved to: {mcp_log_path}"
1040
1056
 
1041
1057
  if mcp_server_name:
1042
- # Use shlex.quote() for MCP removal command
1043
- quoted_env_file = shlex.quote(env_file)
1044
- quoted_server_name = shlex.quote(mcp_server_name)
1045
- remove_cmd = (
1046
- f"source {quoted_env_file} && claude mcp remove {quoted_server_name}"
1047
- )
1048
1058
  await env.exec_command(
1049
- f"su mcpbr -c {shlex.quote(remove_cmd)}",
1050
- timeout=10,
1051
- environment=docker_env,
1059
+ f"rm -f {env.workdir}/.mcp.json",
1060
+ timeout=5,
1052
1061
  )
1053
1062
 
1054
1063
  return AgentResult(
@@ -1068,14 +1077,9 @@ class ClaudeCodeHarness:
1068
1077
  )
1069
1078
 
1070
1079
  if mcp_server_name:
1071
- # Use shlex.quote() for MCP removal command
1072
- quoted_env_file = shlex.quote(env_file)
1073
- quoted_server_name = shlex.quote(mcp_server_name)
1074
- remove_cmd = f"source {quoted_env_file} && claude mcp remove {quoted_server_name}"
1075
1080
  await env.exec_command(
1076
- f"su mcpbr -c {shlex.quote(remove_cmd)}",
1077
- timeout=10,
1078
- environment=docker_env,
1081
+ f"rm -f {env.workdir}/.mcp.json",
1082
+ timeout=5,
1079
1083
  )
1080
1084
 
1081
1085
  _, git_status, git_stderr = await env.exec_command(
@@ -1160,20 +1164,13 @@ class ClaudeCodeHarness:
1160
1164
 
1161
1165
  if mcp_server_name:
1162
1166
  try:
1163
- # Use shlex.quote() for MCP removal command
1164
- quoted_env_file = shlex.quote(env_file)
1165
- quoted_server_name = shlex.quote(mcp_server_name)
1166
- remove_cmd = (
1167
- f"source {quoted_env_file} && claude mcp remove {quoted_server_name}"
1168
- )
1169
1167
  await env.exec_command(
1170
- f"su mcpbr -c {shlex.quote(remove_cmd)}",
1171
- timeout=10,
1172
- environment=docker_env,
1168
+ f"rm -f {env.workdir}/.mcp.json",
1169
+ timeout=5,
1173
1170
  )
1174
1171
  except Exception as e:
1175
1172
  if verbose:
1176
- self._console.print(f"[dim red]Failed to remove MCP server: {e}[/dim red]")
1173
+ self._console.print(f"[dim red]Failed to clean up .mcp.json: {e}[/dim red]")
1177
1174
 
1178
1175
  error_msg = f"Task execution timed out after {timeout}s."
1179
1176
  if self.mcp_server:
@@ -1204,20 +1201,13 @@ class ClaudeCodeHarness:
1204
1201
  except Exception:
1205
1202
  if mcp_server_name:
1206
1203
  try:
1207
- # Use shlex.quote() for MCP removal command
1208
- quoted_env_file = shlex.quote(env_file)
1209
- quoted_server_name = shlex.quote(mcp_server_name)
1210
- remove_cmd = (
1211
- f"source {quoted_env_file} && claude mcp remove {quoted_server_name}"
1212
- )
1213
1204
  await env.exec_command(
1214
- f"su mcpbr -c {shlex.quote(remove_cmd)}",
1215
- timeout=10,
1216
- environment=docker_env,
1205
+ f"rm -f {env.workdir}/.mcp.json",
1206
+ timeout=5,
1217
1207
  )
1218
1208
  except Exception as e:
1219
1209
  if verbose:
1220
- self._console.print(f"[dim red]Failed to remove MCP server: {e}[/dim red]")
1210
+ self._console.print(f"[dim red]Failed to clean up .mcp.json: {e}[/dim red]")
1221
1211
  raise
1222
1212
  finally:
1223
1213
  # Close MCP log file if it was opened
@@ -1230,7 +1220,9 @@ class ClaudeCodeHarness:
1230
1220
  if verbose:
1231
1221
  self._console.print(f"[dim red]Failed to close MCP log file: {e}[/dim red]")
1232
1222
 
1233
- await env.exec_command(f"rm -f {prompt_file} {env_file}", timeout=5)
1223
+ await env.exec_command(
1224
+ f"rm -f {prompt_file} {env_file} {env.workdir}/.mcp.json", timeout=5
1225
+ )
1234
1226
 
1235
1227
 
1236
1228
  HARNESS_REGISTRY: dict[str, type] = {
@@ -0,0 +1,33 @@
1
+ """Test command specs from upstream SWE-bench harness.
2
+
3
+ Maps repositories to their correct test commands. mcpbr defaults to pytest
4
+ for all non-Django projects, but some projects (e.g., sympy) use custom test
5
+ runners that aren't pytest-compatible.
6
+
7
+ Source: https://github.com/SWE-bench/SWE-bench/blob/main/swebench/harness/constants/python.py
8
+ """
9
+
10
+ # Base test commands per framework (from upstream constants/python.py)
11
+ TEST_PYTEST = "pytest --no-header -rA --tb=no -p no:cacheprovider"
12
+ TEST_DJANGO = "./tests/runtests.py --verbosity 2 --settings=test_sqlite --parallel 1"
13
+ TEST_SYMPY = "PYTHONWARNINGS='ignore::UserWarning,ignore::SyntaxWarning' bin/test -C --verbose"
14
+ TEST_SPHINX = "tox --current-env -epy39 -v --"
15
+ TEST_ASTROPY = "pytest -rA -vv -o console_output_style=classic --tb=no"
16
+ TEST_SEABORN = "pytest --no-header -rA"
17
+
18
+ # Repo → test command mapping
19
+ # Only non-pytest entries need to be here — pytest is the default fallback.
20
+ # Django is included for documentation but its existing handler takes precedence.
21
+ REPO_TO_TEST_CMD: dict[str, str] = {
22
+ "sympy/sympy": TEST_SYMPY,
23
+ "django/django": TEST_DJANGO,
24
+ "sphinx-doc/sphinx": TEST_SPHINX,
25
+ }
26
+
27
+
28
+ def get_repo_test_command(repo: str) -> str | None:
29
+ """Look up the upstream test command for a repo.
30
+
31
+ Returns None if repo uses standard pytest (handled by existing logic).
32
+ """
33
+ return REPO_TO_TEST_CMD.get(repo)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mcpbr
3
- Version: 0.4.12
3
+ Version: 0.4.14
4
4
  Summary: Model Context Protocol Benchmark Runner - evaluate MCP servers against software engineering benchmarks
5
5
  Project-URL: Homepage, https://github.com/greynewell/mcpbr
6
6
  Project-URL: Repository, https://github.com/greynewell/mcpbr
@@ -3,14 +3,14 @@ mcpbr/__main__.py,sha256=WmeQsAqtW_9tMTNKArH1m76DPBokZpXuy6dMZp13gXA,132
3
3
  mcpbr/agent.py,sha256=aSFH2S3ExKZfdVfMbzk6D1nRhpKt4JmpRzmF4Vi6Gmo,5795
4
4
  mcpbr/cache.py,sha256=YiP13omwMbXLb6NhNocJvL58enXEx9J8OrvTZnWUkw4,13254
5
5
  mcpbr/cli.py,sha256=xvh7gpJx0LzjV3g-Te4FF7BfHubGzDxOiYQsSeQnCEc,68276
6
- mcpbr/config.py,sha256=quB2KPKsFY7Y86wTZr9GjlZRYsh13MngNikdOTBKEvY,17864
6
+ mcpbr/config.py,sha256=E9Icedjk_VFONnnEZbWW5WN7El5RaJD5pGi-JQlrlV0,18890
7
7
  mcpbr/config_inheritance.py,sha256=0EV9Tv62UFNgZoc8mY7yYjHEbnMM_R5EAhSeuK7ajAA,6617
8
8
  mcpbr/config_validator.py,sha256=ZMEIeK4y6fSwyY46Xv5dK5v3jM4HDKcYkosnIcn7iyI,20488
9
- mcpbr/docker_env.py,sha256=GKjQULslYANGSkyY8ZLaAEy9WWl0MYqS1LZ0VavmhXc,31085
9
+ mcpbr/docker_env.py,sha256=vpbjL227L9qLjrS7CzXevxzo9393qmOrrxWG7lP1s44,31629
10
10
  mcpbr/env_expansion.py,sha256=Rkhth-tWV8CptQlSSk9exuMsUaSTTW9hj69z4snZd_U,6122
11
- mcpbr/evaluation.py,sha256=LQXSLn_4yIkZ0jwZ85AaKku2dHcPirmj5c7-nhpPMfY,11994
12
- mcpbr/harness.py,sha256=6_p_MFrs8RulosXToVtB9-P4Ej8XzR6ZzCKDP4mUeGY,51026
13
- mcpbr/harnesses.py,sha256=y2M2Warbj2eWpF5LwAPfdkIDLMGdd4hw9Rw-Ko_OCzU,47814
11
+ mcpbr/evaluation.py,sha256=EjPREWv7hBRqhBhNan0ERh2imqMBegT0Y2cgZlTxRGk,12765
12
+ mcpbr/harness.py,sha256=sEMP2PnrQP_BKK-4yixz05qXcY-0OsJNJ5e5JU2Rtsc,51079
13
+ mcpbr/harnesses.py,sha256=h9iDp4qkPABNwO9OXbJ61qcD4n0oAUTU7AQksxRKLcg,47335
14
14
  mcpbr/incremental_save.py,sha256=1dm3pGiEIhP8cVk_Y6XF_cAdo3B_vyRc6CO8Wt-MyIA,4830
15
15
  mcpbr/junit_reporter.py,sha256=M_02zJbFbA3VoIYG5oR7VDecqWHEpIee-JOUShWNuLU,9261
16
16
  mcpbr/log_formatter.py,sha256=d2jWH7z4IRSbr8-PbnEt3TmLAqk8vgdPT38uTnTCN5c,21488
@@ -27,6 +27,7 @@ mcpbr/smoke_test.py,sha256=srYGOn_auspRbt_a6ebYDDDq_nujA_iZGman5nU1ikU,14925
27
27
  mcpbr/state_tracker.py,sha256=rIP9LIHtQg6oBsLIxnwRjE865Kw6U7DMO_GzzuMRC0E,10790
28
28
  mcpbr/statistics.py,sha256=Ny8TMdBrIpS4KfKCJcuFfTeaGuTmEkS1G_uHBlboYdA,19134
29
29
  mcpbr/streaming.py,sha256=XPhkXO1R1EsWtkoPvCpyy4TehEom7hkuOeP-00joX3o,13853
30
+ mcpbr/swebench_test_specs.py,sha256=Mh_BPjcexkgDT3p4zT2p31925b8w5tgsxxRpYZQZalM,1390
30
31
  mcpbr/templates.py,sha256=dqwboVB-yfE06w2rgDOvuWJB4Hx5duH_W-jvLBqmlKg,10683
31
32
  mcpbr/benchmarks/__init__.py,sha256=RK0TxNTSqhUX_WtGs0CcV1MX2uiCBTUWkEHYpo_7T5M,4099
32
33
  mcpbr/benchmarks/agentbench.py,sha256=jQ8OG_5cn-PvOZizXivysLTw9xvtA8c_MWfw3jXq0TQ,6512
@@ -68,15 +69,15 @@ mcpbr/infrastructure/azure_health.py,sha256=xITmIa9IfYIwxcVhY0sJ81a-6WNKiT8kSQTd
68
69
  mcpbr/infrastructure/base.py,sha256=Olj6uiNBeGoUqltZI1NHZfa26kzT-6jfp8YIXSykFKM,3037
69
70
  mcpbr/infrastructure/local.py,sha256=VK6UAg7Dzvb9v1LAJgNGA_s0blQKrHAQEXBAC75zAL8,4237
70
71
  mcpbr/infrastructure/manager.py,sha256=j0T7U1Tbajmfve4SNfhYKikvL9kgSVT01fYKMC-sH-s,4796
71
- mcpbr-0.4.12.data/data/mcpbr/data/templates/brave-search.yaml,sha256=PYHXJOaDqYKoqdJc3JV1WbaL-BacrdkQPck1eKGbMPo,1098
72
- mcpbr-0.4.12.data/data/mcpbr/data/templates/filesystem.yaml,sha256=1p6Z6ChViFYHAODYD71JFst6gdhR5y5rnWNf7Pp5zOY,1091
73
- mcpbr-0.4.12.data/data/mcpbr/data/templates/github.yaml,sha256=uzPwq5_loFegvH6RNov1MQclbBiFBgYWzpiKLfEN9H4,1133
74
- mcpbr-0.4.12.data/data/mcpbr/data/templates/google-maps.yaml,sha256=ldR7E9UmuAA-3nJZ1SShD7PhG0_AwDJOSYuy19hQ6cI,1116
75
- mcpbr-0.4.12.data/data/mcpbr/data/templates/postgres.yaml,sha256=r6R1069BhV4ADQGPZ-T9r6xMNwbr2yrNh8-IHPb4XiI,1178
76
- mcpbr-0.4.12.data/data/mcpbr/data/templates/slack.yaml,sha256=dBn_YqlFJMJai_55sRDb4hXClgxRpcyYTlWl4LBkpuo,1072
77
- mcpbr-0.4.12.data/data/mcpbr/data/templates/sqlite.yaml,sha256=UR5yN9f8v_BC6oskny2xMldHWzZrB9b_PpFSmv5eccg,1080
78
- mcpbr-0.4.12.dist-info/METADATA,sha256=G1WBoJD0EzwXw6HtSeabkbBugGWOVBYdyQe5A4syqP0,54809
79
- mcpbr-0.4.12.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
80
- mcpbr-0.4.12.dist-info/entry_points.txt,sha256=lLL8icujqBF36V9bF4gfaB2at4cFKCiv2IdJ1i5hT9U,41
81
- mcpbr-0.4.12.dist-info/licenses/LICENSE,sha256=mcXLPreEXzD-816yLKmocCPr9_k3gFFo62TjrSuKkIQ,1075
82
- mcpbr-0.4.12.dist-info/RECORD,,
72
+ mcpbr-0.4.14.data/data/mcpbr/data/templates/brave-search.yaml,sha256=PYHXJOaDqYKoqdJc3JV1WbaL-BacrdkQPck1eKGbMPo,1098
73
+ mcpbr-0.4.14.data/data/mcpbr/data/templates/filesystem.yaml,sha256=1p6Z6ChViFYHAODYD71JFst6gdhR5y5rnWNf7Pp5zOY,1091
74
+ mcpbr-0.4.14.data/data/mcpbr/data/templates/github.yaml,sha256=uzPwq5_loFegvH6RNov1MQclbBiFBgYWzpiKLfEN9H4,1133
75
+ mcpbr-0.4.14.data/data/mcpbr/data/templates/google-maps.yaml,sha256=ldR7E9UmuAA-3nJZ1SShD7PhG0_AwDJOSYuy19hQ6cI,1116
76
+ mcpbr-0.4.14.data/data/mcpbr/data/templates/postgres.yaml,sha256=r6R1069BhV4ADQGPZ-T9r6xMNwbr2yrNh8-IHPb4XiI,1178
77
+ mcpbr-0.4.14.data/data/mcpbr/data/templates/slack.yaml,sha256=dBn_YqlFJMJai_55sRDb4hXClgxRpcyYTlWl4LBkpuo,1072
78
+ mcpbr-0.4.14.data/data/mcpbr/data/templates/sqlite.yaml,sha256=UR5yN9f8v_BC6oskny2xMldHWzZrB9b_PpFSmv5eccg,1080
79
+ mcpbr-0.4.14.dist-info/METADATA,sha256=f2PEinjR_XbBOmFtDAZxoDHdBLwKxLX4V9kjYqh_UtA,54809
80
+ mcpbr-0.4.14.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
81
+ mcpbr-0.4.14.dist-info/entry_points.txt,sha256=lLL8icujqBF36V9bF4gfaB2at4cFKCiv2IdJ1i5hT9U,41
82
+ mcpbr-0.4.14.dist-info/licenses/LICENSE,sha256=mcXLPreEXzD-816yLKmocCPr9_k3gFFo62TjrSuKkIQ,1075
83
+ mcpbr-0.4.14.dist-info/RECORD,,
File without changes