mcpbr 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mcpbr/__init__.py CHANGED
@@ -3,4 +3,4 @@
3
3
  A benchmark runner for evaluating MCP servers against SWE-bench tasks.
4
4
  """
5
5
 
6
- __version__ = "0.3.23"
6
+ __version__ = "0.5.1"
mcpbr/harness.py CHANGED
@@ -431,6 +431,17 @@ async def _run_mcp_evaluation(
431
431
  config, benchmark, verbosity, log_file, mcp_logs_dir, mcp_server_config
432
432
  )
433
433
 
434
+ # Run setup_command OUTSIDE the agent timer. This is for expensive
435
+ # one-time operations (e.g. pre-computing code graphs) that must not
436
+ # count against timeout_seconds.
437
+ if env and hasattr(agent, "run_setup_command"):
438
+ try:
439
+ await agent.run_setup_command(env, verbose=verbose)
440
+ except asyncio.TimeoutError:
441
+ # Setup timeout is non-fatal – the agent still gets its
442
+ # full timeout budget even if setup didn't finish.
443
+ pass
444
+
434
445
  # Sample memory before agent execution
435
446
  if profiler:
436
447
  profiler.sample_memory()
mcpbr/harnesses.py CHANGED
@@ -555,6 +555,48 @@ class ClaudeCodeHarness:
555
555
  self.thinking_budget = thinking_budget
556
556
  self._console = Console()
557
557
 
558
+ async def run_setup_command(
559
+ self,
560
+ env: TaskEnvironment,
561
+ verbose: bool = False,
562
+ ) -> None:
563
+ """Run MCP server setup_command inside the container.
564
+
565
+ This MUST be called from the evaluation harness BEFORE the agent timer
566
+ starts (i.e. before asyncio.wait_for wraps agent.solve()). Expensive
567
+ operations like pre-computing code graphs happen here and should never
568
+ count against the task timeout.
569
+ """
570
+ if not self.mcp_server or not self.mcp_server.setup_command:
571
+ return
572
+
573
+ setup_cmd = self.mcp_server.get_setup_command_for_workdir(env.workdir)
574
+ setup_timeout = max(1, int(self.mcp_server.setup_timeout_ms / 1000))
575
+
576
+ if verbose:
577
+ self._console.print(
578
+ f"[cyan]Running setup command (timeout: {setup_timeout:.0f}s)...[/cyan]"
579
+ )
580
+
581
+ # Source the env file so setup_command has access to API keys etc.
582
+ env_file = "/tmp/.mcpbr_env.sh"
583
+ setup_full_cmd = f"source {shlex.quote(env_file)} && {setup_cmd}"
584
+ setup_exit, _setup_stdout, setup_stderr = await env.exec_command(
585
+ ["/bin/bash", "-c", setup_full_cmd],
586
+ timeout=setup_timeout,
587
+ )
588
+
589
+ if setup_exit != 0:
590
+ if verbose:
591
+ self._console.print(
592
+ f"[yellow]⚠ Setup command exited with code {setup_exit}[/yellow]"
593
+ )
594
+ if setup_stderr:
595
+ self._console.print(f"[dim]{setup_stderr[:500]}[/dim]")
596
+ # Non-fatal: continue with agent even if setup fails
597
+ elif verbose:
598
+ self._console.print("[green]✓ Setup command completed[/green]")
599
+
558
600
  async def solve(
559
601
  self,
560
602
  task: dict[str, Any],
@@ -895,34 +937,10 @@ class ClaudeCodeHarness:
895
937
  cost_usd=None,
896
938
  )
897
939
 
898
- # Run setup_command if configured (BEFORE agent, OUTSIDE task timer).
899
- # This is the right place for expensive one-time operations like
900
- # pre-computing caches that should not count against timeout_seconds.
901
- if self.mcp_server and self.mcp_server.setup_command:
902
- setup_cmd = self.mcp_server.get_setup_command_for_workdir(env.workdir)
903
- setup_timeout = int(self.mcp_server.setup_timeout_ms / 1000)
904
-
905
- if verbose:
906
- self._console.print(
907
- f"[cyan]Running setup command (timeout: {setup_timeout:.0f}s)...[/cyan]"
908
- )
909
-
910
- setup_full_cmd = f"source {shlex.quote(env_file)} && {setup_cmd}"
911
- setup_exit, _setup_stdout, setup_stderr = await env.exec_command(
912
- ["/bin/bash", "-c", setup_full_cmd],
913
- timeout=setup_timeout,
914
- )
915
-
916
- if setup_exit != 0:
917
- if verbose:
918
- self._console.print(
919
- f"[yellow]⚠ Setup command exited with code {setup_exit}[/yellow]"
920
- )
921
- if setup_stderr:
922
- self._console.print(f"[dim]{setup_stderr[:500]}[/dim]")
923
- # Non-fatal: continue with agent even if setup fails
924
- elif verbose:
925
- self._console.print("[green]✓ Setup command completed[/green]")
940
+ # NOTE: setup_command is intentionally NOT run here. It must be called
941
+ # from the evaluation harness (harness.py) BEFORE the agent timer starts,
942
+ # using run_setup_command(). Running it here would include it in the
943
+ # asyncio.wait_for() timeout that wraps agent.solve().
926
944
 
927
945
  try:
928
946
  claude_args = [
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mcpbr
3
- Version: 0.5.0
3
+ Version: 0.5.1
4
4
  Summary: Model Context Protocol Benchmark Runner - evaluate MCP servers against software engineering benchmarks
5
5
  Project-URL: Homepage, https://github.com/greynewell/mcpbr
6
6
  Project-URL: Repository, https://github.com/greynewell/mcpbr
@@ -1,4 +1,4 @@
1
- mcpbr/__init__.py,sha256=3vhpKV9kVECjuPapKpCPEHTjlOsyhuoiLZxBv9O1eL0,152
1
+ mcpbr/__init__.py,sha256=fGX9CC8F1Z1g8nbw8yXbxV3_0aRYxlbv5UAtXDYllgo,151
2
2
  mcpbr/__main__.py,sha256=WmeQsAqtW_9tMTNKArH1m76DPBokZpXuy6dMZp13gXA,132
3
3
  mcpbr/agent.py,sha256=aSFH2S3ExKZfdVfMbzk6D1nRhpKt4JmpRzmF4Vi6Gmo,5795
4
4
  mcpbr/cache.py,sha256=YiP13omwMbXLb6NhNocJvL58enXEx9J8OrvTZnWUkw4,13254
@@ -22,8 +22,8 @@ mcpbr/failure_analysis.py,sha256=N5xp9YPe2d7P9fTa2LVSHsPgB1WOQtWMeClq3bOv4_c,198
22
22
  mcpbr/few_shot.py,sha256=bFDdes_kgZAFWoFZQEfZG5Z2Es9rmkB1jsxSMp4aCCM,11684
23
23
  mcpbr/formatting.py,sha256=lwZcb4fD5osBzJlerICyvAVb4KHSm_nRTBg1dVfD6Lo,14193
24
24
  mcpbr/gpu_support.py,sha256=eroBiLkt1A3Q2ODJDSyqrd_BzcMh8tFkjtPn7PsvJJc,5070
25
- mcpbr/harness.py,sha256=Rc6CqzZOMJyuHqfuOIDisLOoPka-cqAqYiL7zr7ALFg,53193
26
- mcpbr/harnesses.py,sha256=h9iDp4qkPABNwO9OXbJ61qcD4n0oAUTU7AQksxRKLcg,47335
25
+ mcpbr/harness.py,sha256=xfnD4si0DflBor1cfu_4wrCpECJ9_8eudLEsgVCU6Oo,53731
26
+ mcpbr/harnesses.py,sha256=1FmUfFSQF0HBvmJsNEbyW_Km4ChsWhShY70aQP6_TBI,47947
27
27
  mcpbr/incremental_save.py,sha256=1dm3pGiEIhP8cVk_Y6XF_cAdo3B_vyRc6CO8Wt-MyIA,4830
28
28
  mcpbr/junit_reporter.py,sha256=M_02zJbFbA3VoIYG5oR7VDecqWHEpIee-JOUShWNuLU,9261
29
29
  mcpbr/latency_metrics.py,sha256=xNMaUzGMSbOIfuoyZGyIfyMk5uAmoj6K65ZAs5D6Z8c,10476
@@ -92,15 +92,15 @@ mcpbr/infrastructure/azure_health.py,sha256=xITmIa9IfYIwxcVhY0sJ81a-6WNKiT8kSQTd
92
92
  mcpbr/infrastructure/base.py,sha256=Olj6uiNBeGoUqltZI1NHZfa26kzT-6jfp8YIXSykFKM,3037
93
93
  mcpbr/infrastructure/local.py,sha256=VK6UAg7Dzvb9v1LAJgNGA_s0blQKrHAQEXBAC75zAL8,4237
94
94
  mcpbr/infrastructure/manager.py,sha256=j0T7U1Tbajmfve4SNfhYKikvL9kgSVT01fYKMC-sH-s,4796
95
- mcpbr-0.5.0.data/data/mcpbr/data/templates/brave-search.yaml,sha256=PYHXJOaDqYKoqdJc3JV1WbaL-BacrdkQPck1eKGbMPo,1098
96
- mcpbr-0.5.0.data/data/mcpbr/data/templates/filesystem.yaml,sha256=1p6Z6ChViFYHAODYD71JFst6gdhR5y5rnWNf7Pp5zOY,1091
97
- mcpbr-0.5.0.data/data/mcpbr/data/templates/github.yaml,sha256=uzPwq5_loFegvH6RNov1MQclbBiFBgYWzpiKLfEN9H4,1133
98
- mcpbr-0.5.0.data/data/mcpbr/data/templates/google-maps.yaml,sha256=ldR7E9UmuAA-3nJZ1SShD7PhG0_AwDJOSYuy19hQ6cI,1116
99
- mcpbr-0.5.0.data/data/mcpbr/data/templates/postgres.yaml,sha256=r6R1069BhV4ADQGPZ-T9r6xMNwbr2yrNh8-IHPb4XiI,1178
100
- mcpbr-0.5.0.data/data/mcpbr/data/templates/slack.yaml,sha256=dBn_YqlFJMJai_55sRDb4hXClgxRpcyYTlWl4LBkpuo,1072
101
- mcpbr-0.5.0.data/data/mcpbr/data/templates/sqlite.yaml,sha256=UR5yN9f8v_BC6oskny2xMldHWzZrB9b_PpFSmv5eccg,1080
102
- mcpbr-0.5.0.dist-info/METADATA,sha256=fMqq-Q3zU5arV5f777AXScxNJ2C7sHAEbUqliT7rOn4,55068
103
- mcpbr-0.5.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
104
- mcpbr-0.5.0.dist-info/entry_points.txt,sha256=lLL8icujqBF36V9bF4gfaB2at4cFKCiv2IdJ1i5hT9U,41
105
- mcpbr-0.5.0.dist-info/licenses/LICENSE,sha256=mcXLPreEXzD-816yLKmocCPr9_k3gFFo62TjrSuKkIQ,1075
106
- mcpbr-0.5.0.dist-info/RECORD,,
95
+ mcpbr-0.5.1.data/data/mcpbr/data/templates/brave-search.yaml,sha256=PYHXJOaDqYKoqdJc3JV1WbaL-BacrdkQPck1eKGbMPo,1098
96
+ mcpbr-0.5.1.data/data/mcpbr/data/templates/filesystem.yaml,sha256=1p6Z6ChViFYHAODYD71JFst6gdhR5y5rnWNf7Pp5zOY,1091
97
+ mcpbr-0.5.1.data/data/mcpbr/data/templates/github.yaml,sha256=uzPwq5_loFegvH6RNov1MQclbBiFBgYWzpiKLfEN9H4,1133
98
+ mcpbr-0.5.1.data/data/mcpbr/data/templates/google-maps.yaml,sha256=ldR7E9UmuAA-3nJZ1SShD7PhG0_AwDJOSYuy19hQ6cI,1116
99
+ mcpbr-0.5.1.data/data/mcpbr/data/templates/postgres.yaml,sha256=r6R1069BhV4ADQGPZ-T9r6xMNwbr2yrNh8-IHPb4XiI,1178
100
+ mcpbr-0.5.1.data/data/mcpbr/data/templates/slack.yaml,sha256=dBn_YqlFJMJai_55sRDb4hXClgxRpcyYTlWl4LBkpuo,1072
101
+ mcpbr-0.5.1.data/data/mcpbr/data/templates/sqlite.yaml,sha256=UR5yN9f8v_BC6oskny2xMldHWzZrB9b_PpFSmv5eccg,1080
102
+ mcpbr-0.5.1.dist-info/METADATA,sha256=1iupVSrsq687pZ0s77Hu5q0aDex74p-x7ODS876ey3E,55068
103
+ mcpbr-0.5.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
104
+ mcpbr-0.5.1.dist-info/entry_points.txt,sha256=lLL8icujqBF36V9bF4gfaB2at4cFKCiv2IdJ1i5hT9U,41
105
+ mcpbr-0.5.1.dist-info/licenses/LICENSE,sha256=mcXLPreEXzD-816yLKmocCPr9_k3gFFo62TjrSuKkIQ,1075
106
+ mcpbr-0.5.1.dist-info/RECORD,,
File without changes