mcpbr 0.4.16__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcpbr/config_migration.py +470 -0
- mcpbr/config_wizard.py +647 -0
- mcpbr/dashboard.py +619 -0
- mcpbr/dataset_streaming.py +491 -0
- mcpbr/docker_cache.py +539 -0
- mcpbr/docker_prewarm.py +369 -0
- mcpbr/dry_run.py +532 -0
- mcpbr/formatting.py +444 -0
- mcpbr/harness.py +38 -4
- mcpbr/resource_limits.py +487 -0
- mcpbr/result_streaming.py +519 -0
- mcpbr/task_batching.py +403 -0
- mcpbr/task_scheduler.py +468 -0
- {mcpbr-0.4.16.dist-info → mcpbr-0.5.0.dist-info}/METADATA +1 -1
- {mcpbr-0.4.16.dist-info → mcpbr-0.5.0.dist-info}/RECORD +25 -13
- {mcpbr-0.4.16.data → mcpbr-0.5.0.data}/data/mcpbr/data/templates/brave-search.yaml +0 -0
- {mcpbr-0.4.16.data → mcpbr-0.5.0.data}/data/mcpbr/data/templates/filesystem.yaml +0 -0
- {mcpbr-0.4.16.data → mcpbr-0.5.0.data}/data/mcpbr/data/templates/github.yaml +0 -0
- {mcpbr-0.4.16.data → mcpbr-0.5.0.data}/data/mcpbr/data/templates/google-maps.yaml +0 -0
- {mcpbr-0.4.16.data → mcpbr-0.5.0.data}/data/mcpbr/data/templates/postgres.yaml +0 -0
- {mcpbr-0.4.16.data → mcpbr-0.5.0.data}/data/mcpbr/data/templates/slack.yaml +0 -0
- {mcpbr-0.4.16.data → mcpbr-0.5.0.data}/data/mcpbr/data/templates/sqlite.yaml +0 -0
- {mcpbr-0.4.16.dist-info → mcpbr-0.5.0.dist-info}/WHEEL +0 -0
- {mcpbr-0.4.16.dist-info → mcpbr-0.5.0.dist-info}/entry_points.txt +0 -0
- {mcpbr-0.4.16.dist-info → mcpbr-0.5.0.dist-info}/licenses/LICENSE +0 -0
mcpbr/formatting.py
ADDED
|
@@ -0,0 +1,444 @@
|
|
|
1
|
+
"""Color and formatting options for CLI output.
|
|
2
|
+
|
|
3
|
+
Provides configurable themes and formatting utilities for consistent CLI output
|
|
4
|
+
across the mcpbr tool. Supports the NO_COLOR convention (https://no-color.org/)
|
|
5
|
+
and configurable themes via the MCPBR_THEME environment variable or CLI flags.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
import sys
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from enum import Enum
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from rich.console import Console
|
|
17
|
+
from rich.progress import (
|
|
18
|
+
BarColumn,
|
|
19
|
+
MofNCompleteColumn,
|
|
20
|
+
Progress,
|
|
21
|
+
SpinnerColumn,
|
|
22
|
+
TextColumn,
|
|
23
|
+
TimeElapsedColumn,
|
|
24
|
+
TimeRemainingColumn,
|
|
25
|
+
)
|
|
26
|
+
from rich.table import Table
|
|
27
|
+
from rich.text import Text
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class Theme(Enum):
    """Named output themes selectable for CLI formatting.

    The member values are the lowercase strings users supply (for example
    through the ``MCPBR_THEME`` environment variable), so ``Theme("plain")``
    yields ``Theme.PLAIN``.
    """

    # Bold, colored styles.
    DEFAULT = "default"
    # Colored styles without the bold attribute.
    MINIMAL = "minimal"
    # Empty styles: no color or emphasis.
    PLAIN = "plain"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass(frozen=True)
class ThemeConfig:
    """Immutable set of Rich style strings, one per message category.

    Every field holds a Rich style definition such as ``"bold green"``; an
    empty string means "no styling". The defaults are the bold, colored
    variants.
    """

    # Style applied to success messages.
    success_style: str = "bold green"
    # Style applied to error messages.
    error_style: str = "bold red"
    # Style applied to warning messages.
    warning_style: str = "bold yellow"
    # Style applied to informational messages.
    info_style: str = "bold blue"
    # Style applied to section headers.
    header_style: str = "bold magenta"
    # Style applied to secondary / de-emphasized text.
    dim_style: str = "dim"
    # Style applied to highlighted / emphasized text.
    highlight_style: str = "bold cyan"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
# Lookup table from each Theme member to the styles it applies.
THEME_CONFIGS: dict[Theme, ThemeConfig] = {
    # Bold, colored styles.
    Theme.DEFAULT: ThemeConfig(
        success_style="bold green",
        error_style="bold red",
        warning_style="bold yellow",
        info_style="bold blue",
        header_style="bold magenta",
        dim_style="dim",
        highlight_style="bold cyan",
    ),
    # Same hues as DEFAULT, minus the bold attribute.
    Theme.MINIMAL: ThemeConfig(
        success_style="green",
        error_style="red",
        warning_style="yellow",
        info_style="blue",
        header_style="magenta",
        dim_style="dim",
        highlight_style="cyan",
    ),
    # Every style is the empty string, i.e. no styling at all.
    Theme.PLAIN: ThemeConfig(**{name: "" for name in ThemeConfig.__dataclass_fields__}),
}
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _resolve_theme(theme_name: str | None = None) -> Theme:
    """Resolve a theme name string to a Theme enum value.

    Candidates are consulted in order: the ``theme_name`` argument, then the
    ``MCPBR_THEME`` environment variable. A candidate that is ``None``, empty,
    or whitespace-only is treated as "not provided" and skipped, so an empty
    ``MCPBR_THEME`` falls back to ``Theme.DEFAULT`` instead of raising.

    Args:
        theme_name: Optional theme name (case-insensitive, surrounding
            whitespace ignored). One of ``"default"``, ``"minimal"``, or
            ``"plain"``.

    Returns:
        The resolved Theme enum value, or ``Theme.DEFAULT`` when no candidate
        supplies a non-blank name.

    Raises:
        ValueError: If the first non-blank candidate is not a recognized
            theme name.
    """
    for candidate in (theme_name, os.environ.get("MCPBR_THEME")):
        # Blank values carry no information; move on to the next source.
        if candidate is None or not candidate.strip():
            continue
        try:
            return Theme(candidate.strip().lower())
        except ValueError:
            valid = ", ".join(t.value for t in Theme)
            # ``from None`` hides the enum's internal lookup error from users.
            raise ValueError(f"Unknown theme '{candidate}'. Valid themes: {valid}") from None

    return Theme.DEFAULT
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def detect_color_support(force_color: bool | None = None) -> bool:
    """Determine whether the current environment supports color output.

    Resolution order:
        1. ``force_color`` parameter (explicit override).
        2. ``NO_COLOR`` environment variable -- if present and not an empty
           string (regardless of its value), colors are disabled per
           https://no-color.org/.
        3. ``MCPBR_THEME`` environment variable -- if set to ``"plain"``
           (case-insensitive, surrounding whitespace ignored), colors are
           disabled.
        4. Terminal detection -- colors are enabled when stdout is a TTY.

    Args:
        force_color: Explicit override. ``True`` forces colors on, ``False``
            forces them off, ``None`` uses auto-detection.

    Returns:
        ``True`` if color output should be used, ``False`` otherwise.
    """
    if force_color is not None:
        return force_color

    # NO_COLOR convention: only a present, non-empty value disables color.
    if os.environ.get("NO_COLOR"):
        return False

    # MCPBR_THEME=plain disables color
    if os.environ.get("MCPBR_THEME", "").strip().lower() == "plain":
        return False

    # Auto-detect: color only when stdout is a TTY. stdout may be None or a
    # stand-in object without isatty(); treat both as "not a terminal".
    isatty = getattr(sys.stdout, "isatty", None)
    if isatty is None:
        return False
    try:
        return bool(isatty())
    except ValueError:
        # File objects raise ValueError from isatty() once they are closed.
        return False
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class OutputFormatter:
    """Formatted output for CLI messages.

    Provides methods to print and format success, error, warning, info, and
    header messages using Rich styles. Also supports table and progress bar
    rendering. When color is disabled, or the active theme supplies an empty
    style, messages and table headers are emitted without any styling.

    Args:
        theme: The theme to use for formatting. Defaults to ``Theme.DEFAULT``.
        force_color: Explicit color override. ``True`` forces colors on,
            ``False`` forces them off, ``None`` uses auto-detection.
        console: Optional Rich Console instance. If not provided, one is
            created based on color support settings.
    """

    def __init__(
        self,
        theme: Theme = Theme.DEFAULT,
        force_color: bool | None = None,
        console: Console | None = None,
    ) -> None:
        self._theme = theme
        self._config = THEME_CONFIGS[theme]
        self._color_enabled = detect_color_support(force_color)

        if console is not None:
            self._console = console
        else:
            # No console supplied: build one with Rich's own color output
            # switched off whenever color is disabled for this formatter.
            self._console = Console(no_color=not self._color_enabled)

    @property
    def theme(self) -> Theme:
        """The active theme."""
        return self._theme

    @property
    def config(self) -> ThemeConfig:
        """The active theme configuration."""
        return self._config

    @property
    def color_enabled(self) -> bool:
        """Whether color output is enabled."""
        return self._color_enabled

    @property
    def console(self) -> Console:
        """The underlying Rich console."""
        return self._console

    # ------------------------------------------------------------------
    # Print methods (write directly to console)
    # ------------------------------------------------------------------

    def success(self, message: str) -> None:
        """Print a success message prefixed with ``[ok]``.

        Args:
            message: The message text (printed literally, not parsed as markup).
        """
        self._print_styled(message, self._config.success_style, prefix="[ok]")

    def error(self, message: str) -> None:
        """Print an error message prefixed with ``[error]``.

        Args:
            message: The message text (printed literally, not parsed as markup).
        """
        self._print_styled(message, self._config.error_style, prefix="[error]")

    def warning(self, message: str) -> None:
        """Print a warning message prefixed with ``[warn]``.

        Args:
            message: The message text (printed literally, not parsed as markup).
        """
        self._print_styled(message, self._config.warning_style, prefix="[warn]")

    def info(self, message: str) -> None:
        """Print an informational message prefixed with ``[info]``.

        Args:
            message: The message text (printed literally, not parsed as markup).
        """
        self._print_styled(message, self._config.info_style, prefix="[info]")

    def header(self, message: str) -> None:
        """Print a section header (no prefix).

        Args:
            message: The header text (printed literally, not parsed as markup).
        """
        self._print_styled(message, self._config.header_style)

    # ------------------------------------------------------------------
    # Format methods (return styled strings without printing)
    # ------------------------------------------------------------------

    def format_success(self, message: str) -> str:
        """Return a Rich-markup formatted success string.

        Args:
            message: The message text. It is inserted as-is, so any Rich
                markup it contains is preserved.

        Returns:
            Formatted string with Rich markup tags (the ``[ok]`` prefix is
            escaped so it renders literally), or plain text when colors are
            disabled.
        """
        return self._format_styled(message, self._config.success_style, prefix="[ok]")

    def format_error(self, message: str) -> str:
        """Return a Rich-markup formatted error string.

        Args:
            message: The message text. It is inserted as-is, so any Rich
                markup it contains is preserved.

        Returns:
            Formatted string with Rich markup tags (the ``[error]`` prefix is
            escaped so it renders literally), or plain text when colors are
            disabled.
        """
        return self._format_styled(message, self._config.error_style, prefix="[error]")

    def format_warning(self, message: str) -> str:
        """Return a Rich-markup formatted warning string.

        Args:
            message: The message text. It is inserted as-is, so any Rich
                markup it contains is preserved.

        Returns:
            Formatted string with Rich markup tags (the ``[warn]`` prefix is
            escaped so it renders literally), or plain text when colors are
            disabled.
        """
        return self._format_styled(message, self._config.warning_style, prefix="[warn]")

    def format_info(self, message: str) -> str:
        """Return a Rich-markup formatted info string.

        Args:
            message: The message text. It is inserted as-is, so any Rich
                markup it contains is preserved.

        Returns:
            Formatted string with Rich markup tags (the ``[info]`` prefix is
            escaped so it renders literally), or plain text when colors are
            disabled.
        """
        return self._format_styled(message, self._config.info_style, prefix="[info]")

    def format_header(self, message: str) -> str:
        """Return a Rich-markup formatted header string.

        Args:
            message: The message text. It is inserted as-is, so any Rich
                markup it contains is preserved.

        Returns:
            Formatted string with Rich markup tags, or plain text when
            colors are disabled.
        """
        return self._format_styled(message, self._config.header_style)

    # ------------------------------------------------------------------
    # Table rendering
    # ------------------------------------------------------------------

    def table(
        self,
        title: str,
        columns: list[str],
        rows: list[list[Any]],
    ) -> None:
        """Print a formatted Rich table.

        Args:
            title: Table title displayed above the table.
            columns: List of column header names.
            rows: List of rows, where each row is a list of cell values.
                Values are converted to strings automatically.
        """
        # Honor the color-disabled state here too, so table headers match the
        # unstyled output of the print/format methods.
        tbl = Table(
            title=title,
            show_header=True,
            header_style=self._effective_style(self._config.header_style),
        )
        for col in columns:
            tbl.add_column(col)
        for row in rows:
            tbl.add_row(*(str(cell) for cell in row))
        self._console.print(tbl)

    # ------------------------------------------------------------------
    # Progress bar
    # ------------------------------------------------------------------

    def progress_bar(self) -> Progress:
        """Return a configured Rich Progress instance.

        Returns:
            A ``rich.progress.Progress`` object with spinner, description,
            bar, completion count, elapsed time, and remaining time columns,
            bound to this formatter's console.
        """
        return Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            MofNCompleteColumn(),
            TimeElapsedColumn(),
            TimeRemainingColumn(),
            console=self._console,
        )

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _effective_style(self, style: str) -> str:
        """Return ``style`` when color is enabled, otherwise an empty string.

        Args:
            style: Rich style string from the active theme (may be empty).

        Returns:
            The style to actually apply; ``""`` means "no styling".
        """
        return style if self._color_enabled else ""

    def _print_styled(self, message: str, style: str, prefix: str = "") -> None:
        """Print a message with a Rich style and optional prefix.

        Uses ``rich.text.Text`` objects throughout to prevent Rich from
        interpreting bracket-style prefixes (e.g. ``[ok]``) as markup tags.

        Args:
            message: The message text.
            style: Rich style string (e.g., ``"bold green"``).
            prefix: Optional prefix tag like ``"[ok]"`` or ``"[error]"``.
        """
        # ``None`` tells Text.append to add the text without any style span.
        active_style = self._effective_style(style) or None
        text = Text()
        if prefix:
            text.append(f"{prefix} ", style=active_style)
        text.append(message, style=active_style)
        self._console.print(text)

    def _format_styled(self, message: str, style: str, prefix: str = "") -> str:
        """Return a message formatted with Rich markup.

        When colors are disabled or the style is empty, returns plain text
        (``"<prefix> <message>"`` or just the message). Otherwise the text is
        wrapped in ``[style]...[/style]`` tags, and the prefix is escaped so
        that bracketed prefixes such as ``[ok]`` are rendered literally
        instead of being parsed (and dropped) as markup tags.

        Args:
            message: The message text; inserted unmodified.
            style: Rich style string.
            prefix: Optional prefix tag.

        Returns:
            Formatted string.
        """
        active_style = self._effective_style(style)
        if not active_style:
            return f"{prefix} {message}" if prefix else message

        if prefix:
            # Imported locally so this method brings its own dependency.
            from rich.markup import escape

            return f"[{active_style}]{escape(prefix)} {message}[/{active_style}]"
        return f"[{active_style}]{message}[/{active_style}]"
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
def get_formatter(
    theme: str | None = None,
    no_color: bool = False,
    console: Console | None = None,
) -> OutputFormatter:
    """Build an ``OutputFormatter`` from user-facing options.

    The theme name is resolved with ``_resolve_theme`` (explicit argument
    first, then the ``MCPBR_THEME`` environment variable, then the default
    theme). Color handling is delegated to ``OutputFormatter``: passing
    ``no_color=True`` pins color off, otherwise the formatter auto-detects.

    Args:
        theme: Theme name (``"default"``, ``"minimal"``, or ``"plain"``), or
            ``None`` to defer to the environment / default.
        no_color: If ``True``, forces color off regardless of other settings.
        console: Optional Rich Console instance to use.

    Returns:
        A configured ``OutputFormatter`` instance.

    Raises:
        ValueError: If the theme name is not recognized.
    """
    # ``None`` means "auto-detect"; only an explicit opt-out pins color off.
    color_override = False if no_color else None
    return OutputFormatter(
        theme=_resolve_theme(theme),
        force_color=color_override,
        console=console,
    )
|
mcpbr/harness.py
CHANGED
|
@@ -418,6 +418,7 @@ async def _run_mcp_evaluation(
|
|
|
418
418
|
|
|
419
419
|
start_time = time.time()
|
|
420
420
|
env: TaskEnvironment | None = None
|
|
421
|
+
agent_result: AgentResult | None = None
|
|
421
422
|
try:
|
|
422
423
|
# Track Docker environment creation time
|
|
423
424
|
docker_start = time.time()
|
|
@@ -480,10 +481,15 @@ async def _run_mcp_evaluation(
|
|
|
480
481
|
return result
|
|
481
482
|
|
|
482
483
|
except asyncio.TimeoutError:
|
|
483
|
-
# Note: The agent harness should have captured partial statistics in the AgentResult
|
|
484
|
-
# before raising TimeoutError, but this is a fallback for unexpected timeout locations
|
|
485
484
|
end_time = time.time()
|
|
486
485
|
runtime_seconds = end_time - start_time
|
|
486
|
+
# Preserve agent metrics if the agent completed before the timeout
|
|
487
|
+
# (timeout may have occurred during evaluation, not during agent solve)
|
|
488
|
+
if agent_result is not None:
|
|
489
|
+
result = agent_result_to_dict(agent_result, None, config.model, runtime_seconds)
|
|
490
|
+
result["status"] = "timeout"
|
|
491
|
+
result["error"] = "Evaluation timed out after agent completed"
|
|
492
|
+
return result
|
|
487
493
|
cost = calculate_cost(config.model, 0, 0)
|
|
488
494
|
return {
|
|
489
495
|
"resolved": False,
|
|
@@ -499,6 +505,11 @@ async def _run_mcp_evaluation(
|
|
|
499
505
|
except Exception as e:
|
|
500
506
|
end_time = time.time()
|
|
501
507
|
runtime_seconds = end_time - start_time
|
|
508
|
+
# Preserve agent metrics if the agent completed before the error
|
|
509
|
+
if agent_result is not None:
|
|
510
|
+
result = agent_result_to_dict(agent_result, None, config.model, runtime_seconds)
|
|
511
|
+
result["error"] = str(e)
|
|
512
|
+
return result
|
|
502
513
|
cost = calculate_cost(config.model, 0, 0)
|
|
503
514
|
return {
|
|
504
515
|
"resolved": False,
|
|
@@ -562,6 +573,7 @@ async def _run_baseline_evaluation(
|
|
|
562
573
|
|
|
563
574
|
start_time = time.time()
|
|
564
575
|
env: TaskEnvironment | None = None
|
|
576
|
+
agent_result: AgentResult | None = None
|
|
565
577
|
try:
|
|
566
578
|
# Track Docker environment creation time
|
|
567
579
|
docker_start = time.time()
|
|
@@ -622,10 +634,15 @@ async def _run_baseline_evaluation(
|
|
|
622
634
|
return result
|
|
623
635
|
|
|
624
636
|
except asyncio.TimeoutError:
|
|
625
|
-
# Note: The agent harness should have captured partial statistics in the AgentResult
|
|
626
|
-
# before raising TimeoutError, but this is a fallback for unexpected timeout locations
|
|
627
637
|
end_time = time.time()
|
|
628
638
|
runtime_seconds = end_time - start_time
|
|
639
|
+
# Preserve agent metrics if the agent completed before the timeout
|
|
640
|
+
# (timeout may have occurred during evaluation, not during agent solve)
|
|
641
|
+
if agent_result is not None:
|
|
642
|
+
result = agent_result_to_dict(agent_result, None, config.model, runtime_seconds)
|
|
643
|
+
result["status"] = "timeout"
|
|
644
|
+
result["error"] = "Evaluation timed out after agent completed"
|
|
645
|
+
return result
|
|
629
646
|
cost = calculate_cost(config.model, 0, 0)
|
|
630
647
|
return {
|
|
631
648
|
"resolved": False,
|
|
@@ -641,6 +658,11 @@ async def _run_baseline_evaluation(
|
|
|
641
658
|
except Exception as e:
|
|
642
659
|
end_time = time.time()
|
|
643
660
|
runtime_seconds = end_time - start_time
|
|
661
|
+
# Preserve agent metrics if the agent completed before the error
|
|
662
|
+
if agent_result is not None:
|
|
663
|
+
result = agent_result_to_dict(agent_result, None, config.model, runtime_seconds)
|
|
664
|
+
result["error"] = str(e)
|
|
665
|
+
return result
|
|
644
666
|
cost = calculate_cost(config.model, 0, 0)
|
|
645
667
|
return {
|
|
646
668
|
"resolved": False,
|
|
@@ -1182,6 +1204,18 @@ async def run_evaluation(
|
|
|
1182
1204
|
progress.stop()
|
|
1183
1205
|
finally:
|
|
1184
1206
|
await docker_manager.cleanup_all()
|
|
1207
|
+
# Force-shutdown the default executor to prevent asyncio.run() from
|
|
1208
|
+
# hanging during cleanup. Docker SDK background threads (urllib3
|
|
1209
|
+
# connection pool) may linger after client.close(), causing
|
|
1210
|
+
# executor.shutdown(wait=True) to block indefinitely.
|
|
1211
|
+
try:
|
|
1212
|
+
loop = asyncio.get_running_loop()
|
|
1213
|
+
executor = getattr(loop, "_default_executor", None)
|
|
1214
|
+
if executor is not None:
|
|
1215
|
+
executor.shutdown(wait=False, cancel_futures=True)
|
|
1216
|
+
loop._default_executor = None
|
|
1217
|
+
except RuntimeError as exc:
|
|
1218
|
+
console.print(f"[yellow]Default executor shutdown skipped: {exc}[/yellow]")
|
|
1185
1219
|
|
|
1186
1220
|
# Check if we're in comparison mode
|
|
1187
1221
|
if config.comparison_mode:
|