mcpbr 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,207 @@
+"""Structured logging configuration for mcpbr.
+
+Provides JSON-structured and human-readable log formatting, log file rotation,
+environment variable overrides, and contextual log fields for benchmark runs.
+"""
+
+import json
+import logging
+import logging.handlers
+import os
+from pathlib import Path
+from typing import Any
+
+
+class StructuredFormatter(logging.Formatter):
+    """JSON-structured log formatter.
+
+    Produces one JSON object per log record, including optional context fields
+    like task_id and benchmark when they are attached to the record.
+    """
+
+    def format(self, record: logging.LogRecord) -> str:
+        """Format a log record as a JSON string.
+
+        Args:
+            record: The log record to format.
+
+        Returns:
+            A JSON-encoded string representing the log entry.
+        """
+        log_data: dict[str, Any] = {
+            "timestamp": self.formatTime(record),
+            "level": record.levelname,
+            "logger": record.name,
+            "message": record.getMessage(),
+        }
+        if hasattr(record, "task_id"):
+            log_data["task_id"] = record.task_id
+        if hasattr(record, "benchmark"):
+            log_data["benchmark"] = record.benchmark
+        if record.exc_info and record.exc_info[0] is not None:
+            log_data["exception"] = self.formatException(record.exc_info)
+        return json.dumps(log_data)
+
+
+class HumanFormatter(logging.Formatter):
+    """Human-readable log formatter.
+
+    Produces log lines in the format: [LEVEL] logger.name: message
+    """
+
+    FORMAT = "[%(levelname)s] %(name)s: %(message)s"
+
+    def __init__(self) -> None:
+        """Initialize the formatter with the human-readable format string."""
+        super().__init__(self.FORMAT)
+
+
+def setup_logging(
+    level: str = "INFO",
+    log_file: Path | None = None,
+    structured: bool = False,
+    max_bytes: int = 10_485_760,
+    backup_count: int = 5,
+    debug: bool = False,
+    quiet: bool = False,
+) -> None:
+    """Configure mcpbr logging.
+
+    Sets up the 'mcpbr' root logger with console and optional file handlers.
+    Supports structured JSON output, log rotation, and environment variable
+    overrides via MCPBR_LOG_LEVEL.
+
+    Args:
+        level: Default log level string (e.g., 'DEBUG', 'INFO', 'WARNING', 'ERROR').
+        log_file: Optional path to a log file. If provided, a rotating file handler
+            is added. Parent directories are created automatically.
+        structured: If True, use JSON-structured formatting. Otherwise use
+            human-readable formatting.
+        max_bytes: Maximum log file size in bytes before rotation (default 10 MB).
+        backup_count: Number of rotated backup files to keep (default 5).
+        debug: If True, override level to DEBUG.
+        quiet: If True, override level to WARNING (suppresses INFO and below).
+    """
+    mcpbr_logger = logging.getLogger("mcpbr")
+
+    # Clear existing handlers to allow reconfiguration
+    mcpbr_logger.handlers.clear()
+
+    # Determine effective log level
+    # Priority: env var > debug/quiet flags > level parameter
+    env_level = os.environ.get("MCPBR_LOG_LEVEL")
+    if env_level:
+        effective_level = getattr(logging, env_level.upper(), logging.INFO)
+    elif debug:
+        effective_level = logging.DEBUG
+    elif quiet:
+        effective_level = logging.WARNING
+    else:
+        effective_level = getattr(logging, level.upper(), logging.INFO)
+
+    mcpbr_logger.setLevel(effective_level)
+
+    # Choose formatter
+    if structured:
+        formatter: logging.Formatter = StructuredFormatter()
+    else:
+        formatter = HumanFormatter()
+
+    # Console handler (always added)
+    console_handler = logging.StreamHandler()
+    console_handler.setFormatter(formatter)
+    mcpbr_logger.addHandler(console_handler)
+
+    # File handler (optional, with rotation)
+    if log_file is not None:
+        log_path = Path(log_file)
+        log_path.parent.mkdir(parents=True, exist_ok=True)
+
+        file_handler = logging.handlers.RotatingFileHandler(
+            filename=str(log_path),
+            maxBytes=max_bytes,
+            backupCount=backup_count,
+        )
+        file_handler.setFormatter(formatter)
+        mcpbr_logger.addHandler(file_handler)
+
+    # Prevent propagation to the root logger to avoid duplicate output
+    mcpbr_logger.propagate = False
+
+
+def get_logger(name: str) -> logging.Logger:
+    """Get a named mcpbr logger.
+
+    Returns a logger under the 'mcpbr' namespace. For example,
+    get_logger('evaluation') returns the logger 'mcpbr.evaluation'.
+
+    Args:
+        name: The logger name suffix (will be prefixed with 'mcpbr.').
+
+    Returns:
+        A logging.Logger instance.
+    """
+    return logging.getLogger(f"mcpbr.{name}")
+
+
+class _ContextFilter(logging.Filter):
+    """A logging filter that injects context fields into log records."""
+
+    def __init__(self, **kwargs: Any) -> None:
+        """Initialize the filter with context key-value pairs.
+
+        Args:
+            **kwargs: Arbitrary context fields to add to every log record.
+        """
+        super().__init__()
+        self.context = kwargs
+
+    def filter(self, record: logging.LogRecord) -> bool:
+        """Add context fields to the log record.
+
+        Args:
+            record: The log record being processed.
+
+        Returns:
+            Always returns True (never filters out records).
+        """
+        for key, value in self.context.items():
+            setattr(record, key, value)
+        return True
+
+
+class LogContext:
+    """Add structured context fields to log records via a filter.
+
+    Use as a context manager to temporarily inject fields like task_id and
+    benchmark into all log records produced by the given logger.
+
+    Example:
+        logger = get_logger("evaluation")
+        with LogContext(logger, task_id="django-12345", benchmark="swebench"):
+            logger.info("Starting evaluation")
+            # Log record will include task_id and benchmark fields
+    """

+    def __init__(self, logger: logging.Logger, **kwargs: Any) -> None:
+        """Initialize the log context.
+
+        Args:
+            logger: The logger to attach context fields to.
+            **kwargs: Context fields to add (e.g., task_id, benchmark).
+        """
+        self.logger = logger
+        self.kwargs = kwargs
+        self._filter: _ContextFilter | None = None
+
+    def __enter__(self) -> "LogContext":
+        """Enter the context and attach the filter to the logger."""
+        self._filter = _ContextFilter(**self.kwargs)
+        self.logger.addFilter(self._filter)
+        return self
+
+    def __exit__(self, *args: Any) -> None:
+        """Exit the context and remove the filter from the logger."""
+        if self._filter is not None:
+            self.logger.removeFilter(self._filter)
+            self._filter = None
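
Taken together, the new module wires up as follows. A minimal usage sketch, not taken from the package: the hunk above does not show the added file's path, so the mcpbr.logging_config import below is a hypothetical name, and the JSON output shown is abbreviated.

    from pathlib import Path

    # Hypothetical module path -- the diff does not name the added file.
    from mcpbr.logging_config import LogContext, get_logger, setup_logging

    # JSON logs on the console plus a rotating file; an MCPBR_LOG_LEVEL
    # environment variable would take priority over both the level
    # argument and the debug/quiet flags.
    setup_logging(level="INFO", log_file=Path("logs/run.log"), structured=True)

    logger = get_logger("evaluation")
    with LogContext(logger, task_id="django-12345", benchmark="swebench"):
        logger.info("Starting evaluation")
    # -> {"timestamp": "...", "level": "INFO", "logger": "mcpbr.evaluation",
    #    "message": "Starting evaluation", "task_id": "django-12345",
    #    "benchmark": "swebench"}

Because setup_logging sets propagate = False on the "mcpbr" logger, these records stay out of the root logger, and LogContext removes its filter on exit, so the context fields do not leak past the with block.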
mcpbr/models.py CHANGED
@@ -61,6 +61,72 @@ SUPPORTED_MODELS: dict[str, ModelInfo] = {
         context_window=200000,
         notes="Resolves to latest Haiku model",
     ),
+    # OpenAI models
+    "gpt-4o": ModelInfo(
+        id="gpt-4o",
+        provider="OpenAI",
+        display_name="GPT-4o",
+        context_window=128000,
+        notes="Most capable OpenAI model with vision",
+    ),
+    "gpt-4-turbo": ModelInfo(
+        id="gpt-4-turbo",
+        provider="OpenAI",
+        display_name="GPT-4 Turbo",
+        context_window=128000,
+        notes="High capability with faster inference",
+    ),
+    "gpt-4o-mini": ModelInfo(
+        id="gpt-4o-mini",
+        provider="OpenAI",
+        display_name="GPT-4o Mini",
+        context_window=128000,
+        notes="Compact and cost-effective GPT-4o variant",
+    ),
+    # Google Gemini models
+    "gemini-2.0-flash": ModelInfo(
+        id="gemini-2.0-flash",
+        provider="Google",
+        display_name="Gemini 2.0 Flash",
+        context_window=1048576,
+        notes="Latest fast Gemini model",
+    ),
+    "gemini-1.5-pro": ModelInfo(
+        id="gemini-1.5-pro",
+        provider="Google",
+        display_name="Gemini 1.5 Pro",
+        context_window=2097152,
+        notes="High-capability model with 2M token context",
+    ),
+    "gemini-1.5-flash": ModelInfo(
+        id="gemini-1.5-flash",
+        provider="Google",
+        display_name="Gemini 1.5 Flash",
+        context_window=1048576,
+        notes="Fast and cost-effective Gemini model",
+    ),
+    # Alibaba Qwen models (via DashScope)
+    "qwen-plus": ModelInfo(
+        id="qwen-plus",
+        provider="Alibaba",
+        display_name="Qwen Plus",
+        context_window=131072,
+        notes="Balanced Qwen model for general tasks",
+    ),
+    "qwen-turbo": ModelInfo(
+        id="qwen-turbo",
+        provider="Alibaba",
+        display_name="Qwen Turbo",
+        context_window=131072,
+        notes="Fast and cost-effective Qwen model",
+    ),
+    "qwen-max": ModelInfo(
+        id="qwen-max",
+        provider="Alibaba",
+        display_name="Qwen Max",
+        context_window=131072,
+        notes="Most capable Qwen model",
+    ),
 }
 
 DEFAULT_MODEL = "sonnet"
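
The nine new entries extend the existing SUPPORTED_MODELS registry; DEFAULT_MODEL is untouched. A quick lookup sketch, assuming ModelInfo exposes the fields shown above as attributes:

    from mcpbr.models import DEFAULT_MODEL, SUPPORTED_MODELS

    info = SUPPORTED_MODELS["gemini-1.5-pro"]
    print(info.provider, info.display_name, info.context_window)
    # Google Gemini 1.5 Pro 2097152

    print(DEFAULT_MODEL)  # sonnet -- unchanged by this release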
mcpbr/preflight.py CHANGED
@@ -5,10 +5,11 @@ import shutil
 from dataclasses import dataclass
 from pathlib import Path
 
-import docker
 from rich.console import Console
 from rich.table import Table
 
+import docker
+
 from .config import HarnessConfig
 
 console = Console()
mcpbr/pricing.py CHANGED
@@ -6,6 +6,9 @@ calculating API costs based on token usage.
 Pricing is per million tokens (MTok) and is current as of January 2026.
 Prices may change - check official provider documentation for updates:
 - Anthropic: https://www.anthropic.com/pricing
+- OpenAI: https://openai.com/pricing
+- Google: https://ai.google.dev/pricing
+- Alibaba (Qwen): https://www.alibabacloud.com/help/en/model-studio/developer-reference/billing
 """
 
 from dataclasses import dataclass
@@ -90,6 +93,75 @@ MODEL_PRICING: dict[str, ModelPricing] = {
         cache_read_price_per_mtok=0.10,
         notes="Alias for latest Haiku model",
     ),
+    # OpenAI models
+    "gpt-4o": ModelPricing(
+        model_id="gpt-4o",
+        provider="OpenAI",
+        input_price_per_mtok=2.50,
+        output_price_per_mtok=10.00,
+        notes="Most capable OpenAI model with vision",
+    ),
+    "gpt-4-turbo": ModelPricing(
+        model_id="gpt-4-turbo",
+        provider="OpenAI",
+        input_price_per_mtok=10.00,
+        output_price_per_mtok=30.00,
+        notes="High capability with faster inference",
+    ),
+    "gpt-4o-mini": ModelPricing(
+        model_id="gpt-4o-mini",
+        provider="OpenAI",
+        input_price_per_mtok=0.15,
+        output_price_per_mtok=0.60,
+        notes="Compact and cost-effective GPT-4o variant",
+    ),
+    # Google Gemini models
+    "gemini-2.0-flash": ModelPricing(
+        model_id="gemini-2.0-flash",
+        provider="Google",
+        input_price_per_mtok=0.10,
+        output_price_per_mtok=0.40,
+        notes="Latest fast Gemini model",
+    ),
+    "gemini-1.5-pro": ModelPricing(
+        model_id="gemini-1.5-pro",
+        provider="Google",
+        input_price_per_mtok=1.25,
+        output_price_per_mtok=5.00,
+        supports_prompt_caching=True,
+        cache_creation_price_per_mtok=0.3125,
+        cache_read_price_per_mtok=0.3125,
+        notes="High-capability model with 2M token context",
+    ),
+    "gemini-1.5-flash": ModelPricing(
+        model_id="gemini-1.5-flash",
+        provider="Google",
+        input_price_per_mtok=0.075,
+        output_price_per_mtok=0.30,
+        notes="Fast and cost-effective Gemini model",
+    ),
+    # Alibaba Qwen models (via DashScope)
+    "qwen-plus": ModelPricing(
+        model_id="qwen-plus",
+        provider="Alibaba",
+        input_price_per_mtok=0.40,
+        output_price_per_mtok=1.20,
+        notes="Base tier (0-256K tokens); scales to $1.20/$3.60 for 256K-1M",
+    ),
+    "qwen-turbo": ModelPricing(
+        model_id="qwen-turbo",
+        provider="Alibaba",
+        input_price_per_mtok=0.30,
+        output_price_per_mtok=0.60,
+        notes="Fast Qwen model; pricing may vary by context tier",
+    ),
+    "qwen-max": ModelPricing(
+        model_id="qwen-max",
+        provider="Alibaba",
+        input_price_per_mtok=1.20,
+        output_price_per_mtok=6.00,
+        notes="Base tier (0-32K tokens); scales to $2.40/$12.00 for 32K-128K",
+    ),
 }
 
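
With these entries in MODEL_PRICING, cost arithmetic for the new providers follows directly from the per-MTok fields. An illustrative sketch only -- mcpbr's own cost-calculation helper is not part of this diff, and the tiered Qwen rates noted above are ignored:

    from mcpbr.pricing import MODEL_PRICING

    pricing = MODEL_PRICING["gpt-4o-mini"]
    input_tokens, output_tokens = 120_000, 8_000

    # Prices are USD per million tokens (MTok), per the module docstring.
    cost = (
        input_tokens / 1_000_000 * pricing.input_price_per_mtok
        + output_tokens / 1_000_000 * pricing.output_price_per_mtok
    )
    print(f"${cost:.4f}")  # $0.0228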