mcpbr 0.4.16__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcpbr/config_migration.py +470 -0
- mcpbr/config_wizard.py +647 -0
- mcpbr/dashboard.py +619 -0
- mcpbr/dataset_streaming.py +491 -0
- mcpbr/docker_cache.py +539 -0
- mcpbr/docker_prewarm.py +369 -0
- mcpbr/dry_run.py +532 -0
- mcpbr/formatting.py +444 -0
- mcpbr/harness.py +38 -4
- mcpbr/resource_limits.py +487 -0
- mcpbr/result_streaming.py +519 -0
- mcpbr/task_batching.py +403 -0
- mcpbr/task_scheduler.py +468 -0
- {mcpbr-0.4.16.dist-info → mcpbr-0.5.0.dist-info}/METADATA +1 -1
- {mcpbr-0.4.16.dist-info → mcpbr-0.5.0.dist-info}/RECORD +25 -13
- {mcpbr-0.4.16.data → mcpbr-0.5.0.data}/data/mcpbr/data/templates/brave-search.yaml +0 -0
- {mcpbr-0.4.16.data → mcpbr-0.5.0.data}/data/mcpbr/data/templates/filesystem.yaml +0 -0
- {mcpbr-0.4.16.data → mcpbr-0.5.0.data}/data/mcpbr/data/templates/github.yaml +0 -0
- {mcpbr-0.4.16.data → mcpbr-0.5.0.data}/data/mcpbr/data/templates/google-maps.yaml +0 -0
- {mcpbr-0.4.16.data → mcpbr-0.5.0.data}/data/mcpbr/data/templates/postgres.yaml +0 -0
- {mcpbr-0.4.16.data → mcpbr-0.5.0.data}/data/mcpbr/data/templates/slack.yaml +0 -0
- {mcpbr-0.4.16.data → mcpbr-0.5.0.data}/data/mcpbr/data/templates/sqlite.yaml +0 -0
- {mcpbr-0.4.16.dist-info → mcpbr-0.5.0.dist-info}/WHEEL +0 -0
- {mcpbr-0.4.16.dist-info → mcpbr-0.5.0.dist-info}/entry_points.txt +0 -0
- {mcpbr-0.4.16.dist-info → mcpbr-0.5.0.dist-info}/licenses/LICENSE +0 -0
mcpbr/task_batching.py
ADDED
|
@@ -0,0 +1,403 @@
|
|
|
1
|
+
"""Task batching with smart scheduling for efficient batch execution.
|
|
2
|
+
|
|
3
|
+
Groups similar benchmark tasks to minimize Docker container restarts and
|
|
4
|
+
maximize resource reuse. Supports multiple batching strategies including
|
|
5
|
+
repo-based, image-based, category-based, fixed-size, and adaptive grouping.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import uuid
|
|
9
|
+
from collections import defaultdict
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from enum import Enum
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
# Estimated wall-clock overhead per fresh Docker container start, in seconds.
# Used by TaskBatcher to convert avoided restarts into estimated time savings;
# the 30s figure is a heuristic, not a measured value.
_CONTAINER_RESTART_OVERHEAD_SECONDS = 30.0
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class BatchStrategy(Enum):
    """Available policies for grouping benchmark tasks into batches.

    Members:
        BY_REPO: Batch together tasks that share the same repository.
        BY_IMAGE: Batch together tasks that need the same Docker image.
        BY_CATEGORY: Batch together tasks from the same benchmark category.
        FIXED_SIZE: Chunk tasks into fixed-size groups, ignoring similarity.
        ADAPTIVE: Size batches dynamically from task similarity signals.
    """

    BY_REPO = "by_repo"
    BY_IMAGE = "by_image"
    BY_CATEGORY = "by_category"
    FIXED_SIZE = "fixed_size"
    ADAPTIVE = "adaptive"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
|
|
37
|
+
class TaskBatch:
|
|
38
|
+
"""A batch of grouped tasks for efficient execution.
|
|
39
|
+
|
|
40
|
+
Attributes:
|
|
41
|
+
batch_id: Unique identifier for this batch.
|
|
42
|
+
tasks: List of task dictionaries in this batch.
|
|
43
|
+
common_image: Shared Docker image if all tasks use the same one, else None.
|
|
44
|
+
common_repo: Shared repository if all tasks target the same repo, else None.
|
|
45
|
+
batch_size: Number of tasks in this batch.
|
|
46
|
+
estimated_savings_seconds: Estimated time saved by batching vs individual execution.
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
batch_id: str
|
|
50
|
+
tasks: list[dict[str, Any]]
|
|
51
|
+
common_image: str | None = None
|
|
52
|
+
common_repo: str | None = None
|
|
53
|
+
batch_size: int = 0
|
|
54
|
+
estimated_savings_seconds: float = 0.0
|
|
55
|
+
|
|
56
|
+
def __post_init__(self) -> None:
|
|
57
|
+
"""Compute batch_size from tasks if not explicitly set."""
|
|
58
|
+
if self.batch_size == 0 and self.tasks:
|
|
59
|
+
self.batch_size = len(self.tasks)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass
class BatchSavings:
    """Summary of the estimated benefit of a batching plan.

    Attributes:
        total_batches: How many batches the plan contains.
        avg_batch_size: Mean number of tasks per batch.
        estimated_container_reuse: Container restarts avoided by batching.
        estimated_time_saved_seconds: Total estimated time saved, in seconds.
    """

    total_batches: int = 0
    avg_batch_size: float = 0.0
    estimated_container_reuse: int = 0
    estimated_time_saved_seconds: float = 0.0
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class TaskBatcher:
    """Groups benchmark tasks into batches for efficient execution.

    Batching reduces Docker container restarts by grouping tasks that share
    common requirements (repository, image, category). Supports multiple
    strategies and configurable batch sizes.

    Args:
        strategy: Batching strategy to use.
        max_batch_size: Maximum number of tasks per batch.
        min_batch_size: Minimum number of tasks to form a batch. Groups smaller
            than this are still returned as batches (no tasks are dropped).

    Example:
        >>> batcher = TaskBatcher(strategy=BatchStrategy.BY_REPO, max_batch_size=5)
        >>> tasks = [{"instance_id": "t1", "repo": "org/repo1"}, ...]
        >>> batches = batcher.batch(tasks)
        >>> print(batcher.preview(batches))
    """

    def __init__(
        self,
        strategy: BatchStrategy = BatchStrategy.BY_REPO,
        max_batch_size: int = 10,
        min_batch_size: int = 2,
    ) -> None:
        """Initialize the TaskBatcher.

        Args:
            strategy: Batching strategy to use.
            max_batch_size: Maximum number of tasks per batch.
            min_batch_size: Minimum number of tasks to form a batch.

        Raises:
            ValueError: If max_batch_size < 1 or min_batch_size < 1 or
                min_batch_size > max_batch_size.
        """
        if max_batch_size < 1:
            raise ValueError(f"max_batch_size must be >= 1, got {max_batch_size}")
        if min_batch_size < 1:
            raise ValueError(f"min_batch_size must be >= 1, got {min_batch_size}")
        if min_batch_size > max_batch_size:
            raise ValueError(
                f"min_batch_size ({min_batch_size}) must be <= max_batch_size ({max_batch_size})"
            )
        self.strategy = strategy
        self.max_batch_size = max_batch_size
        self.min_batch_size = min_batch_size

    def batch(self, tasks: list[dict[str, Any]]) -> list[TaskBatch]:
        """Group tasks into batches using the configured strategy.

        Args:
            tasks: List of task dictionaries to batch. Each task should have
                at minimum an ``instance_id`` key. Depending on the strategy,
                ``repo``, ``image``, and ``category`` fields are also used.

        Returns:
            List of TaskBatch objects. Every input task appears in exactly one
            batch. Batches are sorted by descending size for scheduling efficiency
            (except FIXED_SIZE, whose chunks are already in non-increasing order).

        Raises:
            ValueError: If the configured strategy is not recognized.
        """
        if not tasks:
            return []

        if self.strategy == BatchStrategy.BY_REPO:
            return self._batch_by_field(tasks, "repo")
        elif self.strategy == BatchStrategy.BY_IMAGE:
            return self._batch_by_field(tasks, "image")
        elif self.strategy == BatchStrategy.BY_CATEGORY:
            return self._batch_by_field(tasks, "category")
        elif self.strategy == BatchStrategy.FIXED_SIZE:
            return self._batch_fixed_size(tasks)
        elif self.strategy == BatchStrategy.ADAPTIVE:
            return self._batch_adaptive(tasks)
        else:
            raise ValueError(f"Unknown batch strategy: {self.strategy}")

    def estimate_savings(self, batches: list[TaskBatch]) -> BatchSavings:
        """Estimate time saved by batching compared to individual execution.

        The savings come primarily from container reuse: tasks in the same batch
        can share a Docker container instead of each requiring a fresh one.

        Args:
            batches: List of TaskBatch objects to analyze.

        Returns:
            BatchSavings with estimated metrics.
        """
        if not batches:
            return BatchSavings()

        total_tasks = sum(b.batch_size for b in batches)
        total_batches = len(batches)
        avg_batch_size = total_tasks / total_batches if total_batches > 0 else 0.0

        # Without batching, each task needs its own container restart.
        # With batching, only the first task in each batch needs a restart.
        container_reuse = total_tasks - total_batches
        time_saved = container_reuse * _CONTAINER_RESTART_OVERHEAD_SECONDS

        return BatchSavings(
            total_batches=total_batches,
            avg_batch_size=round(avg_batch_size, 2),
            estimated_container_reuse=container_reuse,
            estimated_time_saved_seconds=round(time_saved, 2),
        )

    def preview(self, batches: list[TaskBatch]) -> str:
        """Generate a formatted preview of the batching plan.

        Args:
            batches: List of TaskBatch objects to preview.

        Returns:
            Human-readable string summarizing the batches and estimated savings.
        """
        if not batches:
            return "No batches to preview."

        savings = self.estimate_savings(batches)
        lines: list[str] = []
        lines.append(f"Batch Plan ({self.strategy.value})")
        lines.append("=" * 50)
        lines.append(f"Total batches: {savings.total_batches}")
        lines.append(f"Average batch size: {savings.avg_batch_size}")
        lines.append(f"Estimated container reuse: {savings.estimated_container_reuse}")
        lines.append(f"Estimated time saved: {savings.estimated_time_saved_seconds:.1f}s")
        lines.append("")

        for i, b in enumerate(batches, 1):
            label_parts: list[str] = []
            if b.common_repo:
                label_parts.append(f"repo={b.common_repo}")
            if b.common_image:
                label_parts.append(f"image={b.common_image}")
            label = ", ".join(label_parts) if label_parts else "mixed"

            lines.append(f"  Batch {i}: {b.batch_size} tasks ({label})")
            # Show at most five task ids per batch to keep the preview compact.
            task_ids = [t.get("instance_id", "?") for t in b.tasks[:5]]
            if b.batch_size > 5:
                task_ids.append(f"... +{b.batch_size - 5} more")
            for tid in task_ids:
                lines.append(f"    - {tid}")

        return "\n".join(lines)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _make_batch(self, chunk: list[dict[str, Any]]) -> TaskBatch:
        """Build a TaskBatch for a chunk, deriving shared fields and savings.

        Consolidates the TaskBatch construction previously duplicated across
        the three batching strategies.

        Args:
            chunk: Tasks belonging to one batch.

        Returns:
            A fully populated TaskBatch with a fresh UUID.
        """
        return TaskBatch(
            batch_id=str(uuid.uuid4()),
            tasks=chunk,
            common_image=self._common_value(chunk, "image"),
            common_repo=self._common_value(chunk, "repo"),
            batch_size=len(chunk),
            estimated_savings_seconds=self._estimate_batch_savings(len(chunk)),
        )

    def _batch_by_field(self, tasks: list[dict[str, Any]], field_name: str) -> list[TaskBatch]:
        """Group tasks by a shared field, then split into max-sized chunks.

        Args:
            tasks: List of task dictionaries.
            field_name: Key to group tasks by (e.g. "repo", "image", "category").

        Returns:
            Sorted list of TaskBatch objects (largest batches first).
        """
        groups: dict[str, list[dict[str, Any]]] = defaultdict(list)
        for task in tasks:
            # Tasks missing the field share a single sentinel group.
            groups[str(task.get(field_name, "_ungrouped_"))].append(task)

        # Iterate groups in sorted key order for deterministic output.
        batches = [
            self._make_batch(chunk)
            for _key, group_tasks in sorted(groups.items())
            for chunk in self._split_into_chunks(group_tasks)
        ]

        # Sort largest first for better scheduling
        batches.sort(key=lambda b: b.batch_size, reverse=True)
        return batches

    def _batch_fixed_size(self, tasks: list[dict[str, Any]]) -> list[TaskBatch]:
        """Split tasks into fixed-size chunks.

        Args:
            tasks: List of task dictionaries.

        Returns:
            List of TaskBatch objects in input order; only the final chunk
            can be smaller than max_batch_size, so sizes are non-increasing.
        """
        return [self._make_batch(chunk) for chunk in self._split_into_chunks(tasks)]

    def _batch_adaptive(self, tasks: list[dict[str, Any]]) -> list[TaskBatch]:
        """Adaptively group tasks based on multi-field similarity.

        Tasks are first grouped by a composite key of all available grouping
        fields (repo, image, category). Groups that share more fields get
        larger batch sizes (up to max_batch_size). Groups with no shared
        fields get smaller batches (down toward min_batch_size).

        Args:
            tasks: List of task dictionaries.

        Returns:
            Sorted list of TaskBatch objects (largest batches first).
        """
        # Build composite similarity groups keyed on "repo|image|category",
        # with "_" standing in for an absent field.
        similarity_fields = ["repo", "image", "category"]
        groups: dict[str, list[dict[str, Any]]] = defaultdict(list)
        for task in tasks:
            key = "|".join(str(task.get(f, "_")) for f in similarity_fields)
            groups[key].append(task)

        batches: list[TaskBatch] = []
        for key, group_tasks in sorted(groups.items()):
            # Determine how many fields are shared (non-default)
            shared_count = sum(1 for p in key.split("|") if p != "_")

            # Scale batch size based on similarity: more shared fields -> larger batches
            similarity_ratio = shared_count / len(similarity_fields) if similarity_fields else 0
            adaptive_max = self.min_batch_size + int(
                (self.max_batch_size - self.min_batch_size) * similarity_ratio
            )
            adaptive_max = max(adaptive_max, self.min_batch_size)

            batches.extend(
                self._make_batch(chunk)
                for chunk in self._split_into_chunks(group_tasks, max_size=adaptive_max)
            )

        batches.sort(key=lambda b: b.batch_size, reverse=True)
        return batches

    def _split_into_chunks(
        self,
        tasks: list[dict[str, Any]],
        max_size: int | None = None,
    ) -> list[list[dict[str, Any]]]:
        """Split a list of tasks into chunks of at most max_size.

        Args:
            tasks: Tasks to split.
            max_size: Override for maximum chunk size. Defaults to self.max_batch_size.

        Returns:
            List of task sublists.
        """
        size = max_size if max_size is not None else self.max_batch_size
        if size < 1:
            size = 1  # defensive: never chunk by a non-positive stride
        return [tasks[i : i + size] for i in range(0, len(tasks), size)]

    @staticmethod
    def _common_value(tasks: list[dict[str, Any]], field_name: str) -> str | None:
        """Return the shared value for a field if all tasks agree, else None.

        Args:
            tasks: List of task dictionaries.
            field_name: Key to check.

        Returns:
            The common value string, or None if tasks differ or field is absent.
        """
        if not tasks:
            return None
        values = {t.get(field_name) for t in tasks}
        # Ignore tasks that simply lack the field; only conflicting values
        # (or a fully absent field) yield None.
        values.discard(None)
        if len(values) == 1:
            return str(values.pop())
        return None

    @staticmethod
    def _estimate_batch_savings(batch_size: int) -> float:
        """Estimate time saved for a single batch.

        Each additional task in a batch beyond the first avoids one container
        restart.

        Args:
            batch_size: Number of tasks in the batch.

        Returns:
            Estimated time saved in seconds.
        """
        if batch_size <= 1:
            return 0.0
        return (batch_size - 1) * _CONTAINER_RESTART_OVERHEAD_SECONDS
|