mcpbr 0.4.16__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,468 @@
1
+ """Task prioritization and scheduling for benchmark evaluations.
2
+
3
+ Provides intelligent task ordering strategies to optimize benchmark runs
4
+ for speed, cost, coverage diversity, or custom scoring functions. Tasks
5
+ can be reordered before execution to get faster feedback, reduce costs,
6
+ or ensure diverse coverage across repositories and categories.
7
+
8
+ Addresses GitHub issue #92: Task Prioritization and Scheduling.
9
+ """
10
+
11
+ from collections import defaultdict
12
+ from collections.abc import Callable
13
+ from dataclasses import dataclass, field
14
+ from enum import Enum
15
+ from typing import Any
16
+
17
+
18
+ class SchedulingStrategy(Enum):
19
+ """Strategy for ordering benchmark tasks before execution.
20
+
21
+ Attributes:
22
+ DEFAULT: Preserve original task order (no reordering).
23
+ SPEED_FIRST: Run fastest tasks first for quick feedback.
24
+ COST_FIRST: Run cheapest tasks first to minimize early spend.
25
+ COVERAGE_FIRST: Round-robin across categories/repos for diverse early results.
26
+ CUSTOM: Use a user-provided scoring function.
27
+ """
28
+
29
+ DEFAULT = "default"
30
+ SPEED_FIRST = "speed"
31
+ COST_FIRST = "cost"
32
+ COVERAGE_FIRST = "coverage"
33
+ CUSTOM = "custom"
34
+
35
+
36
+ @dataclass
37
+ class TaskPriority:
38
+ """Priority metadata for a single benchmark task.
39
+
40
+ Attributes:
41
+ task_id: Unique identifier for the task (e.g., instance_id).
42
+ priority_score: Computed priority score (lower = higher priority / runs first).
43
+ estimated_time_seconds: Rough estimate of task execution time in seconds.
44
+ estimated_cost_usd: Rough estimate of task cost in USD.
45
+ category: Category or grouping key (e.g., repo name, difficulty level).
46
+ metadata: Additional metadata associated with the task.
47
+ """
48
+
49
+ task_id: str
50
+ priority_score: float = 0.0
51
+ estimated_time_seconds: float | None = None
52
+ estimated_cost_usd: float | None = None
53
+ category: str | None = None
54
+ metadata: dict[str, Any] = field(default_factory=dict)
55
+
56
+
57
+ # Default token-per-character ratio for cost estimation.
58
+ # Based on empirical observation that ~4 characters is roughly 1 token for English text.
59
+ _DEFAULT_CHARS_PER_TOKEN = 4
60
+
61
+ # Default assumed output-to-input token ratio for agent tasks.
62
+ # Agents typically produce 3-5x more output than input for coding tasks.
63
+ _DEFAULT_OUTPUT_INPUT_RATIO = 4.0
64
+
65
+ # Baseline seconds per 1000 characters of problem statement.
66
+ # Longer problems tend to require more exploration and tool calls.
67
+ _DEFAULT_SECONDS_PER_KCHAR = 30.0
68
+
69
+ # Minimum estimated time for any task (seconds).
70
+ _MIN_ESTIMATED_TIME = 30.0
71
+
72
+ # Minimum estimated cost for any task (USD).
73
+ _MIN_ESTIMATED_COST = 0.001
74
+
75
+
76
+ class TaskScheduler:
77
+ """Scheduler that reorders benchmark tasks based on a chosen strategy.
78
+
79
+ The scheduler assigns priority scores to tasks and returns them in
80
+ sorted order. It supports preset strategies (speed, cost, coverage)
81
+ and custom scoring functions.
82
+
83
+ Args:
84
+ strategy: The scheduling strategy to use.
85
+ custom_scorer: A callable that takes a task dict and returns a float
86
+ priority score (lower = runs first). Required when strategy is CUSTOM.
87
+
88
+ Raises:
89
+ ValueError: If strategy is CUSTOM but no custom_scorer is provided.
90
+
91
+ Example:
92
+ >>> scheduler = TaskScheduler(strategy=SchedulingStrategy.SPEED_FIRST)
93
+ >>> ordered = scheduler.schedule(tasks)
94
+ >>> print(scheduler.preview(tasks))
95
+ """
96
+
97
+ def __init__(
98
+ self,
99
+ strategy: SchedulingStrategy = SchedulingStrategy.DEFAULT,
100
+ custom_scorer: Callable[[dict[str, Any]], float] | None = None,
101
+ ) -> None:
102
+ if strategy == SchedulingStrategy.CUSTOM and custom_scorer is None:
103
+ raise ValueError(
104
+ "custom_scorer is required when strategy is CUSTOM. "
105
+ "Provide a callable that takes a task dict and returns a float score."
106
+ )
107
+ self._strategy = strategy
108
+ self._custom_scorer = custom_scorer
109
+
110
+ @property
111
+ def strategy(self) -> SchedulingStrategy:
112
+ """The active scheduling strategy."""
113
+ return self._strategy
114
+
115
+ def schedule(self, tasks: list[dict[str, Any]]) -> list[dict[str, Any]]:
116
+ """Reorder tasks according to the active scheduling strategy.
117
+
118
+ Args:
119
+ tasks: List of task dictionaries to schedule. Each task should have
120
+ at least an ``instance_id`` key. Additional keys like
121
+ ``problem_statement``, ``repo``, and ``category`` improve
122
+ estimation accuracy.
123
+
124
+ Returns:
125
+ A new list of task dictionaries in the scheduled order.
126
+ The original list is not mutated.
127
+ """
128
+ if not tasks:
129
+ return []
130
+
131
+ if self._strategy == SchedulingStrategy.DEFAULT:
132
+ return list(tasks)
133
+
134
+ priorities = self._compute_priorities(tasks)
135
+
136
+ # Sort by priority_score ascending (lower = runs first)
137
+ priorities.sort(key=lambda p: p.priority_score)
138
+
139
+ # Build task lookup by id for efficient reordering
140
+ task_by_id: dict[str, dict[str, Any]] = {}
141
+ for task in tasks:
142
+ tid = task.get("instance_id", str(id(task)))
143
+ task_by_id[tid] = task
144
+
145
+ return [task_by_id[p.task_id] for p in priorities]
146
+
147
+ def preview(self, tasks: list[dict[str, Any]]) -> str:
148
+ """Generate a human-readable preview of the scheduled task order.
149
+
150
+ Args:
151
+ tasks: List of task dictionaries to preview.
152
+
153
+ Returns:
154
+ A formatted string showing the scheduled order with priority
155
+ details, suitable for display before execution.
156
+ """
157
+ if not tasks:
158
+ return "No tasks to schedule."
159
+
160
+ scheduled = self.schedule(tasks)
161
+ priorities = self._compute_priorities(tasks)
162
+ priorities.sort(key=lambda p: p.priority_score)
163
+
164
+ # Build a lookup for priority info
165
+ priority_by_id: dict[str, TaskPriority] = {p.task_id: p for p in priorities}
166
+
167
+ lines: list[str] = []
168
+ lines.append(f"Schedule Preview (strategy: {self._strategy.value})")
169
+ lines.append(f"Total tasks: {len(scheduled)}")
170
+ lines.append("-" * 70)
171
+ lines.append(f"{'#':<4} {'Task ID':<35} {'Score':<8} {'Est. Time':<12} {'Est. Cost':<10}")
172
+ lines.append("-" * 70)
173
+
174
+ for i, task in enumerate(scheduled, start=1):
175
+ tid = task.get("instance_id", str(id(task)))
176
+ priority = priority_by_id.get(tid)
177
+
178
+ if priority is not None:
179
+ score_str = f"{priority.priority_score:.2f}"
180
+ time_str = (
181
+ f"{priority.estimated_time_seconds:.0f}s"
182
+ if priority.estimated_time_seconds is not None
183
+ else "N/A"
184
+ )
185
+ cost_str = (
186
+ f"${priority.estimated_cost_usd:.4f}"
187
+ if priority.estimated_cost_usd is not None
188
+ else "N/A"
189
+ )
190
+ else:
191
+ score_str = "N/A"
192
+ time_str = "N/A"
193
+ cost_str = "N/A"
194
+
195
+ lines.append(f"{i:<4} {tid:<35} {score_str:<8} {time_str:<12} {cost_str:<10}")
196
+
197
+ lines.append("-" * 70)
198
+ return "\n".join(lines)
199
+
200
+ def estimate_task_cost(self, task: dict[str, Any], model: str = "sonnet") -> float:
201
+ """Estimate the cost of running a single task in USD.
202
+
203
+ The estimate is based on the length of the problem statement and
204
+ the model's pricing. Longer problems produce more tokens and cost more.
205
+
206
+ Args:
207
+ task: Task dictionary, ideally containing a ``problem_statement`` key.
208
+ model: Model identifier used for pricing lookup (default: ``"sonnet"``).
209
+
210
+ Returns:
211
+ Estimated cost in USD. Returns ``_MIN_ESTIMATED_COST`` if pricing
212
+ data is unavailable or the problem statement is missing.
213
+ """
214
+ from .pricing import get_model_pricing
215
+
216
+ problem = task.get("problem_statement", "")
217
+ problem_len = len(problem) if isinstance(problem, str) else 0
218
+
219
+ pricing = get_model_pricing(model)
220
+ if pricing is None:
221
+ return _MIN_ESTIMATED_COST
222
+
223
+ # Estimate input tokens from problem length
224
+ input_tokens = max(problem_len / _DEFAULT_CHARS_PER_TOKEN, 100)
225
+
226
+ # Estimate output tokens as a multiple of input
227
+ output_tokens = input_tokens * _DEFAULT_OUTPUT_INPUT_RATIO
228
+
229
+ # Calculate cost in USD
230
+ input_cost = (input_tokens / 1_000_000) * pricing.input_price_per_mtok
231
+ output_cost = (output_tokens / 1_000_000) * pricing.output_price_per_mtok
232
+
233
+ return max(input_cost + output_cost, _MIN_ESTIMATED_COST)
234
+
235
+ def estimate_task_time(self, task: dict[str, Any]) -> float:
236
+ """Estimate the execution time for a single task in seconds.
237
+
238
+ The estimate is based on the length of the problem statement.
239
+ Longer problems typically require more exploration time.
240
+
241
+ Args:
242
+ task: Task dictionary, ideally containing a ``problem_statement`` key.
243
+
244
+ Returns:
245
+ Estimated execution time in seconds (minimum ``_MIN_ESTIMATED_TIME``).
246
+ """
247
+ problem = task.get("problem_statement", "")
248
+ problem_len = len(problem) if isinstance(problem, str) else 0
249
+
250
+ # Scale linearly with problem length
251
+ estimated = (problem_len / 1000) * _DEFAULT_SECONDS_PER_KCHAR
252
+
253
+ return max(estimated, _MIN_ESTIMATED_TIME)
254
+
255
+ def _compute_priorities(self, tasks: list[dict[str, Any]]) -> list[TaskPriority]:
256
+ """Compute priority scores for all tasks based on the active strategy.
257
+
258
+ Args:
259
+ tasks: List of task dictionaries.
260
+
261
+ Returns:
262
+ List of TaskPriority objects with computed scores.
263
+ """
264
+ if self._strategy == SchedulingStrategy.SPEED_FIRST:
265
+ return self._prioritize_by_speed(tasks)
266
+ elif self._strategy == SchedulingStrategy.COST_FIRST:
267
+ return self._prioritize_by_cost(tasks)
268
+ elif self._strategy == SchedulingStrategy.COVERAGE_FIRST:
269
+ return self._prioritize_by_coverage(tasks)
270
+ elif self._strategy == SchedulingStrategy.CUSTOM:
271
+ return self._prioritize_by_custom(tasks)
272
+ else:
273
+ # DEFAULT: preserve original order via index-based scoring
274
+ return [
275
+ TaskPriority(
276
+ task_id=task.get("instance_id", str(id(task))),
277
+ priority_score=float(i),
278
+ )
279
+ for i, task in enumerate(tasks)
280
+ ]
281
+
282
+ def _prioritize_by_speed(self, tasks: list[dict[str, Any]]) -> list[TaskPriority]:
283
+ """Assign priority scores based on estimated execution time (ascending).
284
+
285
+ Args:
286
+ tasks: List of task dictionaries.
287
+
288
+ Returns:
289
+ List of TaskPriority objects scored by estimated time.
290
+ """
291
+ priorities: list[TaskPriority] = []
292
+ for task in tasks:
293
+ tid = task.get("instance_id", str(id(task)))
294
+ est_time = self.estimate_task_time(task)
295
+ est_cost = self.estimate_task_cost(task)
296
+ category = _extract_category(task)
297
+
298
+ priorities.append(
299
+ TaskPriority(
300
+ task_id=tid,
301
+ priority_score=est_time,
302
+ estimated_time_seconds=est_time,
303
+ estimated_cost_usd=est_cost,
304
+ category=category,
305
+ )
306
+ )
307
+ return priorities
308
+
309
+ def _prioritize_by_cost(self, tasks: list[dict[str, Any]]) -> list[TaskPriority]:
310
+ """Assign priority scores based on estimated cost (ascending).
311
+
312
+ Args:
313
+ tasks: List of task dictionaries.
314
+
315
+ Returns:
316
+ List of TaskPriority objects scored by estimated cost.
317
+ """
318
+ priorities: list[TaskPriority] = []
319
+ for task in tasks:
320
+ tid = task.get("instance_id", str(id(task)))
321
+ est_time = self.estimate_task_time(task)
322
+ est_cost = self.estimate_task_cost(task)
323
+ category = _extract_category(task)
324
+
325
+ priorities.append(
326
+ TaskPriority(
327
+ task_id=tid,
328
+ priority_score=est_cost,
329
+ estimated_time_seconds=est_time,
330
+ estimated_cost_usd=est_cost,
331
+ category=category,
332
+ )
333
+ )
334
+ return priorities
335
+
336
+ def _prioritize_by_coverage(self, tasks: list[dict[str, Any]]) -> list[TaskPriority]:
337
+ """Assign priority scores using round-robin across categories.
338
+
339
+ Tasks are grouped by category (repo, difficulty, or explicit category),
340
+ then interleaved so that early execution covers diverse categories.
341
+
342
+ Args:
343
+ tasks: List of task dictionaries.
344
+
345
+ Returns:
346
+ List of TaskPriority objects with interleaved category ordering.
347
+ """
348
+ # Group tasks by category
349
+ groups: dict[str, list[dict[str, Any]]] = defaultdict(list)
350
+ for task in tasks:
351
+ category = _extract_category(task) or "_uncategorized_"
352
+ groups[category].append(task)
353
+
354
+ # Sort group keys for deterministic ordering
355
+ sorted_keys = sorted(groups.keys())
356
+
357
+ # Round-robin interleave: take one task from each category in turn
358
+ result: list[TaskPriority] = []
359
+ score = 0.0
360
+ max_group_len = max(len(g) for g in groups.values()) if groups else 0
361
+
362
+ for round_idx in range(max_group_len):
363
+ for key in sorted_keys:
364
+ group = groups[key]
365
+ if round_idx < len(group):
366
+ task = group[round_idx]
367
+ tid = task.get("instance_id", str(id(task)))
368
+ est_time = self.estimate_task_time(task)
369
+ est_cost = self.estimate_task_cost(task)
370
+
371
+ result.append(
372
+ TaskPriority(
373
+ task_id=tid,
374
+ priority_score=score,
375
+ estimated_time_seconds=est_time,
376
+ estimated_cost_usd=est_cost,
377
+ category=key,
378
+ )
379
+ )
380
+ score += 1.0
381
+
382
+ return result
383
+
384
+ def _prioritize_by_custom(self, tasks: list[dict[str, Any]]) -> list[TaskPriority]:
385
+ """Assign priority scores using the user-provided custom scorer.
386
+
387
+ Args:
388
+ tasks: List of task dictionaries.
389
+
390
+ Returns:
391
+ List of TaskPriority objects scored by the custom function.
392
+
393
+ Raises:
394
+ RuntimeError: If custom_scorer is None (should not happen due to
395
+ __init__ validation).
396
+ """
397
+ if self._custom_scorer is None:
398
+ raise RuntimeError("custom_scorer is None but strategy is CUSTOM")
399
+
400
+ priorities: list[TaskPriority] = []
401
+ for task in tasks:
402
+ tid = task.get("instance_id", str(id(task)))
403
+ score = self._custom_scorer(task)
404
+ est_time = self.estimate_task_time(task)
405
+ est_cost = self.estimate_task_cost(task)
406
+ category = _extract_category(task)
407
+
408
+ priorities.append(
409
+ TaskPriority(
410
+ task_id=tid,
411
+ priority_score=score,
412
+ estimated_time_seconds=est_time,
413
+ estimated_cost_usd=est_cost,
414
+ category=category,
415
+ )
416
+ )
417
+ return priorities
418
+
419
+
420
+ def _extract_category(task: dict[str, Any]) -> str | None:
421
+ """Extract a category label from a task dictionary.
422
+
423
+ Checks common fields in order of preference: ``category``, ``repo``,
424
+ ``difficulty``. Returns the first non-empty string found, or None.
425
+
426
+ Args:
427
+ task: Task dictionary.
428
+
429
+ Returns:
430
+ Category string, or None if no category field is found.
431
+ """
432
+ for key in ("category", "repo", "difficulty"):
433
+ value = task.get(key)
434
+ if value is not None:
435
+ return str(value)
436
+ return None
437
+
438
+
439
+ def create_scheduler(preset: str, **kwargs: Any) -> TaskScheduler:
440
+ """Create a TaskScheduler from a preset name.
441
+
442
+ Convenience factory function that maps human-readable preset names to
443
+ scheduling strategies.
444
+
445
+ Args:
446
+ preset: One of ``"default"``, ``"speed"``, ``"cost"``, ``"coverage"``.
447
+ **kwargs: Additional keyword arguments passed to TaskScheduler
448
+ (e.g., ``custom_scorer``).
449
+
450
+ Returns:
451
+ Configured TaskScheduler instance.
452
+
453
+ Raises:
454
+ ValueError: If the preset name is not recognized.
455
+ """
456
+ preset_map: dict[str, SchedulingStrategy] = {
457
+ "default": SchedulingStrategy.DEFAULT,
458
+ "speed": SchedulingStrategy.SPEED_FIRST,
459
+ "cost": SchedulingStrategy.COST_FIRST,
460
+ "coverage": SchedulingStrategy.COVERAGE_FIRST,
461
+ }
462
+
463
+ strategy = preset_map.get(preset.lower())
464
+ if strategy is None:
465
+ valid_presets = ", ".join(sorted(preset_map.keys()))
466
+ raise ValueError(f"Unknown scheduling preset: '{preset}'. Valid presets: {valid_presets}")
467
+
468
+ return TaskScheduler(strategy=strategy, **kwargs)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mcpbr
3
- Version: 0.4.16
3
+ Version: 0.5.0
4
4
  Summary: Model Context Protocol Benchmark Runner - evaluate MCP servers against software engineering benchmarks
5
5
  Project-URL: Homepage, https://github.com/greynewell/mcpbr
6
6
  Project-URL: Repository, https://github.com/greynewell/mcpbr
@@ -5,16 +5,24 @@ mcpbr/cache.py,sha256=YiP13omwMbXLb6NhNocJvL58enXEx9J8OrvTZnWUkw4,13254
5
5
  mcpbr/cli.py,sha256=xvh7gpJx0LzjV3g-Te4FF7BfHubGzDxOiYQsSeQnCEc,68276
6
6
  mcpbr/config.py,sha256=7lWV0ZtzyD6WZ07IR4yhT9lyBBPONzlanaO4XHm9OoE,18952
7
7
  mcpbr/config_inheritance.py,sha256=0EV9Tv62UFNgZoc8mY7yYjHEbnMM_R5EAhSeuK7ajAA,6617
8
+ mcpbr/config_migration.py,sha256=vTs52uYLO0DusB07nHZT2Y27-_eZdZKhaXYWhDFcnJI,16098
8
9
  mcpbr/config_validator.py,sha256=ZMEIeK4y6fSwyY46Xv5dK5v3jM4HDKcYkosnIcn7iyI,20488
10
+ mcpbr/config_wizard.py,sha256=IPk5a2xI_6CTvo9tIDe2MLGewbc_iVYg3k_NOTC6A6c,22522
9
11
  mcpbr/custom_metrics.py,sha256=4pMO9-BPpeQ_GUTnZ18TQXINFScAMH3cIYm0HG-C51o,13213
12
+ mcpbr/dashboard.py,sha256=wt2A-yFgDvQc94wgPPJlz70gFAkyUi41xgfqPL9xRQY,21884
13
+ mcpbr/dataset_streaming.py,sha256=XwQSdvy97yurlcAC5hUwto8bLuCf2A9FSMcwjTD_Tho,16720
10
14
  mcpbr/dataset_versioning.py,sha256=Y_ZSGhl8ihl6Kgee_p7VbkNwGhgwIdMZPlRunvk4knY,7149
15
+ mcpbr/docker_cache.py,sha256=jn_9Ak2d8omNmedSCBwA7wrswtEQvB-Bu8TIP2cm-F0,18704
11
16
  mcpbr/docker_env.py,sha256=_45OUZKjUevE9O3YLF_1uvQtdOyJ7yZIYWmSvXN3cFw,31794
17
+ mcpbr/docker_prewarm.py,sha256=GVRD2B10HA7OpWq_CC7CkNkJ1OUjAU7GzKOpJ5VFrXk,12638
18
+ mcpbr/dry_run.py,sha256=w_1L5K4Bk3SzeXfZY2NDbXims_Qh6711wIGm6p3tr84,18218
12
19
  mcpbr/env_expansion.py,sha256=Rkhth-tWV8CptQlSSk9exuMsUaSTTW9hj69z4snZd_U,6122
13
20
  mcpbr/evaluation.py,sha256=EjPREWv7hBRqhBhNan0ERh2imqMBegT0Y2cgZlTxRGk,12765
14
21
  mcpbr/failure_analysis.py,sha256=N5xp9YPe2d7P9fTa2LVSHsPgB1WOQtWMeClq3bOv4_c,19883
15
22
  mcpbr/few_shot.py,sha256=bFDdes_kgZAFWoFZQEfZG5Z2Es9rmkB1jsxSMp4aCCM,11684
23
+ mcpbr/formatting.py,sha256=lwZcb4fD5osBzJlerICyvAVb4KHSm_nRTBg1dVfD6Lo,14193
16
24
  mcpbr/gpu_support.py,sha256=eroBiLkt1A3Q2ODJDSyqrd_BzcMh8tFkjtPn7PsvJJc,5070
17
- mcpbr/harness.py,sha256=8-qmcPR2CDFuoBib9g6lPx7aMOK-5PuZgpWhpGs-Ils,51419
25
+ mcpbr/harness.py,sha256=Rc6CqzZOMJyuHqfuOIDisLOoPka-cqAqYiL7zr7ALFg,53193
18
26
  mcpbr/harnesses.py,sha256=h9iDp4qkPABNwO9OXbJ61qcD4n0oAUTU7AQksxRKLcg,47335
19
27
  mcpbr/incremental_save.py,sha256=1dm3pGiEIhP8cVk_Y6XF_cAdo3B_vyRc6CO8Wt-MyIA,4830
20
28
  mcpbr/junit_reporter.py,sha256=M_02zJbFbA3VoIYG5oR7VDecqWHEpIee-JOUShWNuLU,9261
@@ -28,6 +36,8 @@ mcpbr/profiler.py,sha256=SRXLKf2TOlpnMbQpGvjRy1Agv-XaEz6lDmBa5WGNv8c,15954
28
36
  mcpbr/providers.py,sha256=ebrnH6RXODxX4Ma9r7Is5VBHYFNP5LwCs-vpLbbHP8o,6598
29
37
  mcpbr/regression.py,sha256=xm_ago8ZP3RAOrDNjtINwyRUvzKWJcJDWbzf3hp6LlU,12827
30
38
  mcpbr/reporting.py,sha256=Odzb7EgpimW-qh01VQedhb2X594ACrOcGe4jshgiwTg,56111
39
+ mcpbr/resource_limits.py,sha256=MnmKcJLPCD-cyydGyiZz1TOPsQSpmdsmrbAvYWlxM5s,16594
40
+ mcpbr/result_streaming.py,sha256=uXKCPNSXhBd8VV-Sc5aldC7zSM1MG8pAiMgsMP7ZZOA,16602
31
41
  mcpbr/sampling.py,sha256=Hpgh2TayI3QGcno-Np9eYi8sklxKEZQXyhpaQlc9T4Q,6248
32
42
  mcpbr/schema.py,sha256=fdjiKmp1au2oN5aXcPRoCbyvwm2XeMD5DmeWSurMk4A,6858
33
43
  mcpbr/smoke_test.py,sha256=srYGOn_auspRbt_a6ebYDDDq_nujA_iZGman5nU1ikU,14925
@@ -35,6 +45,8 @@ mcpbr/state_tracker.py,sha256=rIP9LIHtQg6oBsLIxnwRjE865Kw6U7DMO_GzzuMRC0E,10790
35
45
  mcpbr/statistics.py,sha256=Ny8TMdBrIpS4KfKCJcuFfTeaGuTmEkS1G_uHBlboYdA,19134
36
46
  mcpbr/streaming.py,sha256=XPhkXO1R1EsWtkoPvCpyy4TehEom7hkuOeP-00joX3o,13853
37
47
  mcpbr/swebench_test_specs.py,sha256=Mh_BPjcexkgDT3p4zT2p31925b8w5tgsxxRpYZQZalM,1390
48
+ mcpbr/task_batching.py,sha256=KFw_RxZUvVgjRr3pc0ehBX2GAoPT5yTP4VdM43e1_Dk,15275
49
+ mcpbr/task_scheduler.py,sha256=HBR8ebJJ-ZDzM2nnWaIHhOqdbdqRjYzbEVGYjGiVYEw,17183
38
50
  mcpbr/templates.py,sha256=dqwboVB-yfE06w2rgDOvuWJB4Hx5duH_W-jvLBqmlKg,10683
39
51
  mcpbr/benchmarks/__init__.py,sha256=2-7Ebg6-wHo1QGfVKWjjbREcLG_A-6Q0XfZGiyXrOeE,4489
40
52
  mcpbr/benchmarks/adversarial.py,sha256=69VBTZv6BhR1JwjQepA_YwAu3b--vJviGd6IWs2h1QA,12357
@@ -80,15 +92,15 @@ mcpbr/infrastructure/azure_health.py,sha256=xITmIa9IfYIwxcVhY0sJ81a-6WNKiT8kSQTd
80
92
  mcpbr/infrastructure/base.py,sha256=Olj6uiNBeGoUqltZI1NHZfa26kzT-6jfp8YIXSykFKM,3037
81
93
  mcpbr/infrastructure/local.py,sha256=VK6UAg7Dzvb9v1LAJgNGA_s0blQKrHAQEXBAC75zAL8,4237
82
94
  mcpbr/infrastructure/manager.py,sha256=j0T7U1Tbajmfve4SNfhYKikvL9kgSVT01fYKMC-sH-s,4796
83
- mcpbr-0.4.16.data/data/mcpbr/data/templates/brave-search.yaml,sha256=PYHXJOaDqYKoqdJc3JV1WbaL-BacrdkQPck1eKGbMPo,1098
84
- mcpbr-0.4.16.data/data/mcpbr/data/templates/filesystem.yaml,sha256=1p6Z6ChViFYHAODYD71JFst6gdhR5y5rnWNf7Pp5zOY,1091
85
- mcpbr-0.4.16.data/data/mcpbr/data/templates/github.yaml,sha256=uzPwq5_loFegvH6RNov1MQclbBiFBgYWzpiKLfEN9H4,1133
86
- mcpbr-0.4.16.data/data/mcpbr/data/templates/google-maps.yaml,sha256=ldR7E9UmuAA-3nJZ1SShD7PhG0_AwDJOSYuy19hQ6cI,1116
87
- mcpbr-0.4.16.data/data/mcpbr/data/templates/postgres.yaml,sha256=r6R1069BhV4ADQGPZ-T9r6xMNwbr2yrNh8-IHPb4XiI,1178
88
- mcpbr-0.4.16.data/data/mcpbr/data/templates/slack.yaml,sha256=dBn_YqlFJMJai_55sRDb4hXClgxRpcyYTlWl4LBkpuo,1072
89
- mcpbr-0.4.16.data/data/mcpbr/data/templates/sqlite.yaml,sha256=UR5yN9f8v_BC6oskny2xMldHWzZrB9b_PpFSmv5eccg,1080
90
- mcpbr-0.4.16.dist-info/METADATA,sha256=GeSnMZw0x7-XPhblIu50aCO7NXaNfjgVScnBOp6ZaOA,55069
91
- mcpbr-0.4.16.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
92
- mcpbr-0.4.16.dist-info/entry_points.txt,sha256=lLL8icujqBF36V9bF4gfaB2at4cFKCiv2IdJ1i5hT9U,41
93
- mcpbr-0.4.16.dist-info/licenses/LICENSE,sha256=mcXLPreEXzD-816yLKmocCPr9_k3gFFo62TjrSuKkIQ,1075
94
- mcpbr-0.4.16.dist-info/RECORD,,
95
+ mcpbr-0.5.0.data/data/mcpbr/data/templates/brave-search.yaml,sha256=PYHXJOaDqYKoqdJc3JV1WbaL-BacrdkQPck1eKGbMPo,1098
96
+ mcpbr-0.5.0.data/data/mcpbr/data/templates/filesystem.yaml,sha256=1p6Z6ChViFYHAODYD71JFst6gdhR5y5rnWNf7Pp5zOY,1091
97
+ mcpbr-0.5.0.data/data/mcpbr/data/templates/github.yaml,sha256=uzPwq5_loFegvH6RNov1MQclbBiFBgYWzpiKLfEN9H4,1133
98
+ mcpbr-0.5.0.data/data/mcpbr/data/templates/google-maps.yaml,sha256=ldR7E9UmuAA-3nJZ1SShD7PhG0_AwDJOSYuy19hQ6cI,1116
99
+ mcpbr-0.5.0.data/data/mcpbr/data/templates/postgres.yaml,sha256=r6R1069BhV4ADQGPZ-T9r6xMNwbr2yrNh8-IHPb4XiI,1178
100
+ mcpbr-0.5.0.data/data/mcpbr/data/templates/slack.yaml,sha256=dBn_YqlFJMJai_55sRDb4hXClgxRpcyYTlWl4LBkpuo,1072
101
+ mcpbr-0.5.0.data/data/mcpbr/data/templates/sqlite.yaml,sha256=UR5yN9f8v_BC6oskny2xMldHWzZrB9b_PpFSmv5eccg,1080
102
+ mcpbr-0.5.0.dist-info/METADATA,sha256=fMqq-Q3zU5arV5f777AXScxNJ2C7sHAEbUqliT7rOn4,55068
103
+ mcpbr-0.5.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
104
+ mcpbr-0.5.0.dist-info/entry_points.txt,sha256=lLL8icujqBF36V9bF4gfaB2at4cFKCiv2IdJ1i5hT9U,41
105
+ mcpbr-0.5.0.dist-info/licenses/LICENSE,sha256=mcXLPreEXzD-816yLKmocCPr9_k3gFFo62TjrSuKkIQ,1075
106
+ mcpbr-0.5.0.dist-info/RECORD,,
File without changes