mcpbr 0.4.16__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. mcpbr/__init__.py +20 -1
  2. mcpbr/config.py +37 -1
  3. mcpbr/config_migration.py +470 -0
  4. mcpbr/config_wizard.py +647 -0
  5. mcpbr/dashboard.py +619 -0
  6. mcpbr/dataset_streaming.py +491 -0
  7. mcpbr/docker_cache.py +539 -0
  8. mcpbr/docker_env.py +2 -1
  9. mcpbr/docker_prewarm.py +370 -0
  10. mcpbr/dry_run.py +533 -0
  11. mcpbr/formatting.py +444 -0
  12. mcpbr/gpu_support.py +2 -1
  13. mcpbr/graceful_degradation.py +277 -0
  14. mcpbr/harness.py +38 -4
  15. mcpbr/languages.py +228 -0
  16. mcpbr/logging_config.py +207 -0
  17. mcpbr/models.py +66 -0
  18. mcpbr/preflight.py +2 -1
  19. mcpbr/pricing.py +72 -0
  20. mcpbr/providers.py +316 -3
  21. mcpbr/resource_limits.py +487 -0
  22. mcpbr/result_streaming.py +519 -0
  23. mcpbr/sdk.py +264 -0
  24. mcpbr/smoke_test.py +2 -1
  25. mcpbr/task_batching.py +403 -0
  26. mcpbr/task_scheduler.py +468 -0
  27. {mcpbr-0.4.16.dist-info → mcpbr-0.6.0.dist-info}/METADATA +8 -1
  28. {mcpbr-0.4.16.dist-info → mcpbr-0.6.0.dist-info}/RECORD +38 -22
  29. {mcpbr-0.4.16.data → mcpbr-0.6.0.data}/data/mcpbr/data/templates/brave-search.yaml +0 -0
  30. {mcpbr-0.4.16.data → mcpbr-0.6.0.data}/data/mcpbr/data/templates/filesystem.yaml +0 -0
  31. {mcpbr-0.4.16.data → mcpbr-0.6.0.data}/data/mcpbr/data/templates/github.yaml +0 -0
  32. {mcpbr-0.4.16.data → mcpbr-0.6.0.data}/data/mcpbr/data/templates/google-maps.yaml +0 -0
  33. {mcpbr-0.4.16.data → mcpbr-0.6.0.data}/data/mcpbr/data/templates/postgres.yaml +0 -0
  34. {mcpbr-0.4.16.data → mcpbr-0.6.0.data}/data/mcpbr/data/templates/slack.yaml +0 -0
  35. {mcpbr-0.4.16.data → mcpbr-0.6.0.data}/data/mcpbr/data/templates/sqlite.yaml +0 -0
  36. {mcpbr-0.4.16.dist-info → mcpbr-0.6.0.dist-info}/WHEEL +0 -0
  37. {mcpbr-0.4.16.dist-info → mcpbr-0.6.0.dist-info}/entry_points.txt +0 -0
  38. {mcpbr-0.4.16.dist-info → mcpbr-0.6.0.dist-info}/licenses/LICENSE +0 -0
mcpbr/dashboard.py ADDED
@@ -0,0 +1,619 @@
1
+ """Real-time web dashboard for monitoring benchmark evaluations.
2
+
3
+ This module provides a FastAPI-based local web server that displays live
4
+ evaluation progress, including tasks completed, resolution rates, ETA,
5
+ and per-task status. It also exposes REST API endpoints for pause, resume,
6
+ and cancel controls.
7
+
8
+ Requires optional dependencies: ``fastapi`` and ``uvicorn``.
9
+ Install them with::
10
+
11
+ pip install fastapi uvicorn[standard]
12
+
13
+ Usage::
14
+
15
+ from mcpbr.dashboard import DashboardServer, DashboardState
16
+
17
+ state = DashboardState(total_tasks=50)
18
+ server = DashboardServer(state, port=8080)
19
+ server.start()
20
+
21
+ # From evaluation loop:
22
+ state.update_task("django__django-12345", resolved=True)
23
+
24
+ server.stop()
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import asyncio
30
+ import json
31
+ import logging
32
+ import threading
33
+ import time
34
+ from dataclasses import dataclass, field
35
+ from typing import Any
36
+
37
+ logger = logging.getLogger(__name__)
38
+
39
+ try:
40
+ from fastapi import FastAPI, WebSocket, WebSocketDisconnect
41
+ from fastapi.responses import HTMLResponse, JSONResponse
42
+
43
+ HAS_FASTAPI = True
44
+ except ImportError:
45
+ HAS_FASTAPI = False
46
+
47
+ try:
48
+ import uvicorn
49
+
50
+ HAS_UVICORN = True
51
+ except ImportError:
52
+ HAS_UVICORN = False
53
+
54
+
55
+ # ---------------------------------------------------------------------------
56
+ # Data types
57
+ # ---------------------------------------------------------------------------
58
+
59
+
60
@dataclass
class TaskStatus:
    """Lifecycle record for one evaluation task shown on the dashboard.

    Attributes:
        instance_id: Unique identifier of the task.
        status: ``"pending"``, ``"running"``, ``"resolved"`` or ``"failed"``.
        started_at: Epoch timestamp at which the task began, ``None`` until then.
        finished_at: Epoch timestamp at which the task ended, ``None`` until then.
        error: Failure message for a failed task, otherwise ``None``.
    """

    instance_id: str
    status: str = "pending"
    started_at: float | None = None
    finished_at: float | None = None
    error: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Export the record as a plain, JSON-safe mapping.

        Returns:
            A dictionary with one entry per attribute, in declaration order.
        """
        exported = ("instance_id", "status", "started_at", "finished_at", "error")
        return {name: getattr(self, name) for name in exported}
91
+
92
+
93
@dataclass
class DashboardState:
    """Shared mutable state that the evaluation harness updates in real time.

    This object is passed to ``DashboardServer`` and should be mutated by the
    harness evaluation loop via :meth:`start_task` and :meth:`update_task`.
    The methods below hold an internal re-entrant lock while they read or
    modify the counters; plain attribute assignments made from outside
    (for example to ``is_paused``) do not go through that lock.

    Attributes:
        total_tasks: Total number of tasks in the evaluation.
        completed_tasks: Number of tasks that have finished (resolved + failed).
        resolved_tasks: Number of tasks that resolved successfully.
        failed_tasks: Number of tasks that failed.
        current_task_id: Instance ID of the currently running task, or ``None``.
        start_time: Epoch timestamp when the evaluation started.
        task_results: Ordered list of per-task :class:`TaskStatus` objects.
        is_paused: Whether the evaluation is currently paused.
        is_cancelled: Whether the evaluation has been cancelled.
    """

    total_tasks: int = 0
    completed_tasks: int = 0
    resolved_tasks: int = 0
    failed_tasks: int = 0
    current_task_id: str | None = None
    start_time: float = field(default_factory=time.time)
    task_results: list[TaskStatus] = field(default_factory=list)
    is_paused: bool = False
    is_cancelled: bool = False
    # Re-entrant so that locked public methods can call each other.
    _lock: threading.RLock = field(default_factory=threading.RLock, init=False, repr=False)

    # -- Mutation helpers used by the evaluation harness --------------------

    def start_task(self, instance_id: str) -> None:
        """Mark a task as currently running.

        Creates the task record if it does not exist yet and stamps its
        start time with the current clock.

        Args:
            instance_id: The task's unique identifier.
        """
        with self._lock:
            self.current_task_id = instance_id
            task = self._find_or_create_task(instance_id)
            task.status = "running"
            task.started_at = time.time()

    def update_task(
        self,
        instance_id: str,
        *,
        resolved: bool = False,
        error: str | None = None,
    ) -> None:
        """Record the outcome of a completed task.

        A task counts as resolved only when ``resolved`` is true and no
        ``error`` was given; every other combination counts as failed.

        Args:
            instance_id: The task's unique identifier.
            resolved: Whether the task was resolved successfully.
            error: An error message if the task failed.
        """
        with self._lock:
            task = self._find_or_create_task(instance_id)
            task.finished_at = time.time()

            if error is None and resolved:
                task.status = "resolved"
                self.resolved_tasks += 1
            else:
                task.status = "failed"
                if error is not None:
                    task.error = error
                self.failed_tasks += 1

            self.completed_tasks += 1

            # Clear current task if it matches
            if self.current_task_id == instance_id:
                self.current_task_id = None

    def get_resolution_rate(self) -> float:
        """Return the current resolution rate as a fraction (0.0 -- 1.0).

        Returns:
            Resolved tasks divided by completed tasks, or 0.0 if none completed.
        """
        with self._lock:
            if self.completed_tasks == 0:
                return 0.0
            return self.resolved_tasks / self.completed_tasks

    def get_eta_seconds(self) -> float | None:
        """Estimate remaining seconds based on average task completion time.

        Returns:
            Estimated seconds remaining (never negative), or ``None`` if no
            tasks have completed.
        """
        with self._lock:
            return self._get_eta_seconds_unlocked()

    def to_dict(self) -> dict[str, Any]:
        """Serialize the full dashboard state to a JSON-safe dictionary.

        The snapshot is taken under the lock so the counters, the derived
        rate/ETA and the per-task list are mutually consistent.

        Returns:
            Dictionary representation of the dashboard state.
        """
        with self._lock:
            return {
                "total_tasks": self.total_tasks,
                "completed_tasks": self.completed_tasks,
                "resolved_tasks": self.resolved_tasks,
                "failed_tasks": self.failed_tasks,
                "current_task_id": self.current_task_id,
                # The lock is re-entrant, so the locked getter is safe here.
                "resolution_rate": self.get_resolution_rate(),
                "eta_seconds": self._get_eta_seconds_unlocked(),
                "elapsed_seconds": time.time() - self.start_time,
                "is_paused": self.is_paused,
                "is_cancelled": self.is_cancelled,
                "task_results": [t.to_dict() for t in self.task_results],
            }

    def _get_eta_seconds_unlocked(self) -> float | None:
        """Estimate remaining seconds (caller must hold _lock).

        Returns:
            Estimated seconds remaining (never negative), or ``None`` if no
            tasks have completed.
        """
        if self.completed_tasks == 0:
            return None
        elapsed = time.time() - self.start_time
        avg_per_task = elapsed / self.completed_tasks
        # completed_tasks can exceed total_tasks (e.g. total_tasks left at its
        # default of 0); clamp so the estimate never goes negative.
        remaining_tasks = max(self.total_tasks - self.completed_tasks, 0)
        return avg_per_task * remaining_tasks

    # -- Internal helpers --------------------------------------------------

    def _find_or_create_task(self, instance_id: str) -> TaskStatus:
        """Retrieve an existing TaskStatus or create a new one.

        New records are appended to ``task_results``, so the list keeps the
        order in which tasks were first seen. Caller must hold ``_lock``.

        Args:
            instance_id: The task's unique identifier.

        Returns:
            The matching :class:`TaskStatus` object.
        """
        for task in self.task_results:
            if task.instance_id == instance_id:
                return task
        task = TaskStatus(instance_id=instance_id)
        self.task_results.append(task)
        return task
251
+
252
+
253
+ # ---------------------------------------------------------------------------
254
+ # HTML dashboard template
255
+ # ---------------------------------------------------------------------------
256
+
257
+ DASHBOARD_HTML = """<!DOCTYPE html>
258
+ <html lang="en">
259
+ <head>
260
+ <meta charset="utf-8">
261
+ <meta name="viewport" content="width=device-width, initial-scale=1">
262
+ <title>mcpbr Dashboard</title>
263
+ <style>
264
+ :root { --bg: #0f172a; --card: #1e293b; --text: #e2e8f0; --accent: #38bdf8;
265
+ --green: #4ade80; --red: #f87171; --yellow: #fbbf24; --border: #334155; }
266
+ * { box-sizing: border-box; margin: 0; padding: 0; }
267
+ body { font-family: system-ui, -apple-system, sans-serif; background: var(--bg);
268
+ color: var(--text); padding: 1.5rem; }
269
+ h1 { font-size: 1.5rem; margin-bottom: 1rem; color: var(--accent); }
270
+ .grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
271
+ gap: 1rem; margin-bottom: 1.5rem; }
272
+ .card { background: var(--card); border: 1px solid var(--border); border-radius: 0.5rem;
273
+ padding: 1rem; }
274
+ .card .label { font-size: 0.75rem; text-transform: uppercase; color: #94a3b8;
275
+ margin-bottom: 0.25rem; }
276
+ .card .value { font-size: 1.5rem; font-weight: 700; }
277
+ .progress-bar { width: 100%; height: 0.5rem; background: var(--border);
278
+ border-radius: 0.25rem; overflow: hidden; margin: 0.5rem 0; }
279
+ .progress-fill { height: 100%; background: var(--accent); transition: width 0.3s; }
280
+ .controls { display: flex; gap: 0.5rem; margin-bottom: 1.5rem; }
281
+ .controls button { padding: 0.5rem 1rem; border: none; border-radius: 0.25rem;
282
+ cursor: pointer; font-weight: 600; font-size: 0.875rem; }
283
+ .btn-pause { background: var(--yellow); color: #000; }
284
+ .btn-resume { background: var(--green); color: #000; }
285
+ .btn-cancel { background: var(--red); color: #fff; }
286
+ table { width: 100%; border-collapse: collapse; font-size: 0.875rem; }
287
+ th, td { text-align: left; padding: 0.5rem 0.75rem; border-bottom: 1px solid var(--border); }
288
+ th { color: #94a3b8; text-transform: uppercase; font-size: 0.75rem; }
289
+ .status-resolved { color: var(--green); }
290
+ .status-failed { color: var(--red); }
291
+ .status-running { color: var(--yellow); }
292
+ .status-pending { color: #64748b; }
293
+ #connection { font-size: 0.75rem; color: #64748b; margin-bottom: 1rem; }
294
+ </style>
295
+ </head>
296
+ <body>
297
+ <h1>mcpbr Evaluation Dashboard</h1>
298
+ <div id="connection">Connecting...</div>
299
+
300
+ <div class="grid">
301
+ <div class="card"><div class="label">Completed</div>
302
+ <div class="value" id="completed">0 / 0</div></div>
303
+ <div class="card"><div class="label">Resolution Rate</div>
304
+ <div class="value" id="rate">0.0%</div></div>
305
+ <div class="card"><div class="label">ETA</div>
306
+ <div class="value" id="eta">--</div></div>
307
+ <div class="card"><div class="label">Elapsed</div>
308
+ <div class="value" id="elapsed">0s</div></div>
309
+ <div class="card"><div class="label">Status</div>
310
+ <div class="value" id="run-status">Running</div></div>
311
+ </div>
312
+
313
+ <div class="progress-bar"><div class="progress-fill" id="progress" style="width:0%"></div></div>
314
+
315
+ <div class="controls">
316
+ <button class="btn-pause" onclick="sendControl('pause')">Pause</button>
317
+ <button class="btn-resume" onclick="sendControl('resume')">Resume</button>
318
+ <button class="btn-cancel" onclick="sendControl('cancel')">Cancel</button>
319
+ </div>
320
+
321
+ <table>
322
+ <thead><tr><th>Instance ID</th><th>Status</th><th>Duration</th><th>Error</th></tr></thead>
323
+ <tbody id="tasks"></tbody>
324
+ </table>
325
+
326
+ <script>
327
+ let ws;
328
+ function connect() {
329
+ const loc = window.location;
330
+ const proto = loc.protocol === "https:" ? "wss:" : "ws:";
331
+ ws = new WebSocket(proto + "//" + loc.host + "/ws");
332
+ ws.onopen = () => {
333
+ document.getElementById("connection").textContent = "Connected (live)";
334
+ };
335
+ ws.onclose = () => {
336
+ document.getElementById("connection").textContent = "Disconnected - reconnecting...";
337
+ setTimeout(connect, 2000);
338
+ };
339
+ ws.onmessage = (evt) => {
340
+ const data = JSON.parse(evt.data);
341
+ update(data);
342
+ };
343
+ }
344
+
345
+ function update(d) {
346
+ document.getElementById("completed").textContent = d.completed_tasks + " / " + d.total_tasks;
347
+ document.getElementById("rate").textContent = (d.resolution_rate * 100).toFixed(1) + "%";
348
+ document.getElementById("elapsed").textContent = fmtTime(d.elapsed_seconds);
349
+ document.getElementById("eta").textContent = d.eta_seconds != null ? fmtTime(d.eta_seconds) : "--";
350
+ const pct = d.total_tasks > 0 ? (d.completed_tasks / d.total_tasks * 100) : 0;
351
+ document.getElementById("progress").style.width = pct + "%";
352
+
353
+ let status = "Running";
354
+ if (d.is_cancelled) status = "Cancelled";
355
+ else if (d.is_paused) status = "Paused";
356
+ document.getElementById("run-status").textContent = status;
357
+
358
+ const tbody = document.getElementById("tasks");
359
+ tbody.innerHTML = "";
360
+ (d.task_results || []).forEach(t => {
361
+ const tr = document.createElement("tr");
362
+ const dur = (t.started_at && t.finished_at)
363
+ ? fmtTime(t.finished_at - t.started_at)
364
+ : (t.started_at ? "running..." : "-");
365
+ const tdId = document.createElement("td");
366
+ tdId.textContent = t.instance_id;
367
+ const tdStatus = document.createElement("td");
368
+ tdStatus.textContent = t.status;
369
+ tdStatus.className = "status-" + t.status;
370
+ const tdDur = document.createElement("td");
371
+ tdDur.textContent = dur;
372
+ const tdErr = document.createElement("td");
373
+ tdErr.textContent = t.error || "";
374
+ tr.appendChild(tdId);
375
+ tr.appendChild(tdStatus);
376
+ tr.appendChild(tdDur);
377
+ tr.appendChild(tdErr);
378
+ tbody.appendChild(tr);
379
+ });
380
+ }
381
+
382
+ function fmtTime(s) {
383
+ if (s == null) return "--";
384
+ s = Math.round(s);
385
+ const m = Math.floor(s / 60);
386
+ const sec = s % 60;
387
+ return m > 0 ? m + "m " + sec + "s" : sec + "s";
388
+ }
389
+
390
+ function sendControl(action) {
391
+ fetch("/api/" + action, { method: "POST" })
392
+ .then(r => r.json())
393
+ .then(d => { if (d.error) alert(d.error); });
394
+ }
395
+
396
+ connect();
397
+ </script>
398
+ </body>
399
+ </html>"""
400
+
401
+
402
+ # ---------------------------------------------------------------------------
403
+ # FastAPI application factory
404
+ # ---------------------------------------------------------------------------
405
+
406
+
407
def _check_dependencies() -> None:
    """Verify that the optional web stack was importable.

    Raises:
        ImportError: If ``fastapi`` and/or ``uvicorn`` failed to import; the
            message names the missing packages and gives a ``pip install``
            command for them.
    """
    availability = (("fastapi", HAS_FASTAPI), ("uvicorn", HAS_UVICORN))
    absent = [package for package, present in availability if not present]
    if not absent:
        return
    raise ImportError(
        f"Dashboard requires optional dependencies: {', '.join(absent)}. "
        f"Install them with: pip install {' '.join(absent)}"
    )
419
+
420
+
421
def create_app(state: DashboardState) -> "FastAPI":
    """Assemble the dashboard's FastAPI application around *state*.

    Registers the HTML page at ``/``, the JSON endpoints ``/api/status``,
    ``/api/pause``, ``/api/resume`` and ``/api/cancel``, and the ``/ws``
    WebSocket that pushes a state snapshot once per second.

    Args:
        state: Shared dashboard state that the endpoints read and mutate.

    Returns:
        A :class:`FastAPI` instance with all routes registered.

    Raises:
        ImportError: If ``fastapi`` or ``uvicorn`` is not installed.
    """
    _check_dependencies()

    # Live sockets; shared between the /ws handler and the control endpoints.
    connected_websockets: list[WebSocket] = []
    app = FastAPI(title="mcpbr Dashboard", docs_url=None, redoc_url=None)

    def _already_cancelled() -> JSONResponse:
        # Shared 409 answer for controls that make no sense after a cancel.
        return JSONResponse(
            content={"error": "Evaluation is already cancelled."},
            status_code=409,
        )

    async def _publish() -> None:
        # Push the latest snapshot to every connected client right away.
        await _broadcast(state.to_dict(), connected_websockets)

    # -- HTML page ---------------------------------------------------------

    @app.get("/", response_class=HTMLResponse)
    async def index() -> HTMLResponse:
        """Serve the single-page dashboard HTML."""
        return HTMLResponse(content=DASHBOARD_HTML)

    # -- REST API ----------------------------------------------------------

    @app.get("/api/status", response_class=JSONResponse)
    async def api_status() -> JSONResponse:
        """Return current evaluation state as JSON."""
        return JSONResponse(content=state.to_dict())

    @app.post("/api/pause", response_class=JSONResponse)
    async def api_pause() -> JSONResponse:
        """Pause the evaluation loop."""
        if state.is_cancelled:
            return _already_cancelled()
        state.is_paused = True
        await _publish()
        return JSONResponse(content={"status": "paused"})

    @app.post("/api/resume", response_class=JSONResponse)
    async def api_resume() -> JSONResponse:
        """Resume a paused evaluation."""
        if state.is_cancelled:
            return _already_cancelled()
        state.is_paused = False
        await _publish()
        return JSONResponse(content={"status": "resumed"})

    @app.post("/api/cancel", response_class=JSONResponse)
    async def api_cancel() -> JSONResponse:
        """Cancel the evaluation."""
        state.is_cancelled = True
        state.is_paused = False
        await _publish()
        return JSONResponse(content={"status": "cancelled"})

    # -- WebSocket ---------------------------------------------------------

    @app.websocket("/ws")
    async def websocket_endpoint(websocket: WebSocket) -> None:
        """Handle a WebSocket connection for live state updates."""
        await websocket.accept()
        connected_websockets.append(websocket)
        try:
            # First snapshot goes out immediately, then one per second for
            # as long as the client stays connected.
            while True:
                await websocket.send_text(json.dumps(state.to_dict()))
                await asyncio.sleep(1)
        except WebSocketDisconnect:
            pass
        except Exception:
            logger.debug("WebSocket connection closed unexpectedly.")
        finally:
            # A broadcast may already have dropped this socket as dead.
            try:
                connected_websockets.remove(websocket)
            except ValueError:
                pass

    # Expose internals for testing
    app.state.connected_websockets = connected_websockets  # type: ignore[attr-defined]
    app.state.dashboard_state = state  # type: ignore[attr-defined]

    return app
511
+
512
+
513
async def _broadcast(data: dict[str, Any], websockets: list[Any]) -> None:
    """Send *data* to every connected WebSocket, removing dead connections.

    The payload is serialized once and sent to each socket in turn. A socket
    whose ``send_text`` raises is treated as dead and dropped from
    *websockets*.

    Args:
        data: JSON-serializable dictionary to send.
        websockets: Mutable list of active WebSocket connections.
    """
    payload = json.dumps(data)
    dead: list[Any] = []
    # Iterate over a snapshot: each ``await`` yields to the event loop, and a
    # connection handler may remove its own socket from *websockets* in the
    # meantime. Iterating the live list would then skip entries.
    for ws in list(websockets):
        try:
            await ws.send_text(payload)
        except Exception:
            dead.append(ws)
    for ws in dead:
        # The socket's own handler may already have removed it; an
        # unconditional remove() would raise ValueError in that case.
        if ws in websockets:
            websockets.remove(ws)
529
+
530
+
531
+ # ---------------------------------------------------------------------------
532
+ # Server wrapper
533
+ # ---------------------------------------------------------------------------
534
+
535
+
536
class DashboardServer:
    """Manages the lifecycle of the dashboard web server.

    The server runs in a background daemon thread so that it does not block
    the evaluation loop.

    Args:
        state: Shared :class:`DashboardState` updated by the harness.
        host: Bind address. Defaults to ``"127.0.0.1"``.
        port: TCP port. Defaults to ``8080``.

    Raises:
        ImportError: If ``fastapi`` or ``uvicorn`` are not installed.

    Example::

        state = DashboardState(total_tasks=50)
        server = DashboardServer(state)
        server.start()  # non-blocking
        # ... run evaluation ...
        server.stop()
    """

    def __init__(
        self,
        state: DashboardState,
        host: str = "127.0.0.1",
        port: int = 8080,
    ) -> None:
        _check_dependencies()
        self.state = state
        self.host = host
        self.port = port
        self.app = create_app(state)
        self._server: uvicorn.Server | None = None  # type: ignore[name-defined]
        self._thread: threading.Thread | None = None

    def start(self) -> None:
        """Start the dashboard server in a background thread.

        The thread is a daemon so it will not prevent process exit. Calling
        this while the server is already running is a no-op: replacing the
        handles would orphan the first server, which could then never be
        stopped.
        """
        if self.is_running:
            logger.warning(
                "Dashboard already running at http://%s:%s; start() ignored.",
                self.host,
                self.port,
            )
            return

        config = uvicorn.Config(  # type: ignore[name-defined]
            app=self.app,
            host=self.host,
            port=self.port,
            log_level="warning",
            loop="asyncio",
        )
        self._server = uvicorn.Server(config)  # type: ignore[name-defined]
        self._thread = threading.Thread(target=self._server.run, daemon=True)
        self._thread.start()
        logger.info("Dashboard started at http://%s:%s", self.host, self.port)

    def stop(self) -> None:
        """Signal the server to shut down and wait for the thread to finish.

        Waits up to five seconds. If the thread is still alive after that,
        the handles are kept (so :attr:`is_running` stays truthful and
        ``stop()`` can be retried) and a warning is logged instead of
        reporting a clean stop.
        """
        if self._server is not None:
            self._server.should_exit = True
        if self._thread is not None:
            self._thread.join(timeout=5)
            if self._thread.is_alive():
                logger.warning("Dashboard did not shut down within 5 seconds.")
                return
        self._thread = None
        self._server = None
        logger.info("Dashboard stopped.")

    @property
    def is_running(self) -> bool:
        """Return ``True`` if the background server thread is alive."""
        return self._thread is not None and self._thread.is_alive()

    def update_task(
        self,
        instance_id: str,
        *,
        resolved: bool = False,
        error: str | None = None,
    ) -> None:
        """Convenience proxy to :meth:`DashboardState.update_task`.

        Args:
            instance_id: The task's unique identifier.
            resolved: Whether the task resolved successfully.
            error: An error message if the task failed.
        """
        self.state.update_task(instance_id, resolved=resolved, error=error)