selfevals 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. selfevals/.agents/skills/error-analysis/SKILL.md +149 -0
  2. selfevals/__init__.py +19 -0
  3. selfevals/_errors.py +44 -0
  4. selfevals/_internal/__init__.py +0 -0
  5. selfevals/_internal/hashing.py +23 -0
  6. selfevals/_internal/ids.py +65 -0
  7. selfevals/_internal/time.py +17 -0
  8. selfevals/analysis/__init__.py +23 -0
  9. selfevals/analysis/bundle.py +162 -0
  10. selfevals/analysis/hypothesis.py +26 -0
  11. selfevals/analysis/ingest.py +185 -0
  12. selfevals/analysis/schemas.py +119 -0
  13. selfevals/analysis/staging.py +34 -0
  14. selfevals/api/__init__.py +24 -0
  15. selfevals/api/__main__.py +47 -0
  16. selfevals/api/app.py +351 -0
  17. selfevals/api/broker.py +210 -0
  18. selfevals/api/broker_bridge.py +29 -0
  19. selfevals/api/queries.py +447 -0
  20. selfevals/api/schemas.py +151 -0
  21. selfevals/api/sse.py +114 -0
  22. selfevals/cli/__init__.py +15 -0
  23. selfevals/cli/_friendly.py +180 -0
  24. selfevals/cli/_help.py +55 -0
  25. selfevals/cli/analyze_commands.py +169 -0
  26. selfevals/cli/commands.py +615 -0
  27. selfevals/cli/main.py +409 -0
  28. selfevals/decision/__init__.py +34 -0
  29. selfevals/decision/matrix.py +185 -0
  30. selfevals/examples/__init__.py +8 -0
  31. selfevals/examples/evals/datasets/pingpong.jsonl +2 -0
  32. selfevals/examples/evals/experiments/example_pingpong.yaml +58 -0
  33. selfevals/examples/pingpong.py +21 -0
  34. selfevals/graders/__init__.py +46 -0
  35. selfevals/graders/base.py +54 -0
  36. selfevals/graders/calibration.py +145 -0
  37. selfevals/graders/deterministic.py +143 -0
  38. selfevals/graders/llm_judge.py +187 -0
  39. selfevals/graders/registry.py +66 -0
  40. selfevals/optimization/__init__.py +47 -0
  41. selfevals/optimization/aggregator.py +246 -0
  42. selfevals/optimization/loop.py +432 -0
  43. selfevals/optimization/proposers.py +202 -0
  44. selfevals/py.typed +0 -0
  45. selfevals/repo/__init__.py +28 -0
  46. selfevals/repo/loader.py +276 -0
  47. selfevals/reporter/__init__.py +21 -0
  48. selfevals/reporter/_metrics.py +114 -0
  49. selfevals/reporter/compare.py +221 -0
  50. selfevals/reporter/json_report.py +105 -0
  51. selfevals/reporter/markdown.py +232 -0
  52. selfevals/runner/__init__.py +42 -0
  53. selfevals/runner/adapters.py +268 -0
  54. selfevals/runner/executor.py +234 -0
  55. selfevals/runner/otlp_receiver.py +343 -0
  56. selfevals/runner/otlp_to_recorder.py +180 -0
  57. selfevals/runner/sandbox.py +46 -0
  58. selfevals/schemas/__init__.py +213 -0
  59. selfevals/schemas/_base.py +82 -0
  60. selfevals/schemas/annotation.py +55 -0
  61. selfevals/schemas/dataset.py +111 -0
  62. selfevals/schemas/enums.py +324 -0
  63. selfevals/schemas/eval_case.py +189 -0
  64. selfevals/schemas/experiment.py +367 -0
  65. selfevals/schemas/failure_mode.py +76 -0
  66. selfevals/schemas/fleet.py +111 -0
  67. selfevals/schemas/grader_card.py +112 -0
  68. selfevals/schemas/iteration.py +219 -0
  69. selfevals/schemas/registry.py +125 -0
  70. selfevals/schemas/tool.py +43 -0
  71. selfevals/schemas/trace.py +384 -0
  72. selfevals/schemas/workspace.py +69 -0
  73. selfevals/sdk/__init__.py +24 -0
  74. selfevals/sdk/auto_instrument.py +165 -0
  75. selfevals/sdk/context.py +45 -0
  76. selfevals/sdk/exporter.py +50 -0
  77. selfevals/sdk/facade.py +203 -0
  78. selfevals/skills/__init__.py +61 -0
  79. selfevals/storage/__init__.py +53 -0
  80. selfevals/storage/errors.py +66 -0
  81. selfevals/storage/filesystem.py +137 -0
  82. selfevals/storage/interface.py +135 -0
  83. selfevals/storage/migrations/__init__.py +80 -0
  84. selfevals/storage/migrations/m0001_initial.py +57 -0
  85. selfevals/storage/seed.py +199 -0
  86. selfevals/storage/sqlite.py +232 -0
  87. selfevals/trace/__init__.py +31 -0
  88. selfevals/trace/otel_importer.py +455 -0
  89. selfevals/trace/payload_router.py +106 -0
  90. selfevals/trace/recorder.py +540 -0
  91. selfevals/version.py +1 -0
  92. selfevals-0.2.2.dist-info/METADATA +283 -0
  93. selfevals-0.2.2.dist-info/RECORD +96 -0
  94. selfevals-0.2.2.dist-info/WHEEL +4 -0
  95. selfevals-0.2.2.dist-info/entry_points.txt +2 -0
  96. selfevals-0.2.2.dist-info/licenses/LICENSE +17 -0
selfevals/api/app.py ADDED
@@ -0,0 +1,351 @@
1
+ """FastAPI app — read-mostly HTTP bridge over the SQLite store.
2
+
3
+ Mounted on `/` (no version prefix; this is a single internal service).
4
+ Endpoints map 1:1 to the pages of the web UI; payload shapes match
5
+ the existing Pydantic models so the web side can validate against the
6
+ same canonical JSON.
7
+
8
+ Auth: stubbed via a single `X-SelfEvals-User` header (default
9
+ `"local"`). Real auth lands later; everything else is forward-compat.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import asyncio
15
+ import os
16
+ from collections.abc import AsyncIterator
17
+ from contextlib import asynccontextmanager
18
+ from pathlib import Path
19
+ from typing import Annotated, Any
20
+
21
+ from fastapi import Depends, FastAPI, Header, HTTPException, Query
22
+ from fastapi.middleware.cors import CORSMiddleware
23
+ from fastapi.responses import StreamingResponse
24
+
25
+ from selfevals.api.broker import get_broker
26
+ from selfevals.api.queries import (
27
+ AnchorPoint,
28
+ anchor_set_history,
29
+ experiment_decisions,
30
+ experiment_detail,
31
+ experiment_iterations,
32
+ iteration_detail,
33
+ list_experiments,
34
+ list_workspaces,
35
+ load_thread,
36
+ load_trace,
37
+ workspace_detail,
38
+ )
39
+ from selfevals.api.schemas import (
40
+ CreateWorkspaceRequest,
41
+ ExperimentDetailResponse,
42
+ HealthResponse,
43
+ IterationListResponse,
44
+ ThreadResponse,
45
+ TraceResponse,
46
+ WorkspaceListResponse,
47
+ WorkspaceResponse,
48
+ )
49
+ from selfevals.api.sse import stream_trace
50
+ from selfevals.storage.sqlite import SQLiteStorage
51
+
52
# Database file used when neither an explicit path nor the SELFEVALS_DB
# environment variable supplies one.
DEFAULT_DB_PATH = "./selfevals.sqlite"
# Name of the HTTP header that carries the (stubbed) caller identity.
_USER_HEADER = "X-SelfEvals-User"

# Optional string header parameter, reused in the endpoint signatures so each
# handler reads the same header under the same alias.
UserHeader = Annotated[
    str | None,
    Header(alias=_USER_HEADER, description="Stubbed user id (auth is post-MVP)."),
]
59
+
60
+
61
+ def _resolve_db_path(db_path: str | None) -> str:
62
+ return db_path or os.environ.get("SELFEVALS_DB", DEFAULT_DB_PATH)
63
+
64
+
65
def build_app(*, db_path: str | None = None) -> FastAPI:
    """Construct the FastAPI app, parameterized on the SQLite db path.

    Args:
        db_path: Path of the SQLite database file. When falsy, the path is
            resolved by ``_resolve_db_path``.

    Returns:
        A ``FastAPI`` instance with CORS middleware and all ``/api/...``
        routes registered. The parent directory of the database file is
        created as a side effect.
    """
    resolved = _resolve_db_path(db_path)
    # Make sure the directory that will hold the database file exists.
    Path(resolved).parent.mkdir(parents=True, exist_ok=True)

    @asynccontextmanager
    async def lifespan(_app: FastAPI) -> AsyncIterator[None]:
        # Capture the running event loop so the OTLP receiver thread
        # (which runs sync) can schedule span publishes onto it.
        get_broker().bind_loop(asyncio.get_running_loop())
        yield

    app = FastAPI(
        title="selfevals",
        description="HTTP bridge for the selfevals evals framework.",
        version="0.0.1",
        docs_url="/api/docs",
        redoc_url=None,
        openapi_url="/api/openapi.json",
        lifespan=lifespan,
    )

    app.add_middleware(
        CORSMiddleware,
        allow_origins=["http://localhost:5173", "http://127.0.0.1:5173"],
        allow_credentials=False,
        allow_methods=["GET", "POST", "OPTIONS"],
        allow_headers=["*"],
    )

    # Dependency: a fresh storage handle per request. Each handler closes the
    # handle it received in its own `finally` block.
    def _storage() -> SQLiteStorage:
        return SQLiteStorage(resolved)

    # Same constructor, passed to the SSE streamer as a plain callable so it
    # can open storage itself instead of going through `Depends`.
    def _storage_factory() -> SQLiteStorage:
        return SQLiteStorage(resolved)

    # NOTE: handlers are documented with `#` comments rather than docstrings
    # because FastAPI copies a handler docstring into the OpenAPI description.

    # Liveness probe; also reports which database file this app serves.
    @app.get("/api/health", response_model=HealthResponse, tags=["meta"])
    def health() -> HealthResponse:
        return HealthResponse(status="ok", db_path=resolved)

    # List all workspaces.
    @app.get(
        "/api/workspaces",
        response_model=WorkspaceListResponse,
        tags=["workspaces"],
    )
    def workspaces_index(
        storage: SQLiteStorage = Depends(_storage),
        _user: UserHeader = None,
    ) -> WorkspaceListResponse:
        try:
            return WorkspaceListResponse(workspaces=list_workspaces(storage))
        finally:
            storage.close()

    # Fetch one workspace; 404 when the query layer returns None.
    @app.get(
        "/api/workspaces/{workspace_id}",
        response_model=WorkspaceResponse,
        tags=["workspaces"],
    )
    def workspaces_show(
        workspace_id: str,
        storage: SQLiteStorage = Depends(_storage),
        _user: UserHeader = None,
    ) -> WorkspaceResponse:
        try:
            ws = workspace_detail(storage, workspace_id=workspace_id)
            if ws is None:
                raise HTTPException(status_code=404, detail="workspace not found")
            return ws
        finally:
            storage.close()

    # Create (seed) a workspace. The owner is the user header value, or
    # "local" when the header is absent; the name defaults to the slug.
    @app.post(
        "/api/workspaces",
        response_model=WorkspaceResponse,
        status_code=201,
        tags=["workspaces"],
    )
    def workspaces_create(
        body: CreateWorkspaceRequest,
        storage: SQLiteStorage = Depends(_storage),
        user: UserHeader = None,
    ) -> WorkspaceResponse:
        from selfevals.storage.seed import seed_workspace

        try:
            seeded = seed_workspace(
                storage,
                slug=body.slug,
                name=body.name or body.slug,
                user_id=user or "local",
                description=body.description,
            )
            ws = seeded.workspace
            # A workspace that was just created has no experiments yet, so
            # the aggregate fields are filled with their empty values.
            return WorkspaceResponse(
                id=ws.id,
                slug=ws.slug,
                name=ws.name,
                description=ws.description,
                owner_id=ws.owner_id,
                created_at=ws.created_at,
                experiment_count=0,
                recent_health=None,
            )
        finally:
            storage.close()

    # List experiments of a workspace, capped by `limit` (1..500, default 100).
    @app.get(
        "/api/workspaces/{workspace_id}/experiments",
        response_model=list[dict[str, Any]],
        tags=["experiments"],
    )
    def experiments_index(
        workspace_id: str,
        storage: SQLiteStorage = Depends(_storage),
        limit: Annotated[int, Query(ge=1, le=500)] = 100,
        _user: UserHeader = None,
    ) -> list[dict[str, Any]]:
        try:
            return list_experiments(storage, workspace_id=workspace_id, limit=limit)
        finally:
            storage.close()

    # Fetch one experiment; 404 when the query layer returns None.
    @app.get(
        "/api/workspaces/{workspace_id}/experiments/{experiment_id}",
        response_model=ExperimentDetailResponse,
        tags=["experiments"],
    )
    def experiments_show(
        workspace_id: str,
        experiment_id: str,
        storage: SQLiteStorage = Depends(_storage),
        _user: UserHeader = None,
    ) -> ExperimentDetailResponse:
        try:
            detail = experiment_detail(
                storage,
                workspace_id=workspace_id,
                experiment_id=experiment_id,
            )
            if detail is None:
                raise HTTPException(
                    status_code=404,
                    detail=f"experiment {experiment_id} not found",
                )
            return detail
        finally:
            storage.close()

    # List the iterations of one experiment.
    @app.get(
        "/api/workspaces/{workspace_id}/experiments/{experiment_id}/iterations",
        response_model=IterationListResponse,
        tags=["experiments"],
    )
    def experiments_iterations(
        workspace_id: str,
        experiment_id: str,
        storage: SQLiteStorage = Depends(_storage),
        _user: UserHeader = None,
    ) -> IterationListResponse:
        try:
            return IterationListResponse(
                iterations=experiment_iterations(
                    storage,
                    workspace_id=workspace_id,
                    experiment_id=experiment_id,
                )
            )
        finally:
            storage.close()

    # List the decisions recorded for one experiment.
    @app.get(
        "/api/workspaces/{workspace_id}/experiments/{experiment_id}/decisions",
        tags=["experiments"],
    )
    def experiments_decisions(
        workspace_id: str,
        experiment_id: str,
        storage: SQLiteStorage = Depends(_storage),
        _user: UserHeader = None,
    ) -> list[dict[str, Any]]:
        try:
            return experiment_decisions(
                storage,
                workspace_id=workspace_id,
                experiment_id=experiment_id,
            )
        finally:
            storage.close()

    # Fetch one iteration; 404 when the query layer returns None.
    @app.get(
        "/api/workspaces/{workspace_id}/iterations/{iteration_id}",
        tags=["experiments"],
    )
    def iterations_show(
        workspace_id: str,
        iteration_id: str,
        storage: SQLiteStorage = Depends(_storage),
        _user: UserHeader = None,
    ) -> dict[str, Any]:
        try:
            detail = iteration_detail(
                storage,
                workspace_id=workspace_id,
                iteration_id=iteration_id,
            )
            if detail is None:
                raise HTTPException(status_code=404, detail="iteration not found")
            return detail
        finally:
            storage.close()

    # Fetch one stored trace; 404 when the query layer returns None.
    @app.get(
        "/api/workspaces/{workspace_id}/traces/{trace_id}",
        response_model=TraceResponse,
        tags=["traces"],
    )
    def traces_show(
        workspace_id: str,
        trace_id: str,
        storage: SQLiteStorage = Depends(_storage),
        _user: UserHeader = None,
    ) -> TraceResponse:
        try:
            trace = load_trace(storage, workspace_id=workspace_id, trace_id=trace_id)
            if trace is None:
                raise HTTPException(status_code=404, detail="trace not found")
            return trace
        finally:
            storage.close()

    # Fetch one thread; 404 when the query layer returns None.
    @app.get(
        "/api/workspaces/{workspace_id}/threads/{thread_id}",
        response_model=ThreadResponse,
        tags=["traces"],
    )
    def threads_show(
        workspace_id: str,
        thread_id: str,
        storage: SQLiteStorage = Depends(_storage),
        _user: UserHeader = None,
    ) -> ThreadResponse:
        try:
            thread = load_thread(storage, workspace_id=workspace_id, thread_id=thread_id)
            if thread is None:
                raise HTTPException(status_code=404, detail="thread not found")
            return thread
        finally:
            storage.close()

    # Runs whose broker channel is currently open.
    #
    # Declared `async` on purpose: FastAPI executes plain `def` handlers in a
    # worker thread, but the broker's channel table is only ever mutated on
    # the event loop. Running this handler on the loop as well means the
    # table cannot change while it is being read. The body does no blocking
    # I/O, so it is safe to run there.
    @app.get("/api/runs/active", tags=["traces"])
    async def runs_active(_user: UserHeader = None) -> list[dict[str, str]]:
        return [{"workspace_id": ws, "run_id": run} for (ws, run) in get_broker().active_runs()]

    # Live stream for one run, produced by `stream_trace` from the broker and
    # a storage handle it opens through `_storage_factory`.
    @app.get(
        "/api/workspaces/{workspace_id}/traces/{run_id}/stream",
        tags=["traces"],
        response_class=StreamingResponse,
    )
    async def traces_stream(
        workspace_id: str,
        run_id: str,
        _user: UserHeader = None,
    ) -> StreamingResponse:
        return await stream_trace(
            workspace_id=workspace_id,
            run_id=run_id,
            broker=get_broker(),
            storage_factory=_storage_factory,
        )

    # Anchor-set history of a workspace.
    @app.get(
        "/api/workspaces/{workspace_id}/anchor-set",
        response_model=list[AnchorPoint],
        tags=["anchor-set"],
    )
    def anchor_set(
        workspace_id: str,
        storage: SQLiteStorage = Depends(_storage),
        _user: UserHeader = None,
    ) -> list[AnchorPoint]:
        try:
            return anchor_set_history(storage, workspace_id=workspace_id)
        finally:
            storage.close()

    return app
selfevals/api/broker.py ADDED
@@ -0,0 +1,210 @@
1
+ """In-process pub/sub for live trace streaming.
2
+
3
+ The OTLP receiver thread calls `publish_threadsafe()` when a span lands.
4
+ FastAPI SSE handlers call `subscribe()` to get an async generator of
5
+ events for a given `(workspace_id, run_id)` pair.
6
+
7
+ Why this lives in `selfevals.api`: the broker is a *transport* concern,
8
+ not a capture concern. The receiver doesn't know what it's for; it
9
+ just calls a callback. Coupling the broker to the API package keeps
10
+ the capture pipeline import-graph clean (the SDK / OTLP receiver
11
+ don't need to know FastAPI exists).
12
+
13
+ Scaling note: this is a single-process in-memory broker. The contract
14
+ (`publish` + `subscribe`) is intentionally narrow so a Redis-backed
15
+ implementation can drop in later without touching callers.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import asyncio
21
+ import logging
22
+ from collections.abc import AsyncIterator
23
+ from contextlib import suppress
24
+ from dataclasses import dataclass, field
25
+ from typing import Any
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
# Sentinel objects on the queue — clearer than magic dicts.
@dataclass(frozen=True)
class _Closed:
    """End-of-stream marker placed on a subscriber queue instead of a span payload."""

    # State reported to the subscriber. "completed" is the default; the
    # broker uses "disconnected" when it drops a subscriber whose queue is full.
    final_state: str = "completed"
34
+
35
+
36
# Passed as `maxsize` to the asyncio.Queue created for each subscriber.
_QUEUE_MAXSIZE = 256
"""Per-subscriber queue depth. If we exceed it, the slowest subscriber
gets disconnected — it's the wrong behaviour to backpressure the
receiver thread for one stuck browser tab."""
40
+
41
+
42
@dataclass
class _Subscriber:
    """One consumer of a run's event stream, with its own bounded queue."""

    # Pending events for this consumer; a `_Closed` item marks end of stream.
    queue: asyncio.Queue[dict[str, Any] | _Closed]
    workspace_id: str
    run_id: str
    # Set when the consumer stops iterating, or when the broker gives up on
    # it because its queue is full.
    closed: bool = False
48
+
49
+
50
@dataclass
class _Channel:
    """All subscribers for one (workspace_id, run_id) pair."""

    # Consumers currently attached to this run.
    subscribers: list[_Subscriber] = field(default_factory=list)
    # True once the run has been closed; payloads published afterwards are dropped.
    closed: bool = False
    # State recorded when the run was closed; None while it is still open.
    final_state: str | None = None
57
+
58
+
59
class SpanBroker:
    """In-proc fan-out from the OTLP receiver to SSE subscribers.

    Channel state is only mutated on the bound event loop: the
    ``*_threadsafe`` methods do nothing but schedule the matching
    ``_*_sync`` callback with ``loop.call_soon_threadsafe``, and
    ``subscribe()`` is a coroutine running on that loop.
    """

    def __init__(self) -> None:
        self._channels: dict[tuple[str, str], _Channel] = {}
        self._lock = asyncio.Lock()
        self._loop: asyncio.AbstractEventLoop | None = None

    def bind_loop(self, loop: asyncio.AbstractEventLoop) -> None:
        """Capture the FastAPI event loop so the receiver thread can
        schedule publishes onto it. Call once at app startup."""
        self._loop = loop

    def active_runs(self) -> list[tuple[str, str]]:
        """Snapshot of (workspace_id, run_id) channels that are open.

        Used by `GET /api/runs/active` so the web shell can show a
        live pill for in-flight runs. Includes runs whose channel was
        opened by `mark_run_active_threadsafe` even before any spans
        arrived."""
        # Iterate over a copy: the caller may be on a different thread than
        # the event loop that adds and removes channels.
        return [key for key, ch in list(self._channels.items()) if not ch.closed]

    def mark_run_active_threadsafe(self, workspace_id: str, run_id: str) -> None:
        """Open the channel for a run before any spans flow. Lets the
        web's "active runs" pill light up the moment a run starts.

        No-op when no loop has been bound yet."""
        loop = self._loop
        if loop is None:
            return
        # Loop may already be closed during shutdown — best-effort.
        with suppress(RuntimeError):
            loop.call_soon_threadsafe(self._mark_active_sync, workspace_id, run_id)

    def _mark_active_sync(self, workspace_id: str, run_id: str) -> None:
        """Create the channel for this run if it does not exist yet."""
        key = (workspace_id, run_id)
        self._channels.setdefault(key, _Channel())

    async def subscribe(
        self, workspace_id: str, run_id: str
    ) -> AsyncIterator[dict[str, Any] | _Closed]:
        """Async-iterate events for one run. Caller is responsible for
        cancelling the iteration when the client disconnects.

        Yields span payload dicts and finally a single ``_Closed`` marker,
        after which the iteration ends. If the run is already closed, only
        the ``_Closed`` marker is yielded.

        Note: this does NOT replay history. The SSE handler emits a
        snapshot of the current Trace state *before* calling
        subscribe(), so the subscriber only needs new spans from here.
        """
        key = (workspace_id, run_id)
        sub = _Subscriber(
            queue=asyncio.Queue(maxsize=_QUEUE_MAXSIZE),
            workspace_id=workspace_id,
            run_id=run_id,
        )
        async with self._lock:
            channel = self._channels.setdefault(key, _Channel())
            already_closed = channel.closed
            final_state = channel.final_state
            # Only register on a live channel. A subscriber attached to a
            # closed channel would never be removed again, because the
            # early return below happens outside the cleanup `finally`.
            if not already_closed:
                channel.subscribers.append(sub)
        if already_closed:
            # Emit the close event and return without ever blocking.
            yield _Closed(final_state=final_state or "completed")
            return
        try:
            while True:
                event = await sub.queue.get()
                yield event
                if isinstance(event, _Closed):
                    return
        finally:
            sub.closed = True
            async with self._lock:
                ch = self._channels.get(key)
                if ch is not None:
                    ch.subscribers = [s for s in ch.subscribers if not s.closed]
                    # Forget a finished run once its last listener is gone.
                    if not ch.subscribers and ch.closed:
                        self._channels.pop(key, None)

    def publish_threadsafe(
        self, workspace_id: str, run_id: str, span_payload: dict[str, Any]
    ) -> None:
        """Called from the OTLP receiver's background thread.

        Hops onto the FastAPI event loop via call_soon_threadsafe.
        If no loop is bound, drops silently — the broker is best-effort,
        not the source of truth (SQLite is)."""
        loop = self._loop
        if loop is None:
            return
        # Loop may be closed during process shutdown — best-effort.
        with suppress(RuntimeError):
            loop.call_soon_threadsafe(self._publish_sync, workspace_id, run_id, span_payload)

    def close_run_threadsafe(
        self, workspace_id: str, run_id: str, final_state: str = "completed"
    ) -> None:
        """Schedule closing of a run's channel on the bound event loop.

        No-op when no loop has been bound yet."""
        loop = self._loop
        if loop is None:
            return
        # Loop may be closed during process shutdown — best-effort.
        with suppress(RuntimeError):
            loop.call_soon_threadsafe(self._close_sync, workspace_id, run_id, final_state)

    @staticmethod
    def _put_sentinel(queue: asyncio.Queue[dict[str, Any] | _Closed], sentinel: _Closed) -> None:
        """Enqueue ``sentinel`` even if ``queue`` is full.

        When the queue has no room, the oldest pending item is discarded to
        make space: telling the consumer that the stream ended matters more
        than one span it can still read from storage. Must run on the event
        loop, like every other queue access in this class.
        """
        try:
            queue.put_nowait(sentinel)
        except asyncio.QueueFull:
            with suppress(asyncio.QueueEmpty):
                queue.get_nowait()
            with suppress(asyncio.QueueFull):
                queue.put_nowait(sentinel)

    def _publish_sync(self, workspace_id: str, run_id: str, span_payload: dict[str, Any]) -> None:
        """Deliver one span payload to every subscriber of the run.

        A subscriber whose queue is full is detached from the channel and
        sent a ``_Closed(final_state="disconnected")`` marker.
        """
        key = (workspace_id, run_id)
        channel = self._channels.get(key)
        if channel is None or channel.closed:
            # Channel was never opened, or the run is already closed — drop.
            # A late subscriber will start from the SQLite snapshot.
            return
        for sub in list(channel.subscribers):
            try:
                sub.queue.put_nowait(span_payload)
            except asyncio.QueueFull:
                logger.warning(
                    "SpanBroker: dropping slow subscriber ws=%s run=%s",
                    workspace_id,
                    run_id,
                )
                sub.closed = True
                # Detach it so later publishes neither reach it nor log
                # again, then make sure the marker actually gets queued
                # (a plain put would fail: the queue is full right now).
                channel.subscribers = [s for s in channel.subscribers if s is not sub]
                self._put_sentinel(sub.queue, _Closed(final_state="disconnected"))

    def _close_sync(self, workspace_id: str, run_id: str, final_state: str) -> None:
        """Mark the run closed and notify every attached subscriber."""
        key = (workspace_id, run_id)
        channel = self._channels.get(key)
        if channel is None:
            # Subscribers may attach later; record the closed state so
            # subscribe() can emit _Closed immediately.
            self._channels[key] = _Channel(closed=True, final_state=final_state)
            return
        channel.closed = True
        channel.final_state = final_state
        close_event = _Closed(final_state=final_state)
        for sub in channel.subscribers:
            # Must not be lost on a full queue, otherwise that subscriber
            # would wait forever once it has drained its backlog.
            self._put_sentinel(sub.queue, close_event)
193
+
194
+
195
# Module-level singleton; created lazily on the first get_broker() call.
_broker: SpanBroker | None = None
197
+
198
+
199
def get_broker() -> SpanBroker:
    """Return the shared broker for this process, creating it on first use."""
    global _broker
    existing = _broker
    if existing is not None:
        return existing
    _broker = SpanBroker()
    return _broker
205
+
206
+
207
def reset_for_tests() -> None:
    """Drop the singleton — used by test fixtures to keep state clean.

    Only the module-level reference is cleared, so the next `get_broker()`
    call constructs a new `SpanBroker`.
    """
    global _broker
    _broker = None
selfevals/api/broker_bridge.py ADDED
@@ -0,0 +1,29 @@
1
+ """Adapter that lets the OTLP receiver feed the SSE broker.
2
+
3
+ Lives in `selfevals.api` so the `runner/` package stays unaware that
4
+ SSE / FastAPI exist. Only `selfevals serve` imports this; CLI-only
5
+ runs never load it.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Any
11
+
12
+ from selfevals.api.broker import SpanBroker
13
+ from selfevals.runner.otlp_receiver import SpanPublisher
14
+
15
+
16
class BrokerPublisher(SpanPublisher):
    """SpanPublisher impl that forwards to a SpanBroker.

    Each method hands its arguments to the broker's ``*_threadsafe``
    counterpart unchanged.
    """

    def __init__(self, broker: SpanBroker) -> None:
        self._broker = broker

    def mark_active(self, workspace_id: str, run_id: str) -> None:
        """Announce that a run has started, before any span is published."""
        target = self._broker
        target.mark_run_active_threadsafe(workspace_id, run_id)

    def publish(self, workspace_id: str, run_id: str, span_payload: dict[str, Any]) -> None:
        """Forward one span payload for the given run."""
        target = self._broker
        target.publish_threadsafe(workspace_id, run_id, span_payload)

    def close(self, workspace_id: str, run_id: str, final_state: str = "completed") -> None:
        """Signal that the run is over, passing along its final state."""
        target = self._broker
        target.close_run_threadsafe(workspace_id, run_id, final_state)