getbased-dashboard 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ """getbased-dashboard — web UI for getbased-agents.
2
+
3
+ Orchestration layer that sits between the browser and the rag + mcp
4
+ packages. Holds no data of its own: proxies knowledge-base operations
5
+ to rag, spawns the mcp stdio process on demand for tool discovery and
6
+ config generation, reads the mcp's activity log for the dashboard feed.
7
+ """
8
+
9
+ __version__ = "0.1.0"  # NOTE(review): the wheel this file ships in is labelled 0.5.0 — confirm whether this constant is stale
@@ -0,0 +1,6 @@
1
+ """Dashboard HTTP API.
2
+
3
+ Each submodule registers its routes onto a FastAPI app via a `register()`
4
+ function. Keeps the server.py entry point readable and lets each concern
5
+ (knowledge, mcp, activity) own its own dependencies.
6
+ """
@@ -0,0 +1,153 @@
1
+ """Activity API — tail the MCP's JSONL activity log and surface the
2
+ recent records + simple aggregations.
3
+
4
+ The log is written by getbased-mcp at `$XDG_STATE_HOME/getbased/mcp/
5
+ activity.jsonl` (configurable via LENS_MCP_ACTIVITY_LOG). One record
6
+ per tool call: tool name, timestamp, duration, ok flag, error class on
7
+ failure. Args are never logged upstream so we don't have to strip them.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import json
13
+ import os
14
+ from collections import defaultdict
15
+ from pathlib import Path
16
+
17
+ from fastapi import APIRouter, FastAPI, Request
18
+
19
+ from ..config import DashboardConfig
20
+ from ..server import _require_auth
21
+
22
+
23
+ def _cfg(request: Request) -> DashboardConfig:
24
+ return request.app.state.config
25
+
26
+
27
+ # Cap on how much of the log we read per request. Users with heavy agent
28
+ # usage can accumulate megabytes quickly — loading the entire file on
29
+ # every poll is wasteful. Tailing from the end keeps the endpoint O(cap)
30
+ # regardless of how long the log has been running.
31
+ _TAIL_BYTES = 512 * 1024
32
+
33
+
34
+ def _read_records(path: Path, limit: int) -> list[dict]:
35
+ """Return up to `limit` most-recent records. If the file is under
36
+ _TAIL_BYTES read the whole thing; otherwise seek from the end. Malformed
37
+ lines (partial last write, corrupt records) are silently skipped so
38
+ one bad line can't hide the rest."""
39
+ if not path.exists():
40
+ return []
41
+ size = path.stat().st_size
42
+ try:
43
+ if size <= _TAIL_BYTES:
44
+ text = path.read_text(encoding="utf-8", errors="replace")
45
+ else:
46
+ with path.open("rb") as f:
47
+ f.seek(size - _TAIL_BYTES)
48
+ # Drop the first (likely partial) line so we don't parse
49
+ # garbage. There will always be a complete line after the
50
+ # first newline we find, assuming writers use line-atomic
51
+ # append — which Python's text-mode write does.
52
+ f.readline()
53
+ text = f.read().decode("utf-8", errors="replace")
54
+ except OSError:
55
+ return []
56
+
57
+ records: list[dict] = []
58
+ for line in text.splitlines():
59
+ line = line.strip()
60
+ if not line:
61
+ continue
62
+ try:
63
+ rec = json.loads(line)
64
+ if isinstance(rec, dict):
65
+ records.append(rec)
66
+ except json.JSONDecodeError:
67
+ continue
68
+
69
+ return records[-limit:]
70
+
71
+
72
+ def _aggregate(records: list[dict]) -> dict:
73
+ """Per-tool counts, success rate, and P50/P95 latency. O(N log N) —
74
+ fine up to the ~thousand records our tail window holds."""
75
+ by_tool: dict[str, list[dict]] = defaultdict(list)
76
+ for r in records:
77
+ t = r.get("tool")
78
+ if isinstance(t, str):
79
+ by_tool[t].append(r)
80
+
81
+ def _percentile(sorted_vals: list[int], p: float) -> int | None:
82
+ if not sorted_vals:
83
+ return None
84
+ idx = int(p * (len(sorted_vals) - 1))
85
+ return sorted_vals[idx]
86
+
87
+ tools: list[dict] = []
88
+ for name, group in sorted(by_tool.items()):
89
+ durations = sorted(
90
+ int(r.get("duration_ms", 0)) for r in group if isinstance(r.get("duration_ms"), (int, float))
91
+ )
92
+ errors = sum(1 for r in group if not r.get("ok", True))
93
+ tools.append(
94
+ {
95
+ "tool": name,
96
+ "calls": len(group),
97
+ "errors": errors,
98
+ "error_rate": (errors / len(group)) if group else 0.0,
99
+ "p50_ms": _percentile(durations, 0.5),
100
+ "p95_ms": _percentile(durations, 0.95),
101
+ }
102
+ )
103
+
104
+ total_errors = sum(1 for r in records if not r.get("ok", True))
105
+ return {
106
+ "total_calls": len(records),
107
+ "total_errors": total_errors,
108
+ "overall_error_rate": (total_errors / len(records)) if records else 0.0,
109
+ "tools": tools,
110
+ }
111
+
112
+
113
def register(app: FastAPI) -> None:
    """Attach the `/api/activity` routes to `app`.

    Routes:
        GET    /api/activity  — recent records plus aggregated stats.
        DELETE /api/activity  — remove the log file and return the
                                resulting state.

    Both handlers call `_require_auth` before touching the log.
    """
    router = APIRouter(prefix="/api/activity", tags=["activity"])

    # Record cap used when the caller does not pass one.
    default_limit = 200

    def _snapshot(cfg: DashboardConfig, limit: int) -> dict:
        """Build the response body both routes share, from what is on disk."""
        records = _read_records(cfg.activity_log, limit)
        return {
            "log_path": str(cfg.activity_log),
            "log_exists": cfg.activity_log.exists(),
            "records": records,
            "stats": _aggregate(records),
        }

    @router.get("")
    async def activity_feed(request: Request, limit: int = default_limit):
        """Return up to `limit` (clamped to 1..1000) recent records and stats."""
        cfg = _cfg(request)
        _require_auth(request, cfg)
        # Bound limit so a client can't ask us to return 10 million records
        # in one payload. 1000 is plenty for a dashboard tick.
        limit = max(1, min(1000, limit))
        return _snapshot(cfg, limit)

    @router.delete("")
    async def clear_activity(request: Request):
        """Wipe the log. Useful for resetting the dashboard's view after
        a period of testing. Returns the state found on disk afterwards so
        the UI can refresh in one round-trip."""
        cfg = _cfg(request)
        _require_auth(request, cfg)
        try:
            # Unlink directly rather than checking exists() first: the
            # check-then-act pair races with other processes.
            os.unlink(cfg.activity_log)
        except OSError:
            # Already gone, or the unlink was refused. Either way the
            # snapshot below reports what is really on disk instead of
            # claiming the log is empty when it could not be removed.
            pass
        return _snapshot(cfg, default_limit)

    app.include_router(router)
@@ -0,0 +1,367 @@
1
+ """Knowledge tab API — thin proxy to getbased-rag.
2
+
3
+ We don't replicate rag's data model here; we just forward the browser's
4
+ bearer-authed requests to rag's endpoints, with one layer of timeout +
5
+ error normalisation so the frontend sees consistent JSON shapes.
6
+
7
+ All endpoints require the dashboard bearer token (same value as rag's).
8
+ Dashboard validates the browser's token, then uses the same key to
9
+ authenticate its upstream call to rag.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import json
15
+ import os
16
+ import tempfile
17
+ from pathlib import Path
18
+
19
+ import httpx
20
+ from fastapi import APIRouter, FastAPI, File, HTTPException, Request, UploadFile
21
+ from fastapi.responses import StreamingResponse
22
+
23
+ from ..config import DashboardConfig
24
+ from ..server import _require_auth
25
+
26
+ _KNOWLEDGE_TIMEOUT = 30.0
27
+ _INGEST_TIMEOUT = 300.0 # real-model ingest can run minutes on large files
28
+
29
+ # Size cap enforced at the dashboard hop. Mirrors rag's default so users
30
+ # see one consistent ceiling regardless of which layer catches an oversize
31
+ # upload. Configurable via env so power users with large corpora can
32
+ # raise it on both services deliberately. Note: rag has its own cap that
33
+ # ultimately bounds on-disk writes — this dashboard-side cap prevents
34
+ # full buffering into RAM before we reach rag.
35
+ _MAX_INGEST_BYTES = int(
36
+ os.environ.get("DASHBOARD_MAX_INGEST_BYTES", str(256 * 1024 * 1024))
37
+ )
38
+ # Stream chunk size — 64 KB is plenty for network forwarding and keeps
39
+ # per-request memory predictable.
40
+ _STREAM_CHUNK = 64 * 1024
41
+
42
+
43
+ def _cfg(request: Request) -> DashboardConfig:
44
+ return request.app.state.config
45
+
46
+
47
async def _proxy_json(
    request: Request,
    method: str,
    path: str,
    json_body: dict | None = None,
    timeout: float = _KNOWLEDGE_TIMEOUT,
):
    """Forward a JSON call to rag with the dashboard's bearer key.

    Authenticates the incoming request first, then issues `method` against
    `cfg.lens_url + path` and normalises failures into HTTPException so
    the frontend sees uniform error bodies.

    Args:
        request: Incoming request; supplies the config and is auth-checked.
        method: HTTP method for the upstream call.
        path: Upstream path, appended verbatim to `cfg.lens_url`.
        json_body: Optional JSON payload to send upstream.
        timeout: Upstream timeout in seconds.

    Returns:
        rag's decoded JSON body, or `{}` when the response has no content.

    Raises:
        HTTPException: 502 when rag is unreachable, the request fails, or
            a success response is not valid JSON; rag's own status code
            when rag answers with >= 400.
    """
    cfg = _cfg(request)
    _require_auth(request, cfg)
    key = cfg.read_api_key()
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            r = await client.request(
                method,
                f"{cfg.lens_url}{path}",
                headers={"Authorization": f"Bearer {key}"},
                json=json_body,
            )
    except httpx.ConnectError as e:
        raise HTTPException(
            status_code=502,
            detail=f"rag server not reachable at {cfg.lens_url}",
        ) from e
    except httpx.RequestError as e:
        raise HTTPException(
            status_code=502, detail=f"rag request failed: {e}"
        ) from e

    if r.status_code >= 400:
        # Bubble rag's error body (expected to be JSON with an `error`
        # key) with its status code.
        fallback = r.text or f"rag returned {r.status_code}"
        try:
            body = r.json()
        except ValueError:
            body = {"error": fallback}
        if isinstance(body, dict):
            detail = body.get("error") or body
        else:
            # Valid JSON but not an object (list, string, null, ...):
            # calling .get on it would raise AttributeError.
            detail = body or fallback
        raise HTTPException(status_code=r.status_code, detail=detail)

    if not r.content:
        return {}
    try:
        return r.json()
    except ValueError as e:
        # A 2xx with a non-JSON body is an upstream fault, not ours.
        raise HTTPException(
            status_code=502, detail="rag returned a non-JSON response"
        ) from e
84
+
85
+
86
def register(app: FastAPI) -> None:
    """Attach the `/api/knowledge` routes to `app`.

    Every route except `/ingest` is a thin pass-through to rag via
    `_proxy_json`, which performs the auth check. `/ingest` spools the
    multipart upload to a temp directory under a byte cap and then
    forwards it to rag, either as a single JSON exchange or as an NDJSON
    stream when the caller sends `Accept: application/x-ndjson`.
    """
    # Local import so the block does not depend on a file-level import.
    from urllib.parse import quote as _quote

    router = APIRouter(prefix="/api/knowledge", tags=["knowledge"])

    def _enc(value: str, safe: str = "") -> str:
        """Percent-encode a path parameter before splicing it into the
        upstream URL. The framework hands path parameters over already
        decoded, so a raw `?`, `#` or `%` in an id or source name would
        otherwise be read as URL syntax by the upstream request."""
        return _quote(value, safe=safe)

    def _error_detail(resp):
        """Extract the most useful error payload from a >= 400 response."""
        fallback = resp.text or f"rag returned {resp.status_code}"
        try:
            body = resp.json()
        except ValueError:
            return fallback
        if isinstance(body, dict):
            return body.get("error") or body
        # JSON but not an object — .get would raise AttributeError.
        return body or fallback

    def _ndjson_error(message, status=None) -> bytes:
        """Encode one NDJSON `error` event line."""
        event = {"event": "error", "message": message}
        if status is not None:
            event["status"] = status
        return (json.dumps(event) + "\n").encode()

    @router.get("/libraries")
    async def list_libraries(request: Request):
        return await _proxy_json(request, "GET", "/libraries")

    @router.post("/libraries")
    async def create_library(request: Request, body: dict):
        # Forward the body verbatim — rag's LibraryCreateRequest handles
        # validation. Wrapping with our own pydantic model would add no
        # value and would drift when rag changes its schema.
        return await _proxy_json(request, "POST", "/libraries", json_body=body)

    @router.post("/libraries/{library_id}/activate")
    async def activate_library(request: Request, library_id: str):
        return await _proxy_json(
            request, "POST", f"/libraries/{_enc(library_id)}/activate"
        )

    @router.patch("/libraries/{library_id}")
    async def rename_library(request: Request, library_id: str, body: dict):
        return await _proxy_json(
            request, "PATCH", f"/libraries/{_enc(library_id)}", json_body=body
        )

    @router.delete("/libraries/{library_id}")
    async def delete_library(request: Request, library_id: str):
        return await _proxy_json(
            request, "DELETE", f"/libraries/{_enc(library_id)}"
        )

    @router.post("/search")
    async def search(request: Request, body: dict):
        """Proxy to rag's /query. Frontend sends {query, top_k}; we stitch
        on the protocol version so the UI stays simpler."""
        try:
            top_k = int(body.get("top_k", 5))
        except (TypeError, ValueError):
            # A non-numeric top_k is a client error, not a server fault.
            raise HTTPException(
                status_code=400, detail="top_k must be an integer"
            ) from None
        payload = {
            "version": 1,
            "query": body.get("query", ""),
            "top_k": top_k,
        }
        return await _proxy_json(request, "POST", "/query", json_body=payload)

    @router.get("/stats")
    async def stats(request: Request):
        return await _proxy_json(request, "GET", "/stats")

    @router.get("/info")
    async def info(request: Request):
        """Proxy rag's /info for the Knowledge tab's engine badge."""
        return await _proxy_json(request, "GET", "/info")

    @router.get("/models")
    async def models(request: Request):
        """Proxy rag's curated embedding-model list for the create-library
        picker. Lets the dashboard render the dropdown without hard-coding
        model ids + dims — if rag adds new ones, they appear automatically."""
        return await _proxy_json(request, "GET", "/models")

    @router.delete("/sources")
    async def clear_sources(request: Request):
        return await _proxy_json(request, "DELETE", "/sources")

    @router.delete("/sources/{source:path}")
    async def delete_source(request: Request, source: str):
        # Keep "/" literal: the route captures a multi-segment path.
        return await _proxy_json(
            request, "DELETE", f"/sources/{_enc(source, safe='/')}"
        )

    @router.post("/ingest")
    async def ingest(
        request: Request,
        files: list[UploadFile] = File(...),
    ):
        """Forward a multipart upload to rag's /ingest.

        Streams the upload to a temp directory in chunks, enforcing a
        byte cap across all files as we read — keeps the dashboard from
        buffering multi-GB uploads into RAM. Filenames are reduced to
        their basename at the dashboard layer too.

        When the browser sends `Accept: application/x-ndjson`, we open
        a streaming request to rag and pipe its output through as it
        arrives. Otherwise: single-shot JSON.

        Raises:
            HTTPException: 400 for an empty or unusable upload, 413 when
                the byte cap is exceeded, 502 when rag cannot be reached
                or answers a success with non-JSON (single-shot path),
                or rag's own status when it answers >= 400.
        """
        cfg = _cfg(request)
        _require_auth(request, cfg)
        key = cfg.read_api_key()

        if not files:
            raise HTTPException(status_code=400, detail="No files uploaded")

        # Create tempdir without a `with` block — streaming needs the
        # directory to outlive the handler return. Cleanup is explicit,
        # either at end-of-handler (single-shot path) or in the generator's
        # finally (streaming path).
        tmpdir = tempfile.mkdtemp(prefix="gbd-ingest-")
        tmp_path = Path(tmpdir)

        def _cleanup_tmpdir() -> None:
            import shutil as _shutil

            _shutil.rmtree(tmpdir, ignore_errors=True)

        try:
            total_bytes = 0
            saved: list[tuple[str, Path, str]] = []

            for upload in files:
                raw_name = (upload.filename or "").replace("\\", "/")
                safe_name = os.path.basename(raw_name)
                if not safe_name or safe_name in (".", ".."):
                    continue
                # One sub-directory per upload: two uploads that share a
                # basename (e.g. "a/x.txt" and "b/x.txt") would otherwise
                # overwrite each other and be forwarded with the same
                # content twice.
                slot = tmp_path / str(len(saved))
                slot.mkdir()
                dest = slot / safe_name
                with dest.open("wb") as out:
                    while True:
                        chunk = await upload.read(_STREAM_CHUNK)
                        if not chunk:
                            break
                        total_bytes += len(chunk)
                        if total_bytes > _MAX_INGEST_BYTES:
                            raise HTTPException(
                                status_code=413,
                                detail=f"Upload exceeds {_MAX_INGEST_BYTES} bytes",
                            )
                        out.write(chunk)
                saved.append(
                    (
                        safe_name,
                        dest,
                        upload.content_type or "application/octet-stream",
                    )
                )

            if not saved:
                raise HTTPException(status_code=400, detail="No valid files in upload")
        except BaseException:
            # BaseException, not Exception: asyncio.CancelledError (raised
            # when the client disconnects mid-upload) derives from
            # BaseException and would otherwise leak the temp directory.
            _cleanup_tmpdir()
            raise

        def _open_parts(handles: list) -> list[tuple[str, tuple[str, object, str]]]:
            """Open every saved file and build httpx's `files=` list.
            Each handle is recorded in `handles` as soon as it is opened
            so the caller's finally can close whatever got opened."""
            parts: list[tuple[str, tuple[str, object, str]]] = []
            for name, disk_path, mime in saved:
                fh = disk_path.open("rb")
                handles.append(fh)
                parts.append(("files", (name, fh, mime)))
            return parts

        def _close_all(handles: list) -> None:
            for fh in handles:
                try:
                    fh.close()
                except OSError:
                    pass

        accept = (request.headers.get("accept") or "").lower()
        want_stream = "application/x-ndjson" in accept

        if want_stream:
            async def _pipe_stream():
                """Pipe rag's NDJSON stream through to the browser. Each
                chunk from `r.aiter_raw()` is yielded as it arrives, so
                the browser sees progress events live instead of waiting
                for ingest to complete. Temp dir + file handles are kept
                alive here and cleaned in the finally.

                NOTE(review): the finally only runs once iteration has
                started — confirm the temp dir cannot leak if the
                response is dropped before the first chunk is requested.
                """
                fhs: list = []
                client = httpx.AsyncClient(timeout=_INGEST_TIMEOUT)
                try:
                    multipart = _open_parts(fhs)

                    try:
                        async with client.stream(
                            "POST",
                            f"{cfg.lens_url}/ingest",
                            headers={
                                "Authorization": f"Bearer {key}",
                                "Accept": "application/x-ndjson",
                            },
                            files=multipart,
                        ) as r:
                            if r.status_code >= 400:
                                # Read at most ~16 KB of the error body.
                                body_bytes = b""
                                async for c in r.aiter_bytes():
                                    body_bytes += c
                                    if len(body_bytes) > 16 * 1024:
                                        break
                                try:
                                    body_obj = json.loads(body_bytes.decode())
                                except ValueError:
                                    # Covers both bad UTF-8 and bad JSON.
                                    body_obj = {
                                        "error": body_bytes.decode(errors="replace")
                                        or f"rag returned {r.status_code}"
                                    }
                                msg = (
                                    body_obj.get("error")
                                    if isinstance(body_obj, dict)
                                    else str(body_obj)
                                ) or f"rag returned {r.status_code}"
                                yield _ndjson_error(msg, r.status_code)
                                return
                            # Pass through each chunk verbatim — rag
                            # emits one JSON object per line.
                            async for c in r.aiter_raw():
                                if c:
                                    yield c
                    except httpx.ConnectError:
                        yield _ndjson_error(
                            f"rag server not reachable at {cfg.lens_url}"
                        )
                    except httpx.RequestError as e:
                        yield _ndjson_error(f"ingest request failed: {e}")
                finally:
                    await client.aclose()
                    _close_all(fhs)
                    _cleanup_tmpdir()

            return StreamingResponse(
                _pipe_stream(), media_type="application/x-ndjson"
            )

        # Default: single-shot JSON path — open handles, POST, close,
        # return the summary dict.
        try:
            fhs: list = []
            try:
                multipart = _open_parts(fhs)

                try:
                    async with httpx.AsyncClient(timeout=_INGEST_TIMEOUT) as client:
                        r = await client.post(
                            f"{cfg.lens_url}/ingest",
                            headers={"Authorization": f"Bearer {key}"},
                            files=multipart,
                        )
                except httpx.ConnectError as e:
                    raise HTTPException(
                        status_code=502,
                        detail=f"rag server not reachable at {cfg.lens_url}",
                    ) from e
                except httpx.RequestError as e:
                    raise HTTPException(
                        status_code=502, detail=f"ingest request failed: {e}"
                    ) from e
            finally:
                _close_all(fhs)

            if r.status_code >= 400:
                raise HTTPException(
                    status_code=r.status_code, detail=_error_detail(r)
                )
            if not r.content:
                return {}
            try:
                return r.json()
            except ValueError as e:
                # A 2xx with a non-JSON body is an upstream fault.
                raise HTTPException(
                    status_code=502, detail="rag returned a non-JSON response"
                ) from e
        finally:
            _cleanup_tmpdir()

    app.include_router(router)