mcp-kb 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mcp_kb/ui/api.py ADDED
@@ -0,0 +1,377 @@
1
+ """UI API helpers built on top of the knowledge base.
2
+
3
+ The functions in this module provide reusable primitives for serving the web
4
+ UI. They deliberately separate data shaping from the HTTP layer, which keeps
5
+ the request handler small and easy to test in isolation.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import threading
12
+ from typing import Dict, List, Literal, Optional, TypedDict, Any
13
+
14
+ from mcp_kb.knowledge.store import KnowledgeBase, FileSegment
15
+ from mcp_kb.knowledge.search import search_text
16
+ from mcp_kb.knowledge.events import KnowledgeBaseSearchListener
17
+
18
+
19
class TreeNode(TypedDict):
    """One entry of the file tree, shaped so it can be dumped to JSON as-is."""

    # Last path component of the entry.
    name: str
    # Slash-separated location of the entry, built from its parents' names.
    path: str
    # Discriminator between leaf files and directories.
    type: Literal["file", "dir"]
    # Nested entries; file nodes carry an empty list.
    children: List["TreeNode"]
26
+
27
+
28
def build_tree_json(kb: KnowledgeBase) -> TreeNode:
    """Describe the active files of ``kb`` as a nested, JSON-ready tree.

    Every path yielded by ``kb.iter_active_files()`` is made relative to
    ``kb.rules.root`` and folded into a hierarchy of directories and files.
    Which files count as "active" is decided entirely by
    ``kb.iter_active_files()``; this function applies no filtering of its own.

    Returns
    -------
    TreeNode
        A synthetic root directory node (empty ``name`` and ``path``) whose
        ``children`` are sorted by name at every level.
    """

    # Intermediate shape: name -> nested mapping. A file is an empty mapping,
    # a directory is a non-empty one.
    hierarchy: Dict[str, Dict] = {}
    for file_path in kb.iter_active_files():
        segments = file_path.relative_to(kb.rules.root).parts
        level = hierarchy
        for folder in segments[:-1]:
            level = level.setdefault(folder, {})
        level.setdefault(segments[-1], {})

    def _render(label: str, branch: Dict, parent: str) -> TreeNode:
        """Turn one ``hierarchy`` entry (and everything below it) into a node."""
        location = label if not parent else f"{parent}/{label}"
        if not branch:  # empty mapping marks a file
            return TreeNode(name=label, path=location, type="file", children=[])
        kids: List[TreeNode] = []
        for child_name in sorted(branch):
            kids.append(_render(child_name, branch[child_name], location))
        return TreeNode(name=label, path=location, type="dir", children=kids)

    # Wrap the top-level entries in the synthetic root.
    top_level = [_render(label, hierarchy[label], "") for label in sorted(hierarchy)]
    return TreeNode(name="", path="", type="dir", children=top_level)
58
+
59
+
60
def read_file_json(kb: KnowledgeBase, path: str) -> FileSegment:
    """Fetch ``path`` from the knowledge base and return its checked segment.

    The segment produced by ``kb.read_file(path)`` is run through its own
    ``assert_path`` check against ``kb.rules`` before being handed back, so
    anything that check raises propagates to the caller.
    """

    result = kb.read_file(path)
    result.assert_path(kb.rules)
    return result
66
+
67
+
68
def write_file(kb: KnowledgeBase, path: str, content: str) -> None:
    """Store ``content`` at ``path`` by delegating to ``kb.create_file``.

    Nothing is written directly from here: the call goes through
    :meth:`~mcp_kb.knowledge.store.KnowledgeBase.create_file`, so whatever
    that method does for other callers applies to UI writes as well. Its
    return value is discarded.
    """

    kb.create_file(path, content)
    return None
77
+
78
+
79
+ __all__ = [
80
+ "TreeNode",
81
+ "build_tree_json",
82
+ "read_file_json",
83
+ "write_file",
84
+ "search_json",
85
+ "vector_status_json",
86
+ "vector_embeddings_json",
87
+ "vector_query_embedding_json",
88
+ "vector_reindex_json",
89
+ "vector_refit_json",
90
+ ]
91
+
92
+
93
def search_json(kb: KnowledgeBase, query: str, *, limit: int | None = None) -> Dict[str, Any]:
    """Return a JSON-compatible search response for ``query``.

    Parameters
    ----------
    kb:
        Knowledge base to search. Its ``listeners`` attribute, when present,
        is scanned for search-capable listeners.
    query:
        Text to search for.
    limit:
        Forwarded to ``search_text`` as ``n_results``; ``None`` leaves the
        choice to ``search_text``.

    Returns
    -------
    dict
        ``{"results": [...], "meta": ...}``. ``results`` holds one
        ``model_dump()`` dictionary per match returned by ``search_text``
        (presumably ``path``, ``line`` and ``context`` -- confirm against the
        match model). ``meta`` is the second value returned by
        ``search_text``, passed through unchanged.
    """

    # Reuse the same provider model as the MCP tool: listeners that expose a
    # search capability are handed to ``search_text`` so UI and MCP results
    # come from the same code path.
    providers: List[KnowledgeBaseSearchListener] = [
        listener
        for listener in getattr(kb, "listeners", ())  # type: ignore[attr-defined]
        if isinstance(listener, KnowledgeBaseSearchListener)
    ]

    matches, meta = search_text(kb, query, providers=providers, n_results=limit)
    return {"results": [match.model_dump() for match in matches], "meta": meta}
119
+
120
+
121
+ # ----------------------------- Vector Endpoints -----------------------------
122
+
123
+ def _find_chroma_listener(kb: KnowledgeBase) -> Optional[Any]:
124
+ """Return the first Chroma-like listener attached to ``kb`` or ``None``.
125
+
126
+ The UI exposes additional endpoints when a vector store is available. To
127
+ avoid introducing a hard dependency on Chroma, the discovery relies on
128
+ duck-typing: listeners that provide a ``collection`` attribute (Chroma
129
+ collection) and a ``configuration`` with an ``embedding`` field are treated
130
+ as vector-capable.
131
+ """
132
+
133
+ for listener in getattr(kb, "listeners", ()): # type: ignore[attr-defined]
134
+ if hasattr(listener, "collection"):
135
+ return listener
136
+ return None
137
+
138
+
139
+ def _to_list(x: Any) -> List[float]:
140
+ """Return ``x`` as a JSON-serializable list of floats.
141
+
142
+ Chroma may return embeddings as NumPy arrays. This helper converts any
143
+ sequence-like object to a plain Python list of floats using ``tolist`` when
144
+ available.
145
+ """
146
+
147
+ try:
148
+ if isinstance(x, str):
149
+ data = json.loads(x)
150
+ if isinstance(data, (list, tuple)):
151
+ return [float(v) for v in data]
152
+ if hasattr(x, "indices") and hasattr(x, "values"):
153
+ values = getattr(x, "values")
154
+ indices = getattr(x, "indices")
155
+ size = int(getattr(x, "size", len(values)))
156
+ dense = [0.0] * size
157
+ for idx, value in zip(indices, values):
158
+ dense[int(idx)] = float(value)
159
+ return dense
160
+ if hasattr(x, "tolist"):
161
+ return [float(v) for v in x.tolist()]
162
+ except Exception:
163
+ pass
164
+ # Fallback: best-effort cast
165
+ try:
166
+ return [float(v) for v in x]
167
+ except Exception:
168
+ return []
169
+
170
+
171
def vector_status_json(kb: KnowledgeBase) -> Dict[str, object]:
    """Report whether a vector store is available and basic dataset stats.

    Returns a JSON-compatible dictionary with the following keys:

    - ``available`` (bool): true when ``_find_chroma_listener`` finds a
      listener on ``kb``.
    - ``dimensions`` (int | None): length of the first embedding of a
      one-row sample, or ``None`` when no sample could be read.
    - ``count`` (int | None): result of ``collection.count()``, or ``None``
      when that call fails.

    Failures while talking to the collection are swallowed on purpose so the
    status endpoint always produces an answer.
    """

    listener = _find_chroma_listener(kb)
    if listener is None:
        return {"available": False, "dimensions": None, "count": None}

    collection = getattr(listener, "collection")

    # Total count
    try:
        total = collection.count()  # type: ignore[no-untyped-call]
    except Exception:
        total = None

    # Sample one embedding to infer dimensions
    dims: Optional[int] = None
    try:
        sample = collection.get(limit=1, include=["embeddings"])  # type: ignore[no-untyped-call]
        embs = sample.get("embeddings")
        # Compare against None and use len() rather than truthiness or an
        # isinstance(list) test: an array-backed result is neither a list nor
        # safely usable in a boolean context.
        if embs is not None and len(embs) > 0:
            first = _to_list(embs[0])
            if first:
                dims = len(first)
    except Exception:
        dims = None

    return {"available": True, "dimensions": dims, "count": total}
206
+
207
+
208
def vector_embeddings_json(
    kb: KnowledgeBase,
    *,
    limit: int = 1000,
    offset: int = 0,
    path: Optional[str] = None,
) -> List[Dict[str, object]]:
    """Return a page of embeddings with metadata for plotting.

    Parameters
    ----------
    limit:
        Maximum number of items to return (default: 1000).
    offset:
        Starting offset into the result set for paging.
    path:
        Optional relative path; when given, only rows whose ``path`` metadata
        equals it are requested.

    Returns
    -------
    list[dict]
        One dictionary per returned embedding with the keys ``id``,
        ``document_id`` (str or ``None``), ``path``, ``chunk`` (taken from
        the ``chunk_number`` metadata key, default 0), ``embedding``,
        ``umap2d`` and ``umap3d``. The two ``umap`` lists are empty when the
        metadata does not carry them. An empty list is returned when no
        vector-capable listener is attached.
    """

    listener = _find_chroma_listener(kb)
    if listener is None:
        return []

    collection = getattr(listener, "collection")
    where = {"path": path} if path else None
    # ``ids`` are not listed in ``include``; they are read from the payload
    # below regardless.
    payload = collection.get(  # type: ignore[no-untyped-call]
        where=where,
        include=["embeddings", "metadatas"],
        limit=limit,
        offset=offset,
    )

    # Explicit None checks: truthiness is ambiguous for array-backed values.
    ids = payload.get("ids")
    embs = payload.get("embeddings")
    metas = payload.get("metadatas")
    if ids is None:
        ids = []
    if embs is None:
        embs = []
    if metas is None:
        metas = []

    results: List[Dict[str, object]] = []
    for i, emb in enumerate(embs):
        raw_meta = metas[i] if i < len(metas) else None
        # Normalise missing or non-dict metadata to an empty mapping so every
        # lookup below is safe and no value leaks over from a previous row.
        meta: Dict[str, Any] = raw_meta if isinstance(raw_meta, dict) else {}
        document_id = meta.get("document_id")
        results.append(
            {
                "id": ids[i] if i < len(ids) else str(i),
                "document_id": document_id if isinstance(document_id, str) else None,
                "path": meta.get("path", ""),
                "chunk": int(meta.get("chunk_number", 0)),
                "embedding": _to_list(emb),
                "umap2d": _to_list(meta.get("umap2d", [])),
                "umap3d": _to_list(meta.get("umap3d", [])),
            }
        )
    return results
282
+
283
+
284
def vector_query_embedding_json(kb: KnowledgeBase, query: str) -> Dict[str, object]:
    """Compute and return the embedding vector for ``query``.

    Two sources are tried in order:

    1. the ``_embedding_function`` attribute of the listener's collection;
    2. a fresh instance built from ``listener._deps.embedding_factories``,
       keyed by ``listener.configuration.embedding`` (default key
       ``"default"``).

    Returns
    -------
    dict
        ``{"embedding": list[float], "used_model": str}`` where
        ``used_model`` is the class name of the embedding callable that
        produced the vector. When no listener is attached or both sources
        fail, ``{"embedding": [], "used_model": None}`` is returned. Errors
        raised by either source are swallowed on purpose.
    """

    listener = _find_chroma_listener(kb)
    if listener is None:
        return {"embedding": [], "used_model": None}

    def _embed_with(func: Any) -> Optional[Dict[str, object]]:
        """Embed ``query`` with ``func``; ``None`` when it yields no vector."""
        vecs = func([query])  # type: ignore[misc]
        # Accept any sized container, not only list/tuple: ``_to_list``
        # already copes with array-like rows, so an array-backed batch must
        # not be discarded here.
        if vecs is not None and len(vecs) > 0:
            return {
                "embedding": _to_list(vecs[0]),
                "used_model": func.__class__.__name__,
            }
        return None

    collection = getattr(listener, "collection")

    # Preferred path: use the collection's configured embedding function.
    func = getattr(collection, "_embedding_function", None)
    if callable(func):
        try:
            result = _embed_with(func)
            if result is not None:
                return result
        except Exception:
            # Best effort: fall through to the factory-based fallback.
            pass

    # Fallback: try to build a new embedding function of the same type.
    try:
        deps = getattr(listener, "_deps", None)
        cfg = getattr(listener, "configuration", None)
        if deps is not None and cfg is not None:
            factories: Dict[str, Any] = getattr(deps, "embedding_factories", {})
            factory = factories.get(getattr(cfg, "embedding", "default"))
            if factory is not None:
                result = _embed_with(factory())
                if result is not None:
                    return result
    except Exception:
        # Best effort: report "no embedding" rather than failing the request.
        pass

    return {"embedding": [], "used_model": None}
328
+
329
+
330
def vector_reindex_json(kb: KnowledgeBase) -> Dict[str, object]:
    """Kick off an index rebuild in the background and report its status.

    The returned dictionary has a single ``status`` key:

    - ``"unavailable"``: no vector-capable listener is attached;
    - ``"queued"`` / ``"running"``: the listener's ``start_reindex_async``
      returned a truthy / falsy value;
    - ``"queued"``: no async starter exists and a daemon thread running
      ``listener.reindex(kb)`` was started instead;
    - ``"error"``: that fallback thread could not be started.
    """

    listener = _find_chroma_listener(kb)
    if listener is None:
        return {"status": "unavailable"}

    async_start = getattr(listener, "start_reindex_async", None)
    if callable(async_start):
        if async_start(kb):
            return {"status": "queued"}
        return {"status": "running"}

    def _reindex_quietly() -> None:
        """Run the blocking reindex; errors must not escape the thread."""
        try:
            listener.reindex(kb)  # type: ignore[attr-defined]
        except Exception:
            pass

    try:
        worker = threading.Thread(target=_reindex_quietly, name="kb-reindex", daemon=True)
        worker.start()
    except Exception:
        return {"status": "error"}
    return {"status": "queued"}
355
+
356
+
357
def vector_refit_json(kb: KnowledgeBase) -> Dict[str, object]:
    """Ask the vector listener for a UMAP refit and report the outcome.

    The returned dictionary has a single ``status`` key:

    - ``"unavailable"``: no vector-capable listener is attached;
    - ``"queued"`` / ``"error"``: the listener's ``trigger_umap_refit_async``
      returned a truthy / falsy value;
    - otherwise ``_schedule_umap_refit(delay=0.0)`` is tried: ``"queued"``
      when it returns, ``"error"`` when it raises or does not exist.
    """

    listener = _find_chroma_listener(kb)
    if listener is None:
        return {"status": "unavailable"}

    trigger = getattr(listener, "trigger_umap_refit_async", None)
    if callable(trigger):
        outcome = "queued" if trigger() else "error"
        return {"status": outcome}

    scheduler = getattr(listener, "_schedule_umap_refit", None)
    if not callable(scheduler):
        return {"status": "error"}

    try:
        scheduler(delay=0.0)
    except Exception:
        return {"status": "error"}
    return {"status": "queued"}
@@ -0,0 +1 @@
1
+ :root{--bg: #0f1115;--panel: #171a21;--border: #2a2f3a;--text: #e8eaf0;--muted: #a0a7b4;--accent: #5aa3ff;--vec-base: #8a8a8a;--vec-selected: #3d7eff;--vec-result: #ffb02e;--vec-query: #ff4d4f}*{box-sizing:border-box}body{margin:0;font-family:ui-sans-serif,system-ui,-apple-system,Segoe UI,Roboto,Ubuntu,Cantarell,Noto Sans,Helvetica,Arial,"Apple Color Emoji","Segoe UI Emoji";background:var(--bg);color:var(--text)}.menubar{display:flex;justify-content:space-between;align-items:center;padding:10px 14px;background:var(--panel);border-bottom:1px solid var(--border);position:sticky;top:0;z-index:10}.menubar .brand{font-weight:600}.menubar nav button{background:transparent;color:var(--text);border:1px solid var(--border);padding:6px 10px;margin-left:6px;border-radius:6px;cursor:pointer}.menubar nav button.active{border-color:var(--accent);color:var(--accent)}.menubar .searchbar{display:flex;gap:6px;align-items:center;width:50%}.menubar .searchbar input[type=search]{flex:1;min-width:120px;background:#0f1115;color:var(--text);border:1px solid var(--border);border-radius:6px;padding:6px 8px;font-size:13px}.menubar .searchbar input[type=number]{width:64px;background:#0f1115;color:var(--text);border:1px solid var(--border);border-radius:6px;padding:6px 8px;font-size:13px}.menubar .searchbar button{background:transparent;color:var(--text);border:1px solid var(--border);padding:6px 10px;border-radius:6px;cursor:pointer}.layout{display:grid;grid-template-columns:300px 1fr;height:calc(100vh - 50px)}.sidebar{border-right:1px solid var(--border);overflow:auto;background:var(--panel)}.sidebar-header{display:flex;gap:6px;padding:8px;border-bottom:1px solid var(--border);position:sticky;top:0;background:var(--panel);z-index:5}.sidebar-header button{background:transparent;color:var(--text);border:1px solid var(--border);padding:6px 8px;border-radius:6px;cursor:pointer;font-size:13px}.tree{list-style:none;padding:8px;margin:0;font-size:14px}.tree li{margin:2px 0}.tree 
.dir{color:var(--muted);margin-top:6px}.tree .file{cursor:pointer;padding:2px 6px;border-radius:4px}.tree .file:hover,.tree .file.active{background:#222737}.results{padding:8px;border-top:1px solid var(--border);font-size:13px;color:var(--muted)}.results .result{padding:6px;border:1px solid var(--border);border-radius:6px;margin-top:6px;cursor:pointer;color:var(--text);background:#141822}.results .result:hover{border-color:var(--accent)}.results .path{color:var(--muted);font-size:12px}.results .line{color:var(--accent);font-weight:600;margin-right:6px}.flash{animation:flash-bg 1s ease-out 1}@keyframes flash-bg{0%{background-color:#3b82f620}to{background-color:transparent}}.editor{display:grid;grid-template-rows:auto 1fr auto;height:100%}.path{padding:8px 12px;border-bottom:1px solid var(--border);color:var(--muted);font-size:13px}#editor{width:100%;height:100%;background:transparent;color:var(--text);border:none;resize:none;outline:none;padding:12px;font-family:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,monospace;font-size:14px;line-height:1.5}.actions{display:flex;gap:8px;align-items:center;border-top:1px solid var(--border);padding:8px 12px}.actions button{background:var(--accent);color:#04132b;border:none;padding:8px 12px;border-radius:6px;cursor:pointer;font-weight:600}.actions button#cancel{background:transparent;color:var(--text);border:1px solid var(--border);font-weight:500}.actions button.danger{background:#f87171;color:#2a0b0b}.actions button:disabled{opacity:.5;cursor:not-allowed;filter:grayscale(.2)}.status{margin-left:auto;color:var(--muted);font-size:12px}.hidden{display:none!important}#vectors-view{display:grid;grid-template-rows:auto 1fr auto;height:100%}#vector-canvas-container{position:relative;background:#111;border:1px solid var(--border)}#vector-canvas{width:100%;height:100%;display:block}#vector-legend{font-size:12px;color:var(--muted);padding:6px 12px}
@@ -0,0 +1,62 @@
1
+ <!doctype html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1" />
6
+ <title>Knowledge Base</title>
7
+ <link rel="icon" href="data:," />
8
+ <script type="module" crossorigin src="/static/entries/index.js"></script>
9
+ <link rel="stylesheet" crossorigin href="/static/assets/index.css">
10
+ </head>
11
+ <body>
12
+ <header class="menubar">
13
+ <div class="brand">Local Knowledge Base</div>
14
+ <div class="searchbar">
15
+ <input id="search-input" type="search" placeholder="Search…" aria-label="Search text" />
16
+ <input id="search-limit" type="number" min="1" value="10" title="Max results" aria-label="Max results" />
17
+ <button id="search-clear" title="Clear search">Clear</button>
18
+ </div>
19
+ <nav>
20
+ <button id="menu-browse" class="active" aria-pressed="true">Browse</button>
21
+ <button id="menu-vectors" class="hidden" aria-pressed="false" title="Embeddings view">
22
+ Vectors
23
+ </button>
24
+ <button id="menu-reindex" class="hidden" aria-pressed="false" title="Reindex Chroma DB">
25
+ Reindex
26
+ </button>
27
+ <button id="menu-umap-refit" class="hidden" aria-pressed="false" title="Refit UMAP (background)">
28
+ Refit
29
+ </button>
30
+ </nav>
31
+ </header>
32
+
33
+ <main class="layout">
34
+ <aside class="sidebar">
35
+ <div class="sidebar-header">
36
+ <button id="new-file">+ New File</button>
37
+ </div>
38
+ <ul id="file-tree" class="tree"></ul>
39
+ <div id="search-results" class="results" aria-live="polite"></div>
40
+ </aside>
41
+ <section id="browse-view" class="editor">
42
+ <div id="current-path" class="path"></div>
43
+ <textarea id="editor" spellcheck="false"></textarea>
44
+ <div class="actions">
45
+ <button id="save" disabled>Save</button>
46
+ <button id="cancel" disabled>Cancel</button>
47
+ <button id="delete" disabled class="danger">Delete</button>
48
+ <span id="status" class="status"></span>
49
+ </div>
50
+ </section>
51
+
52
+ <section id="vectors-view" class="editor hidden" aria-hidden="true">
53
+ <div id="vector-legend" class="legend" aria-live="polite"></div>
54
+ <div id="vector-canvas-container">
55
+ <canvas id="vector-canvas" aria-label="3D PCA scatter"></canvas>
56
+ </div>
57
+ <div id="vector-status" class="status"></div>
58
+ </section>
59
+ </main>
60
+
61
+ </body>
62
+ </html>