mcp-kb 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mcp_kb/ui/server.py ADDED
@@ -0,0 +1,332 @@
1
+ """Lightweight HTTP server that hosts the human UI and JSON endpoints.
2
+
3
+ The implementation uses :mod:`http.server` from the Python standard library to
4
+ avoid adding web framework dependencies. Static assets are loaded from package
5
+ resources, while dynamic endpoints call into a shared
6
+ :class:`~mcp_kb.knowledge.store.KnowledgeBase` instance.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import threading
13
+ from http import HTTPStatus
14
+ from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
15
+ from typing import ClassVar, Optional
16
+ from urllib.parse import parse_qs, urlparse
17
+
18
+ from importlib import resources
19
+ import logging
20
+ from mcp_kb.knowledge.store import KnowledgeBase
21
+
22
+ from .api import (
23
+ build_tree_json,
24
+ read_file_json,
25
+ write_file,
26
+ search_json,
27
+ vector_status_json,
28
+ vector_embeddings_json,
29
+ vector_query_embedding_json,
30
+ vector_reindex_json,
31
+ vector_refit_json,
32
+ )
33
+
34
+
35
logger = logging.getLogger(__name__)


class _UIRequestHandler(BaseHTTPRequestHandler):
    """Request handler serving the web UI and API endpoints.

    The handler reads assets from ``mcp_kb.ui.assets`` and forwards JSON API
    requests to the injected knowledge base instance. An instance is attached
    to the handler class at server startup via ``kb``.

    Failures raised by the API helpers are logged and returned to the client
    as ``400`` responses with a JSON body of the form ``{"error": "..."}``.
    """

    # Shared knowledge base used by every request; assigned on the class by
    # the code that creates the HTTP server.
    kb: ClassVar[Optional["KnowledgeBase"]] = None

    # Silence default log output; the main process already logs startup info
    def log_message(self, format: str, *args) -> None:  # pragma: no cover - noise
        """Discard the per-request access log lines of the base class."""
        return

    def do_GET(self) -> None:  # noqa: N802 - HTTP verb name
        """Serve the index page, static assets, or a JSON read endpoint."""

        assert self.kb is not None, "UI server not initialized with a KnowledgeBase"
        parsed = urlparse(self.path)
        route = parsed.path

        if route in ("/", "/index.html"):
            self._serve_asset("index.html", content_type="text/html; charset=utf-8")
            return

        if route.startswith("/static/"):
            name = route.split("/static/", 1)[1]
            if name.endswith(".js"):
                ctype = "text/javascript; charset=utf-8"
            elif name.endswith(".css"):
                ctype = "text/css; charset=utf-8"
            else:
                ctype = "application/octet-stream"
            self._serve_asset(name, content_type=ctype)
            return

        if route == "/api/tree":
            self._send_json(build_tree_json(self.kb))
            return

        if route == "/api/file":
            params = parse_qs(parsed.query)
            rel_path = params.get("path", [""])[0]
            try:
                payload = read_file_json(self.kb, rel_path)
            except Exception as exc:  # pragma: no cover - defensive
                logger.exception(exc)
                self._send_error(HTTPStatus.BAD_REQUEST, str(exc))
                return
            self._send_json(payload.model_dump())
            return

        if route == "/api/search":
            params = parse_qs(parsed.query)
            query = params.get("query", [""])[0]
            if not query:
                # An empty query is answered with an empty result list.
                self._send_json([])
                return
            # ``n`` is accepted as an alias for ``limit``.
            limit_raw = params.get("limit", params.get("n", [None]))[0]
            limit = None
            if limit_raw is not None:
                try:
                    limit = max(1, int(limit_raw))
                except ValueError:
                    self._send_error(HTTPStatus.BAD_REQUEST, "Invalid limit value")
                    return
            try:
                results = search_json(self.kb, query, limit=limit)
            except Exception as exc:  # pragma: no cover - defensive
                logger.exception(exc)
                self._send_error(HTTPStatus.BAD_REQUEST, str(exc))
                return
            self._send_json(results)
            return

        if route == "/api/vector/status":
            self._send_json(vector_status_json(self.kb))
            return

        if route == "/api/vector/embeddings":
            params = parse_qs(parsed.query)
            try:
                raw_limit = params.get("limit", ["1000"])[0]
                raw_offset = params.get("offset", ["0"])[0]
                limit = max(1, int(raw_limit))
                offset = max(0, int(raw_offset))
            except ValueError:
                self._send_error(HTTPStatus.BAD_REQUEST, "Invalid limit/offset value")
                return
            path = params.get("path", [None])[0]
            try:
                results = vector_embeddings_json(
                    self.kb, limit=limit, offset=offset, path=path
                )
            except Exception as exc:  # pragma: no cover - defensive
                logger.exception(exc)
                self._send_error(HTTPStatus.BAD_REQUEST, str(exc))
                return
            self._send_json(results)
            return

        if route == "/api/vector/query_embedding":
            params = parse_qs(parsed.query)
            query = params.get("query", [""])[0]
            if not query:
                self._send_json({"embedding": [], "used_model": None})
                return
            # Guarded like the other endpoints so a backend failure becomes a
            # JSON 400 instead of an unhandled exception in the handler.
            try:
                result = vector_query_embedding_json(self.kb, query)
            except Exception as exc:  # pragma: no cover - defensive
                logger.exception(exc)
                self._send_error(HTTPStatus.BAD_REQUEST, str(exc))
                return
            self._send_json(result)
            return

        self._send_error(HTTPStatus.NOT_FOUND, "Not Found")

    def do_PUT(self) -> None:  # noqa: N802 - HTTP verb name
        """Handle file save requests at ``/api/file`` with JSON payloads.

        The body must be a JSON object with a ``path`` key and an optional
        ``content`` key (default ``""``). Responds ``204`` on success, ``411``
        when ``Content-Length`` is not a non-negative integer and ``400`` for
        every other failure.
        """

        assert self.kb is not None, "UI server not initialized with a KnowledgeBase"
        parsed = urlparse(self.path)
        if parsed.path != "/api/file":
            self._send_error(HTTPStatus.NOT_FOUND, "Not Found")
            return
        try:
            content_len = int(self.headers.get("Content-Length", "0"))
        except ValueError:
            self._send_error(HTTPStatus.LENGTH_REQUIRED, "Content-Length required")
            return
        if content_len < 0:
            # ``rfile.read(-1)`` would block until the client closes the socket.
            self._send_error(HTTPStatus.LENGTH_REQUIRED, "Content-Length required")
            return
        raw = self.rfile.read(content_len)

        # Parse first and write second, so that only a missing key in the
        # request body is reported as "Missing 'path'".
        try:
            payload = json.loads(raw.decode("utf-8"))
            path = payload["path"]
            content = payload.get("content", "")
        except KeyError as exc:
            logger.exception(exc)
            self._send_error(HTTPStatus.BAD_REQUEST, "Missing 'path' in JSON body")
            return
        except Exception as exc:  # pragma: no cover - defensive
            logger.exception(exc)
            self._send_error(HTTPStatus.BAD_REQUEST, str(exc))
            return

        try:
            write_file(self.kb, path, content)
        except Exception as exc:  # pragma: no cover - defensive
            logger.exception(exc)
            self._send_error(HTTPStatus.BAD_REQUEST, str(exc))
            return
        self._send_response(HTTPStatus.NO_CONTENT, b"", "application/json")

    def do_POST(self) -> None:  # noqa: N802 - HTTP verb name
        """Handle administrative vector actions exposed via POST endpoints."""

        assert self.kb is not None, "UI server not initialized with a KnowledgeBase"
        parsed = urlparse(self.path)
        # Built per call so the helpers are looked up at request time.
        actions = {
            "/api/vector/reindex": vector_reindex_json,
            "/api/vector/refit": vector_refit_json,
        }
        action = actions.get(parsed.path)
        if action is None:
            self._send_error(HTTPStatus.NOT_FOUND, "Not Found")
            return
        try:
            body = json.dumps(action(self.kb)).encode("utf-8")
        except Exception as exc:  # pragma: no cover - defensive
            logger.exception(exc)
            self._send_error(HTTPStatus.BAD_REQUEST, str(exc))
            return
        self._send_response(HTTPStatus.OK, body, "application/json")

    def do_DELETE(self) -> None:  # noqa: N802 - HTTP verb name
        """Soft delete the requested file using ``kb.soft_delete``.

        Endpoint: ``DELETE /api/file?path=...``
        Returns ``204 No Content`` on success or ``400`` when validation fails.
        """

        assert self.kb is not None, "UI server not initialized with a KnowledgeBase"
        parsed = urlparse(self.path)
        if parsed.path != "/api/file":
            self._send_error(HTTPStatus.NOT_FOUND, "Not Found")
            return
        params = parse_qs(parsed.query)
        rel_path = params.get("path", [""])[0]
        try:
            self.kb.soft_delete(rel_path)
        except Exception as exc:  # pragma: no cover - defensive
            logger.exception(exc)
            self._send_error(HTTPStatus.BAD_REQUEST, str(exc))
            return
        self._send_response(HTTPStatus.NO_CONTENT, b"", "application/json")

    def _serve_asset(self, name: str, *, content_type: str) -> None:
        """Serve an embedded static asset by ``name`` with ``content_type``.

        ``name`` comes from the request URL, so it is validated before it is
        joined onto the asset package: empty, ``.`` and ``..`` segments,
        backslashes and drive-style colons are answered with ``404``.
        """

        segments = name.split("/")
        if (
            "\\" in name
            or ":" in name
            or any(segment in ("", ".", "..") for segment in segments)
        ):
            self._send_error(HTTPStatus.NOT_FOUND, "Asset not found")
            return
        try:
            data = resources.files("mcp_kb.ui.assets").joinpath(name).read_bytes()
        except OSError:
            # Covers missing files as well as names that resolve to a directory.
            self._send_error(HTTPStatus.NOT_FOUND, "Asset not found")
            return
        self._send_response(HTTPStatus.OK, data, content_type)

    def _send_json(self, payload: object, status: HTTPStatus = HTTPStatus.OK) -> None:
        """Serialize ``payload`` as JSON and send it with ``status``."""

        body = json.dumps(payload).encode("utf-8")
        self._send_response(status, body, "application/json")

    def _send_response(self, status: HTTPStatus, body: bytes, content_type: str) -> None:
        """Write an HTTP response with headers and body."""

        self.send_response(status)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        if body:
            self.wfile.write(body)

    def _send_error(self, status: HTTPStatus, message: str) -> None:
        """Return a JSON error payload with ``status`` and ``message``."""

        payload = json.dumps({"error": message}).encode("utf-8")
        self._send_response(status, payload, "application/json")
250
+
251
+
252
# First port tried when the caller does not request a specific one.
DEFAULT_UI_PORT = 8765


class UIServer:
    """Container managing the lifecycle of the UI HTTP server.

    The server binds in a background thread so it can run alongside the MCP
    transports. Call :meth:`stop` to shut it down from tests or other code.
    By default, the server attempts to bind to :data:`DEFAULT_UI_PORT` and will
    increment by one until a free port is found. Callers can provide a
    ``port`` to override the starting point.
    """

    def __init__(self, kb: "KnowledgeBase", host: str = "127.0.0.1", port: Optional[int] = None) -> None:
        """Create a server bound to ``host:port`` serving ``kb``.

        Binding strategy
        ----------------
        - When ``port`` is ``None``, the server starts scanning from
          :data:`DEFAULT_UI_PORT`.
        - When ``port`` is provided, scanning starts from that value.
        - On conflict (address already in use), the server increments the port
          by one and retries, giving up after 200 candidates.

        Raises
        ------
        RuntimeError
            If none of the candidate ports could be bound.
        """

        # NOTE: the knowledge base is stored on the handler *class*, so every
        # UIServer in the process serves the most recently supplied ``kb``.
        _UIRequestHandler.kb = kb
        start = DEFAULT_UI_PORT if port is None else port
        httpd = None
        last_error: Optional[Exception] = None
        for candidate in range(start, start + 200):
            try:
                httpd = ThreadingHTTPServer((host, candidate), _UIRequestHandler)
            except (OSError, OverflowError) as exc:
                # OSError: port in use or permission error.
                # OverflowError: candidate outside the valid 0-65535 range.
                last_error = exc
                continue
            break
        if httpd is None:
            raise RuntimeError(
                f"Failed to bind UI server on {host}:{start}-{start+199}: {last_error}"
            )
        self._httpd = httpd
        self.host = host
        # Read the port back from the socket so ``port=0`` (OS-assigned)
        # reports the port that was really bound.
        self.port = httpd.server_address[1]
        self._thread: Optional[threading.Thread] = None

    def start(self) -> None:
        """Start the HTTP server in a daemon thread.

        Calling this while the serving thread is already alive is a no-op.
        """

        if self._thread is not None and self._thread.is_alive():
            return
        self._thread = threading.Thread(
            target=self._httpd.serve_forever,
            kwargs={"poll_interval": 0.5},
            name="kb-ui",
            daemon=True,
        )
        self._thread.start()

    def stop(self) -> None:
        """Shutdown the server and wait for the thread to exit.

        Safe to call when :meth:`start` was never invoked.
        """

        thread = self._thread
        if thread is not None:
            # ``shutdown()`` blocks until ``serve_forever()`` acknowledges it,
            # so it must only be called once the serving thread was started.
            self._httpd.shutdown()
        self._httpd.server_close()
        if thread is not None:
            thread.join(timeout=5)
            self._thread = None
315
+
316
+
317
def start_ui_server(kb: KnowledgeBase, host: str = "127.0.0.1", port: int | None = None) -> UIServer:
    """Create a :class:`UIServer` for ``kb``, start it, and return it.

    ``host`` and ``port`` are forwarded unchanged to :class:`UIServer`. With
    ``port`` left as ``None`` the server begins at :data:`DEFAULT_UI_PORT`
    and moves upward until a port can be bound.
    """

    server = UIServer(kb, host=host, port=port)
    server.start()
    return server
327
+
328
+
329
+ __all__ = [
330
+ "UIServer",
331
+ "start_ui_server",
332
+ ]
@@ -0,0 +1 @@
1
+ """Utility helpers shared across the knowledge base server modules."""
@@ -0,0 +1,128 @@
1
+ """Filesystem helpers wrapping Python's standard library primitives.
2
+
3
+ The knowledge base server performs numerous file operations. Consolidating the
4
+ logic in this module keeps the rest of the code focused on business semantics
5
+ such as validating incoming requests and shaping responses. Each helper function
6
+ is intentionally small so that callers can compose them for different workflows
7
+ without duplicating the low-level boilerplate.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from contextlib import contextmanager
13
+ from pathlib import Path
14
+ from threading import Lock
15
+ from typing import Dict, Iterator
16
+
17
+
18
class FileLockRegistry:
    """In-memory registry handing out one lock per file path.

    Serializing writers per path keeps concurrent writes to the same file from
    interleaving. Locks are created the first time a path is seen and are then
    kept for the lifetime of the registry, so later operations on the same
    path always share the same lock object.
    """

    def __init__(self) -> None:
        """Set up an empty path-to-lock table and the mutex guarding it."""

        self._global_lock = Lock()
        self._locks: Dict[Path, Lock] = {}

    @contextmanager
    def acquire(self, path: Path) -> Iterator[None]:
        """Hold the lock belonging to ``path`` for the duration of the block.

        The registry mutex is taken only while the per-path lock is looked up
        or created; it is released before the per-path lock is acquired.

        Parameters
        ----------
        path:
            Absolute path indicating which file should be protected.
        """

        with self._global_lock:
            path_lock = self._locks.get(path)
            if path_lock is None:
                path_lock = Lock()
                self._locks[path] = path_lock

        # The ``with`` statement releases the lock even if the caller raises.
        with path_lock:
            yield
54
+
55
+
56
def write_text(path: Path, content: str) -> None:
    """Replace the contents of ``path`` with ``content`` encoded as UTF-8."""

    with path.open("w", encoding="utf-8") as handle:
        handle.write(content)
60
+
61
+
62
def append_text(path: Path, content: str) -> None:
    """Add ``content`` to the end of ``path``, encoded as UTF-8.

    The file is opened in append mode, so it is created when missing.
    """

    stream = path.open(mode="a", encoding="utf-8")
    with stream:
        stream.write(content)
67
+
68
+
69
def read_text(path: Path) -> str:
    """Return the full contents of ``path`` decoded as UTF-8."""

    with path.open("r", encoding="utf-8") as handle:
        return handle.read()
73
+
74
+
75
def ensure_parent_directory(path: Path) -> None:
    """Create the directory containing ``path`` if it does not exist yet.

    Missing intermediate directories are created as well; an already existing
    directory is left untouched.
    """

    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
79
+
80
+
81
def rename(path: Path, target: Path) -> None:
    """Move ``path`` to ``target`` by delegating to ``Path.rename``.

    Whatever ``Path.rename`` returns is discarded; the function always
    returns ``None``.
    """

    source = path
    source.rename(target)
    return None
85
+
86
+
87
def is_text_file(path: Path, max_bytes: int = 2048) -> bool:
    """Heuristically determine whether ``path`` contains UTF-8 text.

    The check is designed to be fast and conservative for use when iterating
    a directory tree. It reads at most ``max_bytes`` from the file in binary
    mode (plus a one-byte probe to learn whether the sample was cut short)
    and applies two filters:

    - Reject files that contain NUL bytes, which are extremely uncommon in
      textual formats and a strong indicator of binary content.
    - Attempt to decode the sampled bytes as UTF-8. If decoding fails, the
      file is treated as binary. A multi-byte character that is merely cut
      in half by the sample boundary does not count as a failure.

    Parameters
    ----------
    path:
        Absolute path to the file on disk.
    max_bytes:
        Upper bound on the number of bytes to sample from the head of the
        file. A small sample keeps directory scans fast while remaining
        accurate for typical text formats such as ``.md``, ``.txt``, ``.xml``,
        and source files.

    Returns
    -------
    bool
        ``True`` if the file appears to be UTF-8 text; ``False`` otherwise,
        including when the path cannot be opened or read as a file.
    """

    import codecs  # stdlib; imported locally to keep this helper self-contained

    try:
        with path.open("rb") as handle:
            sample = handle.read(max_bytes)
            # If more data follows, the sample may end in the middle of a
            # multi-byte UTF-8 sequence.
            truncated = bool(handle.read(1))
    except OSError:  # missing, unreadable, or not a regular file
        return False

    if b"\x00" in sample:
        return False

    decoder = codecs.getincrementaldecoder("utf-8")()
    try:
        # ``final=False`` tolerates an incomplete trailing sequence, while
        # invalid bytes anywhere in the sample still raise.
        decoder.decode(sample, final=not truncated)
    except UnicodeDecodeError:
        return False
    return True