mcp-kb 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_kb/cli/args.py +1 -1
- mcp_kb/cli/main.py +9 -2
- mcp_kb/cli/reindex.py +4 -2
- mcp_kb/config.py +7 -5
- mcp_kb/data/KNOWLEDBASE_DOC.md +151 -36
- mcp_kb/ingest/chroma.py +41 -19
- mcp_kb/knowledge/bootstrap.py +6 -1
- mcp_kb/knowledge/events.py +1 -0
- mcp_kb/knowledge/search.py +4 -1
- mcp_kb/knowledge/store.py +11 -5
- mcp_kb/security/path_validation.py +7 -2
- mcp_kb/server/app.py +14 -5
- mcp_kb/utils/filesystem.py +1 -0
- {mcp_kb-0.2.0.dist-info → mcp_kb-0.2.1.dist-info}/METADATA +1 -1
- mcp_kb-0.2.1.dist-info/RECORD +26 -0
- mcp_kb-0.2.0.dist-info/RECORD +0 -26
- {mcp_kb-0.2.0.dist-info → mcp_kb-0.2.1.dist-info}/WHEEL +0 -0
- {mcp_kb-0.2.0.dist-info → mcp_kb-0.2.1.dist-info}/entry_points.txt +0 -0
- {mcp_kb-0.2.0.dist-info → mcp_kb-0.2.1.dist-info}/top_level.txt +0 -0
mcp_kb/cli/args.py
CHANGED
@@ -4,6 +4,7 @@ This module centralizes the definition of common command-line options and
|
|
4
4
|
helpers so that multiple entry points (e.g., server and reindex commands) can
|
5
5
|
remain small and focused while sharing consistent behavior.
|
6
6
|
"""
|
7
|
+
|
7
8
|
from __future__ import annotations
|
8
9
|
|
9
10
|
import os
|
@@ -150,4 +151,3 @@ def build_chroma_listener(options: Namespace, root: Path) -> Optional[ChromaInge
|
|
150
151
|
if not configuration.enabled:
|
151
152
|
return None
|
152
153
|
return ChromaIngestor(configuration)
|
153
|
-
|
mcp_kb/cli/main.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
"""Command line interface for running the MCP knowledge base server."""
|
2
|
+
|
2
3
|
from __future__ import annotations
|
3
4
|
|
4
5
|
import argparse
|
@@ -100,9 +101,15 @@ def run_server(arguments: Iterable[str] | None = None) -> None:
|
|
100
101
|
listeners=listeners,
|
101
102
|
)
|
102
103
|
transports = options.transports or ["stdio"]
|
103
|
-
logger.info(
|
104
|
+
logger.info(
|
105
|
+
f"Running server on {options.host}:{options.port} with transports {transports}"
|
106
|
+
)
|
104
107
|
logger.info(f"Data root is {root_path}")
|
105
|
-
print(
|
108
|
+
print(
|
109
|
+
"--------------------------------",
|
110
|
+
root_path,
|
111
|
+
"--------------------------------",
|
112
|
+
)
|
106
113
|
asyncio.run(_run_transports(server, transports))
|
107
114
|
|
108
115
|
|
mcp_kb/cli/reindex.py
CHANGED
@@ -5,6 +5,7 @@ ingestors and calls their ``reindex`` method when available, allowing operators
|
|
5
5
|
to trigger a full rebuild of external indexes (e.g., Chroma) from the current
|
6
6
|
filesystem state.
|
7
7
|
"""
|
8
|
+
|
8
9
|
from __future__ import annotations
|
9
10
|
|
10
11
|
import argparse
|
@@ -24,7 +25,9 @@ logger = logging.getLogger(__name__)
|
|
24
25
|
def _build_argument_parser() -> argparse.ArgumentParser:
|
25
26
|
"""Return the argument parser for the reindex command."""
|
26
27
|
|
27
|
-
parser = argparse.ArgumentParser(
|
28
|
+
parser = argparse.ArgumentParser(
|
29
|
+
description="Reindex the knowledge base into configured backends"
|
30
|
+
)
|
28
31
|
parser.add_argument(
|
29
32
|
"--root",
|
30
33
|
dest="root",
|
@@ -88,4 +91,3 @@ def main() -> None:
|
|
88
91
|
|
89
92
|
if __name__ == "__main__":
|
90
93
|
main()
|
91
|
-
|
mcp_kb/config.py
CHANGED
@@ -7,6 +7,7 @@ logic more reusable across different deployment environments because callers can
|
|
7
7
|
swap configurations programmatically or via environment variables without
|
8
8
|
modifying the core modules.
|
9
9
|
"""
|
10
|
+
|
10
11
|
from __future__ import annotations
|
11
12
|
|
12
13
|
from pathlib import Path
|
@@ -58,12 +59,13 @@ def resolve_knowledge_base_root(provided_path: str | None = None) -> Path:
|
|
58
59
|
root directory.
|
59
60
|
"""
|
60
61
|
|
61
|
-
|
62
|
-
|
63
|
-
os.getenv(
|
64
|
-
|
62
|
+
candidate = (
|
63
|
+
provided_path
|
64
|
+
or os.getenv(ENV_ROOT_KEY)
|
65
|
+
or Path(os.getenv("WORKSPACE_FOLDER_PATHS") or Path.cwd())
|
66
|
+
/ DEFAULT_KNOWLEDGE_BASE_DIR
|
67
|
+
)
|
65
68
|
root_path = Path(candidate).expanduser().resolve()
|
66
69
|
root_path.mkdir(parents=True, exist_ok=True)
|
67
70
|
|
68
|
-
|
69
71
|
return root_path
|
mcp_kb/data/KNOWLEDBASE_DOC.md
CHANGED
@@ -1,36 +1,151 @@
|
|
1
|
-
# Knowledge Base
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
-
|
11
|
-
-
|
12
|
-
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
1
|
+
# LLM Operating Manual — MCP Knowledge Base (`mcp-kb`)
|
2
|
+
|
3
|
+
You are connected to a **local, text-only knowledge base**. Your job is to **search, read, create, update, and soft-delete** UTF‑8 text files under a single root directory while respecting safety rules below. Use the provided MCP tools exactly as specified.
|
4
|
+
|
5
|
+
---
|
6
|
+
|
7
|
+
## Ground Rules (enforced by the server)
|
8
|
+
|
9
|
+
- **Paths are relative only.** Absolute paths are rejected. No `..` traversal.
|
10
|
+
- **Protected folder:** `.data/` is read‑only. Do not write there.
|
11
|
+
- **Soft delete sentinel:** Files marked with `_DELETE_` in the name are considered deleted. Do not read/write them.
|
12
|
+
- **Text files only.** Binary-ish files are ignored by scans. Treat this KB as UTF‑8 text storage.
|
13
|
+
- **Concurrency:** Writes are serialized per file; still prefer read‑verify‑write sequences.
|
14
|
+
|
15
|
+
Constants (baked into the server):
|
16
|
+
- Protected folder: `.data`
|
17
|
+
- Documentation file name: `KNOWLEDBASE_DOC.md`
|
18
|
+
- Delete sentinel: `_DELETE_`
|
19
|
+
|
20
|
+
---
|
21
|
+
|
22
|
+
## Tools You Can Call
|
23
|
+
|
24
|
+
All tool names and parameter contracts are stable. Stick to these shapes.
|
25
|
+
|
26
|
+
### `create_file(path: str, content: str) -> str`
|
27
|
+
- Create or **overwrite** a text file at `path` with `content`.
|
28
|
+
- `path` must be **relative** and **outside** `.data/`.
|
29
|
+
|
30
|
+
### `read_file(path: str, start_line?: int, end_line?: int) -> { path, start_line, end_line, content }`
|
31
|
+
- Read full file or a 1‑based inclusive slice.
|
32
|
+
- If both bounds are omitted ⇒ the full file is returned. If only one bound is omitted ⇒ the server fills in the missing bound.
|
33
|
+
|
34
|
+
### `append_file(path: str, content: str) -> str`
|
35
|
+
- Append text. If file is missing, it will be **created**.
|
36
|
+
|
37
|
+
### `regex_replace(path: str, pattern: str, replacement: str) -> { replacements: int }`
|
38
|
+
- The pattern is applied as a multiline regex (`re.MULTILINE`). Returns the number of replacements made. Always call `read_file` afterwards to verify the result.
|
39
|
+
|
40
|
+
### `delete(path: str) -> str`
|
41
|
+
- **Soft delete**: renames `name.ext` to `name_DELETE_.ext`. Use when content is obsolete.
|
42
|
+
|
43
|
+
### `search(query: str, limit: int = 5) -> [{ path, line, context: string[] }]`
|
44
|
+
- Returns up to `limit` matches with short context.
|
45
|
+
- If Chroma mirroring is active, results are **semantic** first; otherwise plain scan.
|
46
|
+
- `limit` must be **> 0**.
|
47
|
+
|
48
|
+
### `overview() -> str`
|
49
|
+
- A deterministic `tree`-like view of active files under root (skips deleted and binaries).
|
50
|
+
|
51
|
+
### `documentation() -> str`
|
52
|
+
- Human usage guide (not this manual). For you, prefer this manual.
|
53
|
+
|
54
|
+
---
|
55
|
+
|
56
|
+
## How to Work Effectively
|
57
|
+
|
58
|
+
### 1) Discover
|
59
|
+
- Call `overview()` to understand the tree.
|
60
|
+
- If you need conventions or human guidelines, read `documentation()` (optional).
|
61
|
+
|
62
|
+
### 2) Locate Content
|
63
|
+
- Prefer `search("keywords", limit=5)` to find candidate files/snippets.
|
64
|
+
- Examine each `{path, line, context}`. The `context` is a short window around the hit.
|
65
|
+
- If results look thin, **increase `limit`** (e.g., 10–20) before broadening the query.
|
66
|
+
|
67
|
+
### 3) Read Precisely
|
68
|
+
- Use `read_file(path)` for the full file when structure matters.
|
69
|
+
- If the file is large but you know the region, use `read_file(path, start_line, end_line)` to minimize tokens.
|
70
|
+
|
71
|
+
### 4) Create New Knowledge
|
72
|
+
- Pick a **descriptive relative path** (folders based on topic, kebab‑case names).
|
73
|
+
- Example: `architecture/decision-records/adr-2025-10-06-edge-cache.md`
|
74
|
+
- Call `create_file(path, content)`.
|
75
|
+
- Keep the **title as the first Markdown heading** so search has context.
|
76
|
+
- Link related files with **relative Markdown links**.
|
77
|
+
|
78
|
+
### 5) Update Safely
|
79
|
+
- For small edits:
|
80
|
+
1) `read_file(...)` to confirm current state.
|
81
|
+
2) `regex_replace(path, pattern, replacement)` for targeted changes.
|
82
|
+
3) `read_file(...)` again to verify.
|
83
|
+
- For additive changes: `append_file(path, "\n...")`.
|
84
|
+
|
85
|
+
### 6) Deletion Policy
|
86
|
+
- Use `delete(path)` to **soft-delete**. Do not operate on files that already include `_DELETE_` in their name.
|
87
|
+
|
88
|
+
---
|
89
|
+
|
90
|
+
## Search Semantics (important)
|
91
|
+
|
92
|
+
- When Chroma ingestion is **enabled**, `search()` uses semantic ranking first and returns the **best slice per file** (the ingestor extracts one representative match per document chunk/file). If no obvious line match is found, you may get a **top-of-file preview** — then call `read_file()` to confirm.
|
93
|
+
- When Chroma is **not** enabled, `search()` scans files literally and returns all matches up to `limit`.
|
94
|
+
- Always **validate** by fetching the file segment with `read_file()` before making edits.
|
95
|
+
|
96
|
+
---
|
97
|
+
|
98
|
+
## Parameter Contracts and Gotchas
|
99
|
+
|
100
|
+
- `path` must be **relative** (e.g., `notes/today.md`). Absolute paths are rejected.
|
101
|
+
- Do **not** write into `.data/` (protected). Reads are allowed there.
|
102
|
+
- Line numbers in `read_file` are **1‑based** and the interval is **inclusive**.
|
103
|
+
- `regex_replace` uses Python’s `re.MULTILINE`. Validate your pattern; avoid overly broad substitutions.
|
104
|
+
- `append_file` will create a file if missing (useful for logs/progress notes).
|
105
|
+
|
106
|
+
---
|
107
|
+
|
108
|
+
## Typical Recipes
|
109
|
+
|
110
|
+
**Find → Read → Edit**
|
111
|
+
1. `search("beta feature toggle", limit=10)`
|
112
|
+
2. Pick a result: `read_file("features/toggles.md", 40, 80)`
|
113
|
+
3. Adjust: `regex_replace("features/toggles.md", "^Status:.*$", "Status: Enabled")`
|
114
|
+
4. Verify: `read_file("features/toggles.md")` (check the `Status:` header)
|
115
|
+
|
116
|
+
**Add a new doc**
|
117
|
+
1. `create_file("ops/runbooks/cache-invalidation.md", "# Cache Invalidation\n\n…")`
|
118
|
+
2. Optionally link it from an index: `append_file("ops/README.md", "\n- [Cache Invalidation](runbooks/cache-invalidation.md)")`
|
119
|
+
|
120
|
+
**Soft delete an obsolete note**
|
121
|
+
1. `delete("notes/old-incident.md")`
|
122
|
+
|
123
|
+
---
|
124
|
+
|
125
|
+
## Error Recovery
|
126
|
+
|
127
|
+
- **"Absolute paths are not permitted"** → Use a **relative** path.
|
128
|
+
- **"Writes are not allowed inside the protected folder '.data'"** → Choose a different folder (e.g., `docs/`).
|
129
|
+
- **"File 'X' does not exist"** on delete → Confirm with `overview()` or `search()`. Only existing non‑deleted files can be soft‑deleted.
|
130
|
+
- **No search hits** → Widen keywords, increase `limit`, or pivot to `overview()` to eyeball likely locations.
|
131
|
+
|
132
|
+
---
|
133
|
+
|
134
|
+
## Things You Should Not Do
|
135
|
+
|
136
|
+
- Do not fabricate file contents or paths. Always confirm with `overview()`, `search()`, and `read_file()`.
|
137
|
+
- Do not operate on files that include `_DELETE_` in their name.
|
138
|
+
- Do not attempt to talk directly to Chroma; you only use `search()`. Indexing is handled automatically after writes.
|
139
|
+
- Do not write binary or non‑UTF‑8 content.
|
140
|
+
|
141
|
+
---
|
142
|
+
|
143
|
+
## Performance Hints
|
144
|
+
|
145
|
+
- Prefer `search()` + targeted `read_file()` slices over reading entire large files.
|
146
|
+
- Keep `limit` modest (5–10) unless you must broaden the search.
|
147
|
+
- Batch edits in one file using a single `regex_replace` when safe (then verify).
|
148
|
+
|
149
|
+
---
|
150
|
+
|
151
|
+
You now have the minimal contract to operate this KB safely and efficiently.
|
mcp_kb/ingest/chroma.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
"""Integration layer that mirrors knowledge base updates into ChromaDB."""
|
2
|
+
|
2
3
|
from __future__ import annotations
|
3
4
|
|
4
5
|
import importlib
|
@@ -25,6 +26,7 @@ if TYPE_CHECKING: # pragma: no cover - type checking only imports
|
|
25
26
|
SUPPORTED_CLIENTS: Tuple[str, ...] = ("off", "ephemeral", "persistent", "http", "cloud")
|
26
27
|
"""Recognised client types exposed to operators enabling Chroma ingestion."""
|
27
28
|
|
29
|
+
|
28
30
|
@dataclass(frozen=True)
|
29
31
|
class ChromaConfiguration:
|
30
32
|
"""Runtime configuration controlling how Chroma ingestion behaves.
|
@@ -106,7 +108,7 @@ class ChromaConfiguration:
|
|
106
108
|
if data_directory:
|
107
109
|
resolved_directory = Path(data_directory).expanduser().resolve()
|
108
110
|
elif normalized_type == "persistent":
|
109
|
-
resolved_directory = (root/DATA_FOLDER_NAME / "chroma").resolve()
|
111
|
+
resolved_directory = (root / DATA_FOLDER_NAME / "chroma").resolve()
|
110
112
|
else:
|
111
113
|
resolved_directory = None
|
112
114
|
|
@@ -144,7 +146,9 @@ class ChromaConfiguration:
|
|
144
146
|
raise ValueError("Persistent Chroma client requires a data directory")
|
145
147
|
|
146
148
|
if self.client_type == "http" and not self.host:
|
147
|
-
raise ValueError(
|
149
|
+
raise ValueError(
|
150
|
+
"HTTP Chroma client requires --chroma-host or MCP_KB_CHROMA_HOST"
|
151
|
+
)
|
148
152
|
|
149
153
|
if self.client_type == "cloud":
|
150
154
|
missing = [
|
@@ -203,7 +207,9 @@ def _load_dependencies() -> _ChromaDependencies:
|
|
203
207
|
if hasattr(embedding_module, attr):
|
204
208
|
factories[alias] = getattr(embedding_module, attr)
|
205
209
|
if not factories:
|
206
|
-
raise RuntimeError(
|
210
|
+
raise RuntimeError(
|
211
|
+
"No embedding functions were found in chromadb.utils.embedding_functions"
|
212
|
+
)
|
207
213
|
|
208
214
|
return _ChromaDependencies(
|
209
215
|
chroma_module=chroma_module,
|
@@ -236,14 +242,14 @@ class ChromaIngestor(KnowledgeBaseListener, KnowledgeBaseReindexListener):
|
|
236
242
|
self._client = self._create_client()
|
237
243
|
self._collection = self._ensure_collection()
|
238
244
|
self.textsplitter = TokenTextSplitter(
|
239
|
-
chunk_size=200,
|
240
|
-
chunk_overlap=20,
|
241
|
-
add_start_index=True
|
245
|
+
chunk_size=200, chunk_overlap=20, add_start_index=True
|
242
246
|
)
|
243
247
|
|
244
|
-
def get_document_chunks(
|
248
|
+
def get_document_chunks(
|
249
|
+
self, document_id: str, include: List[str] = ["metadatas", "documents"]
|
250
|
+
) -> GetResult:
|
245
251
|
"""Get a document from the Chroma index."""
|
246
|
-
return self._collection.get(where={"document_id": document_id},include=include)
|
252
|
+
return self._collection.get(where={"document_id": document_id}, include=include)
|
247
253
|
|
248
254
|
def handle_upsert(self, event: FileUpsertEvent) -> None:
|
249
255
|
"""Upsert ``event`` into the configured Chroma collection.
|
@@ -263,7 +269,9 @@ class ChromaIngestor(KnowledgeBaseListener, KnowledgeBaseReindexListener):
|
|
263
269
|
|
264
270
|
def delete_document(self, document_id: str) -> None:
|
265
271
|
"""Delete a document from the Chroma index."""
|
266
|
-
self._collection.delete(
|
272
|
+
self._collection.delete(
|
273
|
+
ids=self.get_document_chunks(document_id, include=[])["ids"]
|
274
|
+
)
|
267
275
|
|
268
276
|
def handle_delete(self, event: FileDeleteEvent) -> None:
|
269
277
|
"""Remove documents associated with ``event`` from the Chroma index.
|
@@ -365,7 +373,9 @@ class ChromaIngestor(KnowledgeBaseListener, KnowledgeBaseReindexListener):
|
|
365
373
|
continue
|
366
374
|
|
367
375
|
lines = text.splitlines()
|
368
|
-
file_matches = self._extract_matches_from_lines(
|
376
|
+
file_matches = self._extract_matches_from_lines(
|
377
|
+
candidate, lines, query, context_lines
|
378
|
+
)
|
369
379
|
if file_matches:
|
370
380
|
matches.append(file_matches[0])
|
371
381
|
elif lines:
|
@@ -410,23 +420,29 @@ class ChromaIngestor(KnowledgeBaseListener, KnowledgeBaseReindexListener):
|
|
410
420
|
pass
|
411
421
|
|
412
422
|
payload_metadata = dict(metadata)
|
413
|
-
payload_metadata[
|
423
|
+
payload_metadata["document_id"] = document_id
|
414
424
|
|
415
425
|
# splitting
|
416
426
|
|
417
427
|
split_docs = self.textsplitter.create_documents([content])
|
418
|
-
|
428
|
+
|
419
429
|
for i, d in enumerate(split_docs):
|
420
430
|
d.metadata.update(payload_metadata)
|
421
|
-
d.metadata[
|
422
|
-
d.metadata[
|
423
|
-
|
424
|
-
|
431
|
+
d.metadata["chunk_number"] = i
|
432
|
+
d.metadata["startline"] = len(
|
433
|
+
content[: d.metadata["start_index"]].splitlines()
|
434
|
+
)
|
435
|
+
d.metadata["endline"] = (
|
436
|
+
d.metadata["startline"] + len(d.page_content.splitlines()) - 1
|
437
|
+
)
|
425
438
|
|
426
439
|
self._collection.add(
|
427
440
|
documents=[d.page_content for d in split_docs],
|
428
441
|
metadatas=[d.metadata for d in split_docs],
|
429
|
-
ids=[
|
442
|
+
ids=[
|
443
|
+
f"{d.metadata['document_id']}-{d.metadata['chunk_number']}"
|
444
|
+
for d in split_docs
|
445
|
+
],
|
430
446
|
)
|
431
447
|
|
432
448
|
# Optional full reindex -----------------------------------------------------
|
@@ -454,7 +470,11 @@ class ChromaIngestor(KnowledgeBaseListener, KnowledgeBaseReindexListener):
|
|
454
470
|
|
455
471
|
count = 0
|
456
472
|
root = kb.rules.root
|
457
|
-
with tqdm(
|
473
|
+
with tqdm(
|
474
|
+
kb.iter_active_files(include_docs=False),
|
475
|
+
desc="Reindexing Chroma",
|
476
|
+
total=kb.total_active_files(include_docs=False),
|
477
|
+
) as pbar:
|
458
478
|
for path in pbar:
|
459
479
|
pbar.set_description(f"Reindexing Chroma {path.name}")
|
460
480
|
try:
|
@@ -527,7 +547,9 @@ class ChromaIngestor(KnowledgeBaseListener, KnowledgeBaseReindexListener):
|
|
527
547
|
config = self.configuration
|
528
548
|
|
529
549
|
if not config.enabled:
|
530
|
-
raise RuntimeError(
|
550
|
+
raise RuntimeError(
|
551
|
+
"ChromaIngestor cannot be constructed when ingestion is disabled"
|
552
|
+
)
|
531
553
|
|
532
554
|
if config.client_type == "ephemeral":
|
533
555
|
return chroma.EphemeralClient()
|
mcp_kb/knowledge/bootstrap.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
"""Bootstrap helpers executed during server startup."""
|
2
|
+
|
2
3
|
from __future__ import annotations
|
3
4
|
|
4
5
|
import importlib.resources as resources
|
@@ -33,7 +34,11 @@ def install_default_documentation(root: Path) -> Path:
|
|
33
34
|
|
34
35
|
docs_dir.mkdir(parents=True, exist_ok=True)
|
35
36
|
|
36
|
-
with
|
37
|
+
with (
|
38
|
+
resources.files("mcp_kb.data")
|
39
|
+
.joinpath("KNOWLEDBASE_DOC.md")
|
40
|
+
.open("r", encoding="utf-8") as source
|
41
|
+
):
|
37
42
|
doc_path.write_text(source.read(), encoding="utf-8")
|
38
43
|
|
39
44
|
return doc_path
|
mcp_kb/knowledge/events.py
CHANGED
@@ -7,6 +7,7 @@ coupling the core filesystem logic to specific backends. Each event captures bot
|
|
7
7
|
absolute and knowledge-base-relative paths so that listeners can decide which
|
8
8
|
identifier best fits their storage requirements.
|
9
9
|
"""
|
10
|
+
|
10
11
|
from __future__ import annotations
|
11
12
|
|
12
13
|
from dataclasses import dataclass
|
mcp_kb/knowledge/search.py
CHANGED
@@ -5,6 +5,7 @@ can evolve independently. Search often benefits from dedicated caching or
|
|
5
5
|
indexing strategies; keeping it in its own module means the server can swap the
|
6
6
|
implementation later without changing the core file lifecycle API.
|
7
7
|
"""
|
8
|
+
|
8
9
|
from __future__ import annotations
|
9
10
|
|
10
11
|
from dataclasses import dataclass
|
@@ -147,7 +148,9 @@ def read_documentation(kb: KnowledgeBase) -> str:
|
|
147
148
|
return doc_path.read_text(encoding="utf-8")
|
148
149
|
|
149
150
|
|
150
|
-
def _extract_matches_for_path(
|
151
|
+
def _extract_matches_for_path(
|
152
|
+
path: Path, query: str, context_lines: int
|
153
|
+
) -> List[SearchMatch]:
|
151
154
|
"""Read ``path`` and return every match that contains ``query``."""
|
152
155
|
|
153
156
|
lines = path.read_text(encoding="utf-8").splitlines()
|
mcp_kb/knowledge/store.py
CHANGED
@@ -7,6 +7,7 @@ security constraints defined in the PRD. Each method returns plain Python data
|
|
7
7
|
structures so that higher-level layers (e.g., JSON-RPC handlers) can focus on
|
8
8
|
protocol serialization rather than filesystem minutiae.
|
9
9
|
"""
|
10
|
+
|
10
11
|
from __future__ import annotations
|
11
12
|
|
12
13
|
import re
|
@@ -15,7 +16,11 @@ from pathlib import Path
|
|
15
16
|
from typing import Iterable, Optional
|
16
17
|
|
17
18
|
from mcp_kb.config import DELETE_SENTINEL, DATA_FOLDER_NAME
|
18
|
-
from mcp_kb.knowledge.events import
|
19
|
+
from mcp_kb.knowledge.events import (
|
20
|
+
FileDeleteEvent,
|
21
|
+
FileUpsertEvent,
|
22
|
+
KnowledgeBaseListener,
|
23
|
+
)
|
19
24
|
from mcp_kb.security.path_validation import (
|
20
25
|
PathRules,
|
21
26
|
ensure_write_allowed,
|
@@ -185,10 +190,9 @@ class KnowledgeBase:
|
|
185
190
|
original_relative = self._relative_path(normalized)
|
186
191
|
self._notify_delete(target, original_relative)
|
187
192
|
return target
|
188
|
-
|
193
|
+
|
189
194
|
def total_active_files(self, include_docs: bool = False) -> int:
|
190
|
-
"""Return the total number of non-deleted UTF-8 text files under the root directory.
|
191
|
-
"""
|
195
|
+
"""Return the total number of non-deleted UTF-8 text files under the root directory."""
|
192
196
|
return sum(1 for _ in self.iter_active_files(include_docs=include_docs))
|
193
197
|
|
194
198
|
def iter_active_files(self, include_docs: bool = False) -> Iterable[Path]:
|
@@ -250,7 +254,9 @@ class KnowledgeBase:
|
|
250
254
|
event = FileDeleteEvent(absolute_path=absolute, relative_path=relative)
|
251
255
|
self._dispatch("handle_delete", event)
|
252
256
|
|
253
|
-
def _dispatch(
|
257
|
+
def _dispatch(
|
258
|
+
self, method_name: str, event: FileUpsertEvent | FileDeleteEvent
|
259
|
+
) -> None:
|
254
260
|
"""Call ``method_name`` on every listener and wrap failures for clarity."""
|
255
261
|
|
256
262
|
for listener in self.listeners:
|
@@ -7,6 +7,7 @@ that target the reserved documentation folder. The helper functions are written
|
|
7
7
|
so they can be reused both by the server runtime and by unit tests to keep the
|
8
8
|
security rules consistent.
|
9
9
|
"""
|
10
|
+
|
10
11
|
from __future__ import annotations
|
11
12
|
|
12
13
|
from dataclasses import dataclass
|
@@ -69,13 +70,17 @@ def normalize_path(candidate: str, rules: PathRules) -> Path:
|
|
69
70
|
|
70
71
|
path_obj = Path(candidate)
|
71
72
|
if path_obj.is_absolute():
|
72
|
-
raise PathValidationError(
|
73
|
+
raise PathValidationError(
|
74
|
+
"Absolute paths are not permitted inside the knowledge base"
|
75
|
+
)
|
73
76
|
|
74
77
|
normalized = (rules.root / path_obj).resolve()
|
75
78
|
try:
|
76
79
|
normalized.relative_to(rules.root)
|
77
80
|
except ValueError as exc:
|
78
|
-
raise PathValidationError(
|
81
|
+
raise PathValidationError(
|
82
|
+
"Path resolves outside the knowledge base root"
|
83
|
+
) from exc
|
79
84
|
|
80
85
|
if DELETE_SENTINEL in normalized.name:
|
81
86
|
raise PathValidationError("Operations on soft-deleted files are not permitted")
|
mcp_kb/server/app.py
CHANGED
@@ -5,6 +5,7 @@ operations defined elsewhere in the package. Using FastMCP drastically reduces
|
|
5
5
|
protocol boilerplate because the framework introspects type hints and
|
6
6
|
Docstrings to generate MCP-compatible tool schemas automatically.
|
7
7
|
"""
|
8
|
+
|
8
9
|
from __future__ import annotations
|
9
10
|
|
10
11
|
from dataclasses import dataclass
|
@@ -90,8 +91,8 @@ def create_fastmcp_app(
|
|
90
91
|
"mcp-knowledge-base",
|
91
92
|
instructions=(
|
92
93
|
"You are connected to a local text-based knowledge base. Use the provided "
|
93
|
-
"tools to create, inspect, and organize content
|
94
|
-
"
|
94
|
+
"tools to create, inspect, and organize content and search the knowledgebase for information.\n"
|
95
|
+
"Call the documentation tool first to get the latest documentation."
|
95
96
|
),
|
96
97
|
**fastmcp_kwargs,
|
97
98
|
)
|
@@ -107,11 +108,15 @@ def create_fastmcp_app(
|
|
107
108
|
return f"Created {created}"
|
108
109
|
|
109
110
|
@mcp.tool(name="read_file", title="Read File", structured_output=True)
|
110
|
-
def read_file(
|
111
|
+
def read_file(
|
112
|
+
path: str, start_line: int | None = None, end_line: int | None = None
|
113
|
+
) -> ReadFileResult:
|
111
114
|
"""Read a text file returning metadata about the extracted segment."""
|
112
115
|
|
113
116
|
try:
|
114
|
-
segment: FileSegment = kb.read_file(
|
117
|
+
segment: FileSegment = kb.read_file(
|
118
|
+
path, start_line=start_line, end_line=end_line
|
119
|
+
)
|
115
120
|
except PathValidationError as exc:
|
116
121
|
raise ValueError(str(exc)) from exc
|
117
122
|
except FileNotFoundError as exc:
|
@@ -176,7 +181,11 @@ def create_fastmcp_app(
|
|
176
181
|
)
|
177
182
|
return [
|
178
183
|
SearchMatchResult(
|
179
|
-
path=str(
|
184
|
+
path=str(
|
185
|
+
match.path.relative_to(kb.rules.root)
|
186
|
+
if match.path.is_absolute()
|
187
|
+
else match.path
|
188
|
+
),
|
180
189
|
line=match.line_number,
|
181
190
|
context=match.context,
|
182
191
|
)
|
mcp_kb/utils/filesystem.py
CHANGED
@@ -6,6 +6,7 @@ such as validating incoming requests and shaping responses. Each helper function
|
|
6
6
|
is intentionally small so that callers can compose them for different workflows
|
7
7
|
without duplicating the low-level boilerplate.
|
8
8
|
"""
|
9
|
+
|
9
10
|
from __future__ import annotations
|
10
11
|
|
11
12
|
from contextlib import contextmanager
|
@@ -0,0 +1,26 @@
|
|
1
|
+
mcp_kb/__init__.py,sha256=Ry7qODhfFQF6u6p2m3bwGWhB0-BdWTQcHDJB7NBYAio,74
|
2
|
+
mcp_kb/config.py,sha256=NUpzjDH4PQw4FyjGgYUcMsGMeenNiZTMrQj4U62xKlk,2530
|
3
|
+
mcp_kb/cli/__init__.py,sha256=dEIRWFycAfPkha1S1Bj_Y6zkvEZv4eF0qtbF9t74r60,67
|
4
|
+
mcp_kb/cli/args.py,sha256=YVO7teHGuk2Yc36Sqwtv457dgrRd_YB7YN5wwFLKSXs,5371
|
5
|
+
mcp_kb/cli/main.py,sha256=twwBKRyS21IPolkLyM2gDvFBko3QpR9_ho-NJ1uXcR4,3912
|
6
|
+
mcp_kb/cli/reindex.py,sha256=TAcBtjsEJ1wSyB8iOUWp8I7PHVOEuNFMe0-Mc7mk21Y,3047
|
7
|
+
mcp_kb/data/KNOWLEDBASE_DOC.md,sha256=lZqoSRQuIs7nN0UD5GJOnE6B7XvU3gywrBoY6ToK-pE,6582
|
8
|
+
mcp_kb/data/__init__.py,sha256=UYYuO_n2ikjpwkPSykgleiifYvC0V8_O-atUaRBQUm4,70
|
9
|
+
mcp_kb/ingest/__init__.py,sha256=8obrvfa8nLNLYPbi1MHlFUqfoFHgK9YfdryPzAXQ6kU,77
|
10
|
+
mcp_kb/ingest/chroma.py,sha256=KCNySeUpCy-FEHglacE1hh8WpgcDqbZ-UTTkFChFAuA,22436
|
11
|
+
mcp_kb/knowledge/__init__.py,sha256=W_dtRbtnQlrDJ_425vWR8BcoZGJ8gC5-wg1De1E654s,76
|
12
|
+
mcp_kb/knowledge/bootstrap.py,sha256=Og72GvxeJX7PLe_vHVMzRqnXIS06JswSrIdKcz776_8,1237
|
13
|
+
mcp_kb/knowledge/events.py,sha256=V-64uBbJZdKm8mwcbeOMtSC8VZW5NoN1DwUtcwfOFVc,3550
|
14
|
+
mcp_kb/knowledge/search.py,sha256=Qx2AxuQ1h0hdOMX9du9I6-yStzOlvIiB9d7F6fFGOf4,5908
|
15
|
+
mcp_kb/knowledge/store.py,sha256=eTBtCTTkyGizXoEz4cO_YGUmR26fmUXhyCSLrfZ3-CY,10021
|
16
|
+
mcp_kb/security/__init__.py,sha256=lF8_XAjzpwhAFresuskXMo0u9v7KFiTJId88wqOAM4Y,62
|
17
|
+
mcp_kb/security/path_validation.py,sha256=3f-0De4801-cMU4uwi1QM6NalAj4IU_hj8r-iK0NJ_k,3662
|
18
|
+
mcp_kb/server/__init__.py,sha256=j9TmxW_WLCoibyQvCsDT1MIuUqSL8sRh2h4u0M4eU0c,74
|
19
|
+
mcp_kb/server/app.py,sha256=KAnOXT-7TdKqt_uW_vobc6RLq8YWRzahTQSGl8vGZy8,7287
|
20
|
+
mcp_kb/utils/__init__.py,sha256=lKhRsjgnbhye1sSlch1_wsAI3eWKE1M6RVIiNlnsvLI,71
|
21
|
+
mcp_kb/utils/filesystem.py,sha256=1Jr9cxIimV-o91DJMh5lR9GLFE3BDknoGquVBFQ-fd4,4027
|
22
|
+
mcp_kb-0.2.1.dist-info/METADATA,sha256=53sKD_Z4cBkBFsCC36giXr4LsJGyoPMCTroHez2XZi4,5122
|
23
|
+
mcp_kb-0.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
24
|
+
mcp_kb-0.2.1.dist-info/entry_points.txt,sha256=qwJkR3vV7ZeydfS_IYMiDwLv4BdTkrOf4-5neWj25g0,96
|
25
|
+
mcp_kb-0.2.1.dist-info/top_level.txt,sha256=IBiz3TNE3FF3TwkbCZpC1kkk6ohTwtBQNSPJNV3-qGA,7
|
26
|
+
mcp_kb-0.2.1.dist-info/RECORD,,
|
mcp_kb-0.2.0.dist-info/RECORD
DELETED
@@ -1,26 +0,0 @@
|
|
1
|
-
mcp_kb/__init__.py,sha256=Ry7qODhfFQF6u6p2m3bwGWhB0-BdWTQcHDJB7NBYAio,74
|
2
|
-
mcp_kb/config.py,sha256=VallCc3_Bjcm2FPElthupvXdbMuXvDkiTkQKj0f4dkQ,2506
|
3
|
-
mcp_kb/cli/__init__.py,sha256=dEIRWFycAfPkha1S1Bj_Y6zkvEZv4eF0qtbF9t74r60,67
|
4
|
-
mcp_kb/cli/args.py,sha256=0yU5lwjjUkgk91ksocqOdpqO_u5JU6xuCaayiOJ-5pQ,5371
|
5
|
-
mcp_kb/cli/main.py,sha256=FMsnWcXmEsXXfETyvPMP2il9jREGFWmR8t23-6QfhMo,3864
|
6
|
-
mcp_kb/cli/reindex.py,sha256=UBBN7_u9rcgGXel5FKnnA3yd7a-AQMwHMwQjt7ZFrSs,3033
|
7
|
-
mcp_kb/data/KNOWLEDBASE_DOC.md,sha256=bkSpdK1W3F0KR6d3q4V_23fnY8Kw2IBjXPvTTRv06AI,1663
|
8
|
-
mcp_kb/data/__init__.py,sha256=UYYuO_n2ikjpwkPSykgleiifYvC0V8_O-atUaRBQUm4,70
|
9
|
-
mcp_kb/ingest/__init__.py,sha256=8obrvfa8nLNLYPbi1MHlFUqfoFHgK9YfdryPzAXQ6kU,77
|
10
|
-
mcp_kb/ingest/chroma.py,sha256=3Kt7or1Z9ng-wBeXeuOPhrVIfjokwkF25jKHdAeYSH8,22170
|
11
|
-
mcp_kb/knowledge/__init__.py,sha256=W_dtRbtnQlrDJ_425vWR8BcoZGJ8gC5-wg1De1E654s,76
|
12
|
-
mcp_kb/knowledge/bootstrap.py,sha256=WlbJUXhxglyWjlvwhUdT20oijLNLaZOePQ6nYwfBCxk,1202
|
13
|
-
mcp_kb/knowledge/events.py,sha256=A7CfD7U5bxo6mCCIic83yE7VPixAN05ssw_HKRF2zxw,3549
|
14
|
-
mcp_kb/knowledge/search.py,sha256=AKsyNipsA8bfRxIJb49tdsU4ICzbHeFrA1Ikvlk1u7w,5901
|
15
|
-
mcp_kb/knowledge/store.py,sha256=urZaLrSRjgqxb_hLC6RQjDeNw8Id14ripQH_6HdWb3o,10002
|
16
|
-
mcp_kb/security/__init__.py,sha256=lF8_XAjzpwhAFresuskXMo0u9v7KFiTJId88wqOAM4Y,62
|
17
|
-
mcp_kb/security/path_validation.py,sha256=21bfKdxjHY-ywDYw0DcGCeXDnvdXDILWVuueUsuuZUM,3617
|
18
|
-
mcp_kb/server/__init__.py,sha256=j9TmxW_WLCoibyQvCsDT1MIuUqSL8sRh2h4u0M4eU0c,74
|
19
|
-
mcp_kb/server/app.py,sha256=7s10UJWFopJ4CZPqZ4briTKsfjDiRo44ZbLAeWb6Lj8,7064
|
20
|
-
mcp_kb/utils/__init__.py,sha256=lKhRsjgnbhye1sSlch1_wsAI3eWKE1M6RVIiNlnsvLI,71
|
21
|
-
mcp_kb/utils/filesystem.py,sha256=0M-Waf2vfqhp8UL__2Emfpwpoqxshti3M7XLjXnpjJw,4026
|
22
|
-
mcp_kb-0.2.0.dist-info/METADATA,sha256=AjAH4xcztes0PzJkjW3J9qyes0VDXS7jfsJFQMq2s1g,5122
|
23
|
-
mcp_kb-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
24
|
-
mcp_kb-0.2.0.dist-info/entry_points.txt,sha256=qwJkR3vV7ZeydfS_IYMiDwLv4BdTkrOf4-5neWj25g0,96
|
25
|
-
mcp_kb-0.2.0.dist-info/top_level.txt,sha256=IBiz3TNE3FF3TwkbCZpC1kkk6ohTwtBQNSPJNV3-qGA,7
|
26
|
-
mcp_kb-0.2.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|