mcp-kb 0.2.0__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/PKG-INFO +1 -1
  2. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb/cli/main.py +9 -2
  3. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb/cli/reindex.py +4 -2
  4. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb/config.py +7 -5
  5. mcp_kb-0.2.1/mcp_kb/data/KNOWLEDBASE_DOC.md +151 -0
  6. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb/ingest/chroma.py +41 -19
  7. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb/knowledge/bootstrap.py +6 -1
  8. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb/knowledge/events.py +1 -0
  9. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb/knowledge/search.py +4 -1
  10. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb/knowledge/store.py +11 -5
  11. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb/security/path_validation.py +7 -2
  12. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb/server/app.py +14 -5
  13. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb/utils/filesystem.py +1 -0
  14. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb.egg-info/PKG-INFO +1 -1
  15. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/pyproject.toml +1 -1
  16. mcp_kb-0.2.0/mcp_kb/data/KNOWLEDBASE_DOC.md +0 -36
  17. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/README.md +0 -0
  18. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb/__init__.py +0 -0
  19. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb/cli/__init__.py +0 -0
  20. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb/cli/args.py +1 -1
  21. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb/data/__init__.py +0 -0
  22. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb/ingest/__init__.py +0 -0
  23. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb/knowledge/__init__.py +0 -0
  24. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb/security/__init__.py +0 -0
  25. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb/server/__init__.py +0 -0
  26. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb/utils/__init__.py +0 -0
  27. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb.egg-info/SOURCES.txt +0 -0
  28. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb.egg-info/dependency_links.txt +0 -0
  29. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb.egg-info/entry_points.txt +0 -0
  30. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb.egg-info/requires.txt +0 -0
  31. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/mcp_kb.egg-info/top_level.txt +0 -0
  32. {mcp_kb-0.2.0 → mcp_kb-0.2.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mcp-kb
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: MCP server exposing a local markdown knowledge base
5
5
  Author: LLM Maintainer
6
6
  Requires-Python: >=3.11
@@ -1,4 +1,5 @@
1
1
  """Command line interface for running the MCP knowledge base server."""
2
+
2
3
  from __future__ import annotations
3
4
 
4
5
  import argparse
@@ -100,9 +101,15 @@ def run_server(arguments: Iterable[str] | None = None) -> None:
100
101
  listeners=listeners,
101
102
  )
102
103
  transports = options.transports or ["stdio"]
103
- logger.info(f"Running server on {options.host}:{options.port} with transports {transports}")
104
+ logger.info(
105
+ f"Running server on {options.host}:{options.port} with transports {transports}"
106
+ )
104
107
  logger.info(f"Data root is {root_path}")
105
- print("--------------------------------",root_path,"--------------------------------")
108
+ print(
109
+ "--------------------------------",
110
+ root_path,
111
+ "--------------------------------",
112
+ )
106
113
  asyncio.run(_run_transports(server, transports))
107
114
 
108
115
 
@@ -5,6 +5,7 @@ ingestors and calls their ``reindex`` method when available, allowing operators
5
5
  to trigger a full rebuild of external indexes (e.g., Chroma) from the current
6
6
  filesystem state.
7
7
  """
8
+
8
9
  from __future__ import annotations
9
10
 
10
11
  import argparse
@@ -24,7 +25,9 @@ logger = logging.getLogger(__name__)
24
25
  def _build_argument_parser() -> argparse.ArgumentParser:
25
26
  """Return the argument parser for the reindex command."""
26
27
 
27
- parser = argparse.ArgumentParser(description="Reindex the knowledge base into configured backends")
28
+ parser = argparse.ArgumentParser(
29
+ description="Reindex the knowledge base into configured backends"
30
+ )
28
31
  parser.add_argument(
29
32
  "--root",
30
33
  dest="root",
@@ -88,4 +91,3 @@ def main() -> None:
88
91
 
89
92
  if __name__ == "__main__":
90
93
  main()
91
-
@@ -7,6 +7,7 @@ logic more reusable across different deployment environments because callers can
7
7
  swap configurations programmatically or via environment variables without
8
8
  modifying the core modules.
9
9
  """
10
+
10
11
  from __future__ import annotations
11
12
 
12
13
  from pathlib import Path
@@ -58,12 +59,13 @@ def resolve_knowledge_base_root(provided_path: str | None = None) -> Path:
58
59
  root directory.
59
60
  """
60
61
 
61
-
62
- candidate = provided_path or os.getenv(ENV_ROOT_KEY) or Path(
63
- os.getenv('WORKSPACE_FOLDER_PATHS') or Path.cwd()
64
- )/DEFAULT_KNOWLEDGE_BASE_DIR
62
+ candidate = (
63
+ provided_path
64
+ or os.getenv(ENV_ROOT_KEY)
65
+ or Path(os.getenv("WORKSPACE_FOLDER_PATHS") or Path.cwd())
66
+ / DEFAULT_KNOWLEDGE_BASE_DIR
67
+ )
65
68
  root_path = Path(candidate).expanduser().resolve()
66
69
  root_path.mkdir(parents=True, exist_ok=True)
67
70
 
68
-
69
71
  return root_path
@@ -0,0 +1,151 @@
1
+ # LLM Operating Manual — MCP Knowledge Base (`mcp-kb`)
2
+
3
+ You are connected to a **local, text-only knowledge base**. Your job is to **search, read, create, update, and soft-delete** UTF‑8 text files under a single root directory while respecting the safety rules below. Use the provided MCP tools exactly as specified.
4
+
5
+ ---
6
+
7
+ ## Ground Rules (enforced by the server)
8
+
9
+ - **Paths are relative only.** Absolute paths are rejected. No `..` traversal.
10
+ - **Protected folder:** `.data/` is read‑only. Do not write there.
11
+ - **Soft delete sentinel:** Files marked with `_DELETE_` in the name are considered deleted. Do not read/write them.
12
+ - **Text files only.** Binary-ish files are ignored by scans. Treat this KB as UTF‑8 text storage.
13
+ - **Concurrency:** Writes are serialized per file; still prefer read‑verify‑write sequences.
14
+
15
+ Constants (baked into the server):
16
+ - Protected folder: `.data`
17
+ - Documentation file name: `KNOWLEDBASE_DOC.md`
18
+ - Delete sentinel: `_DELETE_`
19
+
20
+ ---
21
+
22
+ ## Tools You Can Call
23
+
24
+ All tool names and parameter contracts are stable. Stick to these shapes.
25
+
26
+ ### `create_file(path: str, content: str) -> str`
27
+ - Create or **overwrite** a text file at `path` with `content`.
28
+ - `path` must be **relative** and **outside** `.data/`.
29
+
30
+ ### `read_file(path: str, start_line?: int, end_line?: int) -> { path, start_line, end_line, content }`
31
+ - Read full file or a 1‑based inclusive slice.
32
+ - If both bounds are omitted ⇒ full file. If only one bound is omitted ⇒ the server fills in the missing one.
33
+
34
+ ### `append_file(path: str, content: str) -> str`
35
+ - Append text. If file is missing, it will be **created**.
36
+
37
+ ### `regex_replace(path: str, pattern: str, replacement: str) -> { replacements: int }`
38
+ - Applies the regex with `re.MULTILINE` and returns the number of replacements. Always call `read_file` afterwards to verify.
39
+
40
+ ### `delete(path: str) -> str`
41
+ - **Soft delete**: renames `name.ext` to `name_DELETE_.ext`. Use when content is obsolete.
42
+
43
+ ### `search(query: str, limit: int = 5) -> [{ path, line, context: string[] }]`
44
+ - Returns up to `limit` matches with short context.
45
+ - If Chroma mirroring is active, results are **semantic** first; otherwise plain scan.
46
+ - `limit` must be **> 0**.
47
+
48
+ ### `overview() -> str`
49
+ - A deterministic `tree`-like view of active files under root (skips deleted and binaries).
50
+
51
+ ### `documentation() -> str`
52
+ - Human usage guide (not this manual). For you, prefer this manual.
53
+
54
+ ---
55
+
56
+ ## How to Work Effectively
57
+
58
+ ### 1) Discover
59
+ - Call `overview()` to understand the tree.
60
+ - If you need conventions or human guidelines, read `documentation()` (optional).
61
+
62
+ ### 2) Locate Content
63
+ - Prefer `search("keywords", limit=5)` to find candidate files/snippets.
64
+ - Examine each `{path, line, context}`. The `context` is a short window around the hit.
65
+ - If results look thin, **increase `limit`** (e.g., 10–20) before broadening the query.
66
+
67
+ ### 3) Read Precisely
68
+ - Use `read_file(path)` for the full file when structure matters.
69
+ - If the file is large but you know the region, use `read_file(path, start_line, end_line)` to minimize tokens.
70
+
71
+ ### 4) Create New Knowledge
72
+ - Pick a **descriptive relative path** (folders based on topic, kebab‑case names).
73
+ - Example: `architecture/decision-records/adr-2025-10-06-edge-cache.md`
74
+ - Call `create_file(path, content)`.
75
+ - Keep the **title as the first Markdown heading** so search has context.
76
+ - Link related files with **relative Markdown links**.
77
+
78
+ ### 5) Update Safely
79
+ - For small edits:
80
+ 1) `read_file(...)` to confirm current state.
81
+ 2) `regex_replace(path, pattern, replacement)` for targeted changes.
82
+ 3) `read_file(...)` again to verify.
83
+ - For additive changes: `append_file(path, "\n...")`.
84
+
85
+ ### 6) Deletion Policy
86
+ - Use `delete(path)` to **soft-delete**. Do not operate on files that already include `_DELETE_` in their name.
87
+
88
+ ---
89
+
90
+ ## Search Semantics (important)
91
+
92
+ - When Chroma ingestion is **enabled**, `search()` uses semantic ranking first and returns the **best slice per file** (the ingestor extracts one representative match per document chunk/file). If no obvious line match is found, you may get a **top-of-file preview** — then call `read_file()` to confirm.
93
+ - When Chroma is **not** enabled, `search()` scans files literally and returns all matches up to `limit`.
94
+ - Always **validate** by fetching the file segment with `read_file()` before making edits.
95
+
96
+ ---
97
+
98
+ ## Parameter Contracts and Gotchas
99
+
100
+ - `path` must be **relative** (e.g., `notes/today.md`). Absolute paths are rejected.
101
+ - Do **not** write into `.data/` (protected). Reads are allowed there.
102
+ - Line numbers in `read_file` are **1‑based** and the interval is **inclusive**.
103
+ - `regex_replace` uses Python’s `re.MULTILINE`. Validate your pattern; avoid overly broad substitutions.
104
+ - `append_file` will create a file if missing (useful for logs/progress notes).
105
+
106
+ ---
107
+
108
+ ## Typical Recipes
109
+
110
+ **Find → Read → Edit**
111
+ 1. `search("beta feature toggle", limit=10)`
112
+ 2. Pick a result: `read_file("features/toggles.md", 40, 80)`
113
+ 3. Adjust: `regex_replace("features/toggles.md", "^Status:.*$", "Status: Enabled")`
114
+ 4. Verify: `read_file("features/toggles.md")` (check the `Status:` header)
115
+
116
+ **Add a new doc**
117
+ 1. `create_file("ops/runbooks/cache-invalidation.md", "# Cache Invalidation\n\n…")`
118
+ 2. Optionally link it from an index: `append_file("ops/README.md", "\n- [Cache Invalidation](runbooks/cache-invalidation.md)")`
119
+
120
+ **Soft delete an obsolete note**
121
+ 1. `delete("notes/old-incident.md")`
122
+
123
+ ---
124
+
125
+ ## Error Recovery
126
+
127
+ - **"Absolute paths are not permitted"** → Use a **relative** path.
128
+ - **"Writes are not allowed inside the protected folder '.data'"** → Choose a different folder (e.g., `docs/`).
129
+ - **"File 'X' does not exist"** on delete → Confirm with `overview()` or `search()`. Only existing non‑deleted files can be soft‑deleted.
130
+ - **No search hits** → Widen keywords, increase `limit`, or pivot to `overview()` to eyeball likely locations.
131
+
132
+ ---
133
+
134
+ ## Things You Should Not Do
135
+
136
+ - Do not fabricate file contents or paths. Always confirm with `overview()`, `search()`, and `read_file()`.
137
+ - Do not operate on files that include `_DELETE_` in their name.
138
+ - Do not attempt to talk directly to Chroma; you only use `search()`. Indexing is handled automatically after writes.
139
+ - Do not write binary or non‑UTF‑8 content.
140
+
141
+ ---
142
+
143
+ ## Performance Hints
144
+
145
+ - Prefer `search()` + targeted `read_file()` slices over reading entire large files.
146
+ - Keep `limit` modest (5–10) unless you must broaden the search.
147
+ - Batch edits in one file using a single `regex_replace` when safe (then verify).
148
+
149
+ ---
150
+
151
+ You now have the minimal contract to operate this KB safely and efficiently.
@@ -1,4 +1,5 @@
1
1
  """Integration layer that mirrors knowledge base updates into ChromaDB."""
2
+
2
3
  from __future__ import annotations
3
4
 
4
5
  import importlib
@@ -25,6 +26,7 @@ if TYPE_CHECKING: # pragma: no cover - type checking only imports
25
26
  SUPPORTED_CLIENTS: Tuple[str, ...] = ("off", "ephemeral", "persistent", "http", "cloud")
26
27
  """Recognised client types exposed to operators enabling Chroma ingestion."""
27
28
 
29
+
28
30
  @dataclass(frozen=True)
29
31
  class ChromaConfiguration:
30
32
  """Runtime configuration controlling how Chroma ingestion behaves.
@@ -106,7 +108,7 @@ class ChromaConfiguration:
106
108
  if data_directory:
107
109
  resolved_directory = Path(data_directory).expanduser().resolve()
108
110
  elif normalized_type == "persistent":
109
- resolved_directory = (root/DATA_FOLDER_NAME / "chroma").resolve()
111
+ resolved_directory = (root / DATA_FOLDER_NAME / "chroma").resolve()
110
112
  else:
111
113
  resolved_directory = None
112
114
 
@@ -144,7 +146,9 @@ class ChromaConfiguration:
144
146
  raise ValueError("Persistent Chroma client requires a data directory")
145
147
 
146
148
  if self.client_type == "http" and not self.host:
147
- raise ValueError("HTTP Chroma client requires --chroma-host or MCP_KB_CHROMA_HOST")
149
+ raise ValueError(
150
+ "HTTP Chroma client requires --chroma-host or MCP_KB_CHROMA_HOST"
151
+ )
148
152
 
149
153
  if self.client_type == "cloud":
150
154
  missing = [
@@ -203,7 +207,9 @@ def _load_dependencies() -> _ChromaDependencies:
203
207
  if hasattr(embedding_module, attr):
204
208
  factories[alias] = getattr(embedding_module, attr)
205
209
  if not factories:
206
- raise RuntimeError("No embedding functions were found in chromadb.utils.embedding_functions")
210
+ raise RuntimeError(
211
+ "No embedding functions were found in chromadb.utils.embedding_functions"
212
+ )
207
213
 
208
214
  return _ChromaDependencies(
209
215
  chroma_module=chroma_module,
@@ -236,14 +242,14 @@ class ChromaIngestor(KnowledgeBaseListener, KnowledgeBaseReindexListener):
236
242
  self._client = self._create_client()
237
243
  self._collection = self._ensure_collection()
238
244
  self.textsplitter = TokenTextSplitter(
239
- chunk_size=200,
240
- chunk_overlap=20,
241
- add_start_index=True
245
+ chunk_size=200, chunk_overlap=20, add_start_index=True
242
246
  )
243
247
 
244
- def get_document_chunks(self, document_id: str, include: List[str] = ["metadatas", "documents"]) -> GetResult:
248
+ def get_document_chunks(
249
+ self, document_id: str, include: List[str] = ["metadatas", "documents"]
250
+ ) -> GetResult:
245
251
  """Get a document from the Chroma index."""
246
- return self._collection.get(where={"document_id": document_id},include=include)
252
+ return self._collection.get(where={"document_id": document_id}, include=include)
247
253
 
248
254
  def handle_upsert(self, event: FileUpsertEvent) -> None:
249
255
  """Upsert ``event`` into the configured Chroma collection.
@@ -263,7 +269,9 @@ class ChromaIngestor(KnowledgeBaseListener, KnowledgeBaseReindexListener):
263
269
 
264
270
  def delete_document(self, document_id: str) -> None:
265
271
  """Delete a document from the Chroma index."""
266
- self._collection.delete(ids=self.get_document_chunks(document_id,include=[])["ids"])
272
+ self._collection.delete(
273
+ ids=self.get_document_chunks(document_id, include=[])["ids"]
274
+ )
267
275
 
268
276
  def handle_delete(self, event: FileDeleteEvent) -> None:
269
277
  """Remove documents associated with ``event`` from the Chroma index.
@@ -365,7 +373,9 @@ class ChromaIngestor(KnowledgeBaseListener, KnowledgeBaseReindexListener):
365
373
  continue
366
374
 
367
375
  lines = text.splitlines()
368
- file_matches = self._extract_matches_from_lines(candidate, lines, query, context_lines)
376
+ file_matches = self._extract_matches_from_lines(
377
+ candidate, lines, query, context_lines
378
+ )
369
379
  if file_matches:
370
380
  matches.append(file_matches[0])
371
381
  elif lines:
@@ -410,23 +420,29 @@ class ChromaIngestor(KnowledgeBaseListener, KnowledgeBaseReindexListener):
410
420
  pass
411
421
 
412
422
  payload_metadata = dict(metadata)
413
- payload_metadata['document_id'] = document_id
423
+ payload_metadata["document_id"] = document_id
414
424
 
415
425
  # splitting
416
426
 
417
427
  split_docs = self.textsplitter.create_documents([content])
418
-
428
+
419
429
  for i, d in enumerate(split_docs):
420
430
  d.metadata.update(payload_metadata)
421
- d.metadata['chunk_number'] = i
422
- d.metadata['startline'] = len(content[:d.metadata['start_index']].splitlines())
423
- d.metadata['endline'] = d.metadata['startline'] + len(d.page_content.splitlines())-1
424
-
431
+ d.metadata["chunk_number"] = i
432
+ d.metadata["startline"] = len(
433
+ content[: d.metadata["start_index"]].splitlines()
434
+ )
435
+ d.metadata["endline"] = (
436
+ d.metadata["startline"] + len(d.page_content.splitlines()) - 1
437
+ )
425
438
 
426
439
  self._collection.add(
427
440
  documents=[d.page_content for d in split_docs],
428
441
  metadatas=[d.metadata for d in split_docs],
429
- ids=[f"{d.metadata['document_id']}-{d.metadata['chunk_number']}" for d in split_docs],
442
+ ids=[
443
+ f"{d.metadata['document_id']}-{d.metadata['chunk_number']}"
444
+ for d in split_docs
445
+ ],
430
446
  )
431
447
 
432
448
  # Optional full reindex -----------------------------------------------------
@@ -454,7 +470,11 @@ class ChromaIngestor(KnowledgeBaseListener, KnowledgeBaseReindexListener):
454
470
 
455
471
  count = 0
456
472
  root = kb.rules.root
457
- with tqdm(kb.iter_active_files(include_docs=False), desc="Reindexing Chroma",total=kb.total_active_files(include_docs=False)) as pbar:
473
+ with tqdm(
474
+ kb.iter_active_files(include_docs=False),
475
+ desc="Reindexing Chroma",
476
+ total=kb.total_active_files(include_docs=False),
477
+ ) as pbar:
458
478
  for path in pbar:
459
479
  pbar.set_description(f"Reindexing Chroma {path.name}")
460
480
  try:
@@ -527,7 +547,9 @@ class ChromaIngestor(KnowledgeBaseListener, KnowledgeBaseReindexListener):
527
547
  config = self.configuration
528
548
 
529
549
  if not config.enabled:
530
- raise RuntimeError("ChromaIngestor cannot be constructed when ingestion is disabled")
550
+ raise RuntimeError(
551
+ "ChromaIngestor cannot be constructed when ingestion is disabled"
552
+ )
531
553
 
532
554
  if config.client_type == "ephemeral":
533
555
  return chroma.EphemeralClient()
@@ -1,4 +1,5 @@
1
1
  """Bootstrap helpers executed during server startup."""
2
+
2
3
  from __future__ import annotations
3
4
 
4
5
  import importlib.resources as resources
@@ -33,7 +34,11 @@ def install_default_documentation(root: Path) -> Path:
33
34
 
34
35
  docs_dir.mkdir(parents=True, exist_ok=True)
35
36
 
36
- with resources.files("mcp_kb.data").joinpath("KNOWLEDBASE_DOC.md").open("r", encoding="utf-8") as source:
37
+ with (
38
+ resources.files("mcp_kb.data")
39
+ .joinpath("KNOWLEDBASE_DOC.md")
40
+ .open("r", encoding="utf-8") as source
41
+ ):
37
42
  doc_path.write_text(source.read(), encoding="utf-8")
38
43
 
39
44
  return doc_path
@@ -7,6 +7,7 @@ coupling the core filesystem logic to specific backends. Each event captures bot
7
7
  absolute and knowledge-base-relative paths so that listeners can decide which
8
8
  identifier best fits their storage requirements.
9
9
  """
10
+
10
11
  from __future__ import annotations
11
12
 
12
13
  from dataclasses import dataclass
@@ -5,6 +5,7 @@ can evolve independently. Search often benefits from dedicated caching or
5
5
  indexing strategies; keeping it in its own module means the server can swap the
6
6
  implementation later without changing the core file lifecycle API.
7
7
  """
8
+
8
9
  from __future__ import annotations
9
10
 
10
11
  from dataclasses import dataclass
@@ -147,7 +148,9 @@ def read_documentation(kb: KnowledgeBase) -> str:
147
148
  return doc_path.read_text(encoding="utf-8")
148
149
 
149
150
 
150
- def _extract_matches_for_path(path: Path, query: str, context_lines: int) -> List[SearchMatch]:
151
+ def _extract_matches_for_path(
152
+ path: Path, query: str, context_lines: int
153
+ ) -> List[SearchMatch]:
151
154
  """Read ``path`` and return every match that contains ``query``."""
152
155
 
153
156
  lines = path.read_text(encoding="utf-8").splitlines()
@@ -7,6 +7,7 @@ security constraints defined in the PRD. Each method returns plain Python data
7
7
  structures so that higher-level layers (e.g., JSON-RPC handlers) can focus on
8
8
  protocol serialization rather than filesystem minutiae.
9
9
  """
10
+
10
11
  from __future__ import annotations
11
12
 
12
13
  import re
@@ -15,7 +16,11 @@ from pathlib import Path
15
16
  from typing import Iterable, Optional
16
17
 
17
18
  from mcp_kb.config import DELETE_SENTINEL, DATA_FOLDER_NAME
18
- from mcp_kb.knowledge.events import FileDeleteEvent, FileUpsertEvent, KnowledgeBaseListener
19
+ from mcp_kb.knowledge.events import (
20
+ FileDeleteEvent,
21
+ FileUpsertEvent,
22
+ KnowledgeBaseListener,
23
+ )
19
24
  from mcp_kb.security.path_validation import (
20
25
  PathRules,
21
26
  ensure_write_allowed,
@@ -185,10 +190,9 @@ class KnowledgeBase:
185
190
  original_relative = self._relative_path(normalized)
186
191
  self._notify_delete(target, original_relative)
187
192
  return target
188
-
193
+
189
194
  def total_active_files(self, include_docs: bool = False) -> int:
190
- """Return the total number of non-deleted UTF-8 text files under the root directory.
191
- """
195
+ """Return the total number of non-deleted UTF-8 text files under the root directory."""
192
196
  return sum(1 for _ in self.iter_active_files(include_docs=include_docs))
193
197
 
194
198
  def iter_active_files(self, include_docs: bool = False) -> Iterable[Path]:
@@ -250,7 +254,9 @@ class KnowledgeBase:
250
254
  event = FileDeleteEvent(absolute_path=absolute, relative_path=relative)
251
255
  self._dispatch("handle_delete", event)
252
256
 
253
- def _dispatch(self, method_name: str, event: FileUpsertEvent | FileDeleteEvent) -> None:
257
+ def _dispatch(
258
+ self, method_name: str, event: FileUpsertEvent | FileDeleteEvent
259
+ ) -> None:
254
260
  """Call ``method_name`` on every listener and wrap failures for clarity."""
255
261
 
256
262
  for listener in self.listeners:
@@ -7,6 +7,7 @@ that target the reserved documentation folder. The helper functions are written
7
7
  so they can be reused both by the server runtime and by unit tests to keep the
8
8
  security rules consistent.
9
9
  """
10
+
10
11
  from __future__ import annotations
11
12
 
12
13
  from dataclasses import dataclass
@@ -69,13 +70,17 @@ def normalize_path(candidate: str, rules: PathRules) -> Path:
69
70
 
70
71
  path_obj = Path(candidate)
71
72
  if path_obj.is_absolute():
72
- raise PathValidationError("Absolute paths are not permitted inside the knowledge base")
73
+ raise PathValidationError(
74
+ "Absolute paths are not permitted inside the knowledge base"
75
+ )
73
76
 
74
77
  normalized = (rules.root / path_obj).resolve()
75
78
  try:
76
79
  normalized.relative_to(rules.root)
77
80
  except ValueError as exc:
78
- raise PathValidationError("Path resolves outside the knowledge base root") from exc
81
+ raise PathValidationError(
82
+ "Path resolves outside the knowledge base root"
83
+ ) from exc
79
84
 
80
85
  if DELETE_SENTINEL in normalized.name:
81
86
  raise PathValidationError("Operations on soft-deleted files are not permitted")
@@ -5,6 +5,7 @@ operations defined elsewhere in the package. Using FastMCP drastically reduces
5
5
  protocol boilerplate because the framework introspects type hints and
6
6
  Docstrings to generate MCP-compatible tool schemas automatically.
7
7
  """
8
+
8
9
  from __future__ import annotations
9
10
 
10
11
  from dataclasses import dataclass
@@ -90,8 +91,8 @@ def create_fastmcp_app(
90
91
  "mcp-knowledge-base",
91
92
  instructions=(
92
93
  "You are connected to a local text-based knowledge base. Use the provided "
93
- "tools to create, inspect, and organize content while respecting the "
94
- "soft deletion semantics and the protected documentation folder."
94
+ "tools to create, inspect, and organize content and search the knowledgebase for information.\n"
95
+ "Call the documentation tool first to get the latest documentation."
95
96
  ),
96
97
  **fastmcp_kwargs,
97
98
  )
@@ -107,11 +108,15 @@ def create_fastmcp_app(
107
108
  return f"Created {created}"
108
109
 
109
110
  @mcp.tool(name="read_file", title="Read File", structured_output=True)
110
- def read_file(path: str, start_line: int | None = None, end_line: int | None = None) -> ReadFileResult:
111
+ def read_file(
112
+ path: str, start_line: int | None = None, end_line: int | None = None
113
+ ) -> ReadFileResult:
111
114
  """Read a text file returning metadata about the extracted segment."""
112
115
 
113
116
  try:
114
- segment: FileSegment = kb.read_file(path, start_line=start_line, end_line=end_line)
117
+ segment: FileSegment = kb.read_file(
118
+ path, start_line=start_line, end_line=end_line
119
+ )
115
120
  except PathValidationError as exc:
116
121
  raise ValueError(str(exc)) from exc
117
122
  except FileNotFoundError as exc:
@@ -176,7 +181,11 @@ def create_fastmcp_app(
176
181
  )
177
182
  return [
178
183
  SearchMatchResult(
179
- path=str(match.path),
184
+ path=str(
185
+ match.path.relative_to(kb.rules.root)
186
+ if match.path.is_absolute()
187
+ else match.path
188
+ ),
180
189
  line=match.line_number,
181
190
  context=match.context,
182
191
  )
@@ -6,6 +6,7 @@ such as validating incoming requests and shaping responses. Each helper function
6
6
  is intentionally small so that callers can compose them for different workflows
7
7
  without duplicating the low-level boilerplate.
8
8
  """
9
+
9
10
  from __future__ import annotations
10
11
 
11
12
  from contextlib import contextmanager
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mcp-kb
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: MCP server exposing a local markdown knowledge base
5
5
  Author: LLM Maintainer
6
6
  Requires-Python: >=3.11
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "mcp-kb"
3
- version = "0.2.0"
3
+ version = "0.2.1"
4
4
  description = "MCP server exposing a local markdown knowledge base"
5
5
  readme = "README.md"
6
6
  authors = [{ name = "LLM Maintainer" }]
@@ -1,36 +0,0 @@
1
- # Knowledge Base Usage Guide
2
-
3
- Welcome to the MCP-managed knowledge base. This document is automatically
4
- installed the first time the server starts to ensure every deployment ships with
5
- baseline documentation. Customize it to describe project-specific conventions or
6
- operational practices.
7
-
8
- ## Structure
9
-
10
- - All knowledge content lives beneath the `.knowledgebase/` root.
11
- - Documentation and other non-knowledge content resides under `.data/` and is read-only from the MCP tools.
12
- - Soft-deleted files are suffixed with `_DELETE_` and ignored by search/overview.
13
-
14
- ## Recommended Practices
15
-
16
- 1. Organize content into topic-based folders (e.g., `architecture/`, `ops/`).
17
- 2. Keep document titles within the first heading so search results show context.
18
- 3. Use relative markdown links to connect related documents inside the knowledge
19
- base.
20
- 4. Periodically review `_DELETE_` files and clean up as necessary via direct
21
- filesystem operations.
22
-
23
- ## Default Tools
24
-
25
- | Tool | Purpose |
26
- | --------------- | ----------------------------------------- |
27
- | `create_file` | Create or overwrite markdown documents |
28
- | `read_file` | Read entire files or specific line ranges |
29
- | `append_file` | Append additional content to a file |
30
- | `regex_replace` | Run regex-based replacements |
31
- | `search` | Search text across active documents |
32
- | `overview` | Display a tree overview of the knowledge |
33
- | `documentation` | Read this documentation file |
34
- | `delete` | Soft-delete files safely |
35
-
36
- Update this document to reflect your team's workflows after deployment.
File without changes
File without changes
File without changes
@@ -4,6 +4,7 @@ This module centralizes the definition of common command-line options and
4
4
  helpers so that multiple entry points (e.g., server and reindex commands) can
5
5
  remain small and focused while sharing consistent behavior.
6
6
  """
7
+
7
8
  from __future__ import annotations
8
9
 
9
10
  import os
@@ -150,4 +151,3 @@ def build_chroma_listener(options: Namespace, root: Path) -> Optional[ChromaInge
150
151
  if not configuration.enabled:
151
152
  return None
152
153
  return ChromaIngestor(configuration)
153
-
File without changes
File without changes
File without changes
File without changes
File without changes