PyPI - mcp-kb - Versions diffs - 0.1.0__tar.gz → 0.2.1__tar.gz - Mend

mcp-kb 0.1.0 (tar.gz) → 0.2.1 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32)

{mcp_kb-0.1.0 → mcp_kb-0.2.1}/PKG-INFO RENAMED Viewed

@@ -1,14 +1,14 @@
 Metadata-Version: 2.4
 Name: mcp-kb
-Version: 0.1.0
+Version: 0.2.1
 Summary: MCP server exposing a local markdown knowledge base
 Author: LLM Maintainer
 Requires-Python: >=3.11
 Description-Content-Type: text/markdown
-Requires-Dist: chromadb>=1.1.0
 Requires-Dist: httpx>=0.28.1
 Requires-Dist: mcp[cli]>=1.15.0
 Provides-Extra: vector
+Requires-Dist: chromadb>=1.1.0; extra == "vector"
 Requires-Dist: tiktoken>=0.11.0; extra == "vector"
 Requires-Dist: langchain-text-splitters>=0.3.11; extra == "vector"
@@ -36,7 +36,7 @@ uv run mcp-kb-server --transport http --host 0.0.0.0 --port 9000
 ```
 On first launch the server copies a bundled `KNOWLEDBASE_DOC.md` into the
-`.docs/` directory if it is missing so that every deployment starts with a
+`.data/` directory if it is missing so that every deployment starts with a
 baseline usage guide.
 ## Optional ChromaDB Mirroring

{mcp_kb-0.1.0 → mcp_kb-0.2.1}/README.md RENAMED Viewed

@@ -22,7 +22,7 @@ uv run mcp-kb-server --transport http --host 0.0.0.0 --port 9000
 ```
 On first launch the server copies a bundled `KNOWLEDBASE_DOC.md` into the
-`.docs/` directory if it is missing so that every deployment starts with a
+`.data/` directory if it is missing so that every deployment starts with a
 baseline usage guide.
 ## Optional ChromaDB Mirroring

{mcp_kb-0.1.0 → mcp_kb-0.2.1}/mcp_kb/cli/main.py RENAMED Viewed

@@ -1,4 +1,5 @@
 """Command line interface for running the MCP knowledge base server."""
 from __future__ import annotations
 import argparse
@@ -8,7 +9,7 @@ import os
 from pathlib import Path
 from typing import Iterable, List, Optional
-from mcp_kb.config import DOCS_FOLDER_NAME, resolve_knowledge_base_root
+from mcp_kb.config import DATA_FOLDER_NAME, resolve_knowledge_base_root
 from mcp_kb.cli.args import add_chroma_arguments, build_chroma_listener, parse_bool
 from mcp_kb.ingest.chroma import ChromaIngestor
 from mcp_kb.knowledge.bootstrap import install_default_documentation
@@ -79,7 +80,7 @@ def run_server(arguments: Iterable[str] | None = None) -> None:
     parser = _build_argument_parser()
     options = parser.parse_args(arguments)
     root_path = resolve_knowledge_base_root(options.root)
-    rules = PathRules(root=root_path, protected_folders=(DOCS_FOLDER_NAME,))
+    rules = PathRules(root=root_path, protected_folders=(DATA_FOLDER_NAME,))
     install_default_documentation(root_path)
     listeners: List[ChromaIngestor] = []
     try:
@@ -100,9 +101,15 @@ def run_server(arguments: Iterable[str] | None = None) -> None:
         listeners=listeners,
     )
     transports = options.transports or ["stdio"]
-    logger.info(f"Running server on {options.host}:{options.port} with transports {transports}")
+    logger.info(
+        f"Running server on {options.host}:{options.port} with transports {transports}"
+    )
     logger.info(f"Data root is {root_path}")
-    print("--------------------------------",root_path,"--------------------------------")
+    print(
+        "--------------------------------",
+        root_path,
+        "--------------------------------",
+    )
     asyncio.run(_run_transports(server, transports))

{mcp_kb-0.1.0 → mcp_kb-0.2.1}/mcp_kb/cli/reindex.py RENAMED Viewed

@@ -5,6 +5,7 @@ ingestors and calls their ``reindex`` method when available, allowing operators
 to trigger a full rebuild of external indexes (e.g., Chroma) from the current
 filesystem state.
 """
 from __future__ import annotations
 import argparse
@@ -12,7 +13,7 @@ import logging
 from typing import Iterable, List
 from mcp_kb.cli.args import add_chroma_arguments, build_chroma_listener
-from mcp_kb.config import DOCS_FOLDER_NAME, resolve_knowledge_base_root
+from mcp_kb.config import DATA_FOLDER_NAME, resolve_knowledge_base_root
 from mcp_kb.knowledge.events import KnowledgeBaseReindexListener
 from mcp_kb.knowledge.store import KnowledgeBase
 from mcp_kb.security.path_validation import PathRules
@@ -24,7 +25,9 @@ logger = logging.getLogger(__name__)
 def _build_argument_parser() -> argparse.ArgumentParser:
     """Return the argument parser for the reindex command."""
-    parser = argparse.ArgumentParser(description="Reindex the knowledge base into configured backends")
+    parser = argparse.ArgumentParser(
+        description="Reindex the knowledge base into configured backends"
+    )
     parser.add_argument(
         "--root",
         dest="root",
@@ -58,7 +61,7 @@ def run_reindex(arguments: Iterable[str] | None = None) -> int:
     parser = _build_argument_parser()
     options = parser.parse_args(arguments)
     root_path = resolve_knowledge_base_root(options.root)
-    rules = PathRules(root=root_path, protected_folders=(DOCS_FOLDER_NAME,))
+    rules = PathRules(root=root_path, protected_folders=(DATA_FOLDER_NAME,))
     kb = KnowledgeBase(rules)
     listeners: List[KnowledgeBaseReindexListener] = []
@@ -71,6 +74,7 @@ def run_reindex(arguments: Iterable[str] | None = None) -> int:
     total = 0
     for listener in listeners:
+        logger.info("Reindexing via %s", listener.__class__.__name__)
         count = listener.reindex(kb)
         logger.info("Reindexed %d documents via %s", count, listener.__class__.__name__)
         total += count
@@ -87,4 +91,3 @@ def main() -> None:
 if __name__ == "__main__":
     main()

{mcp_kb-0.1.0 → mcp_kb-0.2.1}/mcp_kb/config.py RENAMED Viewed

@@ -7,6 +7,7 @@ logic more reusable across different deployment environments because callers can
 swap configurations programmatically or via environment variables without
 modifying the core modules.
 """
 from __future__ import annotations
 from pathlib import Path
@@ -16,7 +17,7 @@ import os
 DEFAULT_KNOWLEDGE_BASE_DIR = ".knowledgebase"
 """str: Default relative directory for persisting knowledge base documents."""
-DOCS_FOLDER_NAME = ".docs"
+DATA_FOLDER_NAME = ".data"
 """str: Name of the documentation folder inside the knowledge base tree."""
 DOC_FILENAME = "KNOWLEDBASE_DOC.md"
@@ -58,12 +59,13 @@ def resolve_knowledge_base_root(provided_path: str | None = None) -> Path:
         root directory.
     """
-    candidate = provided_path or os.getenv(ENV_ROOT_KEY) or Path(
-        os.getenv('WORKSPACE_FOLDER_PATHS') or Path.cwd()
-    )/DEFAULT_KNOWLEDGE_BASE_DIR
+    candidate = (
+        provided_path
+        or os.getenv(ENV_ROOT_KEY)
+        or Path(os.getenv("WORKSPACE_FOLDER_PATHS") or Path.cwd())
+        / DEFAULT_KNOWLEDGE_BASE_DIR
+    )
     root_path = Path(candidate).expanduser().resolve()
     root_path.mkdir(parents=True, exist_ok=True)
     return root_path

mcp_kb-0.2.1/mcp_kb/data/KNOWLEDBASE_DOC.md ADDED Viewed

@@ -0,0 +1,151 @@
+# LLM Operating Manual — MCP Knowledge Base (`mcp-kb`)
+You are connected to a **local, text-only knowledge base**. Your job is to **search, read, create, update, and soft-delete** UTF‑8 text files under a single root directory while respecting safety rules below. Use the provided MCP tools exactly as specified.
+---
+## Ground Rules (enforced by the server)
+- **Paths are relative only.** Absolute paths are rejected. No `..` traversal.
+- **Protected folder:** `.data/` is read‑only. Do not write there.
+- **Soft delete sentinel:** Files marked with `_DELETE_` in the name are considered deleted. Do not read/write them.
+- **Text files only.** Binary-ish files are ignored by scans. Treat this KB as UTF‑8 text storage.
+- **Concurrency:** Writes are serialized per file; still prefer read‑verify‑write sequences.
+Constants (baked into the server):
+- Protected folder: `.data`
+- Documentation file name: `KNOWLEDBASE_DOC.md`
+- Delete sentinel: `_DELETE_`
+---
+## Tools You Can Call
+All tool names and parameter contracts are stable. Stick to these shapes.
+### `create_file(path: str, content: str) -> str`
+- Create or **overwrite** a text file at `path` with `content`.
+- `path` must be **relative** and **outside** `.data/`.
+### `read_file(path: str, start_line?: int, end_line?: int) -> { path, start_line, end_line, content }`
+- Read full file or a 1‑based inclusive slice.
+- If both bounds omitted ⇒ full file. If one bound omitted ⇒ server fills it.
+### `append_file(path: str, content: str) -> str`
+- Append text. If file is missing, it will be **created**.
+### `regex_replace(path: str, pattern: str, replacement: str) -> { replacements: int }`
+- Multiline regex (`re.MULTILINE`). Returns count. Always `read_file` afterwards to verify.
+### `delete(path: str) -> str`
+- **Soft delete**: renames `name.ext` to `name_DELETE_.ext`. Use when content is obsolete.
+### `search(query: str, limit: int = 5) -> [{ path, line, context: string[] }]`
+- Returns up to `limit` matches with short context.
+- If Chroma mirroring is active, results are **semantic** first; otherwise plain scan.
+- `limit` must be **> 0**.
+### `overview() -> str`
+- A deterministic `tree`-like view of active files under root (skips deleted and binaries).
+### `documentation() -> str`
+- Human usage guide (not this manual). For you, prefer this manual.
+---
+## How to Work Effectively
+### 1) Discover
+- Call `overview()` to understand the tree.
+- If you need conventions or human guidelines, read `documentation()` (optional).
+### 2) Locate Content
+- Prefer `search("keywords", limit=5)` to find candidate files/snippets.
+  - Examine each `{path, line, context}`. The `context` is a short window around the hit.
+  - If results look thin, **increase `limit`** (e.g., 10–20) before broadening the query.
+### 3) Read Precisely
+- Use `read_file(path)` for the full file when structure matters.
+- If the file is large but you know the region, use `read_file(path, start_line, end_line)` to minimize tokens.
+### 4) Create New Knowledge
+- Pick a **descriptive relative path** (folders based on topic, kebab‑case names).
+  - Example: `architecture/decision-records/adr-2025-10-06-edge-cache.md`
+- Call `create_file(path, content)`.
+- Keep the **title as the first Markdown heading** so search has context.
+- Link related files with **relative Markdown links**.
+### 5) Update Safely
+- For small edits:
+  1) `read_file(...)` to confirm current state.
+  2) `regex_replace(path, pattern, replacement)` for targeted changes.
+  3) `read_file(...)` again to verify.
+- For additive changes: `append_file(path, "\n...")`.
+### 6) Deletion Policy
+- Use `delete(path)` to **soft-delete**. Do not operate on files that already include `_DELETE_` in their name.
+---
+## Search Semantics (important)
+- When Chroma ingestion is **enabled**, `search()` uses semantic ranking first and returns the **best slice per file** (the ingestor extracts one representative match per document chunk/file). If no obvious line match is found, you may get a **top-of-file preview** — then call `read_file()` to confirm.
+- When Chroma is **not** enabled, `search()` scans files literally and returns all matches up to `limit`.
+- Always **validate** by fetching the file segment with `read_file()` before making edits.
+---
+## Parameter Contracts and Gotchas
+- `path` must be **relative** (e.g., `notes/today.md`). Absolute paths are rejected.
+- Do **not** write into `.data/` (protected). Reads are allowed there.
+- Line numbers in `read_file` are **1‑based** and the interval is **inclusive**.
+- `regex_replace` uses Python’s `re.MULTILINE`. Validate your pattern; avoid overly broad substitutions.
+- `append_file` will create a file if missing (useful for logs/progress notes).
+---
+## Typical Recipes
+**Find → Read → Edit**
+1. `search("beta feature toggle", limit=10)`
+2. Pick a result: `read_file("features/toggles.md", 40, 80)`
+3. Adjust: `regex_replace("features/toggles.md", "^Status:.*$", "Status: Enabled")`
+4. Verify: `read_file("features/toggles.md")` (check the `Status:` header)
+**Add a new doc**
+1. `create_file("ops/runbooks/cache-invalidation.md", "# Cache Invalidation\n\n…")`
+2. Optionally link it from an index: `append_file("ops/README.md", "\n- [Cache Invalidation](runbooks/cache-invalidation.md)")`
+**Soft delete an obsolete note**
+1. `delete("notes/old-incident.md")`
+---
+## Error Recovery
+- **"Absolute paths are not permitted"** → Use a **relative** path.
+- **"Writes are not allowed inside the protected folder '.data'"** → Choose a different folder (e.g., `docs/`).
+- **"File 'X' does not exist"** on delete → Confirm with `overview()` or `search()`. Only existing non‑deleted files can be soft‑deleted.
+- **No search hits** → Widen keywords, increase `limit`, or pivot to `overview()` to eyeball likely locations.
+---
+## Things You Should Not Do
+- Do not fabricate file contents or paths. Always confirm with `overview()`, `search()`, and `read_file()`.
+- Do not operate on files that include `_DELETE_` in their name.
+- Do not attempt to talk directly to Chroma; you only use `search()`. Indexing is handled automatically after writes.
+- Do not write binary or non‑UTF‑8 content.
+---
+## Performance Hints
+- Prefer `search()` + targeted `read_file()` slices over reading entire large files.
+- Keep `limit` modest (5–10) unless you must broaden the search.
+- Batch edits in one file using a single `regex_replace` when safe (then verify).
+---
+You now have the minimal contract to operate this KB safely and efficiently.

{mcp_kb-0.1.0 → mcp_kb-0.2.1}/mcp_kb/ingest/chroma.py RENAMED Viewed

@@ -1,4 +1,5 @@
 """Integration layer that mirrors knowledge base updates into ChromaDB."""
 from __future__ import annotations
 import importlib
@@ -6,7 +7,9 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Dict, List, Mapping, Optional, Set, Tuple, Type, TYPE_CHECKING
 from langchain_text_splitters import TokenTextSplitter
+from tqdm import tqdm
+from mcp_kb.config import DATA_FOLDER_NAME
 from mcp_kb.knowledge.events import (
     FileDeleteEvent,
     FileUpsertEvent,
@@ -23,6 +26,7 @@ if TYPE_CHECKING:  # pragma: no cover - type checking only imports
 SUPPORTED_CLIENTS: Tuple[str, ...] = ("off", "ephemeral", "persistent", "http", "cloud")
 """Recognised client types exposed to operators enabling Chroma ingestion."""
 @dataclass(frozen=True)
 class ChromaConfiguration:
     """Runtime configuration controlling how Chroma ingestion behaves.
@@ -104,7 +108,7 @@ class ChromaConfiguration:
         if data_directory:
             resolved_directory = Path(data_directory).expanduser().resolve()
         elif normalized_type == "persistent":
-            resolved_directory = (root / "chroma").resolve()
+            resolved_directory = (root / DATA_FOLDER_NAME / "chroma").resolve()
         else:
             resolved_directory = None
@@ -142,7 +146,9 @@ class ChromaConfiguration:
             raise ValueError("Persistent Chroma client requires a data directory")
         if self.client_type == "http" and not self.host:
-            raise ValueError("HTTP Chroma client requires --chroma-host or MCP_KB_CHROMA_HOST")
+            raise ValueError(
+                "HTTP Chroma client requires --chroma-host or MCP_KB_CHROMA_HOST"
+            )
         if self.client_type == "cloud":
             missing = [
@@ -201,7 +207,9 @@ def _load_dependencies() -> _ChromaDependencies:
         if hasattr(embedding_module, attr):
             factories[alias] = getattr(embedding_module, attr)
     if not factories:
-        raise RuntimeError("No embedding functions were found in chromadb.utils.embedding_functions")
+        raise RuntimeError(
+            "No embedding functions were found in chromadb.utils.embedding_functions"
+        )
     return _ChromaDependencies(
         chroma_module=chroma_module,
@@ -234,14 +242,14 @@ class ChromaIngestor(KnowledgeBaseListener, KnowledgeBaseReindexListener):
         self._client = self._create_client()
         self._collection = self._ensure_collection()
         self.textsplitter = TokenTextSplitter(
-            chunk_size=200,
-            chunk_overlap=20,
-            add_start_index=True
+            chunk_size=200, chunk_overlap=20, add_start_index=True
         )
-    def get_document_chunks(self, document_id: str, include: List[str] = ["metadatas", "documents"]) -> GetResult:
+    def get_document_chunks(
+        self, document_id: str, include: List[str] = ["metadatas", "documents"]
+    ) -> GetResult:
         """Get a document from the Chroma index."""
-        return self._collection.get(where={"document_id": document_id},include=include)
+        return self._collection.get(where={"document_id": document_id}, include=include)
     def handle_upsert(self, event: FileUpsertEvent) -> None:
         """Upsert ``event`` into the configured Chroma collection.
@@ -261,7 +269,9 @@ class ChromaIngestor(KnowledgeBaseListener, KnowledgeBaseReindexListener):
     def delete_document(self, document_id: str) -> None:
         """Delete a document from the Chroma index."""
-        self._collection.delete(ids=self.get_document_chunks(document_id,include=[])["ids"])
+        self._collection.delete(
+            ids=self.get_document_chunks(document_id, include=[])["ids"]
+        )
     def handle_delete(self, event: FileDeleteEvent) -> None:
         """Remove documents associated with ``event`` from the Chroma index.
@@ -363,7 +373,9 @@ class ChromaIngestor(KnowledgeBaseListener, KnowledgeBaseReindexListener):
                 continue
             lines = text.splitlines()
-            file_matches = self._extract_matches_from_lines(candidate, lines, query, context_lines)
+            file_matches = self._extract_matches_from_lines(
+                candidate, lines, query, context_lines
+            )
             if file_matches:
                 matches.append(file_matches[0])
             elif lines:
@@ -408,22 +420,29 @@ class ChromaIngestor(KnowledgeBaseListener, KnowledgeBaseReindexListener):
             pass
         payload_metadata = dict(metadata)
-        payload_metadata['document_id'] = document_id
+        payload_metadata["document_id"] = document_id
         # splitting
         split_docs = self.textsplitter.create_documents([content])
         for i, d in enumerate(split_docs):
             d.metadata.update(payload_metadata)
-            d.metadata['chunk_number'] = i
-            d.metadata['startline'] = len(content[:d.metadata['start_index']].splitlines())
-            d.metadata['endline'] =  d.metadata['startline']  + len(d.page_content.splitlines())-1
+            d.metadata["chunk_number"] = i
+            d.metadata["startline"] = len(
+                content[: d.metadata["start_index"]].splitlines()
+            )
+            d.metadata["endline"] = (
+                d.metadata["startline"] + len(d.page_content.splitlines()) - 1
+            )
         self._collection.add(
             documents=[d.page_content for d in split_docs],
             metadatas=[d.metadata for d in split_docs],
-            ids=[f"{d.metadata['document_id']}-{d.metadata['chunk_number']}" for d in split_docs],
+            ids=[
+                f"{d.metadata['document_id']}-{d.metadata['chunk_number']}"
+                for d in split_docs
+            ],
         )
     # Optional full reindex -----------------------------------------------------
@@ -451,19 +470,25 @@ class ChromaIngestor(KnowledgeBaseListener, KnowledgeBaseReindexListener):
         count = 0
         root = kb.rules.root
-        for path in kb.iter_active_files(include_docs=False):
-            try:
-                content = path.read_text(encoding="utf-8")
-            except FileNotFoundError:  # pragma: no cover - race with external edits
-                continue
-            relative = str(path.relative_to(root))
-            document_id = f"{self.configuration.id_prefix}{relative}"
-            metadata = {
-                "relative_path": relative,
-            }
-            self._reindex_document(document_id, content, metadata)
-            count += 1
+        with tqdm(
+            kb.iter_active_files(include_docs=False),
+            desc="Reindexing Chroma",
+            total=kb.total_active_files(include_docs=False),
+        ) as pbar:
+            for path in pbar:
+                pbar.set_description(f"Reindexing Chroma {path.name}")
+                try:
+                    content = path.read_text(encoding="utf-8")
+                except FileNotFoundError:  # pragma: no cover - race with external edits
+                    continue
+                relative = str(path.relative_to(root))
+                document_id = f"{self.configuration.id_prefix}{relative}"
+                metadata = {
+                    "relative_path": relative,
+                }
+                self._reindex_document(document_id, content, metadata)
+                count += 1
         return count
@@ -522,7 +547,9 @@ class ChromaIngestor(KnowledgeBaseListener, KnowledgeBaseReindexListener):
         config = self.configuration
         if not config.enabled:
-            raise RuntimeError("ChromaIngestor cannot be constructed when ingestion is disabled")
+            raise RuntimeError(
+                "ChromaIngestor cannot be constructed when ingestion is disabled"
+            )
         if config.client_type == "ephemeral":
             return chroma.EphemeralClient()

{mcp_kb-0.1.0 → mcp_kb-0.2.1}/mcp_kb/knowledge/bootstrap.py RENAMED Viewed

@@ -1,10 +1,11 @@
 """Bootstrap helpers executed during server startup."""
 from __future__ import annotations
 import importlib.resources as resources
 from pathlib import Path
-from mcp_kb.config import DOCS_FOLDER_NAME, DOC_FILENAME
+from mcp_kb.config import DATA_FOLDER_NAME, DOC_FILENAME
 def install_default_documentation(root: Path) -> Path:
@@ -26,14 +27,18 @@ def install_default_documentation(root: Path) -> Path:
         Path to the documentation file inside the knowledge base tree.
     """
-    docs_dir = root / DOCS_FOLDER_NAME
+    docs_dir = root / DATA_FOLDER_NAME
     doc_path = docs_dir / DOC_FILENAME
     if doc_path.exists():
         return doc_path
     docs_dir.mkdir(parents=True, exist_ok=True)
-    with resources.files("mcp_kb.data").joinpath("KNOWLEDBASE_DOC.md").open("r", encoding="utf-8") as source:
+    with (
+        resources.files("mcp_kb.data")
+        .joinpath("KNOWLEDBASE_DOC.md")
+        .open("r", encoding="utf-8") as source
+    ):
         doc_path.write_text(source.read(), encoding="utf-8")
     return doc_path

{mcp_kb-0.1.0 → mcp_kb-0.2.1}/mcp_kb/knowledge/events.py RENAMED Viewed

@@ -7,6 +7,7 @@ coupling the core filesystem logic to specific backends. Each event captures bot
 absolute and knowledge-base-relative paths so that listeners can decide which
 identifier best fits their storage requirements.
 """
 from __future__ import annotations
 from dataclasses import dataclass

{mcp_kb-0.1.0 → mcp_kb-0.2.1}/mcp_kb/knowledge/search.py RENAMED Viewed

@@ -5,13 +5,14 @@ can evolve independently. Search often benefits from dedicated caching or
 indexing strategies; keeping it in its own module means the server can swap the
 implementation later without changing the core file lifecycle API.
 """
 from __future__ import annotations
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Dict, Iterable, List, Optional
-from mcp_kb.config import DOCS_FOLDER_NAME, DOC_FILENAME
+from mcp_kb.config import DATA_FOLDER_NAME, DOC_FILENAME
 from mcp_kb.knowledge.events import KnowledgeBaseSearchListener
 from mcp_kb.knowledge.store import KnowledgeBase
@@ -141,13 +142,15 @@ def read_documentation(kb: KnowledgeBase) -> str:
     folder.
     """
-    doc_path = kb.rules.root / DOCS_FOLDER_NAME / DOC_FILENAME
+    doc_path = kb.rules.root / DATA_FOLDER_NAME / DOC_FILENAME
     if not doc_path.exists():
         return ""
     return doc_path.read_text(encoding="utf-8")
-def _extract_matches_for_path(path: Path, query: str, context_lines: int) -> List[SearchMatch]:
+def _extract_matches_for_path(
+    path: Path, query: str, context_lines: int
+) -> List[SearchMatch]:
     """Read ``path`` and return every match that contains ``query``."""
     lines = path.read_text(encoding="utf-8").splitlines()

{mcp_kb-0.1.0 → mcp_kb-0.2.1}/mcp_kb/knowledge/store.py RENAMED Viewed

@@ -2,11 +2,12 @@
 This module exposes the ``KnowledgeBase`` class, which orchestrates validated
 filesystem operations for the MCP server. The class encapsulates logic for
-creating, reading, appending, and modifying markdown files while respecting the
+creating, reading, appending, and modifying text files while respecting the
 security constraints defined in the PRD. Each method returns plain Python data
 structures so that higher-level layers (e.g., JSON-RPC handlers) can focus on
 protocol serialization rather than filesystem minutiae.
 """
 from __future__ import annotations
 import re
@@ -14,8 +15,12 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Iterable, Optional
-from mcp_kb.config import DELETE_SENTINEL, DOCS_FOLDER_NAME
-from mcp_kb.knowledge.events import FileDeleteEvent, FileUpsertEvent, KnowledgeBaseListener
+from mcp_kb.config import DELETE_SENTINEL, DATA_FOLDER_NAME
+from mcp_kb.knowledge.events import (
+    FileDeleteEvent,
+    FileUpsertEvent,
+    KnowledgeBaseListener,
+)
 from mcp_kb.security.path_validation import (
     PathRules,
     ensure_write_allowed,
@@ -79,7 +84,7 @@ class KnowledgeBase:
         self.listeners = tuple(listeners or ())
     def create_file(self, relative_path: str, content: str) -> Path:
-        """Create or overwrite a markdown file at ``relative_path``.
+        """Create or overwrite a text file at ``relative_path``.
         The method validates the path, ensures that the parent directory exists,
         and writes the provided content as UTF-8 text. Existing files are
@@ -186,8 +191,12 @@ class KnowledgeBase:
         self._notify_delete(target, original_relative)
         return target
+    def total_active_files(self, include_docs: bool = False) -> int:
+        """Return the total number of non-deleted UTF-8 text files under the root directory."""
+        return sum(1 for _ in self.iter_active_files(include_docs=include_docs))
     def iter_active_files(self, include_docs: bool = False) -> Iterable[Path]:
-        """Yield non-deleted markdown files under the root directory.
+        """Yield non-deleted UTF-8 text files under the root directory.
         Parameters
         ----------
@@ -197,13 +206,18 @@ class KnowledgeBase:
             the search and overview requirements from the PRD.
         """
-        for path in self.rules.root.rglob("*.md"):
+        from mcp_kb.utils.filesystem import is_text_file
+        for path in self.rules.root.rglob("*"):
+            if not path.is_file():
+                continue
             if DELETE_SENTINEL in path.name:
                 continue
             parts = path.relative_to(self.rules.root).parts
-            if parts and parts[0] == DOCS_FOLDER_NAME and not include_docs:
+            if parts and parts[0] == DATA_FOLDER_NAME and not include_docs:
                 continue
-            yield path
+            if is_text_file(path):
+                yield path
     def _relative_path(self, absolute: Path) -> str:
         """Return ``absolute`` rewritten relative to the knowledge base root."""
@@ -218,7 +232,7 @@ class KnowledgeBase:
         absolute:
             Fully resolved path that was modified on disk.
         content:
-            Markdown payload that should be provided to subscribers.
+            Text payload that should be provided to subscribers.
         """
         if not self.listeners:
@@ -240,7 +254,9 @@ class KnowledgeBase:
         event = FileDeleteEvent(absolute_path=absolute, relative_path=relative)
         self._dispatch("handle_delete", event)
-    def _dispatch(self, method_name: str, event: FileUpsertEvent | FileDeleteEvent) -> None:
+    def _dispatch(
+        self, method_name: str, event: FileUpsertEvent | FileDeleteEvent
+    ) -> None:
         """Call ``method_name`` on every listener and wrap failures for clarity."""
         for listener in self.listeners:

{mcp_kb-0.1.0 → mcp_kb-0.2.1}/mcp_kb/security/path_validation.py RENAMED Viewed

@@ -7,13 +7,14 @@ that target the reserved documentation folder. The helper functions are written
 so they can be reused both by the server runtime and by unit tests to keep the
 security rules consistent.
 """
 from __future__ import annotations
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Iterable
-from mcp_kb.config import DOCS_FOLDER_NAME, DELETE_SENTINEL
+from mcp_kb.config import DATA_FOLDER_NAME, DELETE_SENTINEL
 class PathValidationError(ValueError):
@@ -69,13 +70,17 @@ def normalize_path(candidate: str, rules: PathRules) -> Path:
     path_obj = Path(candidate)
     if path_obj.is_absolute():
-        raise PathValidationError("Absolute paths are not permitted inside the knowledge base")
+        raise PathValidationError(
+            "Absolute paths are not permitted inside the knowledge base"
+        )
     normalized = (rules.root / path_obj).resolve()
     try:
         normalized.relative_to(rules.root)
     except ValueError as exc:
-        raise PathValidationError("Path resolves outside the knowledge base root") from exc
+        raise PathValidationError(
+            "Path resolves outside the knowledge base root"
+        ) from exc
     if DELETE_SENTINEL in normalized.name:
         raise PathValidationError("Operations on soft-deleted files are not permitted")

{mcp_kb-0.1.0 → mcp_kb-0.2.1}/mcp_kb/server/app.py RENAMED Viewed

@@ -5,6 +5,7 @@ operations defined elsewhere in the package. Using FastMCP drastically reduces
 protocol boilerplate because the framework introspects type hints and
 Docstrings to generate MCP-compatible tool schemas automatically.
 """
 from __future__ import annotations
 from dataclasses import dataclass
@@ -89,16 +90,16 @@ def create_fastmcp_app(
     mcp = FastMCP(
         "mcp-knowledge-base",
         instructions=(
-            "You are connected to a local markdown knowledge base. Use the provided "
-            "tools to create, inspect, and organize content while respecting the "
-            "soft deletion semantics and the protected documentation folder."
+            "You are connected to a local text-based knowledge base. Use the provided "
+            "tools to create, inspect, and organize content and search the knowledgebase for information.\n"
+            "Call the documentation tool first to get the latest documentation."
         ),
         **fastmcp_kwargs,
     )
     @mcp.tool(name="create_file", title="Create File")
     def create_file(path: str, content: str) -> str:
-        """Create or overwrite a markdown file at ``path`` with ``content``."""
+        """Create or overwrite a text file at ``path`` with ``content``."""
         try:
             created = kb.create_file(path, content)
@@ -107,11 +108,15 @@ def create_fastmcp_app(
         return f"Created {created}"
     @mcp.tool(name="read_file", title="Read File", structured_output=True)
-    def read_file(path: str, start_line: int | None = None, end_line: int | None = None) -> ReadFileResult:
-        """Read a markdown file returning metadata about the extracted segment."""
+    def read_file(
+        path: str, start_line: int | None = None, end_line: int | None = None
+    ) -> ReadFileResult:
+        """Read a text file returning metadata about the extracted segment."""
         try:
-            segment: FileSegment = kb.read_file(path, start_line=start_line, end_line=end_line)
+            segment: FileSegment = kb.read_file(
+                path, start_line=start_line, end_line=end_line
+            )
         except PathValidationError as exc:
             raise ValueError(str(exc)) from exc
         except FileNotFoundError as exc:
@@ -176,7 +181,11 @@ def create_fastmcp_app(
         )
         return [
             SearchMatchResult(
-                path=str(match.path),
+                path=str(
+                    match.path.relative_to(kb.rules.root)
+                    if match.path.is_absolute()
+                    else match.path
+                ),
                 line=match.line_number,
                 context=match.context,
             )

{mcp_kb-0.1.0 → mcp_kb-0.2.1}/mcp_kb/utils/filesystem.py RENAMED Viewed

@@ -6,6 +6,7 @@ such as validating incoming requests and shaping responses. Each helper function
 is intentionally small so that callers can compose them for different workflows
 without duplicating the low-level boilerplate.
 """
 from __future__ import annotations
 from contextlib import contextmanager
@@ -81,3 +82,47 @@ def rename(path: Path, target: Path) -> None:
     """Rename ``path`` to ``target`` using ``Path.rename`` semantics."""
     path.rename(target)
+def is_text_file(path: Path, max_bytes: int = 2048) -> bool:
+    """Heuristically determine whether ``path`` contains UTF-8 text.
+    The check is designed to be fast and conservative for use when iterating
+    a directory tree. It reads at most ``max_bytes`` from the file in binary
+    mode and applies two filters:
+    - Reject files that contain NUL bytes, which are extremely uncommon in
+      textual formats and a strong indicator of binary content.
+    - Attempt to decode the sampled bytes as UTF-8. If decoding fails, the
+      file is treated as binary.
+    Parameters
+    ----------
+    path:
+        Absolute path to the file on disk.
+    max_bytes:
+        Upper bound on the number of bytes to sample from the head of the
+        file. A small sample keeps directory scans fast while remaining
+        accurate for typical text formats such as ``.md``, ``.txt``, ``.xml``,
+        and source files.
+    Returns
+    -------
+    bool
+        ``True`` if the file appears to be UTF-8 text; ``False`` otherwise.
+    """
+    try:
+        with path.open("rb") as handle:
+            sample = handle.read(max_bytes)
+    except (FileNotFoundError, PermissionError):  # pragma: no cover - defensive
+        return False
+    if b"\x00" in sample:
+        return False
+    try:
+        sample.decode("utf-8")
+        return True
+    except UnicodeDecodeError:
+        return False

{mcp_kb-0.1.0 → mcp_kb-0.2.1}/mcp_kb.egg-info/PKG-INFO RENAMED Viewed

@@ -1,14 +1,14 @@
 Metadata-Version: 2.4
 Name: mcp-kb
-Version: 0.1.0
+Version: 0.2.1
 Summary: MCP server exposing a local markdown knowledge base
 Author: LLM Maintainer
 Requires-Python: >=3.11
 Description-Content-Type: text/markdown
-Requires-Dist: chromadb>=1.1.0
 Requires-Dist: httpx>=0.28.1
 Requires-Dist: mcp[cli]>=1.15.0
 Provides-Extra: vector
+Requires-Dist: chromadb>=1.1.0; extra == "vector"
 Requires-Dist: tiktoken>=0.11.0; extra == "vector"
 Requires-Dist: langchain-text-splitters>=0.3.11; extra == "vector"
@@ -36,7 +36,7 @@ uv run mcp-kb-server --transport http --host 0.0.0.0 --port 9000
 ```
 On first launch the server copies a bundled `KNOWLEDBASE_DOC.md` into the
-`.docs/` directory if it is missing so that every deployment starts with a
+`.data/` directory if it is missing so that every deployment starts with a
 baseline usage guide.
 ## Optional ChromaDB Mirroring

{mcp_kb-0.1.0 → mcp_kb-0.2.1}/pyproject.toml RENAMED Viewed

@@ -1,12 +1,11 @@
 [project]
 name = "mcp-kb"
-version = "0.1.0"
+version = "0.2.1"
 description = "MCP server exposing a local markdown knowledge base"
 readme = "README.md"
 authors = [{ name = "LLM Maintainer" }]
 requires-python = ">=3.11"
 dependencies = [
-    "chromadb>=1.1.0",
     "httpx>=0.28.1",
     "mcp[cli]>=1.15.0",
 ]
@@ -17,8 +16,10 @@ mcp-kb-reindex = "mcp_kb.cli.reindex:main"
 [project.optional-dependencies]
 vector = [
+    "chromadb>=1.1.0",
     "tiktoken>=0.11.0",
     "langchain-text-splitters>=0.3.11",
 ]
 [build-system]

mcp_kb-0.1.0/mcp_kb/data/KNOWLEDBASE_DOC.md DELETED Viewed

@@ -1,36 +0,0 @@
-# Knowledge Base Usage Guide
-Welcome to the MCP-managed knowledge base. This document is automatically
-installed the first time the server starts to ensure every deployment ships with
-baseline documentation. Customize it to describe project-specific conventions or
-operational practices.
-## Structure
-- All knowledge content lives beneath the `.knowledgebase/` root.
-- Documentation resides under `.docs/` and is read-only from the MCP tools.
-- Soft-deleted files are suffixed with `_DELETE_` and ignored by search/overview.
-## Recommended Practices
-1. Organize content into topic-based folders (e.g., `architecture/`, `ops/`).
-2. Keep document titles within the first heading so search results show context.
-3. Use relative markdown links to connect related documents inside the knowledge
-   base.
-4. Periodically review `_DELETE_` files and clean up as necessary via direct
-   filesystem operations.
-## Default Tools
-| Tool            | Purpose                                   |
-| --------------- | ----------------------------------------- |
-| `create_file`   | Create or overwrite markdown documents    |
-| `read_file`     | Read entire files or specific line ranges |
-| `append_file`   | Append additional content to a file       |
-| `regex_replace` | Run regex-based replacements              |
-| `search`        | Search text across active documents       |
-| `overview`      | Display a tree overview of the knowledge  |
-| `documentation` | Read this documentation file              |
-| `delete`        | Soft-delete files safely                  |
-Update this document to reflect your team's workflows after deployment.

{mcp_kb-0.1.0 → mcp_kb-0.2.1}/mcp_kb/__init__.py RENAMED Viewed

File without changes

{mcp_kb-0.1.0 → mcp_kb-0.2.1}/mcp_kb/cli/__init__.py RENAMED Viewed

File without changes

{mcp_kb-0.1.0 → mcp_kb-0.2.1}/mcp_kb/cli/args.py RENAMED Viewed

@@ -4,6 +4,7 @@ This module centralizes the definition of common command-line options and
 helpers so that multiple entry points (e.g., server and reindex commands) can
 remain small and focused while sharing consistent behavior.
 """
 from __future__ import annotations
 import os
@@ -150,4 +151,3 @@ def build_chroma_listener(options: Namespace, root: Path) -> Optional[ChromaInge
     if not configuration.enabled:
         return None
     return ChromaIngestor(configuration)