mcp-kb 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,16 +1,18 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mcp-kb
3
- Version: 0.2.1
3
+ Version: 0.3.0
4
4
  Summary: MCP server exposing a local markdown knowledge base
5
5
  Author: LLM Maintainer
6
6
  Requires-Python: >=3.11
7
7
  Description-Content-Type: text/markdown
8
8
  Requires-Dist: httpx>=0.28.1
9
9
  Requires-Dist: mcp[cli]>=1.15.0
10
+ Requires-Dist: pydantic>=2.11.9
10
11
  Provides-Extra: vector
11
12
  Requires-Dist: chromadb>=1.1.0; extra == "vector"
12
13
  Requires-Dist: tiktoken>=0.11.0; extra == "vector"
13
14
  Requires-Dist: langchain-text-splitters>=0.3.11; extra == "vector"
15
+ Requires-Dist: umap-learn>=0.5.9.post2; extra == "vector"
14
16
 
15
17
  # MCP Knowledge Base Server
16
18
 
@@ -0,0 +1,7 @@
1
+ mcp_kb/__init__.py,sha256=Ry7qODhfFQF6u6p2m3bwGWhB0-BdWTQcHDJB7NBYAio,74
2
+ mcp_kb/config.py,sha256=NUpzjDH4PQw4FyjGgYUcMsGMeenNiZTMrQj4U62xKlk,2530
3
+ mcp_kb-0.3.0.dist-info/METADATA,sha256=HPYp9PAZp75fDdriebR76e1KJKUkPQqj0zzTJQsZAaE,5212
4
+ mcp_kb-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
5
+ mcp_kb-0.3.0.dist-info/entry_points.txt,sha256=qwJkR3vV7ZeydfS_IYMiDwLv4BdTkrOf4-5neWj25g0,96
6
+ mcp_kb-0.3.0.dist-info/top_level.txt,sha256=IBiz3TNE3FF3TwkbCZpC1kkk6ohTwtBQNSPJNV3-qGA,7
7
+ mcp_kb-0.3.0.dist-info/RECORD,,
mcp_kb/cli/__init__.py DELETED
@@ -1 +0,0 @@
1
- """CLI subpackage exposing entry points for running the server."""
mcp_kb/cli/args.py DELETED
@@ -1,153 +0,0 @@
1
- """Shared CLI argument wiring for knowledge base utilities.
2
-
3
- This module centralizes the definition of common command-line options and
4
- helpers so that multiple entry points (e.g., server and reindex commands) can
5
- remain small and focused while sharing consistent behavior.
6
- """
7
-
8
- from __future__ import annotations
9
-
10
- import os
11
- from argparse import ArgumentParser, Namespace
12
- from pathlib import Path
13
- from typing import Optional
14
-
15
- from mcp_kb.ingest.chroma import SUPPORTED_CLIENTS, ChromaConfiguration, ChromaIngestor
16
-
17
-
18
- def parse_bool(value: str | bool | None) -> bool:
19
- """Return ``True`` when ``value`` represents an affirmative boolean string.
20
-
21
- The function accepts case-insensitive variants such as "true", "t",
22
- "yes", and "1". ``None`` yields ``False``.
23
- """
24
-
25
- if isinstance(value, bool):
26
- return value
27
- if value is None:
28
- return False
29
- return value.lower() in {"1", "true", "t", "yes", "y"}
30
-
31
-
32
- def add_chroma_arguments(parser: ArgumentParser) -> None:
33
- """Register Chroma ingestion arguments on ``parser``.
34
-
35
- Environment variables are used as defaults where available so that
36
- deployments can configure ingestion without repeating flags.
37
- """
38
-
39
- default_chroma_client = os.getenv("MCP_KB_CHROMA_CLIENT", "persistent").lower()
40
- default_collection = os.getenv("MCP_KB_CHROMA_COLLECTION", "knowledge-base")
41
- default_embedding = os.getenv("MCP_KB_CHROMA_EMBEDDING", "default")
42
- default_data_dir = os.getenv("MCP_KB_CHROMA_DATA_DIR")
43
- default_host = os.getenv("MCP_KB_CHROMA_HOST")
44
- default_port_env = os.getenv("MCP_KB_CHROMA_PORT")
45
- default_port = int(default_port_env) if default_port_env else None
46
- default_ssl = parse_bool(os.getenv("MCP_KB_CHROMA_SSL", "true"))
47
- default_tenant = os.getenv("MCP_KB_CHROMA_TENANT")
48
- default_database = os.getenv("MCP_KB_CHROMA_DATABASE")
49
- default_api_key = os.getenv("MCP_KB_CHROMA_API_KEY")
50
- default_custom_auth = os.getenv("MCP_KB_CHROMA_CUSTOM_AUTH")
51
- default_id_prefix = os.getenv("MCP_KB_CHROMA_ID_PREFIX")
52
-
53
- parser.add_argument(
54
- "--chroma-client",
55
- dest="chroma_client",
56
- choices=SUPPORTED_CLIENTS,
57
- default=default_chroma_client,
58
- help="Client implementation for mirroring data to ChromaDB (default: persistent).",
59
- )
60
- parser.add_argument(
61
- "--chroma-collection",
62
- dest="chroma_collection",
63
- default=default_collection,
64
- help="Chroma collection name used to store documents.",
65
- )
66
- parser.add_argument(
67
- "--chroma-embedding",
68
- dest="chroma_embedding",
69
- default=default_embedding,
70
- help="Embedding function name registered with chromadb.utils.embedding_functions.",
71
- )
72
- parser.add_argument(
73
- "--chroma-data-dir",
74
- dest="chroma_data_dir",
75
- default=default_data_dir,
76
- help="Storage directory for the persistent Chroma client.",
77
- )
78
- parser.add_argument(
79
- "--chroma-host",
80
- dest="chroma_host",
81
- default=default_host,
82
- help="Target host for HTTP or cloud Chroma clients.",
83
- )
84
- parser.add_argument(
85
- "--chroma-port",
86
- dest="chroma_port",
87
- type=int,
88
- default=default_port,
89
- help="Port for the HTTP Chroma client.",
90
- )
91
- parser.add_argument(
92
- "--chroma-ssl",
93
- dest="chroma_ssl",
94
- type=parse_bool,
95
- default=default_ssl,
96
- help="Toggle SSL for the HTTP Chroma client (default: true).",
97
- )
98
- parser.add_argument(
99
- "--chroma-tenant",
100
- dest="chroma_tenant",
101
- default=default_tenant,
102
- help="Tenant identifier for Chroma Cloud deployments.",
103
- )
104
- parser.add_argument(
105
- "--chroma-database",
106
- dest="chroma_database",
107
- default=default_database,
108
- help="Database name for Chroma Cloud deployments.",
109
- )
110
- parser.add_argument(
111
- "--chroma-api-key",
112
- dest="chroma_api_key",
113
- default=default_api_key,
114
- help="API key used to authenticate against Chroma Cloud.",
115
- )
116
- parser.add_argument(
117
- "--chroma-custom-auth",
118
- dest="chroma_custom_auth",
119
- default=default_custom_auth,
120
- help="Optional custom auth credentials for self-hosted HTTP deployments.",
121
- )
122
- parser.add_argument(
123
- "--chroma-id-prefix",
124
- dest="chroma_id_prefix",
125
- default=default_id_prefix,
126
- help="Prefix applied to document IDs stored in Chroma (default: kb::).",
127
- )
128
-
129
-
130
- def build_chroma_listener(options: Namespace, root: Path) -> Optional[ChromaIngestor]:
131
- """Construct a Chroma listener from parsed CLI options when enabled.
132
-
133
- Returns ``None`` when the configured client type is ``off``.
134
- """
135
-
136
- configuration = ChromaConfiguration.from_options(
137
- root=root,
138
- client_type=options.chroma_client,
139
- collection_name=options.chroma_collection,
140
- embedding=options.chroma_embedding,
141
- data_directory=options.chroma_data_dir,
142
- host=options.chroma_host,
143
- port=options.chroma_port,
144
- ssl=options.chroma_ssl,
145
- tenant=options.chroma_tenant,
146
- database=options.chroma_database,
147
- api_key=options.chroma_api_key,
148
- custom_auth_credentials=options.chroma_custom_auth,
149
- id_prefix=options.chroma_id_prefix,
150
- )
151
- if not configuration.enabled:
152
- return None
153
- return ChromaIngestor(configuration)
mcp_kb/cli/main.py DELETED
@@ -1,123 +0,0 @@
1
- """Command line interface for running the MCP knowledge base server."""
2
-
3
- from __future__ import annotations
4
-
5
- import argparse
6
- import asyncio
7
- import logging
8
- import os
9
- from pathlib import Path
10
- from typing import Iterable, List, Optional
11
-
12
- from mcp_kb.config import DATA_FOLDER_NAME, resolve_knowledge_base_root
13
- from mcp_kb.cli.args import add_chroma_arguments, build_chroma_listener, parse_bool
14
- from mcp_kb.ingest.chroma import ChromaIngestor
15
- from mcp_kb.knowledge.bootstrap import install_default_documentation
16
- from mcp_kb.security.path_validation import PathRules
17
- from mcp_kb.server.app import create_fastmcp_app
18
- from mcp.server.fastmcp import FastMCP
19
-
20
- logging.basicConfig(level=logging.INFO)
21
-
22
- logger = logging.getLogger(__name__)
23
-
24
-
25
- def _build_argument_parser() -> argparse.ArgumentParser:
26
- """Create and return the argument parser used by ``main``."""
27
-
28
- parser = argparse.ArgumentParser(description="Run the MCP knowledge base server")
29
- parser.add_argument(
30
- "--root",
31
- dest="root",
32
- default=None,
33
- help="Optional path to the knowledge base root (defaults to environment configuration)",
34
- )
35
- parser.add_argument(
36
- "--transport",
37
- dest="transports",
38
- action="append",
39
- choices=["stdio", "sse", "http"],
40
- help="Transport protocol to enable (repeatable). Defaults to stdio only.",
41
- )
42
- parser.add_argument(
43
- "--host",
44
- dest="host",
45
- default=None,
46
- help="Host interface for HTTP/SSE transports (default 127.0.0.1).",
47
- )
48
- parser.add_argument(
49
- "--port",
50
- dest="port",
51
- type=int,
52
- default=None,
53
- help="Port for HTTP/SSE transports (default 8000).",
54
- )
55
-
56
- add_chroma_arguments(parser)
57
- return parser
58
-
59
-
60
- async def _run_transports(server: FastMCP, transports: List[str]) -> None:
61
- """Run all selected transport protocols concurrently."""
62
-
63
- coroutines = []
64
- for name in transports:
65
- if name == "stdio":
66
- coroutines.append(server.run_stdio_async())
67
- elif name == "sse":
68
- coroutines.append(server.run_sse_async())
69
- elif name == "http":
70
- coroutines.append(server.run_streamable_http_async())
71
- else: # pragma: no cover - argparse restricts values
72
- raise ValueError(f"Unsupported transport: {name}")
73
-
74
- await asyncio.gather(*coroutines)
75
-
76
-
77
- def run_server(arguments: Iterable[str] | None = None) -> None:
78
- """Entry point used by both CLI invocations and unit tests."""
79
-
80
- parser = _build_argument_parser()
81
- options = parser.parse_args(arguments)
82
- root_path = resolve_knowledge_base_root(options.root)
83
- rules = PathRules(root=root_path, protected_folders=(DATA_FOLDER_NAME,))
84
- install_default_documentation(root_path)
85
- listeners: List[ChromaIngestor] = []
86
- try:
87
- listener = build_chroma_listener(options, root_path)
88
- except Exception as exc: # pragma: no cover - configuration errors
89
- raise SystemExit(f"Failed to configure Chroma ingestion: {exc}") from exc
90
- if listener is not None:
91
- listeners.append(listener)
92
- logger.info(
93
- "Chroma ingestion enabled (client=%s, collection=%s)",
94
- options.chroma_client,
95
- options.chroma_collection,
96
- )
97
- server = create_fastmcp_app(
98
- rules,
99
- host=options.host,
100
- port=options.port,
101
- listeners=listeners,
102
- )
103
- transports = options.transports or ["stdio"]
104
- logger.info(
105
- f"Running server on {options.host}:{options.port} with transports {transports}"
106
- )
107
- logger.info(f"Data root is {root_path}")
108
- print(
109
- "--------------------------------",
110
- root_path,
111
- "--------------------------------",
112
- )
113
- asyncio.run(_run_transports(server, transports))
114
-
115
-
116
- def main() -> None:
117
- """CLI hook that executes :func:`run_server`."""
118
-
119
- run_server()
120
-
121
-
122
- if __name__ == "__main__":
123
- main()
mcp_kb/cli/reindex.py DELETED
@@ -1,93 +0,0 @@
1
- """CLI command to reindex the knowledge base into configured ingestors.
2
-
3
- This command does not expose an MCP tool. Instead, it builds the configured
4
- ingestors and calls their ``reindex`` method when available, allowing operators
5
- to trigger a full rebuild of external indexes (e.g., Chroma) from the current
6
- filesystem state.
7
- """
8
-
9
- from __future__ import annotations
10
-
11
- import argparse
12
- import logging
13
- from typing import Iterable, List
14
-
15
- from mcp_kb.cli.args import add_chroma_arguments, build_chroma_listener
16
- from mcp_kb.config import DATA_FOLDER_NAME, resolve_knowledge_base_root
17
- from mcp_kb.knowledge.events import KnowledgeBaseReindexListener
18
- from mcp_kb.knowledge.store import KnowledgeBase
19
- from mcp_kb.security.path_validation import PathRules
20
-
21
-
22
- logger = logging.getLogger(__name__)
23
-
24
-
25
- def _build_argument_parser() -> argparse.ArgumentParser:
26
- """Return the argument parser for the reindex command."""
27
-
28
- parser = argparse.ArgumentParser(
29
- description="Reindex the knowledge base into configured backends"
30
- )
31
- parser.add_argument(
32
- "--root",
33
- dest="root",
34
- default=None,
35
- help="Optional path to the knowledge base root (defaults to environment configuration)",
36
- )
37
- add_chroma_arguments(parser)
38
- return parser
39
-
40
-
41
- def run_reindex(arguments: Iterable[str] | None = None) -> int:
42
- """Execute a reindex run across all registered ingestors.
43
-
44
- The function constructs a :class:`~mcp_kb.knowledge.store.KnowledgeBase`
45
- using the same root resolution logic as the server, builds any enabled
46
- ingestion listeners from CLI options, and invokes ``reindex`` on those that
47
- implement the optional protocol.
48
-
49
- Parameters
50
- ----------
51
- arguments:
52
- Optional iterable of command-line arguments, primarily used by tests.
53
-
54
- Returns
55
- -------
56
- int
57
- The total number of documents processed across all reindex-capable
58
- listeners.
59
- """
60
-
61
- parser = _build_argument_parser()
62
- options = parser.parse_args(arguments)
63
- root_path = resolve_knowledge_base_root(options.root)
64
- rules = PathRules(root=root_path, protected_folders=(DATA_FOLDER_NAME,))
65
- kb = KnowledgeBase(rules)
66
-
67
- listeners: List[KnowledgeBaseReindexListener] = []
68
- try:
69
- chroma = build_chroma_listener(options, root_path)
70
- except Exception as exc: # pragma: no cover - configuration errors
71
- raise SystemExit(f"Failed to configure Chroma ingestion: {exc}") from exc
72
- if chroma is not None and isinstance(chroma, KnowledgeBaseReindexListener):
73
- listeners.append(chroma)
74
-
75
- total = 0
76
- for listener in listeners:
77
- logger.info("Reindexing via %s", listener.__class__.__name__)
78
- count = listener.reindex(kb)
79
- logger.info("Reindexed %d documents via %s", count, listener.__class__.__name__)
80
- total += count
81
-
82
- return total
83
-
84
-
85
- def main() -> None:
86
- """CLI hook that executes :func:`run_reindex` and prints a summary."""
87
-
88
- total = run_reindex()
89
- print(f"Reindexed {total} documents")
90
-
91
-
92
- if __name__ == "__main__":
93
- main()
@@ -1,151 +0,0 @@
1
- # LLM Operating Manual — MCP Knowledge Base (`mcp-kb`)
2
-
3
- You are connected to a **local, text-only knowledge base**. Your job is to **search, read, create, update, and soft-delete** UTF‑8 text files under a single root directory while respecting safety rules below. Use the provided MCP tools exactly as specified.
4
-
5
- ---
6
-
7
- ## Ground Rules (enforced by the server)
8
-
9
- - **Paths are relative only.** Absolute paths are rejected. No `..` traversal.
10
- - **Protected folder:** `.data/` is read‑only. Do not write there.
11
- - **Soft delete sentinel:** Files marked with `_DELETE_` in the name are considered deleted. Do not read/write them.
12
- - **Text files only.** Binary-ish files are ignored by scans. Treat this KB as UTF‑8 text storage.
13
- - **Concurrency:** Writes are serialized per file; still prefer read‑verify‑write sequences.
14
-
15
- Constants (baked into the server):
16
- - Protected folder: `.data`
17
- - Documentation file name: `KNOWLEDBASE_DOC.md`
18
- - Delete sentinel: `_DELETE_`
19
-
20
- ---
21
-
22
- ## Tools You Can Call
23
-
24
- All tool names and parameter contracts are stable. Stick to these shapes.
25
-
26
- ### `create_file(path: str, content: str) -> str`
27
- - Create or **overwrite** a text file at `path` with `content`.
28
- - `path` must be **relative** and **outside** `.data/`.
29
-
30
- ### `read_file(path: str, start_line?: int, end_line?: int) -> { path, start_line, end_line, content }`
31
- - Read full file or a 1‑based inclusive slice.
32
- - If both bounds omitted ⇒ full file. If one bound omitted ⇒ server fills it.
33
-
34
- ### `append_file(path: str, content: str) -> str`
35
- - Append text. If file is missing, it will be **created**.
36
-
37
- ### `regex_replace(path: str, pattern: str, replacement: str) -> { replacements: int }`
38
- - Multiline regex (`re.MULTILINE`). Returns count. Always `read_file` afterwards to verify.
39
-
40
- ### `delete(path: str) -> str`
41
- - **Soft delete**: renames `name.ext` to `name_DELETE_.ext`. Use when content is obsolete.
42
-
43
- ### `search(query: str, limit: int = 5) -> [{ path, line, context: string[] }]`
44
- - Returns up to `limit` matches with short context.
45
- - If Chroma mirroring is active, results are **semantic** first; otherwise plain scan.
46
- - `limit` must be **> 0**.
47
-
48
- ### `overview() -> str`
49
- - A deterministic `tree`-like view of active files under root (skips deleted and binaries).
50
-
51
- ### `documentation() -> str`
52
- - Human usage guide (not this manual). For you, prefer this manual.
53
-
54
- ---
55
-
56
- ## How to Work Effectively
57
-
58
- ### 1) Discover
59
- - Call `overview()` to understand the tree.
60
- - If you need conventions or human guidelines, read `documentation()` (optional).
61
-
62
- ### 2) Locate Content
63
- - Prefer `search("keywords", limit=5)` to find candidate files/snippets.
64
- - Examine each `{path, line, context}`. The `context` is a short window around the hit.
65
- - If results look thin, **increase `limit`** (e.g., 10–20) before broadening the query.
66
-
67
- ### 3) Read Precisely
68
- - Use `read_file(path)` for the full file when structure matters.
69
- - If the file is large but you know the region, use `read_file(path, start_line, end_line)` to minimize tokens.
70
-
71
- ### 4) Create New Knowledge
72
- - Pick a **descriptive relative path** (folders based on topic, kebab‑case names).
73
- - Example: `architecture/decision-records/adr-2025-10-06-edge-cache.md`
74
- - Call `create_file(path, content)`.
75
- - Keep the **title as the first Markdown heading** so search has context.
76
- - Link related files with **relative Markdown links**.
77
-
78
- ### 5) Update Safely
79
- - For small edits:
80
- 1) `read_file(...)` to confirm current state.
81
- 2) `regex_replace(path, pattern, replacement)` for targeted changes.
82
- 3) `read_file(...)` again to verify.
83
- - For additive changes: `append_file(path, "\n...")`.
84
-
85
- ### 6) Deletion Policy
86
- - Use `delete(path)` to **soft-delete**. Do not operate on files that already include `_DELETE_` in their name.
87
-
88
- ---
89
-
90
- ## Search Semantics (important)
91
-
92
- - When Chroma ingestion is **enabled**, `search()` uses semantic ranking first and returns the **best slice per file** (the ingestor extracts one representative match per document chunk/file). If no obvious line match is found, you may get a **top-of-file preview** — then call `read_file()` to confirm.
93
- - When Chroma is **not** enabled, `search()` scans files literally and returns all matches up to `limit`.
94
- - Always **validate** by fetching the file segment with `read_file()` before making edits.
95
-
96
- ---
97
-
98
- ## Parameter Contracts and Gotchas
99
-
100
- - `path` must be **relative** (e.g., `notes/today.md`). Absolute paths are rejected.
101
- - Do **not** write into `.data/` (protected). Reads are allowed there.
102
- - Line numbers in `read_file` are **1‑based** and the interval is **inclusive**.
103
- - `regex_replace` uses Python’s `re.MULTILINE`. Validate your pattern; avoid overly broad substitutions.
104
- - `append_file` will create a file if missing (useful for logs/progress notes).
105
-
106
- ---
107
-
108
- ## Typical Recipes
109
-
110
- **Find → Read → Edit**
111
- 1. `search("beta feature toggle", limit=10)`
112
- 2. Pick a result: `read_file("features/toggles.md", 40, 80)`
113
- 3. Adjust: `regex_replace("features/toggles.md", "^Status:.*$", "Status: Enabled")`
114
- 4. Verify: `read_file("features/toggles.md")` (check the `Status:` header)
115
-
116
- **Add a new doc**
117
- 1. `create_file("ops/runbooks/cache-invalidation.md", "# Cache Invalidation\n\n…")`
118
- 2. Optionally link it from an index: `append_file("ops/README.md", "\n- [Cache Invalidation](runbooks/cache-invalidation.md)")`
119
-
120
- **Soft delete an obsolete note**
121
- 1. `delete("notes/old-incident.md")`
122
-
123
- ---
124
-
125
- ## Error Recovery
126
-
127
- - **"Absolute paths are not permitted"** → Use a **relative** path.
128
- - **"Writes are not allowed inside the protected folder '.data'"** → Choose a different folder (e.g., `docs/`).
129
- - **"File 'X' does not exist"** on delete → Confirm with `overview()` or `search()`. Only existing non‑deleted files can be soft‑deleted.
130
- - **No search hits** → Widen keywords, increase `limit`, or pivot to `overview()` to eyeball likely locations.
131
-
132
- ---
133
-
134
- ## Things You Should Not Do
135
-
136
- - Do not fabricate file contents or paths. Always confirm with `overview()`, `search()`, and `read_file()`.
137
- - Do not operate on files that include `_DELETE_` in their name.
138
- - Do not attempt to talk directly to Chroma; you only use `search()`. Indexing is handled automatically after writes.
139
- - Do not write binary or non‑UTF‑8 content.
140
-
141
- ---
142
-
143
- ## Performance Hints
144
-
145
- - Prefer `search()` + targeted `read_file()` slices over reading entire large files.
146
- - Keep `limit` modest (5–10) unless you must broaden the search.
147
- - Batch edits in one file using a single `regex_replace` when safe (then verify).
148
-
149
- ---
150
-
151
- You now have the minimal contract to operate this KB safely and efficiently.
mcp_kb/data/__init__.py DELETED
@@ -1 +0,0 @@
1
- """Embedded data files shipped with the MCP knowledge base server."""
mcp_kb/ingest/__init__.py DELETED
@@ -1 +0,0 @@
1
- """Pluggable ingestion adapters for synchronizing knowledge base content."""