mcp-kb 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mcp_kb/cli/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """CLI subpackage exposing entry points for running the server."""
mcp_kb/cli/args.py ADDED
@@ -0,0 +1,175 @@
1
+ """Shared CLI argument wiring for knowledge base utilities.
2
+
3
+ This module centralizes the definition of common command-line options and
4
+ helpers so that multiple entry points (e.g., server and reindex commands) can
5
+ remain small and focused while sharing consistent behavior. The helpers are
6
+ careful to avoid embedding environment defaults directly into the argparse
7
+ objects so that downstream consumers can layer persisted runtime configuration
8
+ in between CLI flags and built-in defaults.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import argparse
14
+ from argparse import ArgumentParser, Namespace
15
+ from pathlib import Path
16
+ from typing import Optional
17
+
18
+ try:
19
+ from mcp_kb.ingest.chroma import SUPPORTED_CLIENTS, ChromaConfiguration, ChromaIngestor
20
+ W_CHROMA = True
21
+ except ImportError:
22
+ W_CHROMA = False
23
+
24
def parse_bool(value: str | bool | None) -> bool:
    """Return ``True`` when ``value`` represents an affirmative boolean.

    Booleans are returned unchanged and ``None`` yields ``False``. Any other
    value is converted to text, stripped of surrounding whitespace, and compared
    case-insensitively against "1", "true", "t", "yes", and "y". Everything
    else, including unrecognised strings, yields ``False``.
    """

    if isinstance(value, bool):
        return value
    if value is None:
        return False
    # ``str()`` keeps non-string scalars (e.g. the integer ``1``) from failing on
    # ``.lower()``; ``strip()`` tolerates values padded with whitespace.
    return str(value).strip().lower() in {"1", "true", "t", "yes", "y"}
36
+
37
+
38
def add_chroma_arguments(parser: ArgumentParser) -> None:
    """Attach every ``--chroma-*`` option to ``parser``.

    Each option is registered with ``argparse.SUPPRESS`` as its default, so an
    option that is not supplied on the command line leaves no attribute on the
    parsed namespace. Callers are expected to merge CLI flags with their other
    configuration sources themselves. When the optional Chroma import failed
    (``W_CHROMA`` is false) the parser is left untouched.
    """
    if not W_CHROMA:
        return None

    # One row per option: (flag suffix, help text, extra ``add_argument`` kwargs).
    # The destination is derived from the suffix (``data-dir`` -> ``chroma_data_dir``).
    option_table = (
        (
            "client",
            "Client implementation for mirroring data to ChromaDB (default: persistent).",
            {"choices": SUPPORTED_CLIENTS},
        ),
        ("collection", "Chroma collection name used to store documents.", {}),
        (
            "embedding",
            "Embedding function name registered with chromadb.utils.embedding_functions.",
            {},
        ),
        ("data-dir", "Storage directory for the persistent Chroma client.", {}),
        ("host", "Target host for HTTP or cloud Chroma clients.", {}),
        ("port", "Port for the HTTP Chroma client.", {"type": int}),
        (
            "ssl",
            "Toggle SSL for the HTTP Chroma client (default: true).",
            {"type": parse_bool},
        ),
        ("tenant", "Tenant identifier for Chroma Cloud deployments.", {}),
        ("database", "Database name for Chroma Cloud deployments.", {}),
        ("api-key", "API key used to authenticate against Chroma Cloud.", {}),
        (
            "custom-auth",
            "Optional custom auth credentials for self-hosted HTTP deployments.",
            {},
        ),
        (
            "id-prefix",
            "Prefix applied to document IDs stored in Chroma (default: kb::).",
            {},
        ),
        ("sentence-transformer", "Sentence transformer model name.", {}),
        (
            "chunk-size",
            "Chunk size for the sentence transformer model.",
            {"type": int},
        ),
        (
            "chunk-overlap",
            "Chunk overlap for the sentence transformer model.",
            {"type": int},
        ),
    )

    for suffix, help_text, extra in option_table:
        parser.add_argument(
            f"--chroma-{suffix}",
            dest="chroma_" + suffix.replace("-", "_"),
            default=argparse.SUPPRESS,
            help=help_text,
            **extra,
        )
145
+
146
+
147
def build_chroma_listener(options: Namespace, root: Path) -> Optional[ChromaIngestor]:
    """Build a ``ChromaIngestor`` from the ``chroma_*`` attributes of ``options``.

    Returns ``None`` when Chroma support could not be imported (``W_CHROMA`` is
    false) or when the resulting configuration reports ``enabled`` as falsy.
    Every ``chroma_*`` attribute listed below must be present on ``options``;
    a missing one raises ``AttributeError``.
    """
    if not W_CHROMA:
        return None

    # ``from_options`` keyword -> attribute name on the parsed namespace.
    keyword_sources = {
        "client_type": "chroma_client",
        "collection_name": "chroma_collection",
        "embedding": "chroma_embedding",
        "data_directory": "chroma_data_dir",
        "host": "chroma_host",
        "port": "chroma_port",
        "ssl": "chroma_ssl",
        "tenant": "chroma_tenant",
        "database": "chroma_database",
        "api_key": "chroma_api_key",
        "custom_auth_credentials": "chroma_custom_auth",
        "id_prefix": "chroma_id_prefix",
        "sentence_transformer": "chroma_sentence_transformer",
        "chunk_size": "chroma_chunk_size",
        "chunk_overlap": "chroma_chunk_overlap",
    }
    settings = {
        keyword: getattr(options, attribute)
        for keyword, attribute in keyword_sources.items()
    }
    chroma_config = ChromaConfiguration.from_options(root=root, **settings)
    return ChromaIngestor(chroma_config) if chroma_config.enabled else None
mcp_kb/cli/main.py ADDED
@@ -0,0 +1,181 @@
1
+ """Command line interface for running the MCP knowledge base server."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import asyncio
7
+ import logging
8
+ from typing import Iterable, List
9
+
10
+ from mcp_kb.config import DATA_FOLDER_NAME, resolve_knowledge_base_root
11
+ from mcp_kb.cli.args import add_chroma_arguments, build_chroma_listener
12
+ from mcp_kb.cli.runtime_config import (
13
+ apply_cli_runtime_configuration,
14
+ load_runtime_configuration,
15
+ persist_runtime_configuration,
16
+ )
17
# Chroma support is optional: record whether it is importable instead of failing.
try:
    from mcp_kb.ingest.chroma import ChromaIngestor

    W_CHROMA = True
except ImportError:
    # The previous fallback tested ``TYPE_CHECKING`` without importing it, which
    # raised ``NameError`` exactly when the optional dependency was missing.
    # In the code visible here the name is only used in an annotation, so a
    # ``None`` placeholder is enough to keep the module importable.
    ChromaIngestor = None  # type: ignore[assignment,misc]
    W_CHROMA = False
24
+ from mcp_kb.knowledge.bootstrap import install_default_documentation
25
+ from mcp_kb.security.path_validation import PathRules
26
+ from mcp_kb.server.app import create_fastmcp_app
27
+ from mcp.server.fastmcp import FastMCP
28
+ from mcp_kb.ui import start_ui_server
29
+
30
+ logging.basicConfig(level=logging.INFO)
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+
35
def _build_argument_parser() -> argparse.ArgumentParser:
    """Assemble the command-line parser for the server entry point.

    All value-taking options default to ``argparse.SUPPRESS``, so they only
    appear on the parsed namespace when given explicitly; ``--no-ui`` is a
    plain ``store_true`` flag. Chroma options are appended when ``W_CHROMA``
    is true.
    """

    cli = argparse.ArgumentParser(
        allow_abbrev=False,
        description="Run the MCP knowledge base server",
    )
    register = cli.add_argument
    unset = argparse.SUPPRESS

    register(
        "--root",
        dest="root",
        default=unset,
        help="Optional path to the knowledge base root (defaults to environment configuration)",
    )
    register(
        "--transport",
        dest="transports",
        action="append",
        choices=["stdio", "sse", "http"],
        default=unset,
        help="Transport protocol to enable (repeatable). Defaults to stdio only.",
    )
    register(
        "--host",
        dest="host",
        default=unset,
        help="Host interface for HTTP/SSE transports (default 127.0.0.1).",
    )
    register(
        "--port",
        dest="port",
        type=int,
        default=unset,
        help="Port for HTTP/SSE transports (default 8000).",
    )

    ui_port_help = (
        "Starting port for the human UI (default 8765). If occupied, the UI "
        "server increments the port by 1 until a free one is found."
    )
    register("--ui-port", dest="ui_port", type=int, default=unset, help=ui_port_help)

    no_ui_help = (
        "Disable the human UI entirely, even when HTTP/SSE transports are active."
    )
    register("--no-ui", dest="no_ui", action="store_true", help=no_ui_help)

    if W_CHROMA:
        add_chroma_arguments(cli)
    return cli
90
+
91
+
92
+ async def _run_transports(server: FastMCP, transports: List[str]) -> None:
93
+ """Run all selected transport protocols concurrently."""
94
+
95
+ coroutines = []
96
+ for name in transports:
97
+ if name == "stdio":
98
+ coroutines.append(server.run_stdio_async())
99
+ elif name == "sse":
100
+ coroutines.append(server.run_sse_async())
101
+ elif name == "http":
102
+ coroutines.append(server.run_streamable_http_async())
103
+ else: # pragma: no cover - argparse restricts values
104
+ raise ValueError(f"Unsupported transport: {name}")
105
+
106
+ await asyncio.gather(*coroutines)
107
+
108
+
109
def run_server(arguments: Iterable[str] | None = None) -> None:
    """Entry point used by both CLI invocations and unit tests.

    Besides orchestrating the server lifecycle, the function resolves
    configuration values by layering command-line arguments over environment
    variables and any persisted defaults stored in the knowledge base data
    directory. The resolved mapping is written back to disk so that future runs
    inherit the same defaults unless explicitly overridden.

    Parameters
    ----------
    arguments:
        Optional command-line arguments, forwarded unchanged to
        ``parser.parse_args``.

    Raises
    ------
    SystemExit
        When building the Chroma listener raises any exception; the original
        exception is logged and chained as the cause.
    """

    parser = _build_argument_parser()
    options = parser.parse_args(arguments)
    # ``root`` is read with ``getattr`` because it may be absent from the namespace.
    root_path = resolve_knowledge_base_root(getattr(options, "root", None))

    persisted_config = load_runtime_configuration(root_path)
    # NOTE(review): the attributes read from ``options`` below (host, port,
    # transports, ui_port, chroma_*) are presumably filled in by this call,
    # since they are accessed without ``getattr`` — confirm.
    resolved_config = apply_cli_runtime_configuration(
        options,
        root=root_path,
        persisted=persisted_config,
    )
    rules = PathRules(root=root_path, protected_folders=(DATA_FOLDER_NAME,))
    install_default_documentation(root_path)
    listeners: List[ChromaIngestor] = []
    try:
        listener = build_chroma_listener(options, root_path)
    except Exception as exc:  # pragma: no cover - configuration errors
        # Log the full traceback, then exit with a short message.
        logger.exception(exc)
        raise SystemExit(f"Failed to configure Chroma ingestion: {exc}") from exc
    if listener is not None:
        listeners.append(listener)
        logger.info(
            "Chroma ingestion enabled (client=%s, collection=%s)",
            options.chroma_client,
            options.chroma_collection,
        )
    server = create_fastmcp_app(
        rules,
        host=options.host,
        port=options.port,
        listeners=listeners,
    )
    # Fall back to stdio when no transport was selected, and record the
    # effective list on both the namespace and the configuration to persist.
    transports = options.transports or ["stdio"]
    options.transports = transports
    resolved_config["transports"] = transports
    logger.info(
        f"Running server on {options.host}:{options.port} with transports {transports}"
    )
    logger.info(f"Data root is {root_path}")

    # Start the human-accessible UI when an HTTP-capable transport is active.
    if not options.no_ui and any(t in ("http", "sse") for t in transports):
        # The UI is skipped when the server object exposes no ``kb`` attribute.
        kb = getattr(server, "kb", None)
        if kb is not None:
            ui = start_ui_server(
                kb,
                host=options.host or "127.0.0.1",
                port=options.ui_port,
            )
            logger.info("UI available at http://%s:%d", ui.host, ui.port)

    # Persist before the transports start; the ``asyncio.run`` call below only
    # returns once they have finished.
    persist_runtime_configuration(root_path, resolved_config)

    asyncio.run(_run_transports(server, transports))
172
+
173
+
174
def main() -> None:
    """Console entry point: delegate to :func:`run_server` with no explicit arguments."""

    run_server(None)
178
+
179
+
180
+ if __name__ == "__main__":
181
+ main()
mcp_kb/cli/reindex.py ADDED
@@ -0,0 +1,113 @@
1
+ """CLI command to reindex the knowledge base into configured ingestors.
2
+
3
+ This command does not expose an MCP tool. Instead, it builds the configured
4
+ ingestors and calls their ``reindex`` method when available, allowing operators
5
+ to trigger a full rebuild of external indexes (e.g., Chroma) from the current
6
+ filesystem state.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import argparse
12
+ import logging
13
+ from typing import Iterable, List
14
+
15
+ from mcp_kb.cli.args import add_chroma_arguments, build_chroma_listener
16
+ from mcp_kb.cli.runtime_config import (
17
+ apply_cli_runtime_configuration,
18
+ load_runtime_configuration,
19
+ persist_runtime_configuration,
20
+ )
21
+ from mcp_kb.config import DATA_FOLDER_NAME, resolve_knowledge_base_root
22
+ from mcp_kb.knowledge.events import KnowledgeBaseReindexListener
23
+ from mcp_kb.knowledge.store import KnowledgeBase
24
+ from mcp_kb.security.path_validation import PathRules
25
+
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
def _build_argument_parser() -> argparse.ArgumentParser:
    """Build the parser for the reindex command: ``--root`` plus the Chroma options."""

    cli = argparse.ArgumentParser(
        allow_abbrev=False,
        description="Reindex the knowledge base into configured backends",
    )
    # Suppressed default: ``root`` is absent from the namespace unless supplied.
    root_option = {
        "dest": "root",
        "default": argparse.SUPPRESS,
        "help": "Optional path to the knowledge base root (defaults to environment configuration)",
    }
    cli.add_argument("--root", **root_option)
    add_chroma_arguments(cli)
    return cli
45
+
46
+
47
def run_reindex(arguments: Iterable[str] | None = None) -> int:
    """Rebuild external indexes from the current knowledge base contents.

    The command parses its options, resolves the knowledge base root, merges
    the CLI options with the persisted runtime configuration, and builds the
    Chroma listener. If that listener is a ``KnowledgeBaseReindexListener`` its
    ``reindex`` method is called with a fresh ``KnowledgeBase``. The resolved
    configuration is persisted once reindexing has finished.

    Parameters
    ----------
    arguments:
        Optional iterable of command-line arguments, forwarded to
        ``parse_args``.

    Returns
    -------
    int
        Sum of the counts returned by every listener's ``reindex`` call.

    Raises
    ------
    SystemExit
        When building the Chroma listener raises any exception.
    """

    cli_options = _build_argument_parser().parse_args(arguments)
    kb_root = resolve_knowledge_base_root(getattr(cli_options, "root", None))

    effective_config = apply_cli_runtime_configuration(
        cli_options,
        root=kb_root,
        persisted=load_runtime_configuration(kb_root),
    )
    knowledge_base = KnowledgeBase(
        PathRules(root=kb_root, protected_folders=(DATA_FOLDER_NAME,))
    )

    try:
        candidate = build_chroma_listener(cli_options, kb_root)
    except Exception as exc:  # pragma: no cover - configuration errors
        logger.exception(exc)
        raise SystemExit(f"Failed to configure Chroma ingestion: {exc}") from exc

    # Only listeners that implement the reindex protocol take part.
    reindexers: List[KnowledgeBaseReindexListener] = []
    if candidate is not None:
        if isinstance(candidate, KnowledgeBaseReindexListener):
            reindexers.append(candidate)

    processed = 0
    for reindexer in reindexers:
        logger.info("Reindexing via %s", reindexer.__class__.__name__)
        handled = reindexer.reindex(knowledge_base)
        logger.info(
            "Reindexed %d documents via %s", handled, reindexer.__class__.__name__
        )
        processed += handled

    persist_runtime_configuration(kb_root, effective_config)

    return processed
103
+
104
+
105
def main() -> None:
    """Console entry point: run :func:`run_reindex` and print the document total."""

    processed = run_reindex()
    print(f"Reindexed {processed} documents")
110
+
111
+
112
+ if __name__ == "__main__":
113
+ main()