mcp-kb 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ """Knowledge layer that encapsulates content storage and search helpers."""
@@ -0,0 +1,44 @@
1
+ """Bootstrap helpers executed during server startup."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import importlib.resources as resources
6
+ from pathlib import Path
7
+
8
+ from mcp_kb.config import DATA_FOLDER_NAME, DOC_FILENAME
9
+
10
+
11
def install_default_documentation(root: Path) -> Path:
    """Ensure the default documentation file exists under ``root``.

    The documentation directory is created on demand and the packaged
    ``KNOWLEDBASE_DOC.md`` is copied into place. A file that is already
    present is left untouched so operator customizations survive restarts.

    Parameters
    ----------
    root:
        Absolute path representing the knowledge base root directory.

    Returns
    -------
    Path
        Path to the documentation file inside the knowledge base tree.
    """

    target = root / DATA_FOLDER_NAME / DOC_FILENAME
    if target.exists():
        return target

    target.parent.mkdir(parents=True, exist_ok=True)

    packaged = resources.files("mcp_kb.data").joinpath("KNOWLEDBASE_DOC.md")
    target.write_text(packaged.read_text(encoding="utf-8"), encoding="utf-8")

    return target
@@ -0,0 +1,105 @@
1
+ """Change event types and listener contracts for knowledge base updates.
2
+
3
+ The knowledge base emits high-level events whenever a markdown document is
4
+ created, updated, or soft deleted. Downstream components can subscribe to these
5
+ notifications to implement side effects such as vector database ingestion without
6
+ coupling the core filesystem logic to specific backends. Each event captures both
7
+ absolute and knowledge-base-relative paths so that listeners can decide which
8
+ identifier best fits their storage requirements.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from ast import Tuple
14
+ from pathlib import Path
15
+ from typing import Optional, Protocol, runtime_checkable, TYPE_CHECKING, Dict, Any
16
+ from pydantic import BaseModel, model_validator
17
+
18
+ if TYPE_CHECKING: # pragma: no cover - type hints only
19
+ from typing import List
20
+
21
+ from mcp_kb.knowledge.store import KnowledgeBase
22
+
23
+
24
class FileUpsertEvent(BaseModel):
    """Describes a document that was created or updated inside the knowledge base.

    Attributes
    ----------
    path:
        Path relative to the configured knowledge base root. This identifier is
        stable across restarts and makes for concise IDs in downstream systems.
    content:
        Full markdown content of the updated document at the time the event was
        emitted. Listeners can avoid re-reading the file when they only need the
        text payload.
    """

    path: str
    content: str

    @model_validator(mode="before")
    @classmethod
    def check_path(cls, values: dict) -> dict:
        """Coerce ``Path`` inputs to strings before field validation runs."""
        # Guard with isinstance/.get so a payload without "path" (or a
        # non-dict payload) produces a normal pydantic validation error
        # instead of a KeyError raised from this hook.
        if isinstance(values, dict) and isinstance(values.get("path"), Path):
            values["path"] = str(values["path"])
        return values
47
+
48
+
49
class FileDeleteEvent(BaseModel):
    """Signals that a document has been soft deleted according to PRD semantics.

    Attributes
    ----------
    path:
        Original knowledge-base-relative path before soft deletion. Downstream
        systems should remove entries keyed by this relative path to stay in
        sync with the knowledge base state.
    """

    path: str

    @model_validator(mode="before")
    @classmethod
    def check_path(cls, values: dict) -> dict:
        """Coerce ``Path`` inputs to strings before field validation runs."""
        # Guard with isinstance/.get so a payload without "path" (or a
        # non-dict payload) produces a normal pydantic validation error
        # instead of a KeyError raised from this hook.
        if isinstance(values, dict) and isinstance(values.get("path"), Path):
            values["path"] = str(values["path"])
        return values
68
+
69
+
70
class KnowledgeBaseListener(Protocol):
    """Contract for subscribers that observe knowledge base mutations."""

    def handle_upsert(self, event: FileUpsertEvent) -> None:
        """React to a document being created or updated."""
        ...

    def handle_delete(self, event: FileDeleteEvent) -> None:
        """React to a document being soft deleted."""
        ...
78
+
79
+
80
@runtime_checkable
class KnowledgeBaseSearchListener(Protocol):
    """Optional extension that allows listeners to service search requests."""

    def search(
        self,
        kb: "KnowledgeBase",
        query: str,
        *,
        context_lines: int = 2,
        limit: Optional[int] = None,
    ) -> "tuple[list[FileSegment], dict[str, Any]]":
        """Return semantic search matches for ``query`` plus provider metadata.

        The string annotation uses builtin generics so it does not depend on
        ``Tuple``/``List`` imports; the previous annotation referenced ``Tuple``
        imported from :mod:`ast`, which is an AST node type, not a typing
        construct.
        """
        ...
93
+
94
+
95
@runtime_checkable
class KnowledgeBaseReindexListener(Protocol):
    """Optional capability for listeners that support full reindexing.

    A listener advertising this protocol exposes ``reindex`` so external
    indexes can be rebuilt from the current state of the knowledge base.
    The operation should be idempotent and safe to invoke repeatedly.
    """

    def reindex(self, kb: "KnowledgeBase") -> int:
        """Rebuild indexes and report how many documents were processed."""
        ...
@@ -0,0 +1,177 @@
1
+ """Search utilities that operate on the knowledge base filesystem.
2
+
3
+ The functions in this module are separate from ``KnowledgeBase`` so that they
4
+ can evolve independently. Search often benefits from dedicated caching or
5
+ indexing strategies; keeping it in its own module means the server can swap the
6
+ implementation later without changing the core file lifecycle API.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from pathlib import Path
12
+ from typing import Dict, Iterable, List, Optional, Literal, Any, Tuple
13
+
14
+ from mcp_kb.config import DATA_FOLDER_NAME, DOC_FILENAME
15
+ from mcp_kb.knowledge.events import KnowledgeBaseSearchListener
16
+ from mcp_kb.knowledge.store import KnowledgeBase, FileSegment
17
+ from pydantic import BaseModel
18
+
19
+
20
def search_text(
    kb: KnowledgeBase,
    query: str,
    context_lines: int = 2,
    *,
    providers: Iterable[KnowledgeBaseSearchListener] | None = None,
    n_results: Optional[int] = None,
) -> Tuple[List[FileSegment], Dict[str, Any]]:
    """Search for ``query`` in all non-deleted knowledge base files.

    Parameters
    ----------
    kb:
        Active knowledge base instance used to iterate over files.
    query:
        Literal string that should be located within the files. The helper does
        not treat the query as a regular expression to avoid surprising matches
        when characters such as ``*`` appear in user input.
    context_lines:
        Number of lines to include before and after each match. Defaults to two
        lines, aligning with the PRD's requirement for contextual snippets.
    providers:
        Optional iterable of listeners capable of serving semantic search
        results. Every provider is consulted in order and all of their matches
        and metadata are accumulated; a filesystem scan then runs as well and
        its matches are appended to the combined result.
    n_results:
        Maximum number of matches produced by the filesystem scan. ``None``
        keeps every match discovered on disk. Provider results are not capped
        by this value, so the combined list may exceed it.

    Returns
    -------
    tuple[list[FileSegment], dict[str, Any]]
        Ordered list of matches plus a metadata dictionary merged from all
        provider responses. Each match path is rewritten relative to the
        knowledge base root via ``assert_path`` before being returned.

    Raises
    ------
    RuntimeError
        If any provider raises while servicing the query.
    """

    all_matches: List[FileSegment] = []
    all_meta: Dict[str, Any] = {}
    for provider in providers or ():
        try:
            matches, meta = provider.search(
                kb,
                query,
                context_lines=context_lines,
                limit=n_results,
            )
        except Exception as exc:  # pragma: no cover - defensive path
            raise RuntimeError(f"Search provider {provider!r} failed: {exc}") from exc
        if matches:
            all_matches.extend(matches)
            all_meta.update(meta)

    all_matches.extend(_search_by_scanning(kb, query, context_lines, n_results))
    for match in all_matches:
        # Normalizes each path to be knowledge-base relative and rejects
        # paths that escape the configured root.
        match.assert_path(kb.rules)
    return all_matches, all_meta
78
+
79
+
80
def _search_by_scanning(
    kb: KnowledgeBase,
    query: str,
    context_lines: int,
    n_results: Optional[int],
) -> List[FileSegment]:
    """Scan active files on disk and collect substring matches for ``query``."""

    collected: List[FileSegment] = []
    for candidate in kb.iter_active_files():
        collected += _extract_matches_for_path(candidate, query, context_lines)
        if n_results is not None and len(collected) >= n_results:
            # Stop early once enough matches are gathered; trim any overshoot
            # from the last file processed.
            return collected[:n_results]
    return collected
94
+
95
+
96
+ def _build_tree(paths: List[List[str]]) -> Dict[str, Dict]:
97
+ """Construct a nested dictionary representing the directory tree."""
98
+
99
+ tree: Dict[str, Dict] = {}
100
+ for parts in paths:
101
+ current = tree
102
+ for part in parts:
103
+ current = current.setdefault(part, {})
104
+ return tree
105
+
106
+
107
+ def _flatten_tree(tree: Dict[str, Dict], prefix: str = " ") -> List[str]:
108
+ """Convert a nested dictionary tree into indented lines."""
109
+
110
+ lines: List[str] = []
111
+ for name in sorted(tree.keys()):
112
+ lines.append(f"{prefix}- {name}")
113
+ lines.extend(_flatten_tree(tree[name], prefix + " "))
114
+ return lines
115
+
116
+
117
def build_tree_overview(kb: KnowledgeBase) -> str:
    """Produce a textual tree showing the structure of the knowledge base.

    The output mirrors a simplified ``tree`` command while staying
    deterministic across operating systems by controlling ordering and
    indentation.
    """

    relative_parts = [
        list(active.relative_to(kb.rules.root).parts)
        for active in kb.iter_active_files()
    ]
    return "\n".join(_flatten_tree(_build_tree(relative_parts), prefix=""))
132
+
133
+
134
def read_documentation(kb: KnowledgeBase) -> str:
    """Return the canonical documentation content, or ``""`` when absent.

    No access-control checks are performed here: read operations are always
    permitted, even inside the protected documentation folder.
    """

    doc_path = kb.rules.root / DATA_FOLDER_NAME / DOC_FILENAME
    try:
        return doc_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        return ""
146
+
147
+
148
def _extract_matches_for_path(
    path: Path, query: str, context_lines: int
) -> List[FileSegment]:
    """Read ``path`` as UTF-8 and return every match containing ``query``."""

    buffer = path.read_text(encoding="utf-8").splitlines()
    return _extract_matches_from_lines(path, buffer, query, context_lines)
155
+
156
+
157
def _extract_matches_from_lines(
    path: Path,
    lines: List[str],
    query: str,
    context_lines: int,
) -> List[FileSegment]:
    """Return matches for ``query`` using the provided ``lines`` buffer."""

    segments: List[FileSegment] = []
    for lineno, text in enumerate(lines, start=1):
        if query not in text:
            continue
        # ``first`` is a zero-based slice index, ``last`` an exclusive bound;
        # together they window ``context_lines`` lines around the hit.
        # NOTE(review): FileSegment documents one-based line numbers, yet the
        # zero-based ``first`` is stored as ``start_line`` — confirm which
        # convention consumers expect.
        first = max(0, lineno - context_lines - 1)
        last = min(len(lines), lineno + context_lines)
        snippet = "\n".join(lines[first:last])
        segments.append(
            FileSegment(path=path, start_line=first, end_line=last, content=snippet)
        )
    return segments
174
+
175
+ __all__ = [
176
+ "search_text",
177
+ ]
@@ -0,0 +1,294 @@
1
+ """Core knowledge base operations for file lifecycle management.
2
+
3
+ This module exposes the ``KnowledgeBase`` class, which orchestrates validated
4
+ filesystem operations for the MCP server. The class encapsulates logic for
5
+ creating, reading, appending, and modifying text files while respecting the
6
+ security constraints defined in the PRD. Each method returns plain Python data
7
+ structures so that higher-level layers (e.g., JSON-RPC handlers) can focus on
8
+ protocol serialization rather than filesystem minutiae.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import re
14
+ from pathlib import Path
15
+ from typing import Iterable, Optional, Union
16
+
17
+ from mcp_kb.config import DELETE_SENTINEL, DATA_FOLDER_NAME
18
+ from mcp_kb.knowledge.events import (
19
+ FileDeleteEvent,
20
+ FileUpsertEvent,
21
+ KnowledgeBaseListener,
22
+ )
23
+ from mcp_kb.security.path_validation import (
24
+ PathRules,
25
+ ensure_write_allowed,
26
+ normalize_path,
27
+ )
28
+ from mcp_kb.utils.filesystem import (
29
+ FileLockRegistry,
30
+ append_text,
31
+ ensure_parent_directory,
32
+ read_text,
33
+ rename,
34
+ write_text,
35
+ )
36
+
37
+
38
+ from pydantic import BaseModel, model_validator
39
+
40
+
41
class FileSegment(BaseModel):
    """Represents a snippet of file content returned to MCP clients.

    The model captures a ``path`` (relative to the knowledge base root)
    along with one-based ``start_line`` and ``end_line`` indices and the
    extracted text ``content``. Using a Pydantic model makes structured output
    and validation consistent across API layers.
    """

    path: str
    start_line: int
    end_line: int
    content: str

    @model_validator(mode="before")
    @classmethod
    def check_path(cls, values: dict) -> dict:
        """Coerce ``Path`` inputs to strings before field validation runs."""
        # Guard with isinstance/.get so a payload without "path" (or a
        # non-dict payload) produces a normal pydantic validation error
        # instead of a KeyError raised from this hook.
        if isinstance(values, dict) and isinstance(values.get("path"), Path):
            values["path"] = str(values["path"])
        return values

    def assert_path(self, rules: PathRules) -> None:
        """Rewrite ``self.path`` to be relative to ``rules.root``.

        Relative inputs are resolved against the knowledge base root; absolute
        inputs must already live inside it.

        Raises
        ------
        ValueError
            If the resolved path falls outside the knowledge base root.
        """

        rel_path = Path(self.path)
        abspath = rel_path if rel_path.is_absolute() else rules.root / rel_path
        if not abspath.is_relative_to(rules.root):
            raise ValueError(
                f"Relative path {rel_path} is not in the knowledge base root"
            )
        # NOTE(review): no protected-folder check is performed here despite a
        # comment in an earlier revision implying one — confirm whether it is
        # required.
        self.path = str(abspath.relative_to(rules.root))
73
+
74
+
75
class KnowledgeBase:
    """High-level API that executes validated knowledge base operations.

    The class is intentionally stateless aside from the path rules and lock
    registry. Stateless methods make this component easy to reuse across tests
    and potential future transports. Locking responsibilities are scoped to the
    knowledge base to keep write safety consistent across entry points.
    """

    def __init__(
        self,
        rules: PathRules,
        lock_registry: FileLockRegistry | None = None,
        listeners: Iterable[KnowledgeBaseListener] | None = None,
    ) -> None:
        """Initialize the knowledge base with path rules and optional locks.

        Parameters
        ----------
        rules:
            Active path rules that govern which paths are safe to touch.
        lock_registry:
            Optional ``FileLockRegistry`` allowing tests to inject deterministic
            locking behavior. A new registry is created when omitted.
        listeners:
            Optional iterable of callback objects that subscribe to change
            events. Each listener must implement the
            :class:`~mcp_kb.knowledge.events.KnowledgeBaseListener` protocol.
            Events are dispatched synchronously after filesystem operations
            succeed, which allows callers to maintain eventual consistency with
            external systems such as vector databases.
        """

        self.rules = rules
        self.locks = lock_registry or FileLockRegistry()
        self.listeners = tuple(listeners or ())

    def create_file(self, path: Union[str, Path], content: str) -> Path:
        """Create or overwrite a text file at ``path``.

        The method validates the path, ensures that the parent directory exists,
        and writes the provided content as UTF-8 text. Existing files are
        overwritten to match the PRD, which views creation as setting the file
        contents.
        """

        normalized = normalize_path(path, self.rules)
        ensure_write_allowed(normalized, self.rules)
        ensure_parent_directory(normalized)
        with self.locks.acquire(normalized):
            write_text(normalized, content)
        self._notify_upsert(self._path(normalized), content)
        return normalized

    def read_file(
        self,
        path: Union[str, Path],
        start_line: Optional[int] = None,
        end_line: Optional[int] = None,
    ) -> FileSegment:
        """Read content from ``path`` optionally constraining lines.

        Parameters
        ----------
        path:
            Target file path relative to the knowledge base root.
        start_line:
            Zero-based index for the first line to include. ``None`` means
            start from the beginning of the file.
        end_line:
            Zero-based index signaling the last line to include. ``None`` means
            include content through the end of the file.

        Raises
        ------
        ValueError
            If the requested line interval is negative or inverted.
        """

        normalized = normalize_path(path, self.rules)
        full_content = read_text(normalized)
        lines = full_content.splitlines()

        if start_line is None and end_line is None:
            segment_content = full_content
            actual_start = 0
            actual_end = len(lines) - 1
        else:
            # Compare against None explicitly: ``start_line or 0`` /
            # ``end_line or len(lines)-1`` would silently treat an explicit
            # 0 as "unset", turning a request for line zero into a request
            # for the whole file tail.
            actual_start = start_line if start_line is not None else 0
            actual_end = end_line if end_line is not None else len(lines) - 1
            if actual_start < 0 or actual_end < actual_start:
                raise ValueError("Invalid line interval requested")
            selected = lines[actual_start : actual_end + 1]
            segment_content = "\n".join(selected)

        return FileSegment(
            path=normalized,
            start_line=actual_start,
            end_line=actual_end,
            content=segment_content,
        )

    def append_file(self, path: Union[str, Path], content: str) -> Path:
        """Append ``content`` to the file located at ``path``.

        Missing files are created automatically so that append operations remain
        idempotent for clients.
        """

        normalized = normalize_path(path, self.rules)
        ensure_write_allowed(normalized, self.rules)
        ensure_parent_directory(normalized)
        with self.locks.acquire(normalized):
            if not normalized.exists():
                write_text(normalized, content)
            else:
                append_text(normalized, content)
            # Re-read so listeners receive the full post-append document.
            updated_text = read_text(normalized)
        self._notify_upsert(self._path(normalized), updated_text)
        return normalized

    def regex_replace(
        self, path: Union[str, Path], pattern: str, replacement: str
    ) -> int:
        """Perform regex replacement and return the number of substitutions."""

        normalized = normalize_path(path, self.rules)
        ensure_write_allowed(normalized, self.rules)
        with self.locks.acquire(normalized):
            text = read_text(normalized)
            new_text, count = re.subn(pattern, replacement, text, flags=re.MULTILINE)
            write_text(normalized, new_text)
        self._notify_upsert(self._path(normalized), new_text)
        return count

    def soft_delete(self, path: Union[str, Path]) -> Path:
        """Apply soft deletion semantics by renaming with the deletion sentinel.

        Raises
        ------
        FileNotFoundError
            If ``path`` does not exist.
        """

        normalized = normalize_path(path, self.rules)
        ensure_write_allowed(normalized, self.rules)
        if not normalized.exists():
            raise FileNotFoundError(f"File '{path}' does not exist")

        target_name = f"{normalized.stem}{DELETE_SENTINEL}{normalized.suffix}"
        target = normalized.with_name(target_name)
        ensure_write_allowed(target, self.rules)
        with self.locks.acquire(normalized):
            rename(normalized, target)
        # Listeners are keyed on the original relative path, not the renamed
        # sentinel path.
        original_relative = self._path(normalized)
        self._notify_delete(original_relative)
        return target

    def total_active_files(self, include_docs: bool = False) -> int:
        """Return the total number of non-deleted UTF-8 text files under the root directory."""

        return sum(1 for _ in self.iter_active_files(include_docs=include_docs))

    def iter_active_files(self, include_docs: bool = False) -> Iterable[Path]:
        """Yield non-deleted UTF-8 text files under the root directory.

        Parameters
        ----------
        include_docs:
            When ``True`` the generator includes files located in the protected
            documentation folder. By default those files are skipped to match
            the search and overview requirements from the PRD.
        """

        # Imported lazily to avoid a circular import at module load time.
        from mcp_kb.utils.filesystem import is_text_file

        for path in self.rules.root.rglob("*"):
            if not path.is_file():
                continue
            if DELETE_SENTINEL in path.name:
                continue
            parts = path.relative_to(self.rules.root).parts
            if parts and parts[0] == DATA_FOLDER_NAME and not include_docs:
                continue
            if is_text_file(path):
                yield path

    def _path(self, absolute: Path) -> str:
        """Return ``absolute`` rewritten relative to the knowledge base root."""

        return str(absolute.relative_to(self.rules.root))

    def _notify_upsert(self, relative: str, content: str) -> None:
        """Dispatch an upsert event to registered listeners.

        Parameters
        ----------
        relative:
            Knowledge-base-relative path of the document that changed.
        content:
            Text payload that should be provided to subscribers.
        """

        if not self.listeners:
            return

        event = FileUpsertEvent(
            path=relative,
            content=content,
        )
        self._dispatch("handle_upsert", event)

    def _notify_delete(self, relative: str) -> None:
        """Dispatch a delete event to registered listeners."""

        if not self.listeners:
            return

        event = FileDeleteEvent(path=relative)
        self._dispatch("handle_delete", event)

    def _dispatch(
        self, method_name: str, event: FileUpsertEvent | FileDeleteEvent
    ) -> None:
        """Call ``method_name`` on every listener and wrap failures for clarity."""

        for listener in self.listeners:
            handler = getattr(listener, method_name)
            try:
                handler(event)  # type: ignore[misc]
            except Exception as exc:  # pragma: no cover - defensive logging path
                raise RuntimeError(
                    f"Knowledge base listener {listener!r} failed during {method_name}: {exc}"
                ) from exc
@@ -0,0 +1 @@
1
+ """Security-related helpers such as path validation rules."""