mcp-kb 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,100 +0,0 @@
1
- """Change event types and listener contracts for knowledge base updates.
2
-
3
- The knowledge base emits high-level events whenever a markdown document is
4
- created, updated, or soft deleted. Downstream components can subscribe to these
5
- notifications to implement side effects such as vector database ingestion without
6
- coupling the core filesystem logic to specific backends. Each event captures both
7
- absolute and knowledge-base-relative paths so that listeners can decide which
8
- identifier best fits their storage requirements.
9
- """
10
- from __future__ import annotations
11
-
12
- from dataclasses import dataclass
13
- from pathlib import Path
14
- from typing import Optional, Protocol, runtime_checkable, TYPE_CHECKING
15
-
16
- if TYPE_CHECKING: # pragma: no cover - type hints only
17
- from typing import List
18
-
19
- from mcp_kb.knowledge.search import SearchMatch
20
- from mcp_kb.knowledge.store import KnowledgeBase
21
-
22
-
23
@dataclass(frozen=True)
class FileUpsertEvent:
    """Immutable notification that a document was created or updated.

    A single event type covers both creation and modification; listeners
    receive the document text alongside two spellings of its location.

    Attributes
    ----------
    absolute_path:
        Location of the document on disk, suitable for listeners that want to
        reopen the file themselves.
    relative_path:
        The same location expressed relative to the knowledge base root; a
        compact identifier for downstream stores.
    content:
        Markdown text of the document as it stood when the event was emitted,
        so listeners that only need the text do not have to re-read the file.
    """

    absolute_path: Path
    relative_path: str
    content: str
44
-
45
-
46
@dataclass(frozen=True)
class FileDeleteEvent:
    """Immutable notification that a document was soft deleted.

    Attributes
    ----------
    absolute_path:
        Where the soft-deleted file now lives on disk; its name may carry the
        configured delete sentinel.
    relative_path:
        The document's knowledge-base-relative path *before* the soft delete.
        Downstream systems should drop whatever they keyed by this value.
    """

    absolute_path: Path
    relative_path: str
63
-
64
-
65
class KnowledgeBaseListener(Protocol):
    """Structural contract for objects that react to knowledge base changes.

    Any object providing both methods below satisfies the protocol; no
    inheritance is required.
    """

    def handle_upsert(self, event: FileUpsertEvent) -> None:
        """React to a document having been created or updated."""
        ...

    def handle_delete(self, event: FileDeleteEvent) -> None:
        """React to a previously ingested document having been removed."""
        ...
73
-
74
-
75
@runtime_checkable
class KnowledgeBaseSearchListener(Protocol):
    """Optional capability: a listener that can also answer search queries.

    The protocol is runtime-checkable, so ``isinstance`` can be used to detect
    whether an object offers a ``search`` method.
    """

    def search(
        self,
        kb: "KnowledgeBase",
        query: str,
        *,
        context_lines: int = 2,
        limit: Optional[int] = None,
    ) -> "List[SearchMatch]":
        """Return the matches found for ``query``, or an empty list if none."""
        ...
88
-
89
-
90
@runtime_checkable
class KnowledgeBaseReindexListener(Protocol):
    """Optional capability: a listener that can rebuild its external index.

    Implementations expose ``reindex`` to regenerate whatever external indexes
    they maintain from the knowledge base's current state. The operation is
    expected to be idempotent, i.e. safe to invoke repeatedly.
    """

    def reindex(self, kb: "KnowledgeBase") -> int:
        """Rebuild the indexes and report how many documents were processed."""
        ...
@@ -1,178 +0,0 @@
1
- """Search utilities that operate on the knowledge base filesystem.
2
-
3
- The functions in this module are separate from ``KnowledgeBase`` so that they
4
- can evolve independently. Search often benefits from dedicated caching or
5
- indexing strategies; keeping it in its own module means the server can swap the
6
- implementation later without changing the core file lifecycle API.
7
- """
8
- from __future__ import annotations
9
-
10
- from dataclasses import dataclass
11
- from pathlib import Path
12
- from typing import Dict, Iterable, List, Optional
13
-
14
- from mcp_kb.config import DATA_FOLDER_NAME, DOC_FILENAME
15
- from mcp_kb.knowledge.events import KnowledgeBaseSearchListener
16
- from mcp_kb.knowledge.store import KnowledgeBase
17
-
18
-
19
@dataclass
class SearchMatch:
    """One search hit together with the lines surrounding it.

    Attributes
    ----------
    path:
        File in which the hit was found.
    line_number:
        One-based number of the matching line.
    context:
        The matching line plus its neighbouring lines, in file order.
    """

    path: Path
    line_number: int
    context: List[str]
26
-
27
-
28
def search_text(
    kb: KnowledgeBase,
    query: str,
    context_lines: int = 2,
    *,
    providers: Iterable[KnowledgeBaseSearchListener] | None = None,
    n_results: Optional[int] = None,
) -> List[SearchMatch]:
    """Find ``query`` in the knowledge base, preferring search providers.

    Parameters
    ----------
    kb:
        Knowledge base instance whose files are searched.
    query:
        Text to look for. The filesystem fallback treats it as a literal
        substring, never as a regular expression.
    context_lines:
        How many lines of context to request before and after each hit.
    providers:
        Optional search-capable listeners. They are asked one after another;
        the first one that returns a non-empty result wins. If none does (or
        none are given), the files on disk are scanned instead.
    n_results:
        Upper bound on the number of matches. It is forwarded to providers as
        ``limit``; ``None`` means no bound.

    Returns
    -------
    list[SearchMatch]
        The winning provider's result, or the matches from the disk scan.

    Raises
    ------
    RuntimeError
        If a provider raises; the original exception is chained as the cause.
    """

    for source in providers or ():
        try:
            hits = source.search(
                kb,
                query,
                context_lines=context_lines,
                limit=n_results,
            )
        except Exception as exc:  # pragma: no cover - defensive path
            raise RuntimeError(f"Search provider {source!r} failed: {exc}") from exc
        else:
            # An empty answer is not final: fall through to the next provider.
            if hits:
                return hits

    return _search_by_scanning(kb, query, context_lines, n_results)
80
-
81
-
82
def _search_by_scanning(
    kb: KnowledgeBase,
    query: str,
    context_lines: int,
    n_results: Optional[int],
) -> List[SearchMatch]:
    """Collect matches by reading each active file, stopping once capped.

    Files are visited in the order ``kb.iter_active_files()`` yields them. When
    ``n_results`` is given, scanning stops after the first file that brings the
    running total up to the cap and the surplus is trimmed.
    """

    capped = n_results is not None
    collected: List[SearchMatch] = []
    for file_path in kb.iter_active_files():
        collected += _extract_matches_for_path(file_path, query, context_lines)
        if capped and len(collected) >= n_results:
            return collected[:n_results]
    return collected
96
-
97
-
98
def _build_tree(paths: List[List[str]]) -> Dict[str, Dict]:
    """Fold lists of path components into a nested dictionary.

    Each component becomes a key whose value is the dictionary of its
    children; leaves map to empty dictionaries.
    """

    root: Dict[str, Dict] = {}
    for components in paths:
        node = root
        for component in components:
            if component not in node:
                node[component] = {}
            node = node[component]
    return root
107
-
108
-
109
def _flatten_tree(tree: Dict[str, Dict], prefix: str = " ") -> List[str]:
    """Render a nested dictionary as a list of indented bullet lines.

    Entries are emitted in sorted key order, each followed immediately by its
    own children rendered with a one-character-longer prefix.
    """

    rendered: List[str] = []
    child_prefix = prefix + " "
    for label in sorted(tree):
        rendered.append(f"{prefix}- {label}")
        rendered += _flatten_tree(tree[label], child_prefix)
    return rendered
117
-
118
-
119
def build_tree_overview(kb: KnowledgeBase) -> str:
    """Render the knowledge base's active files as an indented text tree.

    The first line is the root directory's name followed by ``/`` (or ``./``
    when the root has no name). Below it, every active file appears under its
    parent folders in sorted order, which keeps the output deterministic
    regardless of the order the filesystem yields entries.
    """

    root = kb.rules.root
    components = []
    for file_path in kb.iter_active_files():
        components.append(list(file_path.relative_to(root).parts))

    header = f"{root.name}/" if root.name else "./"
    body = _flatten_tree(_build_tree(components))
    return "\n".join([header, *body])
134
-
135
-
136
def read_documentation(kb: KnowledgeBase) -> str:
    """Return the text of the canonical documentation file, or ``""``.

    The file is looked up at ``<root>/DATA_FOLDER_NAME/DOC_FILENAME``. No
    access-control check is made here: reading is always permitted, including
    from the protected documentation folder.
    """

    doc_path = kb.rules.root / DATA_FOLDER_NAME / DOC_FILENAME
    return doc_path.read_text(encoding="utf-8") if doc_path.exists() else ""
148
-
149
-
150
def _extract_matches_for_path(path: Path, query: str, context_lines: int) -> List[SearchMatch]:
    """Load ``path`` as UTF-8 text and return every line-level hit for ``query``."""

    text = path.read_text(encoding="utf-8")
    return _extract_matches_from_lines(path, text.splitlines(), query, context_lines)
155
-
156
-
157
def _extract_matches_from_lines(
    path: Path,
    lines: List[str],
    query: str,
    context_lines: int,
) -> List[SearchMatch]:
    """Return one match per line of ``lines`` that contains ``query``.

    Parameters
    ----------
    path:
        File the lines came from; copied onto each match.
    lines:
        The file's content, already split into lines.
    query:
        Literal substring to look for in each line.
    context_lines:
        Number of lines to include on each side of a hit. Negative values are
        treated as zero so the matching line itself is always part of the
        context.

    Returns
    -------
    list[SearchMatch]
        Matches in file order, with one-based line numbers.
    """

    # A negative radius would make the slice below start *after* the hit and
    # end *before* it, yielding a context that omits the matching line.
    radius = max(context_lines, 0)
    total = len(lines)

    matches: List[SearchMatch] = []
    for number, line in enumerate(lines, start=1):
        if query not in line:
            continue
        # ``number`` is one-based, so the hit sits at index ``number - 1``.
        first = max(0, number - 1 - radius)
        last = min(total, number + radius)
        matches.append(
            SearchMatch(path=path, line_number=number, context=lines[first:last])
        )
    return matches
173
-
174
-
175
# Public API of this module. The overview and documentation helpers carry
# public (non-underscore) names, so they are exported alongside the search
# entry point and its result type.
__all__ = [
    "SearchMatch",
    "build_tree_overview",
    "read_documentation",
    "search_text",
]
mcp_kb/knowledge/store.py DELETED
@@ -1,263 +0,0 @@
1
- """Core knowledge base operations for file lifecycle management.
2
-
3
- This module exposes the ``KnowledgeBase`` class, which orchestrates validated
4
- filesystem operations for the MCP server. The class encapsulates logic for
5
- creating, reading, appending, and modifying text files while respecting the
6
- security constraints defined in the PRD. Each method returns plain Python data
7
- structures so that higher-level layers (e.g., JSON-RPC handlers) can focus on
8
- protocol serialization rather than filesystem minutiae.
9
- """
10
- from __future__ import annotations
11
-
12
- import re
13
- from dataclasses import dataclass
14
- from pathlib import Path
15
- from typing import Iterable, Optional
16
-
17
- from mcp_kb.config import DELETE_SENTINEL, DATA_FOLDER_NAME
18
- from mcp_kb.knowledge.events import FileDeleteEvent, FileUpsertEvent, KnowledgeBaseListener
19
- from mcp_kb.security.path_validation import (
20
- PathRules,
21
- ensure_write_allowed,
22
- normalize_path,
23
- )
24
- from mcp_kb.utils.filesystem import (
25
- FileLockRegistry,
26
- append_text,
27
- ensure_parent_directory,
28
- read_text,
29
- rename,
30
- write_text,
31
- )
32
-
33
-
34
@dataclass
class FileSegment:
    """A slice of a file's content as handed back to MCP clients.

    Attributes
    ----------
    path:
        File the content was read from.
    start_line:
        One-based number of the first line in the segment.
    end_line:
        One-based number of the last line in the segment.
    content:
        Text of the segment.
    """

    path: Path
    start_line: int
    end_line: int
    content: str
42
-
43
-
44
class KnowledgeBase:
    """High-level API that executes validated knowledge base operations.

    The only state held is the path rules, the lock registry, and the tuple of
    registered listeners. Mutating methods validate the target path, perform
    the filesystem change while holding the lock acquired for that path, and
    then dispatch a change event to the listeners.
    """

    def __init__(
        self,
        rules: PathRules,
        lock_registry: FileLockRegistry | None = None,
        listeners: Iterable[KnowledgeBaseListener] | None = None,
    ) -> None:
        """Initialize the knowledge base with path rules and optional locks.

        Parameters
        ----------
        rules:
            Active path rules that govern which paths are safe to touch.
        lock_registry:
            Optional ``FileLockRegistry``, e.g. injected by tests. A new
            registry is created only when this is ``None``.
        listeners:
            Optional iterable of objects implementing the
            :class:`~mcp_kb.knowledge.events.KnowledgeBaseListener` protocol.
            The iterable is copied into a tuple. Events are dispatched
            synchronously after the filesystem operation has succeeded.
        """

        self.rules = rules
        # Test for ``None`` explicitly: an injected registry that happens to be
        # falsy (for instance one that defines ``__len__`` and is still empty)
        # must not be silently replaced by a fresh instance.
        self.locks = FileLockRegistry() if lock_registry is None else lock_registry
        self.listeners = () if listeners is None else tuple(listeners)

    def create_file(self, relative_path: str, content: str) -> Path:
        """Create or overwrite the text file at ``relative_path``.

        The path is validated, the parent directory is ensured, and ``content``
        is written under the path's lock. Existing files are overwritten:
        creation is defined as setting the file contents. Listeners then
        receive an upsert event carrying ``content``.

        Returns
        -------
        Path
            The normalized path that was written.
        """

        normalized = normalize_path(relative_path, self.rules)
        ensure_write_allowed(normalized, self.rules)
        ensure_parent_directory(normalized)
        with self.locks.acquire(normalized):
            write_text(normalized, content)
        self._notify_upsert(normalized, content)
        return normalized

    def read_file(
        self,
        relative_path: str,
        start_line: Optional[int] = None,
        end_line: Optional[int] = None,
    ) -> FileSegment:
        """Read content from ``relative_path`` optionally constraining lines.

        Parameters
        ----------
        relative_path:
            Target file path relative to the knowledge base root.
        start_line:
            One-based index for the first line to include. ``None`` means start
            from the beginning of the file.
        end_line:
            One-based index signaling the last line to include. ``None`` means
            include content through the end of the file.

        Returns
        -------
        FileSegment
            With both bounds omitted, the untouched file content. Otherwise
            the selected lines joined with ``"\\n"``; ``start_line`` and
            ``end_line`` echo the effective bounds.

        Raises
        ------
        ValueError
            If the effective start is below 1 or the effective end lies before
            the start. An explicit ``0`` for either bound is rejected here
            rather than being mistaken for "not given".
        """

        normalized = normalize_path(relative_path, self.rules)
        full_content = read_text(normalized)
        lines = full_content.splitlines()

        if start_line is None and end_line is None:
            return FileSegment(
                path=normalized,
                start_line=1,
                end_line=len(lines),
                content=full_content,
            )

        # ``is None`` rather than truthiness: ``0`` is an invalid one-based
        # index and has to reach the validation below instead of being
        # replaced by the default.
        actual_start = 1 if start_line is None else start_line
        actual_end = len(lines) if end_line is None else end_line
        if actual_start < 1 or actual_end < actual_start:
            raise ValueError("Invalid line interval requested")

        selected = lines[actual_start - 1 : actual_end]
        return FileSegment(
            path=normalized,
            start_line=actual_start,
            end_line=actual_end,
            content="\n".join(selected),
        )

    def append_file(self, relative_path: str, content: str) -> Path:
        """Append ``content`` to the file located at ``relative_path``.

        A missing file is created with ``content`` as its initial text. After
        the write, the whole file is read back (still under the lock) so the
        upsert event carries the complete updated document.

        Returns
        -------
        Path
            The normalized path that was written.
        """

        normalized = normalize_path(relative_path, self.rules)
        ensure_write_allowed(normalized, self.rules)
        ensure_parent_directory(normalized)
        with self.locks.acquire(normalized):
            if not normalized.exists():
                write_text(normalized, content)
            else:
                append_text(normalized, content)
            updated_text = read_text(normalized)
        self._notify_upsert(normalized, updated_text)
        return normalized

    def regex_replace(self, relative_path: str, pattern: str, replacement: str) -> int:
        """Perform regex replacement and return the number of substitutions.

        ``pattern`` is applied to the whole file with ``re.MULTILINE``. The
        file is rewritten and an upsert event is dispatched even when nothing
        matched (a return value of ``0``).
        """

        normalized = normalize_path(relative_path, self.rules)
        ensure_write_allowed(normalized, self.rules)
        with self.locks.acquire(normalized):
            text = read_text(normalized)
            new_text, count = re.subn(pattern, replacement, text, flags=re.MULTILINE)
            write_text(normalized, new_text)
        self._notify_upsert(normalized, new_text)
        return count

    def soft_delete(self, relative_path: str) -> Path:
        """Apply soft deletion semantics by appending the deletion sentinel.

        The file is renamed so that ``DELETE_SENTINEL`` sits between its stem
        and its suffix. Listeners receive a delete event keyed by the original
        relative path.

        Returns
        -------
        Path
            The new path of the soft-deleted file.

        Raises
        ------
        FileNotFoundError
            If the file does not exist.
        """

        normalized = normalize_path(relative_path, self.rules)
        ensure_write_allowed(normalized, self.rules)

        target_name = f"{normalized.stem}{DELETE_SENTINEL}{normalized.suffix}"
        target = normalized.with_name(target_name)
        ensure_write_allowed(target, self.rules)
        with self.locks.acquire(normalized):
            # Check existence while holding the lock so another operation on
            # the same path cannot slip in between the check and the rename.
            if not normalized.exists():
                raise FileNotFoundError(f"File '{relative_path}' does not exist")
            rename(normalized, target)
        original_relative = self._relative_path(normalized)
        self._notify_delete(target, original_relative)
        return target

    def total_active_files(self, include_docs: bool = False) -> int:
        """Return how many files ``iter_active_files`` yields for ``include_docs``."""

        return sum(1 for _ in self.iter_active_files(include_docs=include_docs))

    def iter_active_files(self, include_docs: bool = False) -> Iterable[Path]:
        """Yield non-deleted text files under the root directory.

        Parameters
        ----------
        include_docs:
            When ``True`` the generator includes files located in the protected
            documentation folder. By default those files are skipped.
        """

        from mcp_kb.utils.filesystem import is_text_file

        for path in self.rules.root.rglob("*"):
            if not path.is_file():
                continue
            # Soft-deleted files carry the sentinel in their file name.
            if DELETE_SENTINEL in path.name:
                continue
            parts = path.relative_to(self.rules.root).parts
            # Only a *top-level* data folder counts as the documentation folder.
            if parts and parts[0] == DATA_FOLDER_NAME and not include_docs:
                continue
            if is_text_file(path):
                yield path

    def _relative_path(self, absolute: Path) -> str:
        """Return ``absolute`` rewritten relative to the knowledge base root."""

        return str(absolute.relative_to(self.rules.root))

    def _notify_upsert(self, absolute: Path, content: str) -> None:
        """Dispatch an upsert event to registered listeners.

        Parameters
        ----------
        absolute:
            Path that was modified on disk.
        content:
            Text payload that should be provided to subscribers.
        """

        # Skip building the event entirely when nobody is listening.
        if not self.listeners:
            return

        event = FileUpsertEvent(
            absolute_path=absolute,
            relative_path=self._relative_path(absolute),
            content=content,
        )
        self._dispatch("handle_upsert", event)

    def _notify_delete(self, absolute: Path, relative: str) -> None:
        """Dispatch a delete event to registered listeners."""

        if not self.listeners:
            return

        event = FileDeleteEvent(absolute_path=absolute, relative_path=relative)
        self._dispatch("handle_delete", event)

    def _dispatch(self, method_name: str, event: FileUpsertEvent | FileDeleteEvent) -> None:
        """Call ``method_name`` on every listener and wrap failures for clarity.

        Listeners are invoked in registration order. The first failure aborts
        the dispatch and is re-raised as ``RuntimeError`` with the original
        exception chained, so later listeners are not called.
        """

        for listener in self.listeners:
            handler = getattr(listener, method_name)
            try:
                handler(event)  # type: ignore[misc]
            except Exception as exc:  # pragma: no cover - defensive logging path
                raise RuntimeError(
                    f"Knowledge base listener {listener!r} failed during {method_name}: {exc}"
                ) from exc
@@ -1 +0,0 @@
1
- """Security-related helpers such as path validation rules."""
@@ -1,105 +0,0 @@
1
- """Path validation utilities to protect the knowledge base filesystem.
2
-
3
- This module implements reusable helpers that ensure every file operation stays
4
- inside the configured knowledge base root. The checks defend against directory
5
- traversal attempts (".." components), accidental absolute paths, and writes
6
- that target the reserved documentation folder. The helper functions are written
7
- so they can be reused both by the server runtime and by unit tests to keep the
8
- security rules consistent.
9
- """
10
- from __future__ import annotations
11
-
12
- from dataclasses import dataclass
13
- from pathlib import Path
14
- from typing import Iterable
15
-
16
- from mcp_kb.config import DATA_FOLDER_NAME, DELETE_SENTINEL
17
-
18
-
19
class PathValidationError(ValueError):
    """Raised when a requested path violates the knowledge base path rules.

    The server treats this as a client error: the caller asked for a path it
    is not allowed to use. Being a dedicated ``ValueError`` subclass lets
    handlers and tests single it out from unrelated value errors.
    """
26
-
27
-
28
@dataclass(frozen=True)
class PathRules:
    """Container for server-specific path constraints.

    Attributes
    ----------
    root:
        Absolute path that represents the root of the knowledge base. All file
        operations must remain inside this directory tree.
    protected_folders:
        Folder names that are protected against mutations. Any iterable is
        accepted; it is stored as a tuple so that it can be consulted on every
        write check.
    """

    root: Path
    protected_folders: Iterable[str]

    def __post_init__(self) -> None:
        """Snapshot ``protected_folders`` into a tuple.

        A one-shot iterable such as a generator would otherwise be exhausted
        by the first write check, after which no folder would be protected.
        """

        # The dataclass is frozen, so regular attribute assignment is blocked;
        # ``object.__setattr__`` is the supported way to set fields here.
        object.__setattr__(self, "protected_folders", tuple(self.protected_folders))
45
-
46
-
47
def normalize_path(candidate: str, rules: PathRules) -> Path:
    """Normalize a relative path and ensure it stays inside the root.

    Parameters
    ----------
    candidate:
        The user-provided path, typically originating from an MCP tool request.
    rules:
        The active ``PathRules`` instance describing allowed operations.

    Returns
    -------
    Path
        A fully-resolved path that is guaranteed to be inside the root
        directory.

    Raises
    ------
    PathValidationError
        If the candidate path is absolute, resolves to a location outside the
        root, or names a soft-deleted file (its file name contains
        ``DELETE_SENTINEL``).
    """

    path_obj = Path(candidate)
    if path_obj.is_absolute():
        raise PathValidationError("Absolute paths are not permitted inside the knowledge base")

    # Resolve the root as well: the candidate is resolved (made absolute,
    # symlinks followed), so comparing it against a relative or symlinked root
    # would reject every path, including perfectly valid ones.
    root = rules.root.resolve()
    normalized = (root / path_obj).resolve()
    try:
        normalized.relative_to(root)
    except ValueError as exc:
        raise PathValidationError("Path resolves outside the knowledge base root") from exc

    if DELETE_SENTINEL in normalized.name:
        raise PathValidationError("Operations on soft-deleted files are not permitted")

    return normalized
84
-
85
-
86
def ensure_write_allowed(path: Path, rules: PathRules) -> None:
    """Validate that a path resides outside protected folders before writing.

    Only the first component of the path relative to the root is inspected, so
    a protected folder name is enforced at the top level of the knowledge
    base. Read operations can still access those directories by skipping this
    check.

    Parameters
    ----------
    path:
        The already-normalized absolute path that will be used for writing.
    rules:
        The active ``PathRules`` instance describing allowed operations.

    Raises
    ------
    PathValidationError
        If ``path`` lies inside a protected folder, or is not located under
        ``rules.root`` at all.
    """

    try:
        relative_parts = path.relative_to(rules.root).parts
    except ValueError as exc:
        # Surface the module's own error type (still a ``ValueError``) rather
        # than pathlib's bare one, matching how ``normalize_path`` reports
        # paths outside the root.
        raise PathValidationError("Path resolves outside the knowledge base root") from exc

    if relative_parts and relative_parts[0] in set(rules.protected_folders):
        raise PathValidationError(
            f"Writes are not allowed inside the protected folder '{relative_parts[0]}'"
        )
mcp_kb/server/__init__.py DELETED
@@ -1 +0,0 @@
1
- """Server subpackage powering the FastMCP-based knowledge base server."""