mcp-kb 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_kb/cli/__init__.py +1 -0
- mcp_kb/cli/args.py +175 -0
- mcp_kb/cli/main.py +181 -0
- mcp_kb/cli/reindex.py +113 -0
- mcp_kb/cli/runtime_config.py +421 -0
- mcp_kb/data/KNOWLEDBASE_DOC.md +151 -0
- mcp_kb/data/__init__.py +1 -0
- mcp_kb/ingest/__init__.py +1 -0
- mcp_kb/ingest/chroma.py +1287 -0
- mcp_kb/knowledge/__init__.py +1 -0
- mcp_kb/knowledge/bootstrap.py +44 -0
- mcp_kb/knowledge/events.py +105 -0
- mcp_kb/knowledge/search.py +177 -0
- mcp_kb/knowledge/store.py +294 -0
- mcp_kb/security/__init__.py +1 -0
- mcp_kb/security/path_validation.py +108 -0
- mcp_kb/server/__init__.py +1 -0
- mcp_kb/server/app.py +201 -0
- mcp_kb/ui/__init__.py +17 -0
- mcp_kb/ui/api.py +377 -0
- mcp_kb/ui/assets/assets/index.css +1 -0
- mcp_kb/ui/assets/index.html +62 -0
- mcp_kb/ui/server.py +332 -0
- mcp_kb/utils/__init__.py +1 -0
- mcp_kb/utils/filesystem.py +128 -0
- mcp_kb-0.3.3.dist-info/METADATA +338 -0
- mcp_kb-0.3.3.dist-info/RECORD +32 -0
- mcp_kb-0.3.1.dist-info/METADATA +0 -181
- mcp_kb-0.3.1.dist-info/RECORD +0 -7
- {mcp_kb-0.3.1.dist-info → mcp_kb-0.3.3.dist-info}/WHEEL +0 -0
- {mcp_kb-0.3.1.dist-info → mcp_kb-0.3.3.dist-info}/entry_points.txt +0 -0
- {mcp_kb-0.3.1.dist-info → mcp_kb-0.3.3.dist-info}/top_level.txt +0 -0
mcp_kb/knowledge/__init__.py
@@ -0,0 +1 @@
"""Knowledge layer that encapsulates content storage and search helpers."""
mcp_kb/knowledge/bootstrap.py
@@ -0,0 +1,44 @@
"""Bootstrap helpers executed during server startup."""

from __future__ import annotations

import importlib.resources as resources
from pathlib import Path

from mcp_kb.config import DATA_FOLDER_NAME, DOC_FILENAME


def install_default_documentation(root: Path) -> Path:
    """Ensure the default documentation file exists under ``root``.

    The function creates the documentation directory if necessary and copies the
    packaged ``KNOWLEDBASE_DOC.md`` file into place. Existing documentation is
    preserved so that operators can customize the file without losing changes on
    subsequent startups.

    Parameters
    ----------
    root:
        Absolute path representing the knowledge base root directory.

    Returns
    -------
    Path
        Path to the documentation file inside the knowledge base tree.
    """

    docs_dir = root / DATA_FOLDER_NAME
    doc_path = docs_dir / DOC_FILENAME
    if doc_path.exists():
        return doc_path

    docs_dir.mkdir(parents=True, exist_ok=True)

    with (
        resources.files("mcp_kb.data")
        .joinpath("KNOWLEDBASE_DOC.md")
        .open("r", encoding="utf-8") as source
    ):
        doc_path.write_text(source.read(), encoding="utf-8")

    return doc_path
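For orientation, a minimal sketch of how a caller might invoke this bootstrap helper; the root directory below is hypothetical and would normally be resolved by the server's CLI configuration.

from pathlib import Path

from mcp_kb.knowledge.bootstrap import install_default_documentation

# Hypothetical knowledge base root; the real value comes from the CLI layer.
root = Path("kb-root").resolve()
root.mkdir(parents=True, exist_ok=True)

doc_path = install_default_documentation(root)
# Prints <root>/<DATA_FOLDER_NAME>/KNOWLEDBASE_DOC.md; an existing copy is left untouched.
print(doc_path)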
mcp_kb/knowledge/events.py
@@ -0,0 +1,105 @@
"""Change event types and listener contracts for knowledge base updates.

The knowledge base emits high-level events whenever a markdown document is
created, updated, or soft deleted. Downstream components can subscribe to these
notifications to implement side effects such as vector database ingestion without
coupling the core filesystem logic to specific backends. Each event carries the
knowledge-base-relative path of the affected document so that listeners can key
their own storage consistently.
"""

from __future__ import annotations

from pathlib import Path
from typing import Optional, Protocol, Tuple, runtime_checkable, TYPE_CHECKING, Dict, Any
from pydantic import BaseModel, model_validator

if TYPE_CHECKING:  # pragma: no cover - type hints only
    from typing import List

    from mcp_kb.knowledge.store import FileSegment, KnowledgeBase


class FileUpsertEvent(BaseModel):
    """Describes a document that was created or updated inside the knowledge base.

    Attributes
    ----------
    path:
        Path relative to the configured knowledge base root. This identifier is
        stable across restarts and makes for concise IDs in downstream systems.
    content:
        Full markdown content of the updated document at the time the event was
        emitted. Listeners can avoid re-reading the file when they only need the
        text payload.
    """

    path: str
    content: str

    # make sure path is a string
    @model_validator(mode="before")
    @classmethod
    def check_path(cls, values: dict) -> dict:
        if isinstance(values["path"], Path):
            values["path"] = str(values["path"])
        return values


class FileDeleteEvent(BaseModel):
    """Signals that a document has been soft deleted according to PRD semantics.

    Attributes
    ----------
    path:
        Original knowledge-base-relative path before soft deletion. Downstream
        systems should remove entries keyed by this relative path to stay in
        sync with the knowledge base state.
    """

    path: str

    # make sure path is a string
    @model_validator(mode="before")
    @classmethod
    def check_path(cls, values: dict) -> dict:
        if isinstance(values["path"], Path):
            values["path"] = str(values["path"])
        return values


class KnowledgeBaseListener(Protocol):
    """Interface for components that react to knowledge base change events."""

    def handle_upsert(self, event: FileUpsertEvent) -> None:
        """Persist changes triggered by a document creation or update event."""

    def handle_delete(self, event: FileDeleteEvent) -> None:
        """Process the removal of a previously ingested document."""


@runtime_checkable
class KnowledgeBaseSearchListener(Protocol):
    """Optional extension that allows listeners to service search requests."""

    def search(
        self,
        kb: "KnowledgeBase",
        query: str,
        *,
        context_lines: int = 2,
        limit: Optional[int] = None,
    ) -> "Tuple[List[FileSegment], Dict[str, Any]]":
        """Return semantic search matches for ``query`` or an empty list."""


@runtime_checkable
class KnowledgeBaseReindexListener(Protocol):
    """Optional extension that allows listeners to perform full reindexing.

    Implementations can expose a ``reindex`` method to rebuild any external
    indexes from the current state of the knowledge base. The method should be
    idempotent and safe to run multiple times.
    """

    def reindex(self, kb: "KnowledgeBase") -> int:
        """Rebuild indexes, returning the number of documents processed."""
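To illustrate the contract, a minimal in-memory subscriber that structurally satisfies ``KnowledgeBaseListener``; the class is hypothetical and not part of the package.

from typing import Dict

from mcp_kb.knowledge.events import FileDeleteEvent, FileUpsertEvent


class InMemoryMirror:
    """Hypothetical listener that mirrors upserts and deletes into a dict."""

    def __init__(self) -> None:
        self.documents: Dict[str, str] = {}

    def handle_upsert(self, event: FileUpsertEvent) -> None:
        # Events are keyed by the knowledge-base-relative path they carry.
        self.documents[event.path] = event.content

    def handle_delete(self, event: FileDeleteEvent) -> None:
        self.documents.pop(event.path, None)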
mcp_kb/knowledge/search.py
@@ -0,0 +1,177 @@
"""Search utilities that operate on the knowledge base filesystem.

The functions in this module are separate from ``KnowledgeBase`` so that they
can evolve independently. Search often benefits from dedicated caching or
indexing strategies; keeping it in its own module means the server can swap the
implementation later without changing the core file lifecycle API.
"""

from __future__ import annotations

from pathlib import Path
from typing import Dict, Iterable, List, Optional, Literal, Any, Tuple

from mcp_kb.config import DATA_FOLDER_NAME, DOC_FILENAME
from mcp_kb.knowledge.events import KnowledgeBaseSearchListener
from mcp_kb.knowledge.store import KnowledgeBase, FileSegment
from pydantic import BaseModel


def search_text(
    kb: KnowledgeBase,
    query: str,
    context_lines: int = 2,
    *,
    providers: Iterable[KnowledgeBaseSearchListener] | None = None,
    n_results: Optional[int] = None,
) -> Tuple[List[FileSegment], Dict[str, Any]]:
    """Search for ``query`` in all non-deleted knowledge base files.

    Parameters
    ----------
    kb:
        Active knowledge base instance used to iterate over files.
    query:
        Literal string that should be located within the files. The helper does
        not treat the query as a regular expression to avoid surprising matches
        when characters such as ``*`` appear in user input.
    context_lines:
        Number of lines to include before and after each match. Defaults to two
        lines, aligning with the PRD's requirement for contextual snippets.
    providers:
        Optional iterable of listeners capable of serving semantic search
        results. Providers are consulted in order and every non-empty response
        is merged into the result set; a filesystem scan is appended as well so
        that literal matches are never missed.
    n_results:
        Maximum number of matches the filesystem scan may return. ``None`` keeps
        the legacy behaviour of returning every match discovered on disk.

    Returns
    -------
    tuple[list[FileSegment], dict[str, Any]]
        Ordered list of matches plus any metadata reported by providers. Each
        match contains the knowledge-base-relative path, the line range of the
        snippet, and the extracted context lines.
    """

    all_matches: List[FileSegment] = []
    all_meta: Dict[str, Any] = {}
    for provider in providers or ():
        try:
            matches, meta = provider.search(
                kb,
                query,
                context_lines=context_lines,
                limit=n_results,
            )
        except Exception as exc:  # pragma: no cover - defensive path
            raise RuntimeError(f"Search provider {provider!r} failed: {exc}") from exc
        if matches:
            all_matches.extend(matches)
            all_meta.update(meta)

    all_matches.extend(_search_by_scanning(kb, query, context_lines, n_results))
    for match in all_matches:
        match.assert_path(kb.rules)
    return all_matches, all_meta


def _search_by_scanning(
    kb: KnowledgeBase,
    query: str,
    context_lines: int,
    n_results: Optional[int],
) -> List[FileSegment]:
    """Return search matches by scanning files on disk."""

    matches: List[FileSegment] = []
    for path in kb.iter_active_files():
        matches.extend(_extract_matches_for_path(path, query, context_lines))
        if n_results is not None and len(matches) >= n_results:
            return matches[:n_results]
    return matches


def _build_tree(paths: List[List[str]]) -> Dict[str, Dict]:
    """Construct a nested dictionary representing the directory tree."""

    tree: Dict[str, Dict] = {}
    for parts in paths:
        current = tree
        for part in parts:
            current = current.setdefault(part, {})
    return tree


def _flatten_tree(tree: Dict[str, Dict], prefix: str = " ") -> List[str]:
    """Convert a nested dictionary tree into indented lines."""

    lines: List[str] = []
    for name in sorted(tree.keys()):
        lines.append(f"{prefix}- {name}")
        lines.extend(_flatten_tree(tree[name], prefix + " "))
    return lines


def build_tree_overview(kb: KnowledgeBase) -> str:
    """Produce a textual tree showing the structure of the knowledge base.

    The output intentionally mirrors a simplified ``tree`` command but remains
    deterministic across operating systems by controlling ordering and
    indentation.
    """

    paths = [
        list(path.relative_to(kb.rules.root).parts) for path in kb.iter_active_files()
    ]
    tree = _build_tree(paths)
    lines = []
    lines.extend(_flatten_tree(tree, prefix=""))
    return "\n".join(lines)


def read_documentation(kb: KnowledgeBase) -> str:
    """Return documentation content if the canonical file exists.

    The helper intentionally performs no access control checks because read
    operations are always permitted, even for the protected documentation
    folder.
    """

    doc_path = kb.rules.root / DATA_FOLDER_NAME / DOC_FILENAME
    if not doc_path.exists():
        return ""
    return doc_path.read_text(encoding="utf-8")


def _extract_matches_for_path(
    path: Path, query: str, context_lines: int
) -> List[FileSegment]:
    """Read ``path`` and return every match that contains ``query``."""

    lines = path.read_text(encoding="utf-8").splitlines()
    return _extract_matches_from_lines(path, lines, query, context_lines)


def _extract_matches_from_lines(
    path: Path,
    lines: List[str],
    query: str,
    context_lines: int,
) -> List[FileSegment]:
    """Return matches using the provided ``lines`` buffer."""

    matches: List[FileSegment] = []
    for index, line in enumerate(lines, start=1):
        if query in line:
            start = max(0, index - context_lines - 1)
            end = min(len(lines), index + context_lines)
            context = "\n".join(lines[start:end])
            matches.append(
                FileSegment(path=path, start_line=start, end_line=end, content=context)
            )
    return matches


__all__ = [
    "search_text",
]
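A rough usage sketch for the public helpers, assuming ``kb`` is a ``KnowledgeBase`` built elsewhere (see ``mcp_kb/knowledge/store.py`` below); the wrapper functions are illustrative, not part of the package.

from typing import List

from mcp_kb.knowledge.search import build_tree_overview, search_text
from mcp_kb.knowledge.store import KnowledgeBase


def preview_matches(kb: KnowledgeBase, query: str) -> List[str]:
    """Return short "path:start-end" labels for up to five matches."""
    segments, _meta = search_text(kb, query, context_lines=2, n_results=5)
    return [f"{seg.path}:{seg.start_line}-{seg.end_line}" for seg in segments]


def print_overview(kb: KnowledgeBase) -> None:
    """Print the deterministic "- name" tree of active files."""
    print(build_tree_overview(kb))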
mcp_kb/knowledge/store.py
@@ -0,0 +1,294 @@
"""Core knowledge base operations for file lifecycle management.

This module exposes the ``KnowledgeBase`` class, which orchestrates validated
filesystem operations for the MCP server. The class encapsulates logic for
creating, reading, appending, and modifying text files while respecting the
security constraints defined in the PRD. Each method returns plain Python data
structures so that higher-level layers (e.g., JSON-RPC handlers) can focus on
protocol serialization rather than filesystem minutiae.
"""

from __future__ import annotations

import re
from pathlib import Path
from typing import Iterable, Optional, Union

from mcp_kb.config import DELETE_SENTINEL, DATA_FOLDER_NAME
from mcp_kb.knowledge.events import (
    FileDeleteEvent,
    FileUpsertEvent,
    KnowledgeBaseListener,
)
from mcp_kb.security.path_validation import (
    PathRules,
    ensure_write_allowed,
    normalize_path,
)
from mcp_kb.utils.filesystem import (
    FileLockRegistry,
    append_text,
    ensure_parent_directory,
    read_text,
    rename,
    write_text,
)

from pydantic import BaseModel, model_validator


class FileSegment(BaseModel):
    """Represents a snippet of file content returned to MCP clients.

    The model captures a ``path`` (relative to the knowledge base root) along
    with one-based ``start_line`` and ``end_line`` indices and the extracted
    text ``content``. Using a Pydantic model makes structured output and
    validation consistent across API layers.
    """

    path: str
    start_line: int
    end_line: int
    content: str

    @model_validator(mode="before")
    @classmethod
    def check_path(cls, values: dict) -> dict:
        if isinstance(values["path"], Path):
            values["path"] = str(values["path"])
        return values

    def assert_path(self, rules: PathRules) -> None:
        rel_path = Path(self.path)
        if not rel_path.is_absolute():
            abspath = rules.root / rel_path
        else:
            abspath = rel_path
        # make sure the path resolves inside the knowledge base root
        if not abspath.is_relative_to(rules.root):
            raise ValueError(f"Path {rel_path} is not in the knowledge base root")
        # rewrite the stored path relative to the knowledge base root
        self.path = str(abspath.relative_to(rules.root))


class KnowledgeBase:
    """High-level API that executes validated knowledge base operations.

    The class is intentionally stateless aside from the path rules and lock
    registry. Stateless methods make this component easy to reuse across tests
    and potential future transports. Locking responsibilities are scoped to the
    knowledge base to keep write safety consistent across entry points.
    """

    def __init__(
        self,
        rules: PathRules,
        lock_registry: FileLockRegistry | None = None,
        listeners: Iterable[KnowledgeBaseListener] | None = None,
    ) -> None:
        """Initialize the knowledge base with path rules and optional locks.

        Parameters
        ----------
        rules:
            Active path rules that govern which paths are safe to touch.
        lock_registry:
            Optional ``FileLockRegistry`` allowing tests to inject deterministic
            locking behavior. A new registry is created when omitted.
        listeners:
            Optional iterable of callback objects that subscribe to change
            events. Each listener must implement the
            :class:`~mcp_kb.knowledge.events.KnowledgeBaseListener` protocol.
            Events are dispatched synchronously after filesystem operations
            succeed, which allows callers to maintain eventual consistency with
            external systems such as vector databases.
        """

        self.rules = rules
        self.locks = lock_registry or FileLockRegistry()
        self.listeners = tuple(listeners or ())

    def create_file(self, path: Union[str, Path], content: str) -> Path:
        """Create or overwrite a text file at ``path``.

        The method validates the path, ensures that the parent directory exists,
        and writes the provided content as UTF-8 text. Existing files are
        overwritten to match the PRD, which views creation as setting the file
        contents.
        """

        normalized = normalize_path(path, self.rules)
        ensure_write_allowed(normalized, self.rules)
        ensure_parent_directory(normalized)
        with self.locks.acquire(normalized):
            write_text(normalized, content)
        self._notify_upsert(self._path(normalized), content)
        return normalized

    def read_file(
        self,
        path: Union[str, Path],
        start_line: Optional[int] = None,
        end_line: Optional[int] = None,
    ) -> FileSegment:
        """Read content from ``path`` optionally constraining lines.

        Parameters
        ----------
        path:
            Target file path relative to the knowledge base root.
        start_line:
            Zero-based index of the first line to include. ``None`` means start
            from the beginning of the file.
        end_line:
            Zero-based index of the last line to include. ``None`` means
            include content through the end of the file.
        """

        normalized = normalize_path(path, self.rules)
        full_content = read_text(normalized)
        lines = full_content.splitlines()

        if start_line is None and end_line is None:
            segment_content = full_content
            actual_start = 0
            actual_end = len(lines) - 1
        else:
            actual_start = start_line or 0
            actual_end = end_line or len(lines) - 1
            if actual_start < 0 or actual_end < actual_start:
                raise ValueError("Invalid line interval requested")
            selected = lines[actual_start : actual_end + 1]
            segment_content = "\n".join(selected)

        return FileSegment(
            path=normalized,
            start_line=actual_start,
            end_line=actual_end,
            content=segment_content,
        )

    def append_file(self, path: Union[str, Path], content: str) -> Path:
        """Append ``content`` to the file located at ``path``.

        Missing files are created automatically so that append operations remain
        idempotent for clients.
        """

        normalized = normalize_path(path, self.rules)
        ensure_write_allowed(normalized, self.rules)
        ensure_parent_directory(normalized)
        with self.locks.acquire(normalized):
            if not normalized.exists():
                write_text(normalized, content)
            else:
                append_text(normalized, content)
            updated_text = read_text(normalized)
        self._notify_upsert(self._path(normalized), updated_text)
        return normalized

    def regex_replace(self, path: Union[str, Path], pattern: str, replacement: str) -> int:
        """Perform regex replacement and return the number of substitutions."""

        normalized = normalize_path(path, self.rules)
        ensure_write_allowed(normalized, self.rules)
        with self.locks.acquire(normalized):
            text = read_text(normalized)
            new_text, count = re.subn(pattern, replacement, text, flags=re.MULTILINE)
            write_text(normalized, new_text)
        self._notify_upsert(self._path(normalized), new_text)
        return count

    def soft_delete(self, path: Union[str, Path]) -> Path:
        """Apply soft deletion semantics by appending the deletion sentinel."""

        normalized = normalize_path(path, self.rules)
        ensure_write_allowed(normalized, self.rules)
        if not normalized.exists():
            raise FileNotFoundError(f"File '{path}' does not exist")

        target_name = f"{normalized.stem}{DELETE_SENTINEL}{normalized.suffix}"
        target = normalized.with_name(target_name)
        ensure_write_allowed(target, self.rules)
        with self.locks.acquire(normalized):
            rename(normalized, target)
        original_relative = self._path(normalized)
        self._notify_delete(original_relative)
        return target

    def total_active_files(self, include_docs: bool = False) -> int:
        """Return the total number of non-deleted UTF-8 text files under the root directory."""
        return sum(1 for _ in self.iter_active_files(include_docs=include_docs))

    def iter_active_files(self, include_docs: bool = False) -> Iterable[Path]:
        """Yield non-deleted UTF-8 text files under the root directory.

        Parameters
        ----------
        include_docs:
            When ``True`` the generator includes files located in the protected
            documentation folder. By default those files are skipped to match
            the search and overview requirements from the PRD.
        """

        from mcp_kb.utils.filesystem import is_text_file

        for path in self.rules.root.rglob("*"):
            if not path.is_file():
                continue
            if DELETE_SENTINEL in path.name:
                continue
            parts = path.relative_to(self.rules.root).parts
            if parts and parts[0] == DATA_FOLDER_NAME and not include_docs:
                continue
            if is_text_file(path):
                yield path

    def _path(self, absolute: Path) -> str:
        """Return ``absolute`` rewritten relative to the knowledge base root."""

        return str(absolute.relative_to(self.rules.root))

    def _notify_upsert(self, relative: str, content: str) -> None:
        """Dispatch an upsert event to registered listeners.

        Parameters
        ----------
        relative:
            Knowledge-base-relative path that was modified on disk.
        content:
            Text payload that should be provided to subscribers.
        """

        if not self.listeners:
            return

        event = FileUpsertEvent(
            path=relative,
            content=content,
        )
        self._dispatch("handle_upsert", event)

    def _notify_delete(self, relative: str) -> None:
        """Dispatch a delete event to registered listeners."""

        if not self.listeners:
            return

        event = FileDeleteEvent(path=relative)
        self._dispatch("handle_delete", event)

    def _dispatch(
        self, method_name: str, event: FileUpsertEvent | FileDeleteEvent
    ) -> None:
        """Call ``method_name`` on every listener and wrap failures for clarity."""

        for listener in self.listeners:
            handler = getattr(listener, method_name)
            try:
                handler(event)  # type: ignore[misc]
            except Exception as exc:  # pragma: no cover - defensive logging path
                raise RuntimeError(
                    f"Knowledge base listener {listener!r} failed during {method_name}: {exc}"
                ) from exc
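To show how the file lifecycle and listener dispatch fit together, a small sketch that wires a toy listener into ``KnowledgeBase``; it accepts a ``PathRules`` instance as an argument because that class is defined in ``mcp_kb/security/path_validation.py``, whose constructor is not shown in this diff, and the listener class is hypothetical.

from mcp_kb.knowledge.store import KnowledgeBase
from mcp_kb.security.path_validation import PathRules


class PrintListener:
    """Hypothetical listener that simply logs dispatched events."""

    def handle_upsert(self, event) -> None:
        print("upsert:", event.path, f"({len(event.content)} chars)")

    def handle_delete(self, event) -> None:
        print("delete:", event.path)


def demo_roundtrip(rules: PathRules) -> str:
    """Create, read, and soft-delete a note, returning the text that was read."""
    kb = KnowledgeBase(rules, listeners=[PrintListener()])
    kb.create_file("notes/hello.md", "# Hello\nKnowledge base smoke test.\n")
    segment = kb.read_file("notes/hello.md")
    kb.soft_delete("notes/hello.md")  # renames the file to include the delete sentinel
    return segment.content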
mcp_kb/security/__init__.py
@@ -0,0 +1 @@
"""Security-related helpers such as path validation rules."""