mcp-kb 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_kb/cli/__init__.py +1 -0
- mcp_kb/cli/args.py +168 -0
- mcp_kb/cli/main.py +175 -0
- mcp_kb/cli/reindex.py +113 -0
- mcp_kb/cli/runtime_config.py +421 -0
- mcp_kb/data/KNOWLEDBASE_DOC.md +151 -0
- mcp_kb/data/__init__.py +1 -0
- mcp_kb/ingest/__init__.py +1 -0
- mcp_kb/ingest/chroma.py +1287 -0
- mcp_kb/knowledge/__init__.py +1 -0
- mcp_kb/knowledge/bootstrap.py +44 -0
- mcp_kb/knowledge/events.py +105 -0
- mcp_kb/knowledge/search.py +177 -0
- mcp_kb/knowledge/store.py +294 -0
- mcp_kb/security/__init__.py +1 -0
- mcp_kb/security/path_validation.py +108 -0
- mcp_kb/server/__init__.py +1 -0
- mcp_kb/server/app.py +201 -0
- mcp_kb/ui/__init__.py +17 -0
- mcp_kb/ui/api.py +377 -0
- mcp_kb/ui/assets/assets/index.css +1 -0
- mcp_kb/ui/assets/index.html +62 -0
- mcp_kb/ui/server.py +332 -0
- mcp_kb/utils/__init__.py +1 -0
- mcp_kb/utils/filesystem.py +128 -0
- mcp_kb-0.3.2.dist-info/METADATA +338 -0
- mcp_kb-0.3.2.dist-info/RECORD +32 -0
- {mcp_kb-0.3.0.dist-info → mcp_kb-0.3.2.dist-info}/entry_points.txt +1 -0
- mcp_kb-0.3.0.dist-info/METADATA +0 -178
- mcp_kb-0.3.0.dist-info/RECORD +0 -7
- {mcp_kb-0.3.0.dist-info → mcp_kb-0.3.2.dist-info}/WHEEL +0 -0
- {mcp_kb-0.3.0.dist-info → mcp_kb-0.3.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,421 @@
|
|
1
|
+
"""Runtime configuration helpers for CLI defaults and persistence.
|
2
|
+
|
3
|
+
The MCP knowledge base CLI needs to juggle three independent sources of
|
4
|
+
configuration: command-line arguments, environment variables, and the last set
|
5
|
+
of options emitted by a previous run. Centralising that precedence logic in a
|
6
|
+
dedicated module keeps the main CLI entry points focused on orchestration while
|
7
|
+
making it simple to unit test the resolution rules.
|
8
|
+
|
9
|
+
The module exposes three primitives:
|
10
|
+
|
11
|
+
``load_runtime_configuration``
|
12
|
+
Reads the serialized configuration dictionary stored in the knowledge base
|
13
|
+
data directory (``.data`` by default). Missing or invalid files are treated
|
14
|
+
as empty configuration snapshots.
|
15
|
+
|
16
|
+
``apply_cli_runtime_configuration``
|
17
|
+
Normalises an ``argparse.Namespace`` to ensure every CLI option has a
|
18
|
+
concrete value after considering CLI flags, environment variables, and the
|
19
|
+
persisted snapshot. The function also returns the resolved mapping so that
|
20
|
+
callers can persist exactly what was used during the current run.
|
21
|
+
|
22
|
+
``persist_runtime_configuration``
|
23
|
+
Writes the resolved mapping back to the data directory so that subsequent
|
24
|
+
runs inherit the same defaults unless explicitly overridden.
|
25
|
+
|
26
|
+
All helper functions include extensive docstrings so that readers understand
|
27
|
+
their role in the configuration pipeline without diving into the implementation
|
28
|
+
details.
|
29
|
+
"""
|
30
|
+
|
31
|
+
from __future__ import annotations
|
32
|
+
|
33
|
+
import json
|
34
|
+
import logging
|
35
|
+
import os
|
36
|
+
from argparse import Namespace
|
37
|
+
from dataclasses import dataclass, field
|
38
|
+
from pathlib import Path
|
39
|
+
from typing import Any, Callable, Mapping, MutableMapping, Sequence
|
40
|
+
|
41
|
+
from mcp_kb.cli.args import parse_bool
|
42
|
+
from mcp_kb.config import DATA_FOLDER_NAME
|
43
|
+
|
44
|
+
|
45
|
+
# Module-scoped logger, named after this module.
logger = logging.getLogger(__name__)


# Base name of the JSON snapshot; combined with the data folder by
# ``_configuration_path``.
CONFIG_FILENAME = "cli-config.json"
"""str: File name used to persist resolved CLI defaults within ``.data``."""
|
50
|
+
|
51
|
+
|
52
|
+
def _configuration_path(root: Path) -> Path:
    """Build the location of the persisted CLI configuration file.

    Parameters
    ----------
    root:
        Knowledge base root directory that owns the data folder
        (``DATA_FOLDER_NAME``) in which configuration metadata is stored.

    Returns
    -------
    Path
        ``root / DATA_FOLDER_NAME / CONFIG_FILENAME``. The result is only
        absolute when ``root`` itself is absolute; no resolution is performed.
    """

    data_dir = root / DATA_FOLDER_NAME
    return data_dir / CONFIG_FILENAME
|
63
|
+
|
64
|
+
|
65
|
+
def load_runtime_configuration(root: Path) -> dict[str, Any]:
    """Load the previously persisted CLI configuration snapshot.

    An empty dictionary is returned when the configuration file is missing,
    cannot be read, is not valid UTF-8, is not valid JSON, or does not contain
    a JSON object. Problems other than a missing file are logged at the DEBUG
    level so that operators can inspect issues while keeping the CLI output
    quiet by default.

    Parameters
    ----------
    root:
        Knowledge base root directory that owns the data folder.

    Returns
    -------
    dict[str, Any]
        The decoded configuration mapping, or ``{}`` when nothing usable is
        available.
    """

    config_path = _configuration_path(root)

    try:
        contents = config_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        # A missing snapshot is the normal first-run state; stay silent.
        return {}
    except (OSError, UnicodeDecodeError) as exc:
        # ``UnicodeDecodeError`` is a ``ValueError``, not an ``OSError``; it
        # must be caught explicitly so a corrupt file cannot crash the CLI.
        logger.debug("Failed to read CLI configuration at %s: %s", config_path, exc)
        return {}

    try:
        payload = json.loads(contents)
    except json.JSONDecodeError as exc:
        logger.debug("Invalid CLI configuration JSON at %s: %s", config_path, exc)
        return {}

    if not isinstance(payload, dict):
        logger.debug(
            "Ignoring CLI configuration at %s because the payload is not a mapping",
            config_path,
        )
        return {}

    return payload
|
98
|
+
|
99
|
+
|
100
|
+
def persist_runtime_configuration(root: Path, configuration: Mapping[str, Any]) -> Path:
    """Write ``configuration`` as JSON into the knowledge base data directory.

    The data directory is created when it does not exist yet, and any existing
    configuration file is overwritten.

    Parameters
    ----------
    root:
        Knowledge base root directory that owns the ``.data`` folder.
    configuration:
        Final configuration mapping produced by
        :func:`apply_cli_runtime_configuration`. It must be JSON serializable.

    Returns
    -------
    Path
        The path of the written configuration file.
    """

    target = _configuration_path(root)
    target.parent.mkdir(parents=True, exist_ok=True)

    # Sorted keys and a trailing newline keep the file diff-friendly and easy
    # to inspect manually.
    document = json.dumps(configuration, indent=2, sort_keys=True) + "\n"
    target.write_text(document, encoding="utf-8")
    return target
|
123
|
+
|
124
|
+
|
125
|
+
def _identity(value: Any) -> Any:
|
126
|
+
"""Return ``value`` unchanged.
|
127
|
+
|
128
|
+
The helper keeps ``OptionSpec`` declarations concise; it is intentionally
|
129
|
+
defined at module scope so it can be referenced multiple times without
|
130
|
+
allocating additional callables.
|
131
|
+
"""
|
132
|
+
|
133
|
+
return value
|
134
|
+
|
135
|
+
|
136
|
+
def _normalize_optional_int(value: Any) -> int | None:
|
137
|
+
"""Convert ``value`` into an optional integer.
|
138
|
+
|
139
|
+
``None`` and empty strings remain ``None``. Numeric strings are coerced using
|
140
|
+
:class:`int`, and floats are truncated. Any other type raises ``TypeError`` so
|
141
|
+
that misconfigured persisted values become obvious during testing.
|
142
|
+
"""
|
143
|
+
|
144
|
+
if value is None or value == "":
|
145
|
+
return None
|
146
|
+
if isinstance(value, int):
|
147
|
+
return value
|
148
|
+
if isinstance(value, float):
|
149
|
+
return int(value)
|
150
|
+
if isinstance(value, str):
|
151
|
+
return int(value)
|
152
|
+
raise TypeError(f"Expected optional int-compatible value, received {type(value)!r}")
|
153
|
+
|
154
|
+
|
155
|
+
def _normalize_lower_str(value: Any) -> str | None:
|
156
|
+
"""Normalize ``value`` to a lowercase string when possible."""
|
157
|
+
|
158
|
+
if value is None:
|
159
|
+
return None
|
160
|
+
if isinstance(value, str):
|
161
|
+
return value.lower()
|
162
|
+
return str(value).lower()
|
163
|
+
|
164
|
+
|
165
|
+
def _normalize_transports(value: Any) -> list[str] | None:
|
166
|
+
"""Ensure transport selections are serializable lists of strings."""
|
167
|
+
|
168
|
+
if value is None:
|
169
|
+
return None
|
170
|
+
if isinstance(value, str):
|
171
|
+
return [value]
|
172
|
+
if isinstance(value, (list, tuple, set)):
|
173
|
+
return [str(item) for item in value]
|
174
|
+
raise TypeError(f"Unsupported transports value: {value!r}")
|
175
|
+
|
176
|
+
|
177
|
+
def _normalize_bool(value: Any) -> bool:
|
178
|
+
"""Convert ``value`` into a boolean using :func:`parse_bool` semantics."""
|
179
|
+
|
180
|
+
if isinstance(value, bool):
|
181
|
+
return value
|
182
|
+
if value is None:
|
183
|
+
return False
|
184
|
+
if isinstance(value, str):
|
185
|
+
return parse_bool(value)
|
186
|
+
return bool(value)
|
187
|
+
|
188
|
+
|
189
|
+
def _parse_optional_int(value: str) -> int | None:
|
190
|
+
"""Parse ``value`` from the environment into an optional integer."""
|
191
|
+
|
192
|
+
stripped = value.strip()
|
193
|
+
if not stripped:
|
194
|
+
return None
|
195
|
+
return int(stripped)
|
196
|
+
|
197
|
+
|
198
|
+
@dataclass(frozen=True)
class OptionSpec:
    """Immutable recipe for resolving one CLI option.

    Attributes
    ----------
    name:
        Attribute name on the :class:`argparse.Namespace` produced by the CLI
        parser; also the key used in the persisted mapping.
    env_var:
        Environment variable consulted when the CLI did not provide a value.
        ``None`` skips the environment lookup.
    default:
        Last-resort value used when CLI flags, environment variables, and the
        persisted configuration all come up empty. Either a raw value or a
        zero-argument callable that produces the value on demand.
    env_parser:
        Optional callable that converts the raw environment string into the
        expected type before normalisation.
    normalizer:
        Callable applied to whichever CLI/environment/persisted/default value
        wins, producing the final, type-stable representation. Defaults to
        the identity function.
    """

    name: str
    env_var: str | None = None
    default: Any | Callable[[], Any] = None
    env_parser: Callable[[str], Any] | None = None
    normalizer: Callable[[Any], Any] = _identity
|
227
|
+
|
228
|
+
|
229
|
+
def _evaluate_default(default: Any | Callable[[], Any]) -> Any:
|
230
|
+
"""Return the default value, invoking callables when necessary."""
|
231
|
+
|
232
|
+
if callable(default): # ``bool`` defaults are handled by ``normalizer``
|
233
|
+
return default()
|
234
|
+
return default
|
235
|
+
|
236
|
+
|
237
|
+
# Declarative table of every option resolved by
# ``apply_cli_runtime_configuration``. Entries without ``env_var`` are resolved
# from CLI, persisted snapshot, and default only.
OPTION_SPECS: Sequence[OptionSpec] = (
    OptionSpec("host", default=None),
    OptionSpec("port", default=None, normalizer=_normalize_optional_int),
    OptionSpec("transports", default=None, normalizer=_normalize_transports),
    OptionSpec("ui_port", default=None, normalizer=_normalize_optional_int),
    OptionSpec(
        "chroma_client",
        env_var="MCP_KB_CHROMA_CLIENT",
        default="persistent",
        env_parser=lambda value: value.lower(),
        normalizer=_normalize_lower_str,
    ),
    OptionSpec(
        "chroma_collection",
        env_var="MCP_KB_CHROMA_COLLECTION",
        default="knowledge-base",
    ),
    OptionSpec(
        "chroma_embedding",
        env_var="MCP_KB_CHROMA_EMBEDDING",
        default="default",
    ),
    OptionSpec(
        "chroma_data_dir",
        env_var="MCP_KB_CHROMA_DATA_DIR",
        default=None,
    ),
    OptionSpec(
        "chroma_host",
        env_var="MCP_KB_CHROMA_HOST",
        default=None,
    ),
    OptionSpec(
        "chroma_port",
        env_var="MCP_KB_CHROMA_PORT",
        default=None,
        env_parser=_parse_optional_int,
        normalizer=_normalize_optional_int,
    ),
    OptionSpec(
        "chroma_ssl",
        env_var="MCP_KB_CHROMA_SSL",
        default=True,
        env_parser=parse_bool,
        normalizer=_normalize_bool,
    ),
    OptionSpec(
        "chroma_tenant",
        env_var="MCP_KB_CHROMA_TENANT",
        default=None,
    ),
    OptionSpec(
        "chroma_database",
        env_var="MCP_KB_CHROMA_DATABASE",
        default=None,
    ),
    # NOTE(review): this value ends up in the resolved mapping; if callers
    # persist that mapping the key is written to disk in plain text — confirm
    # this is intended.
    OptionSpec(
        "chroma_api_key",
        env_var="MCP_KB_CHROMA_API_KEY",
        default=None,
    ),
    OptionSpec(
        "chroma_custom_auth",
        env_var="MCP_KB_CHROMA_CUSTOM_AUTH",
        default=None,
    ),
    OptionSpec(
        "chroma_id_prefix",
        env_var="MCP_KB_CHROMA_ID_PREFIX",
        default=None,
    ),
    OptionSpec(
        "chroma_sentence_transformer",
        env_var="MCP_KB_CHROMA_SENTENCE_TRANSFORMER",
        default=None,
    ),
    # Chunk settings are numeric: parse environment strings into integers so
    # that the resolved type matches the integer defaults.
    OptionSpec(
        "chroma_chunk_size",
        env_var="MCP_KB_CHROMA_CHUNK_SIZE",
        default=200,
        env_parser=_parse_optional_int,
        normalizer=_normalize_optional_int,
    ),
    OptionSpec(
        "chroma_chunk_overlap",
        env_var="MCP_KB_CHROMA_CHUNK_OVERLAP",
        default=20,
        env_parser=_parse_optional_int,
        normalizer=_normalize_optional_int,
    ),
)
|
324
|
+
|
325
|
+
|
326
|
+
def _resolve_option_value(
|
327
|
+
namespace: Namespace,
|
328
|
+
spec: OptionSpec,
|
329
|
+
persisted: Mapping[str, Any],
|
330
|
+
environ: Mapping[str, str],
|
331
|
+
) -> Any:
|
332
|
+
"""Resolve a single option using CLI, env, and persisted configuration."""
|
333
|
+
|
334
|
+
if hasattr(namespace, spec.name):
|
335
|
+
raw = getattr(namespace, spec.name)
|
336
|
+
value = spec.normalizer(raw)
|
337
|
+
setattr(namespace, spec.name, value)
|
338
|
+
return value
|
339
|
+
|
340
|
+
if spec.env_var:
|
341
|
+
env_raw = environ.get(spec.env_var)
|
342
|
+
if env_raw is not None:
|
343
|
+
parsed = spec.env_parser(env_raw) if spec.env_parser else env_raw
|
344
|
+
value = spec.normalizer(parsed)
|
345
|
+
setattr(namespace, spec.name, value)
|
346
|
+
return value
|
347
|
+
|
348
|
+
if spec.name in persisted:
|
349
|
+
stored = spec.normalizer(persisted[spec.name])
|
350
|
+
setattr(namespace, spec.name, stored)
|
351
|
+
return stored
|
352
|
+
|
353
|
+
fallback = spec.normalizer(_evaluate_default(spec.default))
|
354
|
+
setattr(namespace, spec.name, fallback)
|
355
|
+
return fallback
|
356
|
+
|
357
|
+
|
358
|
+
def _resolve_no_ui(
|
359
|
+
namespace: Namespace,
|
360
|
+
persisted: Mapping[str, Any],
|
361
|
+
environ: Mapping[str, str],
|
362
|
+
) -> bool:
|
363
|
+
"""Resolve the ``--no-ui`` flag with persisted fallback semantics."""
|
364
|
+
|
365
|
+
if getattr(namespace, "no_ui", False):
|
366
|
+
return True
|
367
|
+
|
368
|
+
env_value = environ.get("MCP_KB_NO_UI")
|
369
|
+
if env_value is not None:
|
370
|
+
return parse_bool(env_value)
|
371
|
+
|
372
|
+
stored = persisted.get("no_ui")
|
373
|
+
if stored is None:
|
374
|
+
return False
|
375
|
+
if isinstance(stored, bool):
|
376
|
+
return stored
|
377
|
+
if isinstance(stored, str):
|
378
|
+
return parse_bool(stored)
|
379
|
+
return bool(stored)
|
380
|
+
|
381
|
+
|
382
|
+
def apply_cli_runtime_configuration(
    namespace: Namespace,
    *,
    root: Path,
    persisted: Mapping[str, Any] | None = None,
    environ: Mapping[str, str] | None = None,
) -> dict[str, Any]:
    """Populate ``namespace`` with resolved CLI options and return the mapping.

    Every entry in ``OPTION_SPECS`` plus the ``no_ui`` flag is resolved and
    written back onto ``namespace`` (which is mutated in place).

    Parameters
    ----------
    namespace:
        Parsed CLI arguments as produced by a shared :mod:`argparse` parser.
    root:
        Knowledge base root path. The value is not used during resolution; it
        is accepted for signature symmetry with the load/persist helpers.
    persisted:
        Previously persisted configuration mapping. ``None`` is treated as an
        empty mapping. The mapping is copied and never modified.
    environ:
        Mapping used to look up environment variables. ``None`` selects
        :data:`os.environ`. Any explicitly supplied mapping, including an
        empty one, is used as-is so that tests can exercise precedence rules
        deterministically.

    Returns
    -------
    dict[str, Any]
        Option name to resolved value, suitable for
        :func:`persist_runtime_configuration`.
    """

    del root  # currently unused but retained for signature symmetry
    persisted = dict(persisted or {})
    # Compare against ``None`` rather than using ``or``: an empty mapping is
    # falsy but is still an explicit request to ignore the process environment.
    if environ is None:
        environ = os.environ

    resolved: MutableMapping[str, Any] = {}
    for spec in OPTION_SPECS:
        resolved[spec.name] = _resolve_option_value(namespace, spec, persisted, environ)

    no_ui_value = _resolve_no_ui(namespace, persisted, environ)
    namespace.no_ui = no_ui_value
    resolved["no_ui"] = no_ui_value

    return dict(resolved)
|
@@ -0,0 +1,151 @@
|
|
1
|
+
# LLM Operating Manual — MCP Knowledge Base (`mcp-kb`)
|
2
|
+
|
3
|
+
You are connected to a **local, text-only knowledge base**. Your job is to **search, read, create, update, and soft-delete** UTF‑8 text files under a single root directory while respecting safety rules below. Use the provided MCP tools exactly as specified.
|
4
|
+
|
5
|
+
---
|
6
|
+
|
7
|
+
## Ground Rules (enforced by the server)
|
8
|
+
|
9
|
+
- **Paths are relative only.** Absolute paths are rejected. No `..` traversal.
|
10
|
+
- **Protected folder:** `.data/` is read‑only. Do not write there.
|
11
|
+
- **Soft delete sentinel:** Files marked with `_DELETE_` in the name are considered deleted. Do not read/write them.
|
12
|
+
- **Text files only.** Binary-ish files are ignored by scans. Treat this KB as UTF‑8 text storage.
|
13
|
+
- **Concurrency:** Writes are serialized per file; still prefer read‑verify‑write sequences.
|
14
|
+
|
15
|
+
Constants (baked into the server):
|
16
|
+
- Protected folder: `.data`
|
17
|
+
- Documentation file name: `KNOWLEDBASE_DOC.md`
|
18
|
+
- Delete sentinel: `_DELETE_`
|
19
|
+
|
20
|
+
---
|
21
|
+
|
22
|
+
## Tools You Can Call
|
23
|
+
|
24
|
+
All tool names and parameter contracts are stable. Stick to these shapes.
|
25
|
+
|
26
|
+
### `create_file(path: str, content: str) -> str`
|
27
|
+
- Create or **overwrite** a text file at `path` with `content`.
|
28
|
+
- `path` must be **relative** and **outside** `.data/`.
|
29
|
+
|
30
|
+
### `read_file(path: str, start_line?: int, end_line?: int) -> { path, start_line, end_line, content }`
|
31
|
+
- Read full file or a 1‑based inclusive slice.
|
32
|
+
- If both bounds omitted ⇒ full file. If one bound omitted ⇒ server fills it.
|
33
|
+
|
34
|
+
### `append_file(path: str, content: str) -> str`
|
35
|
+
- Append text. If file is missing, it will be **created**.
|
36
|
+
|
37
|
+
### `regex_replace(path: str, pattern: str, replacement: str) -> { replacements: int }`
|
38
|
+
- Multiline regex (`re.MULTILINE`). Returns count. Always `read_file` afterwards to verify.
|
39
|
+
|
40
|
+
### `delete(path: str) -> str`
|
41
|
+
- **Soft delete**: renames `name.ext` to `name_DELETE_.ext`. Use when content is obsolete.
|
42
|
+
|
43
|
+
### `search(query: str, limit: int = 5) -> [{ path, line, context: string[] }]`
|
44
|
+
- Returns up to `limit` matches with short context.
|
45
|
+
- If Chroma mirroring is active, results are **semantic** first; otherwise plain scan.
|
46
|
+
- `limit` must be **> 0**.
|
47
|
+
|
48
|
+
### `overview() -> str`
|
49
|
+
- A deterministic `tree`-like view of active files under root (skips deleted and binaries). Use this only if necessary, and don't try to find a matching file by its title. Search is always the first preference.
|
50
|
+
|
51
|
+
### `documentation() -> str`
|
52
|
+
- Returns this manual.
|
53
|
+
|
54
|
+
---
|
55
|
+
|
56
|
+
## How to Work Effectively
|
57
|
+
|
58
|
+
### 1) Discover
|
59
|
+
- Call `overview()` to understand the tree.
|
60
|
+
- Use this only for an overview and to decide where to place new knowledge; don't look up content by file title unless a normal search is not fruitful.
|
61
|
+
|
62
|
+
### 2) Locate Content
|
63
|
+
- Prefer `search("keywords", limit=5)` to find candidate files/snippets.
|
64
|
+
- Examine each `{path, line, context}`. The `context` is a short window around the hit.
|
65
|
+
- If results look thin, **increase `limit`** (e.g., 10–20) before broadening the query.
|
66
|
+
|
67
|
+
### 3) Read Precisely
|
68
|
+
- Use `read_file(path)` for the full file when structure matters.
|
69
|
+
- If the file is large but you know the region, use `read_file(path, start_line, end_line)` to minimize tokens.
|
70
|
+
|
71
|
+
### 4) Create New Knowledge
|
72
|
+
- Pick a **descriptive relative path** (folders based on topic, kebab‑case names).
|
73
|
+
- Example: `architecture/decision-records/adr-2025-10-06-edge-cache.md`
|
74
|
+
- Call `create_file(path, content)`.
|
75
|
+
- Keep the **title as the first Markdown heading** so search has context.
|
76
|
+
- Link related files with **relative Markdown links**.
|
77
|
+
|
78
|
+
### 5) Update Safely
|
79
|
+
- For small edits:
|
80
|
+
1) `read_file(...)` to confirm current state.
|
81
|
+
2) `regex_replace(path, pattern, replacement)` for targeted changes.
|
82
|
+
3) `read_file(...)` again to verify.
|
83
|
+
- For additive changes: `append_file(path, "\n...")`.
|
84
|
+
|
85
|
+
### 6) Deletion Policy
|
86
|
+
- Use `delete(path)` to **soft-delete**. Do not operate on files that already include `_DELETE_` in their name.
|
87
|
+
|
88
|
+
---
|
89
|
+
|
90
|
+
## Search Semantics (important)
|
91
|
+
|
92
|
+
- When Chroma ingestion is **enabled**, `search()` uses semantic ranking first and returns the **best slice per file** (the ingestor extracts one representative match per document chunk/file). If no obvious line match is found, you may get a **top-of-file preview** — then call `read_file()` to confirm.
|
93
|
+
- When Chroma is **not** enabled, `search()` scans files literally and returns all matches up to `limit`.
|
94
|
+
- Always **validate** by fetching the file segment with `read_file()` before making edits.
|
95
|
+
|
96
|
+
---
|
97
|
+
|
98
|
+
## Parameter Contracts and Gotchas
|
99
|
+
|
100
|
+
- `path` must be **relative** (e.g., `notes/today.md`). Absolute paths are rejected.
|
101
|
+
- Do **not** write into `.data/` (protected). Reads are allowed there.
|
102
|
+
- Line numbers in `read_file` are **1‑based** and the interval is **inclusive**.
|
103
|
+
- `regex_replace` uses Python’s `re.MULTILINE`. Validate your pattern; avoid overly broad substitutions.
|
104
|
+
- `append_file` will create a file if missing (useful for logs/progress notes).
|
105
|
+
|
106
|
+
---
|
107
|
+
|
108
|
+
## Typical Recipes
|
109
|
+
|
110
|
+
**Find → Read → Edit**
|
111
|
+
1. `search("beta feature toggle", limit=10)`
|
112
|
+
2. Pick a result: `read_file("features/toggles.md", 40, 80)`
|
113
|
+
3. Adjust: `regex_replace("features/toggles.md", "^Status:.*$", "Status: Enabled")`
|
114
|
+
4. Verify: `read_file("features/toggles.md")` (check the `Status:` header)
|
115
|
+
|
116
|
+
**Add a new doc**
|
117
|
+
1. `create_file("ops/runbooks/cache-invalidation.md", "# Cache Invalidation\n\n…")`
|
118
|
+
2. Optionally link it from an index: `append_file("ops/README.md", "\n- [Cache Invalidation](runbooks/cache-invalidation.md)")`
|
119
|
+
|
120
|
+
**Soft delete an obsolete note**
|
121
|
+
1. `delete("notes/old-incident.md")`
|
122
|
+
|
123
|
+
---
|
124
|
+
|
125
|
+
## Error Recovery
|
126
|
+
|
127
|
+
- **"Absolute paths are not permitted"** → Use a **relative** path.
|
128
|
+
- **"Writes are not allowed inside the protected folder '.data'"** → Choose a different folder (e.g., `docs/`).
|
129
|
+
- **"File 'X' does not exist"** on delete → Confirm with `overview()` or `search()`. Only existing non‑deleted files can be soft‑deleted.
|
130
|
+
- **No search hits** → Widen keywords, increase `limit`, or pivot to `overview()` to eyeball likely locations.
|
131
|
+
|
132
|
+
---
|
133
|
+
|
134
|
+
## Things You Should Not Do
|
135
|
+
|
136
|
+
- Do not fabricate file contents or paths. Always confirm with `overview()`, `search()`, and `read_file()`.
|
137
|
+
- Do not operate on files that include `_DELETE_` in their name.
|
138
|
+
- Do not attempt to talk directly to Chroma; you only use `search()`. Indexing is handled automatically after writes.
|
139
|
+
- Do not write binary or non‑UTF‑8 content.
|
140
|
+
|
141
|
+
---
|
142
|
+
|
143
|
+
## Performance Hints
|
144
|
+
|
145
|
+
- Prefer `search()` + targeted `read_file()` slices over reading entire large files.
|
146
|
+
- Keep `limit` modest (5–10) unless you must broaden the search.
|
147
|
+
- Batch edits in one file using a single `regex_replace` when safe (then verify).
|
148
|
+
|
149
|
+
---
|
150
|
+
|
151
|
+
You now have the minimal contract to operate this KB safely and efficiently.
|
mcp_kb/data/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
"""Embedded data files shipped with the MCP knowledge base server."""
|
@@ -0,0 +1 @@
|
|
1
|
+
"""Pluggable ingestion adapters for synchronizing knowledge base content."""
|