mcp-kb 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,421 @@
1
+ """Runtime configuration helpers for CLI defaults and persistence.
2
+
3
+ The MCP knowledge base CLI needs to juggle three independent sources of
4
+ configuration: command-line arguments, environment variables, and the last set
5
+ of options emitted by a previous run. Centralising that precedence logic in a
6
+ dedicated module keeps the main CLI entry points focused on orchestration while
7
+ making it simple to unit test the resolution rules.
8
+
9
+ The module exposes three primitives:
10
+
11
+ ``load_runtime_configuration``
12
+ Reads the serialized configuration dictionary stored in the knowledge base
13
+ data directory (``.data`` by default). Missing or invalid files are treated
14
+ as empty configuration snapshots.
15
+
16
+ ``apply_cli_runtime_configuration``
17
+ Normalises an ``argparse.Namespace`` to ensure every CLI option has a
18
+ concrete value after considering CLI flags, environment variables, and the
19
+ persisted snapshot. The function also returns the resolved mapping so that
20
+ callers can persist exactly what was used during the current run.
21
+
22
+ ``persist_runtime_configuration``
23
+ Writes the resolved mapping back to the data directory so that subsequent
24
+ runs inherit the same defaults unless explicitly overridden.
25
+
26
+ All helper functions include extensive docstrings so that readers understand
27
+ their role in the configuration pipeline without diving into the implementation
28
+ details.
29
+ """
30
+
31
+ from __future__ import annotations
32
+
33
+ import json
34
+ import logging
35
+ import os
36
+ from argparse import Namespace
37
+ from dataclasses import dataclass, field
38
+ from pathlib import Path
39
+ from typing import Any, Callable, Mapping, MutableMapping, Sequence
40
+
41
+ from mcp_kb.cli.args import parse_bool
42
+ from mcp_kb.config import DATA_FOLDER_NAME
43
+
44
+
45
+ logger = logging.getLogger(__name__)
46
+
47
+
48
+ CONFIG_FILENAME = "cli-config.json"
49
+ """str: File name used to persist resolved CLI defaults within ``.data``."""
50
+
51
+
52
def _configuration_path(root: Path) -> Path:
    """Build the location of the persisted CLI configuration file.

    Parameters
    ----------
    root:
        Knowledge base root directory. The configuration file lives in the
        ``DATA_FOLDER_NAME`` sub-folder of this directory.

    Returns
    -------
    Path
        ``root / DATA_FOLDER_NAME / CONFIG_FILENAME``. The path is only as
        absolute as ``root`` itself; nothing is resolved or created here.
    """

    data_dir = root / DATA_FOLDER_NAME
    return data_dir / CONFIG_FILENAME
63
+
64
+
65
def load_runtime_configuration(root: Path) -> dict[str, Any]:
    """Load the previously persisted CLI configuration snapshot.

    An empty dictionary is returned when the configuration file is missing,
    cannot be read, is not valid UTF-8, is not valid JSON, or does not contain
    a JSON object. Unreadable or invalid files are logged at DEBUG level so
    operators can inspect issues while the CLI output stays quiet by default.

    Parameters
    ----------
    root:
        Knowledge base root directory that owns the data folder.

    Returns
    -------
    dict[str, Any]
        The decoded configuration mapping, or ``{}`` when nothing usable was
        found.
    """

    config_path = _configuration_path(root)

    # Read first and handle failure afterwards: this avoids the race between
    # an ``exists()`` probe and the actual read.
    try:
        contents = config_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        # A missing snapshot is the normal first-run case; stay silent.
        return {}
    except (OSError, UnicodeDecodeError) as exc:  # pragma: no cover - filesystem edge cases
        # ``UnicodeDecodeError`` is a ``ValueError``, not an ``OSError``, so it
        # must be listed explicitly or a corrupt file would crash the CLI.
        logger.debug("Failed to read CLI configuration at %s: %s", config_path, exc)
        return {}

    try:
        payload = json.loads(contents)
    except json.JSONDecodeError as exc:
        logger.debug("Invalid CLI configuration JSON at %s: %s", config_path, exc)
        return {}

    if not isinstance(payload, dict):
        # Valid JSON that is not an object (e.g. a list) cannot act as a
        # configuration mapping.
        logger.debug(
            "Ignoring CLI configuration at %s because the payload is not a mapping",
            config_path,
        )
        return {}

    return payload
98
+
99
+
100
def persist_runtime_configuration(root: Path, configuration: Mapping[str, Any]) -> Path:
    """Write ``configuration`` as JSON into the knowledge base data directory.

    Parameters
    ----------
    root:
        Knowledge base root directory that owns the data folder.
    configuration:
        Resolved configuration mapping to store. It must be JSON-serialisable.

    Returns
    -------
    Path
        Location of the file that was written.
    """

    target = _configuration_path(root)
    # The data folder may not exist yet on a fresh knowledge base.
    target.parent.mkdir(parents=True, exist_ok=True)
    # Sorted keys and indentation keep the file stable across runs and easy
    # to read or diff by hand.
    document = json.dumps(configuration, indent=2, sort_keys=True) + "\n"
    target.write_text(document, encoding="utf-8")
    return target
123
+
124
+
125
+ def _identity(value: Any) -> Any:
126
+ """Return ``value`` unchanged.
127
+
128
+ The helper keeps ``OptionSpec`` declarations concise; it is intentionally
129
+ defined at module scope so it can be referenced multiple times without
130
+ allocating additional callables.
131
+ """
132
+
133
+ return value
134
+
135
+
136
+ def _normalize_optional_int(value: Any) -> int | None:
137
+ """Convert ``value`` into an optional integer.
138
+
139
+ ``None`` and empty strings remain ``None``. Numeric strings are coerced using
140
+ :class:`int`, and floats are truncated. Any other type raises ``TypeError`` so
141
+ that misconfigured persisted values become obvious during testing.
142
+ """
143
+
144
+ if value is None or value == "":
145
+ return None
146
+ if isinstance(value, int):
147
+ return value
148
+ if isinstance(value, float):
149
+ return int(value)
150
+ if isinstance(value, str):
151
+ return int(value)
152
+ raise TypeError(f"Expected optional int-compatible value, received {type(value)!r}")
153
+
154
+
155
+ def _normalize_lower_str(value: Any) -> str | None:
156
+ """Normalize ``value`` to a lowercase string when possible."""
157
+
158
+ if value is None:
159
+ return None
160
+ if isinstance(value, str):
161
+ return value.lower()
162
+ return str(value).lower()
163
+
164
+
165
+ def _normalize_transports(value: Any) -> list[str] | None:
166
+ """Ensure transport selections are serializable lists of strings."""
167
+
168
+ if value is None:
169
+ return None
170
+ if isinstance(value, str):
171
+ return [value]
172
+ if isinstance(value, (list, tuple, set)):
173
+ return [str(item) for item in value]
174
+ raise TypeError(f"Unsupported transports value: {value!r}")
175
+
176
+
177
+ def _normalize_bool(value: Any) -> bool:
178
+ """Convert ``value`` into a boolean using :func:`parse_bool` semantics."""
179
+
180
+ if isinstance(value, bool):
181
+ return value
182
+ if value is None:
183
+ return False
184
+ if isinstance(value, str):
185
+ return parse_bool(value)
186
+ return bool(value)
187
+
188
+
189
+ def _parse_optional_int(value: str) -> int | None:
190
+ """Parse ``value`` from the environment into an optional integer."""
191
+
192
+ stripped = value.strip()
193
+ if not stripped:
194
+ return None
195
+ return int(stripped)
196
+
197
+
198
@dataclass(frozen=True)
class OptionSpec:
    """Immutable description of how one CLI option is resolved.

    Attributes
    ----------
    name:
        Attribute name of the option on the parsed
        :class:`argparse.Namespace`; also used as the key in the persisted
        mapping.
    env_var:
        Name of the environment variable consulted when the namespace does
        not carry the option. ``None`` disables the environment lookup.
    default:
        Last-resort value, used when no other source supplies one. May be a
        plain value or a zero-argument callable that is invoked on demand.
    env_parser:
        Optional converter applied to the raw environment string before
        normalisation. ``None`` passes the string through as is.
    normalizer:
        Converter applied to the value from whichever source won, producing
        the final representation. Defaults to the identity function.
    """

    name: str
    env_var: str | None = None
    default: Any | Callable[[], Any] = None
    env_parser: Callable[[str], Any] | None = None
    normalizer: Callable[[Any], Any] = field(default=_identity)
227
+
228
+
229
+ def _evaluate_default(default: Any | Callable[[], Any]) -> Any:
230
+ """Return the default value, invoking callables when necessary."""
231
+
232
+ if callable(default): # ``bool`` defaults are handled by ``normalizer``
233
+ return default()
234
+ return default
235
+
236
+
237
# Resolution table for every CLI option handled by this module. Options with
# no ``env_var`` are resolved from the CLI namespace, the persisted snapshot,
# or the default only.
OPTION_SPECS: Sequence[OptionSpec] = (
    OptionSpec("host", default=None),
    OptionSpec("port", default=None, normalizer=_normalize_optional_int),
    OptionSpec("transports", default=None, normalizer=_normalize_transports),
    OptionSpec("ui_port", default=None, normalizer=_normalize_optional_int),
    OptionSpec(
        "chroma_client",
        env_var="MCP_KB_CHROMA_CLIENT",
        default="persistent",
        env_parser=lambda value: value.lower(),
        normalizer=_normalize_lower_str,
    ),
    OptionSpec(
        "chroma_collection",
        env_var="MCP_KB_CHROMA_COLLECTION",
        default="knowledge-base",
    ),
    OptionSpec(
        "chroma_embedding",
        env_var="MCP_KB_CHROMA_EMBEDDING",
        default="default",
    ),
    OptionSpec(
        "chroma_data_dir",
        env_var="MCP_KB_CHROMA_DATA_DIR",
        default=None,
    ),
    OptionSpec(
        "chroma_host",
        env_var="MCP_KB_CHROMA_HOST",
        default=None,
    ),
    OptionSpec(
        "chroma_port",
        env_var="MCP_KB_CHROMA_PORT",
        default=None,
        env_parser=_parse_optional_int,
        normalizer=_normalize_optional_int,
    ),
    OptionSpec(
        "chroma_ssl",
        env_var="MCP_KB_CHROMA_SSL",
        default=True,
        env_parser=parse_bool,
        normalizer=_normalize_bool,
    ),
    OptionSpec(
        "chroma_tenant",
        env_var="MCP_KB_CHROMA_TENANT",
        default=None,
    ),
    OptionSpec(
        "chroma_database",
        env_var="MCP_KB_CHROMA_DATABASE",
        default=None,
    ),
    OptionSpec(
        "chroma_api_key",
        env_var="MCP_KB_CHROMA_API_KEY",
        default=None,
    ),
    OptionSpec(
        "chroma_custom_auth",
        env_var="MCP_KB_CHROMA_CUSTOM_AUTH",
        default=None,
    ),
    OptionSpec(
        "chroma_id_prefix",
        env_var="MCP_KB_CHROMA_ID_PREFIX",
        default=None,
    ),
    OptionSpec(
        "chroma_sentence_transformer",
        env_var="MCP_KB_CHROMA_SENTENCE_TRANSFORMER",
        default=None,
    ),
    # The chunking options have integer defaults. Environment variables are
    # always strings, so they get the same integer parsing and normalisation
    # as ``chroma_port``; otherwise an environment-supplied value would reach
    # callers as ``str``.
    OptionSpec(
        "chroma_chunk_size",
        env_var="MCP_KB_CHROMA_CHUNK_SIZE",
        default=200,
        env_parser=_parse_optional_int,
        normalizer=_normalize_optional_int,
    ),
    OptionSpec(
        "chroma_chunk_overlap",
        env_var="MCP_KB_CHROMA_CHUNK_OVERLAP",
        default=20,
        env_parser=_parse_optional_int,
        normalizer=_normalize_optional_int,
    ),
)
324
+
325
+
326
+ def _resolve_option_value(
327
+ namespace: Namespace,
328
+ spec: OptionSpec,
329
+ persisted: Mapping[str, Any],
330
+ environ: Mapping[str, str],
331
+ ) -> Any:
332
+ """Resolve a single option using CLI, env, and persisted configuration."""
333
+
334
+ if hasattr(namespace, spec.name):
335
+ raw = getattr(namespace, spec.name)
336
+ value = spec.normalizer(raw)
337
+ setattr(namespace, spec.name, value)
338
+ return value
339
+
340
+ if spec.env_var:
341
+ env_raw = environ.get(spec.env_var)
342
+ if env_raw is not None:
343
+ parsed = spec.env_parser(env_raw) if spec.env_parser else env_raw
344
+ value = spec.normalizer(parsed)
345
+ setattr(namespace, spec.name, value)
346
+ return value
347
+
348
+ if spec.name in persisted:
349
+ stored = spec.normalizer(persisted[spec.name])
350
+ setattr(namespace, spec.name, stored)
351
+ return stored
352
+
353
+ fallback = spec.normalizer(_evaluate_default(spec.default))
354
+ setattr(namespace, spec.name, fallback)
355
+ return fallback
356
+
357
+
358
+ def _resolve_no_ui(
359
+ namespace: Namespace,
360
+ persisted: Mapping[str, Any],
361
+ environ: Mapping[str, str],
362
+ ) -> bool:
363
+ """Resolve the ``--no-ui`` flag with persisted fallback semantics."""
364
+
365
+ if getattr(namespace, "no_ui", False):
366
+ return True
367
+
368
+ env_value = environ.get("MCP_KB_NO_UI")
369
+ if env_value is not None:
370
+ return parse_bool(env_value)
371
+
372
+ stored = persisted.get("no_ui")
373
+ if stored is None:
374
+ return False
375
+ if isinstance(stored, bool):
376
+ return stored
377
+ if isinstance(stored, str):
378
+ return parse_bool(stored)
379
+ return bool(stored)
380
+
381
+
382
def apply_cli_runtime_configuration(
    namespace: Namespace,
    *,
    root: Path,
    persisted: Mapping[str, Any] | None = None,
    environ: Mapping[str, str] | None = None,
) -> dict[str, Any]:
    """Populate ``namespace`` with resolved CLI options and return the mapping.

    Parameters
    ----------
    namespace:
        Parsed CLI arguments. It is updated in place so that every option in
        ``OPTION_SPECS``, plus ``no_ui``, has a resolved value.
    root:
        Knowledge base root path. It is not used during resolution and is
        accepted only to keep the signature stable.
    persisted:
        Previously persisted configuration mapping. ``None`` is treated as an
        empty mapping.
    environ:
        Mapping used to look up environment variables. ``None`` selects
        :data:`os.environ`. Any mapping passed explicitly, including an empty
        one, is used as given so tests can exercise precedence rules
        deterministically.

    Returns
    -------
    dict[str, Any]
        Option name to resolved value, including the ``no_ui`` entry.
    """

    del root  # currently unused but retained for signature symmetry
    persisted_map = dict(persisted or {})
    # Compare against ``None`` rather than truthiness: an explicitly supplied
    # empty mapping must not be replaced by the real process environment.
    env = os.environ if environ is None else environ

    resolved: dict[str, Any] = {
        spec.name: _resolve_option_value(namespace, spec, persisted_map, env)
        for spec in OPTION_SPECS
    }

    # ``no_ui`` has its own resolution rules, so it is handled outside the
    # generic option table.
    no_ui_value = _resolve_no_ui(namespace, persisted_map, env)
    namespace.no_ui = no_ui_value
    resolved["no_ui"] = no_ui_value

    return resolved
@@ -0,0 +1,151 @@
1
+ # LLM Operating Manual — MCP Knowledge Base (`mcp-kb`)
2
+
3
+ You are connected to a **local, text-only knowledge base**. Your job is to **search, read, create, update, and soft-delete** UTF‑8 text files under a single root directory while respecting safety rules below. Use the provided MCP tools exactly as specified.
4
+
5
+ ---
6
+
7
+ ## Ground Rules (enforced by the server)
8
+
9
+ - **Paths are relative only.** Absolute paths are rejected. No `..` traversal.
10
+ - **Protected folder:** `.data/` is read‑only. Do not write there.
11
+ - **Soft delete sentinel:** Files marked with `_DELETE_` in the name are considered deleted. Do not read/write them.
12
+ - **Text files only.** Binary-ish files are ignored by scans. Treat this KB as UTF‑8 text storage.
13
+ - **Concurrency:** Writes are serialized per file; still prefer read‑verify‑write sequences.
14
+
15
+ Constants (baked into the server):
16
+ - Protected folder: `.data`
17
+ - Documentation file name: `KNOWLEDBASE_DOC.md`
18
+ - Delete sentinel: `_DELETE_`
19
+
20
+ ---
21
+
22
+ ## Tools You Can Call
23
+
24
+ All tool names and parameter contracts are stable. Stick to these shapes.
25
+
26
+ ### `create_file(path: str, content: str) -> str`
27
+ - Create or **overwrite** a text file at `path` with `content`.
28
+ - `path` must be **relative** and **outside** `.data/`.
29
+
30
+ ### `read_file(path: str, start_line?: int, end_line?: int) -> { path, start_line, end_line, content }`
31
+ - Read full file or a 1‑based inclusive slice.
32
+ - If both bounds omitted ⇒ full file. If one bound omitted ⇒ server fills it.
33
+
34
+ ### `append_file(path: str, content: str) -> str`
35
+ - Append text. If file is missing, it will be **created**.
36
+
37
+ ### `regex_replace(path: str, pattern: str, replacement: str) -> { replacements: int }`
38
+ - Multiline regex (`re.MULTILINE`). Returns count. Always `read_file` afterwards to verify.
39
+
40
+ ### `delete(path: str) -> str`
41
+ - **Soft delete**: renames `name.ext` to `name_DELETE_.ext`. Use when content is obsolete.
42
+
43
+ ### `search(query: str, limit: int = 5) -> [{ path, line, context: string[] }]`
44
+ - Returns up to `limit` matches with short context.
45
+ - If Chroma mirroring is active, results are **semantic** first; otherwise plain scan.
46
+ - `limit` must be **> 0**.
47
+
48
+ ### `overview() -> str`
49
+ - A deterministic `tree`-like view of active files under root (skips deleted and binaries). Use this only if necessary, and don't try to find a matching file by its title. Search is always the first preference.
50
+
51
+ ### `documentation() -> str`
52
+ - Returns this manual.
53
+
54
+ ---
55
+
56
+ ## How to Work Effectively
57
+
58
+ ### 1) Discover
59
+ - Call `overview()` to understand the tree.
60
+ - Use this only to get an overview and to decide where to place knowledge; don't look for content by file titles unless normal search is not fruitful.
61
+
62
+ ### 2) Locate Content
63
+ - Prefer `search("keywords", limit=5)` to find candidate files/snippets.
64
+ - Examine each `{path, line, context}`. The `context` is a short window around the hit.
65
+ - If results look thin, **increase `limit`** (e.g., 10–20) before broadening the query.
66
+
67
+ ### 3) Read Precisely
68
+ - Use `read_file(path)` for the full file when structure matters.
69
+ - If the file is large but you know the region, use `read_file(path, start_line, end_line)` to minimize tokens.
70
+
71
+ ### 4) Create New Knowledge
72
+ - Pick a **descriptive relative path** (folders based on topic, kebab‑case names).
73
+ - Example: `architecture/decision-records/adr-2025-10-06-edge-cache.md`
74
+ - Call `create_file(path, content)`.
75
+ - Keep the **title as the first Markdown heading** so search has context.
76
+ - Link related files with **relative Markdown links**.
77
+
78
+ ### 5) Update Safely
79
+ - For small edits:
80
+ 1) `read_file(...)` to confirm current state.
81
+ 2) `regex_replace(path, pattern, replacement)` for targeted changes.
82
+ 3) `read_file(...)` again to verify.
83
+ - For additive changes: `append_file(path, "\n...")`.
84
+
85
+ ### 6) Deletion Policy
86
+ - Use `delete(path)` to **soft-delete**. Do not operate on files that already include `_DELETE_` in their name.
87
+
88
+ ---
89
+
90
+ ## Search Semantics (important)
91
+
92
+ - When Chroma ingestion is **enabled**, `search()` uses semantic ranking first and returns the **best slice per file** (the ingestor extracts one representative match per document chunk/file). If no obvious line match is found, you may get a **top-of-file preview** — then call `read_file()` to confirm.
93
+ - When Chroma is **not** enabled, `search()` scans files literally and returns all matches up to `limit`.
94
+ - Always **validate** by fetching the file segment with `read_file()` before making edits.
95
+
96
+ ---
97
+
98
+ ## Parameter Contracts and Gotchas
99
+
100
+ - `path` must be **relative** (e.g., `notes/today.md`). Absolute paths are rejected.
101
+ - Do **not** write into `.data/` (protected). Reads are allowed there.
102
+ - Line numbers in `read_file` are **1‑based** and the interval is **inclusive**.
103
+ - `regex_replace` uses Python’s `re.MULTILINE`. Validate your pattern; avoid overly broad substitutions.
104
+ - `append_file` will create a file if missing (useful for logs/progress notes).
105
+
106
+ ---
107
+
108
+ ## Typical Recipes
109
+
110
+ **Find → Read → Edit**
111
+ 1. `search("beta feature toggle", limit=10)`
112
+ 2. Pick a result: `read_file("features/toggles.md", 40, 80)`
113
+ 3. Adjust: `regex_replace("features/toggles.md", "^Status:.*$", "Status: Enabled")`
114
+ 4. Verify: `read_file("features/toggles.md")` (check the `Status:` header)
115
+
116
+ **Add a new doc**
117
+ 1. `create_file("ops/runbooks/cache-invalidation.md", "# Cache Invalidation\n\n…")`
118
+ 2. Optionally link it from an index: `append_file("ops/README.md", "\n- [Cache Invalidation](runbooks/cache-invalidation.md)")`
119
+
120
+ **Soft delete an obsolete note**
121
+ 1. `delete("notes/old-incident.md")`
122
+
123
+ ---
124
+
125
+ ## Error Recovery
126
+
127
+ - **"Absolute paths are not permitted"** → Use a **relative** path.
128
+ - **"Writes are not allowed inside the protected folder '.data'"** → Choose a different folder (e.g., `docs/`).
129
+ - **"File 'X' does not exist"** on delete → Confirm with `overview()` or `search()`. Only existing non‑deleted files can be soft‑deleted.
130
+ - **No search hits** → Widen keywords, increase `limit`, or pivot to `overview()` to eyeball likely locations.
131
+
132
+ ---
133
+
134
+ ## Things You Should Not Do
135
+
136
+ - Do not fabricate file contents or paths. Always confirm with `overview()`, `search()`, and `read_file()`.
137
+ - Do not operate on files that include `_DELETE_` in their name.
138
+ - Do not attempt to talk directly to Chroma; you only use `search()`. Indexing is handled automatically after writes.
139
+ - Do not write binary or non‑UTF‑8 content.
140
+
141
+ ---
142
+
143
+ ## Performance Hints
144
+
145
+ - Prefer `search()` + targeted `read_file()` slices over reading entire large files.
146
+ - Keep `limit` modest (5–10) unless you must broaden the search.
147
+ - Batch edits in one file using a single `regex_replace` when safe (then verify).
148
+
149
+ ---
150
+
151
+ You now have the minimal contract to operate this KB safely and efficiently.
@@ -0,0 +1 @@
1
+ """Embedded data files shipped with the MCP knowledge base server."""
@@ -0,0 +1 @@
1
+ """Pluggable ingestion adapters for synchronizing knowledge base content."""