exomem 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. exomem-0.2.1.dist-info/METADATA +379 -0
  2. exomem-0.2.1.dist-info/RECORD +88 -0
  3. exomem-0.2.1.dist-info/WHEEL +4 -0
  4. exomem-0.2.1.dist-info/entry_points.txt +3 -0
  5. exomem-0.2.1.dist-info/licenses/LICENSE +661 -0
  6. kb_mcp/__init__.py +1 -0
  7. kb_mcp/__main__.py +559 -0
  8. kb_mcp/_hooks/kb-capture-nudge.sh +11 -0
  9. kb_mcp/_hooks/kb-retrieve-nudge.sh +11 -0
  10. kb_mcp/_hooks/kb_capture_nudge.py +189 -0
  11. kb_mcp/_hooks/kb_retrieve_nudge.py +119 -0
  12. kb_mcp/_scaffold/Entities/index.md +16 -0
  13. kb_mcp/_scaffold/Notes/index.md +25 -0
  14. kb_mcp/_scaffold/Sources/index.md +13 -0
  15. kb_mcp/_scaffold/_Schema/SKILL.md +584 -0
  16. kb_mcp/_scaffold/_Schema/project-keys.yaml +34 -0
  17. kb_mcp/_scaffold/_Schema/references/audit-checks.md +21 -0
  18. kb_mcp/_scaffold/_Schema/references/frontmatter.md +197 -0
  19. kb_mcp/_scaffold/_Schema/references/operations.md +333 -0
  20. kb_mcp/_scaffold/_Schema/references/page-types.md +498 -0
  21. kb_mcp/_scaffold/_Schema/references/supersession.md +117 -0
  22. kb_mcp/_scaffold/_Schema/references/write-scope.md +116 -0
  23. kb_mcp/_scaffold/index.md +21 -0
  24. kb_mcp/_scaffold/log.md +7 -0
  25. kb_mcp/access.py +141 -0
  26. kb_mcp/add.py +353 -0
  27. kb_mcp/append_to_file.py +137 -0
  28. kb_mcp/attention.py +243 -0
  29. kb_mcp/audit.py +1574 -0
  30. kb_mcp/audit_fix.py +435 -0
  31. kb_mcp/backfill.py +182 -0
  32. kb_mcp/bm25.py +207 -0
  33. kb_mcp/cf_access.py +54 -0
  34. kb_mcp/cli_ops.py +225 -0
  35. kb_mcp/commands.py +2106 -0
  36. kb_mcp/compile_proposal.py +151 -0
  37. kb_mcp/context_pack.py +386 -0
  38. kb_mcp/corpus_aware.py +386 -0
  39. kb_mcp/create_directory.py +123 -0
  40. kb_mcp/create_file.py +175 -0
  41. kb_mcp/delete_directory.py +285 -0
  42. kb_mcp/delete_file.py +319 -0
  43. kb_mcp/doctor.py +392 -0
  44. kb_mcp/edit.py +717 -0
  45. kb_mcp/embeddings.py +889 -0
  46. kb_mcp/enroll_speaker.py +73 -0
  47. kb_mcp/eval_metrics.py +57 -0
  48. kb_mcp/evolution.py +250 -0
  49. kb_mcp/extract.py +812 -0
  50. kb_mcp/file_watcher.py +204 -0
  51. kb_mcp/find.py +1812 -0
  52. kb_mcp/fusion.py +58 -0
  53. kb_mcp/get_frontmatter.py +67 -0
  54. kb_mcp/get_page.py +129 -0
  55. kb_mcp/guards.py +79 -0
  56. kb_mcp/icon.svg +4 -0
  57. kb_mcp/image_tags.py +175 -0
  58. kb_mcp/indexes.py +675 -0
  59. kb_mcp/init.py +66 -0
  60. kb_mcp/install_hook.py +140 -0
  61. kb_mcp/install_skill.py +105 -0
  62. kb_mcp/link.py +502 -0
  63. kb_mcp/list_directory.py +144 -0
  64. kb_mcp/list_inbound_links.py +76 -0
  65. kb_mcp/list_trash.py +162 -0
  66. kb_mcp/logging_config.py +36 -0
  67. kb_mcp/media_worker.py +242 -0
  68. kb_mcp/move_file.py +309 -0
  69. kb_mcp/multi_edit.py +166 -0
  70. kb_mcp/note.py +966 -0
  71. kb_mcp/preserve.py +634 -0
  72. kb_mcp/project_keys.py +303 -0
  73. kb_mcp/provenance.py +124 -0
  74. kb_mcp/query_data.py +530 -0
  75. kb_mcp/query_log.py +152 -0
  76. kb_mcp/reconcile.py +123 -0
  77. kb_mcp/recover_from_trash.py +205 -0
  78. kb_mcp/replace.py +354 -0
  79. kb_mcp/schema.py +199 -0
  80. kb_mcp/server.py +974 -0
  81. kb_mcp/set_frontmatter_field.py +255 -0
  82. kb_mcp/set_take.py +168 -0
  83. kb_mcp/speaker_assignment.py +51 -0
  84. kb_mcp/speaker_attribution.py +203 -0
  85. kb_mcp/upload_tokens.py +84 -0
  86. kb_mcp/vault.py +947 -0
  87. kb_mcp/voice_embed.py +174 -0
  88. kb_mcp/voice_profiles.py +124 -0
@@ -0,0 +1,379 @@
1
+ Metadata-Version: 2.4
2
+ Name: exomem
3
+ Version: 0.2.1
4
+ Summary: Local knowledge substrate for owned markdown/Obsidian vaults, exposed through MCP, REST, and CLI with multimodal OCR/ASR/CLIP search
5
+ Author-email: Hugo Ander <founder@substratesystems.io>
6
+ License: AGPL-3.0-or-later
7
+ License-File: LICENSE
8
+ Requires-Python: >=3.11
9
+ Requires-Dist: fastmcp>=2.10
10
+ Requires-Dist: numpy>=1.26
11
+ Requires-Dist: pyjwt[crypto]>=2.8
12
+ Requires-Dist: python-dotenv>=1.0
13
+ Requires-Dist: python-multipart>=0.0.9
14
+ Requires-Dist: python-slugify>=8.0
15
+ Requires-Dist: pyyaml>=6.0
16
+ Requires-Dist: rank-bm25>=0.2.2
17
+ Requires-Dist: snowballstemmer>=2.2
18
+ Requires-Dist: watchdog>=4.0
19
+ Provides-Extra: diarization
20
+ Requires-Dist: speechbrain>=1.0; extra == 'diarization'
21
+ Provides-Extra: embeddings
22
+ Requires-Dist: pillow>=10.0; extra == 'embeddings'
23
+ Requires-Dist: sentence-transformers>=2.7; extra == 'embeddings'
24
+ Requires-Dist: torch>=2.12; extra == 'embeddings'
25
+ Provides-Extra: media
26
+ Requires-Dist: faster-whisper>=1.0; extra == 'media'
27
+ Requires-Dist: markitdown[docx,pptx,xlsx]>=0.1.1; extra == 'media'
28
+ Requires-Dist: nvidia-cublas-cu12; (sys_platform == 'win32') and extra == 'media'
29
+ Requires-Dist: nvidia-cuda-runtime-cu12; (sys_platform == 'win32') and extra == 'media'
30
+ Requires-Dist: nvidia-cudnn-cu12; (sys_platform == 'win32') and extra == 'media'
31
+ Requires-Dist: pillow>=10.0; extra == 'media'
32
+ Requires-Dist: pymupdf>=1.24; extra == 'media'
33
+ Requires-Dist: pytesseract>=0.3.10; extra == 'media'
34
+ Provides-Extra: vision
35
+ Requires-Dist: transformers>=4.40; extra == 'vision'
36
+ Description-Content-Type: text/markdown
37
+
38
+ # exomem
39
+
40
+ An MCP server that makes your Obsidian / markdown vault searchable — text, PDFs,
41
+ Office docs, images, and audio — from inside any MCP client (Claude, Cursor, …).
42
+ Self-hosted; your files stay yours.
43
+
44
+ ## Why exomem
45
+
46
+ - **Meets you where you work.** exomem is an MCP *server*: your KB shows up as
47
+ native tools inside Claude, Cursor, or any MCP client — desktop and mobile. You
48
+ don't move into a new app; the KB comes to the agent you already use.
49
+ - **In place, not a silo.** It reads and writes your actual markdown files. They
50
+ stay plain, portable, yours — editable in Obsidian, versioned/backed-up however
51
+ you like. Most note-AI tools import *copies* into their own store; exomem
52
+ operates on the originals.
53
+ - **Multimodal, not just text.** Beyond markdown it extracts and searches PDFs,
54
+ Office docs (docx/xlsx/pptx), images (OCR + CLIP visual search), and audio/video
55
+ (speech-to-text) — so a photo, a scanned invoice, or a recording is findable.
56
+ (Office/audio extraction is common; the distinctive combination is multimodal +
57
+ MCP-native + over your live vault, plus CLIP *visual* retrieval.)
58
+ - **Real retrieval, not naive RAG.** Hybrid BM25 + vector fused via
59
+ reciprocal-rank-fusion, plus wikilink-graph signals and type-aware ranking, over
60
+ a *typed* corpus (raw sources vs compiled notes), with provenance and
61
+ write-governance.
62
+ - **Substrate, not a brain.** The server only does deterministic work (search,
63
+ extract, embed); reasoning happens in your client's model. No server-side LLM,
64
+ no proprietary cloud backend.
65
+
66
+ ## How it compares
67
+
68
+ - **vs. doc-chat / RAG apps:** they ingest copies into their own store and you
69
+ work inside their UI; exomem works in place over your live vault, inside your
70
+ existing agent.
71
+ - **vs. other MCP note servers:** most are text-only search/CRUD; exomem adds
72
+ multimodal extraction + CLIP visual search + a typed/governed knowledge model.
73
+
74
+ For a deeper point-in-time comparison with engraph, see
75
+ **[docs/comparison-engraph.md](docs/comparison-engraph.md)**.
76
+
77
+ ## 5-minute proof
78
+
79
+ Run exomem against the bundled sample vault before connecting your own notes:
80
+
81
+ ```bash
82
+ git clone <repo-url> exomem && cd exomem
83
+ uv sync
84
+ uv run python scripts/demo-sample-vault.py
85
+ ```
86
+
87
+ Expected shape:
88
+
89
+ ```text
90
+ exomem sample-vault demo
91
+ vault: examples/sample-vault
92
+
93
+ 1. doctor: PASS (lean profile)
94
+ 2. find "retrieval":
95
+ - Knowledge Base/Sources/Sessions/2026-06-30-sample-session.md
96
+ - Knowledge Base/Notes/Insights/retrieval-needs-owned-files.md
97
+ 3. get retrieval insight:
98
+ - title: Retrieval needs owned files
99
+ - type: insight
100
+ - excerpt: Local-first knowledge tools should retrieve from files the user already owns.
101
+ 4. audit: PASS (broken_wikilink, unprocessed_source)
102
+
103
+ demo PASS
104
+ ```
105
+
106
+ ## Quickstart (local)
107
+
108
+ The fastest path is **local, inside Claude Code, over your own vault — no cloud,
109
+ no OAuth, ~20 minutes**:
110
+
111
+ ```bash
112
+ uv sync # lean: keyword/BM25 search, no heavy deps
113
+ uv run python scripts/smoke-sample-vault.py
114
+ uv run python -m kb_mcp init --vault "/path/to/your/Obsidian"
115
+ uv run python -m kb_mcp doctor --vault "/path/to/your/Obsidian"
116
+ claude mcp add exomem --env KB_MCP_VAULT_PATH="/path/to/your/Obsidian" \
117
+ --env KB_MCP_DISABLE_EMBEDDINGS=1 -- \
118
+ uv --directory "$PWD" run python -m kb_mcp --transport stdio
119
+ uv run python -m kb_mcp install-skill # the "brain" — don't skip this
120
+ ```
121
+
122
+ **[SETUP-LOCAL.md](SETUP-LOCAL.md)** walks the local path end to end (vault
123
+ bootstrap, hybrid-vs-lean choice, the skill, and the optional auto-capture hooks).
124
+ For remote / mobile access, start with the
125
+ **[remote checklist](docs/remote-checklist.md)**, then use
126
+ **[docs/deployment.md](docs/deployment.md)** for the full walkthrough.
127
+
128
+ ## Tools
129
+
130
+ Two tiers. Tier 1 is type-routed and encodes the KB discipline; Tier 2 is a
131
+ filesystem escape hatch for what Tier 1 can't express.
132
+
133
+ **Tier 1 — type-routed (primary).** Use these whenever a Tier 1 op fits.
134
+
135
+ - `find` — read-only search across `Knowledge Base/`, type/project/tag filtered.
136
+ - `get` — read a full file anywhere under the vault root (including read-only
137
+ curated input folders). `frontmatter_only=true` returns just the frontmatter.
138
+ - `add` — capture a raw `source` page with full write discipline.
139
+ - `note` — create any of the six compiled page types (research-note, insight,
140
+ failure, pattern, experiment, production-log) with `ingested_into:` back-refs on
141
+ cited sources.
142
+ - `link` — create a typed entity under `Entities/<Type>/<Name>.md` (person,
143
+ concept, library, decision).
144
+ - `edit` — in-place edit of a compiled page. Modes: body / tags / surgical
145
+ `old_string`→`new_string`; `edits=[…]` (batch surgical); `row_key`+`take` (fill a
146
+ `[take: ]` opinion row); `field`+`value` (patch one frontmatter field). Bumps
147
+ `updated:`.
148
+ - `replace` — supersession: write a new page + flip the old one to
149
+ `status: superseded` with a `superseded_by:` back-link. The modify path for
150
+ substantial rewrites.
151
+ - `preserve` — capture a binary or text artifact to `Evidence/<scope>/<category>/`
152
+ (append-only).
153
+ - `audit` — read-only graph health check (broken wikilinks, orphan entities,
154
+ unprocessed sources, index/log drift, tag inconsistency).
155
+
156
+ **Tier 2 — filesystem-parity (escape hatches).** Use when Tier 1 can't express
157
+ what you need: new folder structures, files outside the typed-note set, or
158
+ surgical edits.
159
+
160
+ > **Lean surface (`KB_MCP_DISABLE_TIER2`).** Set `KB_MCP_DISABLE_TIER2=1` (in
161
+ > `.env` or the service environment) to drop all 8 Tier 2 tools from registration;
162
+ > the Tier 1 ops still load. Use it when the client *defers* MCP tools behind a
163
+ > keyword search — a smaller surface means an agent reaches `find`/`get`/`note`
164
+ > without wading past eight escape hatches. Default is unset: all tools register.
165
+
166
+ - `create_file` — write a file at an arbitrary vault path, optional frontmatter
167
+ dict. `kind="dir"` instead makes a folder (mkdir -p). Refuses Sources/Evidence;
168
+ curated trees require `allow_curated=true`.
169
+ - `list_directory` — list files + subfolders (recursive optional). Surfaces the
170
+ `type:` frontmatter field for `.md` entries. Read-only.
171
+ - `move_file` — rename/relocate. Rewrites inbound wikilinks by default.
172
+ - `delete` — **trash** a file OR folder (auto-detected). Moves to
173
+ `Knowledge Base/_trash/YYYY-MM-DD/` with a `.meta.json` sidecar; never permanent.
174
+ Recovery is `recover_from_trash`. Requires `confirm=true`; folders need
175
+ `recursive=true` if non-empty; refuses on inbound links unless
176
+ `force_orphan=true`.
177
+ - `list_trash` — enumerate recoverable trash entries (original path, timestamp,
178
+ force-flags used). Also surfaces drift. Read-only.
179
+ - `recover_from_trash` — undo a delete; reads the sidecar to find the original
180
+ location. Optional `restore_path` override.
181
+ - `append_to_file` — append text. Refuses on Sources/.
182
+ - `list_inbound_links` — find all files whose wikilinks resolve to a target.
183
+ Read-only. Useful before move/delete.
184
+
185
+ **Discipline preserved across both tiers:** Sources/ and Evidence/ are
186
+ append-only (no Tier 2 op writes there); curated input folders (configurable)
187
+ refuse Tier 2 writes by default — pass `allow_curated=true` as a deliberate
188
+ per-call acknowledgement; deletes are never permanent (`delete` trashes,
189
+ recoverable via `recover_from_trash`); every write logs to
190
+ `Knowledge Base/log.md`.
191
+
192
+ **Two-layer traceability:**
193
+
194
+ - `Knowledge Base/log.md` — durable content history. Writes only, KB-scoped. The
195
+ "what happened to the vault" record; never auto-purged.
196
+ - `logs/exomem.log` — service log. Every call (reads + writes) is surfaced via a
197
+ per-call middleware as `tool=<name> duration_ms=<n>
198
+ event=tool_success|tool_error`. The operational layer (did the call reach the
199
+ server, spot slow ops). Rotated in-process (5 MB × 5) — same on every platform.
200
+
201
+ ## One surface, three doors (MCP / REST / CLI)
202
+
203
+ Every operation is declared **once** in a command registry (`src/kb_mcp/commands.py`).
204
+ That single declaration drives all of:
205
+
206
+ - the **MCP tool** Claude calls (`find`, `note`, …),
207
+ - a **REST** route `POST /api/<name>` (the personal HTTP facade), and
208
+ - a **CLI** subcommand `kb <name>` (reads *and* writes, from a terminal or script).
209
+
210
+ Adding an operation is one registry entry — the surfaces can't drift. A
211
+ byte-identical schema-fidelity test pins the MCP tools so what Claude sees never
212
+ changes when the registry evolves.
213
+
214
+ **CLI (`exomem` / `kb`).** Installing the package adds console scripts; `exomem`
215
+ is the public command and `kb` is the short daily-driver alias.
216
+ `python -m kb_mcp` works too from source checkouts.
217
+ Verb-first, with a global `--json` envelope and `0`/`1`/`2` exit codes (success /
218
+ operation error / usage error):
219
+
220
+ ```bash
221
+ kb find "carbonation rig" --mode keyword # human listing (path title)
222
+ kb find "carbonation rig" --json # {"success": true, "data": [ … ]}
223
+ kb get "Notes/Insights/some-note" --json
224
+ kb note --note-type insight --title "…" --content "# …" # writes to the vault
225
+ # note's type-specific args use a --field escape so the CLI stays clean:
226
+ kb note --note-type research-note --title "…" --content "# …" --field project=my-project
227
+ ```
228
+
229
+ A failed op prints `Error [CODE]: message` (+ a remediation line) and exits `1`;
230
+ a missing required argument exits `2`.
231
+
232
+ **REST facade (`/api/<name>`).** Opt-in: set `KB_MCP_REST_API_KEY` to enable the
233
+ `/api/*` routes (off → `503`). Every registry op gets a route; the request body is
234
+ JSON, the response is the shared envelope. `GET /api/openapi.json` self-documents
235
+ the surface with real per-parameter schemas.
236
+
237
+ ```bash
238
+ curl -s -X POST http://127.0.0.1:8765/api/find \
239
+ -H "Authorization: Bearer $KB_MCP_REST_API_KEY" \
240
+ -H "Content-Type: application/json" \
241
+ -d '{"query": "carbonation rig", "mode": "keyword"}'
242
+ # → {"success": true, "data": [ … ]}
243
+ ```
244
+
245
+ **Shared envelope** (CLI `--json` + REST): success is `{"success": true, "data": …}`;
246
+ failure is `{"success": false, "error": {"code", "message", "remediation"}}` with a
247
+ stable, machine-readable `code`. Text-write fields keep the base64 binary-blob guard
248
+ (`BINARY_BLOB_REJECTED`) on both surfaces — push binaries through `/upload`, not a
249
+ text field.
250
+
251
+ ## Multimodal extraction (optional)
252
+
253
+ Two optional dependency extras turn binaries into searchable text/vectors. Both
254
+ **soft-fall-back**: if the libraries aren't installed, search degrades to
255
+ keyword/BM25 and uploads still work, just without server-side extraction.
256
+
257
+ - **`embeddings`** (`uv sync --extra embeddings`) — `torch` +
258
+ `sentence-transformers` + `pillow`. Adds the local vector half of hybrid `find`
259
+ (a bge text model) and **CLIP** image embedding for visual search. ~1–2 GB
260
+ download.
261
+ - **`media`** (`uv sync --extra media`) — server-side extraction on upload:
262
+ **faster-whisper** ASR for audio/video, **Tesseract** OCR for images,
263
+ **PyMuPDF** for PDFs, and **MarkItDown** for Office/HTML docs
264
+ (docx/xlsx/pptx/html). One system tool is not pip-installable: **Tesseract OCR**
265
+ (`winget install UB-Mannheim.TesseractOCR` on Windows, or point `KB_MCP_TESSERACT_CMD` at an existing binary).
266
+ ffmpeg needs no separate install: it is bundled by PyAV via faster-whisper, so
267
+ audio/video decode works out of the box.
268
+
269
+ **GPU note.** A CUDA GPU accelerates ASR/OCR/embedding but is **not required** —
270
+ CPU works, just slower (pick a smaller Whisper model with
271
+ `KB_MCP_WHISPER_MODEL=base`). On Windows + NVIDIA the `media` extra pins a CUDA-12
272
+ runtime (cublas/cudnn/cudart) that ctranslate2 needs alongside torch's cu132 build;
273
+ RTX 50-series (Blackwell, sm_120) is supported. See
274
+ **[docs/deployment.md](docs/deployment.md)** for the GPU bring-up and the
275
+ Blackwell/CUDA details. Disable extraction entirely with
276
+ `KB_MCP_DISABLE_MEDIA_EXTRACTION=1` (uploads still work; no searchable-text
277
+ extraction).
278
+
279
+ `pip install -e .` remains supported if you manage your own virtual environment,
280
+ but the documented path uses `uv` so the lockfile and the configured PyTorch index
281
+ are honored. Check a machine with `uv run python -m kb_mcp doctor --profile lean`
282
+ or `--profile hybrid|media|remote` before wiring a client. For a media host, run
283
+ `uv run python -m kb_mcp doctor --profile media` after installing the extra and
284
+ Tesseract so missing Python/system dependencies are reported before uploads rely
285
+ on extraction.
286
+
287
+ ## Remote access (optional)
288
+
289
+ To reach the vault from claude.ai on the web or mobile, the server runs as an
290
+ always-on HTTP service behind a public HTTPS endpoint, authenticated with
291
+ **GitHub OAuth** locked to a single login. claude.ai's MCP client connects to the
291
+ connector URL from Anthropic's cloud (not from your own device), so the endpoint must
293
+ be publicly reachable — a **Cloudflare Tunnel** (domain you own) or **Tailscale
294
+ Funnel** (free `*.ts.net` host) provides it.
295
+
296
+ Use **[docs/remote-checklist.md](docs/remote-checklist.md)** as the bring-up
297
+ checklist. Full setup — OAuth app, tunnel, the service installers (launchd /
298
+ systemd / NSSM), multi-host deployment, and troubleshooting — is in
299
+ **[docs/deployment.md](docs/deployment.md)**. Replace `<your-host>` /
300
+ `example.com` throughout with your own hostname.
301
+
302
+ ## Configuration
303
+
304
+ The server reads configuration from environment variables (or a `.env` file in
305
+ the repo root). The only required one is the vault path.
306
+
307
+ | Variable | Purpose |
308
+ |---|---|
309
+ | `KB_MCP_VAULT_PATH` | **Required.** Vault root — the folder that contains `Knowledge Base/`. |
310
+ | `KB_MCP_DISABLE_EMBEDDINGS` | `1` forces keyword/BM25-only search (no torch/vectors). |
311
+ | `KB_MCP_DISABLE_TIER2` | `1` drops the 8 Tier 2 escape-hatch tools (leaner tool surface). |
312
+ | `KB_MCP_REST_API_KEY` | Enables the personal `POST /api/<name>` REST facade (bearer-auth). Unset → `/api/*` returns `503`. |
313
+ | `KB_MCP_DISABLE_MEDIA_EXTRACTION` | `1` skips server-side OCR/ASR/PDF/office extraction. |
314
+ | `KB_MCP_DISABLE_CLIP` | `1` disables CLIP visual image search. |
315
+ | `KB_MCP_CLIP_DEVICE` | `cpu`/`cuda` override for CLIP (defaults to CPU when ASR is active). |
316
+ | `KB_MCP_IMAGE_TAGS` | Set to append zero-shot CLIP tags (`Tags: invoice, table, …`) to an image's indexed text. Default off; no new dependency (reuses CLIP). |
317
+ | `KB_MCP_IMAGE_TAGS_TOPK` | Max image tags to emit per image (default `5`). |
318
+ | `KB_MCP_IMAGE_TAGS_THRESHOLD` | Raw-cosine floor a tag must clear (default `0.22`). |
319
+ | `KB_MCP_DIARIZE` | Set to enable opt-in ASR speaker diarization (`[Speaker A]: …` turns). Requires the diarizer sidecar (see below). |
320
+ | `KB_MCP_DIARIZE_DEVICE` | Sidecar device: `cpu`/`cuda`/`auto` (default `auto` → GPU when available, else CPU). |
321
+ | `KB_MCP_DIARIZE_SIDECAR_PYTHON` | Override path to the diarizer sidecar's Python (default `sidecar/diarizer/.venv/Scripts/python.exe`). |
322
+ | `KB_MCP_DIARIZE_TIMEOUT` | Seconds the sidecar subprocess may run before soft-failing to a plain transcript (default: `max(900, duration×6)`). |
323
+ | `KB_MCP_DIARIZE_MODEL` | pyannote checkpoint the sidecar loads (default `pyannote/speaker-diarization-3.1`). |
324
+ | `KB_MCP_DIARIZE_CLUSTERING_THRESHOLD` | Optional pyannote clustering-threshold override (higher → fewer clusters). Default: pyannote's own. |
325
+ | `KB_MCP_VOICE_DEVICE` | `cpu`/`cuda` override for the ECAPA voice embedder (defaults to CPU when ASR is active). |
326
+ | `KB_MCP_VOICE_EMBED_MODEL` | ECAPA checkpoint for named-speaker attribution (default `speechbrain/spkrec-ecapa-voxceleb`). |
327
+ | `KB_MCP_WHISPER_MODEL` | Whisper model size for ASR (e.g. `base`, `small`, `large-v3`). |
328
+ | `KB_MCP_TESSERACT_CMD` | Path to the `tesseract` binary if not auto-discovered. |
329
+ | `KB_MCP_DUP_THRESHOLD` | Near-duplicate cosine-warning threshold (default `0.90`). |
330
+ | `KB_MCP_DISABLE_QUERY_LOG` | `1` disables the retrieval-eval query/write logs. |
331
+ | `KB_MCP_HOST` | Bind host for the HTTP transport (default `127.0.0.1`). |
332
+
333
+ Remote-only (see [docs/deployment.md](docs/deployment.md)): `KB_MCP_BASE_URL`,
334
+ `GITHUB_CLIENT_ID`, `GITHUB_CLIENT_SECRET`, `KB_MCP_GITHUB_USERNAME`,
335
+ `KB_MCP_JWT_SIGNING_KEY`.
336
+
337
+ ### Speaker diarization sidecar
338
+
339
+ `KB_MCP_DIARIZE` adds `[Speaker A]: …` (or, with voice profiles enrolled, `[Alice]: …`)
340
+ turns to transcripts. The pyannote *who-spoke-when* pipeline is **incompatible** with this
341
+ server's bleeding-edge `torch-2.12+cu132` build, so it runs in an **isolated sidecar venv**
342
+ (`sidecar/diarizer/`) as a subprocess, pinned to a standard `torch-2.9.1+cu130` that still has
343
+ Blackwell `sm_120` kernels — so it runs **on the GPU** (`KB_MCP_DIARIZE_DEVICE=auto`, ~20× faster
344
+ than CPU) and falls back to CPU when no GPU is available. The main service shells out to the sidecar for turn detection and resolves the
345
+ anonymous turns to enrolled names locally via ECAPA. The whole feature is **default-off and
346
+ soft-fail**: with the flag unset, or the sidecar unbuilt, or anything failing, extraction is
347
+ byte-for-byte the plain transcript.
348
+
349
+ Provision it once per box (provisioning needs `uv`; `uv` is not needed at service runtime):
350
+
351
+ ```powershell
352
+ uv sync --extra media --extra embeddings --extra diarization # main venv (ECAPA + ASR)
353
+ pwsh -File scripts/setup-diarizer.ps1 -Prewarm # builds sidecar/diarizer/.venv
354
+ ```
355
+
356
+ `setup-diarizer.ps1` is the Windows convenience wrapper (it also runs an import smoke test + optional
357
+ `-Prewarm`). On **Linux/macOS** build the sidecar with the underlying command directly:
358
+
359
+ ```bash
360
+ uv sync --directory sidecar/diarizer
361
+ ```
362
+
363
+ The sidecar is **cross-platform**: its torch source is platform-conditional — the cu130 (CUDA-13)
364
+ index on Windows/Linux (GPU, Blackwell `sm_120`), and default PyPI on macOS (CPU/MPS, since cu130
365
+ has no macOS wheels). uv auto-fetches a Python 3.12 for it. The pyannote checkpoints are HF-gated:
366
+ set `HUGGINGFACE_TOKEN` and accept the conditions for **both** `pyannote/speaker-diarization-3.1`
367
+ and `pyannote/segmentation-3.0`. Then set `KB_MCP_DIARIZE=1`, enroll yourself
368
+ (`exomem enroll-speaker --name <you> --self <sample.wav>`), and restart.
369
+
370
+ ## License
371
+
372
+ AGPL-3.0-or-later — see [LICENSE](LICENSE).
373
+
374
+ ## Releases
375
+
376
+ Versioning follows the lightweight SemVer policy in
377
+ **[docs/release.md](docs/release.md)**. The source of truth is
378
+ `pyproject.toml`'s `[project].version`; release tags use `vX.Y.Z`. Release
379
+ Please drives future version bumps from Conventional Commit messages.
@@ -0,0 +1,88 @@
1
+ kb_mcp/__init__.py,sha256=kUR5RAFc7HCeiqdlX36dZOHkUI5wI6V_43RpEcD8b-0,22
2
+ kb_mcp/__main__.py,sha256=KBZsEjPbw7SMJaXx8e3RR2Ie6pXow8fMTInLWCTOED8,21027
3
+ kb_mcp/access.py,sha256=DOyti-gG1poEdp_lj68W7ut63UKCxFU4jvnbxHFkhe4,5449
4
+ kb_mcp/add.py,sha256=9HpFBsQOls_pzLuRwSHuZ48iiLmohCkBHG-NKcqoHuY,11860
5
+ kb_mcp/append_to_file.py,sha256=-lGDmZujYn3LN5jhgqk3H5Ewg39u1fwABODFX5XdSxo,4057
6
+ kb_mcp/attention.py,sha256=BvGKEiHgLva1Dh1iQ-nGgCV24SDSyJirnvbn1qS-jno,9717
7
+ kb_mcp/audit.py,sha256=ps8waVUgwofweu7CAgP7Xur9s__LwxQywzI18Es4tvA,65023
8
+ kb_mcp/audit_fix.py,sha256=FKMVRlPHHwXgt86WV4SkBK-YPBYkIwt2AHVoF9BCYqI,17479
9
+ kb_mcp/backfill.py,sha256=VQjPBoVer0D1vludSjNuOIF-D6q5M3ogYSU82B8TZ0A,7231
10
+ kb_mcp/bm25.py,sha256=cGzkcqJRzUcRyG7LTFLFBwuSUeEug8vS28WL2XtfA5E,7430
11
+ kb_mcp/cf_access.py,sha256=_JXrwO_U7hR1UxVTyHXrGWbY6oGFCOMpQZGa0DzlmX4,2114
12
+ kb_mcp/cli_ops.py,sha256=7JnJPW9MOXJlUR5ah55knnElyXRz2XcRQjF3SG86Rio,8772
13
+ kb_mcp/commands.py,sha256=Z-r9gq9SLX1cXDjyztqSB879FmjcBBXy93tOOjB9zts,94010
14
+ kb_mcp/compile_proposal.py,sha256=nVgDfxM32UniMd6tCvSYE0JNNQNMS22pMaVIdItbrxM,5398
15
+ kb_mcp/context_pack.py,sha256=IHM1AL-y5ydF-qkTIRPqD8BMjS6HONZZjH709CdzVF8,14684
16
+ kb_mcp/corpus_aware.py,sha256=9ZnMBBNihyIPJ9wPdIWh3USjA9FPCO4RPPPWy5TFYRI,14972
17
+ kb_mcp/create_directory.py,sha256=_NoH_DJ7Mg5lYSbdXuQvkFt70TNji2gAdOc5gJjtH0c,3990
18
+ kb_mcp/create_file.py,sha256=P7I5z4thJarOTe3ak-maXLMDNFir1WqiZqlIbPRMU40,6021
19
+ kb_mcp/delete_directory.py,sha256=3s9e57SoI-QYwI2hsmKXSHwG--WT-1cEqwx0KmjCPW4,9382
20
+ kb_mcp/delete_file.py,sha256=wNnOqfTL5mSEhEeM5qTCKsa-aQZACWIlhkI5-sQj9xk,11263
21
+ kb_mcp/doctor.py,sha256=gIRRBLT3EJzfQ09TLpjCj7N1tN6D4owuXMzGK60TQs8,14488
22
+ kb_mcp/edit.py,sha256=xZgBbVj8J9s-wfuTjGaFPjYag9zNU-PoNvDy9V4R9kQ,26271
23
+ kb_mcp/embeddings.py,sha256=tyPUumQKZ2eEQTjLPcFBUGLJGYA_v8aQvIqtcRZ6PuE,34916
24
+ kb_mcp/enroll_speaker.py,sha256=ZXVWb_6FjvTZ3DEMYQXvqFshORl8PessnFHmIBmi9rM,3056
25
+ kb_mcp/eval_metrics.py,sha256=8vmsJ4LV5r4gWhPZJJ-Rs_CUNhhLcspHpZig44XBlE8,2241
26
+ kb_mcp/evolution.py,sha256=E14RtCqZcoL-tR-rjGi8PfIysB2ohNvE9tD4Mk7OqPw,10189
27
+ kb_mcp/extract.py,sha256=NwG3m2Om6Qsiwy_XcKISxx773gUK1EhzVgu8H17v8bs,36503
28
+ kb_mcp/file_watcher.py,sha256=5feSumqBccMONzS0jH2ncupQwLDNaPXqqfIX85U6Z6c,8428
29
+ kb_mcp/find.py,sha256=G1Qep2QmDiuk241u808JCAsSAdbZ9wPmlWS5kS-fRW8,74608
30
+ kb_mcp/fusion.py,sha256=5ao_vtiDJd0pL9ikPHsIzrbmmqmj00HEa1Oi3lvJrm8,2390
31
+ kb_mcp/get_frontmatter.py,sha256=IIXYRlihIh2mYHi8y3LujVBdHkSjYlDpD0sek_OPZhc,1682
32
+ kb_mcp/get_page.py,sha256=BW9jIbrdo-3G_XuoIJhMkJ_a8z6Kzm85oL0cvgF0S5E,4347
33
+ kb_mcp/guards.py,sha256=51h07pcQUfTKCFh6VPycDbvhp7h9ssbuMBDSPNYImow,3774
34
+ kb_mcp/icon.svg,sha256=17i6QgJzCZiJ4PtzPYissMGs9_LsyEz73WSn5JU1Llo,267
35
+ kb_mcp/image_tags.py,sha256=eL-qCgmbRHjckHpZs2ZOOQxXmeE-Hh3g2xRTEhgkbQY,7969
36
+ kb_mcp/indexes.py,sha256=8tv_8XlSFIzPpKS_q0fJ4kQFRaL3md8SdxLVFC4XmZs,24475
37
+ kb_mcp/init.py,sha256=W1AovrsnRjqiEwdUzYO1zJsB2MjEoCeBSumAG2mb8Fg,2314
38
+ kb_mcp/install_hook.py,sha256=BeSsucUwBwMs_gUgeUhhXlfbN2zYE_H0kwG1grDvnPI,6082
39
+ kb_mcp/install_skill.py,sha256=PM_V75laF16qB2kFA-uhIv6zxniQfu14hckLOa68_d0,4359
40
+ kb_mcp/link.py,sha256=MCuSjV5TBei2Cb-V7IKMOn7I4tg8nOtXMzIkGnCp_vE,15912
41
+ kb_mcp/list_directory.py,sha256=2a2o_Jw3LvXuFH7nI2NxhmeZWRyxERIiWmv4UpqxGmY,4030
42
+ kb_mcp/list_inbound_links.py,sha256=9Lrwmv1p7xgNQ6xBC0TZZJ3552b1-MseHMBEs6dLduQ,2130
43
+ kb_mcp/list_trash.py,sha256=NF79cCWDip8lZVNpjXH8VJ2IhK3OVhzPSq8W-XPVNtY,5823
44
+ kb_mcp/logging_config.py,sha256=JgzeSdpRcmjpbx6xkDgYj-5VEjQzMKQAzlymH6NO-yU,1244
45
+ kb_mcp/media_worker.py,sha256=SnmaFeJQdJ9_A5kJd2iDkVudk3aNmBSouzU4eg7IOvo,10160
46
+ kb_mcp/move_file.py,sha256=oeLVyR9Aimquq5_O5T1aSQgaUR3BxzrSvJCHo2Ahn4c,11513
47
+ kb_mcp/multi_edit.py,sha256=pKj6t351c6cT14UbuLqLyajgK7dPNhcsW9PE09r0c1o,5578
48
+ kb_mcp/note.py,sha256=6lqHNHvaXltMTxIeqqwtk1EjdbK5wEnRwsC5_IMYyA8,34481
49
+ kb_mcp/preserve.py,sha256=VlF2lXCRQui9AtrrcffF-Im7hLUqNi8ELLux4ziRruM,25916
50
+ kb_mcp/project_keys.py,sha256=eNc6unQUVHyVQnzJpIAtmX6aikYOB50MyOaJ3Av390c,11565
51
+ kb_mcp/provenance.py,sha256=3Tjq386mNIPORneiacWmBbud-8D8x8ijREYLd8JqhWg,4495
52
+ kb_mcp/query_data.py,sha256=vy_kO6aItGGTHtU5rffOw2RSOujk2AmlG_SREqzFQ1Q,19845
53
+ kb_mcp/query_log.py,sha256=osVPBEs6w-JuvlXUD4rYtKTMy5zq1k59sIjjkRn1vlg,4757
54
+ kb_mcp/reconcile.py,sha256=CeuDiOBbcUZm3kjGJLXPtNj7R9G4ohUqREbhVqw7J6w,4872
55
+ kb_mcp/recover_from_trash.py,sha256=qAsYe7SKICss5JDgS84bGEx4TlhQ4wJbl6We3zB3OjY,6248
56
+ kb_mcp/replace.py,sha256=tcWu8oX8HlhM8yDAgbHypw--K8AABgtMkQpRRIK1XDM,12300
57
+ kb_mcp/schema.py,sha256=l3DOlU9ewz5eTvwqUCWf9fgUZEHm4LvbCrboI22kufI,7204
58
+ kb_mcp/server.py,sha256=6z3_xmmB6eFoHYL1lTDxtL6hlWHbk0e6oQQrtLM2Hg0,46670
59
+ kb_mcp/set_frontmatter_field.py,sha256=lgZ1jrdTqZjPgsajgbZKDAE4FPvWz3DgDAU-NxYRAdg,7749
60
+ kb_mcp/set_take.py,sha256=qQPq7zJ21tf9PHiR7DwdquGHWG6fxHvqbKREf09heMk,6136
61
+ kb_mcp/speaker_assignment.py,sha256=fEf6nhJyXWe2p1iRhq8Um10ZJ3ShUYXoZtOXATcqa2U,1883
62
+ kb_mcp/speaker_attribution.py,sha256=u-UPNwML2ESfRXoj0ZOvAWGqj6ZXkCNiPHGhzSn-umc,9162
63
+ kb_mcp/upload_tokens.py,sha256=LeGuLrhSYo9gQxd6pbpt1xNlNyk_CxBHBaR9DHpjMyM,3404
64
+ kb_mcp/vault.py,sha256=YY079Sf7GBNT0sf9CQ1eWDacR4IZN6n4ff8AitB797g,35638
65
+ kb_mcp/voice_embed.py,sha256=sVsUg5NG9AVfD_jk4t4BFwwmaU05j0TJynIORM4z3LM,8181
66
+ kb_mcp/voice_profiles.py,sha256=ZSerdto7NkPDGbbz9FYS4pLIErqUqVd3-DrO7urCTEg,4421
67
+ kb_mcp/_hooks/kb-capture-nudge.sh,sha256=E6tBTKW6y63xdAW5tULOBFczYZxjnH_GFpWqxYakGqQ,642
68
+ kb_mcp/_hooks/kb-retrieve-nudge.sh,sha256=eMJuIBM7AmpvQnkHqf38cRHEvx2dUj_k367gyeR-27w,603
69
+ kb_mcp/_hooks/kb_capture_nudge.py,sha256=Y8YDWBpn6Y7QiOhOmEdYh5TG82iPqt71h49wTLFGyTg,7266
70
+ kb_mcp/_hooks/kb_retrieve_nudge.py,sha256=F9XwWEWBiW7oNU-R31g85LR2IcNYBoeaYttNP-ZcNfU,4318
71
+ kb_mcp/_scaffold/index.md,sha256=PvHYHUWDL-GEAlNL6diWDGoQKYWj_hXF9Qr7EPo69y8,593
72
+ kb_mcp/_scaffold/log.md,sha256=zz5k0R6L9s0G7io-e6pg2bDOh1_YTpnztcC9Vw-JKQI,189
73
+ kb_mcp/_scaffold/Entities/index.md,sha256=DAG4f0bU0FhAsoQDbeeHsRb-Sjf-RfFYb7LSqSDkxjI,575
74
+ kb_mcp/_scaffold/Notes/index.md,sha256=KPhYeXgSFQfpWuuLjAdlqIHFM7M2ba8BwNUYlu4MZRc,976
75
+ kb_mcp/_scaffold/Sources/index.md,sha256=0KsaEkcl0lWsVqOEKR6Flb_QtFVx-TrFXgz3KGbC__s,415
76
+ kb_mcp/_scaffold/_Schema/SKILL.md,sha256=TwztBYNMfSj4MaGAimZ-BU-aE9clxf-7dW_TCv9F2Vo,34784
77
+ kb_mcp/_scaffold/_Schema/project-keys.yaml,sha256=ujPfiG09zypgwAKc-G_AQRkEI7JJ3QghceR_0qjRy4Y,1245
78
+ kb_mcp/_scaffold/_Schema/references/audit-checks.md,sha256=CacXCrK7DNIYoFdbN8aVjjkEIx3M55KaHc9C9WI2AzE,5308
79
+ kb_mcp/_scaffold/_Schema/references/frontmatter.md,sha256=3wvCoCt4hSsRwFOpTkpIYtfmGFwykMVvs3YoisoksU4,7856
80
+ kb_mcp/_scaffold/_Schema/references/operations.md,sha256=GPRRphzP1RXLIe7br6KQ2PBzjIHz7OxqjjsahrQ-CQQ,17052
81
+ kb_mcp/_scaffold/_Schema/references/page-types.md,sha256=Pzur_qCKCv_T7K0HiMVBtKMBv01hIBOhTmSSMNWI1is,13959
82
+ kb_mcp/_scaffold/_Schema/references/supersession.md,sha256=VZaK5KIBRXqQDzycno20TjxPrhkM0-4wkEFEiToBodY,4488
83
+ kb_mcp/_scaffold/_Schema/references/write-scope.md,sha256=4ab1E_F-iL3aB2Zh1Dl4k6T-Q7GFRu5eAEHXrK7oh5s,6370
84
+ exomem-0.2.1.dist-info/METADATA,sha256=M8aUfYkXCmHdUUHBMLznrcTQcwsRKRao3UFmJXQSqF8,19712
85
+ exomem-0.2.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
86
+ exomem-0.2.1.dist-info/entry_points.txt,sha256=FsE9NmZ1cH4C7mC8mHPDd1Vp51Wq-8UjinqEIHqFULE,74
87
+ exomem-0.2.1.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
88
+ exomem-0.2.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ exomem = kb_mcp.__main__:main
3
+ kb = kb_mcp.__main__:main