mneme-cli 0.4.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. {mneme_cli-0.4.0/mneme/templates/workspace → mneme_cli-0.5.0}/AGENTS.md +40 -2
  2. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/CHANGELOG.md +54 -0
  3. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/FEATURES.md +6 -3
  4. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/PKG-INFO +140 -19
  5. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/README.md +135 -16
  6. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/__init__.py +1 -1
  7. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/config.py +4 -11
  8. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/core.py +748 -661
  9. mneme_cli-0.5.0/mneme/search.py +318 -0
  10. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/server.py +2 -2
  11. mneme_cli-0.5.0/mneme/templates/workspace/.gitignore +9 -0
  12. {mneme_cli-0.4.0 → mneme_cli-0.5.0/mneme/templates/workspace}/AGENTS.md +2 -2
  13. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/templates/workspace/README.md +1 -1
  14. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/ui.html +17 -17
  15. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme_cli.egg-info/SOURCES.txt +2 -0
  16. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/pyproject.toml +3 -3
  17. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/tests/test_agent_loop.py +4 -1
  18. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/tests/test_bug_regressions.py +20 -15
  19. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/tests/test_core.py +225 -205
  20. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/tests/test_ingest_csv.py +1 -1
  21. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/tests/test_profile.py +1 -1
  22. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/tests/test_schema_search.py +23 -5
  23. mneme_cli-0.5.0/tests/test_search.py +142 -0
  24. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/tests/test_tornado_lint.py +1 -1
  25. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/tests/test_trace.py +1 -1
  26. mneme_cli-0.4.0/mneme/templates/workspace/.gitignore +0 -9
  27. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/CLAUDE.md +0 -0
  28. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/CODER.md +0 -0
  29. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/EXAMPLES.md +0 -0
  30. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/LICENSE +0 -0
  31. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/MANIFEST.in +0 -0
  32. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/__main__.py +0 -0
  33. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/profiles/eu-mdr.md +0 -0
  34. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/profiles/iso-13485.md +0 -0
  35. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/profiles/mappings/dds.json +0 -0
  36. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/profiles/mappings/requirements.json +0 -0
  37. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/profiles/mappings/risk-register.json +0 -0
  38. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/profiles/mappings/test-cases.json +0 -0
  39. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/profiles/mappings/user-needs.json +0 -0
  40. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/templates/workspace/inbox/.gitkeep +0 -0
  41. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/templates/workspace/index.md +0 -0
  42. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/templates/workspace/log.md +0 -0
  43. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/templates/workspace/profiles/README.md +0 -0
  44. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/templates/workspace/profiles/mappings/.gitkeep +0 -0
  45. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/templates/workspace/schema/entities.json +0 -0
  46. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/templates/workspace/schema/graph.json +0 -0
  47. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/templates/workspace/schema/tags.json +0 -0
  48. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/templates/workspace/sources/.gitkeep +0 -0
  49. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/templates/workspace/wiki/_templates/page.md +0 -0
  50. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/setup.cfg +0 -0
  51. {mneme_cli-0.4.0 → mneme_cli-0.5.0}/tests/__init__.py +0 -0
@@ -69,7 +69,7 @@ A mneme workspace is a directory. Its shape is stable across versions:
69
69
  graph.json relationship graph
70
70
  tags.json tag registry
71
71
  traceability.json trace links between pages
72
- memvid/ optional .mv2 archives (semantic search)
72
+ search.db SQLite FTS5 search index (rebuilt from wiki)
73
73
  profiles/ workspace-local profiles and CSV mappings
74
74
  mappings/ JSON column mappings for ingest-csv
75
75
  exports/ JSON / markdown exports
@@ -107,7 +107,7 @@ mneme tornado --client <client> # batch from inbox/
107
107
  ```
108
108
 
109
109
  `ingest` is atomic: it writes the wiki page, updates the schema, and
110
- advances the Memvid archive in one operation. `ingest-csv` produces one
110
+ indexes the page in SQLite FTS5 in one operation. `ingest-csv` produces one
111
111
  wiki page per row, with trace links derived from the mapping. `tornado`
112
112
  is a bulk inbox processor — it auto-detects page type and routes CSVs
113
113
  through `ingest-csv`, everything else through `ingest`.
@@ -207,6 +207,44 @@ baseline, your current wiki page, and a fresh ingest of the new source.
207
207
  If there are conflicts, the page is left with merge markers. Edit them
208
208
  out manually, then run `resync-resolve`.
209
209
 
210
+ ### 3.6 TAG — agent-driven tagging
211
+
212
+ ```bash
213
+ mneme tags suggest <client>/<page> # build tag packet
214
+ mneme tags suggest <client>/<page> --json # raw dict
215
+ mneme tags apply <client>/<page> --add t1,t2 --remove t3
216
+ ```
217
+
218
+ `mneme tags suggest` builds a **tag packet**: the page content, current
219
+ tags, the workspace tag taxonomy (every existing tag with usage counts),
220
+ active profile guidance, and a ready-to-paste prompt instructing you to
221
+ choose 3–7 tags. Mneme does **not** propose tags itself — content
222
+ understanding is your job. The packet gives you all the context you need.
223
+
224
+ Your contract when consuming a tag packet:
225
+
226
+ 1. **Prefer existing tags** from the taxonomy when they fit. Consistency
227
+ matters more than novelty — `iso-13485` should not become `iso13485`
228
+ on the next page.
229
+ 2. **Add new tags only** when no existing tag captures the concept.
230
+ 3. Follow the format: lowercase, hyphenated (`risk-management`, not
231
+ `Risk Management`).
232
+ 4. Do not propose generic tags (`summary`, `overview`, `report`).
233
+ 5. Do not add the client slug — it is auto-applied.
234
+ 6. Output JSON: `{"tags": ["existing-a", "existing-b"], "new_tags": ["proposed-c"]}`.
235
+
236
+ `mneme tags apply` is **atomic**: it rewrites the wiki page frontmatter,
237
+ updates `schema/tags.json`, re-syncs the page to the FTS5 index, and
238
+ appends a log entry — all in one operation. Search picks up the new tags
239
+ immediately. Use `--add` and/or `--remove`, comma-separated.
240
+
241
+ Existing taxonomy ops:
242
+
243
+ ```bash
244
+ mneme tags list # all tags + counts
245
+ mneme tags merge <old> <new> # rename across all pages
246
+ ```
247
+
210
248
  ---
211
249
 
212
250
  ## 4. Profiles and the writing-style contract
@@ -4,6 +4,60 @@ All notable changes to this project are documented here.
4
4
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
5
5
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
6
 
7
+ ## [0.5.0] - 2026-04-13
8
+
9
+ ### Breaking Changes
10
+
11
+ - **Replaced memvid-sdk with SQLite FTS5.** The `memvid/` directory and `.mv2`
12
+ archives are no longer used. Search is now powered by a local `search.db`
13
+ file using BM25 ranking with Porter stemming. **Zero external dependencies**
14
+ for search — `sqlite3` is in the Python stdlib.
15
+ - `mneme repair` now rebuilds the FTS5 index instead of memvid archives.
16
+ - `mneme drift` reports `unindexed` / `orphaned` / `stale` instead of
17
+ `missing_from_memvid` / `orphan_frames`.
18
+ - `get_stats()` returns a `search` key (page_count, db_size_bytes,
19
+ search_latency_ms) instead of `memvid`.
20
+ - `sync_page_to_memvid()` renamed to `sync_page_to_index()`. Returns
21
+ `bool` (indexed) instead of `int` (frame count).
22
+ - Removed `chunk_body()`, `_sanitize_memvid_query()`, and all chunking
23
+ config (`MAX_CHUNK_SIZE`, `MIN_CHUNK_SIZE`, `MAX_CHUNKS_PER_INGEST`,
24
+ `CHUNK_COMMIT_BATCH`).
25
+ - Removed `MEMVID_DIR`, `MASTER_MV2`, `PER_CLIENT_DIR` config constants.
26
+ Replaced by `SEARCH_DB`.
27
+
28
+ ### Added
29
+
30
+ - **`mneme reindex`** command — rebuild search index from wiki pages.
31
+ - **`ingest-dir --recursive` / `-r`** — recurse into subdirectories.
32
+ - **`ingest-dir --preserve-structure`** — mirror source directory structure
33
+ in wiki subdirectories (avoids dedup collisions between same-basename files
34
+ in different directories).
35
+ - **`ingest-csv --delimiter`** flag with auto-detection via `csv.Sniffer`.
36
+ - **`.xlsx` ingest support** — install with `pip install "mneme-cli[xlsx]"`.
37
+ Sheets are rendered as markdown tables.
38
+ - **`mneme trace matrix --csv [--out FILE]`** — export the trace matrix as
39
+ CSV for QMS audits and DHF inclusion.
40
+ - **`graph.json` auto-populated** during ingest from wiki page wikilinks
41
+ and `related` frontmatter.
42
+ - **`stats` relationship count** now includes traceability.json links, not
43
+ just graph.json edges.
44
+ - **log.md rotation** — entries beyond `LOG_MAX_ENTRIES` (default 500) are
45
+ archived to `log-archive-YYYY-MM-DD.md`.
46
+
47
+ ### Fixed
48
+
49
+ - `mneme status` crash (UnboundLocalError on `log_content`).
50
+ - CSV ingest crash on `None` cells (`row.get()` returning None).
51
+ - Duplicate ingest detection now uses full source path, not just filename
52
+ (two `INSTRUCTIONS.md` files in different directories now both ingest).
53
+
54
+ ### Removed
55
+
56
+ - `memvid-sdk` dependency.
57
+ - `MNEME_NO_MEMVID` env var (no longer needed — FTS5 is always available).
58
+ - Chunking logic (`chunk_body`, `MAX_CHUNK_SIZE`, frame management).
59
+ - Tantivy-reserved-word query sanitizer (FTS5 has different syntax).
60
+
7
61
  ## [Unreleased]
8
62
 
9
63
  ### Added
@@ -17,13 +17,14 @@
17
17
  | `mneme new` | Scaffold a new workspace from the bundled template (preferred over `init`) |
18
18
  | `mneme init` | Scaffold a workspace in cwd (legacy) |
19
19
  | `mneme --workspace <dir>` / `MNEME_HOME=<dir>` | Run any command against a specific workspace |
20
- | `mneme ingest` | Atomic ingest: source -> wiki + Memvid + schema |
20
+ | `mneme ingest` | Atomic ingest: source -> wiki + FTS5 index + schema |
21
21
  | `mneme resync` | Diff-aware re-ingest: 3-way merge (baseline / wiki / fresh ingest) via `git merge-file` |
22
22
  | `mneme resync-resolve` | Mark a conflicted resync page as resolved after editing out markers |
23
23
  | `mneme ingest-dir` | Batch ingest all files from a directory |
24
24
  | `mneme search` | Dual-layer search with `--client` scoping |
25
25
  | `mneme lint` | Health check: orphan pages, dead links, stale pages, citations, schema drift, coverage |
26
- | `mneme sync` | Sync wiki pages to Memvid |
26
+ | `mneme sync` | Sync wiki pages to FTS5 search index |
27
+ | `mneme reindex` | Rebuild search index from wiki pages |
27
28
  | `mneme drift` | Detect layer desynchronization |
28
29
  | `mneme stats` | Health overview |
29
30
  | `mneme repair` | Fix corrupted archives and schema |
@@ -31,6 +32,8 @@
31
32
  | `mneme recent` | Show last N activity log entries |
32
33
  | `mneme tags list` | List all tags with page counts |
33
34
  | `mneme tags merge` | Merge one tag into another across all pages |
35
+ | `mneme tags suggest <page>` | Build a *tag packet* for an LLM agent (page content + taxonomy + prompt) |
36
+ | `mneme tags apply <page> --add t1,t2 --remove t3` | Atomic tag update: rewrites frontmatter, updates schema/tags.json, re-syncs FTS5 index |
34
37
  | `mneme diff` | Git-aware diff for a wiki page |
35
38
  | `mneme snapshot` | Versioned zip archive of a client + git tag |
36
39
  | `mneme dedupe` | Detect near-duplicate wiki pages |
@@ -54,7 +57,7 @@
54
57
  | `mneme scan-repo` | Scan code repo, compare against QMS docs, find gaps |
55
58
  | `mneme tornado` | Inbox processor: auto-detect type/client, ingest, archive to sources |
56
59
  | `mneme ingest-csv` | CSV ingest: one row = one wiki page, with column-to-frontmatter mapping and auto trace links |
57
- | `mneme demo clean` | Remove all demo content: demo-retail client, demo/ folder, schema entries, memvid manifest, index/log entries |
60
+ | `mneme demo clean` | Remove all demo content: demo-retail client, demo/ folder, schema entries, search index entries, index/log entries |
58
61
 
59
62
  ### Web UI (localhost:3141)
60
63
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mneme-cli
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: Mnemosyne - CLI tool that turns documents into a searchable second brain. Ingest once, query forever.
5
5
  Author-email: Tolis Moustaklis <apostolos.moustaklis@gmail.com>
6
6
  License-Expression: MIT
@@ -9,7 +9,7 @@ Project-URL: Repository, https://github.com/tolism/mneme
9
9
  Project-URL: Issues, https://github.com/tolism/mneme/issues
10
10
  Project-URL: Documentation, https://github.com/tolism/mneme#readme
11
11
  Project-URL: Changelog, https://github.com/tolism/mneme/blob/main/CHANGELOG.md
12
- Keywords: knowledge-management,second-brain,cli,wiki,memvid,llm,qms,obsidian,traceability
12
+ Keywords: knowledge-management,second-brain,cli,wiki,sqlite,fts5,llm,qms,obsidian,traceability
13
13
  Classifier: Development Status :: 3 - Alpha
14
14
  Classifier: Environment :: Console
15
15
  Classifier: Intended Audience :: Developers
@@ -28,12 +28,14 @@ Classifier: Topic :: Text Processing :: Markup :: Markdown
28
28
  Requires-Python: >=3.9
29
29
  Description-Content-Type: text/markdown
30
30
  License-File: LICENSE
31
- Requires-Dist: memvid-sdk>=2.0.0
32
31
  Requires-Dist: portalocker>=2.0.0
33
32
  Provides-Extra: pdf
34
33
  Requires-Dist: pymupdf>=1.23.0; extra == "pdf"
34
+ Provides-Extra: xlsx
35
+ Requires-Dist: openpyxl>=3.1.0; extra == "xlsx"
35
36
  Provides-Extra: all
36
37
  Requires-Dist: pymupdf>=1.23.0; extra == "all"
38
+ Requires-Dist: openpyxl>=3.1.0; extra == "all"
37
39
  Provides-Extra: release
38
40
  Requires-Dist: build>=1.0.0; extra == "release"
39
41
  Requires-Dist: twine>=5.0.0; extra == "release"
@@ -163,15 +165,18 @@ One installed CLI serves many projects — each workspace is just a directory.
163
165
  | `mneme search "<query>"` | Search across all layers |
164
166
  | `mneme draft --doc-type <t> --section <s> --client <c>` | Build a *write packet* for an LLM agent to produce one section |
165
167
  | `mneme validate writing-style <page>` | Build a *review packet* for an LLM agent to grade a page |
168
+ | `mneme tags suggest <page>` | Build a *tag packet* for an LLM agent to choose tags |
169
+ | `mneme tags apply <page> --add t1,t2 --remove t3` | Atomic tag update (frontmatter + schema + search index) |
166
170
  | `mneme agent plan --goal "..." --doc-type <t> --client <c>` | Generate a deterministic TODO plan from the active profile |
167
171
  | `mneme agent next-task` | Return the next ready task in the active plan |
168
172
  | `mneme agent task-done <id>` | Mark a task as done |
169
- | `mneme sync` | Sync wiki to Memvid memory |
173
+ | `mneme sync` | Sync wiki pages to FTS5 search index |
174
+ | `mneme reindex` | Rebuild search index from wiki pages |
170
175
  | `mneme drift` | Detect layer desynchronization |
171
176
  | `mneme stats` | Health overview |
172
177
  | `mneme repair` | Fix corrupted archives |
173
178
 
174
- **Formats:** `.md`, `.txt`, `.pdf`
179
+ **Formats:** `.md`, `.txt`, `.pdf`, `.xlsx` (with `pip install "mneme-cli[xlsx]"`)
175
180
 
176
181
  ---
177
182
 
@@ -206,6 +211,121 @@ Mneme generates the plan deterministically from the active profile's section_not
206
211
 
207
212
  ---
208
213
 
214
+ ## End-to-end example: from raw documents to a tagged, searchable, validated knowledge base
215
+
216
+ A realistic walkthrough showing how the human, the CLI, and the LLM agent collaborate. Suppose you're building a knowledge base for **Parkiwatch**, a medical device for Parkinson's monitoring.
217
+
218
+ ### Step 1 — Scaffold a workspace (human, one-time)
219
+
220
+ ```bash
221
+ mneme new ~/projects/parkiwatch --name Parkiwatch --client parkiwatch --profile eu-mdr
222
+ cd ~/projects/parkiwatch
223
+ ```
224
+
225
+ Creates the workspace tree, sets the EU MDR writing-style profile, and initializes empty schema files.
226
+
227
+ ### Step 2 — Ingest source material (human)
228
+
229
+ ```bash
230
+ # Drop a folder of source documents into inbox/, then bulk-process
231
+ cp -r ~/Downloads/parkinson-research/* inbox/
232
+ mneme tornado --client parkiwatch
233
+
234
+ # Or ingest individual files
235
+ mneme ingest research-paper.pdf parkiwatch
236
+ mneme ingest-csv risk-register.csv parkiwatch --mapping risk-register
237
+ mneme ingest spec-table.xlsx parkiwatch # .xlsx renders sheets as markdown tables
238
+ mneme ingest-dir docs/ parkiwatch --recursive # walk subdirectories
239
+ ```
240
+
241
+ What happens per ingest: source file → wiki page in `wiki/parkiwatch/` → frontmatter with auto-extracted entities → entry in `index.md` → row in the FTS5 search DB → log entry.
242
+
243
+ ### Step 3 — Tag the new pages (LLM agent)
244
+
245
+ The new pages have only the auto-applied `parkiwatch` client tag. The agent now adds meaningful tags:
246
+
247
+ ```bash
248
+ # For each new page, the agent runs:
249
+ mneme tags suggest parkiwatch/research-paper > /tmp/packet.md
250
+ ```
251
+
252
+ The packet contains the page body, the current tag taxonomy (every tag in the workspace + usage counts), and a ready-to-paste prompt. **The LLM reads the packet** — it understands the content and decides on tags, preferring existing taxonomy entries when they fit. The LLM's response is JSON:
253
+
254
+ ```json
255
+ {"tags": ["clinical-trial", "iso-13485"], "new_tags": ["bradykinesia-detection"]}
256
+ ```
257
+
258
+ The agent then runs:
259
+
260
+ ```bash
261
+ mneme tags apply parkiwatch/research-paper \
262
+ --add clinical-trial,iso-13485,bradykinesia-detection
263
+ ```
264
+
265
+ Atomic operation: rewrites the wiki page frontmatter, updates `schema/tags.json`, re-indexes the page in FTS5 (so search picks up the new tags immediately), and appends a log entry. **Repeat for every page** — the taxonomy grows, and subsequent pages tend to reuse existing tags (consistency).
266
+
267
+ ### Step 4 — Search the knowledge base (anyone)
268
+
269
+ ```bash
270
+ mneme search "bradykinesia" # BM25 + Porter stemming
271
+ mneme search "clinical evaluation" --client parkiwatch # client-scoped
272
+ ```
273
+
274
+ Sub-millisecond. Returns the page title, snippet (with `<b>highlights</b>`), tags, and BM25 score.
275
+
276
+ ### Step 5 — Produce a regulatory deliverable (LLM agent driving the agent loop)
277
+
278
+ ```bash
279
+ # Generate a deterministic plan from the active profile
280
+ mneme agent plan --goal "produce a Design Validation Report" \
281
+ --doc-type design-validation-report \
282
+ --client parkiwatch
283
+ # → 15 tasks: 11 section drafts + assemble + harmonize + review + submission-check
284
+
285
+ # Walk the plan
286
+ mneme agent next-task
287
+ # → Task: section-purpose-and-scope
288
+ # next_command: mneme draft --doc-type design-validation-report \
289
+ # --section purpose-and-scope --client parkiwatch
290
+
291
+ mneme draft --doc-type design-validation-report \
292
+ --section purpose-and-scope --client parkiwatch \
293
+ --query "purpose scope intended use" \
294
+ --out /tmp/write-packet.md
295
+
296
+ # The LLM reads /tmp/write-packet.md (which includes wiki search hits as evidence,
297
+ # the profile's writing-style rules, and a write prompt) and produces the section.
298
+ # The agent writes the section to wiki/parkiwatch/design-validation-report.md.
299
+
300
+ mneme agent task-done section-purpose-and-scope
301
+
302
+ # ... repeat for each section ...
303
+
304
+ # After all sections drafted:
305
+ mneme harmonize --client parkiwatch --fix # mechanical vocabulary swap
306
+ mneme validate writing-style parkiwatch/design-validation-report > /tmp/review.md
307
+ # The LLM reads /tmp/review.md, critiques every section, applies fixes in place
308
+ mneme agent task-done review-page
309
+
310
+ # Submission readiness
311
+ mneme validate consistency --client parkiwatch # cross-doc version checks
312
+ mneme trace gaps parkiwatch # find broken trace chains
313
+ mneme trace matrix parkiwatch --csv --out trace-matrix.csv # for the DHF
314
+ mneme snapshot parkiwatch # versioned audit zip
315
+ ```
316
+
317
+ ### Who does what
318
+
319
+ | Layer | Responsibility |
320
+ |---|---|
321
+ | **Human** | Drops sources, runs commands, reviews diffs, ships the deliverable |
322
+ | **mneme CLI** | Deterministic infrastructure: parses files, builds packets, indexes, traces, harmonizes vocabulary, generates plans, atomic state updates |
323
+ | **LLM agent** | All reasoning: classifying entities, choosing tags, drafting prose, grading writing style, deciding when a chain is complete |
324
+
325
+ mneme never calls an LLM. The LLM never bypasses mneme's atomic operations. They meet at the packet boundary.
326
+
327
+ ---
328
+
209
329
  ## How It Works
210
330
 
211
331
  ```
@@ -218,9 +338,9 @@ Mneme generates the plan deterministically from the active profile's section_not
218
338
  | Frontmatter, citations, [[wikilinks]]
219
339
  | You read and browse here
220
340
  |
221
- +---> Memory Layer (.mv2 archive)
222
- | Smart Frames, semantic embeddings
223
- | Machines query here (<5ms)
341
+ +---> Search Index (SQLite FTS5)
342
+ | BM25 ranking, Porter stemming
343
+ | Sub-millisecond queries, zero dependencies
224
344
  |
225
345
  +---> Schema Layer (JSON)
226
346
  entities.json - people, companies, products
@@ -228,9 +348,9 @@ Mneme generates the plan deterministically from the active profile's section_not
228
348
  tags.json - taxonomy
229
349
  ```
230
350
 
231
- Every `mneme ingest` writes both layers atomically. `mneme drift` catches desync. `mneme repair` fixes it.
351
+ Every `mneme ingest` writes the wiki page and updates the search index atomically. `mneme drift` catches desync. `mneme reindex` rebuilds the index from wiki pages.
232
352
 
233
- **Memvid is optional.** Without it, mneme runs as a wiki-only knowledge base with text search. Add `memvid-sdk` when you outgrow grep.
353
+ **Zero external dependencies for search.** SQLite FTS5 is built into Python's stdlib — no install, no API key, no capacity limit.
234
354
 
235
355
  ---
236
356
 
@@ -405,14 +525,15 @@ See `EXAMPLES.md` Example 13 for a full walkthrough with a real Parkiwatch scena
405
525
 
406
526
  ## When You Need This
407
527
 
408
- | Scale | Wiki alone | Wiki + Memvid |
409
- |---|---|---|
410
- | 5 docs | Plenty | Overkill |
411
- | 50 docs | Fine | Starting to help |
412
- | 500 docs | Grep takes 2-3s, misses semantic matches | 2ms, cross-client connections |
413
- | 5,000 docs | Unusable | Still 2ms |
528
+ | Scale | Search performance |
529
+ |---|---|
530
+ | 5 docs | Sub-millisecond |
531
+ | 50 docs | Sub-millisecond |
532
+ | 500 docs | Sub-millisecond, BM25 ranked |
533
+ | 5,000 docs | A few ms, still ranked by relevance |
534
+ | 50,000 docs | Tens of ms |
414
535
 
415
- Start wiki-only. Add the memory layer when search gets slow.
536
+ SQLite FTS5 scales transparently. No tuning, no capacity limits.
416
537
 
417
538
  ---
418
539
 
@@ -423,7 +544,7 @@ mneme/
423
544
  sources/ Raw documents (immutable, never modified)
424
545
  wiki/ Markdown knowledge pages (Obsidian-compatible)
425
546
  schema/ entities.json, graph.json, tags.json
426
- memvid/ .mv2 memory archives
547
+ search.db SQLite FTS5 search index
427
548
  core.py Engine (ingest, search, sync, drift, repair)
428
549
  config.py Configuration
429
550
  server.py Web dashboard
@@ -489,7 +610,7 @@ password = pypi-AgENd... # from https://test.pypi.org/manage/account/to
489
610
  This project builds on two foundational ideas:
490
611
 
491
612
  - **LLM Wiki pattern** by [Andrej Karpathy](https://gist.github.com/karpathy/442a6bf555914893e9891c11519de94f) -- the insight that LLMs should build and maintain a persistent, compounding wiki instead of re-deriving answers from raw documents on every query
492
- - **Memvid** by [Olow304/memvid](https://github.com/Olow304/memvid) -- single-file AI memory with sub-millisecond retrieval, no vector DB required
613
+ - **SQLite FTS5** -- the world's most-deployed embedded database, with built-in BM25 full-text search
493
614
  - **Original implementation** -- [tashisleepy/knowledge-engine](https://github.com/tashisleepy/knowledge-engine) -- the first version that fused both patterns into a dual-layer bridge
494
615
 
495
616
  ---
@@ -122,15 +122,18 @@ One installed CLI serves many projects — each workspace is just a directory.
122
122
  | `mneme search "<query>"` | Search across all layers |
123
123
  | `mneme draft --doc-type <t> --section <s> --client <c>` | Build a *write packet* for an LLM agent to produce one section |
124
124
  | `mneme validate writing-style <page>` | Build a *review packet* for an LLM agent to grade a page |
125
+ | `mneme tags suggest <page>` | Build a *tag packet* for an LLM agent to choose tags |
126
+ | `mneme tags apply <page> --add t1,t2 --remove t3` | Atomic tag update (frontmatter + schema + search index) |
125
127
  | `mneme agent plan --goal "..." --doc-type <t> --client <c>` | Generate a deterministic TODO plan from the active profile |
126
128
  | `mneme agent next-task` | Return the next ready task in the active plan |
127
129
  | `mneme agent task-done <id>` | Mark a task as done |
128
- | `mneme sync` | Sync wiki to Memvid memory |
130
+ | `mneme sync` | Sync wiki pages to FTS5 search index |
131
+ | `mneme reindex` | Rebuild search index from wiki pages |
129
132
  | `mneme drift` | Detect layer desynchronization |
130
133
  | `mneme stats` | Health overview |
131
134
  | `mneme repair` | Fix corrupted archives |
132
135
 
133
- **Formats:** `.md`, `.txt`, `.pdf`
136
+ **Formats:** `.md`, `.txt`, `.pdf`, `.xlsx` (with `pip install "mneme-cli[xlsx]"`)
134
137
 
135
138
  ---
136
139
 
@@ -165,6 +168,121 @@ Mneme generates the plan deterministically from the active profile's section_not
165
168
 
166
169
  ---
167
170
 
171
+ ## End-to-end example: from raw documents to a tagged, searchable, validated knowledge base
172
+
173
+ A realistic walkthrough showing how the human, the CLI, and the LLM agent collaborate. Suppose you're building a knowledge base for **Parkiwatch**, a medical device for Parkinson's monitoring.
174
+
175
+ ### Step 1 — Scaffold a workspace (human, one-time)
176
+
177
+ ```bash
178
+ mneme new ~/projects/parkiwatch --name Parkiwatch --client parkiwatch --profile eu-mdr
179
+ cd ~/projects/parkiwatch
180
+ ```
181
+
182
+ Creates the workspace tree, sets the EU MDR writing-style profile, and initializes empty schema files.
183
+
184
+ ### Step 2 — Ingest source material (human)
185
+
186
+ ```bash
187
+ # Drop a folder of source documents into inbox/, then bulk-process
188
+ cp -r ~/Downloads/parkinson-research/* inbox/
189
+ mneme tornado --client parkiwatch
190
+
191
+ # Or ingest individual files
192
+ mneme ingest research-paper.pdf parkiwatch
193
+ mneme ingest-csv risk-register.csv parkiwatch --mapping risk-register
194
+ mneme ingest spec-table.xlsx parkiwatch # .xlsx renders sheets as markdown tables
195
+ mneme ingest-dir docs/ parkiwatch --recursive # walk subdirectories
196
+ ```
197
+
198
+ What happens per ingest: source file → wiki page in `wiki/parkiwatch/` → frontmatter with auto-extracted entities → entry in `index.md` → row in the FTS5 search DB → log entry.
199
+
200
+ ### Step 3 — Tag the new pages (LLM agent)
201
+
202
+ The new pages have only the auto-applied `parkiwatch` client tag. The agent now adds meaningful tags:
203
+
204
+ ```bash
205
+ # For each new page, the agent runs:
206
+ mneme tags suggest parkiwatch/research-paper > /tmp/packet.md
207
+ ```
208
+
209
+ The packet contains the page body, the current tag taxonomy (every tag in the workspace + usage counts), and a ready-to-paste prompt. **The LLM reads the packet** — it understands the content and decides on tags, preferring existing taxonomy entries when they fit. The LLM's response is JSON:
210
+
211
+ ```json
212
+ {"tags": ["clinical-trial", "iso-13485"], "new_tags": ["bradykinesia-detection"]}
213
+ ```
214
+
215
+ The agent then runs:
216
+
217
+ ```bash
218
+ mneme tags apply parkiwatch/research-paper \
219
+ --add clinical-trial,iso-13485,bradykinesia-detection
220
+ ```
221
+
222
+ Atomic operation: rewrites the wiki page frontmatter, updates `schema/tags.json`, re-indexes the page in FTS5 (so search picks up the new tags immediately), and appends a log entry. **Repeat for every page** — the taxonomy grows, and subsequent pages tend to reuse existing tags (consistency).
223
+
224
+ ### Step 4 — Search the knowledge base (anyone)
225
+
226
+ ```bash
227
+ mneme search "bradykinesia" # BM25 + Porter stemming
228
+ mneme search "clinical evaluation" --client parkiwatch # client-scoped
229
+ ```
230
+
231
+ Sub-millisecond. Returns the page title, snippet (with `<b>highlights</b>`), tags, and BM25 score.
232
+
233
+ ### Step 5 — Produce a regulatory deliverable (LLM agent driving the agent loop)
234
+
235
+ ```bash
236
+ # Generate a deterministic plan from the active profile
237
+ mneme agent plan --goal "produce a Design Validation Report" \
238
+ --doc-type design-validation-report \
239
+ --client parkiwatch
240
+ # → 15 tasks: 11 section drafts + assemble + harmonize + review + submission-check
241
+
242
+ # Walk the plan
243
+ mneme agent next-task
244
+ # → Task: section-purpose-and-scope
245
+ # next_command: mneme draft --doc-type design-validation-report \
246
+ # --section purpose-and-scope --client parkiwatch
247
+
248
+ mneme draft --doc-type design-validation-report \
249
+ --section purpose-and-scope --client parkiwatch \
250
+ --query "purpose scope intended use" \
251
+ --out /tmp/write-packet.md
252
+
253
+ # The LLM reads /tmp/write-packet.md (which includes wiki search hits as evidence,
254
+ # the profile's writing-style rules, and a write prompt) and produces the section.
255
+ # The agent writes the section to wiki/parkiwatch/design-validation-report.md.
256
+
257
+ mneme agent task-done section-purpose-and-scope
258
+
259
+ # ... repeat for each section ...
260
+
261
+ # After all sections drafted:
262
+ mneme harmonize --client parkiwatch --fix # mechanical vocabulary swap
263
+ mneme validate writing-style parkiwatch/design-validation-report > /tmp/review.md
264
+ # The LLM reads /tmp/review.md, critiques every section, applies fixes in place
265
+ mneme agent task-done review-page
266
+
267
+ # Submission readiness
268
+ mneme validate consistency --client parkiwatch # cross-doc version checks
269
+ mneme trace gaps parkiwatch # find broken trace chains
270
+ mneme trace matrix parkiwatch --csv --out trace-matrix.csv # for the DHF
271
+ mneme snapshot parkiwatch # versioned audit zip
272
+ ```
273
+
274
+ ### Who does what
275
+
276
+ | Layer | Responsibility |
277
+ |---|---|
278
+ | **Human** | Drops sources, runs commands, reviews diffs, ships the deliverable |
279
+ | **mneme CLI** | Deterministic infrastructure: parses files, builds packets, indexes, traces, harmonizes vocabulary, generates plans, atomic state updates |
280
+ | **LLM agent** | All reasoning: classifying entities, choosing tags, drafting prose, grading writing style, deciding when a chain is complete |
281
+
282
+ mneme never calls an LLM. The LLM never bypasses mneme's atomic operations. They meet at the packet boundary.
283
+
284
+ ---
285
+
168
286
  ## How It Works
169
287
 
170
288
  ```
@@ -177,9 +295,9 @@ Mneme generates the plan deterministically from the active profile's section_not
177
295
  | Frontmatter, citations, [[wikilinks]]
178
296
  | You read and browse here
179
297
  |
180
- +---> Memory Layer (.mv2 archive)
181
- | Smart Frames, semantic embeddings
182
- | Machines query here (<5ms)
298
+ +---> Search Index (SQLite FTS5)
299
+ | BM25 ranking, Porter stemming
300
+ | Sub-millisecond queries, zero dependencies
183
301
  |
184
302
  +---> Schema Layer (JSON)
185
303
  entities.json - people, companies, products
@@ -187,9 +305,9 @@ Mneme generates the plan deterministically from the active profile's section_not
187
305
  tags.json - taxonomy
188
306
  ```
189
307
 
190
- Every `mneme ingest` writes both layers atomically. `mneme drift` catches desync. `mneme repair` fixes it.
308
+ Every `mneme ingest` writes the wiki page and updates the search index atomically. `mneme drift` catches desync. `mneme reindex` rebuilds the index from wiki pages.
191
309
 
192
- **Memvid is optional.** Without it, mneme runs as a wiki-only knowledge base with text search. Add `memvid-sdk` when you outgrow grep.
310
+ **Zero external dependencies for search.** SQLite FTS5 is built into Python's stdlib -- no install, no API key, no capacity limit.
193
311
 
194
312
  ---
195
313
 
@@ -364,14 +482,15 @@ See `EXAMPLES.md` Example 13 for a full walkthrough with a real Parkiwatch scena
364
482
 
365
483
  ## When You Need This
366
484
 
367
- | Scale | Wiki alone | Wiki + Memvid |
368
- |---|---|---|
369
- | 5 docs | Plenty | Overkill |
370
- | 50 docs | Fine | Starting to help |
371
- | 500 docs | Grep takes 2-3s, misses semantic matches | 2ms, cross-client connections |
372
- | 5,000 docs | Unusable | Still 2ms |
485
+ | Scale | Search performance |
486
+ |---|---|
487
+ | 5 docs | Sub-millisecond |
488
+ | 50 docs | Sub-millisecond |
489
+ | 500 docs | Sub-millisecond, BM25 ranked |
490
+ | 5,000 docs | A few ms, still ranked by relevance |
491
+ | 50,000 docs | Tens of ms |
373
492
 
374
- Start wiki-only. Add the memory layer when search gets slow.
493
+ SQLite FTS5 scales transparently. No tuning, no capacity limits.
375
494
 
376
495
  ---
377
496
 
@@ -382,7 +501,7 @@ mneme/
382
501
  sources/ Raw documents (immutable, never modified)
383
502
  wiki/ Markdown knowledge pages (Obsidian-compatible)
384
503
  schema/ entities.json, graph.json, tags.json
385
- memvid/ .mv2 memory archives
504
+ search.db SQLite FTS5 search index
386
505
  core.py Engine (ingest, search, sync, drift, repair)
387
506
  config.py Configuration
388
507
  server.py Web dashboard
@@ -448,7 +567,7 @@ password = pypi-AgENd... # from https://test.pypi.org/manage/account/to
448
567
  This project builds on two foundational ideas:
449
568
 
450
569
  - **LLM Wiki pattern** by [Andrej Karpathy](https://gist.github.com/karpathy/442a6bf555914893e9891c11519de94f) -- the insight that LLMs should build and maintain a persistent, compounding wiki instead of re-deriving answers from raw documents on every query
451
- - **Memvid** by [Olow304/memvid](https://github.com/Olow304/memvid) -- single-file AI memory with sub-millisecond retrieval, no vector DB required
570
+ - **SQLite FTS5** -- the built-in full-text search extension of SQLite, the world's most-deployed embedded database, with BM25 ranking
452
571
  - **Original implementation** -- [tashisleepy/knowledge-engine](https://github.com/tashisleepy/knowledge-engine) -- the first version that fused both patterns into a dual-layer bridge
453
572
 
454
573
  ---
@@ -5,4 +5,4 @@ Public API:
5
5
  from mneme.core import ingest_source_to_both, dual_search, ...
6
6
  """
7
7
 
8
- __version__ = "0.4.0"
8
+ __version__ = "0.5.0"
@@ -8,7 +8,7 @@ Two distinct roots:
8
8
  from here.
9
9
 
10
10
  * WORKSPACE_DIR - where the user's data lives (wiki/, sources/, schema/,
11
- memvid/, index.md, log.md). Resolved in this order:
11
+ search.db, index.md, log.md). Resolved in this order:
12
12
  1. The MNEME_HOME environment variable, if set.
13
13
  2. The current working directory.
14
14
 
@@ -54,9 +54,7 @@ BASE_DIR = WORKSPACE_DIR
54
54
  WIKI_DIR = os.path.join(WORKSPACE_DIR, 'wiki')
55
55
  SOURCES_DIR = os.path.join(WORKSPACE_DIR, 'sources')
56
56
  SCHEMA_DIR = os.path.join(WORKSPACE_DIR, 'schema')
57
- MEMVID_DIR = os.path.join(WORKSPACE_DIR, 'memvid')
58
- MASTER_MV2 = os.path.join(MEMVID_DIR, 'master.mv2')
59
- PER_CLIENT_DIR = os.path.join(MEMVID_DIR, 'per-client')
57
+ SEARCH_DB = os.path.join(WORKSPACE_DIR, 'search.db')
60
58
  INDEX_FILE = os.path.join(WORKSPACE_DIR, 'index.md')
61
59
  LOG_FILE = os.path.join(WORKSPACE_DIR, 'log.md')
62
60
  TEMPLATES_DIR = os.path.join(WIKI_DIR, '_templates')
@@ -78,13 +76,8 @@ WORKSPACE_MAPPINGS_DIR = os.path.join(WORKSPACE_PROFILES_DIR, 'mappings')
78
76
  EXCLUDED_DIRS = ['_templates', '.baselines']
79
77
  EXCLUDED_FILES = ['_meta.yaml']
80
78
 
81
- # Chunk settings for memvid
82
- MAX_CHUNK_SIZE = 500 # characters per Smart Frame
83
- MIN_CHUNK_SIZE = 50 # don't create tiny frames
84
-
85
- # Ingest limits to prevent hangs on huge files
86
- MAX_CHUNKS_PER_INGEST = 200 # hard cap on chunks sent to memvid per page
87
- CHUNK_COMMIT_BATCH = 50 # commit to memvid every N chunks
79
+ # Log rotation
80
+ LOG_MAX_ENTRIES = 500 # archive after this many entries
88
81
 
89
82
  # Entity extraction stopwords
90
83
  ENTITY_STOPWORDS = {