mneme-cli 0.4.0__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mneme_cli-0.4.0/mneme/templates/workspace → mneme_cli-0.5.0}/AGENTS.md +40 -2
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/CHANGELOG.md +54 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/FEATURES.md +6 -3
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/PKG-INFO +140 -19
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/README.md +135 -16
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/__init__.py +1 -1
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/config.py +4 -11
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/core.py +748 -661
- mneme_cli-0.5.0/mneme/search.py +318 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/server.py +2 -2
- mneme_cli-0.5.0/mneme/templates/workspace/.gitignore +9 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0/mneme/templates/workspace}/AGENTS.md +2 -2
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/templates/workspace/README.md +1 -1
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/ui.html +17 -17
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme_cli.egg-info/SOURCES.txt +2 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/pyproject.toml +3 -3
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/tests/test_agent_loop.py +4 -1
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/tests/test_bug_regressions.py +20 -15
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/tests/test_core.py +225 -205
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/tests/test_ingest_csv.py +1 -1
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/tests/test_profile.py +1 -1
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/tests/test_schema_search.py +23 -5
- mneme_cli-0.5.0/tests/test_search.py +142 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/tests/test_tornado_lint.py +1 -1
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/tests/test_trace.py +1 -1
- mneme_cli-0.4.0/mneme/templates/workspace/.gitignore +0 -9
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/CLAUDE.md +0 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/CODER.md +0 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/EXAMPLES.md +0 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/LICENSE +0 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/MANIFEST.in +0 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/__main__.py +0 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/profiles/eu-mdr.md +0 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/profiles/iso-13485.md +0 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/profiles/mappings/dds.json +0 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/profiles/mappings/requirements.json +0 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/profiles/mappings/risk-register.json +0 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/profiles/mappings/test-cases.json +0 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/profiles/mappings/user-needs.json +0 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/templates/workspace/inbox/.gitkeep +0 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/templates/workspace/index.md +0 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/templates/workspace/log.md +0 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/templates/workspace/profiles/README.md +0 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/templates/workspace/profiles/mappings/.gitkeep +0 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/templates/workspace/schema/entities.json +0 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/templates/workspace/schema/graph.json +0 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/templates/workspace/schema/tags.json +0 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/templates/workspace/sources/.gitkeep +0 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/mneme/templates/workspace/wiki/_templates/page.md +0 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/setup.cfg +0 -0
- {mneme_cli-0.4.0 → mneme_cli-0.5.0}/tests/__init__.py +0 -0
|
@@ -69,7 +69,7 @@ A mneme workspace is a directory. Its shape is stable across versions:
|
|
|
69
69
|
graph.json relationship graph
|
|
70
70
|
tags.json tag registry
|
|
71
71
|
traceability.json trace links between pages
|
|
72
|
-
|
|
72
|
+
search.db SQLite FTS5 search index (rebuilt from wiki)
|
|
73
73
|
profiles/ workspace-local profiles and CSV mappings
|
|
74
74
|
mappings/ JSON column mappings for ingest-csv
|
|
75
75
|
exports/ JSON / markdown exports
|
|
@@ -107,7 +107,7 @@ mneme tornado --client <client> # batch from inbox/
|
|
|
107
107
|
```
|
|
108
108
|
|
|
109
109
|
`ingest` is atomic: it writes the wiki page, updates the schema, and
|
|
110
|
-
|
|
110
|
+
indexes the page in SQLite FTS5 in one operation. `ingest-csv` produces one
|
|
111
111
|
wiki page per row, with trace links derived from the mapping. `tornado`
|
|
112
112
|
is a bulk inbox processor — it auto-detects page type and routes CSVs
|
|
113
113
|
through `ingest-csv`, everything else through `ingest`.
|
|
@@ -207,6 +207,44 @@ baseline, your current wiki page, and a fresh ingest of the new source.
|
|
|
207
207
|
If there are conflicts, the page is left with merge markers. Edit them
|
|
208
208
|
out manually, then run `resync-resolve`.
|
|
209
209
|
|
|
210
|
+
### 3.6 TAG — agent-driven tagging
|
|
211
|
+
|
|
212
|
+
```bash
|
|
213
|
+
mneme tags suggest <client>/<page> # build tag packet
|
|
214
|
+
mneme tags suggest <client>/<page> --json # raw dict
|
|
215
|
+
mneme tags apply <client>/<page> --add t1,t2 --remove t3
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
`mneme tags suggest` builds a **tag packet**: the page content, current
|
|
219
|
+
tags, the workspace tag taxonomy (every existing tag with usage counts),
|
|
220
|
+
active profile guidance, and a ready-to-paste prompt instructing you to
|
|
221
|
+
choose 3–7 tags. Mneme does **not** propose tags itself — content
|
|
222
|
+
understanding is your job. The packet gives you all the context you need.
|
|
223
|
+
|
|
224
|
+
Your contract when consuming a tag packet:
|
|
225
|
+
|
|
226
|
+
1. **Prefer existing tags** from the taxonomy when they fit. Consistency
|
|
227
|
+
matters more than novelty — `iso-13485` should not become `iso13485`
|
|
228
|
+
on the next page.
|
|
229
|
+
2. **Add new tags only** when no existing tag captures the concept.
|
|
230
|
+
3. Follow the format: lowercase, hyphenated (`risk-management`, not
|
|
231
|
+
`Risk Management`).
|
|
232
|
+
4. Do not propose generic tags (`summary`, `overview`, `report`).
|
|
233
|
+
5. Do not add the client slug — it is auto-applied.
|
|
234
|
+
6. Output JSON: `{"tags": ["existing-a", "existing-b"], "new_tags": ["proposed-c"]}`.
|
|
235
|
+
|
|
236
|
+
`mneme tags apply` is **atomic**: it rewrites the wiki page frontmatter,
|
|
237
|
+
updates `schema/tags.json`, re-syncs the page to the FTS5 index, and
|
|
238
|
+
appends a log entry — all in one operation. Search picks up the new tags
|
|
239
|
+
immediately. Use `--add` and/or `--remove`, comma-separated.
|
|
240
|
+
|
|
241
|
+
Existing taxonomy ops:
|
|
242
|
+
|
|
243
|
+
```bash
|
|
244
|
+
mneme tags list # all tags + counts
|
|
245
|
+
mneme tags merge <old> <new> # rename across all pages
|
|
246
|
+
```
|
|
247
|
+
|
|
210
248
|
---
|
|
211
249
|
|
|
212
250
|
## 4. Profiles and the writing-style contract
|
|
@@ -4,6 +4,60 @@ All notable changes to this project are documented here.
|
|
|
4
4
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
5
5
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
6
6
|
|
|
7
|
+
## [0.5.0] - 2026-04-13
|
|
8
|
+
|
|
9
|
+
### Breaking Changes
|
|
10
|
+
|
|
11
|
+
- **Replaced memvid-sdk with SQLite FTS5.** The `memvid/` directory and `.mv2`
|
|
12
|
+
archives are no longer used. Search is now powered by a local `search.db`
|
|
13
|
+
file using BM25 ranking with Porter stemming. **Zero external dependencies**
|
|
14
|
+
for search — `sqlite3` is in the Python stdlib.
|
|
15
|
+
- `mneme repair` now rebuilds the FTS5 index instead of memvid archives.
|
|
16
|
+
- `mneme drift` reports `unindexed` / `orphaned` / `stale` instead of
|
|
17
|
+
`missing_from_memvid` / `orphan_frames`.
|
|
18
|
+
- `get_stats()` returns a `search` key (page_count, db_size_bytes,
|
|
19
|
+
search_latency_ms) instead of `memvid`.
|
|
20
|
+
- `sync_page_to_memvid()` renamed to `sync_page_to_index()`. Returns
|
|
21
|
+
`bool` (indexed) instead of `int` (frame count).
|
|
22
|
+
- Removed `chunk_body()`, `_sanitize_memvid_query()`, and all chunking
|
|
23
|
+
config (`MAX_CHUNK_SIZE`, `MIN_CHUNK_SIZE`, `MAX_CHUNKS_PER_INGEST`,
|
|
24
|
+
`CHUNK_COMMIT_BATCH`).
|
|
25
|
+
- Removed `MEMVID_DIR`, `MASTER_MV2`, `PER_CLIENT_DIR` config constants.
|
|
26
|
+
Replaced by `SEARCH_DB`.
|
|
27
|
+
|
|
28
|
+
### Added
|
|
29
|
+
|
|
30
|
+
- **`mneme reindex`** command — rebuild search index from wiki pages.
|
|
31
|
+
- **`ingest-dir --recursive` / `-r`** — recurse into subdirectories.
|
|
32
|
+
- **`ingest-dir --preserve-structure`** — mirror source directory structure
|
|
33
|
+
in wiki subdirectories (avoids dedup collisions between same-basename files
|
|
34
|
+
in different directories).
|
|
35
|
+
- **`ingest-csv --delimiter`** flag with auto-detection via `csv.Sniffer`.
|
|
36
|
+
- **`.xlsx` ingest support** — install with `pip install "mneme-cli[xlsx]"`.
|
|
37
|
+
Sheets are rendered as markdown tables.
|
|
38
|
+
- **`mneme trace matrix --csv [--out FILE]`** — export the trace matrix as
|
|
39
|
+
CSV for QMS audits and DHF inclusion.
|
|
40
|
+
- **`graph.json` auto-populated** during ingest from wiki page wikilinks
|
|
41
|
+
and `related` frontmatter.
|
|
42
|
+
- **`stats` relationship count** now includes traceability.json links, not
|
|
43
|
+
just graph.json edges.
|
|
44
|
+
- **log.md rotation** — entries beyond `LOG_MAX_ENTRIES` (default 500) are
|
|
45
|
+
archived to `log-archive-YYYY-MM-DD.md`.
|
|
46
|
+
|
|
47
|
+
### Fixed
|
|
48
|
+
|
|
49
|
+
- `mneme status` crash (UnboundLocalError on `log_content`).
|
|
50
|
+
- CSV ingest crash on `None` cells (`row.get()` returning None).
|
|
51
|
+
- Duplicate ingest detection now uses full source path, not just filename
|
|
52
|
+
(two `INSTRUCTIONS.md` files in different directories now both ingest).
|
|
53
|
+
|
|
54
|
+
### Removed
|
|
55
|
+
|
|
56
|
+
- `memvid-sdk` dependency.
|
|
57
|
+
- `MNEME_NO_MEMVID` env var (no longer needed — FTS5 is always available).
|
|
58
|
+
- Chunking logic (`chunk_body`, `MAX_CHUNK_SIZE`, frame management).
|
|
59
|
+
- Tantivy-reserved-word query sanitizer (FTS5 has different syntax).
|
|
60
|
+
|
|
7
61
|
## [Unreleased]
|
|
8
62
|
|
|
9
63
|
### Added
|
|
@@ -17,13 +17,14 @@
|
|
|
17
17
|
| `mneme new` | Scaffold a new workspace from the bundled template (preferred over `init`) |
|
|
18
18
|
| `mneme init` | Scaffold a workspace in cwd (legacy) |
|
|
19
19
|
| `mneme --workspace <dir>` / `MNEME_HOME=<dir>` | Run any command against a specific workspace |
|
|
20
|
-
| `mneme ingest` | Atomic ingest: source -> wiki +
|
|
20
|
+
| `mneme ingest` | Atomic ingest: source -> wiki + FTS5 index + schema |
|
|
21
21
|
| `mneme resync` | Diff-aware re-ingest: 3-way merge (baseline / wiki / fresh ingest) via `git merge-file` |
|
|
22
22
|
| `mneme resync-resolve` | Mark a conflicted resync page as resolved after editing out markers |
|
|
23
23
|
| `mneme ingest-dir` | Batch ingest all files from a directory |
|
|
24
24
|
| `mneme search` | Dual-layer search with `--client` scoping |
|
|
25
25
|
| `mneme lint` | Health check: orphan pages, dead links, stale pages, citations, schema drift, coverage |
|
|
26
|
-
| `mneme sync` | Sync wiki pages to
|
|
26
|
+
| `mneme sync` | Sync wiki pages to FTS5 search index |
|
|
27
|
+
| `mneme reindex` | Rebuild search index from wiki pages |
|
|
27
28
|
| `mneme drift` | Detect layer desynchronization |
|
|
28
29
|
| `mneme stats` | Health overview |
|
|
29
30
|
| `mneme repair` | Repair a corrupted search index (rebuilds the FTS5 index) and schema |
|
|
@@ -31,6 +32,8 @@
|
|
|
31
32
|
| `mneme recent` | Show last N activity log entries |
|
|
32
33
|
| `mneme tags list` | List all tags with page counts |
|
|
33
34
|
| `mneme tags merge` | Merge one tag into another across all pages |
|
|
35
|
+
| `mneme tags suggest <page>` | Build a *tag packet* for an LLM agent (page content + taxonomy + prompt) |
|
|
36
|
+
| `mneme tags apply <page> --add t1,t2 --remove t3` | Atomic tag update: rewrites frontmatter, updates schema/tags.json, re-syncs FTS5 index |
|
|
34
37
|
| `mneme diff` | Git-aware diff for a wiki page |
|
|
35
38
|
| `mneme snapshot` | Versioned zip archive of a client + git tag |
|
|
36
39
|
| `mneme dedupe` | Detect near-duplicate wiki pages |
|
|
@@ -54,7 +57,7 @@
|
|
|
54
57
|
| `mneme scan-repo` | Scan code repo, compare against QMS docs, find gaps |
|
|
55
58
|
| `mneme tornado` | Inbox processor: auto-detect type/client, ingest, archive to sources |
|
|
56
59
|
| `mneme ingest-csv` | CSV ingest: one row = one wiki page, with column-to-frontmatter mapping and auto trace links |
|
|
57
|
-
| `mneme demo clean` | Remove all demo content: demo-retail client, demo/ folder, schema entries,
|
|
60
|
+
| `mneme demo clean` | Remove all demo content: demo-retail client, demo/ folder, schema entries, search index entries, and `index.md` / `log.md` entries |
|
|
58
61
|
|
|
59
62
|
### Web UI (localhost:3141)
|
|
60
63
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mneme-cli
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Mnemosyne - CLI tool that turns documents into a searchable second brain. Ingest once, query forever.
|
|
5
5
|
Author-email: Tolis Moustaklis <apostolos.moustaklis@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -9,7 +9,7 @@ Project-URL: Repository, https://github.com/tolism/mneme
|
|
|
9
9
|
Project-URL: Issues, https://github.com/tolism/mneme/issues
|
|
10
10
|
Project-URL: Documentation, https://github.com/tolism/mneme#readme
|
|
11
11
|
Project-URL: Changelog, https://github.com/tolism/mneme/blob/main/CHANGELOG.md
|
|
12
|
-
Keywords: knowledge-management,second-brain,cli,wiki,
|
|
12
|
+
Keywords: knowledge-management,second-brain,cli,wiki,sqlite,fts5,llm,qms,obsidian,traceability
|
|
13
13
|
Classifier: Development Status :: 3 - Alpha
|
|
14
14
|
Classifier: Environment :: Console
|
|
15
15
|
Classifier: Intended Audience :: Developers
|
|
@@ -28,12 +28,14 @@ Classifier: Topic :: Text Processing :: Markup :: Markdown
|
|
|
28
28
|
Requires-Python: >=3.9
|
|
29
29
|
Description-Content-Type: text/markdown
|
|
30
30
|
License-File: LICENSE
|
|
31
|
-
Requires-Dist: memvid-sdk>=2.0.0
|
|
32
31
|
Requires-Dist: portalocker>=2.0.0
|
|
33
32
|
Provides-Extra: pdf
|
|
34
33
|
Requires-Dist: pymupdf>=1.23.0; extra == "pdf"
|
|
34
|
+
Provides-Extra: xlsx
|
|
35
|
+
Requires-Dist: openpyxl>=3.1.0; extra == "xlsx"
|
|
35
36
|
Provides-Extra: all
|
|
36
37
|
Requires-Dist: pymupdf>=1.23.0; extra == "all"
|
|
38
|
+
Requires-Dist: openpyxl>=3.1.0; extra == "all"
|
|
37
39
|
Provides-Extra: release
|
|
38
40
|
Requires-Dist: build>=1.0.0; extra == "release"
|
|
39
41
|
Requires-Dist: twine>=5.0.0; extra == "release"
|
|
@@ -163,15 +165,18 @@ One installed CLI serves many projects — each workspace is just a directory.
|
|
|
163
165
|
| `mneme search "<query>"` | Search across all layers |
|
|
164
166
|
| `mneme draft --doc-type <t> --section <s> --client <c>` | Build a *write packet* for an LLM agent to produce one section |
|
|
165
167
|
| `mneme validate writing-style <page>` | Build a *review packet* for an LLM agent to grade a page |
|
|
168
|
+
| `mneme tags suggest <page>` | Build a *tag packet* for an LLM agent to choose tags |
|
|
169
|
+
| `mneme tags apply <page> --add t1,t2 --remove t3` | Atomic tag update (frontmatter + schema + search index) |
|
|
166
170
|
| `mneme agent plan --goal "..." --doc-type <t> --client <c>` | Generate a deterministic TODO plan from the active profile |
|
|
167
171
|
| `mneme agent next-task` | Return the next ready task in the active plan |
|
|
168
172
|
| `mneme agent task-done <id>` | Mark a task as done |
|
|
169
|
-
| `mneme sync` | Sync wiki to
|
|
173
|
+
| `mneme sync` | Sync wiki pages to FTS5 search index |
|
|
174
|
+
| `mneme reindex` | Rebuild search index from wiki pages |
|
|
170
175
|
| `mneme drift` | Detect layer desynchronization |
|
|
171
176
|
| `mneme stats` | Health overview |
|
|
172
177
|
| `mneme repair` | Repair a corrupted search index (rebuilds the FTS5 index) |
|
|
173
178
|
|
|
174
|
-
**Formats:** `.md`, `.txt`, `.pdf`
|
|
179
|
+
**Formats:** `.md`, `.txt`, `.pdf`, `.xlsx` (with `pip install "mneme-cli[xlsx]"`)
|
|
175
180
|
|
|
176
181
|
---
|
|
177
182
|
|
|
@@ -206,6 +211,121 @@ Mneme generates the plan deterministically from the active profile's section_not
|
|
|
206
211
|
|
|
207
212
|
---
|
|
208
213
|
|
|
214
|
+
## End-to-end example: from raw documents to a tagged, searchable, validated knowledge base
|
|
215
|
+
|
|
216
|
+
A realistic walkthrough showing how the human, the CLI, and the LLM agent collaborate. Suppose you're building a knowledge base for **Parkiwatch**, a medical device for Parkinson's monitoring.
|
|
217
|
+
|
|
218
|
+
### Step 1 — Scaffold a workspace (human, one-time)
|
|
219
|
+
|
|
220
|
+
```bash
|
|
221
|
+
mneme new ~/projects/parkiwatch --name Parkiwatch --client parkiwatch --profile eu-mdr
|
|
222
|
+
cd ~/projects/parkiwatch
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
Creates the workspace tree, sets the EU MDR writing-style profile, and initializes empty schema files.
|
|
226
|
+
|
|
227
|
+
### Step 2 — Ingest source material (human)
|
|
228
|
+
|
|
229
|
+
```bash
|
|
230
|
+
# Drop a folder of source documents into inbox/, then bulk-process
|
|
231
|
+
cp -r ~/Downloads/parkinson-research/* inbox/
|
|
232
|
+
mneme tornado --client parkiwatch
|
|
233
|
+
|
|
234
|
+
# Or ingest individual files
|
|
235
|
+
mneme ingest research-paper.pdf parkiwatch
|
|
236
|
+
mneme ingest-csv risk-register.csv parkiwatch --mapping risk-register
|
|
237
|
+
mneme ingest spec-table.xlsx parkiwatch # .xlsx renders sheets as markdown tables
|
|
238
|
+
mneme ingest-dir docs/ parkiwatch --recursive # walk subdirectories
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
What happens per ingest: source file → wiki page in `wiki/parkiwatch/` → frontmatter with auto-extracted entities → entry in `index.md` → row in the FTS5 search DB → log entry.
|
|
242
|
+
|
|
243
|
+
### Step 3 — Tag the new pages (LLM agent)
|
|
244
|
+
|
|
245
|
+
The new pages have only the auto-applied `parkiwatch` client tag. The agent now adds meaningful tags:
|
|
246
|
+
|
|
247
|
+
```bash
|
|
248
|
+
# For each new page, the agent runs:
|
|
249
|
+
mneme tags suggest parkiwatch/research-paper > /tmp/packet.md
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
The packet contains the page body, the current tag taxonomy (every tag in the workspace + usage counts), and a ready-to-paste prompt. **The LLM reads the packet** — it understands the content and decides on tags, preferring existing taxonomy entries when they fit. The LLM's response is JSON:
|
|
253
|
+
|
|
254
|
+
```json
|
|
255
|
+
{"tags": ["clinical-trial", "iso-13485"], "new_tags": ["bradykinesia-detection"]}
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
The agent then runs:
|
|
259
|
+
|
|
260
|
+
```bash
|
|
261
|
+
mneme tags apply parkiwatch/research-paper \
|
|
262
|
+
--add clinical-trial,iso-13485,bradykinesia-detection
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
Atomic operation: rewrites the wiki page frontmatter, updates `schema/tags.json`, re-indexes the page in FTS5 (so search picks up the new tags immediately), appends a log entry. **Repeat for every page** — the taxonomy grows, and subsequent pages tend to reuse existing tags (consistency).
|
|
266
|
+
|
|
267
|
+
### Step 4 — Search the knowledge base (anyone)
|
|
268
|
+
|
|
269
|
+
```bash
|
|
270
|
+
mneme search "bradykinesia" # BM25 + Porter stemming
|
|
271
|
+
mneme search "clinical evaluation" --client parkiwatch # client-scoped
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
Queries typically complete in under a millisecond. Each result includes the page title, a snippet (with `<b>highlights</b>`), tags, and the BM25 score.
|
|
275
|
+
|
|
276
|
+
### Step 5 — Produce a regulatory deliverable (LLM agent driving the agent loop)
|
|
277
|
+
|
|
278
|
+
```bash
|
|
279
|
+
# Generate a deterministic plan from the active profile
|
|
280
|
+
mneme agent plan --goal "produce a Design Validation Report" \
|
|
281
|
+
--doc-type design-validation-report \
|
|
282
|
+
--client parkiwatch
|
|
283
|
+
# → 15 tasks: 11 section drafts + assemble + harmonize + review + submission-check
|
|
284
|
+
|
|
285
|
+
# Walk the plan
|
|
286
|
+
mneme agent next-task
|
|
287
|
+
# → Task: section-purpose-and-scope
|
|
288
|
+
# next_command: mneme draft --doc-type design-validation-report \
|
|
289
|
+
# --section purpose-and-scope --client parkiwatch
|
|
290
|
+
|
|
291
|
+
mneme draft --doc-type design-validation-report \
|
|
292
|
+
--section purpose-and-scope --client parkiwatch \
|
|
293
|
+
--query "purpose scope intended use" \
|
|
294
|
+
--out /tmp/write-packet.md
|
|
295
|
+
|
|
296
|
+
# The LLM reads /tmp/write-packet.md (which includes wiki search hits as evidence,
|
|
297
|
+
# the profile's writing-style rules, and a write prompt) and produces the section.
|
|
298
|
+
# The agent writes the section to wiki/parkiwatch/design-validation-report.md.
|
|
299
|
+
|
|
300
|
+
mneme agent task-done section-purpose-and-scope
|
|
301
|
+
|
|
302
|
+
# ... repeat for each section ...
|
|
303
|
+
|
|
304
|
+
# After all sections drafted:
|
|
305
|
+
mneme harmonize --client parkiwatch --fix # mechanical vocabulary swap
|
|
306
|
+
mneme validate writing-style parkiwatch/design-validation-report > /tmp/review.md
|
|
307
|
+
# The LLM reads /tmp/review.md, critiques every section, applies fixes in place
|
|
308
|
+
mneme agent task-done review-page
|
|
309
|
+
|
|
310
|
+
# Submission readiness
|
|
311
|
+
mneme validate consistency --client parkiwatch # cross-doc version checks
|
|
312
|
+
mneme trace gaps parkiwatch # find broken trace chains
|
|
313
|
+
mneme trace matrix parkiwatch --csv --out trace-matrix.csv # for the DHF
|
|
314
|
+
mneme snapshot parkiwatch # versioned audit zip
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
### Who does what
|
|
318
|
+
|
|
319
|
+
| Layer | Responsibility |
|
|
320
|
+
|---|---|
|
|
321
|
+
| **Human** | Drops sources, runs commands, reviews diffs, ships the deliverable |
|
|
322
|
+
| **mneme CLI** | Deterministic infrastructure: parses files, builds packets, indexes, traces, harmonizes vocabulary, generates plans, atomic state updates |
|
|
323
|
+
| **LLM agent** | All reasoning: classifying entities, choosing tags, drafting prose, grading writing style, deciding when a chain is complete |
|
|
324
|
+
|
|
325
|
+
mneme never calls an LLM. The LLM never bypasses mneme's atomic operations. They meet at the packet boundary.
|
|
326
|
+
|
|
327
|
+
---
|
|
328
|
+
|
|
209
329
|
## How It Works
|
|
210
330
|
|
|
211
331
|
```
|
|
@@ -218,9 +338,9 @@ Mneme generates the plan deterministically from the active profile's section_not
|
|
|
218
338
|
| Frontmatter, citations, [[wikilinks]]
|
|
219
339
|
| You read and browse here
|
|
220
340
|
|
|
|
221
|
-
+--->
|
|
222
|
-
|
|
|
223
|
-
|
|
|
341
|
+
+---> Search Index (SQLite FTS5)
|
|
342
|
+
| BM25 ranking, Porter stemming
|
|
343
|
+
| Sub-millisecond queries, zero dependencies
|
|
224
344
|
|
|
|
225
345
|
+---> Schema Layer (JSON)
|
|
226
346
|
entities.json - people, companies, products
|
|
@@ -228,9 +348,9 @@ Mneme generates the plan deterministically from the active profile's section_not
|
|
|
228
348
|
tags.json - taxonomy
|
|
229
349
|
```
|
|
230
350
|
|
|
231
|
-
Every `mneme ingest` writes
|
|
351
|
+
Every `mneme ingest` writes the wiki page and updates the search index atomically. `mneme drift` catches desync. `mneme reindex` rebuilds the index from wiki pages.
|
|
232
352
|
|
|
233
|
-
**
|
|
353
|
+
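**Zero external dependencies for search.** The `sqlite3` module is part of Python's stdlib, and the FTS5 extension is compiled into the SQLite library it uses on standard Python builds — no install, no API key, no capacity limit.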
|
|
234
354
|
|
|
235
355
|
---
|
|
236
356
|
|
|
@@ -405,14 +525,15 @@ See `EXAMPLES.md` Example 13 for a full walkthrough with a real Parkiwatch scena
|
|
|
405
525
|
|
|
406
526
|
## When You Need This
|
|
407
527
|
|
|
408
|
-
| Scale |
|
|
409
|
-
|
|
410
|
-
| 5 docs |
|
|
411
|
-
| 50 docs |
|
|
412
|
-
| 500 docs |
|
|
413
|
-
| 5,000 docs |
|
|
528
|
+
| Scale | Search performance |
|
|
529
|
+
|---|---|
|
|
530
|
+
| 5 docs | Sub-millisecond |
|
|
531
|
+
| 50 docs | Sub-millisecond |
|
|
532
|
+
| 500 docs | Sub-millisecond, BM25 ranked |
|
|
533
|
+
| 5,000 docs | A few ms, still ranked by relevance |
|
|
534
|
+
| 50,000 docs | Tens of ms |
|
|
414
535
|
|
|
415
|
-
|
|
536
|
+
SQLite FTS5 scales transparently. No tuning, no capacity limits.
|
|
416
537
|
|
|
417
538
|
---
|
|
418
539
|
|
|
@@ -423,7 +544,7 @@ mneme/
|
|
|
423
544
|
sources/ Raw documents (immutable, never modified)
|
|
424
545
|
wiki/ Markdown knowledge pages (Obsidian-compatible)
|
|
425
546
|
schema/ entities.json, graph.json, tags.json
|
|
426
|
-
|
|
547
|
+
search.db SQLite FTS5 search index
|
|
427
548
|
core.py Engine (ingest, search, sync, drift, repair)
|
|
428
549
|
config.py Configuration
|
|
429
550
|
server.py Web dashboard
|
|
@@ -489,7 +610,7 @@ password = pypi-AgENd... # from https://test.pypi.org/manage/account/to
|
|
|
489
610
|
This project builds on the following foundations:
|
|
490
611
|
|
|
491
612
|
- **LLM Wiki pattern** by [Andrej Karpathy](https://gist.github.com/karpathy/442a6bf555914893e9891c11519de94f) -- the insight that LLMs should build and maintain a persistent, compounding wiki instead of re-deriving answers from raw documents on every query
|
|
492
|
-
- **
|
|
613
|
+
- **SQLite FTS5** -- the world's most-deployed embedded database, with built-in BM25 full-text search
|
|
493
614
|
- **Original implementation** -- [tashisleepy/knowledge-engine](https://github.com/tashisleepy/knowledge-engine) -- the first version that fused both patterns into a dual-layer bridge
|
|
494
615
|
|
|
495
616
|
---
|
|
@@ -122,15 +122,18 @@ One installed CLI serves many projects — each workspace is just a directory.
|
|
|
122
122
|
| `mneme search "<query>"` | Search across all layers |
|
|
123
123
|
| `mneme draft --doc-type <t> --section <s> --client <c>` | Build a *write packet* for an LLM agent to produce one section |
|
|
124
124
|
| `mneme validate writing-style <page>` | Build a *review packet* for an LLM agent to grade a page |
|
|
125
|
+
| `mneme tags suggest <page>` | Build a *tag packet* for an LLM agent to choose tags |
|
|
126
|
+
| `mneme tags apply <page> --add t1,t2 --remove t3` | Atomic tag update (frontmatter + schema + search index) |
|
|
125
127
|
| `mneme agent plan --goal "..." --doc-type <t> --client <c>` | Generate a deterministic TODO plan from the active profile |
|
|
126
128
|
| `mneme agent next-task` | Return the next ready task in the active plan |
|
|
127
129
|
| `mneme agent task-done <id>` | Mark a task as done |
|
|
128
|
-
| `mneme sync` | Sync wiki to
|
|
130
|
+
| `mneme sync` | Sync wiki pages to FTS5 search index |
|
|
131
|
+
| `mneme reindex` | Rebuild search index from wiki pages |
|
|
129
132
|
| `mneme drift` | Detect layer desynchronization |
|
|
130
133
|
| `mneme stats` | Health overview |
|
|
131
134
|
| `mneme repair` | Repair a corrupted search index (rebuilds the FTS5 index) |
|
|
132
135
|
|
|
133
|
-
**Formats:** `.md`, `.txt`, `.pdf`
|
|
136
|
+
**Formats:** `.md`, `.txt`, `.pdf`, `.xlsx` (with `pip install "mneme-cli[xlsx]"`)
|
|
134
137
|
|
|
135
138
|
---
|
|
136
139
|
|
|
@@ -165,6 +168,121 @@ Mneme generates the plan deterministically from the active profile's section_not
|
|
|
165
168
|
|
|
166
169
|
---
|
|
167
170
|
|
|
171
|
+
## End-to-end example: from raw documents to a tagged, searchable, validated knowledge base
|
|
172
|
+
|
|
173
|
+
A realistic walkthrough showing how the human, the CLI, and the LLM agent collaborate. Suppose you're building a knowledge base for **Parkiwatch**, a medical device for Parkinson's monitoring.
|
|
174
|
+
|
|
175
|
+
### Step 1 — Scaffold a workspace (human, one-time)
|
|
176
|
+
|
|
177
|
+
```bash
|
|
178
|
+
mneme new ~/projects/parkiwatch --name Parkiwatch --client parkiwatch --profile eu-mdr
|
|
179
|
+
cd ~/projects/parkiwatch
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
Creates the workspace tree, sets the EU MDR writing-style profile, and initializes empty schema files.
|
|
183
|
+
|
|
184
|
+
### Step 2 — Ingest source material (human)
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
# Drop a folder of source documents into inbox/, then bulk-process
|
|
188
|
+
cp -r ~/Downloads/parkinson-research/* inbox/
|
|
189
|
+
mneme tornado --client parkiwatch
|
|
190
|
+
|
|
191
|
+
# Or ingest individual files
|
|
192
|
+
mneme ingest research-paper.pdf parkiwatch
|
|
193
|
+
mneme ingest-csv risk-register.csv parkiwatch --mapping risk-register
|
|
194
|
+
mneme ingest spec-table.xlsx parkiwatch # .xlsx renders sheets as markdown tables
|
|
195
|
+
mneme ingest-dir docs/ parkiwatch --recursive # walk subdirectories
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
What happens per ingest: source file → wiki page in `wiki/parkiwatch/` → frontmatter with auto-extracted entities → entry in `index.md` → row in the FTS5 search DB → log entry.
|
|
199
|
+
|
|
200
|
+
### Step 3 — Tag the new pages (LLM agent)
|
|
201
|
+
|
|
202
|
+
The new pages have only the auto-applied `parkiwatch` client tag. The agent now adds meaningful tags:
|
|
203
|
+
|
|
204
|
+
```bash
|
|
205
|
+
# For each new page, the agent runs:
|
|
206
|
+
mneme tags suggest parkiwatch/research-paper > /tmp/packet.md
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
The packet contains the page body, the current tag taxonomy (every tag in the workspace + usage counts), and a ready-to-paste prompt. **The LLM reads the packet** — it understands the content and decides on tags, preferring existing taxonomy entries when they fit. The LLM's response is JSON:
|
|
210
|
+
|
|
211
|
+
```json
|
|
212
|
+
{"tags": ["clinical-trial", "iso-13485"], "new_tags": ["bradykinesia-detection"]}
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
The agent then runs:
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
mneme tags apply parkiwatch/research-paper \
|
|
219
|
+
--add clinical-trial,iso-13485,bradykinesia-detection
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
Atomic operation: rewrites the wiki page frontmatter, updates `schema/tags.json`, re-indexes the page in FTS5 (so search picks up the new tags immediately), appends a log entry. **Repeat for every page** — the taxonomy grows, and subsequent pages tend to reuse existing tags (consistency).
|
|
223
|
+
|
|
224
|
+
### Step 4 — Search the knowledge base (anyone)
|
|
225
|
+
|
|
226
|
+
```bash
|
|
227
|
+
mneme search "bradykinesia" # BM25 + Porter stemming
|
|
228
|
+
mneme search "clinical evaluation" --client parkiwatch # client-scoped
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
Queries typically complete in under a millisecond. Each result includes the page title, a snippet (with `<b>highlights</b>`), tags, and the BM25 score.
|
|
232
|
+
|
|
233
|
+
### Step 5 — Produce a regulatory deliverable (LLM agent driving the agent loop)
|
|
234
|
+
|
|
235
|
+
```bash
|
|
236
|
+
# Generate a deterministic plan from the active profile
|
|
237
|
+
mneme agent plan --goal "produce a Design Validation Report" \
|
|
238
|
+
--doc-type design-validation-report \
|
|
239
|
+
--client parkiwatch
|
|
240
|
+
# → 15 tasks: 11 section drafts + assemble + harmonize + review + submission-check
|
|
241
|
+
|
|
242
|
+
# Walk the plan
|
|
243
|
+
mneme agent next-task
|
|
244
|
+
# → Task: section-purpose-and-scope
|
|
245
|
+
# next_command: mneme draft --doc-type design-validation-report \
|
|
246
|
+
# --section purpose-and-scope --client parkiwatch
|
|
247
|
+
|
|
248
|
+
mneme draft --doc-type design-validation-report \
|
|
249
|
+
--section purpose-and-scope --client parkiwatch \
|
|
250
|
+
--query "purpose scope intended use" \
|
|
251
|
+
--out /tmp/write-packet.md
|
|
252
|
+
|
|
253
|
+
# The LLM reads /tmp/write-packet.md (which includes wiki search hits as evidence,
|
|
254
|
+
# the profile's writing-style rules, and a write prompt) and produces the section.
|
|
255
|
+
# The agent writes the section to wiki/parkiwatch/design-validation-report.md.
|
|
256
|
+
|
|
257
|
+
mneme agent task-done section-purpose-and-scope
|
|
258
|
+
|
|
259
|
+
# ... repeat for each section ...
|
|
260
|
+
|
|
261
|
+
# After all sections drafted:
|
|
262
|
+
mneme harmonize --client parkiwatch --fix # mechanical vocabulary swap
|
|
263
|
+
mneme validate writing-style parkiwatch/design-validation-report > /tmp/review.md
|
|
264
|
+
# The LLM reads /tmp/review.md, critiques every section, applies fixes in place
|
|
265
|
+
mneme agent task-done review-page
|
|
266
|
+
|
|
267
|
+
# Submission readiness
|
|
268
|
+
mneme validate consistency --client parkiwatch # cross-doc version checks
|
|
269
|
+
mneme trace gaps parkiwatch # find broken trace chains
|
|
270
|
+
mneme trace matrix parkiwatch --csv --out trace-matrix.csv # for the DHF
|
|
271
|
+
mneme snapshot parkiwatch # versioned audit zip
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
### Who does what
|
|
275
|
+
|
|
276
|
+
| Layer | Responsibility |
|
|
277
|
+
|---|---|
|
|
278
|
+
| **Human** | Drops sources, runs commands, reviews diffs, ships the deliverable |
|
|
279
|
+
| **mneme CLI** | Deterministic infrastructure: parses files, builds packets, indexes, traces, harmonizes vocabulary, generates plans, atomic state updates |
|
|
280
|
+
| **LLM agent** | All reasoning: classifying entities, choosing tags, drafting prose, grading writing style, deciding when a chain is complete |
|
|
281
|
+
|
|
282
|
+
mneme never calls an LLM. The LLM never bypasses mneme's atomic operations. They meet at the packet boundary.
|
|
283
|
+
|
|
284
|
+
---
|
|
285
|
+
|
|
168
286
|
## How It Works
|
|
169
287
|
|
|
170
288
|
```
|
|
@@ -177,9 +295,9 @@ Mneme generates the plan deterministically from the active profile's section_not
|
|
|
177
295
|
| Frontmatter, citations, [[wikilinks]]
|
|
178
296
|
| You read and browse here
|
|
179
297
|
|
|
|
180
|
-
+--->
|
|
181
|
-
|
|
|
182
|
-
|
|
|
298
|
+
+---> Search Index (SQLite FTS5)
|
|
299
|
+
| BM25 ranking, Porter stemming
|
|
300
|
+
| Sub-millisecond queries, zero dependencies
|
|
183
301
|
|
|
|
184
302
|
+---> Schema Layer (JSON)
|
|
185
303
|
entities.json - people, companies, products
|
|
@@ -187,9 +305,9 @@ Mneme generates the plan deterministically from the active profile's section_not
|
|
|
187
305
|
tags.json - taxonomy
|
|
188
306
|
```
|
|
189
307
|
|
|
190
|
-
Every `mneme ingest` writes
|
|
308
|
+
Every `mneme ingest` writes the wiki page and updates the search index atomically. `mneme drift` catches desync. `mneme reindex` rebuilds the index from wiki pages.
|
|
191
309
|
|
|
192
|
-
**
|
|
310
|
+
**Zero external dependencies for search.** SQLite FTS5 is available through the `sqlite3` module in Python's stdlib — no install, no API key, no capacity limit.
|
|
193
311
|
|
|
194
312
|
---
|
|
195
313
|
|
|
@@ -364,14 +482,15 @@ See `EXAMPLES.md` Example 13 for a full walkthrough with a real Parkiwatch scena
|
|
|
364
482
|
|
|
365
483
|
## When You Need This
|
|
366
484
|
|
|
367
|
-
| Scale |
|
|
368
|
-
|
|
369
|
-
| 5 docs |
|
|
370
|
-
| 50 docs |
|
|
371
|
-
| 500 docs |
|
|
372
|
-
| 5,000 docs |
|
|
485
|
+
| Scale | Search performance |
|
|
486
|
+
|---|---|
|
|
487
|
+
| 5 docs | Sub-millisecond |
|
|
488
|
+
| 50 docs | Sub-millisecond |
|
|
489
|
+
| 500 docs | Sub-millisecond, BM25 ranked |
|
|
490
|
+
| 5,000 docs | A few ms, still ranked by relevance |
|
|
491
|
+
| 50,000 docs | Tens of ms |
|
|
373
492
|
|
|
374
|
-
|
|
493
|
+
SQLite FTS5 scales transparently. No tuning, no capacity limits.
|
|
375
494
|
|
|
376
495
|
---
|
|
377
496
|
|
|
@@ -382,7 +501,7 @@ mneme/
|
|
|
382
501
|
sources/ Raw documents (immutable, never modified)
|
|
383
502
|
wiki/ Markdown knowledge pages (Obsidian-compatible)
|
|
384
503
|
schema/ entities.json, graph.json, tags.json
|
|
385
|
-
|
|
504
|
+
search.db SQLite FTS5 search index
|
|
386
505
|
core.py Engine (ingest, search, sync, drift, repair)
|
|
387
506
|
config.py Configuration
|
|
388
507
|
server.py Web dashboard
|
|
@@ -448,7 +567,7 @@ password = pypi-AgENd... # from https://test.pypi.org/manage/account/to
|
|
|
448
567
|
This project builds on two foundational ideas:
|
|
449
568
|
|
|
450
569
|
- **LLM Wiki pattern** by [Andrej Karpathy](https://gist.github.com/karpathy/442a6bf555914893e9891c11519de94f) -- the insight that LLMs should build and maintain a persistent, compounding wiki instead of re-deriving answers from raw documents on every query
|
|
451
|
-
- **
|
|
570
|
+
- **SQLite FTS5** -- the full-text search extension of SQLite, the world's most-deployed embedded database, with built-in BM25 ranking
|
|
452
571
|
- **Original implementation** -- [tashisleepy/knowledge-engine](https://github.com/tashisleepy/knowledge-engine) -- the first version that fused both patterns into a dual-layer bridge
|
|
453
572
|
|
|
454
573
|
---
|
|
@@ -8,7 +8,7 @@ Two distinct roots:
|
|
|
8
8
|
from here.
|
|
9
9
|
|
|
10
10
|
* WORKSPACE_DIR - where the user's data lives (wiki/, sources/, schema/,
|
|
11
|
-
|
|
11
|
+
search.db, index.md, log.md). Resolved in this order:
|
|
12
12
|
1. The MNEME_HOME environment variable, if set.
|
|
13
13
|
2. The current working directory.
|
|
14
14
|
|
|
@@ -54,9 +54,7 @@ BASE_DIR = WORKSPACE_DIR
|
|
|
54
54
|
WIKI_DIR = os.path.join(WORKSPACE_DIR, 'wiki')
|
|
55
55
|
SOURCES_DIR = os.path.join(WORKSPACE_DIR, 'sources')
|
|
56
56
|
SCHEMA_DIR = os.path.join(WORKSPACE_DIR, 'schema')
|
|
57
|
-
|
|
58
|
-
MASTER_MV2 = os.path.join(MEMVID_DIR, 'master.mv2')
|
|
59
|
-
PER_CLIENT_DIR = os.path.join(MEMVID_DIR, 'per-client')
|
|
57
|
+
SEARCH_DB = os.path.join(WORKSPACE_DIR, 'search.db')
|
|
60
58
|
INDEX_FILE = os.path.join(WORKSPACE_DIR, 'index.md')
|
|
61
59
|
LOG_FILE = os.path.join(WORKSPACE_DIR, 'log.md')
|
|
62
60
|
TEMPLATES_DIR = os.path.join(WIKI_DIR, '_templates')
|
|
@@ -78,13 +76,8 @@ WORKSPACE_MAPPINGS_DIR = os.path.join(WORKSPACE_PROFILES_DIR, 'mappings')
|
|
|
78
76
|
EXCLUDED_DIRS = ['_templates', '.baselines']
|
|
79
77
|
EXCLUDED_FILES = ['_meta.yaml']
|
|
80
78
|
|
|
81
|
-
#
|
|
82
|
-
|
|
83
|
-
MIN_CHUNK_SIZE = 50 # don't create tiny frames
|
|
84
|
-
|
|
85
|
-
# Ingest limits to prevent hangs on huge files
|
|
86
|
-
MAX_CHUNKS_PER_INGEST = 200 # hard cap on chunks sent to memvid per page
|
|
87
|
-
CHUNK_COMMIT_BATCH = 50 # commit to memvid every N chunks
|
|
79
|
+
# Log rotation
|
|
80
|
+
LOG_MAX_ENTRIES = 500 # archive after this many entries
|
|
88
81
|
|
|
89
82
|
# Entity extraction stopwords
|
|
90
83
|
ENTITY_STOPWORDS = {
|