mnemofish 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mnemo/__init__.py +10 -0
- mnemo/cli.py +236 -0
- mnemo/config.py +32 -0
- mnemo/daily.py +29 -0
- mnemo/embed.py +50 -0
- mnemo/index.py +205 -0
- mnemo/note.py +75 -0
- mnemo/portability.py +126 -0
- mnemo/recall.py +71 -0
- mnemo/search.py +78 -0
- mnemo/server.py +94 -0
- mnemo/vault.py +68 -0
- mnemo/writer.py +128 -0
- mnemofish-0.2.0.dist-info/METADATA +243 -0
- mnemofish-0.2.0.dist-info/RECORD +18 -0
- mnemofish-0.2.0.dist-info/WHEEL +4 -0
- mnemofish-0.2.0.dist-info/entry_points.txt +3 -0
- mnemofish-0.2.0.dist-info/licenses/LICENSE +21 -0
mnemo/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""mnemo — persistent, portable, cross-AI memory over a markdown vault."""
|
|
2
|
+
|
|
3
|
+
__version__ = "0.2.0"
|
|
4
|
+
|
|
5
|
+
from .config import Config
|
|
6
|
+
from .index import Index
|
|
7
|
+
from .note import Note
|
|
8
|
+
from .search import Search
|
|
9
|
+
|
|
10
|
+
__all__ = ["Config", "Index", "Note", "Search", "__version__"]
|
mnemo/cli.py
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"""mnemo CLI.
|
|
2
|
+
|
|
3
|
+
F1: reindex / search / get
|
|
4
|
+
F2: recall (push, for the SessionStart hook) / write (with dedup)
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import argparse
|
|
10
|
+
import json
|
|
11
|
+
import os
|
|
12
|
+
import sys
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
from .config import Config
|
|
16
|
+
from .index import Index
|
|
17
|
+
from .recall import build_recall
|
|
18
|
+
from .search import Search
|
|
19
|
+
from .vault import detect_project
|
|
20
|
+
from .writer import write_note
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _csv(value: str) -> list[str]:
|
|
24
|
+
return [v.strip() for v in (value or "").split(",") if v.strip()]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _dest_from_url(url: str) -> str:
|
|
28
|
+
name = url.rstrip("/").split("/")[-1]
|
|
29
|
+
if name.endswith(".git"):
|
|
30
|
+
name = name[:-4]
|
|
31
|
+
return name or "vault"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _build_parser() -> argparse.ArgumentParser:
|
|
35
|
+
p = argparse.ArgumentParser(
|
|
36
|
+
prog="mnemo", description="Persistent memory over a markdown vault"
|
|
37
|
+
)
|
|
38
|
+
p.add_argument("--vault", help="vault path (default: $MNEMO_VAULT or cwd)")
|
|
39
|
+
sub = p.add_subparsers(dest="cmd", required=True)
|
|
40
|
+
|
|
41
|
+
sp = sub.add_parser("reindex", help="refresh the derived index from the vault")
|
|
42
|
+
sp.add_argument("--full", action="store_true", help="reparse every note")
|
|
43
|
+
|
|
44
|
+
ss = sub.add_parser("search", help="search notes (returns summaries, not bodies)")
|
|
45
|
+
ss.add_argument("query")
|
|
46
|
+
ss.add_argument("--type")
|
|
47
|
+
ss.add_argument("--project")
|
|
48
|
+
ss.add_argument("-k", type=int, default=5)
|
|
49
|
+
ss.add_argument("--json", action="store_true")
|
|
50
|
+
|
|
51
|
+
sg = sub.add_parser("get", help="print a full note by id")
|
|
52
|
+
sg.add_argument("id")
|
|
53
|
+
|
|
54
|
+
sr = sub.add_parser("recall", help="build the session-start recall block")
|
|
55
|
+
sr.add_argument("--project", help="override project (else detected)")
|
|
56
|
+
sr.add_argument("--project-dir", help="dir to detect project from (default: cwd)")
|
|
57
|
+
sr.add_argument("--hook", action="store_true", help="emit SessionStart JSON")
|
|
58
|
+
sr.add_argument("--reindex", action="store_true", help="refresh index first")
|
|
59
|
+
|
|
60
|
+
sw = sub.add_parser("write", help="add or update a note (deduped)")
|
|
61
|
+
sw.add_argument("--type", required=True)
|
|
62
|
+
sw.add_argument("--title", required=True)
|
|
63
|
+
sw.add_argument("--summary", default="")
|
|
64
|
+
sw.add_argument("--body", help="body text; '-' or omitted reads stdin")
|
|
65
|
+
sw.add_argument("--project")
|
|
66
|
+
sw.add_argument("--tags", default="", help="comma-separated")
|
|
67
|
+
sw.add_argument("--links", default="", help="comma-separated ids")
|
|
68
|
+
sw.add_argument("--id")
|
|
69
|
+
|
|
70
|
+
sd = sub.add_parser("daily", help="append an entry to today's daily note")
|
|
71
|
+
sd.add_argument("text", nargs="?", help="entry text; omitted/'-' reads stdin")
|
|
72
|
+
|
|
73
|
+
sub.add_parser("serve", help="run the MCP server over stdio (cross-AI, pull)")
|
|
74
|
+
|
|
75
|
+
si = sub.add_parser("init", help="scaffold the vault as a git repo")
|
|
76
|
+
si.add_argument("--remote", help="git remote URL (use a PRIVATE repo)")
|
|
77
|
+
|
|
78
|
+
sy = sub.add_parser("sync", help="commit + pull --rebase + push the vault")
|
|
79
|
+
sy.add_argument("--message", "-m")
|
|
80
|
+
|
|
81
|
+
sc = sub.add_parser("clone", help="clone a vault on a new machine, then reindex")
|
|
82
|
+
sc.add_argument("url")
|
|
83
|
+
sc.add_argument("dest", nargs="?", help="destination dir (default: repo name)")
|
|
84
|
+
|
|
85
|
+
se = sub.add_parser("export", help="zip the vault markdown (for Drive/transfer)")
|
|
86
|
+
se.add_argument("out", help="output .zip path")
|
|
87
|
+
|
|
88
|
+
sm = sub.add_parser("import", help="unzip an archive into the vault, then reindex")
|
|
89
|
+
sm.add_argument("archive", help="input .zip path")
|
|
90
|
+
|
|
91
|
+
spj = sub.add_parser(
|
|
92
|
+
"project",
|
|
93
|
+
help="show the detected project, or write a .mnemo-project marker",
|
|
94
|
+
)
|
|
95
|
+
spj.add_argument("name", nargs="?", help="set this project name in the current dir")
|
|
96
|
+
spj.add_argument("--dir", default=".", help="directory (default: cwd)")
|
|
97
|
+
return p
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _cmd_recall(args, cfg, idx) -> None:
    """Print the recall block for the current (or given) project.

    With ``--reindex`` the index is refreshed first. The project is
    ``args.project`` when set, otherwise it is detected from
    ``args.project_dir`` (falling back to the working directory).

    With ``--hook`` the block is wrapped in a SessionStart JSON payload, and
    nothing is printed when the block is empty; without it the block is
    printed as-is.
    """
    if args.reindex:
        idx.reindex(cfg.vault)

    project = args.project
    if not project:
        project = detect_project(args.project_dir or os.getcwd())
    block = build_recall(idx, project)

    if not args.hook:
        print(block)
        return
    if not block:
        return

    payload = {
        "hookSpecificOutput": {
            "hookEventName": "SessionStart",
            "additionalContext": block,
        }
    }
    print(json.dumps(payload, ensure_ascii=False))
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _cmd_write(args, cfg, idx) -> None:
    """Write (or update) a note from CLI arguments and print the result as JSON.

    When ``--body`` is omitted or ``-``, the body is read from stdin, unless
    stdin is a terminal, in which case the body is empty.
    """
    text = args.body
    if text in (None, "-"):
        text = sys.stdin.read() if not sys.stdin.isatty() else ""

    fields = {
        "type": args.type,
        "title": args.title,
        "summary": args.summary,
        "body": text,
        "project": args.project,
        "tags": _csv(args.tags),
        "links": _csv(args.links),
        "id": args.id,
    }
    outcome = write_note(cfg, idx, **fields)
    print(json.dumps(outcome, ensure_ascii=False))
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _force_utf8() -> None:
|
|
142
|
+
# Hooks and non-ASCII (Turkish) notes must not crash on Windows cp1252.
|
|
143
|
+
for stream in (sys.stdout, sys.stderr):
|
|
144
|
+
try:
|
|
145
|
+
stream.reconfigure(encoding="utf-8")
|
|
146
|
+
except (AttributeError, ValueError):
|
|
147
|
+
pass
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def main(argv: list[str] | None = None) -> int:
    """Run the ``mnemo`` CLI and return the process exit code (always 0).

    ``argv`` defaults to ``sys.argv[1:]`` (argparse behaviour). Commands fall
    into three groups: ``serve`` and ``project`` need no index; the
    portability commands only need the vault path; everything else opens the
    index and closes it again before returning.
    """
    _force_utf8()
    args = _build_parser().parse_args(argv)
    command = args.cmd

    if command == "serve":
        from .server import run as serve

        serve(args.vault)
        return 0

    if command == "project":
        from .vault import MARKER, detect_project

        base = Path(args.dir)
        if args.name:
            (base / MARKER).write_text(args.name + "\n", encoding="utf-8")
            print(json.dumps({"marker": str(base / MARKER), "project": args.name}))
        else:
            print(json.dumps({"project": detect_project(base)}))
        return 0

    if command in ("init", "sync", "clone", "export", "import"):
        from . import portability as port

        cfg = Config(args.vault)
        if command == "init":
            if args.remote:
                print(
                    "WARNING: use a PRIVATE repo — your notes will be pushed there.",
                    file=sys.stderr,
                )
            outcome = port.init_vault(cfg.vault, args.remote)
        elif command == "sync":
            outcome = port.sync_vault(cfg.vault, args.message)
        elif command == "clone":
            target_dir = args.dest or _dest_from_url(args.url)
            outcome = port.clone_vault(args.url, target_dir)
        elif command == "export":
            outcome = port.export_vault(cfg.vault, args.out)
        else:  # "import" is the only command left in this group
            outcome = port.import_vault(args.archive, cfg.vault)
        print(json.dumps(outcome))
        return 0

    cfg = Config(args.vault)
    # Only reindex/search/write get the embedding model; recall/daily/get stay
    # on the FTS-only path so the SessionStart hook never loads the model.
    embedder = None
    if command in ("reindex", "search", "write"):
        from .embed import Embedder

        if Embedder.is_available():
            embedder = Embedder()

    idx = Index(cfg.index_path, embedder=embedder)
    try:
        if command == "reindex":
            stats = idx.reindex(cfg.vault, full=args.full)
            print(json.dumps(stats))
        elif command == "search":
            hits = Search(idx).search(
                args.query, type=args.type, project=args.project, k=args.k
            )
            if args.json:
                print(json.dumps(hits, ensure_ascii=False, indent=2))
            else:
                if not hits:
                    print("(no matches)")
                for hit in hits:
                    project_label = hit["project"] or "-"
                    print(
                        f"[{hit['score']}] {hit['title']} ({hit['type']}/{project_label})"
                    )
                    if hit["summary"]:
                        print(f" {hit['summary']}")
                    print(f" {hit['path']}")
        elif command == "get":
            found = Search(idx).get(args.id)
            if found:
                print(json.dumps(found, ensure_ascii=False, indent=2))
            else:
                print("not found")
        elif command == "recall":
            _cmd_recall(args, cfg, idx)
        elif command == "write":
            _cmd_write(args, cfg, idx)
        elif command == "daily":
            from .daily import append_daily

            entry = args.text
            if entry in (None, "-"):
                # Piped input is stripped; an interactive terminal gives "".
                entry = "" if sys.stdin.isatty() else sys.stdin.read().strip()
            print(json.dumps(append_daily(cfg, idx, entry), ensure_ascii=False))
    finally:
        idx.close()
    return 0
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
# Script entry point: exit with whatever status main() returns.
if __name__ == "__main__":
    raise SystemExit(main())
|
mnemo/config.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Configuration — vault location + derived index path."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
# Location of the derived index, relative to the vault root.
INDEX_REL = Path(".mnemo") / "index.sqlite"


class Config:
    """Where the vault lives and where its derived index is stored.

    The vault is the explicit argument if given, else ``$MNEMO_VAULT``, else
    the current directory; it is always stored as a resolved absolute path.
    The index path is ``<vault>/.mnemo/index.sqlite``.
    """

    def __init__(self, vault: str | Path | None = None):
        root = self._resolve_vault(vault)
        self.vault = root
        self.index_path = root / INDEX_REL

    @staticmethod
    def _resolve_vault(vault: str | Path | None) -> Path:
        """Pick the vault directory: argument, then environment, then cwd."""
        chosen = vault or os.environ.get("MNEMO_VAULT")
        if not chosen:
            return Path.cwd().resolve()
        return Path(chosen).expanduser().resolve()

    def ensure_dirs(self) -> None:
        """Create the directory that holds the index file, if missing."""
        index_dir = self.index_path.parent
        index_dir.mkdir(parents=True, exist_ok=True)
|
mnemo/daily.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Daily journal — append timestamped entries to today's daily note."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import datetime as _dt
|
|
6
|
+
|
|
7
|
+
import frontmatter
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def append_daily(cfg, index, text: str) -> dict:
    """Append a timestamped bullet to today's daily note and reindex.

    The note lives at ``<vault>/daily/<YYYY-MM-DD>.md``. An existing file
    keeps its frontmatter and body; a new one gets ``id``, ``type``,
    ``title`` and ``created`` frontmatter. ``updated`` is always set to
    today's date.

    Args:
        cfg: object exposing ``vault`` (a ``Path``).
        index: object exposing ``reindex(vault)``.
        text: entry text, written after an ``HH:MM`` stamp.

    Returns:
        ``{"path": <path relative to the vault>, "entry": text}``.
    """
    # Read the clock once: taking the date and the time from separate calls
    # could file a 00:00 entry under the previous day (or vice versa) when
    # the call straddles midnight.
    now = _dt.datetime.now()
    today = now.date().isoformat()
    path = cfg.vault / "daily" / f"{today}.md"

    if path.exists():
        post = frontmatter.load(str(path))
        meta = post.metadata
        body = post.content
    else:
        meta = {"id": today, "type": "daily", "title": today, "created": today}
        body = ""

    stamp = now.strftime("%H:%M")
    # strip() removes the leading newline when the body was empty.
    body = f"{body.rstrip()}\n- {stamp} {text}".strip()
    meta["updated"] = today

    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(frontmatter.dumps(frontmatter.Post(body, **meta)), encoding="utf-8")
    index.reindex(cfg.vault)
    return {"path": str(path.relative_to(cfg.vault)), "entry": text}
|
mnemo/embed.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Optional semantic embedding layer (pluggable, lazy).
|
|
2
|
+
|
|
3
|
+
Backend: fastembed (ONNX) — fast cold-start, light install, friendly to
|
|
4
|
+
`uv tool install`. Default model is multilingual (notes may be Turkish).
|
|
5
|
+
Everything degrades to FTS5 when the ``embed`` extra is absent.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from collections.abc import Sequence
|
|
11
|
+
|
|
12
|
+
# Multilingual MiniLM: fast + stable, handles Turkish. 384-dim.
DEFAULT_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
DEFAULT_DIM = 384


class Embedder:
    """Lazy text-embedding wrapper; the model is built on first use."""

    def __init__(self, model_name: str = DEFAULT_MODEL, dim: int = DEFAULT_DIM):
        self.model_name = model_name
        self.dim = dim
        # Created by _ensure() the first time an encoding is requested.
        self._model = None

    @staticmethod
    def is_available() -> bool:
        """Return True when both ``fastembed`` and ``sqlite_vec`` import."""
        try:
            import fastembed  # noqa: F401
            import sqlite_vec  # noqa: F401
        except Exception:
            return False
        else:
            return True

    def _ensure(self):
        """Return the embedding model, constructing it on the first call."""
        if self._model is not None:
            return self._model

        import warnings

        # Warnings raised while importing/constructing the model are silenced.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            from fastembed import TextEmbedding

            self._model = TextEmbedding(self.model_name)
        return self._model

    def encode_one(self, text: str) -> list[float]:
        """Embed a single string and return its vector as plain floats."""
        vectors = self._ensure().embed([text])
        first = next(iter(vectors))
        return list(map(float, first))

    def encode(self, texts: Sequence[str]) -> list[list[float]]:
        """Embed several strings; one list of floats per input, in order."""
        vectors = self._ensure().embed(list(texts))
        return [list(map(float, vector)) for vector in vectors]
|
mnemo/index.py
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
"""SQLite index: `notes` metadata + FTS5 full-text + optional vector store.
|
|
2
|
+
|
|
3
|
+
The index is derived from the vault and fully rebuildable. Reindexing is
|
|
4
|
+
incremental (a file is re-parsed only when its mtime changed). When an
|
|
5
|
+
``Embedder`` is supplied and sqlite-vec is available, a cosine vector table is
|
|
6
|
+
maintained alongside FTS for hybrid search; otherwise everything works on FTS5.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import hashlib
|
|
12
|
+
import json
|
|
13
|
+
import re
|
|
14
|
+
import sqlite3
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
from .note import Note
|
|
18
|
+
from .vault import iter_note_files
|
|
19
|
+
|
|
20
|
+
SCHEMA = """
|
|
21
|
+
CREATE TABLE IF NOT EXISTS notes (
|
|
22
|
+
id TEXT PRIMARY KEY,
|
|
23
|
+
path TEXT UNIQUE,
|
|
24
|
+
mtime REAL,
|
|
25
|
+
hash TEXT,
|
|
26
|
+
type TEXT,
|
|
27
|
+
project TEXT,
|
|
28
|
+
title TEXT,
|
|
29
|
+
summary TEXT,
|
|
30
|
+
tags TEXT,
|
|
31
|
+
created TEXT,
|
|
32
|
+
updated TEXT,
|
|
33
|
+
body TEXT
|
|
34
|
+
);
|
|
35
|
+
|
|
36
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS notes_fts USING fts5(
|
|
37
|
+
id UNINDEXED,
|
|
38
|
+
title,
|
|
39
|
+
summary,
|
|
40
|
+
body,
|
|
41
|
+
tags,
|
|
42
|
+
tokenize = 'unicode61 remove_diacritics 2'
|
|
43
|
+
);
|
|
44
|
+
|
|
45
|
+
CREATE INDEX IF NOT EXISTS idx_notes_type ON notes(type);
|
|
46
|
+
CREATE INDEX IF NOT EXISTS idx_notes_project ON notes(project);
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
_WORD_RE = re.compile(r"\w+", re.UNICODE)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _hash(text: str) -> str:
|
|
53
|
+
return hashlib.sha256(text.encode("utf-8")).hexdigest()
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def fts_query(text: str) -> str:
|
|
57
|
+
"""Build a safe FTS5 query: prefix-matched terms joined by OR."""
|
|
58
|
+
terms = _WORD_RE.findall(text)
|
|
59
|
+
if not terms:
|
|
60
|
+
return '""'
|
|
61
|
+
return " OR ".join(f'"{t}"*' for t in terms)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class Index:
    """SQLite index over a vault: note metadata, full-text and optional vectors.

    Rows live in ``notes`` (metadata + body) and ``notes_fts`` (FTS5). When an
    embedder is supplied and the sqlite-vec extension loads, a cosine
    ``vec_notes`` table is kept in step as well; otherwise ``vectors`` stays
    False and the vector helpers are no-ops or return None.

    Usable as a context manager; leaving the block closes the connection.
    """

    def __init__(self, db_path: str | Path, embedder=None):
        """Open the database at ``db_path``, creating it and its tables.

        Args:
            db_path: SQLite file; its parent directory is created if missing.
            embedder: optional object exposing ``dim`` and ``encode_one``.
        """
        self.db_path = Path(db_path)
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self.con = sqlite3.connect(str(self.db_path))
        self.con.row_factory = sqlite3.Row
        self.embedder = embedder
        self.vectors = False
        self._vec = None
        if embedder is not None:
            self._enable_vectors()
        self.con.executescript(SCHEMA)
        if self.vectors:
            self.con.execute(
                "CREATE VIRTUAL TABLE IF NOT EXISTS vec_notes USING vec0("
                f"note_id TEXT, embedding float[{embedder.dim}] distance_metric=cosine)"
            )

    def _enable_vectors(self) -> None:
        """Try to load sqlite-vec; on any failure fall back to FTS only."""
        try:
            import sqlite_vec

            self.con.enable_load_extension(True)
            try:
                sqlite_vec.load(self.con)
            finally:
                # Never leave extension loading switched on, even when the
                # load itself failed.
                self.con.enable_load_extension(False)
            self._vec = sqlite_vec
            self.vectors = True
        except Exception:
            # Deliberate best effort: missing package, a Python build without
            # extension support, or a load error all mean "no vectors".
            self.vectors = False

    def close(self) -> None:
        """Close the underlying connection."""
        self.con.close()

    def __enter__(self) -> "Index":
        return self

    def __exit__(self, *exc) -> None:
        self.close()

    # ------------------------------------------------------------------ write
    def _vec_delete(self, note_id: str) -> None:
        """Remove the vector row for ``note_id`` (no-op without vectors)."""
        if self.vectors:
            self.con.execute("DELETE FROM vec_notes WHERE note_id = ?", (note_id,))

    def _vec_insert(self, note: Note) -> None:
        """Embed ``note.search_text()`` and store it (no-op without vectors)."""
        if not self.vectors:
            return
        vec = self.embedder.encode_one(note.search_text())
        self.con.execute(
            "INSERT INTO vec_notes(note_id, embedding) VALUES (?, ?)",
            (note.id, self._vec.serialize_float32(vec)),
        )

    def _upsert(self, note: Note, mtime: float, h: str) -> None:
        """Replace every index row for this note's path or id.

        Rows previously stored for the same path (possibly under another id)
        and rows stored for the same id (possibly under another path) are
        removed first, then fresh ``notes``, ``notes_fts`` and vector rows
        are inserted. Does not commit.
        """
        rel = str(note.path)
        old = self.con.execute("SELECT id FROM notes WHERE path = ?", (rel,)).fetchone()
        if old:
            # The file may have changed its id since it was last indexed.
            self.con.execute("DELETE FROM notes_fts WHERE id = ?", (old["id"],))
            self._vec_delete(old["id"])
        self.con.execute("DELETE FROM notes_fts WHERE id = ?", (note.id,))
        self._vec_delete(note.id)
        self.con.execute("DELETE FROM notes WHERE id = ? OR path = ?", (note.id, rel))
        self.con.execute(
            """INSERT INTO notes
            (id, path, mtime, hash, type, project, title, summary, tags,
            created, updated, body)
            VALUES (?,?,?,?,?,?,?,?,?,?,?,?)""",
            (
                note.id, rel, mtime, h, note.type, note.project, note.title,
                note.summary, json.dumps(note.tags, ensure_ascii=False),
                note.created, note.updated, note.body,
            ),
        )
        self.con.execute(
            "INSERT INTO notes_fts (id, title, summary, body, tags) VALUES (?,?,?,?,?)",
            (note.id, note.title, note.summary, note.body, " ".join(note.tags)),
        )
        self._vec_insert(note)

    def reindex(self, vault: str | Path, full: bool = False) -> dict[str, int]:
        """Sync the index with the vault and commit.

        A file is re-parsed when it is new, when its mtime differs from the
        stored one, or always when ``full`` is true. Entries whose file is
        gone are removed.

        Returns:
            Counts keyed ``added``, ``updated``, ``skipped`` and ``removed``.
        """
        vault = Path(vault)
        existing = {
            row["path"]: (row["mtime"], row["id"])
            for row in self.con.execute("SELECT path, mtime, id FROM notes")
        }
        seen: set[str] = set()
        added = updated = skipped = 0

        for f in iter_note_files(vault):
            rel = str(f.relative_to(vault))
            seen.add(rel)
            mtime = f.stat().st_mtime
            if not full and rel in existing and abs(existing[rel][0] - mtime) < 1e-6:
                skipped += 1
                continue
            note = Note.from_file(f)
            note.path = Path(rel)
            self._upsert(note, mtime, _hash(note.search_text()))
            if rel in existing:
                updated += 1
            else:
                added += 1

        removed = self._prune_missing(existing, seen)

        self.con.commit()
        return {"added": added, "updated": updated, "skipped": skipped, "removed": removed}

    def _prune_missing(self, existing: dict[str, tuple[float, str]], seen: set[str]) -> int:
        """Drop entries for paths in ``existing`` that are not in ``seen``.

        ``existing`` maps a previously indexed path to ``(mtime, id)``.
        Returns the number of vanished paths. Does not commit.
        """
        removed = 0
        for path, (_, nid) in existing.items():
            if path in seen:
                continue
            self.con.execute("DELETE FROM notes WHERE path = ?", (path,))
            # A moved or renamed file keeps its id, and _upsert has already
            # re-homed that id under the new path. Its FTS/vector rows now
            # belong to the live note, so purge them only when no notes row
            # owns the id any more.
            owner = self.con.execute(
                "SELECT 1 FROM notes WHERE id = ?", (nid,)
            ).fetchone()
            if owner is None:
                self.con.execute("DELETE FROM notes_fts WHERE id = ?", (nid,))
                self._vec_delete(nid)
            removed += 1
        return removed

    # ------------------------------------------------------------------- read
    def count(self) -> int:
        """Return the number of rows in ``notes``."""
        return self.con.execute("SELECT COUNT(*) FROM notes").fetchone()[0]

    def fts_ids(self, query: str, limit: int) -> list[str]:
        """Return up to ``limit`` note ids matching ``query``, ordered by bm25."""
        rows = self.con.execute(
            """SELECT id, bm25(notes_fts) AS rank
            FROM notes_fts WHERE notes_fts MATCH ?
            ORDER BY rank LIMIT ?""",
            (fts_query(query), limit),
        ).fetchall()
        return [r["id"] for r in rows]

    def vec_search(self, query: str, limit: int) -> list[tuple[str, float]] | None:
        """KNN over the vector store.

        Returns ``(id, cosine_distance)`` pairs ordered by distance, or None
        when semantic search is unavailable.
        """
        if not self.vectors:
            return None
        q = self.embedder.encode_one(query)
        rows = self.con.execute(
            """SELECT note_id, distance FROM vec_notes
            WHERE embedding MATCH ? AND k = ? ORDER BY distance""",
            (self._vec.serialize_float32(q), limit),
        ).fetchall()
        return [(r["note_id"], r["distance"]) for r in rows]

    def vec_ids(self, query: str, limit: int) -> list[str] | None:
        """Like ``vec_search`` but return only the ids (or None)."""
        hits = self.vec_search(query, limit)
        return None if hits is None else [h[0] for h in hits]
|
mnemo/note.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""Note model — markdown + YAML frontmatter parsing/serialization."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
import frontmatter
|
|
10
|
+
|
|
11
|
+
# Note types named by this module.
NOTE_TYPES = {"decision", "lesson", "daily", "project", "reference", "note"}

# Any run of characters other than lowercase ASCII letters and digits.
_SLUG_RE = re.compile(r"[^a-z0-9]+")


def slugify(text: str) -> str:
    """Lowercase ``text`` and join its ASCII alphanumeric runs with ``-``.

    Leading and trailing dashes are dropped; ``"note"`` is returned when
    nothing is left.
    """
    collapsed = _SLUG_RE.sub("-", text.lower())
    slug = collapsed.strip("-")
    if not slug:
        return "note"
    return slug
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _as_list(value) -> list[str]:
|
|
22
|
+
if value is None:
|
|
23
|
+
return []
|
|
24
|
+
if isinstance(value, str):
|
|
25
|
+
return [value.strip()] if value.strip() else []
|
|
26
|
+
return [str(v).strip() for v in value if str(v).strip()]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class Note:
    """A single memory entry (decision, lesson, daily entry, reference, ...)."""

    id: str
    type: str
    title: str
    body: str = ""
    summary: str = ""
    project: str | None = None
    tags: list[str] = field(default_factory=list)
    created: str | None = None
    updated: str | None = None
    links: list[str] = field(default_factory=list)
    path: Path | None = None

    @classmethod
    def from_post(cls, post: "frontmatter.Post", path: Path | None = None) -> "Note":
        """Build a note from a parsed frontmatter post.

        Missing metadata falls back as follows: the title to the file stem,
        the type to ``"note"``, and the id to the file stem or, failing
        that, the slugified title.
        """
        meta = post.metadata or {}
        fallback = path.stem if path else ""

        def optional_text(key):
            # Stringify a value that is present and truthy, else None.
            raw = meta.get(key)
            return str(raw) if raw else None

        title = str(meta.get("title") or fallback).strip()
        raw_project = meta.get("project")
        return cls(
            id=str(meta.get("id") or fallback or slugify(title)),
            type=str(meta.get("type") or "note").strip(),
            title=title,
            body=(post.content or "").strip(),
            summary=str(meta.get("summary") or "").strip(),
            project=str(raw_project).strip() if raw_project else None,
            tags=_as_list(meta.get("tags")),
            created=optional_text("created"),
            updated=optional_text("updated"),
            links=_as_list(meta.get("links")),
            path=Path(path) if path else None,
        )

    @classmethod
    def from_file(cls, path: str | Path) -> "Note":
        """Load and parse the markdown file at ``path``."""
        source = Path(path)
        return cls.from_post(frontmatter.load(str(source)), source)

    def search_text(self) -> str:
        """Return title, summary and body joined by newlines, skipping empties.

        This is the text used for indexing and hashing.
        """
        parts = [self.title, self.summary, self.body]
        return "\n".join(filter(None, parts))
|