@dinasor/mnemo-cli 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -0
- package/LICENSE +21 -0
- package/README.md +263 -0
- package/VERSION +1 -0
- package/bin/mnemo.js +139 -0
- package/memory.ps1 +178 -0
- package/memory_mac.sh +2447 -0
- package/package.json +36 -0
- package/scripts/memory/installer/bootstrap.ps1 +21 -0
- package/scripts/memory/installer/core/bridge.ps1 +285 -0
- package/scripts/memory/installer/core/io.ps1 +110 -0
- package/scripts/memory/installer/core/paths.ps1 +83 -0
- package/scripts/memory/installer/features/gitignore_setup.ps1 +80 -0
- package/scripts/memory/installer/features/hooks_setup.ps1 +157 -0
- package/scripts/memory/installer/features/mcp_setup.ps1 +87 -0
- package/scripts/memory/installer/features/memory_scaffold.ps1 +541 -0
- package/scripts/memory/installer/features/vector_setup.ps1 +103 -0
- package/scripts/memory/installer/templates/add-journal-entry.ps1 +122 -0
- package/scripts/memory/installer/templates/add-lesson.ps1 +151 -0
- package/scripts/memory/installer/templates/autonomy/__init__.py +6 -0
- package/scripts/memory/installer/templates/autonomy/context_safety.py +181 -0
- package/scripts/memory/installer/templates/autonomy/entity_resolver.py +215 -0
- package/scripts/memory/installer/templates/autonomy/ingest_pipeline.py +252 -0
- package/scripts/memory/installer/templates/autonomy/lifecycle_engine.py +254 -0
- package/scripts/memory/installer/templates/autonomy/policies.yaml +59 -0
- package/scripts/memory/installer/templates/autonomy/reranker.py +220 -0
- package/scripts/memory/installer/templates/autonomy/retrieval_router.py +148 -0
- package/scripts/memory/installer/templates/autonomy/runner.py +272 -0
- package/scripts/memory/installer/templates/autonomy/schema.py +150 -0
- package/scripts/memory/installer/templates/autonomy/vault_policy.py +205 -0
- package/scripts/memory/installer/templates/build-memory-sqlite.py +111 -0
- package/scripts/memory/installer/templates/clear-active.ps1 +55 -0
- package/scripts/memory/installer/templates/customization.md +84 -0
- package/scripts/memory/installer/templates/lint-memory.ps1 +217 -0
- package/scripts/memory/installer/templates/mnemo_vector.py +556 -0
- package/scripts/memory/installer/templates/query-memory-sqlite.py +95 -0
- package/scripts/memory/installer/templates/query-memory.ps1 +122 -0
- package/scripts/memory/installer/templates/rebuild-memory-index.ps1 +293 -0
|
@@ -0,0 +1,556 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Mnemo vector memory engine (v2).
|
|
4
|
+
Optional semantic layer for Mnemo memory with MCP tools.
|
|
5
|
+
Schema v2 adds typed memory units, fact lifecycle tables, and entity tags.
|
|
6
|
+
"""
|
|
7
|
+
import os
|
|
8
|
+
import re
|
|
9
|
+
import json
|
|
10
|
+
import sqlite3
|
|
11
|
+
import hashlib
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
import sqlite_vec
|
|
15
|
+
try:
|
|
16
|
+
from sqlite_vec import serialize_float32 as serialize_f32
|
|
17
|
+
except ImportError:
|
|
18
|
+
from sqlite_vec import serialize_f32 # backwards compatibility
|
|
19
|
+
from mcp.server.fastmcp import FastMCP
|
|
20
|
+
|
|
21
|
+
# Bump when the on-disk layout changes; init_db() migrates based on this value.
SCHEMA_VERSION = 2
# Vector dimensionality for the vec0 table. Gemini output is truncated to this
# via output_dimensionality; the OpenAI model is assumed to emit this size
# natively — TODO confirm against the provider docs.
EMBED_DIM = 1536
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _resolve_memory_root() -> Path:
|
|
26
|
+
override = os.getenv("MNEMO_MEMORY_ROOT", "").strip()
|
|
27
|
+
if override:
|
|
28
|
+
return Path(override).expanduser().resolve()
|
|
29
|
+
|
|
30
|
+
cwd = Path.cwd().resolve()
|
|
31
|
+
for root in (cwd, *cwd.parents):
|
|
32
|
+
for rel in ((".mnemo", "memory"), (".cursor", "memory")):
|
|
33
|
+
candidate = root.joinpath(*rel)
|
|
34
|
+
if candidate.exists():
|
|
35
|
+
return candidate
|
|
36
|
+
return cwd / ".mnemo" / "memory"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Resolved once at import time; all file scanning is relative to this root.
MEM_ROOT = _resolve_memory_root()
# Optional explicit DB location; otherwise the DB lives inside the memory root.
_DB_OVERRIDE = os.getenv("MNEMO_DB_PATH", "").strip()
DB_PATH = Path(_DB_OVERRIDE).expanduser().resolve() if _DB_OVERRIDE else (MEM_ROOT / "mnemo_vector.sqlite")
# Embedding provider: "openai" (default) or "gemini".
PROVIDER = os.getenv("MNEMO_PROVIDER", "openai").lower()

# Index/meta files that should never be embedded.
SKIP_NAMES = {
    "README.md", "index.md", "lessons-index.json",
    "journal-index.json", "journal-index.md",
}
# Directory names excluded from the sync walk.
SKIP_DIRS = {"legacy", "templates"}
# Hard cap on characters sent to the embedding API per chunk.
MAX_EMBED_CHARS = 12000
# Smaller batches for Gemini; larger for OpenAI.
BATCH_SIZE = 16 if PROVIDER == "gemini" else 64
# Lazily created, memoized embedding client (see _get_embed_client).
_EMBED_CLIENT = None

# Memory type authority weights for reranking
AUTHORITY_WEIGHTS = {
    "core": 1.0,        # hot-rules.md
    "procedural": 0.9,  # lessons
    "episodic": 0.7,    # journal/active-context
    "semantic": 0.8,    # digests/memo
    "resource": 0.5,    # general docs
    "vault": 0.0,       # redacted unless authorized
}
|
|
62
|
+
|
|
63
|
+
# File → memory_type mapping
|
|
64
|
+
def _infer_memory_type(path_str: str) -> str:
|
|
65
|
+
p = path_str.lower().replace("\\", "/")
|
|
66
|
+
if "hot-rules" in p or "memo.md" in p:
|
|
67
|
+
return "core"
|
|
68
|
+
if "/lessons/" in p and "/l-" in p:
|
|
69
|
+
return "procedural"
|
|
70
|
+
if "/journal/" in p or "active-context" in p:
|
|
71
|
+
return "episodic"
|
|
72
|
+
if "/digests/" in p:
|
|
73
|
+
return "semantic"
|
|
74
|
+
if "/vault/" in p:
|
|
75
|
+
return "vault"
|
|
76
|
+
return "semantic"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _infer_time_scope(memory_type: str) -> str:
|
|
80
|
+
if memory_type in ("episodic",):
|
|
81
|
+
return "recency-sensitive"
|
|
82
|
+
if memory_type in ("core", "procedural"):
|
|
83
|
+
return "atemporal"
|
|
84
|
+
return "time-bound"
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# Single MCP server instance; the @mcp.tool() decorators below register tools on it.
mcp = FastMCP("MnemoVector")
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _trim_for_embedding(text: str) -> str:
    """Clamp *text* to MAX_EMBED_CHARS so oversized chunks never hit the API."""
    if len(text) <= MAX_EMBED_CHARS:
        return text
    return text[:MAX_EMBED_CHARS]
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _get_embed_client():
    """Return a memoized embedding client for the configured PROVIDER.

    SDK imports are deferred to first use so the module loads even when the
    unused provider's package is not installed.

    Raises:
        RuntimeError: if the selected provider's API-key env var is unset.
    """
    global _EMBED_CLIENT
    if _EMBED_CLIENT is not None:
        return _EMBED_CLIENT

    if PROVIDER == "gemini":
        key = os.getenv("GEMINI_API_KEY")
        if not key:
            raise RuntimeError("GEMINI_API_KEY is not set")
        from google import genai
        _EMBED_CLIENT = genai.Client(api_key=key)
        return _EMBED_CLIENT

    # Default path: OpenAI.
    key = os.getenv("OPENAI_API_KEY")
    if not key:
        raise RuntimeError("OPENAI_API_KEY is not set")
    from openai import OpenAI
    _EMBED_CLIENT = OpenAI(api_key=key)
    return _EMBED_CLIENT
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def get_embeddings(texts: list[str]) -> list[list[float]]:
    """Embed a batch of texts via the configured provider.

    Each text is trimmed to MAX_EMBED_CHARS first. Returns one vector per
    input, in input order; an empty input returns an empty list.

    Raises:
        RuntimeError: if the provider returns a mismatched vector count
            (or the API key is missing — see _get_embed_client).
    """
    if not texts:
        return []
    trimmed = [_trim_for_embedding(t) for t in texts]
    client = _get_embed_client()

    if PROVIDER == "gemini":
        from google.genai import types
        result = client.models.embed_content(
            model="gemini-embedding-001",
            contents=trimmed,
            # Truncate Gemini vectors so they fit the vec0 table's EMBED_DIM.
            config=types.EmbedContentConfig(output_dimensionality=EMBED_DIM),
        )
        vectors = [emb.values for emb in result.embeddings]
    else:
        resp = client.embeddings.create(input=trimmed, model="text-embedding-3-small")
        vectors = [item.embedding for item in resp.data]

    # Guard against silent partial responses from either provider.
    if len(vectors) != len(trimmed):
        raise RuntimeError(f"Embedding provider returned {len(vectors)} vectors for {len(trimmed)} inputs")
    return vectors
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def get_embedding(text: str) -> list[float]:
    """Embed a single text; thin wrapper over the batch API."""
    (vector,) = get_embeddings([text])
    return vector
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def get_db() -> sqlite3.Connection:
    """Open the vector DB, creating parent dirs and loading sqlite-vec.

    WAL mode plus a 10s busy timeout lets concurrent tool calls share the
    database file without immediate lock errors.
    """
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    db = sqlite3.connect(str(DB_PATH), timeout=30)
    db.execute("PRAGMA journal_mode=WAL")
    db.execute("PRAGMA busy_timeout=10000")
    db.enable_load_extension(True)  # must precede sqlite_vec.load()
    sqlite_vec.load(db)
    return db
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def init_db() -> sqlite3.Connection:
    """Open the DB and migrate its schema up to SCHEMA_VERSION.

    Idempotent: each step runs only when the stored version is older than
    the step's target. v1 (re)creates file_meta and the vec_memory virtual
    table, dropping any pre-versioned leftovers; v2 adds typed memory
    units, fact lifecycle, and entity tables. The stored version is only
    written (and committed) by the final step, so a fresh DB passes through
    both steps in one call.
    """
    db = get_db()
    db.execute("CREATE TABLE IF NOT EXISTS schema_info (key TEXT PRIMARY KEY, value TEXT)")
    row = db.execute("SELECT value FROM schema_info WHERE key='version'").fetchone()
    ver = int(row[0]) if row else 0  # 0 == fresh or pre-versioned database

    if ver < 1:
        # Rebuild from scratch: drop any tables left by unversioned installs.
        db.execute("DROP TABLE IF EXISTS file_meta")
        db.execute("DROP TABLE IF EXISTS vec_memory")
        db.execute(
            """
            CREATE TABLE file_meta (
                path TEXT PRIMARY KEY,
                hash TEXT NOT NULL,
                chunk_count INTEGER DEFAULT 0,
                updated_at REAL DEFAULT (unixepoch('now'))
            )
            """
        )
        # sqlite-vec vec0 table; '+' columns are stored auxiliaries, not indexed.
        db.execute(
            f"""
            CREATE VIRTUAL TABLE vec_memory USING vec0(
                embedding float[{EMBED_DIM}] distance_metric=cosine,
                +ref_path TEXT,
                +content TEXT,
                +source_file TEXT
            )
            """
        )

    if ver < SCHEMA_VERSION:
        # v2: typed memory units, fact lifecycle, entity tables
        db.execute(
            """
            CREATE TABLE IF NOT EXISTS memory_units (
                unit_id TEXT PRIMARY KEY,
                source_ref TEXT NOT NULL,
                memory_type TEXT NOT NULL DEFAULT 'semantic',
                authority REAL NOT NULL DEFAULT 0.5,
                time_scope TEXT NOT NULL DEFAULT 'time-bound',
                sensitivity TEXT NOT NULL DEFAULT 'public',
                entity_tags TEXT NOT NULL DEFAULT '[]',
                content_hash TEXT NOT NULL,
                created_at REAL DEFAULT (unixepoch('now')),
                updated_at REAL DEFAULT (unixepoch('now'))
            )
            """
        )
        db.execute(
            """
            CREATE TABLE IF NOT EXISTS facts (
                fact_id TEXT PRIMARY KEY,
                canonical_fact TEXT NOT NULL,
                status TEXT NOT NULL DEFAULT 'active',
                confidence REAL NOT NULL DEFAULT 1.0,
                source_ref TEXT NOT NULL,
                created_at REAL DEFAULT (unixepoch('now')),
                updated_at REAL DEFAULT (unixepoch('now'))
            )
            """
        )
        db.execute(
            """
            CREATE TABLE IF NOT EXISTS lifecycle_events (
                event_id TEXT PRIMARY KEY,
                unit_id TEXT NOT NULL,
                operation TEXT NOT NULL,
                old_status TEXT,
                new_status TEXT,
                reason TEXT,
                ts REAL DEFAULT (unixepoch('now'))
            )
            """
        )
        db.execute(
            """
            CREATE TABLE IF NOT EXISTS entities (
                entity_id TEXT PRIMARY KEY,
                entity_name TEXT NOT NULL,
                entity_type TEXT NOT NULL DEFAULT 'general',
                confidence REAL NOT NULL DEFAULT 1.0,
                created_at REAL DEFAULT (unixepoch('now'))
            )
            """
        )
        db.execute(
            """
            CREATE TABLE IF NOT EXISTS entity_aliases (
                alias_id TEXT PRIMARY KEY,
                entity_id TEXT NOT NULL,
                alias_text TEXT NOT NULL,
                confidence REAL NOT NULL DEFAULT 1.0
            )
            """
        )
        db.execute(
            "INSERT OR REPLACE INTO schema_info(key, value) VALUES ('version', ?)",
            (str(SCHEMA_VERSION),),
        )
        db.commit()
    return db
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def _upsert_memory_unit(db: sqlite3.Connection, ref_path: str, content_hash: str) -> str:
    """Create or refresh the memory_units row for a source file.

    Memory type, authority, and time scope are re-derived from the path on
    every call. New units also get an 'ADD' lifecycle_events row. Returns
    the unit_id. Does not commit — the caller owns the transaction.
    """
    import uuid  # local import keeps module import light
    mem_type = _infer_memory_type(ref_path)
    auth = AUTHORITY_WEIGHTS.get(mem_type, 0.5)
    time_scope = _infer_time_scope(mem_type)

    existing = db.execute(
        "SELECT unit_id FROM memory_units WHERE source_ref = ?", (ref_path,)
    ).fetchone()

    if existing:
        unit_id = existing[0]
        db.execute(
            "UPDATE memory_units SET content_hash=?, authority=?, updated_at=unixepoch('now') WHERE unit_id=?",
            (content_hash, auth, unit_id),
        )
    else:
        unit_id = str(uuid.uuid4())
        db.execute(
            """
            INSERT INTO memory_units(unit_id, source_ref, memory_type, authority, time_scope, sensitivity, entity_tags, content_hash)
            VALUES (?, ?, ?, ?, ?, 'public', '[]', ?)
            """,
            (unit_id, ref_path, mem_type, auth, time_scope, content_hash),
        )
        # Log ADD lifecycle event
        db.execute(
            "INSERT INTO lifecycle_events(event_id, unit_id, operation, new_status, reason) VALUES (?,?,'ADD',NULL,'initial_index')",
            (str(uuid.uuid4()), unit_id),
        )
    return unit_id
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def chunk_markdown(content: str, file_path: Path) -> list[tuple[str, str]]:
    """Split markdown into (text, ref) chunks for embedding.

    Journal files are split per dated ``## YYYY-MM-DD`` entry; lesson files
    become one chunk tagged with their L-### id; everything else splits on
    ``#``–``####`` headings, with a whole-file fallback when no heading
    matched. Refs look like ``@<path>`` or ``@<path># <anchor>``.
    """
    path_str = str(file_path).replace("\\", "/")
    whole_ref = f"@{path_str}"
    out: list[tuple[str, str]] = []

    if "journal/" in path_str.lower():
        pieces = re.split(r"^(##\s+\d{4}-\d{2}-\d{2})", content, flags=re.MULTILINE)
        lead = pieces[0].strip()
        if lead:
            out.append((lead, whole_ref))
        for idx in range(1, len(pieces) - 1, 2):
            heading = pieces[idx].strip()
            body = pieces[idx + 1].strip()
            date = heading.replace("##", "").strip()
            out.append((f"{heading}\n{body}".strip(), f"{whole_ref}# {date}"))
        if out:
            return out
        # No dated entries and no preamble: fall through to generic splitting.

    if file_path.parent.name == "lessons" and file_path.name.startswith("L-"):
        text = content.strip()
        if text:
            lesson_id = re.match(r"(L-\d{3})", file_path.name)
            ref = f"{whole_ref}# {lesson_id.group(1)}" if lesson_id else whole_ref
            out.append((text, ref))
        return out

    pieces = re.split(r"^(#{1,4}\s+.+)$", content, flags=re.MULTILINE)
    lead = pieces[0].strip()
    if lead:
        out.append((lead, whole_ref))
    for idx in range(1, len(pieces) - 1, 2):
        heading_line = pieces[idx].strip()
        body = pieces[idx + 1].strip()
        anchor = re.sub(r"^#{1,4}\s+", "", heading_line)
        chunk_text = f"{heading_line}\n{body}".strip() if body else heading_line
        if chunk_text.strip():
            out.append((chunk_text, f"{whole_ref}# {anchor}"))

    if not out and content.strip():
        out.append((content.strip(), whole_ref))
    return out
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
@mcp.tool()
def vector_sync() -> str:
    """Incrementally index memory markdown files into the vector DB.

    Deletes rows for files removed from disk, skips unchanged files by
    content hash, re-embeds changed files in batches (falling back to
    per-chunk embedding when a batch fails), and marks partially embedded
    files with a 'DIRTY' sentinel hash so they are retried next sync.
    Returns a one-line human-readable summary.
    """
    try:
        db = init_db()
    except Exception as e:
        return f"DB init failed: {e}"

    # Candidate markdown files, minus index/meta files and excluded dirs.
    files: dict[str, Path] = {}
    for p in MEM_ROOT.glob("**/*.md"):
        if p.name in SKIP_NAMES:
            continue
        if any(skip in p.parts for skip in SKIP_DIRS):
            continue
        files[str(p)] = p

    updated = 0
    skipped = 0
    errors = 0

    # Prune DB rows whose source file no longer exists.
    known = db.execute("SELECT path FROM file_meta").fetchall()
    for (stored,) in known:
        if stored not in files:
            db.execute("DELETE FROM vec_memory WHERE source_file = ?", (stored,))
            db.execute("DELETE FROM file_meta WHERE path = ?", (stored,))
            updated += 1

    for str_path, file_path in files.items():
        try:
            # utf-8-sig tolerates BOMs written by Windows editors.
            content = file_path.read_text(encoding="utf-8-sig")
        except (UnicodeDecodeError, PermissionError, OSError):
            errors += 1
            continue

        if not content.strip():
            skipped += 1
            continue

        # Content hash short-circuits unchanged files.
        f_hash = hashlib.sha256(content.encode("utf-8")).hexdigest()
        row = db.execute("SELECT hash FROM file_meta WHERE path = ?", (str_path,)).fetchone()
        if row and row[0] == f_hash:
            skipped += 1
            continue

        # Replace all existing chunks for this file before re-embedding.
        db.execute("DELETE FROM vec_memory WHERE source_file = ?", (str_path,))
        _upsert_memory_unit(db, str_path, f_hash)
        chunks = chunk_markdown(content, file_path)
        embedded = 0
        chunk_errors = 0

        for i in range(0, len(chunks), BATCH_SIZE):
            batch = chunks[i : i + BATCH_SIZE]
            texts = [text for text, _ in batch]
            try:
                vectors = get_embeddings(texts)
                for (text, ref), emb in zip(batch, vectors):
                    db.execute(
                        "INSERT INTO vec_memory(embedding, ref_path, content, source_file) VALUES (?, ?, ?, ?)",
                        (serialize_f32(emb), ref, text, str_path),
                    )
                    embedded += 1
            except Exception:
                # Batch failed: retry chunks one-by-one so a single bad
                # chunk does not sink the whole batch.
                for text, ref in batch:
                    try:
                        emb = get_embedding(text)
                        db.execute(
                            "INSERT INTO vec_memory(embedding, ref_path, content, source_file) VALUES (?, ?, ?, ?)",
                            (serialize_f32(emb), ref, text, str_path),
                        )
                        embedded += 1
                    except Exception:
                        chunk_errors += 1

        if chunk_errors == 0:
            db.execute(
                "INSERT OR REPLACE INTO file_meta(path, hash, chunk_count, updated_at) VALUES (?, ?, ?, unixepoch('now'))",
                (str_path, f_hash, embedded),
            )
        else:
            # 'DIRTY' never matches a real sha256, forcing a re-embed next sync.
            db.execute(
                "INSERT OR REPLACE INTO file_meta(path, hash, chunk_count, updated_at) VALUES (?, ?, ?, unixepoch('now'))",
                (str_path, "DIRTY", embedded),
            )
            errors += chunk_errors
        updated += 1

    db.commit()
    db.close()
    msg = f"Synced: {updated} files processed, {skipped} unchanged"
    if errors:
        msg += f", {errors} chunk errors (will retry)"
    return msg
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
@mcp.tool()
def vector_search(query: str, top_k: int = 5) -> str:
    """Semantic search with authority-aware reranking.

    Over-fetches 3x top_k nearest chunks, drops vault-typed results, then
    scores each hit as 0.6*semantic + 0.3*authority, plus a 0.1 recency
    boost for episodic memory when the query contains a time word. Returns
    the top_k hits as formatted text, or an error/empty message.
    """
    try:
        db = init_db()
        emb = get_embedding(query)
        rows = db.execute(
            "SELECT ref_path, content, distance FROM vec_memory WHERE embedding MATCH ? AND k = ? ORDER BY distance",
            (serialize_f32(emb), top_k * 3),  # over-fetch for reranking
        ).fetchall()
        db.close()
    except Exception as e:
        return f"Search failed: {e}"

    if not rows:
        return "No relevant memory found."

    # Loop invariants hoisted: the time-word set and the lowered query were
    # previously rebuilt on every row.
    time_words = {"today", "yesterday", "last week", "last month", "recent", "latest"}
    query_lc = query.lower()
    wants_recent = any(w in query_lc for w in time_words)

    # Rerank: combine semantic score with authority weight
    reranked = []
    for ref, content, dist in rows:
        sem_score = round(1.0 - dist, 4)
        mem_type = _infer_memory_type(ref)
        auth_weight = AUTHORITY_WEIGHTS.get(mem_type, 0.5)
        # Skip vault entries entirely (sensitivity guard)
        if mem_type == "vault":
            continue
        # Temporal boost for recency-sensitive types when query mentions time words
        temporal_boost = 0.1 if (mem_type == "episodic" and wants_recent) else 0.0
        final_score = (sem_score * 0.6) + (auth_weight * 0.3) + temporal_boost
        reranked.append((ref, content, sem_score, auth_weight, final_score))

    reranked.sort(key=lambda x: x[4], reverse=True)
    top = reranked[:top_k]

    out = []
    for ref, content, sem, auth, final in top:
        preview = " ".join(content[:400].split())
        out.append(f"[score={final:.3f} sem={sem:.3f} auth={auth:.2f}] {ref}\n{preview}")
    return "\n\n---\n\n".join(out)
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
@mcp.tool()
def vector_forget(path_pattern: str = "") -> str:
    """Remove indexed entries.

    With a pattern, deletes rows whose path contains it (SQL LIKE, wrapped
    in %...%). With no pattern, prunes entries whose source file no longer
    exists on disk. Returns a summary or error string.
    """
    try:
        db = init_db()
        removed = 0
        if path_pattern:
            like = f"%{path_pattern}%"
            r1 = db.execute("DELETE FROM vec_memory WHERE source_file LIKE ?", (like,)).rowcount
            r2 = db.execute("DELETE FROM file_meta WHERE path LIKE ?", (like,)).rowcount
            db.execute("DELETE FROM memory_units WHERE source_ref LIKE ?", (like,))
            # vec rows (per chunk) and meta rows (per file) differ; report the larger.
            removed = max(r1, r2)
        else:
            known = db.execute("SELECT path FROM file_meta").fetchall()
            for (p,) in known:
                if not Path(p).exists():
                    db.execute("DELETE FROM vec_memory WHERE source_file = ?", (p,))
                    db.execute("DELETE FROM file_meta WHERE path = ?", (p,))
                    db.execute("DELETE FROM memory_units WHERE source_ref = ?", (p,))
                    removed += 1
        db.commit()
        db.close()
        return f"Pruned {removed} entries."
    except Exception as e:
        return f"Forget failed: {e}"
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
@mcp.tool()
def vector_health() -> str:
    """Report schema version, row counts, DB integrity, and embedding-API
    reachability as human-readable lines. Never raises; failures become
    lines in the report.
    """
    lines = []
    try:
        db = init_db()
        ver = db.execute("SELECT value FROM schema_info WHERE key='version'").fetchone()
        lines.append(f"Schema: v{ver[0] if ver else '?'}")
        files = db.execute("SELECT COUNT(*) FROM file_meta").fetchone()[0]
        vecs = db.execute("SELECT COUNT(*) FROM vec_memory").fetchone()[0]
        dirty = db.execute("SELECT COUNT(*) FROM file_meta WHERE hash = 'DIRTY'").fetchone()[0]
        units = db.execute("SELECT COUNT(*) FROM memory_units").fetchone()[0]
        facts = db.execute("SELECT COUNT(*) FROM facts WHERE status = 'active'").fetchone()[0]
        lines.append(f"Files tracked: {files}")
        lines.append(f"Vector chunks: {vecs}")
        lines.append(f"Memory units: {units}")
        lines.append(f"Active facts: {facts}")
        if dirty:
            lines.append(f"Dirty files: {dirty}")
        lines.append(f"DB integrity: {db.execute('PRAGMA integrity_check').fetchone()[0]}")
        db.close()
    except Exception as e:
        lines.append(f"DB error: {e}")

    # Live round-trip to the embedding provider (costs one tiny API call).
    try:
        _ = get_embedding("health check")
        lines.append(f"Embedding API ({PROVIDER}): OK")
    except Exception as e:
        lines.append(f"Embedding API ({PROVIDER}): FAILED - {e}")
    return "\n".join(lines)
|
|
527
|
+
|
|
528
|
+
|
|
529
|
+
@mcp.tool()
def memory_status() -> str:
    """Return a JSON summary of memory system status for autonomous monitoring."""
    try:
        db = init_db()
        files = db.execute("SELECT COUNT(*) FROM file_meta").fetchone()[0]
        vecs = db.execute("SELECT COUNT(*) FROM vec_memory").fetchone()[0]
        dirty = db.execute("SELECT COUNT(*) FROM file_meta WHERE hash = 'DIRTY'").fetchone()[0]
        units = db.execute("SELECT COUNT(*) FROM memory_units").fetchone()[0]
        facts_active = db.execute("SELECT COUNT(*) FROM facts WHERE status = 'active'").fetchone()[0]
        facts_deprecated = db.execute("SELECT COUNT(*) FROM facts WHERE status = 'deprecated'").fetchone()[0]
        events = db.execute("SELECT COUNT(*) FROM lifecycle_events").fetchone()[0]
        type_dist = db.execute(
            "SELECT memory_type, COUNT(*) FROM memory_units GROUP BY memory_type"
        ).fetchall()
        db.close()
        return json.dumps({
            "files": files, "vectors": vecs, "dirty": dirty,
            "memory_units": units, "facts_active": facts_active,
            "facts_deprecated": facts_deprecated, "lifecycle_events": events,
            "type_distribution": dict(type_dist),
        }, indent=2)
    except Exception as e:
        # Errors are JSON too, so callers can always json-parse the result.
        return json.dumps({"error": str(e)})
|
|
553
|
+
|
|
554
|
+
|
|
555
|
+
# Run as an MCP stdio server when executed directly.
if __name__ == "__main__":
    mcp.run()
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Query memory SQLite FTS index."""
|
|
3
|
+
import argparse
|
|
4
|
+
import sqlite3
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def resolve_memory_dir(repo: Path) -> Path:
    """Return the repo's memory directory, preferring .mnemo over .cursor.

    Falls back to the (possibly nonexistent) .mnemo location when neither
    directory exists.
    """
    mnemo = repo / ".mnemo" / "memory"
    cursor = repo / ".cursor" / "memory"
    if mnemo.exists():
        return mnemo
    if cursor.exists():
        return cursor
    return mnemo
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def main():
    """CLI entry: full-text search the memory.sqlite FTS index.

    Returns a process exit code: 0 on success (even with no matches), 2 when
    the index database does not exist yet.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--repo", required=True)
    ap.add_argument("--q", required=True)
    ap.add_argument("--area", default="All")
    ap.add_argument("--format", default="Human")
    args = ap.parse_args()

    repo = Path(args.repo)
    db = resolve_memory_dir(repo) / "memory.sqlite"
    if not db.exists():
        print("SQLite DB not found. Run rebuild-memory-index.ps1 first.")
        return 2

    # Map the CLI area name onto the 'kind' column values used by the index;
    # any unrecognized area (including the default "All") means no filter.
    area = args.area.lower()
    kind_filter = None
    if area == "hotrules":
        kind_filter = "hot_rules"
    elif area == "active":
        kind_filter = "active"
    elif area == "memo":
        kind_filter = "memo"
    elif area == "lessons":
        kind_filter = "lesson"
    elif area == "journal":
        kind_filter = "journal"
    elif area == "digests":
        kind_filter = "digest"

    con = sqlite3.connect(str(db))
    cur = con.cursor()

    # snippet(): column index 5, bracket matches with [ ], cap at 12 tokens.
    sql = "SELECT kind, id, date, title, path, snippet(memory_fts, 5, '[', ']', '...', 12) FROM memory_fts WHERE memory_fts MATCH ?"
    params = [args.q]
    if kind_filter:
        sql += " AND kind = ?"
        params.append(kind_filter)
    sql += " LIMIT 20"

    rows = cur.execute(sql, params).fetchall()
    con.close()

    # "AI" format: emit only a deduplicated list of repo-relative file refs.
    if args.format.lower() == "ai":
        paths = []
        for r in rows:
            p = r[4]
            try:
                rel = str(Path(p).resolve().relative_to(repo.resolve()))
            except Exception:
                rel = p  # path outside the repo: keep it as stored
            paths.append(rel.replace("\\", "/"))
        uniq = []
        for p in paths:
            if p not in uniq:
                uniq.append(p)
        if not uniq:
            print(f"No matches for: {args.q}")
        else:
            print("Files to read:")
            for p in uniq:
                print(f" @{p}")
        return 0

    if not rows:
        print(f"No matches for: {args.q}")
        return 0

    # Human format: one block per hit.
    for kind, idv, date, title, path, snip in rows:
        print(f"==> {kind} | {idv or '-'} | {date or '-'} | {title}")
        print(f" {path}")
        print(f" {snip}")
        print("")
    return 0
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit code.
    raise SystemExit(main())
|