@dinasor/mnemo-cli 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -0
- package/LICENSE +21 -0
- package/README.md +263 -0
- package/VERSION +1 -0
- package/bin/mnemo.js +139 -0
- package/memory.ps1 +178 -0
- package/memory_mac.sh +2447 -0
- package/package.json +36 -0
- package/scripts/memory/installer/bootstrap.ps1 +21 -0
- package/scripts/memory/installer/core/bridge.ps1 +285 -0
- package/scripts/memory/installer/core/io.ps1 +110 -0
- package/scripts/memory/installer/core/paths.ps1 +83 -0
- package/scripts/memory/installer/features/gitignore_setup.ps1 +80 -0
- package/scripts/memory/installer/features/hooks_setup.ps1 +157 -0
- package/scripts/memory/installer/features/mcp_setup.ps1 +87 -0
- package/scripts/memory/installer/features/memory_scaffold.ps1 +541 -0
- package/scripts/memory/installer/features/vector_setup.ps1 +103 -0
- package/scripts/memory/installer/templates/add-journal-entry.ps1 +122 -0
- package/scripts/memory/installer/templates/add-lesson.ps1 +151 -0
- package/scripts/memory/installer/templates/autonomy/__init__.py +6 -0
- package/scripts/memory/installer/templates/autonomy/context_safety.py +181 -0
- package/scripts/memory/installer/templates/autonomy/entity_resolver.py +215 -0
- package/scripts/memory/installer/templates/autonomy/ingest_pipeline.py +252 -0
- package/scripts/memory/installer/templates/autonomy/lifecycle_engine.py +254 -0
- package/scripts/memory/installer/templates/autonomy/policies.yaml +59 -0
- package/scripts/memory/installer/templates/autonomy/reranker.py +220 -0
- package/scripts/memory/installer/templates/autonomy/retrieval_router.py +148 -0
- package/scripts/memory/installer/templates/autonomy/runner.py +272 -0
- package/scripts/memory/installer/templates/autonomy/schema.py +150 -0
- package/scripts/memory/installer/templates/autonomy/vault_policy.py +205 -0
- package/scripts/memory/installer/templates/build-memory-sqlite.py +111 -0
- package/scripts/memory/installer/templates/clear-active.ps1 +55 -0
- package/scripts/memory/installer/templates/customization.md +84 -0
- package/scripts/memory/installer/templates/lint-memory.ps1 +217 -0
- package/scripts/memory/installer/templates/mnemo_vector.py +556 -0
- package/scripts/memory/installer/templates/query-memory-sqlite.py +95 -0
- package/scripts/memory/installer/templates/query-memory.ps1 +122 -0
- package/scripts/memory/installer/templates/rebuild-memory-index.ps1 +293 -0
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
ingest_pipeline.py - Autonomous ingestion and chunking with typed metadata.
|
|
4
|
+
|
|
5
|
+
Detects changed .md files in .mnemo/memory/ (with bridge fallback), chunks them with context-aware
|
|
6
|
+
splitting, classifies memory type, and upserts into the DB as memory_units
|
|
7
|
+
with full metadata (authority, time_scope, sensitivity, entity_tags).
|
|
8
|
+
"""
|
|
9
|
+
import hashlib
|
|
10
|
+
import json
|
|
11
|
+
import os
|
|
12
|
+
import re
|
|
13
|
+
import sqlite3
|
|
14
|
+
import uuid
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Optional
|
|
18
|
+
|
|
19
|
+
from autonomy.schema import get_db
|
|
20
|
+
|
|
21
|
+
# Index/metadata files that describe memory content rather than contain it;
# never ingested as memory units.
SKIP_NAMES = frozenset({"README.md", "index.md", "lessons-index.json",
                        "journal-index.json", "journal-index.md"})
# Directory names excluded from the change scan (archives and boilerplate).
SKIP_DIRS = frozenset({"legacy", "templates"})
# Hard cap on chunk size, in characters, to bound DB row size and token cost.
MAX_CHUNK_CHARS = 10000
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _resolve_memory_root(repo_root: Path) -> Path:
|
|
28
|
+
override = os.getenv("MNEMO_MEMORY_ROOT", "").strip()
|
|
29
|
+
if override:
|
|
30
|
+
return Path(override).expanduser().resolve()
|
|
31
|
+
|
|
32
|
+
candidates = [
|
|
33
|
+
repo_root / ".mnemo" / "memory",
|
|
34
|
+
repo_root / ".cursor" / "memory",
|
|
35
|
+
]
|
|
36
|
+
for candidate in candidates:
|
|
37
|
+
if candidate.exists():
|
|
38
|
+
return candidate
|
|
39
|
+
return candidates[0]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Base retrieval weight per memory type: at equal relevance a higher-authority
# unit outranks a lower one. Vault content is weighted 0 so it never surfaces.
AUTHORITY_WEIGHTS: dict[str, float] = {
    "core": 1.0,
    "procedural": 0.9,
    "semantic": 0.8,
    "episodic": 0.7,
    "resource": 0.5,
    "vault": 0.0,
}
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass
class MemoryUnit:
    """In-memory representation of one ingested source file and its chunks."""

    unit_id: str       # stable UUID, reused across re-ingests of the same file
    source_ref: str    # path string of the source .md file
    memory_type: str   # core | procedural | semantic | episodic | resource | vault
    authority: float   # retrieval weight derived from memory_type (AUTHORITY_WEIGHTS)
    time_scope: str    # atemporal | recency-sensitive | time-bound
    sensitivity: str   # public | secret (see _infer_sensitivity)
    entity_tags: list[str]  # resolved entity ids; populated later via update_entity_tags
    content_hash: str  # sha256 hex digest of the full file content
    content: str       # full raw file text
    chunks: list[tuple[str, str]] = field(default_factory=list)  # (text, ref)
    is_new: bool = True  # False when a memory_units row for source_ref already existed
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _infer_memory_type(path_str: str) -> str:
|
|
68
|
+
p = path_str.lower().replace("\\", "/")
|
|
69
|
+
if "hot-rules" in p or "memo.md" in p:
|
|
70
|
+
return "core"
|
|
71
|
+
if "/lessons/" in p and re.search(r"/l-\d+", p):
|
|
72
|
+
return "procedural"
|
|
73
|
+
if "/journal/" in p or "active-context" in p:
|
|
74
|
+
return "episodic"
|
|
75
|
+
if "/digests/" in p:
|
|
76
|
+
return "semantic"
|
|
77
|
+
if "/vault/" in p:
|
|
78
|
+
return "vault"
|
|
79
|
+
if "/adr/" in p:
|
|
80
|
+
return "semantic"
|
|
81
|
+
return "semantic"
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _infer_time_scope(memory_type: str) -> str:
|
|
85
|
+
if memory_type == "episodic":
|
|
86
|
+
return "recency-sensitive"
|
|
87
|
+
if memory_type in ("core", "procedural"):
|
|
88
|
+
return "atemporal"
|
|
89
|
+
return "time-bound"
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _infer_sensitivity(path_str: str) -> str:
|
|
93
|
+
p = path_str.lower()
|
|
94
|
+
if "/vault/" in p or "secret" in p or ".secret." in p:
|
|
95
|
+
return "secret"
|
|
96
|
+
return "public"
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _content_hash(content: str) -> str:
|
|
100
|
+
return hashlib.sha256(content.encode("utf-8")).hexdigest()
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _chunk_markdown(content: str, file_path: Path) -> list[tuple[str, str]]:
    """Split markdown content into (text, ref_path) chunks.

    The splitting strategy depends on the file's role:
      - journal files: one chunk per ``## YYYY-MM-DD`` dated entry;
      - lesson files (``lessons/l-NNN…``): the whole file as a single chunk;
      - everything else: one chunk per heading section, with a whole-file
        fallback when no headings are present.

    Every chunk text is truncated to MAX_CHUNK_CHARS. (Fix: the journal
    preamble chunk was previously the only chunk that escaped truncation;
    it is now capped like all others.)
    """
    path_str = str(file_path).replace("\\", "/")
    lowered = path_str.lower()

    if "journal/" in lowered:
        return _chunk_journal(content, path_str)
    if re.search(r"/lessons/l-\d+", lowered):
        return _chunk_lesson(content, path_str, file_path.name)
    return _chunk_general(content, path_str)


def _chunk_journal(content: str, path_str: str) -> list[tuple[str, str]]:
    """One chunk per dated ``## YYYY-MM-DD`` heading, plus any preamble."""
    chunks: list[tuple[str, str]] = []
    parts = re.split(r"^(##\s+\d{4}-\d{2}-\d{2})", content, flags=re.MULTILINE)
    preamble = parts[0].strip()
    if preamble:
        chunks.append((preamble[:MAX_CHUNK_CHARS], f"@{path_str}"))
    # After the preamble, parts alternates [heading, body, heading, body, ...].
    for i in range(1, len(parts) - 1, 2):
        heading = parts[i].strip()
        body = parts[i + 1].strip()
        date_val = heading.replace("##", "").strip()
        chunk_text = f"{heading}\n{body}".strip()
        if chunk_text:
            chunks.append((chunk_text[:MAX_CHUNK_CHARS], f"@{path_str}#{date_val}"))
    return chunks


def _chunk_lesson(content: str, path_str: str, file_name: str) -> list[tuple[str, str]]:
    """Whole lesson file as one chunk, anchored at its L-NNN id when present."""
    text = content.strip()
    if not text:
        return []
    # The id anchor is case-sensitive on purpose: canonical lesson files are
    # named "L-NNN-..."; anything else falls back to a plain file ref.
    m = re.match(r"(L-\d{3})", file_name)
    ref = f"@{path_str}#{m.group(1)}" if m else f"@{path_str}"
    return [(text[:MAX_CHUNK_CHARS], ref)]


def _chunk_general(content: str, path_str: str) -> list[tuple[str, str]]:
    """One chunk per markdown heading section; whole file if no headings."""
    chunks: list[tuple[str, str]] = []
    parts = re.split(r"^(#{1,4}\s+.+)$", content, flags=re.MULTILINE)
    preamble = parts[0].strip()
    if preamble:
        chunks.append((preamble[:MAX_CHUNK_CHARS], f"@{path_str}"))
    for i in range(1, len(parts) - 1, 2):
        heading_line = parts[i].strip()
        body = parts[i + 1].strip()
        heading_text = re.sub(r"^#{1,4}\s+", "", heading_line)
        full = f"{heading_line}\n{body}".strip() if body else heading_line
        if full:
            chunks.append((full[:MAX_CHUNK_CHARS], f"@{path_str}#{heading_text}"))
    if not chunks and content.strip():
        chunks.append((content.strip()[:MAX_CHUNK_CHARS], f"@{path_str}"))
    return chunks
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
class IngestPipeline:
    """Scans the memory root for changed .md files and upserts them into the
    DB as ``memory_units`` rows with typed metadata (authority, time_scope,
    sensitivity, entity_tags)."""

    def __init__(self, db: Optional[sqlite3.Connection] = None, repo_root: Optional[Path] = None):
        # Defaults allow running both standalone (shared DB, cwd as repo root)
        # and under the autonomous runner (explicit db / repo_root).
        self.db = db or get_db()
        self.repo_root = repo_root or Path.cwd()
        self.mem_root = _resolve_memory_root(self.repo_root)

    def detect_changes(self) -> list[Path]:
        """Return list of .md files that have changed hash."""
        changed: list[Path] = []
        for p in self.mem_root.glob("**/*.md"):
            # Skip index/metadata files and excluded directories entirely.
            if p.name in SKIP_NAMES:
                continue
            if any(skip in p.parts for skip in SKIP_DIRS):
                continue
            try:
                # utf-8-sig tolerates BOMs written by Windows editors.
                content = p.read_text(encoding="utf-8-sig")
                h = _content_hash(content)
                # file_meta stores the hash seen at last ingest; a missing row
                # means the file has never been ingested.
                row = self.db.execute(
                    "SELECT hash FROM file_meta WHERE path = ?", (str(p),)
                ).fetchone()
                if not row or row["hash"] != h:
                    changed.append(p)
            except OSError:
                # Unreadable files are skipped; they will be retried next cycle.
                pass
        return changed

    def ingest_file(self, file_path: Path) -> list[MemoryUnit]:
        """Ingest a single file, create/update memory units, return list."""
        content = file_path.read_text(encoding="utf-8-sig")
        h = _content_hash(content)
        path_str = str(file_path)

        # Derive all typed metadata from the path / content.
        mem_type = _infer_memory_type(path_str)
        authority = AUTHORITY_WEIGHTS.get(mem_type, 0.5)
        time_scope = _infer_time_scope(mem_type)
        sensitivity = _infer_sensitivity(path_str)
        chunks = _chunk_markdown(content, file_path)

        # One memory_units row per source file; reuse the unit_id on re-ingest
        # so downstream references stay stable.
        existing_row = self.db.execute(
            "SELECT unit_id FROM memory_units WHERE source_ref = ?", (path_str,)
        ).fetchone()

        if existing_row:
            unit_id = existing_row["unit_id"]
            is_new = False
        else:
            unit_id = str(uuid.uuid4())
            is_new = True

        unit = MemoryUnit(
            unit_id=unit_id,
            source_ref=path_str,
            memory_type=mem_type,
            authority=authority,
            time_scope=time_scope,
            sensitivity=sensitivity,
            entity_tags=[],
            content_hash=h,
            content=content,
            chunks=chunks,
            is_new=is_new,
        )

        if is_new:
            # entity_tags start empty ('[]'); the resolver fills them in later
            # via update_entity_tags.
            self.db.execute(
                """
                INSERT INTO memory_units
                (unit_id, source_ref, memory_type, authority, time_scope, sensitivity, entity_tags, content_hash)
                VALUES (?, ?, ?, ?, ?, ?, '[]', ?)
                """,
                (unit_id, path_str, mem_type, authority, time_scope, sensitivity, h),
            )
        else:
            # Metadata may shift when a file moves/changes; entity_tags are
            # intentionally left untouched here.
            self.db.execute(
                """
                UPDATE memory_units
                SET memory_type=?, authority=?, time_scope=?, sensitivity=?,
                    content_hash=?, updated_at=unixepoch('now')
                WHERE unit_id=?
                """,
                (mem_type, authority, time_scope, sensitivity, h, unit_id),
            )

        # Record the hash so detect_changes() treats this file as up to date.
        self.db.execute(
            "INSERT OR REPLACE INTO file_meta(path, hash, chunk_count, updated_at) VALUES (?,?,?,unixepoch('now'))",
            (path_str, h, len(chunks)),
        )
        self.db.commit()
        return [unit]

    def update_entity_tags(self, unit: MemoryUnit, entity_ids: list[str]) -> None:
        """Persist resolved entity tags back to the unit row."""
        unit.entity_tags = entity_ids
        self.db.execute(
            "UPDATE memory_units SET entity_tags=?, updated_at=unixepoch('now') WHERE unit_id=?",
            (json.dumps(entity_ids), unit.unit_id),
        )
        self.db.commit()
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
lifecycle_engine.py - Autonomous fact lifecycle engine.
|
|
4
|
+
|
|
5
|
+
Decides ADD / UPDATE / DEPRECATE / NOOP for each memory unit based on
|
|
6
|
+
similarity to existing facts, freshness, and contradiction detection.
|
|
7
|
+
All decisions are logged to lifecycle_events for full auditability.
|
|
8
|
+
|
|
9
|
+
No human required: transitions happen automatically on every ingest cycle.
|
|
10
|
+
"""
|
|
11
|
+
import hashlib
|
|
12
|
+
import json
|
|
13
|
+
import os
|
|
14
|
+
import re
|
|
15
|
+
import sqlite3
|
|
16
|
+
import uuid
|
|
17
|
+
from dataclasses import dataclass
|
|
18
|
+
from datetime import datetime, timezone
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import Optional
|
|
21
|
+
|
|
22
|
+
from autonomy.schema import get_db
|
|
23
|
+
from autonomy.ingest_pipeline import MemoryUnit
|
|
24
|
+
|
|
25
|
+
# Jaccard similarity above which a new fact is considered close enough to an
# existing one to be checked for contradiction/supersession.
DEPRECATION_SIMILARITY_THRESHOLD = 0.85
PROMOTE_STABILITY_CYCLES = 3  # fact must appear N cycles before lesson promotion
NOOP_HASH_MATCH = True  # if content_hash unchanged, always NOOP
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class LifecycleDecision:
    """Outcome of running one MemoryUnit through the lifecycle state machine."""

    operation: str  # ADD | UPDATE | DEPRECATE | NOOP
    unit_id: str    # memory unit the decision applies to
    fact_id: Optional[str]  # affected fact row, when one exists
    reason: str     # machine-readable reason logged to lifecycle_events
    confidence: float = 1.0  # engine's certainty about this transition
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _extract_key_facts(content: str, memory_type: str) -> list[str]:
|
|
40
|
+
"""
|
|
41
|
+
Heuristic extraction of canonical facts from content.
|
|
42
|
+
Returns list of short declarative sentences.
|
|
43
|
+
"""
|
|
44
|
+
facts = []
|
|
45
|
+
|
|
46
|
+
# Hot rules / lessons: extract bullet points as facts
|
|
47
|
+
if memory_type in ("core", "procedural"):
|
|
48
|
+
for m in re.finditer(r"^[-*]\s+(.+)", content, re.MULTILINE):
|
|
49
|
+
fact = m.group(1).strip()
|
|
50
|
+
if len(fact) > 10:
|
|
51
|
+
facts.append(fact)
|
|
52
|
+
|
|
53
|
+
# Journal / active-context: extract decision lines
|
|
54
|
+
if memory_type in ("episodic",):
|
|
55
|
+
for m in re.finditer(r"(decided|confirmed|fixed|added|removed|changed):\s*(.+)", content, re.IGNORECASE):
|
|
56
|
+
facts.append(m.group(0).strip())
|
|
57
|
+
|
|
58
|
+
# Generic: extract first sentence of each section
|
|
59
|
+
for m in re.finditer(r"^#{1,4}\s+(.+)\n+(.*?)(?:\n|$)", content, re.MULTILINE):
|
|
60
|
+
heading = m.group(1).strip()
|
|
61
|
+
body = m.group(2).strip()
|
|
62
|
+
if body:
|
|
63
|
+
facts.append(f"{heading}: {body}")
|
|
64
|
+
|
|
65
|
+
return facts[:20] # cap to prevent runaway
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _simple_similarity(a: str, b: str) -> float:
|
|
69
|
+
"""Token Jaccard similarity for contradiction detection (no embeddings needed)."""
|
|
70
|
+
ta = set(re.findall(r"\w+", a.lower()))
|
|
71
|
+
tb = set(re.findall(r"\w+", b.lower()))
|
|
72
|
+
if not ta or not tb:
|
|
73
|
+
return 0.0
|
|
74
|
+
return len(ta & tb) / len(ta | tb)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _resolve_lessons_dir(repo_root: Path) -> Path:
|
|
78
|
+
override = os.getenv("MNEMO_MEMORY_ROOT", "").strip()
|
|
79
|
+
if override:
|
|
80
|
+
return Path(override).expanduser().resolve() / "lessons"
|
|
81
|
+
|
|
82
|
+
candidates = [
|
|
83
|
+
repo_root / ".mnemo" / "memory" / "lessons",
|
|
84
|
+
repo_root / ".cursor" / "memory" / "lessons",
|
|
85
|
+
]
|
|
86
|
+
for candidate in candidates:
|
|
87
|
+
if candidate.exists():
|
|
88
|
+
return candidate
|
|
89
|
+
return candidates[0]
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class LifecycleEngine:
    """Drives ADD / UPDATE / DEPRECATE / NOOP transitions for memory units
    and auto-promotes high-confidence facts into lesson files. Every decision
    is logged to ``lifecycle_events`` for auditability."""

    def __init__(self, db: Optional[sqlite3.Connection] = None):
        self.db = db or get_db()

    def process(self, unit: MemoryUnit) -> LifecycleDecision:
        """
        Process a memory unit through the lifecycle state machine.
        Emits a lifecycle_event and returns the decision.
        """
        existing_hash = self.db.execute(
            "SELECT content_hash FROM memory_units WHERE unit_id = ?",
            (unit.unit_id,),
        ).fetchone()

        # Fast path: unchanged content never triggers fact churn.
        if existing_hash and NOOP_HASH_MATCH and existing_hash["content_hash"] == unit.content_hash:
            decision = LifecycleDecision("NOOP", unit.unit_id, None, "content_hash_unchanged")
            self._log_event(decision)
            return decision

        facts = _extract_key_facts(unit.content, unit.memory_type)
        if not facts:
            decision = LifecycleDecision("NOOP", unit.unit_id, None, "no_extractable_facts")
            self._log_event(decision)
            return decision

        existing_facts = self.db.execute(
            "SELECT fact_id, canonical_fact, status, confidence FROM facts WHERE source_ref = ?",
            (unit.source_ref,),
        ).fetchall()

        # Check for contradiction / supersession in global facts
        contradictions = self._detect_contradictions(facts)
        for old_fact_id, old_fact_text in contradictions:
            self.db.execute(
                "UPDATE facts SET status='deprecated', updated_at=unixepoch('now') WHERE fact_id=?",
                (old_fact_id,),
            )
            dep_decision = LifecycleDecision(
                "DEPRECATE", unit.unit_id, old_fact_id,
                reason=f"superseded_by_unit:{unit.unit_id}",
                confidence=0.8,
            )
            self._log_event(dep_decision)

        if existing_facts:
            # UPDATE existing facts from this source: each re-confirmation
            # nudges confidence up, capped at 1.0.
            for ef in existing_facts:
                self.db.execute(
                    "UPDATE facts SET status='active', confidence=?, updated_at=unixepoch('now') WHERE fact_id=?",
                    (min(ef["confidence"] + 0.05, 1.0), ef["fact_id"]),
                )
            decision = LifecycleDecision("UPDATE", unit.unit_id, existing_facts[0]["fact_id"], "source_file_changed")
        else:
            # ADD new facts
            for fact_text in facts[:5]:  # cap facts per unit
                fact_id = str(uuid.uuid4())
                self.db.execute(
                    "INSERT INTO facts(fact_id, canonical_fact, status, confidence, source_ref) VALUES (?,?,'active',1.0,?)",
                    (fact_id, fact_text, unit.source_ref),
                )
            decision = LifecycleDecision("ADD", unit.unit_id, None, f"new_unit_{len(facts)}_facts_extracted")

        self._log_event(decision)
        self.db.commit()
        return decision

    def _detect_contradictions(self, new_facts: list[str]) -> list[tuple[str, str]]:
        """
        Find existing active facts that are semantically contradicted by new_facts.
        Uses token Jaccard with high threshold — low false positive rate is more important than recall.
        """
        contradicted: list[tuple[str, str]] = []
        existing = self.db.execute(
            "SELECT fact_id, canonical_fact FROM facts WHERE status = 'active'"
        ).fetchall()

        # Each pair is (pattern expected in the NEW fact, pattern expected in
        # the OLD fact) — a polarity flip between near-identical facts.
        contradiction_patterns = [
            (r"\bdo\s+not\b", r"\bdo\b"),
            (r"\bnever\b", r"\balways\b"),
            (r"\bdisabled\b", r"\benabled\b"),
        ]

        for ef in existing:
            ef_text = ef["canonical_fact"]
            for new_fact in new_facts:
                sim = _simple_similarity(new_fact, ef_text)
                if sim >= DEPRECATION_SIMILARITY_THRESHOLD:
                    for pat_a, pat_b in contradiction_patterns:
                        a_in_new = bool(re.search(pat_a, new_fact, re.I))
                        b_in_old = bool(re.search(pat_b, ef_text, re.I))
                        if a_in_new and b_in_old:
                            contradicted.append((ef["fact_id"], ef_text))
                            break
        return contradicted

    def _log_event(self, decision: LifecycleDecision) -> None:
        # Persists the audit record; committing is left to the caller so a
        # decision and its DB effects land in one transaction.
        self.db.execute(
            "INSERT INTO lifecycle_events(event_id, unit_id, operation, reason) VALUES (?,?,?,?)",
            (str(uuid.uuid4()), decision.unit_id, decision.operation, decision.reason),
        )

    def promote_lessons(self, repo_root: Path) -> list[str]:
        """
        Auto-promote stable repeated signals into lesson files.
        A fact qualifies when: status=active AND confidence >= 0.95
        AND no lesson already covers the source_ref.
        Returns list of created lesson paths.
        """
        candidates = self.db.execute(
            """
            SELECT f.fact_id, f.canonical_fact, f.source_ref, f.confidence
            FROM facts f
            WHERE f.status = 'active' AND f.confidence >= 0.95
            AND f.source_ref NOT LIKE '%lessons/L-%'
            ORDER BY f.confidence DESC
            LIMIT 5
            """
        ).fetchall()

        promoted = []
        lessons_dir = _resolve_lessons_dir(repo_root)
        lessons_dir.mkdir(parents=True, exist_ok=True)

        # Continue numbering after the highest existing L-NNN lesson file.
        existing = sorted(lessons_dir.glob("L-*.md"))
        next_id = 1
        if existing:
            m = re.match(r"L-(\d+)", existing[-1].name)
            if m:
                next_id = int(m.group(1)) + 1

        for row in candidates:
            fact_text = row["canonical_fact"][:200]
            lesson_id = f"L-{next_id:03d}"
            slug = re.sub(r"[^a-z0-9]+", "-", fact_text.lower())[:40].strip("-")
            lesson_file = lessons_dir / f"{lesson_id}-{slug}.md"

            if lesson_file.exists():
                continue

            today = datetime.now().strftime("%Y-%m-%d")
            content = (
                f"---\nid: {lesson_id}\ntitle: {fact_text[:80]}\nstatus: Active\n"
                f"tags: [Process]\nintroduced: {today}\napplies_to:\n - \"**/*\"\n"
                f"triggers:\n - auto-promoted\nrule: {fact_text[:120]}\n---\n\n"
                f"# {lesson_id} - Auto-Promoted Lesson\n\n"
                f"**Source:** `{row['source_ref']}`\n\n"
                f"**Canonical fact:** {fact_text}\n\n"
                f"> This lesson was auto-promoted by the Mnemo autonomous runner.\n"
                f"> Review and edit the rule to ensure accuracy.\n"
            )
            lesson_file.write_text(content, encoding="utf-8")
            promoted.append(str(lesson_file))

            # Mark fact as promoted
            self.db.execute(
                "UPDATE facts SET status='promoted', updated_at=unixepoch('now') WHERE fact_id=?",
                (row["fact_id"],),
            )
            next_id += 1

        if promoted:
            self.db.commit()
        return promoted
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# Mnemo Memory System - Vault & Sensitivity Policy
|
|
2
|
+
# Loaded by vault_policy.py at runtime.
|
|
3
|
+
# Edit this file to customize sensitivity rules for your project.
|
|
4
|
+
|
|
5
|
+
# Sensitivity classification rules (applied in order: secret > internal > public)
|
|
6
|
+
sensitivity_paths:
|
|
7
|
+
secret:
|
|
8
|
+
- ".mnemo/memory/vault/"
|
|
9
|
+
- ".cursor/memory/vault/"
|
|
10
|
+
- ".env"
|
|
11
|
+
- "*.secret.*"
|
|
12
|
+
- "*credentials*"
|
|
13
|
+
- "*private-key*"
|
|
14
|
+
internal:
|
|
15
|
+
- ".mnemo/memory/active-context.md"
|
|
16
|
+
- ".cursor/memory/active-context.md"
|
|
17
|
+
|
|
18
|
+
# Additional regex redaction patterns (applied on top of built-ins)
|
|
19
|
+
# Built-ins already handle: API keys, bearer tokens, long random strings
|
|
20
|
+
redaction_patterns:
|
|
21
|
+
# Add project-specific patterns here, e.g.:
|
|
22
|
+
# - "sk-[a-zA-Z0-9]{40,}"
|
|
23
|
+
# - "xoxb-[0-9]+-[0-9]+-[a-zA-Z0-9]+"
|
|
24
|
+
|
|
25
|
+
# Roles allowed to see 'internal' sensitivity content
|
|
26
|
+
allow_internal_for_roles:
|
|
27
|
+
- agent
|
|
28
|
+
- autonomous
|
|
29
|
+
|
|
30
|
+
# Maximum sensitivity level allowed in outgoing context packs
|
|
31
|
+
# Options: public | internal
|
|
32
|
+
# Setting 'public' means only public content is included
|
|
33
|
+
max_sensitivity_in_context: internal
|
|
34
|
+
|
|
35
|
+
# Autonomous ingestion: re-run sensitivity classification every N cycles
|
|
36
|
+
reclassify_every_n_cycles: 10
|
|
37
|
+
|
|
38
|
+
# Lifecycle deprecation: minimum confidence before auto-deprecating a fact
|
|
39
|
+
deprecation_confidence_threshold: 0.8
|
|
40
|
+
|
|
41
|
+
# Entity resolution: minimum Jaccard similarity to merge an alias
|
|
42
|
+
alias_merge_threshold: 0.85
|
|
43
|
+
|
|
44
|
+
# Lesson promotion: minimum fact confidence to auto-promote to lesson
|
|
45
|
+
lesson_promotion_threshold: 0.95
|
|
46
|
+
|
|
47
|
+
# Retrieval safety: minimum final_score for results to appear in context packs
|
|
48
|
+
min_retrieval_score: 0.2
|
|
49
|
+
|
|
50
|
+
# Token budgets (in characters, ~4 chars per token)
|
|
51
|
+
token_budget_default: 6000
|
|
52
|
+
token_budget_extended: 12000
|
|
53
|
+
|
|
54
|
+
# Benchmark thresholds (used by CI quality gates)
|
|
55
|
+
benchmark:
|
|
56
|
+
min_hit_at_3: 0.7
|
|
57
|
+
min_ndcg_at_5: 0.65
|
|
58
|
+
max_p95_latency_ms: 2000
|
|
59
|
+
max_token_cost_per_query: 0.005
|