@dinasor/mnemo-cli 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/CHANGELOG.md +46 -0
  2. package/LICENSE +21 -0
  3. package/README.md +263 -0
  4. package/VERSION +1 -0
  5. package/bin/mnemo.js +139 -0
  6. package/memory.ps1 +178 -0
  7. package/memory_mac.sh +2447 -0
  8. package/package.json +36 -0
  9. package/scripts/memory/installer/bootstrap.ps1 +21 -0
  10. package/scripts/memory/installer/core/bridge.ps1 +285 -0
  11. package/scripts/memory/installer/core/io.ps1 +110 -0
  12. package/scripts/memory/installer/core/paths.ps1 +83 -0
  13. package/scripts/memory/installer/features/gitignore_setup.ps1 +80 -0
  14. package/scripts/memory/installer/features/hooks_setup.ps1 +157 -0
  15. package/scripts/memory/installer/features/mcp_setup.ps1 +87 -0
  16. package/scripts/memory/installer/features/memory_scaffold.ps1 +541 -0
  17. package/scripts/memory/installer/features/vector_setup.ps1 +103 -0
  18. package/scripts/memory/installer/templates/add-journal-entry.ps1 +122 -0
  19. package/scripts/memory/installer/templates/add-lesson.ps1 +151 -0
  20. package/scripts/memory/installer/templates/autonomy/__init__.py +6 -0
  21. package/scripts/memory/installer/templates/autonomy/context_safety.py +181 -0
  22. package/scripts/memory/installer/templates/autonomy/entity_resolver.py +215 -0
  23. package/scripts/memory/installer/templates/autonomy/ingest_pipeline.py +252 -0
  24. package/scripts/memory/installer/templates/autonomy/lifecycle_engine.py +254 -0
  25. package/scripts/memory/installer/templates/autonomy/policies.yaml +59 -0
  26. package/scripts/memory/installer/templates/autonomy/reranker.py +220 -0
  27. package/scripts/memory/installer/templates/autonomy/retrieval_router.py +148 -0
  28. package/scripts/memory/installer/templates/autonomy/runner.py +272 -0
  29. package/scripts/memory/installer/templates/autonomy/schema.py +150 -0
  30. package/scripts/memory/installer/templates/autonomy/vault_policy.py +205 -0
  31. package/scripts/memory/installer/templates/build-memory-sqlite.py +111 -0
  32. package/scripts/memory/installer/templates/clear-active.ps1 +55 -0
  33. package/scripts/memory/installer/templates/customization.md +84 -0
  34. package/scripts/memory/installer/templates/lint-memory.ps1 +217 -0
  35. package/scripts/memory/installer/templates/mnemo_vector.py +556 -0
  36. package/scripts/memory/installer/templates/query-memory-sqlite.py +95 -0
  37. package/scripts/memory/installer/templates/query-memory.ps1 +122 -0
  38. package/scripts/memory/installer/templates/rebuild-memory-index.ps1 +293 -0
@@ -0,0 +1,252 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ ingest_pipeline.py - Autonomous ingestion and chunking with typed metadata.
4
+
5
+ Detects changed .md files in .mnemo/memory/ (with bridge fallback), chunks them with context-aware
6
+ splitting, classifies memory type, and upserts into the DB as memory_units
7
+ with full metadata (authority, time_scope, sensitivity, entity_tags).
8
+ """
9
+ import hashlib
10
+ import json
11
+ import os
12
+ import re
13
+ import sqlite3
14
+ import uuid
15
+ from dataclasses import dataclass, field
16
+ from pathlib import Path
17
+ from typing import Optional
18
+
19
+ from autonomy.schema import get_db
20
+
21
# Index/readme files that never hold memory content worth ingesting.
SKIP_NAMES = frozenset({"README.md", "index.md", "lessons-index.json",
                        "journal-index.json", "journal-index.md"})
# Directory names excluded entirely from change detection.
SKIP_DIRS = frozenset({"legacy", "templates"})
# Upper bound on a single chunk's size, in characters.
MAX_CHUNK_CHARS = 10000
25
+
26
+
27
+ def _resolve_memory_root(repo_root: Path) -> Path:
28
+ override = os.getenv("MNEMO_MEMORY_ROOT", "").strip()
29
+ if override:
30
+ return Path(override).expanduser().resolve()
31
+
32
+ candidates = [
33
+ repo_root / ".mnemo" / "memory",
34
+ repo_root / ".cursor" / "memory",
35
+ ]
36
+ for candidate in candidates:
37
+ if candidate.exists():
38
+ return candidate
39
+ return candidates[0]
40
+
41
+
42
# Default authority weight per memory type; used as the `authority` value
# for new units (see IngestPipeline.ingest_file, which falls back to 0.5
# for unknown types).
AUTHORITY_WEIGHTS: dict[str, float] = {
    "core": 1.0,
    "procedural": 0.9,
    "semantic": 0.8,
    "episodic": 0.7,
    "resource": 0.5,
    "vault": 0.0,
}
50
+
51
+
52
@dataclass
class MemoryUnit:
    """In-memory representation of one ingested markdown source file."""

    unit_id: str                # stable UUID, reused across re-ingests of the same source
    source_ref: str             # filesystem path of the source .md file
    memory_type: str            # core | procedural | semantic | episodic | vault (see _infer_memory_type)
    authority: float            # ranking weight taken from AUTHORITY_WEIGHTS
    time_scope: str             # atemporal | recency-sensitive | time-bound (see _infer_time_scope)
    sensitivity: str            # public | secret (see _infer_sensitivity)
    entity_tags: list[str]      # resolved entity ids; filled later via update_entity_tags
    content_hash: str           # sha256 hex digest of the full file content
    content: str                # full file text
    chunks: list[tuple[str, str]] = field(default_factory=list)  # (text, ref) pairs
    is_new: bool = True         # False when a memory_units row already existed for source_ref
65
+
66
+
67
+ def _infer_memory_type(path_str: str) -> str:
68
+ p = path_str.lower().replace("\\", "/")
69
+ if "hot-rules" in p or "memo.md" in p:
70
+ return "core"
71
+ if "/lessons/" in p and re.search(r"/l-\d+", p):
72
+ return "procedural"
73
+ if "/journal/" in p or "active-context" in p:
74
+ return "episodic"
75
+ if "/digests/" in p:
76
+ return "semantic"
77
+ if "/vault/" in p:
78
+ return "vault"
79
+ if "/adr/" in p:
80
+ return "semantic"
81
+ return "semantic"
82
+
83
+
84
+ def _infer_time_scope(memory_type: str) -> str:
85
+ if memory_type == "episodic":
86
+ return "recency-sensitive"
87
+ if memory_type in ("core", "procedural"):
88
+ return "atemporal"
89
+ return "time-bound"
90
+
91
+
92
+ def _infer_sensitivity(path_str: str) -> str:
93
+ p = path_str.lower()
94
+ if "/vault/" in p or "secret" in p or ".secret." in p:
95
+ return "secret"
96
+ return "public"
97
+
98
+
99
+ def _content_hash(content: str) -> str:
100
+ return hashlib.sha256(content.encode("utf-8")).hexdigest()
101
+
102
+
103
+ def _chunk_markdown(content: str, file_path: Path) -> list[tuple[str, str]]:
104
+ """Split markdown content into (text, ref_path) chunks."""
105
+ chunks: list[tuple[str, str]] = []
106
+ path_str = str(file_path).replace("\\", "/")
107
+
108
+ # Journal: split by date headings
109
+ if "journal/" in path_str.lower():
110
+ parts = re.split(r"^(##\s+\d{4}-\d{2}-\d{2})", content, flags=re.MULTILINE)
111
+ preamble = parts[0].strip()
112
+ if preamble:
113
+ chunks.append((preamble, f"@{path_str}"))
114
+ i = 1
115
+ while i < len(parts) - 1:
116
+ heading = parts[i].strip()
117
+ body = parts[i + 1].strip()
118
+ date_val = heading.replace("##", "").strip()
119
+ chunk_text = f"{heading}\n{body}".strip()
120
+ if chunk_text:
121
+ chunks.append((chunk_text[:MAX_CHUNK_CHARS], f"@{path_str}#{date_val}"))
122
+ i += 2
123
+ return chunks
124
+
125
+ # Lessons: single chunk per lesson file
126
+ if re.search(r"/lessons/l-\d+", path_str.lower()):
127
+ text = content.strip()
128
+ if text:
129
+ m = re.match(r"(L-\d{3})", file_path.name)
130
+ ref = f"@{path_str}#{m.group(1)}" if m else f"@{path_str}"
131
+ chunks.append((text[:MAX_CHUNK_CHARS], ref))
132
+ return chunks
133
+
134
+ # General: split by headers
135
+ parts = re.split(r"^(#{1,4}\s+.+)$", content, flags=re.MULTILINE)
136
+ preamble = parts[0].strip()
137
+ if preamble:
138
+ chunks.append((preamble[:MAX_CHUNK_CHARS], f"@{path_str}"))
139
+
140
+ i = 1
141
+ while i < len(parts) - 1:
142
+ heading_line = parts[i].strip()
143
+ body = parts[i + 1].strip()
144
+ heading_text = re.sub(r"^#{1,4}\s+", "", heading_line)
145
+ full = f"{heading_line}\n{body}".strip() if body else heading_line
146
+ if full:
147
+ chunks.append((full[:MAX_CHUNK_CHARS], f"@{path_str}#{heading_text}"))
148
+ i += 2
149
+
150
+ if not chunks and content.strip():
151
+ chunks.append((content.strip()[:MAX_CHUNK_CHARS], f"@{path_str}"))
152
+ return chunks
153
+
154
+
155
class IngestPipeline:
    """
    Detects changed markdown files under the memory root, chunks them,
    classifies their metadata from the file path, and upserts one
    memory_units row per source file.
    """

    def __init__(self, db: Optional[sqlite3.Connection] = None, repo_root: Optional[Path] = None):
        # Fall back to the shared connection from autonomy.schema.
        self.db = db or get_db()
        self.repo_root = repo_root or Path.cwd()
        # Honors the MNEMO_MEMORY_ROOT override, then .mnemo/.cursor layouts.
        self.mem_root = _resolve_memory_root(self.repo_root)

    def detect_changes(self) -> list[Path]:
        """Return list of .md files that have changed hash."""
        changed: list[Path] = []
        for p in self.mem_root.glob("**/*.md"):
            if p.name in SKIP_NAMES:
                continue
            if any(skip in p.parts for skip in SKIP_DIRS):
                continue
            try:
                # utf-8-sig strips a BOM if present, keeping hashes stable.
                content = p.read_text(encoding="utf-8-sig")
                h = _content_hash(content)
                # file_meta caches the last-ingested hash per path.
                # NOTE(review): row["hash"] assumes a mapping-style
                # row_factory — confirm in autonomy.schema.get_db.
                row = self.db.execute(
                    "SELECT hash FROM file_meta WHERE path = ?", (str(p),)
                ).fetchone()
                if not row or row["hash"] != h:
                    changed.append(p)
            except OSError:
                # Unreadable files are silently skipped.
                pass
        return changed

    def ingest_file(self, file_path: Path) -> list[MemoryUnit]:
        """Ingest a single file, create/update memory units, return list."""
        content = file_path.read_text(encoding="utf-8-sig")
        h = _content_hash(content)
        path_str = str(file_path)

        # All metadata is derived from the path / memory type alone.
        mem_type = _infer_memory_type(path_str)
        authority = AUTHORITY_WEIGHTS.get(mem_type, 0.5)  # 0.5 for unknown types
        time_scope = _infer_time_scope(mem_type)
        sensitivity = _infer_sensitivity(path_str)
        chunks = _chunk_markdown(content, file_path)

        # One memory_units row per source file; reuse its unit_id on re-ingest.
        existing_row = self.db.execute(
            "SELECT unit_id FROM memory_units WHERE source_ref = ?", (path_str,)
        ).fetchone()

        if existing_row:
            unit_id = existing_row["unit_id"]
            is_new = False
        else:
            unit_id = str(uuid.uuid4())
            is_new = True

        unit = MemoryUnit(
            unit_id=unit_id,
            source_ref=path_str,
            memory_type=mem_type,
            authority=authority,
            time_scope=time_scope,
            sensitivity=sensitivity,
            entity_tags=[],  # resolved later via update_entity_tags
            content_hash=h,
            content=content,
            chunks=chunks,
            is_new=is_new,
        )

        if is_new:
            # entity_tags starts as an empty JSON array literal.
            self.db.execute(
                """
                INSERT INTO memory_units
                (unit_id, source_ref, memory_type, authority, time_scope, sensitivity, entity_tags, content_hash)
                VALUES (?, ?, ?, ?, ?, ?, '[]', ?)
                """,
                (unit_id, path_str, mem_type, authority, time_scope, sensitivity, h),
            )
        else:
            # Refresh metadata and hash; existing entity_tags are preserved.
            self.db.execute(
                """
                UPDATE memory_units
                SET memory_type=?, authority=?, time_scope=?, sensitivity=?,
                    content_hash=?, updated_at=unixepoch('now')
                WHERE unit_id=?
                """,
                (mem_type, authority, time_scope, sensitivity, h, unit_id),
            )

        # Record the ingested hash so detect_changes() skips this file
        # until its content changes again.
        self.db.execute(
            "INSERT OR REPLACE INTO file_meta(path, hash, chunk_count, updated_at) VALUES (?,?,?,unixepoch('now'))",
            (path_str, h, len(chunks)),
        )
        self.db.commit()
        return [unit]

    def update_entity_tags(self, unit: MemoryUnit, entity_ids: list[str]) -> None:
        """Persist resolved entity tags back to the unit row."""
        unit.entity_tags = entity_ids
        self.db.execute(
            "UPDATE memory_units SET entity_tags=?, updated_at=unixepoch('now') WHERE unit_id=?",
            (json.dumps(entity_ids), unit.unit_id),
        )
        self.db.commit()
@@ -0,0 +1,254 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ lifecycle_engine.py - Autonomous fact lifecycle engine.
4
+
5
+ Decides ADD / UPDATE / DEPRECATE / NOOP for each memory unit based on
6
+ similarity to existing facts, freshness, and contradiction detection.
7
+ All decisions are logged to lifecycle_events for full auditability.
8
+
9
+ No human required: transitions happen automatically on every ingest cycle.
10
+ """
11
+ import hashlib
12
+ import json
13
+ import os
14
+ import re
15
+ import sqlite3
16
+ import uuid
17
+ from dataclasses import dataclass
18
+ from datetime import datetime, timezone
19
+ from pathlib import Path
20
+ from typing import Optional
21
+
22
+ from autonomy.schema import get_db
23
+ from autonomy.ingest_pipeline import MemoryUnit
24
+
25
# Minimum token-Jaccard similarity before a new fact can deprecate an old one.
DEPRECATION_SIMILARITY_THRESHOLD = 0.85
PROMOTE_STABILITY_CYCLES = 3  # fact must appear N cycles before lesson promotion
# NOTE(review): PROMOTE_STABILITY_CYCLES is not read anywhere in this module.
NOOP_HASH_MATCH = True  # if content_hash unchanged, always NOOP
28
+
29
+
30
@dataclass
class LifecycleDecision:
    """Outcome of running one memory unit through the lifecycle engine."""

    operation: str  # ADD | UPDATE | DEPRECATE | NOOP
    unit_id: str  # memory unit the decision applies to
    fact_id: Optional[str]  # affected fact row, when one exists
    reason: str  # audit string persisted to lifecycle_events
    confidence: float = 1.0  # 0.8 is used for heuristic deprecations
37
+
38
+
39
+ def _extract_key_facts(content: str, memory_type: str) -> list[str]:
40
+ """
41
+ Heuristic extraction of canonical facts from content.
42
+ Returns list of short declarative sentences.
43
+ """
44
+ facts = []
45
+
46
+ # Hot rules / lessons: extract bullet points as facts
47
+ if memory_type in ("core", "procedural"):
48
+ for m in re.finditer(r"^[-*]\s+(.+)", content, re.MULTILINE):
49
+ fact = m.group(1).strip()
50
+ if len(fact) > 10:
51
+ facts.append(fact)
52
+
53
+ # Journal / active-context: extract decision lines
54
+ if memory_type in ("episodic",):
55
+ for m in re.finditer(r"(decided|confirmed|fixed|added|removed|changed):\s*(.+)", content, re.IGNORECASE):
56
+ facts.append(m.group(0).strip())
57
+
58
+ # Generic: extract first sentence of each section
59
+ for m in re.finditer(r"^#{1,4}\s+(.+)\n+(.*?)(?:\n|$)", content, re.MULTILINE):
60
+ heading = m.group(1).strip()
61
+ body = m.group(2).strip()
62
+ if body:
63
+ facts.append(f"{heading}: {body}")
64
+
65
+ return facts[:20] # cap to prevent runaway
66
+
67
+
68
+ def _simple_similarity(a: str, b: str) -> float:
69
+ """Token Jaccard similarity for contradiction detection (no embeddings needed)."""
70
+ ta = set(re.findall(r"\w+", a.lower()))
71
+ tb = set(re.findall(r"\w+", b.lower()))
72
+ if not ta or not tb:
73
+ return 0.0
74
+ return len(ta & tb) / len(ta | tb)
75
+
76
+
77
+ def _resolve_lessons_dir(repo_root: Path) -> Path:
78
+ override = os.getenv("MNEMO_MEMORY_ROOT", "").strip()
79
+ if override:
80
+ return Path(override).expanduser().resolve() / "lessons"
81
+
82
+ candidates = [
83
+ repo_root / ".mnemo" / "memory" / "lessons",
84
+ repo_root / ".cursor" / "memory" / "lessons",
85
+ ]
86
+ for candidate in candidates:
87
+ if candidate.exists():
88
+ return candidate
89
+ return candidates[0]
90
+
91
+
92
class LifecycleEngine:
    """
    Autonomous fact lifecycle state machine.

    For each ingested MemoryUnit, decides ADD / UPDATE / DEPRECATE / NOOP
    based on content hashes, extracted facts, and contradiction detection,
    and records every decision in lifecycle_events for auditability.
    """

    def __init__(self, db: Optional[sqlite3.Connection] = None):
        # Fall back to the shared connection from autonomy.schema.
        self.db = db or get_db()

    def process(self, unit: MemoryUnit) -> LifecycleDecision:
        """
        Process a memory unit through the lifecycle state machine.

        Emits a lifecycle_event for the final decision (plus one per
        deprecated fact) and returns the decision. Commits on every path.
        """
        existing_hash = self.db.execute(
            "SELECT content_hash FROM memory_units WHERE unit_id = ?",
            (unit.unit_id,),
        ).fetchone()

        # Fast path: identical content means nothing to re-evaluate.
        if existing_hash and NOOP_HASH_MATCH and existing_hash["content_hash"] == unit.content_hash:
            decision = LifecycleDecision("NOOP", unit.unit_id, None, "content_hash_unchanged")
            self._log_event(decision)
            # Fix: this early return previously skipped commit, leaving the
            # logged event stranded in an open transaction.
            self.db.commit()
            return decision

        facts = _extract_key_facts(unit.content, unit.memory_type)
        if not facts:
            decision = LifecycleDecision("NOOP", unit.unit_id, None, "no_extractable_facts")
            self._log_event(decision)
            self.db.commit()  # same fix as above: persist the NOOP event
            return decision

        existing_facts = self.db.execute(
            "SELECT fact_id, canonical_fact, status, confidence FROM facts WHERE source_ref = ?",
            (unit.source_ref,),
        ).fetchall()

        # Deprecate any globally active facts this unit contradicts.
        contradictions = self._detect_contradictions(facts)
        for old_fact_id, old_fact_text in contradictions:
            self.db.execute(
                "UPDATE facts SET status='deprecated', updated_at=unixepoch('now') WHERE fact_id=?",
                (old_fact_id,),
            )
            dep_decision = LifecycleDecision(
                "DEPRECATE", unit.unit_id, old_fact_id,
                reason=f"superseded_by_unit:{unit.unit_id}",
                confidence=0.8,  # heuristic detection, not a certain contradiction
            )
            self._log_event(dep_decision)

        if existing_facts:
            # UPDATE: the source file changed; reactivate and slightly boost
            # confidence of every fact previously extracted from it.
            for ef in existing_facts:
                self.db.execute(
                    "UPDATE facts SET status='active', confidence=?, updated_at=unixepoch('now') WHERE fact_id=?",
                    (min(ef["confidence"] + 0.05, 1.0), ef["fact_id"]),
                )
            decision = LifecycleDecision("UPDATE", unit.unit_id, existing_facts[0]["fact_id"], "source_file_changed")
        else:
            # ADD: first time this source produced facts; store at most 5.
            for fact_text in facts[:5]:
                fact_id = str(uuid.uuid4())
                self.db.execute(
                    "INSERT INTO facts(fact_id, canonical_fact, status, confidence, source_ref) VALUES (?,?,'active',1.0,?)",
                    (fact_id, fact_text, unit.source_ref),
                )
            decision = LifecycleDecision("ADD", unit.unit_id, None, f"new_unit_{len(facts)}_facts_extracted")

        self._log_event(decision)
        self.db.commit()
        return decision

    def _detect_contradictions(self, new_facts: list[str]) -> list[tuple[str, str]]:
        """
        Find existing active facts that are semantically contradicted by new_facts.

        A fact counts as contradicted when a new fact is highly similar
        (token Jaccard >= DEPRECATION_SIMILARITY_THRESHOLD) AND a negation
        pattern appears in the new fact while its affirmative counterpart
        appears in the old one. High threshold: low false-positive rate is
        more important than recall.
        """
        contradicted: list[tuple[str, str]] = []
        existing = self.db.execute(
            "SELECT fact_id, canonical_fact FROM facts WHERE status = 'active'"
        ).fetchall()

        # (negation expected in new fact, affirmation expected in old fact)
        contradiction_patterns = [
            (r"\bdo\s+not\b", r"\bdo\b"),
            (r"\bnever\b", r"\balways\b"),
            (r"\bdisabled\b", r"\benabled\b"),
        ]

        for ef in existing:
            ef_text = ef["canonical_fact"]
            for new_fact in new_facts:
                sim = _simple_similarity(new_fact, ef_text)
                if sim >= DEPRECATION_SIMILARITY_THRESHOLD:
                    for pat_a, pat_b in contradiction_patterns:
                        a_in_new = bool(re.search(pat_a, new_fact, re.I))
                        b_in_old = bool(re.search(pat_b, ef_text, re.I))
                        if a_in_new and b_in_old:
                            contradicted.append((ef["fact_id"], ef_text))
                            break
        return contradicted

    def _log_event(self, decision: LifecycleDecision) -> None:
        """Append the decision to lifecycle_events; the caller is responsible for commit."""
        self.db.execute(
            "INSERT INTO lifecycle_events(event_id, unit_id, operation, reason) VALUES (?,?,?,?)",
            (str(uuid.uuid4()), decision.unit_id, decision.operation, decision.reason),
        )

    def promote_lessons(self, repo_root: Path) -> list[str]:
        """
        Auto-promote stable repeated signals into lesson files.

        A fact qualifies when: status=active AND confidence >= 0.95
        AND no lesson already covers the source_ref. At most 5 facts are
        promoted per call. Returns list of created lesson paths.
        """
        candidates = self.db.execute(
            """
            SELECT f.fact_id, f.canonical_fact, f.source_ref, f.confidence
            FROM facts f
            WHERE f.status = 'active' AND f.confidence >= 0.95
              AND f.source_ref NOT LIKE '%lessons/L-%'
            ORDER BY f.confidence DESC
            LIMIT 5
            """
        ).fetchall()

        promoted: list[str] = []
        lessons_dir = _resolve_lessons_dir(repo_root)
        lessons_dir.mkdir(parents=True, exist_ok=True)

        # Continue numbering after the highest existing L-### file
        # (lexicographic sort is correct for zero-padded ids).
        existing = sorted(lessons_dir.glob("L-*.md"))
        next_id = 1
        if existing:
            m = re.match(r"L-(\d+)", existing[-1].name)
            if m:
                next_id = int(m.group(1)) + 1

        for row in candidates:
            fact_text = row["canonical_fact"][:200]
            lesson_id = f"L-{next_id:03d}"
            slug = re.sub(r"[^a-z0-9]+", "-", fact_text.lower())[:40].strip("-")
            lesson_file = lessons_dir / f"{lesson_id}-{slug}.md"

            if lesson_file.exists():
                # Name collision: skip without consuming the id or marking
                # the fact, so it can be retried after manual cleanup.
                continue

            # NOTE(review): uses local time for the 'introduced' date —
            # confirm whether UTC is intended (timezone is imported but unused).
            today = datetime.now().strftime("%Y-%m-%d")
            content = (
                f"---\nid: {lesson_id}\ntitle: {fact_text[:80]}\nstatus: Active\n"
                f"tags: [Process]\nintroduced: {today}\napplies_to:\n - \"**/*\"\n"
                f"triggers:\n - auto-promoted\nrule: {fact_text[:120]}\n---\n\n"
                f"# {lesson_id} - Auto-Promoted Lesson\n\n"
                f"**Source:** `{row['source_ref']}`\n\n"
                f"**Canonical fact:** {fact_text}\n\n"
                f"> This lesson was auto-promoted by the Mnemo autonomous runner.\n"
                f"> Review and edit the rule to ensure accuracy.\n"
            )
            lesson_file.write_text(content, encoding="utf-8")
            promoted.append(str(lesson_file))

            # Mark the fact as promoted so it is never promoted twice.
            self.db.execute(
                "UPDATE facts SET status='promoted', updated_at=unixepoch('now') WHERE fact_id=?",
                (row["fact_id"],),
            )
            next_id += 1

        if promoted:
            self.db.commit()
        return promoted
@@ -0,0 +1,59 @@
1
# Mnemo Memory System - Vault & Sensitivity Policy
# Loaded by vault_policy.py at runtime.
# Edit this file to customize sensitivity rules for your project.

# Sensitivity classification rules (applied in order: secret > internal > public)
sensitivity_paths:
  secret:
    - ".mnemo/memory/vault/"
    - ".cursor/memory/vault/"
    - ".env"
    - "*.secret.*"
    - "*credentials*"
    - "*private-key*"
  internal:
    - ".mnemo/memory/active-context.md"
    - ".cursor/memory/active-context.md"

# Additional regex redaction patterns (applied on top of built-ins)
# Built-ins already handle: API keys, bearer tokens, long random strings
# NOTE(review): with no entries this key parses as null, not an empty list —
# confirm vault_policy.py normalizes a null value before iterating.
redaction_patterns:
  # Add project-specific patterns here, e.g.:
  # - "sk-[a-zA-Z0-9]{40,}"
  # - "xoxb-[0-9]+-[0-9]+-[a-zA-Z0-9]+"

# Roles allowed to see 'internal' sensitivity content
allow_internal_for_roles:
  - agent
  - autonomous

# Maximum sensitivity level allowed in outgoing context packs
# Options: public | internal
# Setting 'public' means only public content is included
max_sensitivity_in_context: internal

# Autonomous ingestion: re-run sensitivity classification every N cycles
reclassify_every_n_cycles: 10

# Lifecycle deprecation: minimum confidence before auto-deprecating a fact
deprecation_confidence_threshold: 0.8

# Entity resolution: minimum Jaccard similarity to merge an alias
alias_merge_threshold: 0.85

# Lesson promotion: minimum fact confidence to auto-promote to lesson
lesson_promotion_threshold: 0.95

# Retrieval safety: minimum final_score for results to appear in context packs
min_retrieval_score: 0.2

# Token budgets (in characters, ~4 chars per token)
token_budget_default: 6000
token_budget_extended: 12000

# Benchmark thresholds (used by CI quality gates)
benchmark:
  min_hit_at_3: 0.7
  min_ndcg_at_5: 0.65
  max_p95_latency_ms: 2000
  max_token_cost_per_query: 0.005