nexo-brain 7.34.0 → 7.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1249 @@
1
+ """SELECTIVE-FORGET (Ola 4) — verifiable hard-forget of revoked secrets.
2
+
3
+ Problem this solves (real incidents): a compromised key/secret (OpenAI key,
4
+ GITHUB_PAT, admin keys pasted into chat) lands in memory. Today a "correction"
5
+ is a SOFT-HIDE (``UPDATE ... is_dormant=1``) that does NOT fire the FTS
6
+ ``AFTER DELETE`` trigger, so the secret text stays grep-able in the FTS index.
7
+ This module lets us forget it *for real* and *verifiably*.
8
+
9
+ Two modes, NEVER mixed (central security principle):
10
+
11
+ * ``mode='secret'`` (HARD-FORGET): real PHYSICAL removal + verified-to-zero.
12
+ ONLY for revoked secrets/credentials or data flagged toxic. Here we
13
+ deliberately break the anti-loss discipline — the goal is that the secret
14
+ disappears from *every* surface.
15
+ * ``mode='fact'`` (CORRECT-FACT): useful memory is NOT lost — it keeps the
16
+ existing reversible supersede (``item_history`` / soft-supersede). The forget
17
+ engine does NOT physically delete here.
18
+
19
+ The golden rule: destructive forget only arms with ``mode='secret'`` +
20
+ mandatory dry-run + explicit ``confirm``. Fact correction stays on the soft path.
21
+
22
+ ────────────────────────────────────────────────────────────────────────────
23
+ SECURITY-CRITICAL DESIGN (the fix): COVERAGE BY INTROSPECTION, NOT A CURATED LIST
24
+ ────────────────────────────────────────────────────────────────────────────
25
+ The previous version hard-coded ~13 of the ~110 tables. A secret in any of the
26
+ other ~97 (``item_history``, ``diary_archive``, ``historical_diary_index``,
27
+ ``memory_events``, ``continuity_snapshots``, ``session_checkpoints``,
28
+ ``session_diary_draft``, ``transcript_index``, FTS shadow tables, legacy shadow
29
+ DBs …) reported ``complete=True`` while the secret SURVIVED grep-able. A secret
30
+ that "was deleted" but is still there is worse than not deleting it.
31
+
32
+ The guarantee is now structural and non-negotiable: **if
33
+ ``verification.complete is True`` the secret is NOT grep-able in ANY column of
34
+ ANY table of ANY LIVE DB the agent retrieves from, in ANY FTS index, in any
35
+ on-disk transcript, or in any legacy shadow DB.** We achieve this by
36
+ *enumerating* every table and every column at runtime (``sqlite_master`` +
37
+ ``PRAGMA table_info``) instead of trusting a curated registry. A residual in a
38
+ table nobody anticipated is both *cleaned* and, if cleaning failed, *reported*
39
+ as ``complete=False``.
40
+
41
+ The set of LIVE DBs is itself discovered from each subsystem's own canonical
42
+ path resolver, NOT hardcoded (round-3 fix — the previous version covered only
43
+ nexo.db + cognitive.db and silently missed local-context.db, where a secret in
44
+ any indexed file lands in ``local_chunks``/``local_chunks_fts``/
45
+ ``entity_facts.value``/``local_entities.evidence`` and survives forget). The
46
+ live set is:
47
+ * nexo.db — ``db._core`` / ``paths.resolve_db_path``
48
+ * cognitive.db — ``cognitive_paths.resolve_cognitive_db``
49
+ * local-context.db — ``local_context.db.local_context_db_path``
50
+ * local-context-usage.db — ``local_context.usage_events.usage_db_path``
51
+ * nexo-email.db — ``email_sent_events.sent_email_db_path``
52
+ plus legacy shadows of cognitive.db and local-context.db.
53
+
54
+ HONEST BACKUP SCOPE: ``complete=True`` is a statement about LIVE DBs only.
55
+ Point-in-time backups/snapshots (``*.bak``, ``runtime/backups``, deep-sleep
56
+ snapshots) are NOT swept — they are retention copies, and the real mitigation
57
+ for a compromised secret is to ROTATE it (operator action). Every report
58
+ carries this note (``backup_scope``) so the guarantee never overclaims.
59
+
60
+ Strategy per surface:
61
+ * REDACT-IN-PLACE (default for every introspected table): replace the secret
62
+ substring with a placeholder so the surrounding useful record (a diary
63
+ entry, ``item_history.note``, ``change_log`` row …) survives without the
64
+ secret. This is uniform and safe for tables we did not anticipate.
65
+ * DELETE-ROW (explicit allow-list): drop the whole row where the row *is* the
66
+ secret container or carries a vector embedding / FTS copy that must vanish
67
+ with it (``stm/ltm/quarantine`` + embedding, ``memory_observations`` + FTS,
68
+ ``kg_nodes``/``kg_edges``, ``somatic_markers``, ``resolution_cache``
69
+ ``content_snapshot_json`` leak, ``hot_context``, ``recent_events``).
70
+ * FTS5 virtual tables are scrubbed by mutating the *parent* vtable directly
71
+ (FTS5 maintains its ``_content``/``_docsize``/``_idx`` shadow tables on
72
+ UPDATE/DELETE — verified). Standalone FTS5 does NOT support 'rebuild', so we
73
+ never rely on it; we redact/delete the FTS rows in place.
74
+ * Vector embeddings ride with their row (DELETE removes them); persisted HNSW
75
+ indices are invalidated so a later search cannot reload forgotten vectors.
76
+ * On-disk transcripts: matching lines are redacted in place.
77
+ * Legacy shadow DBs — cognitive (``cognitive_paths.legacy_cognitive_db_paths``)
78
+ AND local-context (``_local_context_shadow_paths``) — are swept with the
79
+ same introspective engine; a secret in a shadow is a leak.
80
+
81
+ VERIFICATION is honest: it RE-ENUMERATES and re-scans everything (no reuse of
82
+ the curated list) and only returns ``complete=True`` at total zero, otherwise
83
+ ``complete=False`` with the exact surviving locations (namespaced ``<db>.<table>``).
84
+ """
85
+
86
+ from __future__ import annotations
87
+
88
+ import json
89
+ import os
90
+ import re
91
+ import sqlite3
92
+ import time
93
+ from dataclasses import dataclass, field
94
+ from pathlib import Path
95
+ from typing import Any, Iterable, Optional
96
+
97
+
98
+ # HONEST SCOPE — point-in-time BACKUPS / snapshots are deliberately NOT swept.
99
+ # ``complete=True`` means the secret is gone from every LIVE DB the agent
100
+ # retrieves from; it does NOT touch ``*.bak`` files, dated backups under
101
+ # runtime/backups, or deep-sleep snapshots. Those are retention copies; the real
102
+ # mitigation for a compromised secret is to ROTATE it (operator action), so
103
+ # scrubbing immutable backups would give a false sense of safety AND fight the
104
+ # backups' purpose. This note is surfaced in every report for transparency.
105
# Scope disclaimer attached to reports. The literal is split by clause; the
# concatenated text is unchanged.
_BACKUP_SCOPE_NOTE = (
    "complete=True means the secret is no longer RETRIEVABLE "
    "(row/column value) from any LIVE DB. "
    "Out of scope, by design: "
    "(1) point-in-time backups / snapshots "
    "(*.bak, runtime/backups, deep-sleep snapshots) — retention copies; "
    "(2) the external claude-mem plugin DB "
    "(batch-read only, not an answer-time retrieval surface). "
    "Byte-level note: the sweep runs with PRAGMA secure_delete=ON "
    "so pages freed by the delete are zeroed, "
    "but a full historical VACUUM is NOT forced "
    "(cost on the ~20GB local index), "
    "so isolated byte residue may persist in free pages until a VACUUM. "
    "The real mitigation for a COMPROMISED secret is always to ROTATE it."
)
116
+
117
+
118
+ # ─────────────────────────────────────────────────────────────────────────────
119
+ # DB handles — the FULL set of LIVE memory DBs the agent RETRIEVES from
120
+ # ─────────────────────────────────────────────────────────────────────────────
121
+ #
122
+ # COVERAGE PRINCIPLE (round-3 fix): the security guarantee is
123
+ # "complete=True ⇒ the secret is not grep-able in any LIVE DB". A LIVE DB is one
124
+ # the cooperator READS / RETRIEVES from at answer/act time, so a secret indexed
125
+ # into it would resurface. We do NOT hardcode the set of DBs — we ask each
126
+ # subsystem's own canonical path resolver, so this set tracks the runtime:
127
+ #
128
+ # * nexo.db — db._core (paths.resolve_db_path)
129
+ # * cognitive.db — cognitive_paths.resolve_cognitive_db
130
+ # * local-context.db — local_context.db.local_context_db_path (the
131
+ # ~20GB local file index: local_chunks/_fts,
132
+ # local_entities.evidence, entity_facts.value, …)
133
+ # * local-context-usage.db — local_context.usage_events.usage_db_path
134
+ # (query telemetry: intent/error/metadata free text)
135
+ # * nexo-email.db — email_sent_events.sent_email_db_path
136
+ # (sent/inbound bodies the agent recalls)
137
+ #
138
+ # This is exactly the live-retrieval inventory used by saved_not_used_audit.
139
+ # Other ~.db files under runtime are NOT live retrieval surfaces and are out of
140
+ # scope on purpose: backups/snapshots (retention — see _BACKUP_SCOPE_NOTE),
141
+ # personal_scripts.db (a script *registry*, not memory text), and
142
+ # auto_capture_dedup.db (dedup *hashes*, never raw secrets). Each is documented
143
+ # so "live-DB coverage" is an explicit, auditable claim — not an accident of
144
+ # which resolver happened to be wired.
145
+ #
146
+ # Legacy SHADOWS of cognitive.db and local-context.db are swept too (a secret in
147
+ # a shadow is still a leak); see _shadow_db_paths.
148
+
149
+
150
def _nexo_conn() -> sqlite3.Connection:
    """Return the nexo.db handle handed out by ``db.get_db()``.

    The ``db`` module is imported lazily, at call time.
    """
    import db as nexo_db

    handle = nexo_db.get_db()
    return handle
155
+
156
+
157
def _cognitive_conn() -> sqlite3.Connection:
    """Return the cognitive.db handle handed out by ``cognitive._get_db()``.

    The ``cognitive`` module is imported lazily, at call time.
    """
    import cognitive as cognitive_mod

    handle = cognitive_mod._get_db()
    return handle
162
+
163
+
164
def _local_context_conn() -> sqlite3.Connection:
    """Return the local-context.db handle from ``get_local_context_db()``.

    ``local_context.db`` is imported lazily, at call time.
    """
    import local_context.db as local_db

    handle = local_db.get_local_context_db()
    return handle
169
+
170
+
171
def _local_context_usage_path() -> Optional[Path]:
    """Return ``local_context.usage_events.usage_db_path()``, or None.

    Any failure (module missing, resolver raising) is reported as ``None``
    rather than propagated.
    """
    try:
        import local_context.usage_events as usage_mod

        resolved = usage_mod.usage_db_path()
    except Exception:
        resolved = None
    return resolved
178
+
179
+
180
def _email_db_path() -> Optional[Path]:
    """Return ``email_sent_events.sent_email_db_path()``, or None.

    Any failure (module missing, resolver raising) is reported as ``None``
    rather than propagated.
    """
    try:
        import email_sent_events as email_mod

        resolved = email_mod.sent_email_db_path()
    except Exception:
        resolved = None
    return resolved
187
+
188
+
189
def _open_file_db(path: Path) -> Optional[sqlite3.Connection]:
    """Open an existing on-disk SQLite file and return a configured handle.

    Returns ``None`` when the file does not exist or when opening it fails.
    The handle uses ``sqlite3.Row`` rows, a 30 s busy timeout, and
    ``secure_delete=ON`` so pages freed by deletes are zeroed instead of
    lingering in the freelist.
    """
    # Applied in order inside one guard: if the first pragma raises, the
    # second is not attempted, and the handle is still returned.
    setup_pragmas = (
        "PRAGMA busy_timeout=30000",
        "PRAGMA secure_delete=ON",
    )
    try:
        if not path.exists():
            return None
        handle = sqlite3.connect(str(path), timeout=30, check_same_thread=False)
        handle.row_factory = sqlite3.Row
        try:
            for pragma in setup_pragmas:
                handle.execute(pragma)
        except Exception:
            pass
        return handle
    except Exception:
        return None
207
+
208
+
209
def _live_conns() -> list[tuple[str, sqlite3.Connection, bool]]:
    """Collect every reachable live DB as ``(name, connection, owned)``.

    ``owned`` is False for handles obtained from the subsystem getters
    (nexo / cognitive / local-context): this module must not close them.
    It is True for handles opened here by path (usage / email): the caller
    is responsible for closing those, e.g. via ``_close_if_owned``.

    A DB whose getter raises, whose path cannot be resolved, or whose file
    cannot be opened is silently left out of the result.
    """
    shared_getters = (
        ("nexo", _nexo_conn),
        ("cognitive", _cognitive_conn),
        ("local-context", _local_context_conn),
    )
    path_resolvers = (
        ("local-context-usage", _local_context_usage_path),
        ("email", _email_db_path),
    )

    found: list[tuple[str, sqlite3.Connection, bool]] = []

    # Handles owned by their subsystem module.
    for label, get_handle in shared_getters:
        try:
            found.append((label, get_handle(), False))
        except Exception:
            pass

    # Handles opened here by path; the caller closes them.
    for label, resolve_path in path_resolvers:
        try:
            location = resolve_path()
        except Exception:
            location = None
        if location:
            handle = _open_file_db(Path(location))
            if handle is not None:
                found.append((label, handle, True))

    return found
242
+
243
+
244
def _close_if_owned(conns: Iterable[tuple[str, sqlite3.Connection, bool]]) -> None:
    """Close only the connections flagged as owned (third tuple item true).

    Errors raised by ``close()`` are ignored; unowned handles are untouched.
    """
    for entry in conns:
        handle, owned = entry[1], entry[2]
        if not owned:
            continue
        try:
            handle.close()
        except Exception:
            pass
251
+
252
+
253
def _both_conns() -> list[tuple[str, sqlite3.Connection]]:
    """Back-compat view of ``_live_conns()`` without the ownership flag.

    Despite the name, this yields a ``(name, connection)`` pair for every
    entry ``_live_conns()`` returns. Because the ownership flag is dropped,
    path-opened handles are not closed by this function; code that mutates
    should call ``_live_conns()`` directly and close owned handles itself.
    """
    pairs: list[tuple[str, sqlite3.Connection]] = []
    for entry in _live_conns():
        pairs.append((entry[0], entry[1]))
    return pairs
262
+
263
+
264
def _now_epoch() -> float:
    """Return the current epoch seconds as a float.

    Uses ``db.now_epoch()`` when it can be imported and called; otherwise
    falls back to ``time.time()``.
    """
    try:
        import db as nexo_db

        stamp = float(nexo_db.now_epoch())
    except Exception:
        stamp = time.time()
    return stamp
271
+
272
+
273
+ # ─────────────────────────────────────────────────────────────────────────────
274
+ # MATCHER — reuse the redact regexes from cognitive._core, INVERTED to a
275
+ # predicate, plus the literal value(s) to forget.
276
+ # ─────────────────────────────────────────────────────────────────────────────
277
+
278
+
279
def _redact_detect_patterns() -> list[re.Pattern]:
    """Return the compiled secret-detection regexes.

    The patterns are the first element of each ``(pattern, replacement)``
    pair in ``cognitive._core._REDACT_PATTERNS``. If that table cannot be
    imported or unpacked, a built-in fallback set is compiled instead.
    """
    fallback_sources = (
        r"sk-[a-zA-Z0-9_\-]{20,}",
        r"ghp_[a-zA-Z0-9]{20,}",
        r"gho_[a-zA-Z0-9]{20,}",
        r"shpat_[a-f0-9]{20,}",
        r"AKIA[A-Z0-9]{16}",
        r"xox[bp]-[a-zA-Z0-9\-]{20,}",
    )
    try:
        from cognitive._core import _REDACT_PATTERNS

        detectors = []
        for detector, _replacement in _REDACT_PATTERNS:
            detectors.append(detector)
        return detectors
    except Exception:
        return [re.compile(source) for source in fallback_sources]
294
+
295
+
296
@dataclass
class ForgetMatcher:
    """Predicate and redactor for the value(s) being forgotten.

    ``literals`` are matched as case-sensitive substrings. When
    ``use_regex`` is true, the secret-shaped detection regexes returned by
    ``_redact_detect_patterns()`` are applied as well. ``use_regex``
    defaults to False so a plain value never trips a generic secret pattern.
    """

    literals: list[str] = field(default_factory=list)
    use_regex: bool = False
    _regexes: list[re.Pattern] = field(default_factory=list, init=False)

    def __post_init__(self) -> None:
        # Drop empty / whitespace-only entries (including None) and coerce the
        # rest to str; caller order is preserved.
        self.literals = [str(v) for v in self.literals if str(v or "").strip()]
        if self.use_regex:
            self._regexes = _redact_detect_patterns()

    def matches(self, text: Optional[str]) -> bool:
        """Return True when ``text`` contains any literal or regex hit."""
        if not text:
            return False
        if any(literal and literal in text for literal in self.literals):
            return True
        return any(rx.search(text) for rx in self._regexes)

    def redact(self, text: Optional[str]) -> str:
        """Replace every matched span with a placeholder.

        Literals are replaced first, longest first. If a shorter literal
        were replaced before a longer one that contains it, the rest of the
        longer secret would be left behind in clear text. Regex hits are
        then handled by ``cognitive._core.redact_secrets`` when it can be
        imported and called, else by substituting each pattern with
        ``[REDACTED:secret]``.

        Returns ``""`` for ``None`` and the input unchanged for other falsy
        values.
        """
        if not text:
            return text or ""
        result = text
        # sorted() is stable, so equal-length literals keep their caller order.
        for literal in sorted(self.literals, key=len, reverse=True):
            if literal:
                result = result.replace(literal, "[REDACTED:forgotten]")
        if self._regexes:
            try:
                from cognitive._core import redact_secrets

                result = redact_secrets(result)
            except Exception:
                for rx in self._regexes:
                    result = rx.sub("[REDACTED:secret]", result)
        return result
347
+
348
+
349
+ # ─────────────────────────────────────────────────────────────────────────────
350
+ # INTROSPECTION — enumerate EVERY table + text-bearing column of a connection
351
+ # ─────────────────────────────────────────────────────────────────────────────
352
+
353
+ # FTS5 shadow internal tables are maintained automatically by the parent vtable;
354
+ # never mutate them directly (and they hold no plaintext beyond what the parent
355
+ # already exposes for verification, except *_content which the parent cleans).
356
_FTS_SHADOW_SUFFIXES = (
    "_data",
    "_idx",
    "_docsize",
    "_config",
    "_content",
)

# Lower-case name fragments marking a column as binary embedding/vector
# storage. Such columns are not scanned as text; they disappear with their
# row when the row is deleted.
_BLOB_COLUMN_HINTS = (
    "embedding",
    "vector",
    "blob",
)
361
+
362
+
363
@dataclass
class TableInfo:
    """Description of one introspected table or FTS5 virtual table."""

    # Table name as stored in sqlite_master.
    name: str
    # True when the table was created with ``USING fts5``.
    is_fts5: bool
    # Column used to identify rows; 'rowid' when no primary key was found.
    pk: str
    # Columns that may hold text and are therefore scanned.
    text_columns: tuple[str, ...]
    # True when the table has an embedding/vector-named column.
    has_embedding: bool
370
+
371
+
372
def _quote(ident: str) -> str:
    """Wrap ``ident`` in double quotes, doubling any embedded double quote."""
    escaped = ident.replace('"', '""')
    return f'"{escaped}"'
374
+
375
+
376
def _is_fts5_sql(sql: Optional[str]) -> bool:
    """Return True when a table's DDL declares an FTS5 virtual table.

    The match is case-insensitive and tolerates any whitespace (several
    spaces, tabs, newlines) between ``USING`` and ``fts5``. A fixed
    ``"using fts5"`` substring test would miss such a table, and it would
    then not be handled as FTS.

    ``None`` and the empty string yield False.
    """
    if not sql:
        return False
    return re.search(r"\busing\s+fts5\b", sql, re.IGNORECASE) is not None
378
+
379
+
380
def _looks_textual(declared_type: str) -> bool:
    """Decide from a declared column type whether the column is scanned.

    A column is skipped (False) when its upper-cased declared type contains
    ``INT``, ``BLOB``, ``REAL``, ``FLOA`` or ``DOUB``. Everything else,
    including an empty or missing declaration, is scanned (True).
    """
    declared = (declared_type or "").upper()
    skip_markers = ("INT", "BLOB", "REAL", "FLOA", "DOUB")
    for marker in skip_markers:
        if marker in declared:
            return False
    # Undeclared, TEXT, CHAR, CLOB, NUMERIC, DATE, JSON, ... are scanned.
    return True
397
+
398
+
399
def _introspect_tables(conn: sqlite3.Connection) -> list[TableInfo]:
    """Enumerate every base table and FTS5 vtable with its scannable columns.

    Tables come from ``sqlite_master`` and columns from ``PRAGMA
    table_info``, so the result reflects the schema actually present on
    ``conn``. Skipped: ``sqlite_*`` tables, FTS5 shadow tables (``<fts>``
    plus a known shadow suffix), and tables whose ``PRAGMA table_info``
    raises. Returns an empty list when ``sqlite_master`` cannot be read.

    Row identifier (``TableInfo.pk``):
      * single-column primary key -> that column;
      * composite primary key on an ordinary (rowid) table -> ``"rowid"``.
        One key column alone is not unique, so a ``WHERE <col> = ?`` built
        from it could update or delete unrelated rows sharing that value;
      * composite key on a ``WITHOUT ROWID`` table -> first key column,
        because ``TableInfo`` holds a single identifier (known limitation);
      * no usable key column -> ``"rowid"``.
    """
    out: list[TableInfo] = []
    try:
        rows = conn.execute(
            "SELECT name, sql FROM sqlite_master WHERE type='table'"
        ).fetchall()
    except Exception:
        return out

    fts_names = {r[0] for r in rows if _is_fts5_sql(r[1])}

    for name, sql in rows:
        if name.startswith("sqlite_"):
            continue
        # Skip FTS5 shadow internal tables — the parent vtable owns them.
        if name.endswith(_FTS_SHADOW_SUFFIXES) and name.rsplit("_", 1)[0] in fts_names:
            continue
        try:
            cols = conn.execute(f"PRAGMA table_info({_quote(name)})").fetchall()
        except Exception:
            continue

        text_cols: list[str] = []
        pk_col = ""
        pk_member_count = 0
        has_embedding = False
        for col in cols:
            # PRAGMA table_info → (cid, name, type, notnull, dflt, pk)
            col_name = col[1]
            col_type = col[2] or ""
            col_pk = col[5]
            if col_pk:
                # Counted before the blob/embedding skip so a composite key
                # is recognised even when one member is not scannable.
                pk_member_count += 1
            lname = str(col_name).lower()
            if any(h in lname for h in _BLOB_COLUMN_HINTS) or "blob" in col_type.lower():
                has_embedding = has_embedding or ("embed" in lname or "vector" in lname)
                continue
            if col_pk and not pk_col:
                pk_col = col_name
            if _looks_textual(col_type):
                text_cols.append(col_name)

        if pk_col and pk_member_count > 1:
            without_rowid = bool(
                re.search(r"\bwithout\s+rowid\b", sql or "", re.IGNORECASE)
            )
            if not without_rowid:
                # Composite key on a rowid table: rowid identifies one row.
                pk_col = "rowid"

        if not pk_col:
            pk_col = "rowid"  # no usable key column found

        out.append(
            TableInfo(
                name=name,
                is_fts5=name in fts_names,
                pk=pk_col,
                text_columns=tuple(text_cols),
                has_embedding=has_embedding,
            )
        )
    return out
459
+
460
+
461
+ # ─────────────────────────────────────────────────────────────────────────────
462
+ # Mutation policy — DELETE-ROW allow-list vs default REDACT-IN-PLACE
463
+ # ─────────────────────────────────────────────────────────────────────────────
464
+
465
+ # Tables where a matching row should be DELETED whole (the row *is* the secret
466
+ # container, or it carries a vector embedding / dedicated FTS copy that must
467
+ # vanish with it). Everything else is REDACTED in place so surrounding useful
468
+ # memory survives. Unknown/introspected tables → redact (safe default).
469
# Membership is by table name only: a table with one of these names is
# treated as delete-row in whichever DB it is found.
_DELETE_ROW_TABLES: frozenset[str] = frozenset(
    (
        # Memory rows and the structures that carry embeddings.
        "stm_memories",
        "ltm_memories",
        "quarantine",
        "kg_nodes",
        "kg_edges",
        "somatic_markers",
        # Rows whose only reason to exist is the (now toxic) content.
        "memory_observations",  # has linked FTS + entities
        "resolution_cache",  # content_snapshot_json leak that soft invalidate() never clears
        "hot_context",
        "recent_events",
    )
)
483
+
484
+
485
def _should_delete_row(table: str) -> bool:
    """Return True when ``table`` is on the delete-row allow-list."""
    on_allow_list = table in _DELETE_ROW_TABLES
    return on_allow_list
487
+
488
+
489
+ # ─────────────────────────────────────────────────────────────────────────────
490
+ # Per-DB scan / sweep primitives
491
+ # ─────────────────────────────────────────────────────────────────────────────
492
+
493
+
494
def _row_value_str(row: Any, idx: int) -> str:
    """Return ``row[idx]`` as a string suitable for matching.

    ``""`` is returned when indexing fails or the value is ``None``.
    ``bytes`` are decoded as UTF-8 with undecodable bytes dropped; any
    other value goes through ``str()``.
    """
    try:
        cell = row[idx]
    except Exception:
        return ""
    if cell is None:
        return ""
    if not isinstance(cell, bytes):
        return str(cell)
    try:
        return cell.decode("utf-8", "ignore")
    except Exception:
        return ""
507
+
508
+
509
def _scan_db(conn: sqlite3.Connection, matcher: ForgetMatcher) -> dict[str, int]:
    """Return ``{table: matching_row_count}`` across all introspected tables.

    This is the grep-equivalent: for every table reported by
    ``_introspect_tables`` (FTS5 vtables included) the text columns of each
    row are joined with a space and tested with ``matcher.matches``. Tables
    with no match are omitted.

    Rows are streamed in batches instead of being loaded with
    ``fetchall()``, so scanning a very large table does not hold the whole
    table in memory. A table whose SELECT cannot be started is skipped. If
    reading fails part-way through, the matches counted so far are still
    reported rather than discarded.
    """
    batch_size = 500
    residual: dict[str, int] = {}
    for ti in _introspect_tables(conn):
        if not ti.text_columns:
            continue
        col_list = ", ".join(_quote(c) for c in ti.text_columns)
        try:
            cursor = conn.execute(f"SELECT {col_list} FROM {_quote(ti.name)}")
        except Exception:
            continue
        width = len(ti.text_columns)
        count = 0
        try:
            while True:
                try:
                    batch = cursor.fetchmany(batch_size)
                except Exception:
                    break  # unreadable tail: keep what was counted
                if not batch:
                    break
                for row in batch:
                    blob = " ".join(_row_value_str(row, i) for i in range(width))
                    if matcher.matches(blob):
                        count += 1
        finally:
            try:
                cursor.close()
            except Exception:
                pass
        if count:
            residual[ti.name] = count
    return residual
531
+
532
+
533
def _sweep_db(conn: sqlite3.Connection, matcher: ForgetMatcher) -> dict[str, dict[str, int]]:
    """Physically remove the secret from every table of a connection.

    Returns ``{"deleted": {table: n}, "redacted": {table: n}}``.

    For each table reported by ``_introspect_tables``:
      * FTS5 vtables and delete-row allow-listed tables: matching rows are
        deleted.
      * Every other table: each matching column is rewritten with
        ``matcher.redact`` and the row is kept. If the row matched only on
        the space-joined concatenation of its columns, it is deleted.

    Rows are streamed and classified in batches instead of being loaded
    with ``fetchall()``, so a very large table is never held in memory
    whole; only matching rows are retained. The read cursor is exhausted
    and closed before any DELETE/UPDATE is issued. If reading fails
    part-way, the rows already classified are still cleaned.

    Individual statement failures are swallowed (best effort), so callers
    must re-scan to learn whether anything survived. The connection is
    committed once at the end.
    """
    batch_size = 500
    deleted: dict[str, int] = {}
    redacted: dict[str, int] = {}

    for ti in _introspect_tables(conn):
        if not ti.text_columns:
            continue
        columns = ti.text_columns
        table = _quote(ti.name)
        col_list = ", ".join(_quote(c) for c in columns)
        whole_row = ti.is_fts5 or _should_delete_row(ti.name)

        # Identify rows by the declared key first; if that SELECT is
        # rejected, fall back to the implicit rowid.
        cursor = None
        pkref = "rowid"
        for candidate in dict.fromkeys((ti.pk, "rowid")):
            pkref = "rowid" if candidate == "rowid" else _quote(candidate)
            try:
                cursor = conn.execute(f"SELECT {pkref}, {col_list} FROM {table}")
            except Exception:
                cursor = None
                continue
            break
        if cursor is None:
            continue

        delete_ids: list[Any] = []
        redact_targets: list[tuple[Any, dict[str, str]]] = []

        try:
            while True:
                try:
                    batch = cursor.fetchmany(batch_size)
                except Exception:
                    break  # unreadable tail: still clean what was classified
                if not batch:
                    break
                for row in batch:
                    row_pk = row[0]
                    values = {
                        col: _row_value_str(row, i)
                        for i, col in enumerate(columns, start=1)
                    }
                    if not matcher.matches(" ".join(values.values())):
                        continue
                    if whole_row:
                        delete_ids.append(row_pk)
                        continue
                    new_vals = {
                        col: matcher.redact(val)
                        for col, val in values.items()
                        if val and matcher.matches(val)
                    }
                    if new_vals:
                        redact_targets.append((row_pk, new_vals))
                    else:
                        # Match spanned the concatenation only (rare) — delete to be safe.
                        delete_ids.append(row_pk)
        finally:
            try:
                cursor.close()
            except Exception:
                pass

        # Apply deletes, chunked to bound the number of bound parameters.
        if delete_ids:
            n = 0
            for start in range(0, len(delete_ids), 500):
                chunk = delete_ids[start:start + 500]
                ph = ",".join("?" * len(chunk))
                try:
                    cur = conn.execute(
                        f"DELETE FROM {table} WHERE {pkref} IN ({ph})", chunk
                    )
                    n += int(cur.rowcount or 0)
                except Exception:
                    pass
            if n:
                deleted[ti.name] = n

        # Apply redactions (one UPDATE per row, only its matching columns).
        if redact_targets:
            n = 0
            for row_pk, new_vals in redact_targets:
                set_clause = ", ".join(f"{_quote(c)} = ?" for c in new_vals)
                params = list(new_vals.values()) + [row_pk]
                try:
                    conn.execute(
                        f"UPDATE {table} SET {set_clause} WHERE {pkref} = ?",
                        params,
                    )
                    n += 1
                except Exception:
                    pass
            if n:
                redacted[ti.name] = n

    try:
        conn.commit()
    except Exception:
        pass
    return {"deleted": deleted, "redacted": redacted}
629
+
630
+
631
+ # ─────────────────────────────────────────────────────────────────────────────
632
+ # Legacy shadow DBs (cognitive + local-context) — a secret in a shadow is a leak
633
+ # ─────────────────────────────────────────────────────────────────────────────
634
+
635
+
636
def _local_context_shadow_paths() -> list[Path]:
    """Return existing alternate-location copies of local-context.db.

    The live store path comes from ``local_context.db.local_context_db_path``
    (``None`` if that fails). Candidates depend on whether an override is in
    effect:

    * ``NEXO_LOCAL_CONTEXT_DB`` or ``NEXO_TEST_DB`` set to a non-blank value:
      only ``local-context.db.legacy`` next to the live store is considered.
      The ambient home/runtime locations are deliberately not consulted, so
      a sweep under an override (or in tests) cannot reach an unrelated tree.
    * otherwise: ``local-context.db`` under ``paths.brain_dir()``,
      ``paths.home()/memory``, ``paths.home()`` and ``paths.runtime_dir()``.

    Only candidates that exist, differ from the live store, and have not
    already been seen (compared by resolved path) are returned, in order.
    """
    try:
        import local_context.db as local_db

        live = local_db.local_context_db_path()
    except Exception:
        live = None

    override_vars = ("NEXO_LOCAL_CONTEXT_DB", "NEXO_TEST_DB")
    overridden = any(os.environ.get(var, "").strip() for var in override_vars)

    candidates: list[Path] = []
    if overridden:
        # Sibling shadow of the (overridden) live store only.
        if live is not None:
            candidates.append(live.with_name("local-context.db.legacy"))
    else:
        try:
            import paths as nexo_paths

            ambient = [
                nexo_paths.brain_dir() / "local-context.db",  # personal/brain shadow
                nexo_paths.home() / "memory" / "local-context.db",  # legacy flat layout
                nexo_paths.home() / "local-context.db",
                nexo_paths.runtime_dir() / "local-context.db",
            ]
            candidates.extend(ambient)
        except Exception:
            pass

    shadows: list[Path] = []
    visited: set[str] = set()
    for cand in candidates:
        try:
            key = str(cand.resolve())
            live_key = str(live.resolve()) if live else ""
        except Exception:
            key = str(cand)
            live_key = str(live) if live else ""
        if cand.exists() and key != live_key and key not in visited:
            visited.add(key)
            shadows.append(cand)
    return shadows
692
+
693
+
694
def _shadow_db_paths() -> list[Path]:
    """Return every legacy shadow DB path to sweep, without duplicates.

    Combines the existing paths from
    ``cognitive_paths.legacy_cognitive_db_paths()`` (skipped entirely if
    that import or call fails) with ``_local_context_shadow_paths()``.
    Duplicates are detected by resolved path; first occurrence wins and
    order is preserved.
    """
    gathered: list[Path] = []
    try:
        import cognitive_paths

        for legacy in cognitive_paths.legacy_cognitive_db_paths():
            if legacy.exists():
                gathered.append(legacy)
    except Exception:
        pass
    gathered.extend(_local_context_shadow_paths())

    distinct: list[Path] = []
    visited: set[str] = set()
    for shadow in gathered:
        try:
            key = str(shadow.resolve())
        except Exception:
            key = str(shadow)
        if key not in visited:
            visited.add(key)
            distinct.append(shadow)
    return distinct
721
+
722
+
723
def _open_shadow(path: Path) -> Optional[sqlite3.Connection]:
    """Open a legacy shadow DB by path, or return None if that fails.

    The handle is configured like the other path-opened live DBs:
    ``sqlite3.Row`` rows, a 30 s busy timeout, and ``secure_delete=ON`` so
    pages freed when a shadow is swept are zeroed rather than left in the
    freelist. Pragma failures are ignored and the handle is still returned.
    """
    try:
        conn = sqlite3.connect(str(path), timeout=30)
        conn.row_factory = sqlite3.Row
    except Exception:
        return None
    try:
        conn.execute("PRAGMA busy_timeout=30000")
        conn.execute("PRAGMA secure_delete=ON")
    except Exception:
        pass
    return conn
730
+
731
+
732
def _scan_shadows(matcher: ForgetMatcher) -> dict[str, dict[str, int]]:
    """Scan every legacy shadow DB without modifying it.

    Returns ``{path: {table: n}}`` containing only shadows where
    ``_scan_db`` found at least one match. Shadows that cannot be opened are
    skipped. Each opened handle is closed afterwards.
    """
    findings: dict[str, dict[str, int]] = {}
    for shadow in _shadow_db_paths():
        handle = _open_shadow(shadow)
        if handle is None:
            continue
        try:
            hits = _scan_db(handle, matcher)
            if hits:
                findings[str(shadow)] = hits
        finally:
            try:
                handle.close()
            except Exception:
                pass
    return findings
749
+
750
+
751
def _sweep_shadows(matcher: ForgetMatcher) -> dict[str, dict[str, dict[str, int]]]:
    """Run ``_sweep_db`` over every legacy shadow DB.

    Returns ``{path: sweep_result}`` containing only shadows where
    something was deleted or redacted. Shadows that cannot be opened are
    skipped. Each opened handle is closed afterwards.
    """
    outcomes: dict[str, dict[str, dict[str, int]]] = {}
    for shadow in _shadow_db_paths():
        handle = _open_shadow(shadow)
        if handle is None:
            continue
        try:
            outcome = _sweep_db(handle, matcher)
            changed = outcome["deleted"] or outcome["redacted"]
            if changed:
                outcomes[str(shadow)] = outcome
        finally:
            try:
                handle.close()
            except Exception:
                pass
    return outcomes
768
+
769
+
770
+ # ─────────────────────────────────────────────────────────────────────────────
771
+ # FTS residual scan (kept as a named helper for callers/tests) — introspective
772
+ # ─────────────────────────────────────────────────────────────────────────────
773
+
774
+
775
def _fts_residual_hits(matcher: ForgetMatcher) -> dict[str, int]:
    """Count residual matches in every FTS5 table of every live DB.

    The stored FTS content is read and tested with ``matcher.matches`` (the
    grep-equivalent). An FTS ``MATCH`` query is not used, because it
    tokenizes and may miss a raw secret substring. FTS tables are found by
    introspection. Keys are namespaced ``<db>.<fts_table>`` so same-named
    tables in different DBs never collide.

    Rows are streamed in batches instead of being loaded with
    ``fetchall()``, so a large FTS index is not held in memory whole. A
    table whose SELECT cannot be started is skipped. If reading fails
    part-way, the matches counted so far are still reported. Handles opened
    by ``_live_conns`` are closed on exit.
    """
    batch_size = 500
    residual: dict[str, int] = {}
    conns = _live_conns()
    try:
        for db_name, conn, _owns in conns:
            for ti in _introspect_tables(conn):
                if not ti.is_fts5 or not ti.text_columns:
                    continue
                col_list = ", ".join(_quote(c) for c in ti.text_columns)
                try:
                    cursor = conn.execute(
                        f"SELECT {col_list} FROM {_quote(ti.name)}"
                    )
                except Exception:
                    continue
                width = len(ti.text_columns)
                count = 0
                try:
                    while True:
                        try:
                            batch = cursor.fetchmany(batch_size)
                        except Exception:
                            break  # unreadable tail: keep what was counted
                        if not batch:
                            break
                        for row in batch:
                            blob = " ".join(
                                _row_value_str(row, i) for i in range(width)
                            )
                            if matcher.matches(blob):
                                count += 1
                finally:
                    try:
                        cursor.close()
                    except Exception:
                        pass
                if count:
                    residual[f"{db_name}.{ti.name}"] = count
    finally:
        _close_if_owned(conns)
    return residual
808
+
809
+
810
+ # ─────────────────────────────────────────────────────────────────────────────
811
+ # Transcripts on disk (outside SQLite) — scan + redact matching lines
812
+ # ─────────────────────────────────────────────────────────────────────────────
813
+
814
+
815
def _transcript_roots() -> list[Path]:
    """Return the candidate transcript locations, de-duplicated in order.

    Candidates are ``<runtime_dir>/transcripts`` and
    ``<runtime_dir>/coordination`` (when the ``paths`` module can be imported
    and queried), followed by the ``NEXO_TRANSCRIPT_DIR`` environment variable
    when it is set to a non-blank value. Two candidates are treated as the same
    root when their resolved paths are equal; the first occurrence wins.
    Existence is not checked here.
    """
    candidates: list[Path] = []
    try:
        import paths as _paths

        candidates.extend(
            (
                _paths.runtime_dir() / "transcripts",
                _paths.runtime_dir() / "coordination",
            )
        )
    except Exception:
        # The runtime layout is optional; fall back to the env override only.
        pass

    override = os.environ.get("NEXO_TRANSCRIPT_DIR", "").strip()
    if override:
        candidates.append(Path(override))

    # Dicts keep insertion order, so setdefault gives "first occurrence wins".
    by_key: dict[str, Path] = {}
    for candidate in candidates:
        try:
            key = str(candidate.resolve())
        except Exception:
            key = str(candidate)
        by_key.setdefault(key, candidate)
    return list(by_key.values())
842
+
843
+
844
def _transcript_files(roots: Iterable[Path]) -> list[Path]:
    """Expand transcript roots into the list of files to scan.

    A root that is itself a file is returned as-is. A root that is a directory
    is searched recursively for ``*.jsonl``, ``*.json``, ``*.txt``, ``*.md`` and
    ``*.log`` entries, one pattern after another. Roots that do not exist
    contribute nothing.
    """
    patterns = ("*.jsonl", "*.json", "*.txt", "*.md", "*.log")
    collected: list[Path] = []
    for root in roots:
        if root.is_file():
            collected.append(root)
        elif root.exists():
            for pattern in patterns:
                collected += root.rglob(pattern)
    return collected
855
+
856
+
857
def _scan_transcripts(matcher: ForgetMatcher, files: list[Path]) -> dict[str, int]:
    """Report how many lines of each file match, without writing anything.

    Files are decoded as UTF-8 with undecodable bytes ignored and split with
    ``str.splitlines`` (terminators removed) before matching. Files that cannot
    be read are skipped. The result maps ``str(path)`` to the matching-line
    count and omits files with no match.
    """
    found: dict[str, int] = {}
    for transcript in files:
        try:
            content = transcript.read_text(encoding="utf-8", errors="ignore")
        except Exception:
            continue
        matching = 0
        for text_line in content.splitlines():
            if matcher.matches(text_line):
                matching += 1
        if matching:
            found[str(transcript)] = matching
    return found
869
+
870
+
871
def _redact_transcripts(matcher: ForgetMatcher, files: list[Path]) -> dict[str, int]:
    """Redact matching lines in place. Returns redacted-line count per file.

    Each file is decoded as UTF-8 (undecodable bytes are ignored, so they are
    not written back when the file is rewritten) and processed line by line.
    A line whose text matches ``matcher`` is replaced by ``matcher.redact`` of
    that text; every other line is written back unchanged.

    Line terminators are preserved exactly: the file is read and written with
    ``newline=""`` so redacting one line does not convert the whole file's
    ``\\r\\n`` endings to ``\\n``. Matching is done on the line without its
    terminator, which is the same text ``_scan_transcripts`` matches against.

    Files that cannot be read are skipped. A file is only rewritten when at
    least one line matched; if the write fails the file is left out of the
    result.
    """
    redacted: dict[str, int] = {}
    for path in files:
        try:
            # newline="" turns off universal-newline translation on read.
            with path.open(encoding="utf-8", errors="ignore", newline="") as fh:
                original = fh.read()
        except Exception:
            continue
        changed = 0
        out: list[str] = []
        for line in original.splitlines(keepends=True):
            # keepends lines are never empty, so splitlines() yields one item:
            # the line's text without whatever terminator it ended with.
            core = line.splitlines()[0]
            terminator = line[len(core):]
            if matcher.matches(core):
                out.append(matcher.redact(core) + terminator)
                changed += 1
            else:
                out.append(line)
        if changed:
            try:
                # newline="" again so the preserved terminators are written raw.
                with path.open("w", encoding="utf-8", newline="") as fh:
                    fh.write("".join(out))
                redacted[str(path)] = changed
            except Exception:
                # Best effort: an unwritable file is simply not reported here.
                pass
    return redacted
897
+
898
+
899
+ # ─────────────────────────────────────────────────────────────────────────────
900
+ # Ledger — auditable record in memory_corrections (cognitive.db)
901
+ # ─────────────────────────────────────────────────────────────────────────────
902
+
903
+
904
def _ledger_context(operation: dict[str, Any], limit: int = 8000) -> str:
    """Serialize ``operation`` to JSON that fits in ``limit`` characters.

    When the full document fits it is returned unchanged. Otherwise the
    largest top-level fields are replaced, one at a time, by a short
    placeholder until the document fits, and ``"context_truncated": true`` is
    added. The result is always parseable JSON, unlike slicing the string.
    """
    payload = json.dumps(operation, default=str)
    if len(payload) <= limit:
        return payload

    slim = dict(operation)
    slim["context_truncated"] = True
    largest_first = sorted(
        operation,
        key=lambda key: len(json.dumps(operation[key], default=str)),
        reverse=True,
    )
    for key in largest_first:
        slim[key] = "<omitted: too large for ledger>"
        payload = json.dumps(slim, default=str)
        if len(payload) <= limit:
            return payload
    # Even the placeholders do not fit (tiny limit / huge key set).
    return json.dumps({"context_truncated": True})


def _write_ledger(operation: dict[str, Any]) -> Optional[int]:
    """Record a forget operation in ``memory_corrections`` and return its row id.

    The row is written with ``memory_id=0``, ``store='forget'``,
    ``correction_type`` set to the operation's ``mode`` (default ``'secret'``)
    and ``context`` set to the operation as JSON, capped at 8000 characters by
    ``_ledger_context`` so the stored value stays valid JSON.

    Returns ``None`` when the cognitive connection cannot be obtained, when the
    ``memory_corrections`` table does not exist, or when any step fails.
    """
    try:
        conn = _cognitive_conn()
    except Exception:
        return None
    try:
        row = conn.execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name='memory_corrections' LIMIT 1"
        ).fetchone()
        if not row:
            return None
        cur = conn.execute(
            "INSERT INTO memory_corrections (memory_id, store, correction_type, context) "
            "VALUES (?, ?, ?, ?)",
            (0, "forget", operation.get("mode", "secret"), _ledger_context(operation)),
        )
        conn.commit()
        return int(cur.lastrowid)
    except Exception:
        # The ledger is best-effort; a failure here must not fail the forget.
        return None
924
+
925
+
926
+ # ─────────────────────────────────────────────────────────────────────────────
927
+ # Public engine
928
+ # ─────────────────────────────────────────────────────────────────────────────
929
+
930
+
931
def _dry_run_counts(matcher: ForgetMatcher) -> dict[str, Any]:
    """Collect match counts from every surface without modifying anything.

    Scans each live DB with ``_scan_db`` (keys namespaced ``<db>.<table>``),
    counts the tables introspection finds in each, then scans the on-disk
    transcripts and the legacy shadow DBs. The returned dict carries the
    per-store counts, their total, the transcript hits and line total, the
    shadow hits, the names of the live DBs, the number of tables found, and
    the coverage / backup-scope notes.
    """
    row_hits: dict[str, int] = {}
    table_total = 0
    live = _live_conns()
    try:
        for db_name, conn, _owned in live:
            matched = _scan_db(conn, matcher)
            table_total += len(_introspect_tables(conn))
            row_hits.update(
                {f"{db_name}.{table}": n for table, n in matched.items()}
            )
    finally:
        _close_if_owned(live)

    line_hits = _scan_transcripts(matcher, _transcript_files(_transcript_roots()))
    shadow_hits = _scan_shadows(matcher)
    return {
        "per_store": row_hits,
        "total_rows": sum(row_hits.values()),
        "transcript_hits": line_hits,
        "transcript_lines": sum(line_hits.values()),
        "shadow_hits": shadow_hits,
        # Only the names are read here, so the closed handles are not touched.
        "live_dbs": [entry[0] for entry in live],
        "tables_scanned": table_total,
        "coverage": "all-live-dbs-by-introspection",
        "backup_scope": _BACKUP_SCOPE_NOTE,
    }
959
+
960
+
961
def verify_forgotten(matcher: ForgetMatcher) -> dict[str, Any]:
    """Re-scan every surface and report whether anything still matches.

    Four scans are run from scratch: every table of every live DB
    (``_scan_db``), every FTS5 table (``_fts_residual_hits``), the on-disk
    transcripts, and the legacy shadow DBs. ``complete`` is True only when all
    four come back empty; otherwise the residual locations are returned, with
    DB tables namespaced ``<db>.<table>``.

    Backups are not scanned; ``backup_scope`` carries the note saying so.
    """
    store_hits: dict[str, int] = {}
    live = _live_conns()
    try:
        for db_name, conn, _owned in live:
            store_hits.update(
                (f"{db_name}.{table}", n)
                for table, n in _scan_db(conn, matcher).items()
            )
    finally:
        _close_if_owned(live)

    fts_hits = _fts_residual_hits(matcher)
    transcript_hits = _scan_transcripts(
        matcher, _transcript_files(_transcript_roots())
    )
    shadow_hits = _scan_shadows(matcher)

    nothing_left = not any((store_hits, fts_hits, transcript_hits, shadow_hits))
    return {
        "complete": nothing_left,
        "residual_stores": store_hits,
        "residual_fts": fts_hits,
        "residual_transcripts": transcript_hits,
        "residual_shadows": shadow_hits,
        # complete=True covers the live DBs and files scanned above, not backups.
        "backup_scope": _BACKUP_SCOPE_NOTE,
    }
998
+
999
+
1000
def forget(
    value: str = "",
    *,
    values: Optional[list[str]] = None,
    mode: str = "secret",
    dry_run: bool = True,
    confirm: bool = False,
    use_regex: bool = False,
    invalidate_hnsw: bool = True,
    redact_transcripts: bool = True,
    sweep_shadows: bool = True,
    reason: str = "",
) -> dict[str, Any]:
    """Entry point of the selective-forget engine.

    ``value`` and ``values`` are combined; blank entries are dropped. With no
    usable value, or an unknown ``mode``, an ``{"ok": False, "error": ...}``
    dict is returned.

    * ``mode='fact'`` never deletes: it returns an explanatory message only.
    * ``mode='secret'`` always computes the dry-run counts first. Unless
      ``dry_run`` is False AND ``confirm`` is True, those counts are returned
      and nothing is modified.
    * On the confirmed path every live DB is swept, then (optionally) the
      shadow DBs, the persisted HNSW indices and the on-disk transcripts.
      ``verify_forgotten`` is then run and the operation is written to the
      ledger. ``complete`` in the result mirrors the verification outcome.
    """
    candidates = [value] + (values or [])
    literals = [item for item in candidates if str(item or "").strip()]
    if not literals:
        return {"ok": False, "error": "no value(s) provided to forget"}

    mode = (mode or "secret").lower()
    if mode not in ("secret", "fact"):
        return {"ok": False, "error": f"unknown mode '{mode}'"}

    # CORRECT-FACT: no physical deletion, the reversible soft path stays in charge.
    if mode == "fact":
        fact_message = (
            "CORRECT-FACT mode does not physically delete. Useful memory is "
            "preserved via the existing reversible supersede (item_history / "
            "soft-supersede). Use nexo_learning_update / supersede_learning to "
            "correct the fact; the original stays auditable and restorable."
        )
        return {
            "ok": True,
            "mode": "fact",
            "destructive": False,
            "message": fact_message,
            "values": literals,
        }

    # HARD-FORGET (secret): counts are gathered before anything is touched.
    matcher = ForgetMatcher(literals=literals, use_regex=use_regex)
    counts = _dry_run_counts(matcher)

    # Guard: both dry_run=False and confirm=True are needed to go destructive.
    if dry_run or not confirm:
        guard_text = (
            "DRY-RUN. To physically delete, call again with dry_run=False AND "
            "confirm=True. No store was modified."
        )
        return {
            "ok": True,
            "mode": "secret",
            "destructive": False,
            "dry_run": True,
            "armed": (not dry_run and confirm),
            "counts": counts,
            "guard": guard_text,
        }

    # Confirmed destructive path: sweep every live DB found by introspection.
    deleted_per_store: dict[str, int] = {}
    redacted_per_store: dict[str, int] = {}
    live = _live_conns()
    try:
        for db_name, conn, _owned in live:
            # Ask SQLite to zero freed pages so deleted bytes do not linger in
            # the freelist; failure to set the pragma does not stop the sweep.
            try:
                conn.execute("PRAGMA secure_delete=ON")
            except Exception:
                pass
            swept = _sweep_db(conn, matcher)
            for table, n in swept["deleted"].items():
                deleted_per_store[f"{db_name}.{table}"] = n
            for table, n in swept["redacted"].items():
                redacted_per_store[f"{db_name}.{table}"] = n
    finally:
        _close_if_owned(live)

    # Legacy shadow DBs may hold copies of the same rows.
    shadow_result: dict[str, Any] = _sweep_shadows(matcher) if sweep_shadows else {}

    # HNSW: drop persisted indices so vectors built from forgotten rows cannot
    # be reloaded by a later search.
    hnsw_result = {"invalidated": False}
    if invalidate_hnsw:
        try:
            import hnsw_index

            hnsw_index.invalidate("both", remove_persisted=True)
        except Exception as exc:
            hnsw_result = {"invalidated": False, "error": str(exc)[:200]}
        else:
            hnsw_result = {"invalidated": True, "store": "both", "remove_persisted": True}

    # Transcripts live on disk, outside SQLite.
    transcript_result: dict[str, int] = {}
    if redact_transcripts:
        transcript_result = _redact_transcripts(
            matcher, _transcript_files(_transcript_roots())
        )

    # Verification re-scans everything; only a zero result counts as complete.
    verification = verify_forgotten(matcher)

    deleted_total = sum(deleted_per_store.values())
    redacted_total = sum(redacted_per_store.values())

    ledger_id = _write_ledger(
        {
            "mode": "secret",
            "reason": reason,
            "values_count": len(literals),
            "use_regex": use_regex,
            "pre_counts": counts,
            "deleted_per_store": deleted_per_store,
            "redacted_per_store": redacted_per_store,
            "deleted_total": deleted_total,
            "redacted_total": redacted_total,
            "shadow_swept": shadow_result,
            "hnsw": hnsw_result,
            "transcripts_redacted": transcript_result,
            "verification": verification,
            "live_dbs": counts.get("live_dbs", []),
            "backup_scope": _BACKUP_SCOPE_NOTE,
            "at": _now_epoch(),
        }
    )

    return {
        "ok": True,
        "mode": "secret",
        "destructive": True,
        "deleted_per_store": deleted_per_store,
        "redacted_per_store": redacted_per_store,
        "deleted_total": deleted_total,
        "redacted_total": redacted_total,
        # Deleted + redacted, for callers that want one "affected" number.
        "affected_total": deleted_total + redacted_total,
        "shadow_swept": shadow_result,
        "hnsw": hnsw_result,
        "transcripts_redacted": transcript_result,
        "verification": verification,
        "complete": verification["complete"],
        # Which live DBs were swept, plus the backup-scope note, for the report.
        "live_dbs": counts.get("live_dbs", []),
        "backup_scope": _BACKUP_SCOPE_NOTE,
        "ledger_id": ledger_id,
    }
1149
+
1150
+
1151
+ # ─────────────────────────────────────────────────────────────────────────────
1152
+ # Wiring helpers
1153
+ # ─────────────────────────────────────────────────────────────────────────────
1154
+
1155
+
1156
def sweep_revoked_secret(value: str, *, reason: str = "credential_deleted") -> dict[str, Any]:
    """Hard-forget a credential value that has just been deleted.

    Intended as the automatic follow-up to ``nexo_credential_delete``: it calls
    ``forget`` in secret mode with ``dry_run=False`` and ``confirm=True`` as a
    literal (non-regex) match. Values shorter than 8 characters after stripping
    (including empty / ``None``) are not swept, so a meaningless substring can
    never trigger deletions; a ``skipped`` result is returned instead.
    """
    secret = str(value or "").strip()
    long_enough = len(secret) >= 8
    if long_enough:
        return forget(
            secret,
            mode="secret",
            dry_run=False,
            confirm=True,
            use_regex=False,
            reason=reason,
        )
    return {"ok": True, "skipped": True, "reason": "value_too_short_for_safe_match"}
1173
+
1174
+
1175
def handle_memory_forget(
    value: str = "",
    mode: str = "secret",
    dry_run: bool = True,
    confirm: bool = False,
    use_regex: bool = False,
    reason: str = "",
) -> str:
    """MCP handler for ``nexo_memory_forget``: run ``forget`` and describe the outcome.

    Returns plain text: an ``ERROR:`` line when the engine rejects the request,
    the CORRECT-FACT message in fact mode, a dry-run summary when nothing was
    modified, or the hard-forget report (totals, per-store counts, HNSW /
    shadow / transcript status, residuals when incomplete, ledger id, scope).
    """
    result = forget(
        value,
        mode=mode,
        dry_run=dry_run,
        confirm=confirm,
        use_regex=use_regex,
        reason=reason,
    )
    if not result.get("ok"):
        return f"ERROR: {result.get('error', 'forget failed')}"

    if result.get("mode") == "fact":
        return "CORRECT-FACT: " + result["message"]

    report: list[str] = []

    if not result.get("destructive"):
        # Dry-run summary: what would be affected, and how to arm the delete.
        counts = result.get("counts", {})
        live_dbs = counts.get("live_dbs", [])
        report.append("DRY-RUN (no store modified).")
        report.append(
            f"Total matching rows: {counts.get('total_rows', 0)} across "
            f"{counts.get('tables_scanned', 0)} tables in {len(live_dbs)} live DB(s) "
            f"[{', '.join(live_dbs)}] (coverage: all-live-dbs-by-introspection)."
        )
        per_store = counts.get("per_store", {})
        if per_store:
            report.append(
                "Per store: " + ", ".join(f"{k}={v}" for k, v in per_store.items())
            )
        transcript_lines = counts.get("transcript_lines", 0)
        if transcript_lines:
            report.append(f"Transcript lines matching: {transcript_lines}.")
        if counts.get("shadow_hits"):
            report.append(f"Shadow legacy DB hits: {counts['shadow_hits']}.")
        report.append("To delete: dry_run=False AND confirm=True.")
        report.append(f"NOTE: {counts.get('backup_scope', _BACKUP_SCOPE_NOTE)}")
        return "\n".join(report)

    # Destructive result.
    verification = result.get("verification", {})
    complete = verification.get("complete")
    if complete:
        status = "COMPLETE (verified zero matches everywhere)"
    else:
        status = "INCOMPLETE — residual matches remain"
    report.append(f"HARD-FORGET {status}.")
    report.append(
        f"Deleted {result.get('deleted_total', 0)} row(s), redacted "
        f"{result.get('redacted_total', 0)} row(s)."
    )

    live_dbs = result.get("live_dbs", [])
    if live_dbs:
        report.append(f"Live DBs swept ({len(live_dbs)}): {', '.join(live_dbs)}.")

    # Merge deleted and redacted counts into one per-store figure.
    affected = dict(result.get("deleted_per_store", {}))
    for store, n in result.get("redacted_per_store", {}).items():
        affected[store] = affected.get(store, 0) + n
    if affected:
        report.append(
            "Per store: " + ", ".join(f"{k}={v}" for k, v in affected.items())
        )

    report.append(f"HNSW invalidated: {result.get('hnsw', {}).get('invalidated')}")
    if result.get("shadow_swept"):
        report.append(
            f"Shadow legacy DBs cleaned: {list(result['shadow_swept'].keys())}"
        )
    if result.get("transcripts_redacted"):
        report.append(
            f"Transcripts redacted: {len(result['transcripts_redacted'])} file(s)."
        )
    if not complete:
        report.append(f"RESIDUAL stores: {verification.get('residual_stores')}")
        report.append(f"RESIDUAL fts: {verification.get('residual_fts')}")
        report.append(f"RESIDUAL transcripts: {verification.get('residual_transcripts')}")
        report.append(f"RESIDUAL shadows: {verification.get('residual_shadows')}")
    if result.get("ledger_id"):
        report.append(f"Ledger id (memory_corrections): {result['ledger_id']}")
    report.append(f"SCOPE: {result.get('backup_scope', _BACKUP_SCOPE_NOTE)}")
    return "\n".join(report)