nexo-brain 7.24.0 → 7.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +5 -1
- package/package.json +1 -1
- package/src/auto_update.py +30 -0
- package/src/crons/manifest.json +13 -0
- package/src/db/_fts.py +38 -8
- package/src/db/_schema.py +46 -0
- package/src/doctor/providers/runtime.py +69 -0
- package/src/hook_guardrails.py +41 -0
- package/src/memory_fabric.py +536 -0
- package/src/pre_answer_router.py +4 -3
- package/src/scripts/deep-sleep/phase_protocol_debt_drain.py +29 -6
- package/src/scripts/nexo-backup.sh +30 -0
- package/src/scripts/nexo-daily-self-audit.py +36 -1
- package/src/scripts/nexo-memory-fabric.py +45 -0
- package/src/tools_transcripts.py +50 -8
- package/src/transcript_index.py +105 -2
- package/src/transcript_utils.py +65 -13
|
@@ -0,0 +1,536 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
"""Memory Fabric release helpers.
|
|
4
|
+
|
|
5
|
+
This module is the product-owned bridge between existing memory islands:
|
|
6
|
+
transcript metadata, historical diary backups, local-context embeddings and the
|
|
7
|
+
cognitive knowledge graph. It does not copy raw transcripts into the DB.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import hashlib
|
|
11
|
+
import json
|
|
12
|
+
import re
|
|
13
|
+
import sqlite3
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
import paths
|
|
18
|
+
from db import get_db
|
|
19
|
+
from transcript_index import ensure_transcript_index
|
|
20
|
+
from transcript_utils import (
|
|
21
|
+
MAX_TRANSCRIPT_HOURS,
|
|
22
|
+
find_claude_session_files,
|
|
23
|
+
find_codex_session_files,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
HISTORICAL_DIARY_SOURCE = "historical_diary"
|
|
27
|
+
HASH_EMBEDDING_MODEL = "nexo-local-hash-embedding"
|
|
28
|
+
EMAIL_RE = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def ensure_memory_fabric_schema(conn: sqlite3.Connection | None = None) -> None:
    """Create the Memory Fabric tables and indexes if they do not exist yet.

    Args:
        conn: Connection to run the DDL on. When omitted, the connection
            returned by ``get_db()`` is used and committed afterwards.

    Every statement uses ``IF NOT EXISTS``, so calling this repeatedly is safe.
    """
    schema_sql = """
        CREATE TABLE IF NOT EXISTS memory_fabric_sources (
            source_id TEXT PRIMARY KEY,
            source_type TEXT NOT NULL,
            source_ref TEXT NOT NULL,
            status TEXT NOT NULL DEFAULT 'active',
            item_count INTEGER NOT NULL DEFAULT 0,
            last_indexed_at TEXT DEFAULT '',
            metadata_json TEXT NOT NULL DEFAULT '{}'
        );

        CREATE TABLE IF NOT EXISTS historical_diary_index (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            source_backup_path TEXT NOT NULL,
            source_table TEXT NOT NULL DEFAULT 'session_diary',
            source_row_id INTEGER NOT NULL,
            session_id TEXT NOT NULL DEFAULT '',
            created_at TEXT NOT NULL DEFAULT '',
            domain TEXT NOT NULL DEFAULT '',
            summary TEXT NOT NULL DEFAULT '',
            decisions TEXT NOT NULL DEFAULT '',
            pending TEXT NOT NULL DEFAULT '',
            context_next TEXT NOT NULL DEFAULT '',
            mental_state TEXT NOT NULL DEFAULT '',
            self_critique TEXT NOT NULL DEFAULT '',
            source TEXT NOT NULL DEFAULT '',
            content_hash TEXT NOT NULL UNIQUE,
            indexed_at TEXT DEFAULT (datetime('now')),
            metadata_json TEXT NOT NULL DEFAULT '{}',
            UNIQUE(source_backup_path, source_table, source_row_id)
        );

        CREATE INDEX IF NOT EXISTS idx_historical_diary_session
            ON historical_diary_index(session_id);
        CREATE INDEX IF NOT EXISTS idx_historical_diary_created
            ON historical_diary_index(created_at);
        CREATE INDEX IF NOT EXISTS idx_historical_diary_domain
            ON historical_diary_index(domain);
    """
    target = conn or get_db()
    target.executescript(schema_sql)
    # Only commit when this function picked the connection itself; a caller
    # that passed one in keeps control of its own commit point.
    if conn is None:
        target.commit()
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _table_exists(conn: sqlite3.Connection, table: str) -> bool:
    """Return True when ``sqlite_master`` lists a table named ``table``."""
    cursor = conn.execute(
        "SELECT 1 FROM sqlite_master WHERE type='table' AND name=? LIMIT 1",
        (table,),
    )
    return cursor.fetchone() is not None
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _fts_upsert_with_conn(
    conn: sqlite3.Connection,
    source: str,
    source_id: str,
    title: str,
    body: str,
    category: str = "",
) -> None:
    """Replace the ``unified_search`` entry for ``(source, source_id)``.

    Any existing row for the pair is deleted and a fresh one is inserted with
    ``updated_at`` set to the current time. The title is cut to 200
    characters; a falsy body or category is stored as an empty string. The
    caller is responsible for committing.
    """
    ref = str(source_id)
    conn.execute(
        "DELETE FROM unified_search WHERE source = ? AND source_id = ?",
        (source, ref),
    )
    values = (source, ref, str(title)[:200], body or "", category or "")
    conn.execute(
        """
        INSERT INTO unified_search(source, source_id, title, body, category, updated_at)
        VALUES (?, ?, ?, ?, ?, datetime('now'))
        """,
        values,
    )
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _row_value(row: sqlite3.Row | dict[str, Any], key: str, default: str = "") -> str:
    """Read ``row[key]`` as a string, tolerating missing columns.

    Returns ``default`` when the column/key is absent or the lookup fails for
    any reason. A stored ``None`` becomes ``""`` (not ``default``); any other
    value is passed through ``str()``.
    """
    try:
        if isinstance(row, sqlite3.Row):
            # sqlite3.Row raises IndexError for unknown columns; check first.
            if key not in row.keys():
                return default
        raw = row[key]
    except Exception:
        return default
    if raw is None:
        return ""
    return str(raw)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _historical_diary_hash(backup_path: Path, row: sqlite3.Row | dict[str, Any]) -> str:
    """Return a SHA-256 hex digest identifying a diary row by its content.

    The digest covers the row's id, session, timestamp and main text fields.
    ``backup_path`` is accepted but does not contribute to the hash, so the
    same row found in two different backup files hashes identically.
    """
    hashed_fields = (
        "id",
        "session_id",
        "created_at",
        "summary",
        "decisions",
        "pending",
        "context_next",
    )
    payload = {field: _row_value(row, field) for field in hashed_fields}
    # sort_keys keeps the serialisation independent of dict ordering.
    encoded = json.dumps(payload, sort_keys=True).encode("utf-8")
    return hashlib.sha256(encoded).hexdigest()
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _diary_body(row: sqlite3.Row | dict[str, Any]) -> str:
    """Join the non-empty text fields of a diary row with ``" | "``.

    Fields are taken in a fixed order; missing or empty ones are skipped.
    """
    text_fields = (
        "summary",
        "decisions",
        "pending",
        "context_next",
        "mental_state",
        "self_critique",
        "user_signals",
    )
    pieces = []
    for field in text_fields:
        text = _row_value(row, field)
        if text:
            pieces.append(text)
    return " | ".join(pieces)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _link_historical_diary_to_kg(hist: sqlite3.Row, row: sqlite3.Row | dict[str, Any]) -> int:
    """Mirror one historical diary row into the knowledge graph (best effort).

    Upserts a ``diary`` node for the indexed row and links it to its session
    (when the row has one), its area (``domain``, falling back to
    ``"general"``) and up to 12 distinct e-mail addresses found in the diary
    text.

    Args:
        hist: The ``historical_diary_index`` row; ``id`` is required and
            ``source_backup_path`` is recorded on the node when present.
        row: The original backup ``session_diary`` row.

    Returns:
        The number of edges upserted, or 0 when the knowledge graph module is
        unavailable or any step fails. Errors are swallowed on purpose so
        that indexing never depends on the graph.
    """
    try:
        import knowledge_graph as kg

        diary_ref = f"historical_diary:{hist['id']}"
        session_id = _row_value(row, "session_id")
        # Never empty thanks to the fallback, so the area link is unconditional.
        domain = _row_value(row, "domain") or "general"
        label = _row_value(row, "summary") or session_id or diary_ref
        kg.upsert_node(
            "diary",
            diary_ref,
            label,
            {
                "created_at": _row_value(row, "created_at"),
                "session_id": session_id,
                "source": "backup",
                "backup_path": _row_value(hist, "source_backup_path"),
            },
        )

        def link(relation: str, node_type: str, node_ref: str, node_label: str, confidence: float) -> None:
            # Upsert the target node, then the edge from the diary node to it.
            kg.upsert_node(node_type, node_ref, node_label, {"source": "historical_diary"})
            kg.upsert_edge(
                "diary",
                diary_ref,
                relation,
                node_type,
                node_ref,
                confidence=confidence,
                source_memory_id=diary_ref,
            )

        edges = 0
        if session_id:
            link("describes_session", "session", f"session:{session_id}", session_id, 0.95)
            edges += 1

        link("belongs_to_area", "area", f"area:{domain}", domain, 0.8)
        edges += 1

        # Lower-case BEFORE de-duplicating and capping: otherwise
        # "Bob@x.com" and "bob@x.com" use two of the 12 slots and are counted
        # as two edges even though they resolve to the same node.
        emails = sorted({match.lower() for match in EMAIL_RE.findall(_diary_body(row))})[:12]
        for email in emails:
            link("mentions_email", "email", f"email:{email}", email, 0.75)
            edges += 1
        return edges
    except Exception:
        return 0
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _backup_db_paths(backups_root: str | Path | None = None, *, max_files: int = 40) -> list[Path]:
    """List backup ``*.db`` files under the backups root, best candidates first.

    Args:
        backups_root: Directory to search recursively. Defaults to
            ``paths.backups_dir()`` when ``None``.
        max_files: Maximum number of paths to return; values below 1 (or
            falsy) are treated as 1.

    Returns:
        Regular files matching ``*.db``. Weekly backups (file name starting
        with ``weekly-`` or any path component equal to ``weekly``) come
        first, and within each group the most recently modified come first.
        Empty when the root does not exist.
    """
    root = Path(backups_root) if backups_root is not None else paths.backups_dir()
    if not root.exists():
        return []

    def sort_key(item: Path) -> tuple[int, float]:
        try:
            mtime = item.stat().st_mtime
        except OSError:
            # The file vanished or is unreadable: rank it last in its group.
            mtime = 0.0
        is_weekly = item.name.startswith("weekly-") or "weekly" in item.parts
        return (1 if is_weekly else 0, mtime)

    # rglob can also yield directories whose name ends in ".db"; keep only
    # regular files. "-wal"/"-shm" sidecars never match the "*.db" pattern,
    # so no extra suffix filter is needed.
    candidates = [path for path in root.rglob("*.db") if path.is_file()]
    candidates.sort(key=sort_key, reverse=True)
    return candidates[: max(1, int(max_files or 1))]
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def _connect_backup(path: Path) -> sqlite3.Connection | None:
    """Open a backup database read-only, or return ``None`` if it is unusable.

    Args:
        path: Location of the backup SQLite file.

    Returns:
        A connection with ``sqlite3.Row`` as row factory, or ``None`` when the
        file is missing, cannot be opened, or is not a readable SQLite
        database.
    """
    conn: sqlite3.Connection | None = None
    try:
        # as_uri() percent-encodes characters such as '#', '?' and '%' that
        # would otherwise be parsed as URI syntax and corrupt the file name.
        uri = f"{path.resolve().as_uri()}?mode=ro"
        conn = sqlite3.connect(uri, uri=True, timeout=1.0)
        conn.row_factory = sqlite3.Row
        # sqlite3 opens lazily: a corrupt or non-database file only fails on
        # the first query. Probe here so callers get None instead of an
        # exception in the middle of their scan.
        conn.execute("SELECT 1 FROM sqlite_master LIMIT 1").fetchone()
        return conn
    except Exception:
        if conn is not None:
            conn.close()
        return None
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def _active_diary_keys(conn: sqlite3.Connection) -> set[tuple[str, str]]:
    """Collect ``(session_id, created_at)`` pairs already present in active memory.

    Reads ``session_diary`` and ``diary_archive``, skipping whichever table
    does not exist. NULL values are normalised to empty strings.
    """
    seen: set[tuple[str, str]] = set()
    for table in ("session_diary", "diary_archive"):
        if _table_exists(conn, table):
            # Table names come from the fixed tuple above, never from input.
            records = conn.execute(f"SELECT session_id, created_at FROM {table}").fetchall()
            seen.update(
                (str(record["session_id"] or ""), str(record["created_at"] or ""))
                for record in records
            )
    return seen
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def reconcile_backup_diaries(
    *,
    backups_root: str | Path | None = None,
    max_backup_files: int = 40,
    limit: int = 5000,
) -> dict[str, Any]:
    """Index missing session diaries from technical backups into active search.

    Rows are copied into a historical index, not into active `session_diary`.
    That keeps provenance intact and avoids overwriting current memory.

    Args:
        backups_root: Directory holding backup ``*.db`` files. Defaults to
            ``paths.backups_dir()``.
        max_backup_files: Maximum number of backup files to open.
        limit: Maximum total number of backup diary rows to scan.

    Returns:
        A summary dict with ``ok``, ``scanned_backups``, ``scanned_rows``,
        ``skipped_active``, ``inserted``, ``fts_rows``, ``kg_edges`` and
        ``unreadable_backups`` (backups that could not be opened or queried).
    """
    conn = get_db()
    ensure_memory_fabric_schema(conn)
    active_keys = _active_diary_keys(conn)
    # Resolved once: used for the scan and recorded as the source reference.
    root = Path(backups_root) if backups_root is not None else paths.backups_dir()
    scanned_backups = 0
    scanned_rows = 0
    skipped_active = 0
    inserted = 0
    fts_rows = 0
    kg_edges = 0
    unreadable_backups = 0

    for backup_path in _backup_db_paths(root, max_files=max_backup_files):
        if scanned_rows >= limit:
            break
        backup_conn = _connect_backup(backup_path)
        if backup_conn is None:
            unreadable_backups += 1
            continue
        try:
            try:
                if not _table_exists(backup_conn, "session_diary"):
                    continue
                rows = backup_conn.execute(
                    "SELECT * FROM session_diary ORDER BY created_at DESC LIMIT ?",
                    (max(1, int(limit - scanned_rows)),),
                ).fetchall()
            except sqlite3.Error:
                # A corrupt, locked or schema-incompatible backup must not
                # abort reconciliation of the remaining files. Only reads of
                # the backup are guarded; errors on the main DB still raise.
                unreadable_backups += 1
                continue
            scanned_backups += 1
            for row in rows:
                scanned_rows += 1
                key = (_row_value(row, "session_id"), _row_value(row, "created_at"))
                if key in active_keys:
                    skipped_active += 1
                    continue
                content_hash = _historical_diary_hash(backup_path, row)
                metadata = {
                    "backup_name": backup_path.name,
                    "quality_tier": _row_value(row, "quality_tier"),
                    "quality_score": _row_value(row, "quality_score"),
                }
                # total_changes only grows when the INSERT was not ignored.
                before = conn.total_changes
                conn.execute(
                    """
                    INSERT OR IGNORE INTO historical_diary_index (
                        source_backup_path, source_table, source_row_id,
                        session_id, created_at, domain, summary, decisions,
                        pending, context_next, mental_state, self_critique,
                        source, content_hash, metadata_json
                    )
                    VALUES (?, 'session_diary', ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                    """,
                    (
                        str(backup_path),
                        int(_row_value(row, "id", "0") or 0),
                        _row_value(row, "session_id"),
                        _row_value(row, "created_at"),
                        _row_value(row, "domain"),
                        _row_value(row, "summary"),
                        _row_value(row, "decisions"),
                        _row_value(row, "pending"),
                        _row_value(row, "context_next"),
                        _row_value(row, "mental_state"),
                        _row_value(row, "self_critique"),
                        _row_value(row, "source"),
                        content_hash,
                        json.dumps(metadata, ensure_ascii=False, sort_keys=True),
                    ),
                )
                if conn.total_changes > before:
                    inserted += 1
                # source_backup_path is selected as well because the KG link
                # step records it on the diary node; without it that field
                # was always stored empty.
                hist = conn.execute(
                    "SELECT id, summary, domain, source_backup_path "
                    "FROM historical_diary_index WHERE content_hash=?",
                    (content_hash,),
                ).fetchone()
                if hist:
                    title = str(hist["summary"] or _row_value(row, "session_id") or "Historical diary")
                    _fts_upsert_with_conn(
                        conn,
                        HISTORICAL_DIARY_SOURCE,
                        str(hist["id"]),
                        title,
                        _diary_body(row),
                        str(hist["domain"] or "backup"),
                    )
                    fts_rows += 1
                    kg_edges += _link_historical_diary_to_kg(hist, row)
        finally:
            backup_conn.close()

    total_indexed = conn.execute(
        "SELECT COUNT(*) AS total FROM historical_diary_index"
    ).fetchone()["total"]
    conn.execute(
        """
        INSERT INTO memory_fabric_sources(source_id, source_type, source_ref, status, item_count, last_indexed_at, metadata_json)
        VALUES ('historical_diary_backups', 'backup', ?, 'active', ?, datetime('now'), ?)
        ON CONFLICT(source_id) DO UPDATE SET
            source_ref=excluded.source_ref,
            item_count=excluded.item_count,
            last_indexed_at=excluded.last_indexed_at,
            metadata_json=excluded.metadata_json
        """,
        (
            str(root),
            int(total_indexed or 0),
            json.dumps({"scanned_backups": scanned_backups, "scanned_rows": scanned_rows}, sort_keys=True),
        ),
    )
    conn.commit()
    return {
        "ok": True,
        "scanned_backups": scanned_backups,
        "scanned_rows": scanned_rows,
        "skipped_active": skipped_active,
        "inserted": inserted,
        "fts_rows": fts_rows,
        "kg_edges": kg_edges,
        "unreadable_backups": unreadable_backups,
    }
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def _count_transcript_files() -> dict[str, int]:
    """Count the session files reported by each transcript finder, keyed by client."""
    claude_total = len(find_claude_session_files())
    codex_total = len(find_codex_session_files())
    return {"claude_code": claude_total, "codex": codex_total}
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
def _local_context_embedding_stats() -> dict[str, Any]:
    """Summarise the embeddings stored in the local-context database.

    Returns:
        ``{"exists": False}`` when the database file is absent, or
        ``{"exists": False, "error": ...}`` when anything fails (including the
        ``local_context`` package being unavailable). Otherwise ``exists`` is
        True together with ``embeddings`` (total rows), ``models`` (row count
        per ``"model_id:dimension"``) and ``hash_embeddings`` (rows whose
        model is ``HASH_EMBEDDING_MODEL``).
    """
    try:
        from local_context.db import local_context_db_path

        db_path = local_context_db_path()
        if not db_path.is_file():
            return {"exists": False}
        # as_uri() percent-encodes '#', '?' and '%' so unusual paths still
        # form a valid SQLite file URI.
        conn = sqlite3.connect(f"{db_path.resolve().as_uri()}?mode=ro", uri=True, timeout=1.0)
        conn.row_factory = sqlite3.Row
        try:
            if not _table_exists(conn, "local_embeddings"):
                # Same keys as the populated result so consumers need no
                # special case for an empty store.
                return {"exists": True, "embeddings": 0, "models": {}, "hash_embeddings": 0}
            rows = conn.execute(
                "SELECT model_id, dimension, COUNT(*) AS total FROM local_embeddings GROUP BY model_id, dimension"
            ).fetchall()
            models = {
                f"{row['model_id']}:{row['dimension']}": int(row["total"] or 0)
                for row in rows
            }
            hash_prefix = HASH_EMBEDDING_MODEL + ":"
            return {
                "exists": True,
                "embeddings": sum(models.values()),
                "models": models,
                "hash_embeddings": sum(
                    total for key, total in models.items() if key.startswith(hash_prefix)
                ),
            }
        finally:
            conn.close()
    except Exception as exc:
        # Diagnostic helper: report the failure instead of raising.
        return {"exists": False, "error": str(exc)}
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
def _cognitive_kg_stats() -> dict[str, Any]:
    """Count the knowledge-graph nodes and edges in the cognitive database.

    Returns:
        ``{"exists": False}`` when the database file is absent,
        ``{"exists": False, "error": ...}`` when anything fails (for example
        the ``cognitive_paths`` module or the ``kg_*`` tables are missing),
        otherwise ``{"exists": True, "nodes": int, "edges": int}``.
    """
    try:
        from cognitive_paths import resolve_cognitive_db

        db_path = resolve_cognitive_db(for_write=False)
        if not db_path.is_file():
            return {"exists": False}
        # as_uri() percent-encodes '#', '?' and '%' so unusual paths still
        # form a valid SQLite file URI.
        conn = sqlite3.connect(f"{db_path.resolve().as_uri()}?mode=ro", uri=True, timeout=1.0)
        try:
            nodes = conn.execute("SELECT COUNT(*) FROM kg_nodes").fetchone()[0]
            edges = conn.execute("SELECT COUNT(*) FROM kg_edges").fetchone()[0]
            return {"exists": True, "nodes": int(nodes or 0), "edges": int(edges or 0)}
        finally:
            conn.close()
    except Exception as exc:
        # Diagnostic helper: report the failure instead of raising.
        return {"exists": False, "error": str(exc)}
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
def memory_fabric_health(
    *,
    include_backup_scan: bool = True,
    backups_root: str | Path | None = None,
) -> dict[str, Any]:
    """Report the state of the memory islands and flag gaps between them.

    Args:
        include_backup_scan: When True, open up to 12 backup databases and
            count diary rows (at most 1000 per backup) that are neither in
            active memory nor in the historical index.
        backups_root: Directory holding backup ``*.db`` files. Defaults to
            ``paths.backups_dir()``.

    Returns:
        A dict with ``ok`` (False only if an issue has severity ``error``),
        ``issues`` (list of ``code``/``severity``/``message`` dicts) and the
        ``transcripts``, ``historical_diaries``, ``local_context`` and
        ``knowledge_graph`` statistics.
    """
    ensure_memory_fabric_schema()
    conn = get_db()
    transcript_files = _count_transcript_files()
    transcript_index_count = int(
        conn.execute("SELECT COUNT(*) AS total FROM transcript_index").fetchone()["total"] or 0
    )
    historical_count = int(
        conn.execute("SELECT COUNT(*) AS total FROM historical_diary_index").fetchone()["total"] or 0
    )
    issues: list[dict[str, str]] = []

    if sum(transcript_files.values()) > 0 and transcript_index_count == 0:
        issues.append({
            "code": "transcript_index_empty",
            "severity": "warn",
            "message": "Transcript files exist but compact transcript_index is empty.",
        })

    backup_rows = 0
    backup_files = 0
    backup_unreconciled = 0
    backup_unreadable = 0
    if include_backup_scan:
        active_keys = _active_diary_keys(conn)
        historical_hashes = {
            str(row["content_hash"] or "")
            for row in conn.execute("SELECT content_hash FROM historical_diary_index").fetchall()
        }
        for backup_path in _backup_db_paths(backups_root, max_files=12):
            backup_conn = _connect_backup(backup_path)
            if backup_conn is None:
                backup_unreadable += 1
                continue
            try:
                try:
                    if not _table_exists(backup_conn, "session_diary"):
                        continue
                    rows = backup_conn.execute(
                        "SELECT * FROM session_diary ORDER BY created_at DESC LIMIT 1000"
                    ).fetchall()
                except sqlite3.Error:
                    # A health check must survive a corrupt or locked backup;
                    # count it and move on to the next file.
                    backup_unreadable += 1
                    continue
                backup_files += 1
                backup_rows += len(rows)
                for row in rows:
                    key = (_row_value(row, "session_id"), _row_value(row, "created_at"))
                    if key in active_keys:
                        continue
                    if _historical_diary_hash(backup_path, row) in historical_hashes:
                        continue
                    backup_unreconciled += 1
            finally:
                backup_conn.close()
        if backup_unreconciled > 0:
            issues.append({
                "code": "backup_diaries_not_reconciled",
                "severity": "warn",
                "message": "Backup session diaries exist outside active memory and historical index.",
            })

    embeddings = _local_context_embedding_stats()
    if int(embeddings.get("hash_embeddings") or 0) > 0:
        issues.append({
            "code": "hash_embeddings_present",
            "severity": "info",
            "message": "Local context still has deterministic fallback embeddings; re-embedding is recommended.",
        })

    kg = _cognitive_kg_stats()
    if kg.get("exists") and int(kg.get("nodes") or 0) == 0:
        issues.append({
            "code": "kg_empty",
            "severity": "info",
            "message": "Knowledge graph tables exist but have no nodes.",
        })

    return {
        "ok": not any(issue["severity"] == "error" for issue in issues),
        "issues": issues,
        "transcripts": {
            "files": transcript_files,
            "index_rows": transcript_index_count,
        },
        "historical_diaries": {
            "index_rows": historical_count,
            "backup_files_scanned": backup_files,
            "backup_files_unreadable": backup_unreadable,
            "backup_rows_seen": backup_rows,
            "backup_rows_unreconciled": backup_unreconciled,
        },
        "local_context": embeddings,
        "knowledge_graph": kg,
    }
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
def repair_memory_fabric(
    *,
    transcript_hours: int = MAX_TRANSCRIPT_HOURS,
    transcript_limit: int = 1000,
    backup_limit: int = 5000,
) -> dict[str, Any]:
    """Rebuild the transcript index, reconcile backup diaries, then re-check health.

    Args:
        transcript_hours: ``hours`` value passed to ``ensure_transcript_index``.
        transcript_limit: ``limit`` value passed to ``ensure_transcript_index``.
        backup_limit: ``limit`` value passed to ``reconcile_backup_diaries``.

    Returns:
        A dict with ``ok`` (always True) and the three step results under
        ``transcripts``, ``backups`` and ``health``.
    """
    report: dict[str, Any] = {"ok": True}
    # The index rebuild is forced and accepts transcripts with a single user
    # message.
    report["transcripts"] = ensure_transcript_index(
        hours=transcript_hours,
        limit=transcript_limit,
        min_user_messages=1,
        force=True,
    )
    report["backups"] = reconcile_backup_diaries(limit=backup_limit)
    # Health runs last so it reflects the two repairs above.
    report["health"] = memory_fabric_health(include_backup_scan=True)
    return report
|
package/src/pre_answer_router.py
CHANGED
|
@@ -1100,10 +1100,11 @@ def _source_diary(request: SourceRequest) -> SourceResult:
|
|
|
1100
1100
|
|
|
1101
1101
|
def _source_transcripts(request: SourceRequest) -> SourceResult:
|
|
1102
1102
|
try:
|
|
1103
|
-
from transcript_index import
|
|
1103
|
+
from transcript_index import ensure_transcript_index, search_transcript_index
|
|
1104
|
+
from transcript_utils import MAX_TRANSCRIPT_HOURS
|
|
1104
1105
|
|
|
1105
|
-
|
|
1106
|
-
indexed_rows = search_transcript_index(request.query, hours=
|
|
1106
|
+
ensure_transcript_index(hours=MAX_TRANSCRIPT_HOURS, limit=1000, min_user_messages=1)
|
|
1107
|
+
indexed_rows = search_transcript_index(request.query, hours=MAX_TRANSCRIPT_HOURS, limit=4)
|
|
1107
1108
|
if indexed_rows:
|
|
1108
1109
|
indexed_result = _rows_result(
|
|
1109
1110
|
"transcript_index",
|
|
@@ -192,7 +192,7 @@ def run(
|
|
|
192
192
|
"SELECT id, session_id, task_id, debt_type, severity, evidence, created_at "
|
|
193
193
|
"FROM protocol_debt WHERE resolved_at IS NULL"
|
|
194
194
|
).fetchall()
|
|
195
|
-
|
|
195
|
+
by_severity_type: dict[tuple[str, str], int] = {}
|
|
196
196
|
for row in rows:
|
|
197
197
|
task_open = _task_is_open(conn, str(row["task_id"] or ""))
|
|
198
198
|
bucket = classify_debt(
|
|
@@ -216,13 +216,36 @@ def run(
|
|
|
216
216
|
),
|
|
217
217
|
)
|
|
218
218
|
elif bucket == "requires_user":
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
219
|
+
# Track by (severity, debt_type) so the morning briefing
|
|
220
|
+
# can split ERROR vs WARN buckets dynamically — without
|
|
221
|
+
# this split, freshly-introduced ERROR debt classes stay
|
|
222
|
+
# invisible until someone hand-edits the whitelist.
|
|
223
|
+
severity = str(row["severity"] or "warn").strip().lower() or "warn"
|
|
224
|
+
debt_type = str(row["debt_type"] or "")
|
|
225
|
+
key = (severity, debt_type)
|
|
226
|
+
by_severity_type[key] = by_severity_type.get(key, 0) + 1
|
|
227
|
+
# Consolidate requires_user into a per-severity, per-type summary
|
|
228
|
+
# so the morning briefing stays short even when the backlog is
|
|
229
|
+
# long, while still surfacing ALL error classes (not a fixed top-4).
|
|
222
230
|
report["requires_user_summary"] = [
|
|
223
|
-
{"debt_type": debt_type, "count": count}
|
|
224
|
-
for debt_type, count in sorted(
|
|
231
|
+
{"severity": severity, "debt_type": debt_type, "count": count}
|
|
232
|
+
for (severity, debt_type), count in sorted(
|
|
233
|
+
by_severity_type.items(),
|
|
234
|
+
key=lambda item: (item[0][0] != "error", -item[1]),
|
|
235
|
+
)
|
|
225
236
|
]
|
|
237
|
+
# Aggregate by severity so consumers can report
|
|
238
|
+
# ``ERROR=N (a=x, b=y), WARN=M`` without re-bucketing.
|
|
239
|
+
report["requires_user_by_severity"] = {}
|
|
240
|
+
for entry in report["requires_user_summary"]:
|
|
241
|
+
sev = entry["severity"]
|
|
242
|
+
stat = report["requires_user_by_severity"].setdefault(
|
|
243
|
+
sev, {"total": 0, "by_type": []}
|
|
244
|
+
)
|
|
245
|
+
stat["total"] += int(entry["count"])
|
|
246
|
+
stat["by_type"].append(
|
|
247
|
+
{"debt_type": entry["debt_type"], "count": int(entry["count"])}
|
|
248
|
+
)
|
|
226
249
|
if dry_run:
|
|
227
250
|
conn.execute("ROLLBACK")
|
|
228
251
|
else:
|
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
# NEXO DB hourly backup — crontab: 0 * * * * $NEXO_HOME/core/scripts/nexo-backup.sh
|
|
3
3
|
NEXO_HOME="${NEXO_HOME:-$HOME/.nexo}"
|
|
4
4
|
NEXO_DIR="$NEXO_HOME"
|
|
5
|
+
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
|
6
|
+
CORE_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
|
|
5
7
|
BACKUP_DIR="$NEXO_HOME/runtime/backups"
|
|
6
8
|
if [ ! -d "$BACKUP_DIR" ] && [ -d "$NEXO_HOME/backups" ]; then
|
|
7
9
|
BACKUP_DIR="$NEXO_HOME/backups"
|
|
@@ -23,7 +25,35 @@ LOCAL_CONTEXT_MAX_BACKUP_BYTES="${NEXO_LOCAL_CONTEXT_MAX_BACKUP_BYTES:-214748364
|
|
|
23
25
|
|
|
24
26
|
mkdir -p "$BACKUP_DIR" "$WEEKLY_DIR"
|
|
25
27
|
|
|
28
|
+
# Index diary rows from the backup databases into the memory fabric before
# cleanup_backups goes on to prune. Passes $BACKUP_DIR and $CORE_DIR to an
# inline Python script that calls memory_fabric.reconcile_backup_diaries.
# Strictly best effort: all output is discarded and any failure (missing
# python3, import error, exception) is ignored so the backup job continues.
reconcile_memory_fabric_before_prune() {
    # The quoted 'PY' delimiter keeps the shell from expanding anything
    # inside the heredoc; values reach Python only through sys.argv.
    python3 - "$BACKUP_DIR" "$CORE_DIR" <<'PY' >/dev/null 2>&1 || true
from __future__ import annotations

import sys
from pathlib import Path

backup_dir = Path(sys.argv[1])
core_dir = Path(sys.argv[2])
for candidate in (core_dir, core_dir.parent / "src"):
    if candidate.exists():
        sys.path.insert(0, str(candidate))

try:
    import memory_fabric

    memory_fabric.reconcile_backup_diaries(
        backups_root=backup_dir,
        max_backup_files=80,
        limit=10000,
    )
except Exception:
    pass
PY
}
|
|
53
|
+
|
|
26
54
|
cleanup_backups() {
|
|
55
|
+
reconcile_memory_fabric_before_prune
|
|
56
|
+
|
|
27
57
|
PRUNER="$NEXO_HOME/core/scripts/prune_runtime_backups.py"
|
|
28
58
|
if [ ! -f "$PRUNER" ]; then
|
|
29
59
|
PRUNER="$(dirname "$0")/prune_runtime_backups.py"
|