nexo-brain 5.0.4 → 5.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +12 -0
- package/package.json +1 -1
- package/src/auto_update.py +291 -1
- package/src/cognitive/_ingest.py +3 -1
- package/src/cognitive/_memory.py +5 -1
- package/src/cognitive/_search.py +115 -3
- package/src/crons/manifest.json +12 -0
- package/src/db/_core.py +1 -1
- package/src/db/_reminders.py +36 -0
- package/src/db/_schema.py +52 -0
- package/src/hook_observability.py +293 -0
- package/src/hooks/session-start.sh +27 -0
- package/src/knowledge_graph.py +179 -0
- package/src/maintenance.py +53 -62
- package/src/observability.py +199 -0
- package/src/plugins/adaptive_mode.py +55 -1
- package/src/plugins/backup.py +14 -3
- package/src/plugins/knowledge_graph_tools.py +32 -0
- package/src/plugins/protocol.py +2 -1
- package/src/plugins/simple_api.py +4 -1
- package/src/plugins/skills.py +32 -0
- package/src/retroactive_learnings.py +370 -0
- package/src/scripts/check-context.py +2 -2
- package/src/scripts/deep-sleep/apply_findings.py +131 -4
- package/src/scripts/deep-sleep/synthesize.py +3 -1
- package/src/scripts/nexo-cognitive-decay.py +75 -0
- package/src/scripts/nexo-cortex-cycle.py +266 -0
- package/src/scripts/nexo-daily-self-audit.py +4 -2
- package/src/scripts/nexo-evolution-run.py +174 -7
- package/src/scripts/nexo-hook-record.py +42 -0
- package/src/scripts/nexo-outcome-checker.py +30 -0
- package/src/server.py +84 -0
- package/src/skills/run-release-final-audit/guide.md +14 -0
- package/src/skills/run-release-final-audit/script.py +177 -0
- package/src/skills/run-release-final-audit/skill.json +64 -0
- package/src/skills_runtime.py +231 -0
- package/src/state_watchers_runtime.py +134 -0
- package/src/tools_learnings.py +25 -1
- package/src/tools_menu.py +1 -0
- package/src/tools_sessions.py +77 -0
package/src/db/_schema.py
CHANGED
|
@@ -886,6 +886,56 @@ def _m37_cortex_goal_profile_trace(conn):
|
|
|
886
886
|
conn.execute("CREATE INDEX IF NOT EXISTS idx_cortex_evaluations_goal_profile ON cortex_evaluations(goal_profile_id)")
|
|
887
887
|
|
|
888
888
|
|
|
889
|
+
def _m38_evolution_log_proposal_payload(conn):
    """Add the nullable ``proposal_payload`` column to ``evolution_log``.

    Earlier schema versions kept only the proposal's ``action`` string, so
    the ``changes`` operations of a proposal were gone once the cycle that
    produced it finished. A proposal the user later accepted through
    nexo_evolution_approve therefore could not be re-executed. Storing the
    whole proposal dict as JSON text in ``proposal_payload`` lets the
    runner's _apply_accepted_proposals() pick accepted rows up and feed
    them to execute_auto_proposal().

    The change is additive only: rows written before this migration keep
    ``proposal_payload`` as NULL, and the apply step skips rows without a
    payload.

    Args:
        conn: Open database connection the migration runs against.
    """
    table, column, definition = "evolution_log", "proposal_payload", "TEXT DEFAULT NULL"
    _migrate_add_column(conn, table, column, definition)
|
|
905
|
+
|
|
906
|
+
|
|
907
|
+
def _m39_hook_runs(conn):
|
|
908
|
+
"""Persist hook lifecycle observability — closes Fase 3 item 7.
|
|
909
|
+
|
|
910
|
+
Before m39, NEXO had 12 hook scripts (session-start.sh, post-compact.sh,
|
|
911
|
+
pre-compact.sh, inbox-hook.sh, etc.) but no central record of when they
|
|
912
|
+
ran, how long they took, or whether they succeeded. The audit lifecycle
|
|
913
|
+
was a black box. This table is the storage layer that
|
|
914
|
+
src/hook_observability.py records into and that the new
|
|
915
|
+
nexo_hook_runs MCP tool reads from.
|
|
916
|
+
|
|
917
|
+
Idempotent: CREATE TABLE IF NOT EXISTS plus indexes by hook_name and
|
|
918
|
+
started_at so the daily query patterns are cheap.
|
|
919
|
+
"""
|
|
920
|
+
conn.execute(
|
|
921
|
+
"""CREATE TABLE IF NOT EXISTS hook_runs (
|
|
922
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
923
|
+
hook_name TEXT NOT NULL,
|
|
924
|
+
started_at REAL NOT NULL,
|
|
925
|
+
duration_ms INTEGER NOT NULL DEFAULT 0,
|
|
926
|
+
exit_code INTEGER NOT NULL DEFAULT 0,
|
|
927
|
+
status TEXT NOT NULL DEFAULT 'ok',
|
|
928
|
+
session_id TEXT DEFAULT '',
|
|
929
|
+
summary TEXT DEFAULT '',
|
|
930
|
+
metadata TEXT DEFAULT '{}',
|
|
931
|
+
created_at REAL NOT NULL
|
|
932
|
+
)"""
|
|
933
|
+
)
|
|
934
|
+
conn.execute("CREATE INDEX IF NOT EXISTS idx_hook_runs_hook_name ON hook_runs(hook_name)")
|
|
935
|
+
conn.execute("CREATE INDEX IF NOT EXISTS idx_hook_runs_started_at ON hook_runs(started_at)")
|
|
936
|
+
conn.execute("CREATE INDEX IF NOT EXISTS idx_hook_runs_status ON hook_runs(status)")
|
|
937
|
+
|
|
938
|
+
|
|
889
939
|
MIGRATIONS = [
|
|
890
940
|
(1, "learnings_columns", _m1_learnings_columns),
|
|
891
941
|
(2, "followups_reasoning", _m2_followups_reasoning),
|
|
@@ -924,6 +974,8 @@ MIGRATIONS = [
|
|
|
924
974
|
(35, "cortex_evaluation_outcome_link", _m35_cortex_evaluation_outcome_link),
|
|
925
975
|
(36, "goal_profiles", _m36_goal_profiles),
|
|
926
976
|
(37, "cortex_goal_profile_trace", _m37_cortex_goal_profile_trace),
|
|
977
|
+
(38, "evolution_log_proposal_payload", _m38_evolution_log_proposal_payload),
|
|
978
|
+
(39, "hook_runs", _m39_hook_runs),
|
|
927
979
|
]
|
|
928
980
|
|
|
929
981
|
|
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
"""Observability for the NEXO hook lifecycle pipeline.
|
|
2
|
+
|
|
3
|
+
Closes Fase 3 item 7 of NEXO-AUDIT-2026-04-11. Before this module, NEXO
|
|
4
|
+
had 12 hook scripts (session-start.sh, post-compact.sh, pre-compact.sh,
|
|
5
|
+
inbox-hook.sh, etc.) but no central record of when they ran, how long
|
|
6
|
+
they took, or whether they succeeded. The audit lifecycle was a black box
|
|
7
|
+
— a hook could silently fail for weeks before anyone noticed.
|
|
8
|
+
|
|
9
|
+
This module is the API layer on top of the m39 hook_runs table:
|
|
10
|
+
|
|
11
|
+
record_hook_run(hook_name, ...) -> int (rowid)
|
|
12
|
+
list_recent_hook_runs(hours=24, hook_name='', status='', limit=200)
|
|
13
|
+
hook_health_summary(hours=24) -> dict with success rate per hook
|
|
14
|
+
|
|
15
|
+
It is consumed by:
|
|
16
|
+
- src/scripts/nexo-hook-record.py: a tiny shell-friendly CLI so any
|
|
17
|
+
bash hook can pipe its result back into the database with one line.
|
|
18
|
+
- src/server.py:nexo_hook_runs: an MCP tool so the agent can read the
|
|
19
|
+
hook lifecycle without needing the dashboard.
|
|
20
|
+
|
|
21
|
+
Best-effort throughout: every helper wraps the DB call in try/except so
|
|
22
|
+
the hook itself never fails because observability could not write.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import json
|
|
28
|
+
import sys
|
|
29
|
+
import time
|
|
30
|
+
from typing import Any
|
|
31
|
+
|
|
32
|
+
from db import get_db
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
_VALID_STATUS = {"ok", "error", "skipped", "timeout", "blocked"}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _coerce_status(exit_code: int, status: str = "") -> str:
|
|
39
|
+
"""Derive status from exit_code when not explicitly provided."""
|
|
40
|
+
s = (status or "").strip().lower()
|
|
41
|
+
if s in _VALID_STATUS:
|
|
42
|
+
return s
|
|
43
|
+
if exit_code == 0:
|
|
44
|
+
return "ok"
|
|
45
|
+
return "error"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def record_hook_run(
|
|
49
|
+
hook_name: str,
|
|
50
|
+
*,
|
|
51
|
+
started_at: float | None = None,
|
|
52
|
+
duration_ms: int = 0,
|
|
53
|
+
exit_code: int = 0,
|
|
54
|
+
status: str = "",
|
|
55
|
+
session_id: str = "",
|
|
56
|
+
summary: str = "",
|
|
57
|
+
metadata: dict | None = None,
|
|
58
|
+
) -> int:
|
|
59
|
+
"""Insert a single row into hook_runs and return its id.
|
|
60
|
+
|
|
61
|
+
Args:
|
|
62
|
+
hook_name: The hook identifier (e.g. 'session-start', 'post-compact').
|
|
63
|
+
started_at: Unix epoch when the hook started. Defaults to now.
|
|
64
|
+
duration_ms: Wall-clock duration in milliseconds.
|
|
65
|
+
exit_code: Process exit code (0 = ok). When status is empty, it is
|
|
66
|
+
derived from this value.
|
|
67
|
+
status: One of {ok, error, skipped, timeout, blocked}. Optional.
|
|
68
|
+
session_id: Claude Code session id when known.
|
|
69
|
+
summary: Short human-readable summary (truncated to 500 chars).
|
|
70
|
+
metadata: Extra JSON-serializable payload (truncated to 4 KB serialized).
|
|
71
|
+
|
|
72
|
+
Returns:
|
|
73
|
+
The new hook_runs row id, or 0 if the insert failed.
|
|
74
|
+
|
|
75
|
+
This helper never raises. A failure here must never block the hook.
|
|
76
|
+
"""
|
|
77
|
+
name = (hook_name or "").strip()
|
|
78
|
+
if not name:
|
|
79
|
+
return 0
|
|
80
|
+
if started_at is None:
|
|
81
|
+
started_at = time.time()
|
|
82
|
+
try:
|
|
83
|
+
duration_ms = max(0, int(duration_ms))
|
|
84
|
+
except (TypeError, ValueError):
|
|
85
|
+
duration_ms = 0
|
|
86
|
+
try:
|
|
87
|
+
exit_code = int(exit_code)
|
|
88
|
+
except (TypeError, ValueError):
|
|
89
|
+
exit_code = 0
|
|
90
|
+
final_status = _coerce_status(exit_code, status)
|
|
91
|
+
summary_clean = (summary or "")[:500]
|
|
92
|
+
try:
|
|
93
|
+
metadata_blob = json.dumps(metadata or {}, ensure_ascii=False)
|
|
94
|
+
except Exception:
|
|
95
|
+
metadata_blob = "{}"
|
|
96
|
+
if len(metadata_blob) > 4096:
|
|
97
|
+
metadata_blob = metadata_blob[:4096]
|
|
98
|
+
now_epoch = time.time()
|
|
99
|
+
try:
|
|
100
|
+
conn = get_db()
|
|
101
|
+
cur = conn.execute(
|
|
102
|
+
"INSERT INTO hook_runs (hook_name, started_at, duration_ms, exit_code, "
|
|
103
|
+
"status, session_id, summary, metadata, created_at) "
|
|
104
|
+
"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
|
105
|
+
(
|
|
106
|
+
name[:120],
|
|
107
|
+
float(started_at),
|
|
108
|
+
duration_ms,
|
|
109
|
+
exit_code,
|
|
110
|
+
final_status,
|
|
111
|
+
(session_id or "")[:80],
|
|
112
|
+
summary_clean,
|
|
113
|
+
metadata_blob,
|
|
114
|
+
now_epoch,
|
|
115
|
+
),
|
|
116
|
+
)
|
|
117
|
+
try:
|
|
118
|
+
conn.commit()
|
|
119
|
+
except Exception:
|
|
120
|
+
pass
|
|
121
|
+
return int(cur.lastrowid or 0)
|
|
122
|
+
except Exception:
|
|
123
|
+
return 0
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def list_recent_hook_runs(
    *,
    hours: int = 24,
    hook_name: str = "",
    status: str = "",
    limit: int = 200,
) -> list[dict]:
    """Return recent hook_runs filtered by time window, name, and status.

    Args:
        hours: How far back to look. Default 24h. Values below 1 are
            raised to 1; unparseable values fall back to 24.
        hook_name: Optional substring filter on hook_name (LIKE %name%).
        status: Optional exact match on status field (trimmed, lower-cased).
        limit: Max rows. Default 200, minimum 1.

    Returns ordered list (newest first) of dicts, with ``metadata`` decoded
    from JSON (or ``{}`` when it cannot be decoded). Empty list on any
    database error.
    """
    # The floor is one hour. The previous floor of 60 silently widened the
    # documented 24h default to a 60-hour window.
    try:
        window_hours = max(1, int(hours))
    except (TypeError, ValueError, OverflowError):
        window_hours = 24
    cutoff = time.time() - window_hours * 3600

    clauses = ["started_at >= ?"]
    params = [cutoff]
    if hook_name:
        clauses.append("hook_name LIKE ?")
        params.append(f"%{hook_name.strip()}%")
    if status:
        clauses.append("status = ?")
        params.append(status.strip().lower())
    # Only fixed clause strings are joined here; all values are bound.
    where = " AND ".join(clauses)

    try:
        conn = get_db()
        rows = conn.execute(
            f"SELECT id, hook_name, started_at, duration_ms, exit_code, status, "
            f"session_id, summary, metadata, created_at FROM hook_runs "
            f"WHERE {where} ORDER BY started_at DESC LIMIT ?",
            params + [max(1, int(limit))],
        ).fetchall()
    except Exception:
        return []

    result = []
    for row in rows:
        record = dict(row)
        try:
            record["metadata"] = json.loads(record.get("metadata") or "{}")
        except Exception:
            record["metadata"] = {}
        result.append(record)
    return result
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def hook_health_summary(hours: int = 24) -> dict:
    """Aggregate per-hook health stats over a time window.

    Args:
        hours: Size of the look-back window. Values below 1 are raised to
            1; unparseable values fall back to 24.

    Returns dict with shape:
        {
            "window_hours": N,   # the window actually queried
            "total_runs": N,
            "by_hook": [
                {"hook_name": str, "runs": int, "ok": int, "errors": int,
                 "p50_duration_ms": int, "p95_duration_ms": int,
                 "success_rate": float (0..1), "last_run_at": float},
                ...
            ],
            "unhealthy_hooks": [hook_name, ...]  # success rate < 0.8 with >= 3 runs
        }

    ``by_hook`` is sorted by run count, busiest hook first. On any database
    error the same shape is returned with zero runs and empty lists.

    Used by:
        - nexo_hook_runs MCP tool
        - dashboard widgets
        - the daily self-audit's hook health column
    """
    # The floor is one hour. The previous floor of 60 made the default 24h
    # request scan 60 hours while still reporting window_hours=24.
    try:
        window_hours = max(1, int(hours))
    except (TypeError, ValueError, OverflowError):
        window_hours = 24
    cutoff = time.time() - window_hours * 3600

    try:
        conn = get_db()
        rows = conn.execute(
            "SELECT hook_name, status, duration_ms, started_at "
            "FROM hook_runs WHERE started_at >= ? ORDER BY hook_name, started_at",
            (cutoff,),
        ).fetchall()
    except Exception:
        return {"window_hours": window_hours, "total_runs": 0, "by_hook": [], "unhealthy_hooks": []}

    by_hook: dict[str, dict] = {}
    for row in rows:
        name = row["hook_name"]
        bucket = by_hook.setdefault(
            name,
            {"hook_name": name, "runs": 0, "ok": 0, "errors": 0, "_durations": [], "last_run_at": 0.0},
        )
        bucket["runs"] += 1
        status = row["status"]
        if status == "ok":
            bucket["ok"] += 1
        elif status in {"error", "timeout", "blocked"}:
            bucket["errors"] += 1
        # NOTE(review): "skipped" runs count toward `runs` but not `ok`, so
        # they lower success_rate. Confirm that is the intended policy.
        bucket["_durations"].append(int(row["duration_ms"] or 0))
        if row["started_at"] > bucket["last_run_at"]:
            bucket["last_run_at"] = float(row["started_at"])

    summary_rows = []
    unhealthy = []
    for name, bucket in by_hook.items():
        # The scratch list is removed so it does not leak into the result.
        durations = sorted(bucket.pop("_durations"))
        n = len(durations)
        if n:
            p50 = durations[n // 2]
            p95 = durations[min(n - 1, int(n * 0.95))]
        else:
            p50 = p95 = 0
        success_rate = (bucket["ok"] / bucket["runs"]) if bucket["runs"] else 0.0
        bucket["p50_duration_ms"] = p50
        bucket["p95_duration_ms"] = p95
        bucket["success_rate"] = round(success_rate, 3)
        summary_rows.append(bucket)
        if bucket["runs"] >= 3 and success_rate < 0.8:
            unhealthy.append(name)

    summary_rows.sort(key=lambda b: b["runs"], reverse=True)
    return {
        "window_hours": window_hours,
        "total_runs": sum(b["runs"] for b in summary_rows),
        "by_hook": summary_rows,
        "unhealthy_hooks": unhealthy,
    }
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def main_cli(argv: list[str]) -> int:
    """Tiny CLI shim so bash hooks can call this module directly.

    Usage from a hook:
        python3 -m hook_observability record \
            --hook session-start --duration-ms 142 --exit 0 --session abc

    The first positional verb selects the action (`record` only for now).
    Options are read as ``--name value`` pairs; a trailing ``--name`` with
    no value is ignored.

    The recorder is normally invoked when the hook finishes, so unless
    ``--started-at EPOCH`` is given the start time is back-dated from "now"
    by ``--duration-ms``. Without that, every row would carry the hook's
    end time in ``started_at``.

    Returns 0 always — the recorder must never break the hook itself.
    """
    if len(argv) < 1 or argv[0] != "record":
        print(
            "usage: hook_observability record --hook NAME [--duration-ms N] [--exit N] "
            "[--status S] [--session SID] [--summary TEXT] [--started-at EPOCH]"
        )
        return 0

    args: dict[str, str] = {}
    i = 1
    while i < len(argv):
        token = argv[i]
        if token.startswith("--") and i + 1 < len(argv):
            args[token[2:]] = argv[i + 1]
            i += 2
        else:
            i += 1

    try:
        duration_ms = int(args.get("duration-ms", "0") or 0)
        raw_started_at = args.get("started-at", "")
        if raw_started_at:
            started_at = float(raw_started_at)
        else:
            started_at = time.time() - max(0, duration_ms) / 1000.0
        record_hook_run(
            args.get("hook", ""),
            started_at=started_at,
            duration_ms=duration_ms,
            exit_code=int(args.get("exit", "0") or 0),
            status=args.get("status", ""),
            session_id=args.get("session", ""),
            summary=args.get("summary", ""),
        )
    except Exception:
        # Best-effort by contract: malformed input or a recorder failure
        # must not change the hook's outcome.
        pass
    return 0
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
if __name__ == "__main__":
|
|
293
|
+
sys.exit(main_cli(sys.argv[1:]))
|
|
@@ -4,6 +4,33 @@
|
|
|
4
4
|
# Caches output for 1 hour to avoid regenerating on rapid successive sessions.
|
|
5
5
|
set -uo pipefail
|
|
6
6
|
|
|
7
|
+
# Fase 3 item 7: hook lifecycle observability — record duration + exit code
|
|
8
|
+
# in hook_runs on EXIT. Best-effort: a failure here must not break the hook.
|
|
9
|
+
NEXO_HOOK_START_MS=$(python3 -c "import time; print(int(time.time()*1000))" 2>/dev/null || echo 0)
|
|
10
|
+
NEXO_HOOK_NAME="session-start"
|
|
11
|
+
_nexo_record_hook_run() {
  # EXIT-trap handler: report this hook's exit code and duration to the
  # recorder script. Best-effort: nothing here may fail the hook.

  # Must be first: any later command would overwrite $?.
  local exit_code=$?
  local duration_ms=0
  if [ "$NEXO_HOOK_START_MS" != "0" ]; then
    local now_ms
    now_ms=$(python3 -c "import time; print(int(time.time()*1000))" 2>/dev/null || echo 0)
    if [ "$now_ms" != "0" ]; then
      duration_ms=$((now_ms - NEXO_HOOK_START_MS))
    fi
  fi

  # Locate the source root: NEXO_CODE when set, otherwise the parent of
  # this script's directory (hooks/ and scripts/ are siblings under src/).
  # No machine-specific absolute path is used as a fallback.
  local code_dir recorder
  code_dir="${NEXO_CODE:-}"
  if [ -z "$code_dir" ]; then
    code_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 2>/dev/null && pwd) || code_dir=""
  fi
  recorder="$code_dir/scripts/nexo-hook-record.py"

  # If the recorder cannot be found, skip silently.
  if [ -n "$code_dir" ] && [ -f "$recorder" ]; then
    python3 "$recorder" record \
      --hook "$NEXO_HOOK_NAME" \
      --duration-ms "$duration_ms" \
      --exit "$exit_code" \
      --session "${CLAUDE_SID:-}" \
      >/dev/null 2>&1 || true
  fi
}
|
|
32
|
+
trap _nexo_record_hook_run EXIT
|
|
33
|
+
|
|
7
34
|
NEXO_HOME="${NEXO_HOME:-$HOME/.nexo}"
|
|
8
35
|
BRIEFING_FILE="$NEXO_HOME/coordination/session-briefing.txt"
|
|
9
36
|
MAX_AGE_SECONDS=3600 # 1 hour cache
|
package/src/knowledge_graph.py
CHANGED
|
@@ -255,3 +255,182 @@ def extract_subgraph(center_id: int, depth: int = 2) -> dict:
|
|
|
255
255
|
d3_edges = [{"source": e["source_id"], "target": e["target_id"],
|
|
256
256
|
"relation": e["relation"], "weight": e["weight"]} for e in graph["edges"]]
|
|
257
257
|
return {"nodes": d3_nodes, "edges": d3_edges}
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
# ── Bitemporal export — Fase 5 item 1 ────────────────────────────────────
|
|
261
|
+
#
|
|
262
|
+
# The KG is bi-temporal by design: kg_edges has valid_from and valid_until
|
|
263
|
+
# columns and the upsert_edge / delete_edge helpers maintain them
|
|
264
|
+
# correctly. The audit's "exportable" requirement asked for emitting the
|
|
265
|
+
# graph to standard interchange formats so external tools can ingest it
|
|
266
|
+
# without speaking SQLite. The two helpers below cover the canonical
|
|
267
|
+
# choices: JSON-LD (semantic web, human-readable) and GraphML (igraph,
|
|
268
|
+
# Gephi, NetworkX, Cytoscape).
|
|
269
|
+
#
|
|
270
|
+
# Both helpers respect the bitemporal model: when as_of is None, only
|
|
271
|
+
# active edges (valid_until IS NULL) are emitted. When as_of is a
|
|
272
|
+
# timestamp string, the historical state at that instant is emitted.
|
|
273
|
+
|
|
274
|
+
import json as _json
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def export_to_jsonld(*, as_of: str = "") -> dict:
    """Export the active or historical KG to a JSON-LD document.

    The vocabulary lives under https://nexo-brain.com/kg/v1#. Each node
    becomes a top-level @graph entry with @id = nexo:node:<id> and @type =
    nexo:<node_type>. Each edge is attached to its source node as an entry
    in the list stored under nexo:<relation>; the entry carries the edge's
    own @id, a ``target`` node reference, and the weight, confidence and
    validity bounds. Edges whose source or target node is missing from
    kg_nodes are skipped and not counted.

    Args:
        as_of: ISO timestamp. If empty (or only whitespace), exports active
            edges only (valid_until IS NULL). If provided, exports the
            edges that were valid at that instant
            (valid_from <= as_of < valid_until, or still open).

    Returns a JSON-LD-shaped dict ready for json.dumps(). ``edge_count`` is
    the number of edges actually emitted in ``@graph``.
    """
    db = _get_db()
    snapshot = (as_of or "").strip()

    # kg_nodes is NOT bitemporal — only kg_edges has valid_from/valid_until,
    # so every node is exported regardless of as_of.
    nodes = [dict(row) for row in db.execute(
        "SELECT id, node_type, node_ref, label, properties FROM kg_nodes"
    ).fetchall()]

    edge_select = (
        "SELECT id, source_id, target_id, relation, weight, confidence, "
        "valid_from, valid_until, properties FROM kg_edges "
    )
    if snapshot:
        # The trimmed timestamp is bound, so stray whitespace in the
        # argument cannot skew the string comparison.
        edge_rows = db.execute(
            edge_select
            + "WHERE valid_from <= ? AND (valid_until IS NULL OR valid_until > ?)",
            (snapshot, snapshot),
        ).fetchall()
    else:
        edge_rows = db.execute(edge_select + "WHERE valid_until IS NULL").fetchall()
    edges = [dict(row) for row in edge_rows]

    nodes_by_id: dict[int, dict] = {}
    for n in nodes:
        try:
            props = _json.loads(n.get("properties") or "{}")
        except Exception:
            props = {}
        nodes_by_id[n["id"]] = {
            "@id": f"nexo:node:{n['id']}",
            "@type": f"nexo:{n['node_type']}",
            "label": n.get("label") or "",
            "node_ref": n.get("node_ref") or "",
            "properties": props,
        }

    emitted_edges = 0
    for e in edges:
        src_id = e["source_id"]
        tgt_id = e["target_id"]
        if src_id not in nodes_by_id or tgt_id not in nodes_by_id:
            continue  # orphan edge — skip
        relation_key = f"nexo:{e['relation']}"
        edge_payload = {
            "@id": f"nexo:edge:{e['id']}",
            "target": f"nexo:node:{tgt_id}",
            "weight": float(e.get("weight") or 0.0),
            "confidence": float(e.get("confidence") or 0.0),
            "valid_from": e.get("valid_from"),
            "valid_until": e.get("valid_until"),
        }
        nodes_by_id[src_id].setdefault(relation_key, []).append(edge_payload)
        emitted_edges += 1

    return {
        "@context": {
            "nexo": "https://nexo-brain.com/kg/v1#",
            "label": "https://nexo-brain.com/kg/v1#label",
            "node_ref": "https://nexo-brain.com/kg/v1#node_ref",
            # Without a term definition, JSON-LD processors drop `target`
            # on expansion. It is typed as an IRI reference to a node.
            "target": {"@id": "https://nexo-brain.com/kg/v1#target", "@type": "@id"},
            "weight": "https://nexo-brain.com/kg/v1#weight",
            "confidence": "https://nexo-brain.com/kg/v1#confidence",
            "valid_from": "https://nexo-brain.com/kg/v1#valid_from",
            "valid_until": "https://nexo-brain.com/kg/v1#valid_until",
            "properties": "https://nexo-brain.com/kg/v1#properties",
        },
        "@type": "nexo:KnowledgeGraphSnapshot",
        "snapshot": snapshot or "active",
        "node_count": len(nodes_by_id),
        "edge_count": emitted_edges,
        "@graph": list(nodes_by_id.values()),
    }
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
def export_to_graphml(*, as_of: str = "") -> str:
    """Export the active or historical KG to a GraphML XML string.

    GraphML is the interchange format read by igraph, Gephi, NetworkX, and
    Cytoscape. Nodes carry label, node_type and node_ref; edges carry
    relation, weight and the validity bounds as data attributes, so
    importers that understand them can render the historical view. Edges
    whose source or target is not present in kg_nodes are skipped, because
    a GraphML edge must reference declared nodes.

    Args:
        as_of: ISO timestamp. If empty (or only whitespace), only active
            edges (valid_until IS NULL) are exported; otherwise the edges
            valid at that instant are exported.

    Returns a string containing the GraphML document.
    """
    # Imported locally so the block does not depend on a file-level import.
    from xml.sax.saxutils import escape as _sax_escape

    db = _get_db()
    snapshot = (as_of or "").strip()

    nodes = [dict(row) for row in db.execute(
        "SELECT id, node_type, node_ref, label FROM kg_nodes"
    ).fetchall()]

    edge_select = (
        "SELECT id, source_id, target_id, relation, weight, valid_from, valid_until FROM kg_edges "
    )
    if snapshot:
        edge_rows = db.execute(
            edge_select
            + "WHERE valid_from <= ? AND (valid_until IS NULL OR valid_until > ?)",
            (snapshot, snapshot),
        ).fetchall()
    else:
        edge_rows = db.execute(edge_select + "WHERE valid_until IS NULL").fetchall()

    def _xml_escape(value: object) -> str:
        """Escape a value for use in XML text or a quoted attribute."""
        text = "" if value is None else str(value)
        # escape() handles & < > (ampersand first); quotes are added so
        # the result is also safe inside attribute values.
        return _sax_escape(text, {'"': "&quot;", "'": "&apos;"})

    out: list[str] = []
    out.append('<?xml version="1.0" encoding="UTF-8"?>')
    out.append('<graphml xmlns="http://graphml.graphdrawing.org/xmlns">')
    out.append('  <key id="label" for="node" attr.name="label" attr.type="string"/>')
    out.append('  <key id="node_type" for="node" attr.name="node_type" attr.type="string"/>')
    out.append('  <key id="node_ref" for="node" attr.name="node_ref" attr.type="string"/>')
    out.append('  <key id="relation" for="edge" attr.name="relation" attr.type="string"/>')
    out.append('  <key id="weight" for="edge" attr.name="weight" attr.type="double"/>')
    out.append('  <key id="valid_from" for="edge" attr.name="valid_from" attr.type="string"/>')
    out.append('  <key id="valid_until" for="edge" attr.name="valid_until" attr.type="string"/>')
    snapshot_label = snapshot or "active"
    out.append(f'  <graph id="nexo_kg_{_xml_escape(snapshot_label)}" edgedefault="directed">')

    node_ids = set()
    for n in nodes:
        node_ids.add(n["id"])
        out.append(f'    <node id="n{n["id"]}">')
        out.append(f'      <data key="label">{_xml_escape(n.get("label"))}</data>')
        out.append(f'      <data key="node_type">{_xml_escape(n.get("node_type"))}</data>')
        out.append(f'      <data key="node_ref">{_xml_escape(n.get("node_ref"))}</data>')
        out.append('    </node>')

    for e in edge_rows:
        if e["source_id"] not in node_ids or e["target_id"] not in node_ids:
            continue  # orphan edge — would reference an undeclared node
        out.append(
            f'    <edge id="e{e["id"]}" source="n{e["source_id"]}" target="n{e["target_id"]}">'
        )
        out.append(f'      <data key="relation">{_xml_escape(e["relation"])}</data>')
        out.append(f'      <data key="weight">{float(e["weight"] or 0.0)}</data>')
        out.append(f'      <data key="valid_from">{_xml_escape(e["valid_from"])}</data>')
        # Open-ended (still valid) edges have no valid_until to emit.
        if e["valid_until"]:
            out.append(f'      <data key="valid_until">{_xml_escape(e["valid_until"])}</data>')
        out.append('    </edge>')

    out.append('  </graph>')
    out.append('</graphml>')
    return "\n".join(out)
|