nexo-brain 2.3.0 → 2.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299):
  1. package/README.md +1 -1
  2. package/bin/nexo-brain.js +92 -9
  3. package/bin/postinstall.js +22 -15
  4. package/package.json +7 -4
  5. package/src/auto_update.py +194 -5
  6. package/src/crons/sync.py +6 -2
  7. package/src/db/_core.py +1 -0
  8. package/src/db/_entities.py +1 -0
  9. package/src/db/_episodic.py +1 -0
  10. package/src/db/_learnings.py +1 -0
  11. package/src/db/_reminders.py +1 -0
  12. package/src/db/_schema.py +11 -1
  13. package/src/db/_sessions.py +1 -0
  14. package/src/db/_skills.py +1 -0
  15. package/src/hooks/capture-tool-logs.sh +23 -6
  16. package/src/hooks/session-start.sh +4 -3
  17. package/src/plugin_loader.py +1 -0
  18. package/src/plugins/update.py +377 -26
  19. package/src/scripts/deep-sleep/apply_findings.py +1 -0
  20. package/src/scripts/deep-sleep/collect.py +1 -0
  21. package/src/scripts/deep-sleep/extract.py +1 -0
  22. package/src/scripts/deep-sleep/synthesize.py +1 -0
  23. package/src/scripts/nexo-catchup.py +29 -4
  24. package/src/scripts/nexo-daily-self-audit.py +21 -1
  25. package/src/scripts/nexo-evolution-run.py +21 -1
  26. package/src/scripts/nexo-learning-housekeep.py +1 -0
  27. package/src/scripts/nexo-postmortem-consolidator.py +34 -9
  28. package/src/scripts/nexo-sleep.py +32 -10
  29. package/src/scripts/nexo-synthesis.py +29 -9
  30. package/src/scripts/nexo-update.sh +109 -7
  31. package/src/scripts/nexo-watchdog.sh +122 -58
  32. package/src/server.py +66 -1
  33. package/src/tools_coordination.py +1 -0
  34. package/src/tools_sessions.py +1 -0
  35. package/scripts/migrate-to-unified 2.sh +0 -813
  36. package/scripts/migrate-to-unified.sh +0 -813
  37. package/scripts/migrate-v1.5-to-v1.6 2.py +0 -778
  38. package/scripts/migrate-v1.5-to-v1.6.py +0 -778
  39. package/scripts/migrate-v1.7-to-v1.8 2.py +0 -214
  40. package/scripts/migrate-v1.7-to-v1.8.py +0 -214
  41. package/scripts/nexo-preflight.sh +0 -236
  42. package/scripts/pre-commit-check 2.sh +0 -55
  43. package/scripts/pre-commit-check.sh +0 -55
  44. package/src/__pycache__/auto_close_sessions.cpython-314.pyc +0 -0
  45. package/src/__pycache__/auto_update.cpython-310.pyc +0 -0
  46. package/src/__pycache__/hnsw_index.cpython-310.pyc +0 -0
  47. package/src/__pycache__/hnsw_index.cpython-314.pyc +0 -0
  48. package/src/__pycache__/kg_populate.cpython-310.pyc +0 -0
  49. package/src/__pycache__/knowledge_graph.cpython-310.pyc +0 -0
  50. package/src/__pycache__/plugin_loader.cpython-310.pyc +0 -0
  51. package/src/__pycache__/plugin_loader.cpython-314.pyc +0 -0
  52. package/src/__pycache__/tools_coordination.cpython-310.pyc +0 -0
  53. package/src/__pycache__/tools_credentials.cpython-310.pyc +0 -0
  54. package/src/__pycache__/tools_learnings.cpython-310.pyc +0 -0
  55. package/src/__pycache__/tools_menu.cpython-310.pyc +0 -0
  56. package/src/__pycache__/tools_reminders.cpython-310.pyc +0 -0
  57. package/src/__pycache__/tools_reminders_crud.cpython-310.pyc +0 -0
  58. package/src/__pycache__/tools_sessions.cpython-310.pyc +0 -0
  59. package/src/__pycache__/tools_task_history.cpython-310.pyc +0 -0
  60. package/src/auto_close_sessions 2.py +0 -159
  61. package/src/auto_update 2.py +0 -634
  62. package/src/claim_graph 2.py +0 -323
  63. package/src/cognitive/__init__ 2.py +0 -62
  64. package/src/cognitive/__pycache__/__init__.cpython-310.pyc +0 -0
  65. package/src/cognitive/__pycache__/__init__.cpython-312.pyc +0 -0
  66. package/src/cognitive/__pycache__/__init__.cpython-314.pyc +0 -0
  67. package/src/cognitive/__pycache__/_core.cpython-310.pyc +0 -0
  68. package/src/cognitive/__pycache__/_core.cpython-312.pyc +0 -0
  69. package/src/cognitive/__pycache__/_core.cpython-314.pyc +0 -0
  70. package/src/cognitive/__pycache__/_decay.cpython-310.pyc +0 -0
  71. package/src/cognitive/__pycache__/_decay.cpython-312.pyc +0 -0
  72. package/src/cognitive/__pycache__/_decay.cpython-314.pyc +0 -0
  73. package/src/cognitive/__pycache__/_ingest.cpython-310.pyc +0 -0
  74. package/src/cognitive/__pycache__/_ingest.cpython-312.pyc +0 -0
  75. package/src/cognitive/__pycache__/_ingest.cpython-314.pyc +0 -0
  76. package/src/cognitive/__pycache__/_memory.cpython-310.pyc +0 -0
  77. package/src/cognitive/__pycache__/_memory.cpython-312.pyc +0 -0
  78. package/src/cognitive/__pycache__/_memory.cpython-314.pyc +0 -0
  79. package/src/cognitive/__pycache__/_search.cpython-310.pyc +0 -0
  80. package/src/cognitive/__pycache__/_search.cpython-312.pyc +0 -0
  81. package/src/cognitive/__pycache__/_search.cpython-314.pyc +0 -0
  82. package/src/cognitive/__pycache__/_trust.cpython-310.pyc +0 -0
  83. package/src/cognitive/__pycache__/_trust.cpython-312.pyc +0 -0
  84. package/src/cognitive/__pycache__/_trust.cpython-314.pyc +0 -0
  85. package/src/cognitive/_core 2.py +0 -567
  86. package/src/cognitive/_decay 2.py +0 -382
  87. package/src/cognitive/_ingest 2.py +0 -892
  88. package/src/cognitive/_memory 2.py +0 -912
  89. package/src/cognitive/_search 2.py +0 -949
  90. package/src/cognitive/_trust 2.py +0 -464
  91. package/src/crons/__pycache__/sync.cpython-314.pyc +0 -0
  92. package/src/crons/manifest 2.json +0 -106
  93. package/src/crons/sync 2.py +0 -217
  94. package/src/dashboard/__init__ 2.py +0 -0
  95. package/src/dashboard/__pycache__/__init__.cpython-310.pyc +0 -0
  96. package/src/dashboard/__pycache__/app.cpython-310.pyc +0 -0
  97. package/src/dashboard/app 2.py +0 -789
  98. package/src/db/__init__ 2.py +0 -89
  99. package/src/db/__pycache__/__init__.cpython-310.pyc +0 -0
  100. package/src/db/__pycache__/__init__.cpython-312.pyc +0 -0
  101. package/src/db/__pycache__/__init__.cpython-314.pyc +0 -0
  102. package/src/db/__pycache__/_core.cpython-310.pyc +0 -0
  103. package/src/db/__pycache__/_core.cpython-312.pyc +0 -0
  104. package/src/db/__pycache__/_core.cpython-314.pyc +0 -0
  105. package/src/db/__pycache__/_credentials.cpython-310.pyc +0 -0
  106. package/src/db/__pycache__/_credentials.cpython-312.pyc +0 -0
  107. package/src/db/__pycache__/_credentials.cpython-314.pyc +0 -0
  108. package/src/db/__pycache__/_cron_runs.cpython-310.pyc +0 -0
  109. package/src/db/__pycache__/_cron_runs.cpython-314.pyc +0 -0
  110. package/src/db/__pycache__/_entities.cpython-310.pyc +0 -0
  111. package/src/db/__pycache__/_entities.cpython-312.pyc +0 -0
  112. package/src/db/__pycache__/_entities.cpython-314.pyc +0 -0
  113. package/src/db/__pycache__/_episodic.cpython-310.pyc +0 -0
  114. package/src/db/__pycache__/_episodic.cpython-312.pyc +0 -0
  115. package/src/db/__pycache__/_episodic.cpython-314.pyc +0 -0
  116. package/src/db/__pycache__/_evolution.cpython-310.pyc +0 -0
  117. package/src/db/__pycache__/_evolution.cpython-312.pyc +0 -0
  118. package/src/db/__pycache__/_evolution.cpython-314.pyc +0 -0
  119. package/src/db/__pycache__/_fts.cpython-310.pyc +0 -0
  120. package/src/db/__pycache__/_fts.cpython-312.pyc +0 -0
  121. package/src/db/__pycache__/_fts.cpython-314.pyc +0 -0
  122. package/src/db/__pycache__/_learnings.cpython-310.pyc +0 -0
  123. package/src/db/__pycache__/_learnings.cpython-312.pyc +0 -0
  124. package/src/db/__pycache__/_learnings.cpython-314.pyc +0 -0
  125. package/src/db/__pycache__/_reminders.cpython-310.pyc +0 -0
  126. package/src/db/__pycache__/_reminders.cpython-312.pyc +0 -0
  127. package/src/db/__pycache__/_reminders.cpython-314.pyc +0 -0
  128. package/src/db/__pycache__/_schema.cpython-310.pyc +0 -0
  129. package/src/db/__pycache__/_schema.cpython-312.pyc +0 -0
  130. package/src/db/__pycache__/_schema.cpython-314.pyc +0 -0
  131. package/src/db/__pycache__/_sessions.cpython-310.pyc +0 -0
  132. package/src/db/__pycache__/_sessions.cpython-312.pyc +0 -0
  133. package/src/db/__pycache__/_sessions.cpython-314.pyc +0 -0
  134. package/src/db/__pycache__/_skills.cpython-310.pyc +0 -0
  135. package/src/db/__pycache__/_skills.cpython-312.pyc +0 -0
  136. package/src/db/__pycache__/_skills.cpython-314.pyc +0 -0
  137. package/src/db/__pycache__/_tasks.cpython-310.pyc +0 -0
  138. package/src/db/__pycache__/_tasks.cpython-312.pyc +0 -0
  139. package/src/db/__pycache__/_tasks.cpython-314.pyc +0 -0
  140. package/src/db/_core 2.py +0 -417
  141. package/src/db/_credentials 2.py +0 -124
  142. package/src/db/_entities 2.py +0 -178
  143. package/src/db/_episodic 2.py +0 -738
  144. package/src/db/_evolution 2.py +0 -54
  145. package/src/db/_fts 2.py +0 -406
  146. package/src/db/_learnings 2.py +0 -168
  147. package/src/db/_reminders 2.py +0 -338
  148. package/src/db/_schema 2.py +0 -364
  149. package/src/db/_sessions 2.py +0 -300
  150. package/src/db/_tasks 2.py +0 -91
  151. package/src/evolution_cycle 2.py +0 -266
  152. package/src/hnsw_index 2.py +0 -254
  153. package/src/hooks/auto_capture 2.py +0 -208
  154. package/src/hooks/caffeinate-guard 2.sh +0 -8
  155. package/src/hooks/capture-session 2.sh +0 -21
  156. package/src/hooks/capture-tool-logs 2.sh +0 -127
  157. package/src/hooks/daily-briefing-check 2.sh +0 -33
  158. package/src/hooks/inbox-hook 2.sh +0 -76
  159. package/src/hooks/post-compact 2.sh +0 -148
  160. package/src/hooks/pre-compact 2.sh +0 -151
  161. package/src/hooks/session-start 2.sh +0 -268
  162. package/src/hooks/session-stop 2.sh +0 -140
  163. package/src/kg_populate 2.py +0 -290
  164. package/src/knowledge_graph 2.py +0 -257
  165. package/src/maintenance 2.py +0 -59
  166. package/src/migrate_embeddings 2.py +0 -122
  167. package/src/plugin_loader 2.py +0 -202
  168. package/src/plugins/__init__ 2.py +0 -0
  169. package/src/plugins/__pycache__/__init__ 2.cpython-310.pyc +0 -0
  170. package/src/plugins/__pycache__/__init__.cpython-310.pyc +0 -0
  171. package/src/plugins/__pycache__/__init__.cpython-314.pyc +0 -0
  172. package/src/plugins/__pycache__/adaptive_mode 2.cpython-310.pyc +0 -0
  173. package/src/plugins/__pycache__/adaptive_mode.cpython-310.pyc +0 -0
  174. package/src/plugins/__pycache__/adaptive_mode.cpython-314.pyc +0 -0
  175. package/src/plugins/__pycache__/agents 2.cpython-310.pyc +0 -0
  176. package/src/plugins/__pycache__/agents.cpython-310.pyc +0 -0
  177. package/src/plugins/__pycache__/artifact_registry 2.cpython-310.pyc +0 -0
  178. package/src/plugins/__pycache__/artifact_registry.cpython-310.pyc +0 -0
  179. package/src/plugins/__pycache__/backup 2.cpython-310.pyc +0 -0
  180. package/src/plugins/__pycache__/backup.cpython-310.pyc +0 -0
  181. package/src/plugins/__pycache__/cognitive_memory 2.cpython-310.pyc +0 -0
  182. package/src/plugins/__pycache__/cognitive_memory.cpython-310.pyc +0 -0
  183. package/src/plugins/__pycache__/core_rules 2.cpython-310.pyc +0 -0
  184. package/src/plugins/__pycache__/core_rules.cpython-310.pyc +0 -0
  185. package/src/plugins/__pycache__/cortex 2.cpython-310.pyc +0 -0
  186. package/src/plugins/__pycache__/cortex.cpython-310.pyc +0 -0
  187. package/src/plugins/__pycache__/entities 2.cpython-310.pyc +0 -0
  188. package/src/plugins/__pycache__/entities.cpython-310.pyc +0 -0
  189. package/src/plugins/__pycache__/episodic_memory 2.cpython-310.pyc +0 -0
  190. package/src/plugins/__pycache__/episodic_memory.cpython-310.pyc +0 -0
  191. package/src/plugins/__pycache__/evolution 2.cpython-310.pyc +0 -0
  192. package/src/plugins/__pycache__/evolution.cpython-310.pyc +0 -0
  193. package/src/plugins/__pycache__/guard 2.cpython-310.pyc +0 -0
  194. package/src/plugins/__pycache__/guard.cpython-310.pyc +0 -0
  195. package/src/plugins/__pycache__/knowledge_graph_tools 2.cpython-310.pyc +0 -0
  196. package/src/plugins/__pycache__/knowledge_graph_tools.cpython-310.pyc +0 -0
  197. package/src/plugins/__pycache__/preferences 2.cpython-310.pyc +0 -0
  198. package/src/plugins/__pycache__/preferences.cpython-310.pyc +0 -0
  199. package/src/plugins/__pycache__/schedule.cpython-310.pyc +0 -0
  200. package/src/plugins/__pycache__/schedule.cpython-314.pyc +0 -0
  201. package/src/plugins/__pycache__/skills.cpython-310.pyc +0 -0
  202. package/src/plugins/__pycache__/skills.cpython-314.pyc +0 -0
  203. package/src/plugins/__pycache__/update 2.cpython-310.pyc +0 -0
  204. package/src/plugins/__pycache__/update.cpython-310.pyc +0 -0
  205. package/src/plugins/adaptive_mode 2.py +0 -805
  206. package/src/plugins/agents 2.py +0 -52
  207. package/src/plugins/artifact_registry 2.py +0 -450
  208. package/src/plugins/backup 2.py +0 -104
  209. package/src/plugins/cognitive_memory 2.py +0 -564
  210. package/src/plugins/core_rules 2.py +0 -252
  211. package/src/plugins/cortex 2.py +0 -299
  212. package/src/plugins/entities 2.py +0 -67
  213. package/src/plugins/episodic_memory 2.py +0 -533
  214. package/src/plugins/evolution 2.py +0 -115
  215. package/src/plugins/guard 2.py +0 -746
  216. package/src/plugins/knowledge_graph_tools 2.py +0 -105
  217. package/src/plugins/preferences 2.py +0 -47
  218. package/src/plugins/update 2.py +0 -256
  219. package/src/requirements 2.txt +0 -12
  220. package/src/rules/__init__ 2.py +0 -0
  221. package/src/rules/core-rules 2.json +0 -331
  222. package/src/rules/migrate 2.py +0 -207
  223. package/src/scripts/__pycache__/nexo-auto-update.cpython-314.pyc +0 -0
  224. package/src/scripts/__pycache__/nexo-catchup.cpython-314.pyc +0 -0
  225. package/src/scripts/__pycache__/nexo-cognitive-decay.cpython-314.pyc +0 -0
  226. package/src/scripts/__pycache__/nexo-daily-self-audit.cpython-314.pyc +0 -0
  227. package/src/scripts/__pycache__/nexo-evolution-run.cpython-314.pyc +0 -0
  228. package/src/scripts/__pycache__/nexo-followup-hygiene.cpython-314.pyc +0 -0
  229. package/src/scripts/__pycache__/nexo-immune.cpython-314.pyc +0 -0
  230. package/src/scripts/__pycache__/nexo-install.cpython-314.pyc +0 -0
  231. package/src/scripts/__pycache__/nexo-learning-housekeep.cpython-314.pyc +0 -0
  232. package/src/scripts/__pycache__/nexo-learning-validator.cpython-314.pyc +0 -0
  233. package/src/scripts/__pycache__/nexo-migrate.cpython-314.pyc +0 -0
  234. package/src/scripts/__pycache__/nexo-postmortem-consolidator.cpython-314.pyc +0 -0
  235. package/src/scripts/__pycache__/nexo-pre-commit.cpython-314.pyc +0 -0
  236. package/src/scripts/__pycache__/nexo-proactive-dashboard.cpython-314.pyc +0 -0
  237. package/src/scripts/__pycache__/nexo-reflection.cpython-314.pyc +0 -0
  238. package/src/scripts/__pycache__/nexo-runtime-preflight.cpython-314.pyc +0 -0
  239. package/src/scripts/__pycache__/nexo-send-email.cpython-314.pyc +0 -0
  240. package/src/scripts/__pycache__/nexo-send-reply.cpython-314.pyc +0 -0
  241. package/src/scripts/__pycache__/nexo-sleep.cpython-314.pyc +0 -0
  242. package/src/scripts/__pycache__/nexo-synthesis.cpython-314.pyc +0 -0
  243. package/src/scripts/__pycache__/nexo-watchdog-smoke.cpython-314.pyc +0 -0
  244. package/src/scripts/check-context 2.py +0 -264
  245. package/src/scripts/nexo-auto-update 2.py +0 -6
  246. package/src/scripts/nexo-backup 2.sh +0 -25
  247. package/src/scripts/nexo-brain-activation 2.sh +0 -140
  248. package/src/scripts/nexo-catchup 2.py +0 -242
  249. package/src/scripts/nexo-cognitive-decay 2.py +0 -182
  250. package/src/scripts/nexo-daily-self-audit 2.py +0 -552
  251. package/src/scripts/nexo-deep-sleep 2.sh +0 -97
  252. package/src/scripts/nexo-evolution-run 2.py +0 -597
  253. package/src/scripts/nexo-followup-hygiene 2.py +0 -112
  254. package/src/scripts/nexo-github-monitor 2.py +0 -256
  255. package/src/scripts/nexo-immune 2.py +0 -927
  256. package/src/scripts/nexo-inbox-hook 2.sh +0 -74
  257. package/src/scripts/nexo-install 2.py +0 -6
  258. package/src/scripts/nexo-learning-housekeep 2.py +0 -245
  259. package/src/scripts/nexo-learning-validator 2.py +0 -207
  260. package/src/scripts/nexo-migrate 2.py +0 -232
  261. package/src/scripts/nexo-postmortem-consolidator 2.py +0 -421
  262. package/src/scripts/nexo-pre-commit 2.py +0 -120
  263. package/src/scripts/nexo-prevent-sleep 2.sh +0 -29
  264. package/src/scripts/nexo-proactive-dashboard 2.py +0 -345
  265. package/src/scripts/nexo-reflection 2.py +0 -253
  266. package/src/scripts/nexo-runtime-preflight 2.py +0 -274
  267. package/src/scripts/nexo-send-email 2.py +0 -25
  268. package/src/scripts/nexo-send-email.py +0 -25
  269. package/src/scripts/nexo-send-reply 2.py +0 -178
  270. package/src/scripts/nexo-send-reply.py +0 -178
  271. package/src/scripts/nexo-sleep 2.py +0 -592
  272. package/src/scripts/nexo-snapshot-restore 2.sh +0 -35
  273. package/src/scripts/nexo-synthesis 2.py +0 -253
  274. package/src/scripts/nexo-tcc-approve 2.sh +0 -79
  275. package/src/scripts/nexo-update 2.sh +0 -161
  276. package/src/scripts/nexo-watchdog 2.sh +0 -878
  277. package/src/scripts/nexo-watchdog-smoke 2.py +0 -119
  278. package/src/server 2.py +0 -733
  279. package/src/storage_router 2.py +0 -32
  280. package/src/tools_coordination 2.py +0 -102
  281. package/src/tools_credentials 2.py +0 -68
  282. package/src/tools_learnings 2.py +0 -220
  283. package/src/tools_menu 2.py +0 -227
  284. package/src/tools_reminders 2.py +0 -86
  285. package/src/tools_reminders_crud 2.py +0 -159
  286. package/src/tools_sessions 2.py +0 -476
  287. package/src/tools_task_history 2.py +0 -57
  288. package/templates/CLAUDE.md 2.template +0 -63
  289. package/templates/openclaw 2.json +0 -13
  290. package/tests/__init__ 2.py +0 -0
  291. package/tests/__init__.py +0 -0
  292. package/tests/conftest 2.py +0 -71
  293. package/tests/conftest.py +0 -71
  294. package/tests/test_cognitive 2.py +0 -205
  295. package/tests/test_cognitive.py +0 -205
  296. package/tests/test_knowledge_graph 2.py +0 -140
  297. package/tests/test_knowledge_graph.py +0 -140
  298. package/tests/test_migrations 2.py +0 -137
  299. package/tests/test_migrations.py +0 -137
@@ -1,912 +0,0 @@
1
- """NEXO Cognitive — Memory operations: format, stats, consolidation, somatic."""
2
- import json, math, re
3
- import numpy as np
4
- from datetime import datetime, timedelta
5
- from cognitive._core import _get_db, embed, cosine_similarity, _blob_to_array, _array_to_blob, EMBEDDING_DIM, DISCRIMINATING_ENTITIES
6
- from cognitive._ingest import _sanitize_memory_content
7
-
8
-
9
def _quarantine_stats():
    """Return quarantine statistics by delegating to ``cognitive._ingest``.

    The helper is imported at call time rather than at module load.
    NOTE(review): presumably this defers the lookup to sidestep an import
    cycle with ``cognitive._ingest`` — confirm before hoisting it.
    """
    from cognitive._ingest import quarantine_stats as _impl
    return _impl()
12
-
13
-
14
def _get_gate_stats():
    """Return gate statistics by delegating to ``cognitive._ingest``.

    The helper is imported at call time rather than at module load.
    NOTE(review): presumably this defers the lookup to sidestep an import
    cycle with ``cognitive._ingest`` — confirm before hoisting it.
    """
    from cognitive._ingest import get_gate_stats as _impl
    return _impl()
17
-
18
def format_results(results: list[dict]) -> str:
    """Render search results as a human-readable, blank-line separated string.

    Each result dict must provide ``score``, ``source_type``, ``content``,
    ``store`` and (for LTM results) ``id``; ``domain``, ``source_title``,
    ``reactivated`` and ``explanation`` are optional.

    For every result the output contains a header
    (``[score] TYPE (domain): "title"``), the store tag, an optional
    ``[REACTIVATED]`` marker, a preview of at most 300 characters of the
    sanitized content and, when present, the explanation. Results from the
    ``ltm`` store additionally list up to two sibling memories.

    Args:
        results: Search hits to format.

    Returns:
        The formatted text, or ``"No results found."`` for an empty input.
    """
    if not results:
        return "No results found."

    # Substrings whose presence in a change log hints at a step-by-step procedure.
    procedure_markers = ("1.", "2.", "3.", "step ", "Step ", "then ", "first ", "First ")

    lines = []
    for r in results:
        score = r["score"]
        stype = r["source_type"].upper()
        domain = r.get("domain", "")
        title = r.get("source_title", "")
        content = _sanitize_memory_content(r["content"])

        # Header
        domain_str = f" ({domain})" if domain else ""
        title_str = f': "{title}"' if title else ""
        header = f"[{score:.2f}] {stype}{domain_str}{title_str}"

        # Content preview (300 chars)
        preview = content[:300]
        if len(content) > 300:
            preview += "..."

        # Proto-procedural: detect sequential markers in change logs
        if r["source_type"] == "change" and any(m in content for m in procedure_markers):
            header += " [PROCEDURE]"

        store_tag = r["store"].upper()
        reactivated = " [REACTIVATED]" if r.get("reactivated") else ""
        explanation = r.get("explanation", "")
        explain_line = f"\n ⚙ {explanation}" if explanation else ""
        lines.append(f"{header} [{store_tag}]{reactivated}\n {preview}{explain_line}")

        # Sibling mention: if this LTM memory has siblings, note them
        if r["store"] == "ltm":
            try:
                for sib in get_siblings(r["id"])[:2]:
                    # A NULL discriminators column must not drop the sibling line.
                    disc_str = ", ".join((sib["discriminators"] or "").split(",")[:3])
                    # Sibling text is stored memory content too, so it gets the
                    # same sanitization as the primary content above.
                    sib_content = _sanitize_memory_content(sib["content"])
                    snippet = sib_content[:80]
                    # Only signal truncation when something was actually cut.
                    ellipsis = "..." if len(sib_content) > 80 else ""
                    lines.append(
                        f" ↳ SIBLING #{sib['sibling_id']} ({sib['domain']}): "
                        f"differs in [{disc_str}] — {snippet}{ellipsis}"
                    )
            except Exception:
                # Sibling annotation is best-effort decoration; a lookup or
                # rendering failure must never break result formatting.
                pass

    return "\n\n".join(lines)
63
-
64
-
65
def get_metrics(days: int = 7) -> dict:
    """Compute retrieval-quality metrics over the last ``days`` days.

    Reads ``top_score`` from every ``retrieval_log`` row whose ``created_at``
    is at or after ``utcnow() - days`` (compared as ISO-8601 strings).
    Duplicate-learning statistics are not part of this result; they are
    produced separately by ``check_repeat_errors``.

    Args:
        days: Size of the look-back window in days.

    Returns:
        A dict with:
            period_days: the ``days`` argument, echoed back.
            total_retrievals: number of retrievals in the period.
            retrieval_relevance_pct: % of retrievals with top_score >= 0.6.
            avg_top_score: mean best-match score across the retrievals.
            retrievals_per_day: ``total_retrievals / days`` (0.0 when
                ``days`` is not positive).
            score_distribution: histogram with buckets
                [<0.5, 0.5-0.6, 0.6-0.7, 0.7-0.8, >=0.8].
            needs_multilingual: True when relevance is below 70% over at
                least 10 retrievals.
    """
    db = _get_db()
    cutoff = (datetime.utcnow() - timedelta(days=days)).isoformat()

    rows = db.execute(
        "SELECT top_score FROM retrieval_log WHERE created_at >= ?", (cutoff,)
    ).fetchall()

    dist = {"below_50": 0, "50_60": 0, "60_70": 0, "70_80": 0, "above_80": 0}
    total = len(rows)
    if total == 0:
        return {
            "period_days": days,
            "total_retrievals": 0,
            "retrieval_relevance_pct": 0.0,
            "avg_top_score": 0.0,
            "retrievals_per_day": 0.0,
            "score_distribution": dist,
            "needs_multilingual": False,
        }

    # A NULL top_score is counted as 0.0 (no match) instead of raising
    # TypeError in the comparisons below.
    scores = [r[0] if r[0] is not None else 0.0 for r in rows]
    relevant = sum(1 for s in scores if s >= 0.6)
    relevance_pct = round(relevant / total * 100, 1)
    avg_score = round(sum(scores) / total, 3)

    # (exclusive upper bound, bucket key); anything >= 0.8 lands in above_80.
    bounded_buckets = ((0.5, "below_50"), (0.6, "50_60"), (0.7, "60_70"), (0.8, "70_80"))
    for s in scores:
        for upper, key in bounded_buckets:
            if s < upper:
                dist[key] += 1
                break
        else:
            dist["above_80"] += 1

    # Check if multilingual model is needed (spec 13.3)
    needs_multilingual = relevance_pct < 70.0 and total >= 10

    return {
        "period_days": days,
        "total_retrievals": total,
        "retrieval_relevance_pct": relevance_pct,
        "avg_top_score": avg_score,
        # Guard against ZeroDivisionError for a zero-day window; a
        # non-positive window has no meaningful per-day rate.
        "retrievals_per_day": round(total / days, 1) if days > 0 else 0.0,
        "score_distribution": dist,
        "needs_multilingual": needs_multilingual,
    }
125
-
126
-
127
def check_repeat_errors() -> dict:
    """Detect recent STM learnings that duplicate an active LTM learning.

    Considers STM rows with ``source_type = 'learning'`` created in the last
    7 days that have not been promoted, and compares each against every
    non-dormant LTM learning. A pair counts as a duplicate when the cosine
    similarity of the stored embeddings exceeds 0.8; only the first matching
    LTM row is recorded per STM row.

    Returns:
        A dict with ``new_count``, ``duplicate_count``, ``repeat_rate_pct``
        and ``duplicates`` (at most the first 10 match records).
    """
    db = _get_db()
    week_ago = (datetime.utcnow() - timedelta(days=7)).isoformat()

    # Recent, not-yet-promoted learnings in short-term memory.
    recent_stm = db.execute(
        "SELECT id, content, embedding FROM stm_memories WHERE source_type = 'learning' AND created_at >= ? AND promoted_to_ltm = 0",
        (week_ago,)
    ).fetchall()

    # Every active learning in long-term memory.
    ltm_rows = db.execute(
        "SELECT id, content, embedding FROM ltm_memories WHERE source_type = 'learning' AND is_dormant = 0"
    ).fetchall()

    if not (recent_stm and ltm_rows):
        return {"new_count": len(recent_stm), "duplicate_count": 0, "repeat_rate_pct": 0.0, "duplicates": []}

    matches = []
    for candidate in recent_stm:
        candidate_vec = _blob_to_array(candidate["embedding"])
        for existing in ltm_rows:
            similarity = cosine_similarity(candidate_vec, _blob_to_array(existing["embedding"]))
            if similarity > 0.8:
                matches.append({
                    "new_stm_id": candidate["id"],
                    "new_content": candidate["content"][:100],
                    "ltm_id": existing["id"],
                    "ltm_content": existing["content"][:100],
                    "score": round(similarity, 3),
                })
                # The first LTM hit is enough to flag this STM entry.
                break

    rate_pct = round(len(matches) / len(recent_stm) * 100, 1)

    return {
        "new_count": len(recent_stm),
        "duplicate_count": len(matches),
        "repeat_rate_pct": rate_pct,
        "duplicates": matches[:10],
    }
173
-
174
-
175
def rehearse_by_content(content_keywords: str, source_type: str = ""):
    """Passive rehearsal: strengthen stored memories that match the given text.

    Called when nexo_recall or nexo_learning_search return results. The text
    is embedded and compared against every un-promoted STM memory and every
    non-dormant LTM memory; each memory with cosine similarity >= 0.7 gets
    ``strength`` reset to 1.0, ``access_count`` incremented and
    ``last_accessed`` set to the current UTC time. Nothing is returned; this
    is a side effect only.

    Args:
        content_keywords: Text to match against (e.g., learning title +
            content). Inputs shorter than 10 characters after stripping are
            ignored. Only the first 500 characters are embedded.
        source_type: When non-empty, only memories with this ``source_type``
            are considered.
    """
    if not content_keywords or len(content_keywords.strip()) < 10:
        return

    try:
        db = _get_db()
        query_vec = embed(content_keywords[:500])  # cap to avoid slow embedding
        if np.linalg.norm(query_vec) == 0:
            return

        now = datetime.utcnow().isoformat()

        # Table names and clauses are fixed literals, never caller input, so
        # interpolating them into the SQL text is safe; values are bound.
        active_filters = (
            ("stm_memories", "promoted_to_ltm = 0"),
            ("ltm_memories", "is_dormant = 0"),
        )
        for table, active_clause in active_filters:
            where = f"1=1 AND {active_clause}"
            params = ()
            if source_type:
                # Honor the documented filter (it was previously ignored).
                where += " AND source_type = ?"
                params = (source_type,)

            rows = db.execute(f"SELECT id, embedding FROM {table} WHERE {where}", params).fetchall()
            for row in rows:
                vec = _blob_to_array(row["embedding"])
                if cosine_similarity(query_vec, vec) >= 0.7:
                    db.execute(
                        f"UPDATE {table} SET strength = 1.0, access_count = access_count + 1, last_accessed = ? WHERE id = ?",
                        (now, row["id"])
                    )

        db.commit()
    except Exception:
        pass  # Rehearsal is best-effort, never block the main tool
218
-
219
-
220
def _extract_discriminators(text: str) -> set:
    """Return the members of ``DISCRIMINATING_ENTITIES`` mentioned in ``text``.

    Matching is case-insensitive on the text side: the text is lower-cased
    and an entity counts as present when it is one of the whitespace-split
    tokens or occurs anywhere in the lower-cased text as a substring (which
    also covers multi-word entities).

    NOTE(review): the substring test already subsumes the token test for
    string entities, so short entities will also match inside longer words —
    confirm whether that is intended.
    """
    lowered = text.lower()
    tokens = set(lowered.split())
    return {
        entity
        for entity in DISCRIMINATING_ENTITIES
        if entity in tokens or entity in lowered
    }
230
-
231
-
232
def _memories_are_siblings(content_a: str, content_b: str) -> tuple[bool, list[str]]:
    """Decide whether two memories are siblings (similar-but-incompatible).

    Two memories are siblings when the discriminating entities extracted
    from each text are not the same set.

    Returns:
        ``(True, diff)`` where ``diff`` is the sorted list of entities found
        in exactly one of the two texts, or ``(False, [])`` when both texts
        mention the same entities.
    """
    # Symmetric difference: entities present in one text but not the other.
    differing = _extract_discriminators(content_a) ^ _extract_discriminators(content_b)
    if not differing:
        return False, []
    return True, sorted(differing)
250
-
251
-
252
def consolidate_semantic(threshold: float = 0.9, dry_run: bool = False) -> dict:
    """Merge near-duplicate LTM memories, with discriminative fusion.

    Every pair of non-dormant LTM memories whose embedding cosine similarity
    is at least ``threshold`` is examined. If the two texts differ in their
    discriminating entities (OS, platform, language, etc.) they are linked as
    "siblings" in ``memory_siblings`` and NOT merged. Otherwise the memory
    with the lower ``access_count`` is dropped and folded into the other:
    when the dropped text has more than 5 words the keeper lacks, it is
    appended after a ``[CONSOLIDATED]`` separator; the keeper is re-embedded
    and inherits the dropped memory's access count.

    Args:
        threshold: Cosine similarity threshold for considering duplicates
            (default 0.9).
        dry_run: If True, return the candidate pairs without writing to the
            database.

    Returns:
        Dict with 'merged' (list of merge actions) and 'siblings' (list of
        sibling links found/created).
    """
    db = _get_db()
    rows = db.execute(
        "SELECT id, content, embedding, source_type, domain, access_count, strength FROM ltm_memories WHERE is_dormant = 0"
    ).fetchall()

    if len(rows) < 2:
        return {"merged": [], "siblings": []}

    memories = [
        {
            "id": row["id"],
            "content": row["content"],
            "vec": _blob_to_array(row["embedding"]),
            "source_type": row["source_type"],
            "domain": row["domain"],
            "access_count": row["access_count"],
            "strength": row["strength"],
        }
        for row in rows
    ]

    merged_ids = set()
    merge_actions = []
    sibling_actions = []

    for i, first in enumerate(memories):
        if first["id"] in merged_ids:
            continue
        for second in memories[i + 1:]:
            if second["id"] in merged_ids:
                continue

            score = cosine_similarity(first["vec"], second["vec"])
            if score < threshold:
                continue

            # Check for discriminating entities before merging
            is_sibling, discriminators = _memories_are_siblings(
                first["content"], second["content"]
            )

            if is_sibling:
                # Don't merge — create sibling relationship
                if not dry_run:
                    try:
                        db.execute(
                            "INSERT OR IGNORE INTO memory_siblings (memory_a_id, memory_b_id, similarity, discriminators) VALUES (?, ?, ?, ?)",
                            (first["id"], second["id"], score, ",".join(discriminators))
                        )
                    except Exception:
                        # Linking is best-effort; a failed insert must not
                        # abort the consolidation pass.
                        pass

                sibling_actions.append({
                    "memory_a_id": first["id"],
                    "memory_b_id": second["id"],
                    "score": round(score, 4),
                    "discriminators": discriminators,
                    "content_a": first["content"][:100],
                    "content_b": second["content"][:100],
                })
                continue

            # Safe to merge — no discriminating entities differ
            if first["access_count"] >= second["access_count"]:
                keep, drop = first, second
            else:
                keep, drop = second, first

            # Snapshot the pair before the keeper is mutated below.
            merge_actions.append({
                "keep_id": keep["id"],
                "drop_id": drop["id"],
                "score": round(score, 4),
                "keep_content": keep["content"][:100],
                "drop_content": drop["content"][:100],
                "keep_access": keep["access_count"],
                "drop_access": drop["access_count"],
            })

            if dry_run:
                continue

            new_content = keep["content"]
            drop_words = set(drop["content"].lower().split())
            keep_words = set(keep["content"].lower().split())
            if len(drop_words - keep_words) > 5:
                new_content = keep["content"] + "\n\n[CONSOLIDATED]: " + drop["content"]

            new_vec = embed(new_content)

            db.execute(
                "UPDATE ltm_memories SET content = ?, embedding = ?, access_count = access_count + ? WHERE id = ?",
                (new_content, _array_to_blob(new_vec), drop["access_count"], keep["id"])
            )
            db.execute("DELETE FROM ltm_memories WHERE id = ?", (drop["id"],))
            merged_ids.add(drop["id"])

            # Keep the in-memory record in step with the row just written so
            # a later merge into the same keeper builds on the consolidated
            # text instead of overwriting it with the stale original.
            keep["content"] = new_content
            keep["vec"] = new_vec
            keep["access_count"] += drop["access_count"]

            if drop is first:
                # The outer memory itself was deleted; comparing it against
                # further candidates would merge live rows into a row that
                # no longer exists.
                break

    if not dry_run and (merge_actions or sibling_actions):
        db.commit()

    return {"merged": merge_actions, "siblings": sibling_actions}
370
-
371
-
372
def get_siblings(memory_id: int) -> list[dict]:
    """List the sibling memories (similar-but-incompatible) of ``memory_id``.

    Looks up every ``memory_siblings`` row that references the memory on
    either side, resolves the id on the opposite side, and fetches that
    memory from ``ltm_memories``. Links whose counterpart row is not found
    are skipped.

    Returns:
        One dict per resolved sibling with ``sibling_id``, ``similarity``,
        ``discriminators``, ``content`` (first 200 characters) and ``domain``.
    """
    db = _get_db()
    link_rows = db.execute(
        """SELECT s.*,
        CASE WHEN s.memory_a_id = ? THEN s.memory_b_id ELSE s.memory_a_id END as sibling_id
        FROM memory_siblings s
        WHERE s.memory_a_id = ? OR s.memory_b_id = ?""",
        (memory_id, memory_id, memory_id)
    ).fetchall()

    resolved = []
    for link in link_rows:
        other_id = link["sibling_id"]
        other = db.execute(
            "SELECT content, domain, source_type FROM ltm_memories WHERE id = ?", (other_id,)
        ).fetchone()
        if not other:
            # No matching LTM row for this link; nothing to report.
            continue
        resolved.append({
            "sibling_id": other_id,
            "similarity": link["similarity"],
            "discriminators": link["discriminators"],
            "content": other["content"][:200],
            "domain": other["domain"],
        })
    return resolved
396
-
397
def get_stats() -> dict:
    """Collect summary statistics for the cognitive memory stores.

    Returns:
        Dict with STM/LTM row counts, average strengths and the average
        retrieval score (each rounded to 3 decimals, 0.0 when the aggregate
        is NULL), the total number of retrieval-log rows, the five most
        populated non-empty domains per store as ``(domain, count)`` tuples,
        plus the results of ``_quarantine_stats()`` and ``_get_gate_stats()``.
    """
    conn = _get_db()

    def scalar(sql):
        # First column of the first row of a single-value aggregate query.
        return conn.execute(sql).fetchone()[0]

    def domain_counts(sql):
        return [(rec["domain"], rec["cnt"]) for rec in conn.execute(sql).fetchall()]

    stm_active = scalar("SELECT COUNT(*) FROM stm_memories WHERE lifecycle_state = 'active' AND promoted_to_ltm = 0")
    stm_promoted = scalar("SELECT COUNT(*) FROM stm_memories WHERE promoted_to_ltm = 1")
    stm_total = scalar("SELECT COUNT(*) FROM stm_memories WHERE lifecycle_state = 'active'")
    ltm_active = scalar("SELECT COUNT(*) FROM ltm_memories WHERE is_dormant = 0")
    ltm_dormant = scalar("SELECT COUNT(*) FROM ltm_memories WHERE is_dormant = 1")

    # AVG() yields NULL on an empty set; fall back to 0.0 in that case.
    avg_stm = scalar("SELECT AVG(strength) FROM stm_memories WHERE lifecycle_state = 'active' AND promoted_to_ltm = 0") or 0.0
    avg_ltm = scalar("SELECT AVG(strength) FROM ltm_memories WHERE is_dormant = 0") or 0.0

    total_retrievals = scalar("SELECT COUNT(*) FROM retrieval_log")
    avg_retrieval_score = scalar("SELECT AVG(top_score) FROM retrieval_log") or 0.0

    top_stm = domain_counts(
        "SELECT domain, COUNT(*) as cnt FROM stm_memories WHERE lifecycle_state = 'active' AND promoted_to_ltm = 0 AND domain != '' GROUP BY domain ORDER BY cnt DESC LIMIT 5"
    )
    top_ltm = domain_counts(
        "SELECT domain, COUNT(*) as cnt FROM ltm_memories WHERE is_dormant = 0 AND domain != '' GROUP BY domain ORDER BY cnt DESC LIMIT 5"
    )

    quarantine = _quarantine_stats()

    return {
        "stm_active": stm_active,
        "stm_promoted": stm_promoted,
        "stm_total": stm_total,
        "ltm_active": ltm_active,
        "ltm_dormant": ltm_dormant,
        "avg_stm_strength": round(avg_stm, 3),
        "avg_ltm_strength": round(avg_ltm, 3),
        "total_retrievals": total_retrievals,
        "avg_retrieval_score": round(avg_retrieval_score, 3),
        "top_domains_stm": top_stm,
        "top_domains_ltm": top_ltm,
        "quarantine": quarantine,
        "prediction_error_gate": _get_gate_stats(),
    }
438
-
439
def set_lifecycle(memory_id: int, state: str, store: str = "auto", snooze_until: str = "") -> str:
    """Set the lifecycle state of a memory.

    All arguments are validated before the database is opened, so an invalid
    call returns its error message without touching the DB.

    Args:
        memory_id: Memory ID
        state: 'active', 'pinned', 'snoozed', 'archived'
        store: 'stm', 'ltm', or 'auto' (tries STM first, then LTM, and
            updates the first table that contains the ID)
        snooze_until: Required for 'snoozed' state — ISO date string
            (YYYY-MM-DD or full datetime). Stored only when state is
            'snoozed'; any other state clears the column (NULL).

    Returns:
        A human-readable status string: either a validation / not-found
        message or a confirmation of the new state.
    """
    if state not in ("active", "pinned", "snoozed", "archived"):
        return f"Invalid state: {state}. Must be active, pinned, snoozed, or archived."

    if state == "snoozed" and not snooze_until:
        return "snooze_until is required when setting state to 'snoozed'."

    # Table names come exclusively from this fixed mapping, which is what
    # makes interpolating them into the SQL below safe.
    tables_by_store = {
        "auto": ("stm_memories", "ltm_memories"),
        "stm": ("stm_memories",),
        "ltm": ("ltm_memories",),
    }
    tables = tables_by_store.get(store)
    if tables is None:
        return f"Invalid store: {store}. Must be stm, ltm, or auto."

    db = _get_db()

    found_table = None
    for table in tables:
        if db.execute(f"SELECT id FROM {table} WHERE id = ?", (memory_id,)).fetchone():
            found_table = table
            break

    if found_table is None:
        return f"Memory #{memory_id} not found in {store}."

    snooze_val = snooze_until if state == "snoozed" else None
    db.execute(
        f"UPDATE {found_table} SET lifecycle_state = ?, snooze_until = ? WHERE id = ?",
        (state, snooze_val, memory_id)
    )
    db.commit()

    store_name = "STM" if found_table == "stm_memories" else "LTM"
    extra = f" until {snooze_until}" if state == "snoozed" else ""
    return f"Memory #{memory_id} ({store_name}) → {state}{extra}"
488
-
489
-
490
- # ---------------------------------------------------------------------------
491
- # Feature 1: Auto-Merge Duplicates
492
- # Inspired by Vestige's union-find clustering and claude-cortex's Jaccard
493
- # similarity merge. Runs during sleep cycle AFTER dream_cycle.
494
- # ---------------------------------------------------------------------------
495
-
496
def auto_merge_duplicates(threshold: float = 0.92) -> dict:
    """Auto-merge near-duplicate LTM memories with cosine similarity >= threshold.

    Scans all non-dormant LTM memories whose lifecycle state is NULL or
    'active', computes a pairwise cosine-similarity matrix, and merges pairs
    that reach ``threshold``. Pairs are never merged when
    ``_memories_are_siblings`` reports them as siblings, or when both have a
    non-empty domain and the domains differ.

    Merge strategy:
    - Keep the longer memory; on equal length keep the one with the higher
      access_count; on a full tie keep the earlier one
    - Append the dropped content (if it has >5 words not in the keeper)
    - Re-embed merged content
    - Sum access_count from both, union the comma-separated tags
    - Bump the keeper's strength by 0.1 (capped at 1.0)
    - Delete the duplicate
    - Log every merge for audit

    Args:
        threshold: Minimum cosine similarity for two memories to be merged.

    Returns:
        Dict with scanned, merged, kept counts and merge_log details.
    """
    db = _get_db()
    rows = db.execute(
        "SELECT id, content, embedding, source_type, domain, access_count, strength, tags "
        "FROM ltm_memories WHERE is_dormant = 0 AND "
        "(lifecycle_state IS NULL OR lifecycle_state = 'active')"
    ).fetchall()

    if len(rows) < 2:
        return {"scanned": len(rows), "merged": 0, "kept": len(rows), "merge_log": []}

    # Build memory list with vectors
    memories = [
        {
            "id": row["id"],
            "content": row["content"],
            "vec": _blob_to_array(row["embedding"]),
            "source_type": row["source_type"],
            "domain": row["domain"] or "",
            "access_count": row["access_count"],
            "strength": row["strength"],
            "tags": row["tags"] or "",
        }
        for row in rows
    ]

    n = len(memories)

    # Batch cosine similarity matrix. Zero-norm vectors are divided by 1.0
    # instead of 0 so they produce similarity 0 rather than NaN.
    vecs = np.array([m["vec"] for m in memories], dtype=np.float32)
    norms = np.linalg.norm(vecs, axis=1, keepdims=True)
    norms[norms == 0] = 1.0
    normalized = vecs / norms
    sim_matrix = normalized @ normalized.T

    merged_ids = set()
    merge_log = []

    for i in range(n):
        if memories[i]["id"] in merged_ids:
            continue
        for j in range(i + 1, n):
            if memories[j]["id"] in merged_ids:
                continue

            score = float(sim_matrix[i, j])
            if score < threshold:
                continue

            # Sibling check — never merge if discriminating entities differ
            is_sibling, _discriminators = _memories_are_siblings(
                memories[i]["content"], memories[j]["content"]
            )
            if is_sibling:
                continue

            # Domain compatibility check (only when both sides declare one)
            if memories[i]["domain"] and memories[j]["domain"]:
                if memories[i]["domain"] != memories[j]["domain"]:
                    continue

            # Determine keep vs drop: prefer longer content, then higher
            # access_count; a full tie keeps the earlier memory (i).
            rank_i = (len(memories[i]["content"]), memories[i]["access_count"])
            rank_j = (len(memories[j]["content"]), memories[j]["access_count"])
            if rank_j > rank_i:
                keep, drop = memories[j], memories[i]
            else:
                keep, drop = memories[i], memories[j]

            # Merge content: append drop's text when it contributes enough
            # words the keeper does not already contain.
            keep_words = set(keep["content"].lower().split())
            drop_words = set(drop["content"].lower().split())
            unique_words = drop_words - keep_words

            new_content = keep["content"]
            if len(unique_words) > 5:
                new_content = keep["content"] + "\n\n[AUTO-MERGED]: " + drop["content"]

            # Re-embed merged content
            new_vec = embed(new_content)
            new_blob = _array_to_blob(new_vec)

            # Merge tags
            keep_tags = set(filter(None, keep["tags"].split(",")))
            drop_tags = set(filter(None, drop["tags"].split(",")))
            merged_tags = ",".join(sorted(keep_tags | drop_tags))

            # Update keep, delete drop
            new_access = keep["access_count"] + drop["access_count"]
            db.execute(
                "UPDATE ltm_memories SET content = ?, embedding = ?, "
                "access_count = ?, tags = ?, strength = MIN(1.0, strength + 0.1) WHERE id = ?",
                (new_content, new_blob, new_access, merged_tags, keep["id"])
            )
            db.execute("DELETE FROM ltm_memories WHERE id = ?", (drop["id"],))
            merged_ids.add(drop["id"])

            merge_log.append({
                "kept_id": keep["id"],
                "dropped_id": drop["id"],
                "similarity": round(score, 4),
                "unique_words_appended": len(unique_words) if len(unique_words) > 5 else 0,
                "kept_preview": keep["content"][:80],
                "dropped_preview": drop["content"][:80],
            })

            # Keep the in-memory keeper in sync with the row just written,
            # otherwise a later merge into the same keeper would overwrite
            # this merge's content, access count and tags with stale values.
            # NOTE: sim_matrix is not recomputed, so later comparisons still
            # use the keeper's pre-merge embedding.
            keep["content"] = new_content
            keep["access_count"] = new_access
            keep["tags"] = merged_tags

            if drop is memories[i]:
                # The outer memory no longer exists in the table; continuing
                # the inner loop would "merge" other rows into a deleted row
                # and then delete them, losing their content.
                break

    if merge_log:
        db.commit()

    return {
        "scanned": n,
        "merged": len(merge_log),
        "kept": n - len(merge_log),
        "merge_log": merge_log,
    }
628
-
629
-
630
- # ---------------------------------------------------------------------------
631
- # Feature 2: Security Pipeline (Memory Poisoning Defense)
632
- # Adapted from ShieldCortex's 6-layer defence pipeline:
633
- # - instruction-detector.ts → pattern groups with weights
634
- # - encoding-detector.ts → base64, homoglyphs, invisible chars
635
- # - credential-leak scanner → reuses existing redact_secrets()
636
- # ---------------------------------------------------------------------------
637
-
638
- # Injection patterns (adapted from ShieldCortex instruction-detector.ts)
639
# Each entry is (compiled_regex, category, weight). The table below groups
# the raw regexes by (category, weight, flags); the comprehension expands it
# in declaration order, so the resulting list order is stable.
_INJECTION_PATTERNS = [
    (re.compile(regex, flags), category, weight)
    for category, weight, flags, regexes in (
        # System prompt markers (weight 0.9)
        ("system_prompt_marker", 0.9, re.IGNORECASE, (
            r'\[SYSTEM:',
            r'<<SYS>>',
            r'\[INST\]',
            r'<\|im_start\|>',
            r'<\|system\|>',
        )),
        # Line-anchored marker: needs MULTILINE so ^ matches at every line start
        ("system_prompt_marker", 0.9, re.IGNORECASE | re.MULTILINE, (
            r'^SYSTEM\s*:',
        )),
        # Hidden instructions (weight 0.8)
        ("hidden_instruction", 0.8, re.IGNORECASE, (
            r'ignore\s+(all\s+)?previous\s+(instructions?|prompts?|context)',
            r'forget\s+everything',
            r'new\s+instructions?\s*:',
            r'you\s+are\s+now\b',
            r'disregard\s+(all\s+)?(previous|above|prior)',
            r'override\s+(previous|all|system)',
        )),
        # Memory manipulation (weight 0.7)
        ("memory_manipulation", 0.7, re.IGNORECASE, (
            r'save\s+(this\s+)?to\s+memory',
            r'remember\s+this\s+(instruction|command|rule)',
            r'from\s+now\s+on\s*(,\s*)?always',
            r'inject\s+(into\s+)?memory',
        )),
        # Behavioral modification (weight 0.7)
        ("behavioral_mod", 0.7, re.IGNORECASE, (
            r'your\s+new\s+rule\s+is',
            r'always\s+respond\s+with',
            r'when\s+(the\s+)?user\s+asks',
        )),
        # Delimiter attacks (weight 0.75)
        ("delimiter_attack", 0.75, re.IGNORECASE, (
            r'\n{5,}[\s\S]{0,500}\b(instruction|command|system|ignore)\b',
            r'<!--[\s\S]{0,200}?(instruction|command|system|ignore|inject|override)[\s\S]{0,200}?-->',
        )),
    )
    for regex in regexes
]

# Upper bound on how many leading characters of a text get pattern-scanned
# (guards against ReDoS on very large inputs).
_MAX_SECURITY_SCAN_LENGTH = 50000
674
-
675
-
676
# Base64 runs of at least 100 characters (25 groups of 4), optional padding.
_BASE64_BLOCK_RE = re.compile(
    r'(?:[A-Za-z0-9+/]{4}){25,}(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?'
)
# Zero-width characters, BOM and the right-to-left override.
_INVISIBLE_CHARS_RE = re.compile(r'[\u200B\u200C\u200D\uFEFF\u202E]')
# Cyrillic letters that look like Latin ones.
_CYRILLIC_HOMOGLYPH_RE = re.compile(
    r'[\u0430\u0435\u043E\u0440\u0441\u0443\u0445\u0410\u0412\u0415\u041A\u041C\u041D\u041E\u0420\u0421\u0422\u0423\u0425]'
)
# Phrases that try to change assistant behavior; each hit weighs 0.4.
_BEHAVIORAL_PATTERNS = [
    (re.compile(r'\balways\s+do\b', re.IGNORECASE), "behavioral:always_do"),
    (re.compile(r'\bnever\s+do\b', re.IGNORECASE), "behavioral:never_do"),
    (re.compile(r'\byour\s+new\s+rule\b', re.IGNORECASE), "behavioral:new_rule"),
    (re.compile(r'\byou\s+must\s+always\b', re.IGNORECASE), "behavioral:must_always"),
    (re.compile(r'\bchange\s+your\s+behavior\b', re.IGNORECASE), "behavioral:change_behavior"),
]


def security_scan(content: str) -> dict:
    """Security scan for memory poisoning defense.

    Checks, in order:
    1. Injection patterns (``_INJECTION_PATTERNS``) — matches are flagged and
       replaced with ``[SANITIZED]`` in the sanitized output
    2. Encoding/obfuscation — long base64 blocks that decode to mostly
       printable text, more than 2 invisible characters (these are stripped
       from the sanitized output), more than 3 Cyrillic homoglyphs
    3. Behavioral phrases — content trying to modify behavior
    4. Credentials — via ``redact_secrets()``; flagged and redacted but
       deliberately not counted towards the risk score

    Detection only looks at the first ``_MAX_SECURITY_SCAN_LENGTH``
    characters, while the sanitizing substitutions are applied to the whole
    content.

    Args:
        content: Text content to scan

    Returns:
        Dict with safe (bool, True when risk_score < 0.5), flags (list),
        sanitized_content (str), risk_score (float 0-1, rounded to 3
        decimals). Empty or whitespace-only content is reported as safe with
        an empty sanitized_content.
    """
    if not content or not content.strip():
        return {"safe": True, "flags": [], "sanitized_content": "", "risk_score": 0.0}

    flags = []
    max_weight = 0.0
    matches_count = 0
    sanitized = content

    # Slicing is already a no-op for shorter strings.
    scan_text = content[:_MAX_SECURITY_SCAN_LENGTH]

    # --- Layer 1: Injection pattern detection ---
    for pattern, category, weight in _INJECTION_PATTERNS:
        if pattern.search(scan_text):
            flags.append(f"{category}:{pattern.pattern[:50]}")
            max_weight = max(max_weight, weight)
            matches_count += 1
            # Sanitize: remove the matched pattern
            sanitized = pattern.sub("[SANITIZED]", sanitized)

    # --- Layer 2: Encoding/obfuscation detection ---

    # Base64 blocks > 100 chars
    for b64_block in _BASE64_BLOCK_RE.findall(scan_text):
        try:
            decoded = base64.b64decode(b64_block).decode("utf-8", errors="ignore")
        except ValueError:
            # binascii.Error (malformed base64) is a ValueError subclass;
            # an undecodable block is simply not a hidden-text candidate.
            continue
        printable_ratio = len(re.sub(r'[^\x20-\x7E]', '', decoded)) / max(len(decoded), 1)
        if printable_ratio > 0.7 and len(decoded) > 10:
            flags.append(f"base64_encoded:{decoded[:60]}")
            max_weight = max(max_weight, 0.6)
            matches_count += 1

    # Zero-width / invisible characters
    invisible_chars = _INVISIBLE_CHARS_RE.findall(scan_text)
    if len(invisible_chars) > 2:
        flags.append(f"invisible_chars:{len(invisible_chars)}_found")
        max_weight = max(max_weight, 0.5)
        matches_count += 1
        # Remove invisible chars
        sanitized = _INVISIBLE_CHARS_RE.sub('', sanitized)

    # Unicode homoglyphs — Cyrillic chars that look like Latin
    homoglyphs = _CYRILLIC_HOMOGLYPH_RE.findall(scan_text)
    if len(homoglyphs) > 3:
        flags.append(f"unicode_homoglyphs:{len(homoglyphs)}_cyrillic")
        max_weight = max(max_weight, 0.5)
        matches_count += 1

    # --- Layer 3: Behavioral scoring ---
    for behavioral_re, label in _BEHAVIORAL_PATTERNS:
        if behavioral_re.search(scan_text):
            flags.append(label)
            max_weight = max(max_weight, 0.4)
            matches_count += 1

    # --- Layer 4: Credential detection (reuse existing redact_secrets) ---
    if redact_secrets(scan_text) != scan_text:
        flags.append("credentials_detected")
        sanitized = redact_secrets(sanitized)
        # Don't increase risk score for creds — still store (redacted)
        # but flag for awareness

    # Risk score (0-1): the heaviest match dominates, every additional
    # match adds a flat 0.05.
    if matches_count == 0:
        risk_score = 0.0
    else:
        risk_score = min(1.0, max_weight + (matches_count - 1) * 0.05)

    return {
        "safe": risk_score < 0.5,
        "flags": flags,
        "sanitized_content": sanitized,
        "risk_score": round(risk_score, 3),
    }
791
-
792
-
793
- # ─── Somatic Markers ────────────────────────────────────────────────
794
-
795
def somatic_accumulate(target: str, target_type: str, delta: float):
    """Raise the risk score of a somatic marker, creating it if needed.

    An existing ``(target, target_type)`` marker gets ``delta`` added to its
    risk score (capped at 1.0) and its incident counter incremented. A
    missing marker is inserted with risk ``min(1.0, delta)`` and one
    incident. ``last_incident`` and ``updated_at`` are set to the current
    UTC time in both cases, and the change is committed.

    Args:
        target: Marker target (the file or area name).
        target_type: Marker type stored alongside the target.
        delta: Amount to add to the risk score.
    """
    conn = _get_db()
    stamp = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S")
    marker = conn.execute(
        "SELECT id, risk_score, incident_count FROM somatic_markers WHERE target = ? AND target_type = ?",
        (target, target_type)
    ).fetchone()

    if not marker:
        conn.execute(
            "INSERT INTO somatic_markers (target, target_type, risk_score, incident_count, last_incident, updated_at) "
            "VALUES (?, ?, ?, 1, ?, ?)",
            (target, target_type, min(1.0, delta), stamp, stamp)
        )
    else:
        raised = min(1.0, marker["risk_score"] + delta)
        conn.execute(
            "UPDATE somatic_markers SET risk_score = ?, incident_count = incident_count + 1, "
            "last_incident = ?, updated_at = ? WHERE id = ?",
            (raised, stamp, stamp, marker["id"])
        )
    conn.commit()
817
-
818
-
819
def somatic_guard_decay(target: str, target_type: str):
    """Validated recovery: multiply a marker's risk score by 0.7.

    Does nothing when the marker does not exist, when its risk score is
    already <= 0, or when ``last_guard_decay_date`` equals today's UTC date
    (at most one decay per day per target). Scores that fall below 0.01 are
    snapped to 0.0. The update is committed.

    Args:
        target: Marker target (the file or area name).
        target_type: Marker type stored alongside the target.
    """
    db = _get_db()
    # One clock reading for both values, so the stored date and timestamp can
    # never disagree when the call happens right at midnight UTC.
    moment = datetime.utcnow()
    today = moment.strftime("%Y-%m-%d")
    now = moment.strftime("%Y-%m-%dT%H:%M:%S")
    row = db.execute(
        "SELECT id, risk_score, last_guard_decay_date FROM somatic_markers WHERE target = ? AND target_type = ?",
        (target, target_type)
    ).fetchone()
    if not row or row["risk_score"] <= 0:
        return
    if row["last_guard_decay_date"] == today:
        return
    # risk_score is strictly positive here, so the product cannot go negative.
    new_score = row["risk_score"] * 0.7
    if new_score < 0.01:
        new_score = 0.0
    db.execute(
        "UPDATE somatic_markers SET risk_score = ?, last_guard_decay_date = ?, "
        "last_validated_at = ?, updated_at = datetime('now') WHERE id = ?",
        (new_score, today, now, row["id"])
    )
    db.commit()
841
-
842
-
843
def somatic_nightly_decay(gamma: float = 0.95):
    """Multiply the risk score of every marker with risk > 0 by ``gamma``.

    Scores that drop below 0.01 are snapped to 0.0. ``last_decay`` and
    ``updated_at`` are set to SQLite's ``datetime('now')`` for every touched
    row, and the batch is committed once at the end.

    Args:
        gamma: Multiplicative decay factor.

    Returns:
        Number of markers that were updated.
    """
    conn = _get_db()
    active_markers = conn.execute(
        "SELECT id, risk_score FROM somatic_markers WHERE risk_score > 0"
    ).fetchall()

    for marker in active_markers:
        decayed = marker["risk_score"] * gamma
        conn.execute(
            "UPDATE somatic_markers SET risk_score = ?, last_decay = datetime('now'), updated_at = datetime('now') WHERE id = ?",
            (0.0 if decayed < 0.01 else decayed, marker["id"])
        )

    conn.commit()
    return len(active_markers)
859
-
860
-
861
def somatic_project_events():
    """Project unprojected ``somatic_events`` rows into ``somatic_markers``.

    Reads every event with ``projected = 0`` (ordered by id) from the
    connection returned by ``db.get_db()``, applies it through
    ``somatic_accumulate`` and marks it as projected.

    The ``projected`` flag is committed right after each event is applied.
    ``somatic_accumulate`` commits its own change immediately, so deferring
    the flag commit to the end of the batch would leave already-applied
    events unflagged if a later event fails, and they would be applied a
    second time on the next run.

    Best-effort: any exception (including the ``db`` module being
    unavailable) stops the run without raising.

    Returns:
        Number of events that were applied and flagged, 0 if none.
    """
    projected = 0
    try:
        from db import get_db
        conn = get_db()
        rows = conn.execute(
            "SELECT id, target, target_type, delta FROM somatic_events WHERE projected = 0 ORDER BY id"
        ).fetchall()
        for row in rows:
            somatic_accumulate(row["target"], row["target_type"], row["delta"])
            conn.execute("UPDATE somatic_events SET projected = 1 WHERE id = ?", (row["id"],))
            conn.commit()
            projected += 1
    except Exception:
        # Deliberate best-effort boundary: this runs unattended, so a
        # failure must not propagate. Report what was actually completed.
        return projected
    return projected
878
-
879
-
880
def somatic_get_risk(targets: list, area: str = "") -> dict:
    """Look up somatic risk for a list of file targets and an optional area.

    Only markers with a risk score above 0 are reported. File targets are
    keyed by their own name, the area (if given) under ``"area:<area>"``.

    Args:
        targets: Targets looked up with ``target_type = 'file'``.
        area: Optional target looked up with ``target_type = 'area'``.

    Returns:
        ``{"max_risk": float, "scores": {...}}`` where each score entry has
        ``risk`` (rounded to 3 decimals), ``incidents`` and ``last``
        (``"unknown"`` when no last incident is recorded). ``max_risk`` is
        0.0 when nothing was found.
    """
    conn = _get_db()
    file_sql = "SELECT risk_score, incident_count, last_incident FROM somatic_markers WHERE target = ? AND target_type = 'file'"
    area_sql = "SELECT risk_score, incident_count, last_incident FROM somatic_markers WHERE target = ? AND target_type = 'area'"

    def summarize(sql, name):
        # Returns the score entry for one marker, or None when the marker is
        # missing or carries no risk.
        marker = conn.execute(sql, (name,)).fetchone()
        if marker and marker["risk_score"] > 0:
            return {"risk": round(marker["risk_score"], 3), "incidents": marker["incident_count"],
                    "last": marker["last_incident"] or "unknown"}
        return None

    scores = {}
    for path in targets:
        entry = summarize(file_sql, path)
        if entry is not None:
            scores[path] = entry

    if area:
        entry = summarize(area_sql, area)
        if entry is not None:
            scores[f"area:{area}"] = entry

    highest = max((entry["risk"] for entry in scores.values()), default=0.0)
    return {"max_risk": highest, "scores": scores}
902
-
903
-
904
def somatic_top_risks(limit: int = 10) -> list:
    """Return the highest-risk somatic markers, regardless of target type.

    Args:
        limit: Maximum number of markers to return.

    Returns:
        Up to ``limit`` dicts (target, target_type, risk_score,
        incident_count, last_incident) for markers with risk > 0, ordered by
        risk score descending.
    """
    conn = _get_db()
    query = (
        "SELECT target, target_type, risk_score, incident_count, last_incident "
        "FROM somatic_markers WHERE risk_score > 0 ORDER BY risk_score DESC LIMIT ?"
    )
    riskiest = conn.execute(query, (limit,)).fetchall()
    return list(map(dict, riskiest))