nexo-brain 2.3.0 → 2.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. package/README.md +1 -1
  2. package/bin/nexo-brain.js +92 -9
  3. package/bin/postinstall.js +22 -15
  4. package/package.json +7 -4
  5. package/src/auto_update.py +194 -5
  6. package/src/crons/sync.py +6 -2
  7. package/src/db/_core.py +1 -0
  8. package/src/db/_entities.py +1 -0
  9. package/src/db/_episodic.py +1 -0
  10. package/src/db/_learnings.py +1 -0
  11. package/src/db/_reminders.py +1 -0
  12. package/src/db/_schema.py +11 -1
  13. package/src/db/_sessions.py +1 -0
  14. package/src/db/_skills.py +1 -0
  15. package/src/hooks/capture-tool-logs.sh +23 -6
  16. package/src/hooks/session-start.sh +4 -3
  17. package/src/plugin_loader.py +1 -0
  18. package/src/plugins/update.py +377 -26
  19. package/src/scripts/deep-sleep/apply_findings.py +1 -0
  20. package/src/scripts/deep-sleep/collect.py +1 -0
  21. package/src/scripts/deep-sleep/extract.py +1 -0
  22. package/src/scripts/deep-sleep/synthesize.py +1 -0
  23. package/src/scripts/nexo-catchup.py +29 -4
  24. package/src/scripts/nexo-daily-self-audit.py +21 -1
  25. package/src/scripts/nexo-evolution-run.py +21 -1
  26. package/src/scripts/nexo-learning-housekeep.py +1 -0
  27. package/src/scripts/nexo-postmortem-consolidator.py +34 -9
  28. package/src/scripts/nexo-sleep.py +32 -10
  29. package/src/scripts/nexo-synthesis.py +29 -9
  30. package/src/scripts/nexo-update.sh +109 -7
  31. package/src/scripts/nexo-watchdog.sh +122 -58
  32. package/src/server.py +66 -1
  33. package/src/tools_coordination.py +1 -0
  34. package/src/tools_sessions.py +1 -0
  35. package/scripts/migrate-to-unified 2.sh +0 -813
  36. package/scripts/migrate-to-unified.sh +0 -813
  37. package/scripts/migrate-v1.5-to-v1.6 2.py +0 -778
  38. package/scripts/migrate-v1.5-to-v1.6.py +0 -778
  39. package/scripts/migrate-v1.7-to-v1.8 2.py +0 -214
  40. package/scripts/migrate-v1.7-to-v1.8.py +0 -214
  41. package/scripts/nexo-preflight.sh +0 -236
  42. package/scripts/pre-commit-check 2.sh +0 -55
  43. package/scripts/pre-commit-check.sh +0 -55
  44. package/src/__pycache__/auto_close_sessions.cpython-314.pyc +0 -0
  45. package/src/__pycache__/auto_update.cpython-310.pyc +0 -0
  46. package/src/__pycache__/hnsw_index.cpython-310.pyc +0 -0
  47. package/src/__pycache__/hnsw_index.cpython-314.pyc +0 -0
  48. package/src/__pycache__/kg_populate.cpython-310.pyc +0 -0
  49. package/src/__pycache__/knowledge_graph.cpython-310.pyc +0 -0
  50. package/src/__pycache__/plugin_loader.cpython-310.pyc +0 -0
  51. package/src/__pycache__/plugin_loader.cpython-314.pyc +0 -0
  52. package/src/__pycache__/tools_coordination.cpython-310.pyc +0 -0
  53. package/src/__pycache__/tools_credentials.cpython-310.pyc +0 -0
  54. package/src/__pycache__/tools_learnings.cpython-310.pyc +0 -0
  55. package/src/__pycache__/tools_menu.cpython-310.pyc +0 -0
  56. package/src/__pycache__/tools_reminders.cpython-310.pyc +0 -0
  57. package/src/__pycache__/tools_reminders_crud.cpython-310.pyc +0 -0
  58. package/src/__pycache__/tools_sessions.cpython-310.pyc +0 -0
  59. package/src/__pycache__/tools_task_history.cpython-310.pyc +0 -0
  60. package/src/auto_close_sessions 2.py +0 -159
  61. package/src/auto_update 2.py +0 -634
  62. package/src/claim_graph 2.py +0 -323
  63. package/src/cognitive/__init__ 2.py +0 -62
  64. package/src/cognitive/__pycache__/__init__.cpython-310.pyc +0 -0
  65. package/src/cognitive/__pycache__/__init__.cpython-312.pyc +0 -0
  66. package/src/cognitive/__pycache__/__init__.cpython-314.pyc +0 -0
  67. package/src/cognitive/__pycache__/_core.cpython-310.pyc +0 -0
  68. package/src/cognitive/__pycache__/_core.cpython-312.pyc +0 -0
  69. package/src/cognitive/__pycache__/_core.cpython-314.pyc +0 -0
  70. package/src/cognitive/__pycache__/_decay.cpython-310.pyc +0 -0
  71. package/src/cognitive/__pycache__/_decay.cpython-312.pyc +0 -0
  72. package/src/cognitive/__pycache__/_decay.cpython-314.pyc +0 -0
  73. package/src/cognitive/__pycache__/_ingest.cpython-310.pyc +0 -0
  74. package/src/cognitive/__pycache__/_ingest.cpython-312.pyc +0 -0
  75. package/src/cognitive/__pycache__/_ingest.cpython-314.pyc +0 -0
  76. package/src/cognitive/__pycache__/_memory.cpython-310.pyc +0 -0
  77. package/src/cognitive/__pycache__/_memory.cpython-312.pyc +0 -0
  78. package/src/cognitive/__pycache__/_memory.cpython-314.pyc +0 -0
  79. package/src/cognitive/__pycache__/_search.cpython-310.pyc +0 -0
  80. package/src/cognitive/__pycache__/_search.cpython-312.pyc +0 -0
  81. package/src/cognitive/__pycache__/_search.cpython-314.pyc +0 -0
  82. package/src/cognitive/__pycache__/_trust.cpython-310.pyc +0 -0
  83. package/src/cognitive/__pycache__/_trust.cpython-312.pyc +0 -0
  84. package/src/cognitive/__pycache__/_trust.cpython-314.pyc +0 -0
  85. package/src/cognitive/_core 2.py +0 -567
  86. package/src/cognitive/_decay 2.py +0 -382
  87. package/src/cognitive/_ingest 2.py +0 -892
  88. package/src/cognitive/_memory 2.py +0 -912
  89. package/src/cognitive/_search 2.py +0 -949
  90. package/src/cognitive/_trust 2.py +0 -464
  91. package/src/crons/__pycache__/sync.cpython-314.pyc +0 -0
  92. package/src/crons/manifest 2.json +0 -106
  93. package/src/crons/sync 2.py +0 -217
  94. package/src/dashboard/__init__ 2.py +0 -0
  95. package/src/dashboard/__pycache__/__init__.cpython-310.pyc +0 -0
  96. package/src/dashboard/__pycache__/app.cpython-310.pyc +0 -0
  97. package/src/dashboard/app 2.py +0 -789
  98. package/src/db/__init__ 2.py +0 -89
  99. package/src/db/__pycache__/__init__.cpython-310.pyc +0 -0
  100. package/src/db/__pycache__/__init__.cpython-312.pyc +0 -0
  101. package/src/db/__pycache__/__init__.cpython-314.pyc +0 -0
  102. package/src/db/__pycache__/_core.cpython-310.pyc +0 -0
  103. package/src/db/__pycache__/_core.cpython-312.pyc +0 -0
  104. package/src/db/__pycache__/_core.cpython-314.pyc +0 -0
  105. package/src/db/__pycache__/_credentials.cpython-310.pyc +0 -0
  106. package/src/db/__pycache__/_credentials.cpython-312.pyc +0 -0
  107. package/src/db/__pycache__/_credentials.cpython-314.pyc +0 -0
  108. package/src/db/__pycache__/_cron_runs.cpython-310.pyc +0 -0
  109. package/src/db/__pycache__/_cron_runs.cpython-314.pyc +0 -0
  110. package/src/db/__pycache__/_entities.cpython-310.pyc +0 -0
  111. package/src/db/__pycache__/_entities.cpython-312.pyc +0 -0
  112. package/src/db/__pycache__/_entities.cpython-314.pyc +0 -0
  113. package/src/db/__pycache__/_episodic.cpython-310.pyc +0 -0
  114. package/src/db/__pycache__/_episodic.cpython-312.pyc +0 -0
  115. package/src/db/__pycache__/_episodic.cpython-314.pyc +0 -0
  116. package/src/db/__pycache__/_evolution.cpython-310.pyc +0 -0
  117. package/src/db/__pycache__/_evolution.cpython-312.pyc +0 -0
  118. package/src/db/__pycache__/_evolution.cpython-314.pyc +0 -0
  119. package/src/db/__pycache__/_fts.cpython-310.pyc +0 -0
  120. package/src/db/__pycache__/_fts.cpython-312.pyc +0 -0
  121. package/src/db/__pycache__/_fts.cpython-314.pyc +0 -0
  122. package/src/db/__pycache__/_learnings.cpython-310.pyc +0 -0
  123. package/src/db/__pycache__/_learnings.cpython-312.pyc +0 -0
  124. package/src/db/__pycache__/_learnings.cpython-314.pyc +0 -0
  125. package/src/db/__pycache__/_reminders.cpython-310.pyc +0 -0
  126. package/src/db/__pycache__/_reminders.cpython-312.pyc +0 -0
  127. package/src/db/__pycache__/_reminders.cpython-314.pyc +0 -0
  128. package/src/db/__pycache__/_schema.cpython-310.pyc +0 -0
  129. package/src/db/__pycache__/_schema.cpython-312.pyc +0 -0
  130. package/src/db/__pycache__/_schema.cpython-314.pyc +0 -0
  131. package/src/db/__pycache__/_sessions.cpython-310.pyc +0 -0
  132. package/src/db/__pycache__/_sessions.cpython-312.pyc +0 -0
  133. package/src/db/__pycache__/_sessions.cpython-314.pyc +0 -0
  134. package/src/db/__pycache__/_skills.cpython-310.pyc +0 -0
  135. package/src/db/__pycache__/_skills.cpython-312.pyc +0 -0
  136. package/src/db/__pycache__/_skills.cpython-314.pyc +0 -0
  137. package/src/db/__pycache__/_tasks.cpython-310.pyc +0 -0
  138. package/src/db/__pycache__/_tasks.cpython-312.pyc +0 -0
  139. package/src/db/__pycache__/_tasks.cpython-314.pyc +0 -0
  140. package/src/db/_core 2.py +0 -417
  141. package/src/db/_credentials 2.py +0 -124
  142. package/src/db/_entities 2.py +0 -178
  143. package/src/db/_episodic 2.py +0 -738
  144. package/src/db/_evolution 2.py +0 -54
  145. package/src/db/_fts 2.py +0 -406
  146. package/src/db/_learnings 2.py +0 -168
  147. package/src/db/_reminders 2.py +0 -338
  148. package/src/db/_schema 2.py +0 -364
  149. package/src/db/_sessions 2.py +0 -300
  150. package/src/db/_tasks 2.py +0 -91
  151. package/src/evolution_cycle 2.py +0 -266
  152. package/src/hnsw_index 2.py +0 -254
  153. package/src/hooks/auto_capture 2.py +0 -208
  154. package/src/hooks/caffeinate-guard 2.sh +0 -8
  155. package/src/hooks/capture-session 2.sh +0 -21
  156. package/src/hooks/capture-tool-logs 2.sh +0 -127
  157. package/src/hooks/daily-briefing-check 2.sh +0 -33
  158. package/src/hooks/inbox-hook 2.sh +0 -76
  159. package/src/hooks/post-compact 2.sh +0 -148
  160. package/src/hooks/pre-compact 2.sh +0 -151
  161. package/src/hooks/session-start 2.sh +0 -268
  162. package/src/hooks/session-stop 2.sh +0 -140
  163. package/src/kg_populate 2.py +0 -290
  164. package/src/knowledge_graph 2.py +0 -257
  165. package/src/maintenance 2.py +0 -59
  166. package/src/migrate_embeddings 2.py +0 -122
  167. package/src/plugin_loader 2.py +0 -202
  168. package/src/plugins/__init__ 2.py +0 -0
  169. package/src/plugins/__pycache__/__init__ 2.cpython-310.pyc +0 -0
  170. package/src/plugins/__pycache__/__init__.cpython-310.pyc +0 -0
  171. package/src/plugins/__pycache__/__init__.cpython-314.pyc +0 -0
  172. package/src/plugins/__pycache__/adaptive_mode 2.cpython-310.pyc +0 -0
  173. package/src/plugins/__pycache__/adaptive_mode.cpython-310.pyc +0 -0
  174. package/src/plugins/__pycache__/adaptive_mode.cpython-314.pyc +0 -0
  175. package/src/plugins/__pycache__/agents 2.cpython-310.pyc +0 -0
  176. package/src/plugins/__pycache__/agents.cpython-310.pyc +0 -0
  177. package/src/plugins/__pycache__/artifact_registry 2.cpython-310.pyc +0 -0
  178. package/src/plugins/__pycache__/artifact_registry.cpython-310.pyc +0 -0
  179. package/src/plugins/__pycache__/backup 2.cpython-310.pyc +0 -0
  180. package/src/plugins/__pycache__/backup.cpython-310.pyc +0 -0
  181. package/src/plugins/__pycache__/cognitive_memory 2.cpython-310.pyc +0 -0
  182. package/src/plugins/__pycache__/cognitive_memory.cpython-310.pyc +0 -0
  183. package/src/plugins/__pycache__/core_rules 2.cpython-310.pyc +0 -0
  184. package/src/plugins/__pycache__/core_rules.cpython-310.pyc +0 -0
  185. package/src/plugins/__pycache__/cortex 2.cpython-310.pyc +0 -0
  186. package/src/plugins/__pycache__/cortex.cpython-310.pyc +0 -0
  187. package/src/plugins/__pycache__/entities 2.cpython-310.pyc +0 -0
  188. package/src/plugins/__pycache__/entities.cpython-310.pyc +0 -0
  189. package/src/plugins/__pycache__/episodic_memory 2.cpython-310.pyc +0 -0
  190. package/src/plugins/__pycache__/episodic_memory.cpython-310.pyc +0 -0
  191. package/src/plugins/__pycache__/evolution 2.cpython-310.pyc +0 -0
  192. package/src/plugins/__pycache__/evolution.cpython-310.pyc +0 -0
  193. package/src/plugins/__pycache__/guard 2.cpython-310.pyc +0 -0
  194. package/src/plugins/__pycache__/guard.cpython-310.pyc +0 -0
  195. package/src/plugins/__pycache__/knowledge_graph_tools 2.cpython-310.pyc +0 -0
  196. package/src/plugins/__pycache__/knowledge_graph_tools.cpython-310.pyc +0 -0
  197. package/src/plugins/__pycache__/preferences 2.cpython-310.pyc +0 -0
  198. package/src/plugins/__pycache__/preferences.cpython-310.pyc +0 -0
  199. package/src/plugins/__pycache__/schedule.cpython-310.pyc +0 -0
  200. package/src/plugins/__pycache__/schedule.cpython-314.pyc +0 -0
  201. package/src/plugins/__pycache__/skills.cpython-310.pyc +0 -0
  202. package/src/plugins/__pycache__/skills.cpython-314.pyc +0 -0
  203. package/src/plugins/__pycache__/update 2.cpython-310.pyc +0 -0
  204. package/src/plugins/__pycache__/update.cpython-310.pyc +0 -0
  205. package/src/plugins/adaptive_mode 2.py +0 -805
  206. package/src/plugins/agents 2.py +0 -52
  207. package/src/plugins/artifact_registry 2.py +0 -450
  208. package/src/plugins/backup 2.py +0 -104
  209. package/src/plugins/cognitive_memory 2.py +0 -564
  210. package/src/plugins/core_rules 2.py +0 -252
  211. package/src/plugins/cortex 2.py +0 -299
  212. package/src/plugins/entities 2.py +0 -67
  213. package/src/plugins/episodic_memory 2.py +0 -533
  214. package/src/plugins/evolution 2.py +0 -115
  215. package/src/plugins/guard 2.py +0 -746
  216. package/src/plugins/knowledge_graph_tools 2.py +0 -105
  217. package/src/plugins/preferences 2.py +0 -47
  218. package/src/plugins/update 2.py +0 -256
  219. package/src/requirements 2.txt +0 -12
  220. package/src/rules/__init__ 2.py +0 -0
  221. package/src/rules/core-rules 2.json +0 -331
  222. package/src/rules/migrate 2.py +0 -207
  223. package/src/scripts/__pycache__/nexo-auto-update.cpython-314.pyc +0 -0
  224. package/src/scripts/__pycache__/nexo-catchup.cpython-314.pyc +0 -0
  225. package/src/scripts/__pycache__/nexo-cognitive-decay.cpython-314.pyc +0 -0
  226. package/src/scripts/__pycache__/nexo-daily-self-audit.cpython-314.pyc +0 -0
  227. package/src/scripts/__pycache__/nexo-evolution-run.cpython-314.pyc +0 -0
  228. package/src/scripts/__pycache__/nexo-followup-hygiene.cpython-314.pyc +0 -0
  229. package/src/scripts/__pycache__/nexo-immune.cpython-314.pyc +0 -0
  230. package/src/scripts/__pycache__/nexo-install.cpython-314.pyc +0 -0
  231. package/src/scripts/__pycache__/nexo-learning-housekeep.cpython-314.pyc +0 -0
  232. package/src/scripts/__pycache__/nexo-learning-validator.cpython-314.pyc +0 -0
  233. package/src/scripts/__pycache__/nexo-migrate.cpython-314.pyc +0 -0
  234. package/src/scripts/__pycache__/nexo-postmortem-consolidator.cpython-314.pyc +0 -0
  235. package/src/scripts/__pycache__/nexo-pre-commit.cpython-314.pyc +0 -0
  236. package/src/scripts/__pycache__/nexo-proactive-dashboard.cpython-314.pyc +0 -0
  237. package/src/scripts/__pycache__/nexo-reflection.cpython-314.pyc +0 -0
  238. package/src/scripts/__pycache__/nexo-runtime-preflight.cpython-314.pyc +0 -0
  239. package/src/scripts/__pycache__/nexo-send-email.cpython-314.pyc +0 -0
  240. package/src/scripts/__pycache__/nexo-send-reply.cpython-314.pyc +0 -0
  241. package/src/scripts/__pycache__/nexo-sleep.cpython-314.pyc +0 -0
  242. package/src/scripts/__pycache__/nexo-synthesis.cpython-314.pyc +0 -0
  243. package/src/scripts/__pycache__/nexo-watchdog-smoke.cpython-314.pyc +0 -0
  244. package/src/scripts/check-context 2.py +0 -264
  245. package/src/scripts/nexo-auto-update 2.py +0 -6
  246. package/src/scripts/nexo-backup 2.sh +0 -25
  247. package/src/scripts/nexo-brain-activation 2.sh +0 -140
  248. package/src/scripts/nexo-catchup 2.py +0 -242
  249. package/src/scripts/nexo-cognitive-decay 2.py +0 -182
  250. package/src/scripts/nexo-daily-self-audit 2.py +0 -552
  251. package/src/scripts/nexo-deep-sleep 2.sh +0 -97
  252. package/src/scripts/nexo-evolution-run 2.py +0 -597
  253. package/src/scripts/nexo-followup-hygiene 2.py +0 -112
  254. package/src/scripts/nexo-github-monitor 2.py +0 -256
  255. package/src/scripts/nexo-immune 2.py +0 -927
  256. package/src/scripts/nexo-inbox-hook 2.sh +0 -74
  257. package/src/scripts/nexo-install 2.py +0 -6
  258. package/src/scripts/nexo-learning-housekeep 2.py +0 -245
  259. package/src/scripts/nexo-learning-validator 2.py +0 -207
  260. package/src/scripts/nexo-migrate 2.py +0 -232
  261. package/src/scripts/nexo-postmortem-consolidator 2.py +0 -421
  262. package/src/scripts/nexo-pre-commit 2.py +0 -120
  263. package/src/scripts/nexo-prevent-sleep 2.sh +0 -29
  264. package/src/scripts/nexo-proactive-dashboard 2.py +0 -345
  265. package/src/scripts/nexo-reflection 2.py +0 -253
  266. package/src/scripts/nexo-runtime-preflight 2.py +0 -274
  267. package/src/scripts/nexo-send-email 2.py +0 -25
  268. package/src/scripts/nexo-send-email.py +0 -25
  269. package/src/scripts/nexo-send-reply 2.py +0 -178
  270. package/src/scripts/nexo-send-reply.py +0 -178
  271. package/src/scripts/nexo-sleep 2.py +0 -592
  272. package/src/scripts/nexo-snapshot-restore 2.sh +0 -35
  273. package/src/scripts/nexo-synthesis 2.py +0 -253
  274. package/src/scripts/nexo-tcc-approve 2.sh +0 -79
  275. package/src/scripts/nexo-update 2.sh +0 -161
  276. package/src/scripts/nexo-watchdog 2.sh +0 -878
  277. package/src/scripts/nexo-watchdog-smoke 2.py +0 -119
  278. package/src/server 2.py +0 -733
  279. package/src/storage_router 2.py +0 -32
  280. package/src/tools_coordination 2.py +0 -102
  281. package/src/tools_credentials 2.py +0 -68
  282. package/src/tools_learnings 2.py +0 -220
  283. package/src/tools_menu 2.py +0 -227
  284. package/src/tools_reminders 2.py +0 -86
  285. package/src/tools_reminders_crud 2.py +0 -159
  286. package/src/tools_sessions 2.py +0 -476
  287. package/src/tools_task_history 2.py +0 -57
  288. package/templates/CLAUDE.md 2.template +0 -63
  289. package/templates/openclaw 2.json +0 -13
  290. package/tests/__init__ 2.py +0 -0
  291. package/tests/__init__.py +0 -0
  292. package/tests/conftest 2.py +0 -71
  293. package/tests/conftest.py +0 -71
  294. package/tests/test_cognitive 2.py +0 -205
  295. package/tests/test_cognitive.py +0 -205
  296. package/tests/test_knowledge_graph 2.py +0 -140
  297. package/tests/test_knowledge_graph.py +0 -140
  298. package/tests/test_migrations 2.py +0 -137
  299. package/tests/test_migrations.py +0 -137
@@ -1,892 +0,0 @@
1
- """NEXO Cognitive — Ingest, prediction error gate, quarantine, security."""
2
- import json, math, re, base64
3
- import numpy as np
4
- from datetime import datetime, timedelta
5
- from typing import Optional
6
- from cognitive._core import (
7
- _get_db, embed, cosine_similarity, _blob_to_array, _array_to_blob,
8
- redact_secrets, extract_temporal_date, EMBEDDING_DIM,
9
- PE_GATE_REJECT, PE_GATE_REFINE, _gate_stats,
10
- )
11
-
12
-
13
- def _hnsw_notify_insert(store: str, db_id: int, vec: np.ndarray):
14
- """Notify HNSW index of a new memory insertion (best-effort)."""
15
- try:
16
- import hnsw_index
17
- if hnsw_index.is_available():
18
- hnsw_index.add_item(store, db_id, vec)
19
- except Exception:
20
- pass
21
-
22
- def ingest(
23
- content: str,
24
- source_type: str,
25
- source_id: str = "",
26
- source_title: str = "",
27
- domain: str = "",
28
- source: str = "inferred",
29
- skip_quarantine: bool = False,
30
- bypass_gate: bool = False,
31
- bypass_security: bool = False,
32
- auto_pin: bool = False,
33
- ) -> int:
34
- """Embed and store content. Routes through quarantine unless skip_quarantine=True or source='user_direct'.
35
-
36
- Security scan runs FIRST (unless bypass_security=True).
37
- Prediction Error Gate runs BEFORE storage unless bypass_gate=True.
38
- If gate rejects (content too similar to existing memory), returns 0.
39
- If gate says 'refinement', merges into existing memory and returns its ID.
40
-
41
- Args:
42
- content: Text content to store
43
- source_type: Type of source (e.g. 'learning', 'change', 'diary')
44
- source_id: Optional source identifier
45
- source_title: Optional title
46
- domain: Optional domain tag
47
- source: Origin — 'user_direct', 'inferred', or 'agent_observation'
48
- skip_quarantine: If True, bypass quarantine and store directly in STM (backward compat)
49
- bypass_gate: If True, skip prediction error gate and store regardless
50
- bypass_security: If True, skip security scan (for trusted sources)
51
-
52
- Returns:
53
- Row ID (negative if quarantined, 0 if gate-rejected, positive if stored/refined)
54
- """
55
- # Security scan BEFORE prediction error gate (adapted from ShieldCortex pipeline)
56
- if not bypass_security:
57
- scan = security_scan(content)
58
- if scan["risk_score"] >= 0.8:
59
- # High risk — reject with reason logged
60
- return 0
61
- if scan["sanitized_content"] != content:
62
- # Use sanitized content going forward
63
- content = scan["sanitized_content"]
64
-
65
- # Run prediction error gate unless bypassed
66
- if not bypass_gate:
67
- should_store, novelty, reason, match = prediction_error_gate(content)
68
- if not should_store:
69
- return 0 # Gate rejected — content is redundant
70
- if reason == "refinement" and match:
71
- return _refine_memory(match, content)
72
-
73
- db = _get_db()
74
- clean_content = redact_secrets(content)
75
- was_redacted = 1 if clean_content != content else 0
76
- vec = embed(clean_content)
77
- blob = _array_to_blob(vec)
78
- temporal = extract_temporal_date(content)
79
-
80
- # Auto-pin: corrections and blocking learnings get pinned (zero decay, +0.2 boost)
81
- # This ensures user's corrections NEVER fade away
82
- _pin_lifecycle = None
83
- if auto_pin or (source_type in ('learning', 'feedback') and
84
- any(kw in content.upper() for kw in ('BLOCKING', 'CRÍTICO', 'CRITICAL', 'NUNCA', 'NEVER', 'PROHIBIDO'))):
85
- _pin_lifecycle = 'pinned'
86
-
87
- # user_direct = fast-track: quarantine then immediate promote
88
- if source == "user_direct" and not skip_quarantine:
89
- cur = db.execute(
90
- """INSERT INTO quarantine (content, embedding, source, source_type, source_id, source_title, domain, confidence, status, promoted_at)
91
- VALUES (?, ?, ?, ?, ?, ?, ?, 1.0, 'promoted', datetime('now'))""",
92
- (clean_content, blob, source, source_type, source_id, source_title, domain)
93
- )
94
- db.commit()
95
- # Now actually store in STM
96
- cur2 = db.execute(
97
- """INSERT INTO stm_memories (content, embedding, source_type, source_id, source_title, domain, redaction_applied, temporal_date, lifecycle_state)
98
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
99
- (clean_content, blob, source_type, source_id, source_title, domain, was_redacted, temporal, _pin_lifecycle)
100
- )
101
- db.commit()
102
- _hnsw_notify_insert("stm", cur2.lastrowid, vec)
103
- return cur2.lastrowid
104
-
105
- # skip_quarantine = direct STM (backward compatibility)
106
- if skip_quarantine:
107
- cur = db.execute(
108
- """INSERT INTO stm_memories (content, embedding, source_type, source_id, source_title, domain, redaction_applied, temporal_date, lifecycle_state)
109
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
110
- (clean_content, blob, source_type, source_id, source_title, domain, was_redacted, temporal, _pin_lifecycle)
111
- )
112
- db.commit()
113
- _hnsw_notify_insert("stm", cur.lastrowid, vec)
114
- return cur.lastrowid
115
-
116
- # Route to quarantine
117
- cur = db.execute(
118
- """INSERT INTO quarantine (content, embedding, source, source_type, source_id, source_title, domain)
119
- VALUES (?, ?, ?, ?, ?, ?, ?)""",
120
- (clean_content, blob, source, source_type, source_id, source_title, domain)
121
- )
122
- db.commit()
123
- return -cur.lastrowid # Negative = quarantined
124
-
125
-
126
- def ingest_session(
127
- turns: list[dict],
128
- session_title: str = "",
129
- domain: str = "",
130
- chunk_size: int = 3,
131
- ) -> dict:
132
- """Ingest a conversation session with intelligent chunking and summary.
133
-
134
- Stores: (1) individual turns, (2) overlapping chunks for multi-hop context,
135
- (3) an extractive session summary.
136
-
137
- Args:
138
- turns: List of dicts with keys: content (required), source_id (optional), speaker (optional)
139
- session_title: Title for the session (e.g., "Session 5")
140
- domain: Domain tag
141
- chunk_size: Number of turns per chunk (default 3, with overlap of 1)
142
-
143
- Returns:
144
- Dict with counts: {"turns": N, "chunks": N, "summary": 1}
145
- """
146
- turn_ids = []
147
- turn_contents = []
148
- ingested_turns = 0
149
-
150
- # 1. Ingest individual turns
151
- for turn in turns:
152
- content = turn.get("content", "")
153
- source_id = turn.get("source_id", "")
154
- if not content:
155
- continue
156
-
157
- ingest(
158
- content=content,
159
- source_type="dialog",
160
- source_id=source_id,
161
- source_title=session_title,
162
- domain=domain,
163
- bypass_gate=True,
164
- skip_quarantine=True,
165
- bypass_security=True,
166
- )
167
- turn_ids.append(source_id)
168
- turn_contents.append(content)
169
- ingested_turns += 1
170
-
171
- # 2. Overlapping chunks for multi-hop context
172
- ingested_chunks = 0
173
- for i in range(0, len(turn_contents) - chunk_size + 1):
174
- chunk_content = "\n".join(turn_contents[i:i + chunk_size])
175
- chunk_ids = ",".join(turn_ids[i:i + chunk_size])
176
- ingest(
177
- content=chunk_content,
178
- source_type="dialog_chunk",
179
- source_id=chunk_ids,
180
- source_title=f"{session_title} chunk",
181
- domain=domain,
182
- bypass_gate=True,
183
- skip_quarantine=True,
184
- bypass_security=True,
185
- )
186
- ingested_chunks += 1
187
-
188
- # 3. Session summary (extractive)
189
- if turn_contents:
190
- speakers = set()
191
- topics = []
192
- for t in turn_contents:
193
- if ": " in t:
194
- parts = t.split(": ", 1)
195
- # Try to extract speaker from "[date] Speaker: text" pattern
196
- speaker_part = parts[0].split("] ")[-1] if "] " in parts[0] else parts[0]
197
- speakers.add(speaker_part.strip())
198
- topics.append(parts[1][:100])
199
- else:
200
- topics.append(t[:100])
201
-
202
- summary = f"{session_title} summary ({', '.join(speakers)}): "
203
- summary += " | ".join(topics[:5])
204
- if len(topics) > 5:
205
- summary += f" | ... ({len(topics)} total turns)"
206
-
207
- all_ids = ",".join(turn_ids)
208
- ingest(
209
- content=summary,
210
- source_type="session_summary",
211
- source_id=all_ids,
212
- source_title=f"{session_title} summary",
213
- domain=domain,
214
- bypass_gate=True,
215
- skip_quarantine=True,
216
- bypass_security=True,
217
- )
218
-
219
- return {"turns": ingested_turns, "chunks": ingested_chunks, "summary": 1 if turn_contents else 0}
220
-
221
-
222
- def ingest_to_ltm(
223
- content: str,
224
- source_type: str,
225
- source_id: str = "",
226
- source_title: str = "",
227
- domain: str = "",
228
- tags: str = "",
229
- bypass_gate: bool = False
230
- ) -> int:
231
- """Embed and store content directly in LTM. Returns row ID.
232
-
233
- Prediction Error Gate runs BEFORE storage unless bypass_gate=True.
234
- If gate rejects, returns 0. If refinement, merges and returns existing ID.
235
- """
236
- # Run prediction error gate unless bypassed
237
- if not bypass_gate:
238
- should_store, novelty, reason, match = prediction_error_gate(content)
239
- if not should_store:
240
- return 0 # Gate rejected
241
- if reason == "refinement" and match:
242
- return _refine_memory(match, content)
243
-
244
- db = _get_db()
245
- clean_content = redact_secrets(content)
246
- was_redacted = 1 if clean_content != content else 0
247
- vec = embed(clean_content)
248
- blob = _array_to_blob(vec)
249
- cur = db.execute(
250
- """INSERT INTO ltm_memories (content, embedding, source_type, source_id, source_title, domain, tags, redaction_applied)
251
- VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
252
- (clean_content, blob, source_type, source_id, source_title, domain, tags, was_redacted)
253
- )
254
- db.commit()
255
- return cur.lastrowid
256
-
257
- def ingest_sensory(
258
- content: str,
259
- source_id: str = "",
260
- domain: str = "",
261
- created_at: str = ""
262
- ) -> int:
263
- """Embed and store a sensory register event in STM with source_type='sensory'."""
264
- db = _get_db()
265
- clean_content = redact_secrets(content)
266
- was_redacted = 1 if clean_content != content else 0
267
- vec = embed(clean_content)
268
- blob = _array_to_blob(vec)
269
- ts = created_at or datetime.utcnow().isoformat()
270
- cur = db.execute(
271
- """INSERT INTO stm_memories (content, embedding, source_type, source_id, domain, created_at, redaction_applied)
272
- VALUES (?, ?, 'sensory', ?, ?, ?, ?)""",
273
- (clean_content, blob, source_id, domain, ts, was_redacted)
274
- )
275
- db.commit()
276
- return cur.lastrowid
277
-
278
- # ---------------------------------------------------------------------------
279
- # Prediction Error Gate — hippocampal novelty filter
280
- # ---------------------------------------------------------------------------
281
-
282
- def prediction_error_gate(
283
- content: str,
284
- threshold: float = PE_GATE_REJECT,
285
- refine_threshold: float = PE_GATE_REFINE,
286
- ) -> tuple[bool, float, str, Optional[dict]]:
287
- """Prediction Error Gate — hippocampal novelty filter for memory ingestion.
288
-
289
- Compares incoming content against ALL existing memories (STM + LTM).
290
- Decides whether the content is novel enough to store, a refinement of
291
- something existing, or redundant.
292
-
293
- Based on the neuroscience principle that prediction errors (mismatches
294
- between expected and actual input) gate what gets encoded into memory.
295
- High prediction error = novel = store. Low prediction error = redundant = reject.
296
-
297
- Args:
298
- content: The text content to evaluate
299
- threshold: Similarity above this -> reject as redundant (default 0.85)
300
- refine_threshold: Similarity between this and threshold -> refinement (default 0.70)
301
-
302
- Returns:
303
- Tuple of (should_store, novelty_score, reason, best_match_info)
304
- - should_store: True if content should be stored
305
- - novelty_score: 1.0 = completely novel, 0.0 = exact duplicate
306
- - reason: 'novel', 'refinement', 'rejected', or 'novel_sibling'
307
- - best_match_info: dict with best matching memory details, or None
308
- """
309
- global _gate_stats
310
-
311
- if not content or not content.strip():
312
- return (False, 0.0, "rejected", None)
313
-
314
- content_vec = embed(content[:500])
315
- if np.linalg.norm(content_vec) == 0:
316
- return (False, 0.0, "rejected", None)
317
-
318
- db = _get_db()
319
- best_score = 0.0
320
- best_match = None
321
-
322
- # Scan both STM and LTM for the closest match
323
- for table, store_name in [("stm_memories", "stm"), ("ltm_memories", "ltm")]:
324
- extra_where = ""
325
- if table == "stm_memories":
326
- extra_where = " AND promoted_to_ltm = 0"
327
- elif table == "ltm_memories":
328
- extra_where = " AND is_dormant = 0"
329
-
330
- rows = db.execute(
331
- f"SELECT id, content, embedding, source_type, domain FROM {table} WHERE 1=1{extra_where}"
332
- ).fetchall()
333
-
334
- for row in rows:
335
- vec = _blob_to_array(row["embedding"])
336
- score = cosine_similarity(content_vec, vec)
337
- if score > best_score:
338
- best_score = score
339
- best_match = {
340
- "store": store_name,
341
- "id": row["id"],
342
- "content": row["content"],
343
- "source_type": row["source_type"],
344
- "domain": row["domain"],
345
- "similarity": round(score, 4),
346
- }
347
-
348
- novelty_score = round(1.0 - best_score, 4)
349
-
350
- if best_score > threshold:
351
- # Check for siblings before rejecting -- if discriminating entities differ,
352
- # this is NOT a duplicate, it's a sibling (same fix for different platforms)
353
- if best_match:
354
- is_sibling, discriminators = _memories_are_siblings(content, best_match["content"])
355
- if is_sibling:
356
- _gate_stats["accepted_novel"] += 1
357
- best_match["discriminators"] = discriminators
358
- return (True, novelty_score, "novel_sibling", best_match)
359
-
360
- _gate_stats["rejected"] += 1
361
- return (False, novelty_score, "rejected", best_match)
362
-
363
- elif best_score >= refine_threshold:
364
- # Refinement zone -- similar but has enough new info to warrant update
365
- _gate_stats["accepted_refinement"] += 1
366
- return (True, novelty_score, "refinement", best_match)
367
-
368
- else:
369
- # Novel content -- no close match found
370
- _gate_stats["accepted_novel"] += 1
371
- return (True, novelty_score, "novel", best_match)
372
-
373
-
374
def _refine_memory(match_info: dict, new_content: str) -> int:
    """Fold *new_content* into the memory described by *match_info*.

    When the new text contributes fewer than three words that the stored
    text does not already contain (case-insensitive, whitespace-split), the
    stored text is left alone and the row is only reinforced. Otherwise the
    new text is appended after a ``[REFINED]:`` marker and the merged text is
    re-embedded.

    Args:
        match_info: Mapping with the keys ``store`` ("stm" selects
            ``stm_memories``, anything else ``ltm_memories``), ``id`` and
            ``content``.
        new_content: Text that refines the existing memory.

    Returns:
        The ID of the row that was updated (``match_info["id"]``).
    """
    conn = _get_db()
    if match_info["store"] == "stm":
        table = "stm_memories"
    else:
        table = "ltm_memories"
    target_id = match_info["id"]
    previous_text = match_info["content"]

    # Word-level novelty check: how many words are genuinely new?
    known_tokens = set(previous_text.lower().split())
    fresh_tokens = {
        token for token in new_content.lower().split() if token not in known_tokens
    }

    if len(fresh_tokens) >= 3:
        # Enough new material: append it and refresh the embedding.
        combined = previous_text + "\n\n[REFINED]: " + new_content
        blob = _array_to_blob(embed(combined))
        stamp = datetime.utcnow().isoformat()
        statement = (
            f"UPDATE {table} SET content = ?, embedding = ?, strength = MIN(1.0, strength + 0.15), "
            f"access_count = access_count + 1, last_accessed = ? WHERE id = ?"
        )
        arguments = (combined, blob, stamp, target_id)
    else:
        # Near-identical text: only strengthen the existing row.
        stamp = datetime.utcnow().isoformat()
        statement = (
            f"UPDATE {table} SET strength = MIN(1.0, strength + 0.1), "
            f"access_count = access_count + 1, last_accessed = ? WHERE id = ?"
        )
        arguments = (stamp, target_id)

    conn.execute(statement, arguments)
    conn.commit()
    return target_id
419
-
420
-
421
def get_gate_stats() -> dict:
    """Summarise the prediction-error gate counters held in ``_gate_stats``.

    Returns:
        Dict with the three individual counters, ``total_evaluated`` (the sum
        of every value in ``_gate_stats``) and ``rejection_rate_pct`` (the
        rejected share as a percentage rounded to one decimal, or 0.0 when
        nothing has been evaluated).
    """
    evaluated = sum(_gate_stats.values())
    novel = _gate_stats["accepted_novel"]
    refined = _gate_stats["accepted_refinement"]
    turned_away = _gate_stats["rejected"]

    if evaluated > 0:
        rejection_pct = round(turned_away / evaluated * 100, 1)
    else:
        rejection_pct = 0.0

    return {
        "accepted_novel": novel,
        "accepted_refinement": refined,
        "rejected": turned_away,
        "total_evaluated": evaluated,
        "rejection_rate_pct": rejection_pct,
    }
431
-
432
-
433
def detect_patterns(content_vec: np.ndarray, threshold: float = 0.65) -> list[dict]:
    """Find the non-dormant LTM memories most similar to *content_vec*.

    Args:
        content_vec: Query vector compared against each stored embedding.
        threshold: Minimum cosine similarity for a row to count as a match.

    Returns:
        At most five matches, highest score first. Each match carries
        ``ltm_id``, the first 200 characters of ``content``, ``source_type``,
        ``domain`` and the unrounded ``score``.
    """
    conn = _get_db()
    records = conn.execute(
        "SELECT id, content, embedding, source_type, domain FROM ltm_memories WHERE is_dormant = 0"
    ).fetchall()

    hits = []
    for record in records:
        similarity = cosine_similarity(content_vec, _blob_to_array(record["embedding"]))
        if similarity >= threshold:
            hits.append(
                {
                    "ltm_id": record["id"],
                    "content": record["content"][:200],
                    "source_type": record["source_type"],
                    "domain": record["domain"],
                    "score": similarity,
                }
            )

    # sorted() is stable, so ties keep their query order just like list.sort().
    ranked = sorted(hits, key=lambda hit: hit["score"], reverse=True)
    return ranked[:5]
451
-
452
-
453
def gc_sensory(max_age_hours: int = 48) -> int:
    """Delete unpromoted sensory STM rows created before the age cutoff.

    Args:
        max_age_hours: Rows whose ``created_at`` sorts before
            "now (UTC) minus this many hours" are removed.

    Returns:
        Number of rows deleted (0 when the cursor reports no row count).
    """
    conn = _get_db()
    oldest_kept = datetime.utcnow() - timedelta(hours=max_age_hours)
    cursor = conn.execute(
        "DELETE FROM stm_memories WHERE source_type = 'sensory' AND created_at < ? AND promoted_to_ltm = 0",
        (oldest_kept.isoformat(),),
    )
    conn.commit()
    removed = cursor.rowcount
    return removed if removed else 0
463
-
464
-
465
def gc_ltm_dormant(min_age_days: int = 30) -> int:
    """Delete weak, dormant LTM rows created before the age cutoff.

    A row is removed when it is flagged dormant, its strength is below 0.1
    and its ``created_at`` sorts before "now (UTC) minus *min_age_days*".

    Args:
        min_age_days: Minimum age, in days, before a row may be deleted.

    Returns:
        Number of rows deleted (0 when the cursor reports no row count).
    """
    conn = _get_db()
    oldest_kept = datetime.utcnow() - timedelta(days=min_age_days)
    cursor = conn.execute(
        "DELETE FROM ltm_memories WHERE is_dormant = 1 AND strength < 0.1 AND created_at < ?",
        (oldest_kept.isoformat(),),
    )
    conn.commit()
    removed = cursor.rowcount
    return removed if removed else 0
475
-
476
-
477
# Opposition markers (English and Spanish). A marker that appears in only one
# of two highly similar texts suggests that one text negates the other.
_NEGATION_MARKERS = (
    "not", "never", "don't", "doesn't", "no longer", "wrong",
    "incorrect", "false", "opposite", "instead", "but actually",
    "nunca", "no", "incorrecto", "falso", "contrario",
)

# One whole-word pattern per marker. Plain substring tests would find "no"
# inside "know" or "not" inside "nothing" and report bogus contradictions.
_NEGATION_MARKER_PATTERNS = {
    marker: re.compile(r"(?<!\w)" + re.escape(marker) + r"(?!\w)")
    for marker in _NEGATION_MARKERS
}


def _negation_markers_in(text: str) -> frozenset:
    """Return the negation markers occurring in lower-cased *text* as whole words."""
    return frozenset(
        marker
        for marker, pattern in _NEGATION_MARKER_PATTERNS.items()
        if pattern.search(text)
    )


def _check_quarantine_contradiction(content_vec: np.ndarray, new_content: str = "") -> list[dict]:
    """Check if a quarantined memory contradicts existing LTM.

    High cosine similarity (>= 0.8) means the topics are related, but that
    could be CONFIRMATION (same claim) or CONTRADICTION (opposite claim). The
    two are told apart by negation/opposition markers: a row is reported when
    some marker occurs, as a whole word or phrase, in exactly one of the two
    texts. Only non-dormant LTM rows with strength > 0.5 are considered.

    Args:
        content_vec: Embedding of the quarantined content.
        new_content: Text of the quarantined content. When empty, no
            opposition can be established and an empty list is returned
            without querying the database.

    Returns:
        One dict per contradicting LTM row with ``ltm_id``, the first 200
        characters of ``content``, ``similarity`` (rounded to 3 decimals),
        ``strength`` and ``reason`` ("semantic_opposition").
    """
    if not new_content:
        return []

    # The new text does not change per row, so scan it for markers only once.
    new_markers = _negation_markers_in(new_content.lower())

    db = _get_db()
    rows = db.execute(
        "SELECT id, content, embedding, strength FROM ltm_memories WHERE is_dormant = 0 AND strength > 0.5"
    ).fetchall()

    contradictions = []
    for row in rows:
        score = cosine_similarity(content_vec, _blob_to_array(row["embedding"]))
        if not score >= 0.8:
            continue

        # High similarity: confirmation or contradiction? Differing marker
        # sets mean at least one marker is present on one side only.
        existing_markers = _negation_markers_in(row["content"].lower())
        if new_markers != existing_markers:
            contradictions.append({
                "ltm_id": row["id"],
                "content": row["content"][:200],
                "similarity": round(score, 3),
                "strength": row["strength"],
                "reason": "semantic_opposition",
            })
        # Identical marker sets -> treated as confirmation, so skipped.

    return contradictions
527
-
528
-
529
def _check_quarantine_second_occurrence(content_vec: np.ndarray, exclude_id: int) -> bool:
    """Report whether a similar memory has already been seen elsewhere.

    Other quarantine entries (status 'pending' or 'promoted', excluding
    *exclude_id*) are checked first. Unpromoted STM memories are queried only
    if none of those is similar enough. A cosine similarity of 0.75 or more
    counts as a second occurrence.

    Args:
        content_vec: Embedding of the quarantine entry under evaluation.
        exclude_id: Quarantine ID of that entry, left out of the comparison.

    Returns:
        True as soon as one sufficiently similar row is found, else False.
    """
    conn = _get_db()

    def _has_close_match(records) -> bool:
        # any() stops at the first hit, mirroring an early return from a loop.
        return any(
            cosine_similarity(content_vec, _blob_to_array(record["embedding"])) >= 0.75
            for record in records
        )

    quarantine_rows = conn.execute(
        "SELECT id, embedding FROM quarantine WHERE id != ? AND status IN ('pending', 'promoted')",
        (exclude_id,),
    ).fetchall()
    if _has_close_match(quarantine_rows):
        return True

    # Also check STM for existing similar memories.
    stm_rows = conn.execute(
        "SELECT embedding FROM stm_memories WHERE promoted_to_ltm = 0"
    ).fetchall()
    return _has_close_match(stm_rows)
553
-
554
-
555
def _parse_quarantine_timestamp(value):
    """Parse a stored ``created_at`` value into a naive datetime.

    Accepts both ``YYYY-MM-DDTHH:MM:SS`` and ``YYYY-MM-DD HH:MM:SS`` forms.
    A value carrying a UTC offset is shifted to UTC and made naive so it can
    be compared with ``datetime.utcnow()``. Returns None when the value is
    not a parseable string.
    """
    try:
        parsed = datetime.fromisoformat(value)
    except (TypeError, ValueError):
        return None
    offset = parsed.utcoffset()
    if offset is not None:
        parsed = (parsed - offset).replace(tzinfo=None)
    return parsed


def process_quarantine() -> dict:
    """Process the quarantine queue — promote, reject, or expire items based on policy.

    Promotion policy, applied to every 'pending' row in this order:
    - older than 7 days → status='expired'
    - contradicts existing LTM → status='rejected'
    - source='inferred' + confirmed by a second occurrence → promote to STM
    - source='agent_observation' + at least 24h old → promote to STM
    - anything else stays pending and its ``promotion_checks`` is incremented
      (source='user_direct' is described upstream as promoted at ingest time)

    Ages are evaluated on parsed datetimes rather than on raw strings.
    NOTE(review): the storage format of ``created_at`` is not visible here.
    This function writes ``promoted_at`` with SQLite ``datetime('now')``
    (space-separated), whereas the cutoffs come from ``isoformat()``
    ('T'-separated). Comparing those two forms as text would be decided by
    the separator for same-day values. Values that cannot be parsed fall
    back to the plain string comparison.

    All changes are committed once, after the whole queue has been walked.

    Returns:
        Dict with counts: promoted, rejected, expired, still_pending and
        total_processed (their sum).
    """
    db = _get_db()
    now = datetime.utcnow()
    expire_cutoff_dt = now - timedelta(days=7)
    age_24h_dt = now - timedelta(hours=24)
    # String forms are only needed by the fallback comparison below.
    expire_cutoff = expire_cutoff_dt.isoformat()
    age_24h = age_24h_dt.isoformat()

    pending = db.execute(
        "SELECT * FROM quarantine WHERE status = 'pending'"
    ).fetchall()

    promoted = 0
    rejected = 0
    expired = 0
    still_pending = 0

    for row in pending:
        q_id = row["id"]
        content = row["content"]
        source = row["source"]
        created_at = row["created_at"]
        content_vec = _blob_to_array(row["embedding"])

        created_dt = _parse_quarantine_timestamp(created_at)
        if created_dt is not None:
            is_expired = created_dt < expire_cutoff_dt
            is_day_old = created_dt <= age_24h_dt
        else:
            # Unparseable timestamp: keep the lexicographic behaviour.
            is_expired = created_at < expire_cutoff
            is_day_old = created_at <= age_24h

        # Check expiration first
        if is_expired:
            db.execute("UPDATE quarantine SET status = 'expired' WHERE id = ?", (q_id,))
            expired += 1
            continue

        # Check for contradiction with LTM
        contradictions = _check_quarantine_contradiction(content_vec, content)
        if contradictions:
            db.execute(
                "UPDATE quarantine SET status = 'rejected', promotion_checks = promotion_checks + 1 WHERE id = ?",
                (q_id,),
            )
            rejected += 1
            continue

        should_promote = False
        if source == "inferred":
            # Promote if confirmed by a second occurrence
            should_promote = bool(_check_quarantine_second_occurrence(content_vec, q_id))
        elif source == "agent_observation":
            # Promote after 24h; contradiction was already ruled out above
            should_promote = is_day_old

        if should_promote:
            # Promote to STM
            db.execute(
                """INSERT INTO stm_memories (content, embedding, source_type, source_id, source_title, domain, redaction_applied)
                   VALUES (?, ?, ?, ?, ?, ?, 0)""",
                (content, row["embedding"], row["source_type"], row["source_id"],
                 row["source_title"], row["domain"]),
            )
            db.execute(
                "UPDATE quarantine SET status = 'promoted', promoted_at = datetime('now'), confidence = 1.0 WHERE id = ?",
                (q_id,),
            )
            promoted += 1
        else:
            db.execute("UPDATE quarantine SET promotion_checks = promotion_checks + 1 WHERE id = ?", (q_id,))
            still_pending += 1

    db.commit()

    return {
        "promoted": promoted,
        "rejected": rejected,
        "expired": expired,
        "still_pending": still_pending,
        "total_processed": promoted + rejected + expired + still_pending,
    }
640
-
641
-
642
def quarantine_list(status: str = "pending", limit: int = 20) -> list[dict]:
    """Return quarantine entries, newest first.

    Args:
        status: 'pending', 'promoted', 'rejected' or 'expired' to filter on
            that status, or 'all' to skip the filter.
        limit: Maximum number of rows to return.

    Returns:
        One dict per row with id, content (first 200 characters), source,
        source_type, domain, confidence, promotion_checks, status,
        created_at and promoted_at.
    """
    conn = _get_db()
    if status != "all":
        sql = "SELECT * FROM quarantine WHERE status = ? ORDER BY created_at DESC LIMIT ?"
        params = (status, limit)
    else:
        sql = "SELECT * FROM quarantine ORDER BY created_at DESC LIMIT ?"
        params = (limit,)
    records = conn.execute(sql, params).fetchall()

    # Columns copied through verbatim, in output order, after id and content.
    passthrough = (
        "source", "source_type", "domain", "confidence",
        "promotion_checks", "status", "created_at", "promoted_at",
    )

    def _summarise(record) -> dict:
        summary = {"id": record["id"], "content": record["content"][:200]}
        for column in passthrough:
            summary[column] = record[column]
        return summary

    return [_summarise(record) for record in records]
675
-
676
-
677
def quarantine_promote(quarantine_id: int) -> str:
    """Manually copy a quarantine entry into STM and mark it promoted.

    Args:
        quarantine_id: ID of the quarantine entry to promote.

    Returns:
        A human-readable status message: an error when the entry does not
        exist, a notice when it is already promoted, otherwise a
        confirmation.
    """
    conn = _get_db()
    item = conn.execute(
        "SELECT * FROM quarantine WHERE id = ?", (quarantine_id,)
    ).fetchone()

    if item is None:
        return f"ERROR: Quarantine item #{quarantine_id} not found."
    if item["status"] == "promoted":
        return f"Quarantine item #{quarantine_id} is already promoted."

    # Insert into STM, carrying over the quarantined columns unchanged.
    copied_columns = (
        "content", "embedding", "source_type", "source_id", "source_title", "domain",
    )
    stm_values = tuple(item[column] for column in copied_columns)
    conn.execute(
        """INSERT INTO stm_memories (content, embedding, source_type, source_id, source_title, domain, redaction_applied)
           VALUES (?, ?, ?, ?, ?, ?, 0)""",
        stm_values,
    )
    conn.execute(
        "UPDATE quarantine SET status = 'promoted', promoted_at = datetime('now'), confidence = 1.0 WHERE id = ?",
        (quarantine_id,),
    )
    conn.commit()
    return f"Quarantine item #{quarantine_id} promoted to STM."
703
-
704
-
705
def quarantine_reject(quarantine_id: int, reason: str = "") -> str:
    """Manually mark a quarantine entry as rejected.

    Args:
        quarantine_id: ID of the quarantine entry to reject.
        reason: Optional rejection reason. It is echoed in the returned
            message only; this function does not write it to the database.

    Returns:
        A human-readable status message: an error when the entry does not
        exist, a notice when it is already promoted or rejected, otherwise a
        confirmation.
    """
    conn = _get_db()
    item = conn.execute(
        "SELECT * FROM quarantine WHERE id = ?", (quarantine_id,)
    ).fetchone()

    if item is None:
        return f"ERROR: Quarantine item #{quarantine_id} not found."

    current_status = item["status"]
    if current_status in ("promoted", "rejected"):
        return f"Quarantine item #{quarantine_id} is already {current_status}."

    conn.execute("UPDATE quarantine SET status = 'rejected' WHERE id = ?", (quarantine_id,))
    conn.commit()

    suffix = " Reason: " + reason if reason else ""
    return f"Quarantine item #{quarantine_id} rejected.{suffix}"
722
-
723
-
724
def quarantine_stats() -> dict:
    """Count quarantine entries per status.

    Returns:
        Dict with one count each for 'pending', 'promoted', 'rejected' and
        'expired', plus 'total' holding the sum of those four.
    """
    conn = _get_db()
    count_sql = "SELECT COUNT(*) FROM quarantine WHERE status = ?"
    tally = {
        label: conn.execute(count_sql, (label,)).fetchone()[0]
        for label in ("pending", "promoted", "rejected", "expired")
    }
    tally["total"] = sum(tally.values())
    return tally
734
-
735
-
736
# Opening or closing chat-role tag, in any letter case, with optional padding
# whitespace inside the angle brackets (e.g. "<system>", "</System>", "< human >").
_ROLE_TAG_RE = re.compile(r"<\s*(/?)\s*(system|human|assistant)\s*>", re.IGNORECASE)


def _sanitize_memory_content(content: str) -> str:
    """Sanitize retrieved memory content to prevent prompt injection.

    Memories are USER DATA, not instructions. Role tags (``system``,
    ``human``, ``assistant``) are rewritten from angle brackets to square
    brackets so stored text cannot pose as a conversation turn. Matching is
    case-insensitive and tolerates whitespace inside the brackets, so
    ``<SYSTEM>`` or ``< system >`` are neutralised as well. The tag name keeps
    its original letter case.

    This function only rewrites tags. It does not wrap the content in any
    surrounding markers.

    Args:
        content: Memory text to sanitize.

    Returns:
        The text with every role tag converted to its bracketed form.
    """
    return _ROLE_TAG_RE.sub(r"[\1\2]", content)
748
-
749
-
750
# Injection patterns (adapted from ShieldCortex instruction-detector.ts).
# Declared per category as (category, weight, ((regex source, flags), ...)) and
# flattened below into the (compiled pattern, category, weight) tuples that the
# scanner iterates over. Order is significant: it is the order in which flags
# are reported and substitutions are applied.
_IGNORECASE = re.IGNORECASE
_INJECTION_PATTERN_GROUPS = (
    ("system_prompt_marker", 0.9, (
        (r'\[SYSTEM:', _IGNORECASE),
        (r'<<SYS>>', _IGNORECASE),
        (r'\[INST\]', _IGNORECASE),
        (r'<\|im_start\|>', _IGNORECASE),
        (r'<\|system\|>', _IGNORECASE),
        # MULTILINE so that "SYSTEM:" is caught at the start of any line.
        (r'^SYSTEM\s*:', _IGNORECASE | re.MULTILINE),
    )),
    ("hidden_instruction", 0.8, (
        (r'ignore\s+(all\s+)?previous\s+(instructions?|prompts?|context)', _IGNORECASE),
        (r'forget\s+everything', _IGNORECASE),
        (r'new\s+instructions?\s*:', _IGNORECASE),
        (r'you\s+are\s+now\b', _IGNORECASE),
        (r'disregard\s+(all\s+)?(previous|above|prior)', _IGNORECASE),
        (r'override\s+(previous|all|system)', _IGNORECASE),
    )),
    ("memory_manipulation", 0.7, (
        (r'save\s+(this\s+)?to\s+memory', _IGNORECASE),
        (r'remember\s+this\s+(instruction|command|rule)', _IGNORECASE),
        (r'from\s+now\s+on\s*(,\s*)?always', _IGNORECASE),
        (r'inject\s+(into\s+)?memory', _IGNORECASE),
    )),
    ("behavioral_mod", 0.7, (
        (r'your\s+new\s+rule\s+is', _IGNORECASE),
        (r'always\s+respond\s+with', _IGNORECASE),
        (r'when\s+(the\s+)?user\s+asks', _IGNORECASE),
    )),
    ("delimiter_attack", 0.75, (
        (r'\n{5,}[\s\S]{0,500}\b(instruction|command|system|ignore)\b', _IGNORECASE),
        (r'<!--[\s\S]{0,200}?(instruction|command|system|ignore|inject|override)[\s\S]{0,200}?-->', _IGNORECASE),
    )),
)
_INJECTION_PATTERNS = [
    (re.compile(source, flags), category, weight)
    for category, weight, entries in _INJECTION_PATTERN_GROUPS
    for source, flags in entries
]
_MAX_SECURITY_SCAN_LENGTH = 50000
775
-
776
-
777
# Layer 2/3 detectors, compiled once at import time instead of on every call.
# Base64 runs of at least 100 characters, with optional trailing padding.
_B64_BLOCK_RE = re.compile(r'(?:[A-Za-z0-9+/]{4}){25,}(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?')
_NON_PRINTABLE_ASCII_RE = re.compile(r'[^\x20-\x7E]')
# Zero-width characters, BOM and right-to-left override.
_INVISIBLE_CHARS_RE = re.compile(r'[\u200B\u200C\u200D\uFEFF\u202E]')
# Cyrillic letters that look like Latin ones.
_CYRILLIC_HOMOGLYPHS_RE = re.compile(
    r'[\u0430\u0435\u043E\u0440\u0441\u0443\u0445\u0410\u0412\u0415\u041A\u041C\u041D\u041E\u0420\u0421\u0422\u0423\u0425]'
)
_BEHAVIORAL_PATTERNS = (
    (re.compile(r'\balways\s+do\b', re.IGNORECASE), "behavioral:always_do"),
    (re.compile(r'\bnever\s+do\b', re.IGNORECASE), "behavioral:never_do"),
    (re.compile(r'\byour\s+new\s+rule\b', re.IGNORECASE), "behavioral:new_rule"),
    (re.compile(r'\byou\s+must\s+always\b', re.IGNORECASE), "behavioral:must_always"),
    (re.compile(r'\bchange\s+your\s+behavior\b', re.IGNORECASE), "behavioral:change_behavior"),
)


def security_scan(content: str, *, max_scan_length: int = 10000) -> dict:
    """Security scan for memory poisoning defense.

    Adapted from ShieldCortex's defence pipeline. Layers:
    1. Injection patterns (``_INJECTION_PATTERNS``) — every match is flagged
       and replaced with ``[SANITIZED]``.
    2. Obfuscation — long base64 blocks that decode to mostly printable text,
       more than 2 invisible characters (these are stripped), more than 3
       Cyrillic look-alike letters.
    3. Behavioral phrases trying to modify NEXO behavior.
    4. Credentials — reuses ``redact_secrets()``. Flagged and redacted, but
       deliberately not added to the risk score.

    Detection only looks at the first *max_scan_length* characters. Once a
    pattern is detected there, its substitution is applied to the full
    content. Text beyond the window is not inspected.
    NOTE(review): the module-level ``_MAX_SECURITY_SCAN_LENGTH`` (50000) is
    not used by this function; the effective default window is 10000.
    Confirm which limit is intended.

    Args:
        content: Text content to scan.
        max_scan_length: Number of leading characters inspected by the
            detectors.

    Returns:
        Dict with safe (bool, True when risk_score < 0.5), flags (list),
        sanitized_content (str) and risk_score (float 0-1, rounded to 3
        decimals). Empty or whitespace-only content is reported as safe with
        an empty sanitized_content.
    """
    if not content or not content.strip():
        return {"safe": True, "flags": [], "sanitized_content": "", "risk_score": 0.0}

    flags = []
    max_weight = 0.0
    matches_count = 0
    sanitized = content

    # Truncate for safety (ShieldCortex pattern); slicing is a no-op for short input.
    scan_text = content[:max_scan_length]

    # --- Layer 1: Injection pattern detection ---
    for pattern, category, weight in _INJECTION_PATTERNS:
        if pattern.search(scan_text):
            flags.append(f"{category}:{pattern.pattern[:50]}")
            max_weight = max(max_weight, weight)
            matches_count += 1
            # Sanitize: remove the matched pattern from the full content
            sanitized = pattern.sub("[SANITIZED]", sanitized)

    # --- Layer 2: Encoding/obfuscation detection ---
    for b64_match in _B64_BLOCK_RE.findall(scan_text):
        try:
            decoded = base64.b64decode(b64_match).decode("utf-8", errors="ignore")
        except ValueError:
            # binascii.Error is a ValueError: not valid base64, so not a hit.
            continue
        printable_ratio = len(_NON_PRINTABLE_ASCII_RE.sub('', decoded)) / max(len(decoded), 1)
        if printable_ratio > 0.7 and len(decoded) > 10:
            flags.append(f"base64_encoded:{decoded[:60]}")
            max_weight = max(max_weight, 0.6)
            matches_count += 1

    invisible_chars = _INVISIBLE_CHARS_RE.findall(scan_text)
    if len(invisible_chars) > 2:
        flags.append(f"invisible_chars:{len(invisible_chars)}_found")
        max_weight = max(max_weight, 0.5)
        matches_count += 1
        # Remove invisible chars
        sanitized = _INVISIBLE_CHARS_RE.sub('', sanitized)

    homoglyphs = _CYRILLIC_HOMOGLYPHS_RE.findall(scan_text)
    if len(homoglyphs) > 3:
        flags.append(f"unicode_homoglyphs:{len(homoglyphs)}_cyrillic")
        max_weight = max(max_weight, 0.5)
        matches_count += 1

    # --- Layer 3: Behavioral scoring ---
    for behavioral_pattern, label in _BEHAVIORAL_PATTERNS:
        if behavioral_pattern.search(scan_text):
            flags.append(label)
            max_weight = max(max_weight, 0.4)
            matches_count += 1

    # --- Layer 4: Credential detection (reuse existing redact_secrets) ---
    if redact_secrets(scan_text) != scan_text:
        flags.append("credentials_detected")
        sanitized = redact_secrets(sanitized)
        # Don't increase risk score for creds — still store (redacted)
        # but flag for awareness

    # Risk score (0-1): the strongest signal dominates and every additional
    # match adds a small fixed increment.
    if matches_count == 0:
        risk_score = 0.0
    else:
        risk_score = min(1.0, max_weight + (matches_count - 1) * 0.05)

    return {
        "safe": risk_score < 0.5,
        "flags": flags,
        "sanitized_content": sanitized,
        "risk_score": round(risk_score, 3),
    }