nexo-brain 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +61 -0
- package/bin/nexo-brain 2.js +610 -0
- package/package.json +4 -2
- package/scripts/pre-commit-check 2.sh +55 -0
- package/src/cognitive 2.py +1224 -0
- package/src/db 2.py +2283 -0
- package/src/plugin_loader 2.py +136 -0
- package/src/server 2.py +560 -0
- package/src/tools_coordination 2.py +102 -0
- package/src/tools_credentials 2.py +64 -0
- package/src/tools_learnings 2.py +180 -0
- package/src/tools_menu 2.py +208 -0
- package/src/tools_reminders 2.py +80 -0
- package/src/tools_reminders_crud 2.py +157 -0
- package/src/tools_sessions 2.py +169 -0
- package/src/tools_task_history 2.py +57 -0
- package/templates/CLAUDE.md 2.template +89 -0
- package/templates/openclaw.json +13 -0
|
@@ -0,0 +1,1224 @@
|
|
|
1
|
+
"""NEXO Cognitive Engine — Vector memory with Atkinson-Shiffrin model."""
|
|
2
|
+
|
|
3
|
+
import math
|
|
4
|
+
import os
|
|
5
|
+
import sqlite3
|
|
6
|
+
import numpy as np
|
|
7
|
+
from datetime import datetime, timedelta
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
# Root directory for NEXO state; the NEXO_HOME environment variable overrides ~/.nexo.
NEXO_HOME = Path(os.getenv("NEXO_HOME", str(Path.home() / ".nexo")))

# Path (as a plain string) of the SQLite file that stores the vector memories.
COGNITIVE_DB = str(NEXO_HOME.joinpath("cognitive.db"))

# Length of the embedding vectors handled by this module.
EMBEDDING_DIM = 384

# Exponential decay rates; half-life = ln(2) / rate.
LAMBDA_STM = 0.1  # half-life ~7 days (ln 2 / 0.1 ≈ 6.9 with elapsed time in days)
LAMBDA_LTM = 0.012  # half-life ~60 days (ln 2 / 0.012 ≈ 57.8 with elapsed time in days)
|
|
16
|
+
|
|
17
|
+
# Discriminating entities: when two highly similar memories mention different
# members of this set, they are treated as siblings (similar-but-incompatible)
# rather than as duplicates to be merged.
DISCRIMINATING_ENTITIES = {
    # --- operating systems / environments ---
    "linux",
    "mac",
    "macos",
    "windows",
    "darwin",
    "ubuntu",
    "debian",
    "alpine",
    # --- platforms ---
    "shopify",
    "whatsapp",
    "chrome",
    "firefox",
    # --- languages / runtimes ---
    "python",
    "php",
    "javascript",
    "typescript",
    "node",
    "deno",
    "ruby",
    # --- version markers ---
    "v1",
    "v2",
    "v3",
    "v4",
    "v5",
    "5.6",
    "7.4",
    "8.0",
    "8.1",
    "8.2",
    # --- infrastructure ---
    "cloudrun",
    "gcloud",
    "vps",
    "local",
    "production",
    "staging",
    # --- databases ---
    "mysql",
    "sqlite",
    "postgresql",
    "postgres",
    "redis",
}
|
|
33
|
+
|
|
34
|
+
# Keyword sets for sentiment detection (Spanish and English entries side by side).
# Several entries are stems ("frustr", "cansad"), not whole words.
POSITIVE_SIGNALS = {
    "gracias", "genial", "perfecto", "bien", "excelente", "bueno",
    "me gusta", "correcto", "sí", "dale", "hazlo", "adelante",
    "ok", "vale", "great", "nice", "good", "exactly",
    "buen trabajo", "bien hecho", "fenomenal", "thanks", "perfect",
    "awesome", "excellent", "well done",
}

NEGATIVE_SIGNALS = {
    "no", "mal", "otra vez", "ya te dije", "frustr", "error",
    "fallo", "cansad", "siempre", "nunca", "por qué no",
    "no funciona", "roto", "no sirve", "horrible", "desastre",
    "wrong", "broken", "again", "not working", "frustrated",
    "always", "never",
}

URGENCY_SIGNALS = {
    "rápido", "ya", "ahora", "urgente", "asap", "inmediatamente",
    "corre", "urgent", "now", "immediately", "hurry",
}
|
|
51
|
+
|
|
52
|
+
# Trust-score events mapped to the number of points each one adds or removes.
TRUST_EVENTS = {
    # ---- events that raise trust ----
    "explicit_thanks": 3,
    "delegation": 2,  # user hands over a new task without micromanaging
    "paradigm_shift": 2,  # user teaches, NEXO learns
    "sibling_detected": 3,  # NEXO avoided a context error on its own
    "proactive_action": 2,  # NEXO did something useful without being asked
    # ---- events that lower trust ----
    "correction": -3,  # user corrects NEXO
    "repeated_error": -7,  # error on something NEXO already had a learning for
    "override": -5,  # NEXO's memory was wrong
    "correction_fatigue": -10,  # same memory corrected 3+ times
    "forgot_followup": -4,  # forgot to mark a followup or to execute it
}
|
|
67
|
+
|
|
68
|
+
# Process-wide singletons, filled in lazily by _get_model() and _get_db().
_model: Optional[object] = None  # embedding model instance once loaded
_conn: Optional[sqlite3.Connection] = None  # shared SQLite connection once opened
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _get_db() -> sqlite3.Connection:
    """Return the shared SQLite connection, opening it on first use.

    On the first call this creates ``NEXO_HOME`` if needed, opens
    ``COGNITIVE_DB`` with ``check_same_thread=False``, switches the database to
    WAL journaling with ``synchronous=NORMAL``, installs ``sqlite3.Row`` as the
    row factory and creates the schema via ``_init_tables``.

    The module-level ``_conn`` is only assigned after all of that succeeded.
    If any step raises, the connection is closed and the error propagates, so
    a later call retries from scratch instead of reusing a connection whose
    tables were never created.

    Returns:
        The process-wide ``sqlite3.Connection``.
    """
    global _conn
    if _conn is not None:
        return _conn

    NEXO_HOME.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(COGNITIVE_DB, check_same_thread=False)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA synchronous=NORMAL")
        conn.row_factory = sqlite3.Row
        _init_tables(conn)
    except Exception:
        # Do not cache a half-initialised connection.
        conn.close()
        raise

    _conn = conn
    return _conn
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _init_tables(conn: sqlite3.Connection):
    """Create every table used by the cognitive engine if it is missing.

    All statements use ``CREATE TABLE IF NOT EXISTS``, so calling this on an
    already-initialised database leaves existing tables untouched.

    Args:
        conn: Open SQLite connection on which the schema script is executed.
    """
    schema_sql = """
        CREATE TABLE IF NOT EXISTS stm_memories (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            content TEXT NOT NULL,
            embedding BLOB NOT NULL,
            source_type TEXT NOT NULL,
            source_id TEXT DEFAULT '',
            source_title TEXT DEFAULT '',
            domain TEXT DEFAULT '',
            created_at TEXT DEFAULT (datetime('now')),
            last_accessed TEXT DEFAULT (datetime('now')),
            access_count INTEGER DEFAULT 0,
            strength REAL DEFAULT 1.0,
            promoted_to_ltm INTEGER DEFAULT 0
        );

        CREATE TABLE IF NOT EXISTS ltm_memories (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            content TEXT NOT NULL,
            embedding BLOB NOT NULL,
            source_type TEXT NOT NULL,
            source_id TEXT DEFAULT '',
            source_title TEXT DEFAULT '',
            domain TEXT DEFAULT '',
            created_at TEXT DEFAULT (datetime('now')),
            last_accessed TEXT DEFAULT (datetime('now')),
            access_count INTEGER DEFAULT 0,
            strength REAL DEFAULT 1.0,
            is_dormant INTEGER DEFAULT 0,
            original_stm_id INTEGER,
            tags TEXT DEFAULT ''
        );

        CREATE TABLE IF NOT EXISTS retrieval_log (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            query_text TEXT NOT NULL,
            results_count INTEGER DEFAULT 0,
            top_score REAL DEFAULT 0.0,
            created_at TEXT DEFAULT (datetime('now'))
        );

        -- Sibling memories: similar-but-incompatible (discriminating entities differ)
        CREATE TABLE IF NOT EXISTS memory_siblings (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            memory_a_id INTEGER NOT NULL,
            memory_b_id INTEGER NOT NULL,
            similarity REAL NOT NULL,
            discriminators TEXT NOT NULL, -- JSON: entities that differ between them
            created_at TEXT DEFAULT (datetime('now')),
            UNIQUE(memory_a_id, memory_b_id)
        );

        -- Trust score: NEXO's alignment index (0-100, starts at 50)
        CREATE TABLE IF NOT EXISTS trust_score (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            score REAL NOT NULL,
            event TEXT NOT NULL, -- what caused the change
            delta REAL NOT NULL, -- points gained or lost
            context TEXT DEFAULT '', -- details
            created_at TEXT DEFAULT (datetime('now'))
        );

        -- Sentiment readings: user's detected mood per interaction
        CREATE TABLE IF NOT EXISTS sentiment_log (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            sentiment TEXT NOT NULL, -- 'positive', 'negative', 'neutral', 'urgent'
            intensity REAL DEFAULT 0.5, -- 0.0 to 1.0
            signals TEXT DEFAULT '', -- keywords detected
            created_at TEXT DEFAULT (datetime('now'))
        );

        -- Correction tracking: when user overrides a memory's guidance
        CREATE TABLE IF NOT EXISTS memory_corrections (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            memory_id INTEGER NOT NULL,
            store TEXT NOT NULL, -- 'stm' or 'ltm'
            correction_type TEXT NOT NULL, -- 'override', 'exception', 'paradigm_shift'
            context TEXT DEFAULT '', -- what the user said
            created_at TEXT DEFAULT (datetime('now'))
        );
    """
    conn.executescript(schema_sql)
    conn.commit()
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _get_model():
    """Return the shared fastembed ``TextEmbedding`` model, creating it on first use.

    ``fastembed`` is imported inside the function, so the package is only
    needed once an embedding is actually requested.
    """
    global _model
    if _model is not None:
        return _model

    from fastembed import TextEmbedding

    _model = TextEmbedding("BAAI/bge-small-en-v1.5")
    return _model
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def embed(text: str) -> np.ndarray:
    """Turn ``text`` into a float32 embedding vector.

    Args:
        text: Text to embed.

    Returns:
        The model's embedding for ``text`` as a float32 array, or an
        all-zero vector of length ``EMBEDDING_DIM`` when ``text`` is empty or
        whitespace-only (the model is not loaded in that case).
    """
    is_blank = not text or not text.strip()
    if is_blank:
        return np.zeros(EMBEDDING_DIM, dtype=np.float32)

    vectors = list(_get_model().embed([text]))
    return np.array(vectors[0], dtype=np.float32)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _array_to_blob(arr: np.ndarray) -> bytes:
    """Return the raw bytes of ``arr`` after converting it to float32."""
    as_float32 = arr.astype(np.float32)
    return as_float32.tobytes()
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _blob_to_array(blob: bytes) -> np.ndarray:
    """Interpret ``blob`` as a flat sequence of float32 values and return it as an array."""
    vector = np.frombuffer(blob, dtype=np.float32)
    return vector
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine of the angle between ``a`` and ``b``.

    A vector with zero norm has no direction, so the function returns ``0.0``
    when either input has zero norm instead of dividing by zero.
    """
    length_a, length_b = np.linalg.norm(a), np.linalg.norm(b)
    if length_a == 0 or length_b == 0:
        return 0.0
    dot_product = np.dot(a, b)
    return float(dot_product / (length_a * length_b))
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _search_hit(row, store: str, score: float) -> dict:
    """Build the result dict for one matching row of the given store."""
    hit = {
        "store": store,
        "id": row["id"],
        "content": row["content"],
        "source_type": row["source_type"],
        "source_id": row["source_id"],
        "source_title": row["source_title"],
        "domain": row["domain"],
        "created_at": row["created_at"],
        "strength": row["strength"],
        "access_count": row["access_count"],
        "score": score,
    }
    if store == "ltm":
        # Only the LTM table has a tags column.
        hit["tags"] = row["tags"]
    return hit


def search(
    query_text: str,
    top_k: int = 10,
    min_score: float = 0.5,
    stores: str = "both",
    exclude_dormant: bool = True,
    rehearse: bool = True,
    source_type_filter: str = "",
) -> list[dict]:
    """Vector search across STM and/or LTM with rehearsal and dormant reactivation.

    Args:
        query_text: Text to embed and compare against stored memories.
        top_k: Maximum number of results returned.
        min_score: Minimum cosine similarity for active memories to be included.
        stores: ``"stm"``, ``"ltm"`` or ``"both"``.
        exclude_dormant: When False, dormant LTM memories scoring above 0.8 are
            reactivated (``is_dormant = 0``, ``strength = 0.5``) and returned
            with ``"reactivated": True``.
        rehearse: When True, every returned memory that was not just
            reactivated gets ``strength = 1.0``, ``access_count + 1`` and a
            fresh ``last_accessed``.
        source_type_filter: When non-empty, only memories with this
            ``source_type`` are considered. This now also applies to dormant
            memories, which were previously reactivated regardless of the
            filter.

    Returns:
        Result dicts sorted by descending ``score``, at most ``top_k`` long.
        Returns ``[]`` without touching the retrieval log when the query
        embeds to a zero vector (e.g. empty text).
    """
    db = _get_db()
    query_vec = embed(query_text)
    if np.linalg.norm(query_vec) == 0:
        return []

    # Optional source_type restriction, shared by every query below.
    type_clause = " AND source_type = ?" if source_type_filter else ""
    type_params = (source_type_filter,) if source_type_filter else ()

    results = []
    reactivated_ids = set()

    # Search STM (entries not yet promoted)
    if stores in ("both", "stm"):
        rows = db.execute(
            f"SELECT * FROM stm_memories WHERE promoted_to_ltm = 0{type_clause}",
            type_params,
        ).fetchall()
        for row in rows:
            score = cosine_similarity(query_vec, _blob_to_array(row["embedding"]))
            if score >= min_score:
                results.append(_search_hit(row, "stm", score))

    if stores in ("both", "ltm"):
        # Search LTM (active)
        rows = db.execute(
            f"SELECT * FROM ltm_memories WHERE is_dormant = 0{type_clause}",
            type_params,
        ).fetchall()
        for row in rows:
            score = cosine_similarity(query_vec, _blob_to_array(row["embedding"]))
            if score >= min_score:
                results.append(_search_hit(row, "ltm", score))

        # Check dormant LTM for reactivation
        if not exclude_dormant:
            dormant_rows = db.execute(
                f"SELECT * FROM ltm_memories WHERE is_dormant = 1{type_clause}",
                type_params,
            ).fetchall()
            for row in dormant_rows:
                score = cosine_similarity(query_vec, _blob_to_array(row["embedding"]))
                if score > 0.8:
                    db.execute(
                        "UPDATE ltm_memories SET is_dormant = 0, strength = 0.5, last_accessed = datetime('now') WHERE id = ?",
                        (row["id"],),
                    )
                    reactivated_ids.add(("ltm", row["id"]))
                    hit = _search_hit(row, "ltm", score)
                    hit["strength"] = 0.5  # report the value just written
                    hit["reactivated"] = True
                    results.append(hit)
            if reactivated_ids:
                db.commit()

    # Sort by score descending and take top_k
    results.sort(key=lambda x: x["score"], reverse=True)
    results = results[:top_k]

    # Rehearsal: reinforce returned results, except those reactivated above
    # (they keep the 0.5 strength they were just given).
    if rehearse and results:
        now = datetime.utcnow().isoformat()
        for r in results:
            if (r["store"], r["id"]) in reactivated_ids:
                continue
            table = "stm_memories" if r["store"] == "stm" else "ltm_memories"
            db.execute(
                f"UPDATE {table} SET strength = 1.0, access_count = access_count + 1, last_accessed = ? WHERE id = ?",
                (now, r["id"]),
            )
        db.commit()

    # Log retrieval
    top_score = results[0]["score"] if results else 0.0
    db.execute(
        "INSERT INTO retrieval_log (query_text, results_count, top_score) VALUES (?, ?, ?)",
        (query_text[:500], len(results), top_score),
    )
    db.commit()

    return results
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def ingest(
    content: str,
    source_type: str,
    source_id: str = "",
    source_title: str = "",
    domain: str = "",
) -> int:
    """Embed ``content`` and insert it as a new row in ``stm_memories``.

    Args:
        content: Text to store and embed.
        source_type: Value for the ``source_type`` column.
        source_id: Value for the ``source_id`` column.
        source_title: Value for the ``source_title`` column.
        domain: Value for the ``domain`` column.

    Returns:
        The id of the inserted row.
    """
    conn = _get_db()
    embedding_blob = _array_to_blob(embed(content))
    values = (content, embedding_blob, source_type, source_id, source_title, domain)
    cursor = conn.execute(
        """INSERT INTO stm_memories (content, embedding, source_type, source_id, source_title, domain)
        VALUES (?, ?, ?, ?, ?, ?)""",
        values,
    )
    conn.commit()
    return cursor.lastrowid
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def ingest_to_ltm(
    content: str,
    source_type: str,
    source_id: str = "",
    source_title: str = "",
    domain: str = "",
    tags: str = "",
) -> int:
    """Embed ``content`` and insert it straight into ``ltm_memories``, bypassing STM.

    Args:
        content: Text to store and embed.
        source_type: Value for the ``source_type`` column.
        source_id: Value for the ``source_id`` column.
        source_title: Value for the ``source_title`` column.
        domain: Value for the ``domain`` column.
        tags: Value for the ``tags`` column.

    Returns:
        The id of the inserted row.
    """
    conn = _get_db()
    embedding_blob = _array_to_blob(embed(content))
    values = (content, embedding_blob, source_type, source_id, source_title, domain, tags)
    cursor = conn.execute(
        """INSERT INTO ltm_memories (content, embedding, source_type, source_id, source_title, domain, tags)
        VALUES (?, ?, ?, ?, ?, ?, ?)""",
        values,
    )
    conn.commit()
    return cursor.lastrowid
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
def apply_decay():
    """Apply exponential (Ebbinghaus-style) decay to all active memories.

    Each memory's strength is multiplied by ``exp(-rate * elapsed_days)``,
    where ``elapsed_days`` is the time since ``last_accessed``. The rate is
    ``LAMBDA_STM`` for unpromoted STM rows and ``LAMBDA_LTM`` for non-dormant
    LTM rows. LTM rows whose new strength falls below 0.1 are marked dormant.

    Elapsed time is measured in days because that is the unit the rate
    constants are documented in (ln 2 / 0.1 ≈ 7 days, ln 2 / 0.012 ≈ 58
    days). Measuring it in hours, as before, made memories fade 24 times
    faster than those stated half-lives. Elapsed time is also clamped at
    zero so a ``last_accessed`` in the future cannot increase strength.

    NOTE(review): the factor is computed from ``last_accessed`` and applied to
    the already-decayed ``strength``, so repeated runs without an intervening
    access compound the decay. Avoiding that needs a separate "last decayed"
    timestamp, i.e. a schema change.
    """
    db = _get_db()
    now = datetime.utcnow()
    seconds_per_day = 86400.0

    def _decayed_strength(row, rate_per_day):
        last = datetime.fromisoformat(row["last_accessed"])
        elapsed_days = max((now - last).total_seconds(), 0.0) / seconds_per_day
        return row["strength"] * math.exp(-rate_per_day * elapsed_days)

    # STM decay
    stm_rows = db.execute(
        "SELECT id, last_accessed, strength FROM stm_memories WHERE promoted_to_ltm = 0"
    ).fetchall()
    for row in stm_rows:
        db.execute(
            "UPDATE stm_memories SET strength = ? WHERE id = ?",
            (_decayed_strength(row, LAMBDA_STM), row["id"]),
        )

    # LTM decay; memories that become too weak go dormant
    ltm_rows = db.execute(
        "SELECT id, last_accessed, strength FROM ltm_memories WHERE is_dormant = 0"
    ).fetchall()
    for row in ltm_rows:
        new_strength = _decayed_strength(row, LAMBDA_LTM)
        if new_strength < 0.1:
            db.execute(
                "UPDATE ltm_memories SET strength = ?, is_dormant = 1 WHERE id = ?",
                (new_strength, row["id"]),
            )
        else:
            db.execute(
                "UPDATE ltm_memories SET strength = ? WHERE id = ?",
                (new_strength, row["id"]),
            )

    db.commit()
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
def promote_stm_to_ltm():
    """Copy frequently accessed STM memories into LTM and flag them as promoted.

    An STM row qualifies when ``access_count >= 3`` and it has not been
    promoted yet. Its content, embedding and source fields are copied into a
    new ``ltm_memories`` row whose ``original_stm_id`` points back at it.

    Returns:
        The number of memories promoted.
    """
    conn = _get_db()
    candidates = conn.execute(
        "SELECT * FROM stm_memories WHERE access_count >= 3 AND promoted_to_ltm = 0"
    ).fetchall()

    copied_columns = ("content", "embedding", "source_type", "source_id", "source_title", "domain", "id")
    for stm_row in candidates:
        conn.execute(
            """INSERT INTO ltm_memories (content, embedding, source_type, source_id, source_title, domain, original_stm_id)
            VALUES (?, ?, ?, ?, ?, ?, ?)""",
            tuple(stm_row[column] for column in copied_columns),
        )
        conn.execute("UPDATE stm_memories SET promoted_to_ltm = 1 WHERE id = ?", (stm_row["id"],))

    conn.commit()
    return len(candidates)
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
def gc_stm():
    """Garbage-collect unpromoted STM memories.

    Deletes rows with ``strength < 0.3`` created more than 7 days ago, then
    any remaining row created more than 30 days ago.

    Both sides of each age comparison go through SQLite's ``datetime()``.
    ``created_at`` defaults to ``datetime('now')`` ("YYYY-MM-DD HH:MM:SS"),
    while the cutoff comes from ``isoformat()`` ("YYYY-MM-DDTHH:MM:SS.ffffff").
    Compared as raw strings, the space sorts before ``T``, so every row
    created on the cutoff date counted as older than the cutoff whatever its
    time of day.

    Returns:
        Total number of rows deleted.
    """
    db = _get_db()
    now = datetime.utcnow()
    cutoff_7d = (now - timedelta(days=7)).isoformat()
    cutoff_30d = (now - timedelta(days=30)).isoformat()

    # Weak memories older than 7 days
    cur1 = db.execute(
        "DELETE FROM stm_memories WHERE strength < 0.3 AND datetime(created_at) < datetime(?) AND promoted_to_ltm = 0",
        (cutoff_7d,),
    )
    # Anything older than 30 days
    cur2 = db.execute(
        "DELETE FROM stm_memories WHERE datetime(created_at) < datetime(?) AND promoted_to_ltm = 0",
        (cutoff_30d,),
    )
    db.commit()
    return (cur1.rowcount or 0) + (cur2.rowcount or 0)
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
def ingest_sensory(
    content: str,
    source_id: str = "",
    domain: str = "",
    created_at: str = "",
) -> int:
    """Embed ``content`` and store it in STM with ``source_type='sensory'``.

    Args:
        content: Text of the sensory event.
        source_id: Value for the ``source_id`` column.
        domain: Value for the ``domain`` column.
        created_at: Timestamp stored as ``created_at``; when empty, the
            current UTC time in ISO format is used.

    Returns:
        The id of the inserted row.
    """
    conn = _get_db()
    embedding_blob = _array_to_blob(embed(content))
    timestamp = created_at if created_at else datetime.utcnow().isoformat()
    cursor = conn.execute(
        """INSERT INTO stm_memories (content, embedding, source_type, source_id, domain, created_at)
        VALUES (?, ?, 'sensory', ?, ?, ?)""",
        (content, embedding_blob, source_id, domain, timestamp),
    )
    conn.commit()
    return cursor.lastrowid
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
def detect_patterns(content_vec: np.ndarray, threshold: float = 0.65) -> list[dict]:
    """Find active LTM memories similar to ``content_vec`` (potential repetitions).

    Args:
        content_vec: Embedding to compare against every non-dormant LTM row.
        threshold: Minimum cosine similarity for a row to count as a match.

    Returns:
        Up to five matches, best first. Each has ``ltm_id``, ``content``
        (first 200 characters), ``source_type``, ``domain`` and ``score``.
    """
    conn = _get_db()
    ltm_rows = conn.execute(
        "SELECT id, content, embedding, source_type, domain FROM ltm_memories WHERE is_dormant = 0"
    ).fetchall()

    scored_rows = (
        (ltm_row, cosine_similarity(content_vec, _blob_to_array(ltm_row["embedding"])))
        for ltm_row in ltm_rows
    )
    hits = [
        {
            "ltm_id": ltm_row["id"],
            "content": ltm_row["content"][:200],
            "source_type": ltm_row["source_type"],
            "domain": ltm_row["domain"],
            "score": similarity,
        }
        for ltm_row, similarity in scored_rows
        if similarity >= threshold
    ]
    hits.sort(key=lambda hit: hit["score"], reverse=True)
    return hits[:5]
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
def gc_sensory(max_age_hours: int = 48) -> int:
    """Delete unpromoted sensory STM memories older than ``max_age_hours``.

    The age comparison goes through SQLite's ``datetime()`` on both sides, so
    it holds whether ``created_at`` is stored as "YYYY-MM-DD HH:MM:SS" (the
    column default) or as an ISO string with a ``T`` separator (as written by
    ``ingest_sensory``). Comparing raw strings misorders the two formats
    within the same calendar day.

    Args:
        max_age_hours: Maximum age, in hours, a sensory memory may reach.

    Returns:
        Number of rows deleted.
    """
    db = _get_db()
    cutoff = (datetime.utcnow() - timedelta(hours=max_age_hours)).isoformat()
    cur = db.execute(
        "DELETE FROM stm_memories WHERE source_type = 'sensory' AND datetime(created_at) < datetime(?) AND promoted_to_ltm = 0",
        (cutoff,),
    )
    db.commit()
    return cur.rowcount or 0
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
def gc_ltm_dormant(min_age_days: int = 30) -> int:
    """Delete dormant LTM memories with ``strength < 0.1`` created over ``min_age_days`` ago.

    The age comparison goes through SQLite's ``datetime()`` on both sides.
    ``created_at`` defaults to ``datetime('now')`` (space separator), while
    the cutoff is an ``isoformat()`` string (``T`` separator). Compared as raw
    strings, rows created on the cutoff date always counted as older than the
    cutoff.

    Args:
        min_age_days: Minimum age, in days, before a dormant memory is deleted.

    Returns:
        Number of rows deleted.
    """
    db = _get_db()
    cutoff = (datetime.utcnow() - timedelta(days=min_age_days)).isoformat()
    cur = db.execute(
        "DELETE FROM ltm_memories WHERE is_dormant = 1 AND strength < 0.1 AND datetime(created_at) < datetime(?)",
        (cutoff,),
    )
    db.commit()
    return cur.rowcount or 0
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
def format_results(results: list[dict]) -> str:
    """Render search results as human-readable text.

    Each result becomes a header line (score, upper-cased source type,
    optional domain and title, store tag, optional markers) followed by a
    preview of at most 300 characters. LTM results additionally list up to
    two sibling memories when ``get_siblings`` returns any; failures in that
    lookup are ignored.

    Args:
        results: Result dicts as produced by ``search``.

    Returns:
        The formatted entries joined by blank lines, or ``"No results found."``
        when ``results`` is empty.
    """
    if not results:
        return "No results found."

    procedure_markers = ["1.", "2.", "3.", "step ", "Step ", "then ", "first ", "First "]
    out = []
    for item in results:
        score = item["score"]
        source_label = item["source_type"].upper()
        domain = item.get("domain", "")
        title = item.get("source_title", "")
        body = item["content"]

        # Header: "[score] TYPE (domain): "title""
        header = f"[{score:.2f}] {source_label}"
        if domain:
            header += f" ({domain})"
        if title:
            header += f': "{title}"'

        # Content preview (300 chars)
        preview = body[:300] + "..." if len(body) > 300 else body[:300]

        # Proto-procedural: change logs containing sequential markers
        if item["source_type"] == "change" and any(marker in body for marker in procedure_markers):
            header += " [PROCEDURE]"

        store_label = item["store"].upper()
        reactivated_mark = " [REACTIVATED]" if item.get("reactivated") else ""
        out.append(f"{header} [{store_label}]{reactivated_mark}\n {preview}")

        if item["store"] != "ltm":
            continue

        # Sibling mention: note up to two siblings of this LTM memory
        try:
            for sibling in (get_siblings(item["id"]) or [])[:2]:
                differing = ", ".join(sibling["discriminators"].split(",")[:3])
                out.append(f" ↳ SIBLING #{sibling['sibling_id']} ({sibling['domain']}): differs in [{differing}] — {sibling['content'][:80]}...")
        except Exception:
            pass

    return "\n\n".join(out)
|
|
558
|
+
|
|
559
|
+
|
|
560
|
+
def get_metrics(days: int = 7) -> dict:
    """Compute retrieval metrics from ``retrieval_log`` over the last ``days`` days.

    The period filter goes through SQLite's ``datetime()`` on both sides.
    ``created_at`` is stored as "YYYY-MM-DD HH:MM:SS", while the cutoff is an
    ``isoformat()`` string with a ``T`` separator. Compared as raw strings,
    every retrieval logged on the cutoff date was excluded.

    Args:
        days: Length of the reporting period in days.

    Returns:
        A dict with:
            period_days: the ``days`` argument
            total_retrievals: number of retrievals in the period
            retrieval_relevance_pct: % of retrievals with top_score >= 0.6
            avg_top_score: mean best-match score across those retrievals
            retrievals_per_day: ``total_retrievals / days``
            score_distribution: counts per bucket
                [<0.5, 0.5-0.6, 0.6-0.7, 0.7-0.8, >=0.8]
            needs_multilingual: True when relevance is below 70% over at
                least 10 retrievals
    """
    db = _get_db()
    cutoff = (datetime.utcnow() - timedelta(days=days)).isoformat()

    rows = db.execute(
        "SELECT top_score FROM retrieval_log WHERE datetime(created_at) >= datetime(?)",
        (cutoff,),
    ).fetchall()

    dist = {"below_50": 0, "50_60": 0, "60_70": 0, "70_80": 0, "above_80": 0}
    total = len(rows)
    if total == 0:
        return {
            "period_days": days,
            "total_retrievals": 0,
            "retrieval_relevance_pct": 0.0,
            "avg_top_score": 0.0,
            "retrievals_per_day": 0.0,
            "score_distribution": dist,
            "needs_multilingual": False,
        }

    scores = [r[0] for r in rows]
    relevant = sum(1 for s in scores if s >= 0.6)
    relevance_pct = round(relevant / total * 100, 1)
    avg_score = round(sum(scores) / total, 3)

    for s in scores:
        if s < 0.5:
            dist["below_50"] += 1
        elif s < 0.6:
            dist["50_60"] += 1
        elif s < 0.7:
            dist["60_70"] += 1
        elif s < 0.8:
            dist["70_80"] += 1
        else:
            dist["above_80"] += 1

    # Check if multilingual model is needed (spec 13.3)
    needs_multilingual = relevance_pct < 70.0 and total >= 10

    return {
        "period_days": days,
        "total_retrievals": total,
        "retrieval_relevance_pct": relevance_pct,
        "avg_top_score": avg_score,
        "retrievals_per_day": round(total / days, 1),
        "score_distribution": dist,
        "needs_multilingual": needs_multilingual,
    }
|
|
620
|
+
|
|
621
|
+
|
|
622
|
+
def check_repeat_errors() -> dict:
    """Find recent STM learnings that duplicate an existing LTM learning.

    Unpromoted STM rows with ``source_type = 'learning'`` created in the last
    7 days are compared with every non-dormant LTM learning. An STM learning
    counts as a duplicate as soon as one LTM learning has cosine similarity
    above 0.8; that first match is the one reported.

    The 7-day filter goes through SQLite's ``datetime()`` on both sides.
    ``created_at`` defaults to "YYYY-MM-DD HH:MM:SS", while the cutoff is an
    ``isoformat()`` string with a ``T`` separator. Compared as raw strings,
    learnings created on the cutoff date were dropped.

    Returns:
        A dict with ``new_count``, ``duplicate_count``, ``repeat_rate_pct``
        and ``duplicates`` (at most 10 entries with ids, 100-character
        content previews and the rounded score).
    """
    db = _get_db()
    cutoff_7d = (datetime.utcnow() - timedelta(days=7)).isoformat()

    # Recent learning STM entries
    new_learnings = db.execute(
        "SELECT id, content, embedding FROM stm_memories WHERE source_type = 'learning' AND datetime(created_at) >= datetime(?) AND promoted_to_ltm = 0",
        (cutoff_7d,),
    ).fetchall()

    # All active LTM learnings
    ltm_learnings = db.execute(
        "SELECT id, content, embedding FROM ltm_memories WHERE source_type = 'learning' AND is_dormant = 0"
    ).fetchall()

    if not new_learnings or not ltm_learnings:
        return {"new_count": len(new_learnings), "duplicate_count": 0, "repeat_rate_pct": 0.0, "duplicates": []}

    # Decode the LTM embeddings once instead of once per new learning.
    ltm_vectors = [(ltm, _blob_to_array(ltm["embedding"])) for ltm in ltm_learnings]

    duplicates = []
    for new in new_learnings:
        new_vec = _blob_to_array(new["embedding"])
        for ltm, ltm_vec in ltm_vectors:
            score = cosine_similarity(new_vec, ltm_vec)
            if score > 0.8:
                duplicates.append({
                    "new_stm_id": new["id"],
                    "new_content": new["content"][:100],
                    "ltm_id": ltm["id"],
                    "ltm_content": ltm["content"][:100],
                    "score": round(score, 3),
                })
                break  # One match is enough

    repeat_rate = round(len(duplicates) / len(new_learnings) * 100, 1)

    return {
        "new_count": len(new_learnings),
        "duplicate_count": len(duplicates),
        "repeat_rate_pct": repeat_rate,
        "duplicates": duplicates[:10],
    }
|
|
668
|
+
|
|
669
|
+
|
|
670
|
+
def rehearse_by_content(content_keywords: str, source_type: str = ""):
    """Passive rehearsal: strengthen stored memories that match the given text.

    Embeds up to the first 500 characters of ``content_keywords`` and, for
    every unpromoted STM row and non-dormant LTM row with cosine similarity
    >= 0.7, sets ``strength = 1.0``, increments ``access_count`` and refreshes
    ``last_accessed``. Nothing is returned; this is a side effect only.

    The function is best-effort: any exception is swallowed so that a failing
    rehearsal never breaks the calling tool.

    Args:
        content_keywords: Text to match against (e.g., learning title +
            content). Inputs shorter than 10 characters after stripping are
            ignored.
        source_type: Optional filter; when non-empty, only memories with this
            ``source_type`` are rehearsed. The parameter was previously
            accepted and documented but never applied.
    """
    if not content_keywords or len(content_keywords.strip()) < 10:
        return

    try:
        db = _get_db()
        query_vec = embed(content_keywords[:500])  # cap to avoid slow embedding
        if np.linalg.norm(query_vec) == 0:
            return

        now = datetime.utcnow().isoformat()
        type_clause = " AND source_type = ?" if source_type else ""
        type_params = (source_type,) if source_type else ()

        # Each store paired with the condition selecting its active rows.
        active_rows = (
            ("stm_memories", "promoted_to_ltm = 0"),
            ("ltm_memories", "is_dormant = 0"),
        )
        for table, active_clause in active_rows:
            rows = db.execute(
                f"SELECT id, embedding FROM {table} WHERE {active_clause}{type_clause}",
                type_params,
            ).fetchall()
            for row in rows:
                score = cosine_similarity(query_vec, _blob_to_array(row["embedding"]))
                if score >= 0.7:
                    db.execute(
                        f"UPDATE {table} SET strength = 1.0, access_count = access_count + 1, last_accessed = ? WHERE id = ?",
                        (now, row["id"]),
                    )

        db.commit()
    except Exception:
        pass  # Rehearsal is best-effort, never block the main tool
|
|
713
|
+
|
|
714
|
+
|
|
715
|
+
def _extract_discriminators(text: str) -> set:
    """Return the members of ``DISCRIMINATING_ENTITIES`` mentioned in ``text``.

    Matching is case-insensitive and requires the entity to stand alone: it
    must not be directly preceded or followed by a letter or digit. Plain
    substring matching reported "mac" for "machine", "node" for "nodes",
    "local" for "locale" and "5.6" for "15.6", which made unrelated memories
    look like they differed in a discriminating entity. Entities next to
    punctuation ("linux,", "(php)", "index.php") still match, and so would a
    multi-word entity.

    Args:
        text: Memory content to scan.

    Returns:
        The set of matched entity strings (possibly empty).
    """
    import re  # local import: re is not among this module's file-level imports

    text_lower = text.lower()
    found = set()
    for entity in DISCRIMINATING_ENTITIES:
        pattern = r"(?<![a-z0-9])" + re.escape(entity) + r"(?![a-z0-9])"
        if re.search(pattern, text_lower):
            found.add(entity)
    return found
|
|
725
|
+
|
|
726
|
+
|
|
727
|
+
def _memories_are_siblings(content_a: str, content_b: str) -> tuple[bool, list[str]]:
    """Decide whether two memories are siblings (similar-but-incompatible).

    Two memories are siblings when at least one discriminating entity is
    mentioned by exactly one of them.

    Returns:
        ``(True, entities)`` with the sorted entities found in only one of
        the two texts, or ``(False, [])`` when both mention the same ones.
    """
    # Symmetric difference: entities present in one text but not the other.
    differing = _extract_discriminators(content_a) ^ _extract_discriminators(content_b)
    if not differing:
        return False, []
    return True, sorted(differing)
|
|
745
|
+
|
|
746
|
+
|
|
747
|
+
def consolidate_semantic(threshold: float = 0.9, dry_run: bool = False) -> dict:
    """Merge near-duplicate LTM memories, with discriminative fusion.

    Every pair of non-dormant LTM memories whose cosine similarity is at least
    ``threshold`` is examined. If the two memories differ in a discriminating
    entity (see ``_memories_are_siblings``) they are linked in
    ``memory_siblings`` and left untouched. Otherwise the memory with the
    lower ``access_count`` is merged into the other one (the earlier row wins
    ties): the survivor is re-embedded, inherits the dropped memory's access
    count, and the dropped row is deleted. The dropped text is appended after
    a ``[CONSOLIDATED]`` separator only when it contributes more than five
    words the survivor does not already contain.

    Args:
        threshold: Minimum cosine similarity for a pair to be considered
            (default 0.9).
        dry_run: If True, report the candidate pairs without writing anything.
            Nothing is marked as merged in this mode, so a memory may appear
            in more than one reported pair.

    Returns:
        Dict with 'merged' (list of merge actions) and 'siblings' (list of
        sibling links found).
    """
    db = _get_db()
    rows = db.execute(
        "SELECT id, content, embedding, source_type, domain, access_count, strength FROM ltm_memories WHERE is_dormant = 0"
    ).fetchall()

    if len(rows) < 2:
        return {"merged": [], "siblings": []}

    memories = [
        {
            "id": row["id"],
            "content": row["content"],
            "vec": _blob_to_array(row["embedding"]),
            "source_type": row["source_type"],
            "domain": row["domain"],
            "access_count": row["access_count"],
            "strength": row["strength"],
        }
        for row in rows
    ]

    merged_ids = set()
    merge_actions = []
    sibling_actions = []

    for i in range(len(memories)):
        first = memories[i]
        if first["id"] in merged_ids:
            continue
        for j in range(i + 1, len(memories)):
            second = memories[j]
            if second["id"] in merged_ids:
                continue

            score = cosine_similarity(first["vec"], second["vec"])
            if score < threshold:
                continue

            # Check for discriminating entities before merging
            is_sibling, discriminators = _memories_are_siblings(
                first["content"], second["content"]
            )

            if is_sibling:
                # Don't merge — record a sibling relationship instead
                if not dry_run:
                    try:
                        db.execute(
                            "INSERT OR IGNORE INTO memory_siblings (memory_a_id, memory_b_id, similarity, discriminators) VALUES (?, ?, ?, ?)",
                            # float(): the score may be a NumPy scalar, which
                            # sqlite3 cannot always bind as a parameter.
                            (first["id"], second["id"], float(score), ",".join(discriminators)),
                        )
                    except sqlite3.Error:
                        # Sibling links are best-effort: a database error here
                        # must not abort the consolidation pass.
                        pass

                sibling_actions.append({
                    "memory_a_id": first["id"],
                    "memory_b_id": second["id"],
                    "score": round(score, 4),
                    "discriminators": discriminators,
                    "content_a": first["content"][:100],
                    "content_b": second["content"][:100],
                })
                continue

            # Safe to merge — no discriminating entities differ
            if first["access_count"] >= second["access_count"]:
                keep, drop = first, second
            else:
                keep, drop = second, first

            action = {
                "keep_id": keep["id"],
                "drop_id": drop["id"],
                "score": round(score, 4),
                "keep_content": keep["content"][:100],
                "drop_content": drop["content"][:100],
                "keep_access": keep["access_count"],
                "drop_access": drop["access_count"],
            }

            if not dry_run:
                new_content = keep["content"]
                drop_words = set(drop["content"].lower().split())
                keep_words = set(keep["content"].lower().split())
                if len(drop_words - keep_words) > 5:
                    new_content = keep["content"] + "\n\n[CONSOLIDATED]: " + drop["content"]

                new_vec = embed(new_content)

                db.execute(
                    "UPDATE ltm_memories SET content = ?, embedding = ?, access_count = access_count + ? WHERE id = ?",
                    (new_content, _array_to_blob(new_vec), drop["access_count"], keep["id"]),
                )
                db.execute("DELETE FROM ltm_memories WHERE id = ?", (drop["id"],))
                merged_ids.add(drop["id"])

                # Keep the in-memory survivor in step with the row just
                # written; otherwise a later merge into the same survivor
                # would start from its stale text and overwrite this one.
                keep["content"] = new_content
                keep["vec"] = new_vec
                keep["access_count"] += drop["access_count"]

            merge_actions.append(action)

            if not dry_run and drop is first:
                # The outer memory no longer exists. Pairing it with further
                # candidates could "keep" the deleted row and delete a live one.
                break

    if not dry_run and (merge_actions or sibling_actions):
        db.commit()

    return {"merged": merge_actions, "siblings": sibling_actions}
|
|
865
|
+
|
|
866
|
+
|
|
867
|
+
def get_siblings(memory_id: int) -> list[dict]:
    """Return the sibling memories (similar-but-incompatible) of ``memory_id``.

    A link is followed in either direction: the given memory may be stored as
    ``memory_a_id`` or ``memory_b_id``. Links whose other end no longer exists
    in ``ltm_memories`` are skipped. Content is truncated to 200 characters.
    """
    db = _get_db()
    links = db.execute(
        """SELECT s.*,
        CASE WHEN s.memory_a_id = ? THEN s.memory_b_id ELSE s.memory_a_id END as sibling_id
        FROM memory_siblings s
        WHERE s.memory_a_id = ? OR s.memory_b_id = ?""",
        (memory_id, memory_id, memory_id)
    ).fetchall()

    result = []
    for link in links:
        other_id = link["sibling_id"]
        other = db.execute(
            "SELECT content, domain, source_type FROM ltm_memories WHERE id = ?", (other_id,)
        ).fetchone()
        if not other:
            continue
        result.append({
            "sibling_id": other_id,
            "similarity": link["similarity"],
            "discriminators": link["discriminators"],
            "content": other["content"][:200],
            "domain": other["domain"],
        })
    return result
|
|
891
|
+
|
|
892
|
+
|
|
893
|
+
def detect_dissonance(new_instruction: str, min_score: float = 0.65) -> list[dict]:
    """Surface established LTM memories that may conflict with a new instruction.

    Only the first 500 characters of the instruction are embedded. Candidates
    are non-dormant LTM memories with strength above 0.8; a candidate is
    reported when its cosine similarity to the instruction reaches
    ``min_score``. Similarity is the only signal used here, so the caller
    decides whether a hit is a real contradiction.

    Args:
        new_instruction: The new instruction or preference from the user
        min_score: Minimum cosine similarity to report a memory

    Returns:
        At most five memories, most similar first; an empty list when the
        instruction embeds to a zero vector.
    """
    db = _get_db()
    query_vec = embed(new_instruction[:500])
    if np.linalg.norm(query_vec) == 0:
        return []

    candidates = db.execute(
        "SELECT id, content, embedding, source_type, domain, strength, access_count FROM ltm_memories WHERE is_dormant = 0 AND strength > 0.8"
    ).fetchall()

    scored = (
        (candidate, cosine_similarity(query_vec, _blob_to_array(candidate["embedding"])))
        for candidate in candidates
    )
    hits = [
        {
            "memory_id": candidate["id"],
            "content": candidate["content"],
            "source_type": candidate["source_type"],
            "domain": candidate["domain"],
            "strength": candidate["strength"],
            "access_count": candidate["access_count"],
            "similarity": round(similarity, 3),
        }
        for candidate, similarity in scored
        if similarity >= min_score
    ]

    ranked = sorted(hits, key=lambda hit: hit["similarity"], reverse=True)
    return ranked[:5]
|
|
933
|
+
|
|
934
|
+
|
|
935
|
+
def resolve_dissonance(memory_id: int, resolution: str, context: str = "") -> str:
    """Apply the user's decision about a memory that conflicts with a new instruction.

    Args:
        memory_id: The LTM memory that conflicts with the new instruction
        resolution: One of:
            - 'paradigm_shift': the user changed their mind; the memory's
              strength is set to 0.3.
            - 'exception': one-time override; the memory is left unchanged.
            - 'override': the memory was wrong; strength is set to 0.05 and
              the memory is made dormant.
        context: Free-text note stored with the correction (first 500 chars).

    Returns:
        Status message. For a missing memory or an unknown resolution a
        message is returned and nothing is written.
    """
    db = _get_db()
    if not db.execute("SELECT * FROM ltm_memories WHERE id = ?", (memory_id,)).fetchone():
        return f"Memory #{memory_id} not found."

    now = datetime.utcnow().isoformat()

    # resolution -> (UPDATE to run, or None when the memory stays as-is; status message)
    outcomes = {
        "paradigm_shift": (
            "UPDATE ltm_memories SET strength = 0.3, last_accessed = ? WHERE id = ?",
            f"Paradigm shift: Memory #{memory_id} decayed to 0.3. New standard will replace it.",
        ),
        "exception": (
            None,
            f"Exception noted: Memory #{memory_id} remains standard. One-time override applied.",
        ),
        "override": (
            "UPDATE ltm_memories SET strength = 0.05, is_dormant = 1, last_accessed = ? WHERE id = ?",
            f"Override: Memory #{memory_id} marked corrupted and dormant.",
        ),
    }

    if resolution not in outcomes:
        return f"Unknown resolution: {resolution}. Use 'paradigm_shift', 'exception', or 'override'."

    update_sql, msg = outcomes[resolution]
    if update_sql is not None:
        db.execute(update_sql, (now, memory_id))

    # Every accepted resolution is logged, including 'exception'.
    db.execute(
        "INSERT INTO memory_corrections (memory_id, store, correction_type, context) VALUES (?, 'ltm', ?, ?)",
        (memory_id, resolution, context[:500])
    )
    db.commit()

    return msg
|
|
987
|
+
|
|
988
|
+
|
|
989
|
+
def check_correction_fatigue() -> list[dict]:
    """Find memories corrected 3+ times in the last 7 days — mark as 'under review'.

    These memories are unreliable: the user keeps overriding them, suggesting
    the memory itself may be wrong or outdated. Each one that still exists in
    ``ltm_memories`` has its strength capped at 0.2 and gets an
    ``under_review`` tag (added once).

    Returns:
        List of flagged memories. ``strength`` is the value read before the
        cap was applied; ``content`` is truncated to 200 characters.
    """
    db = _get_db()
    # NOTE(review): the cutoff is an ISO-8601 string with a 'T' separator —
    # confirm memory_corrections.created_at is stored in the same format,
    # otherwise the >= comparison below may not order as intended.
    cutoff = (datetime.utcnow() - timedelta(days=7)).isoformat()

    rows = db.execute("""
        SELECT memory_id, COUNT(*) as correction_count,
        GROUP_CONCAT(correction_type) as types
        FROM memory_corrections
        WHERE created_at >= ? AND store = 'ltm'
        GROUP BY memory_id
        HAVING COUNT(*) >= 3
    """, (cutoff,)).fetchall()

    fatigued = []
    for row in rows:
        mem = db.execute(
            "SELECT content, strength, source_type, domain FROM ltm_memories WHERE id = ?",
            (row["memory_id"],)
        ).fetchone()
        if not mem:
            continue

        fatigued.append({
            "memory_id": row["memory_id"],
            "corrections_7d": row["correction_count"],
            "types": row["types"],
            "content": mem["content"][:200],
            "strength": mem["strength"],
            "source_type": mem["source_type"],
            "domain": mem["domain"],
        })

        # Auto-mark as under review: cap strength at 0.2 and tag the memory.
        # NULL/empty tags get their own branch: in SQLite NULL || 'x' is NULL,
        # so plain concatenation would silently drop the tag, and an empty
        # string would end up with a leading comma.
        db.execute(
            "UPDATE ltm_memories SET strength = MIN(strength, 0.2), "
            "tags = CASE "
            "WHEN tags LIKE '%under_review%' THEN tags "
            "WHEN tags IS NULL OR tags = '' THEN 'under_review' "
            "ELSE tags || ',under_review' END "
            "WHERE id = ?",
            (row["memory_id"],)
        )

    if fatigued:
        db.commit()

    return fatigued
|
|
1037
|
+
|
|
1038
|
+
|
|
1039
|
+
def detect_sentiment(text: str) -> dict:
    """Analyze user text for sentiment signals.

    Not a model — keyword + heuristic based. Fast and deterministic.
    Signals from POSITIVE_SIGNALS, NEGATIVE_SIGNALS and URGENCY_SIGNALS are
    matched as substrings of the lower-cased text. The negative score is
    boosted by two or more ALL-CAPS words (+2), by a short text (< 30 chars)
    that already has a negative hit (+1), and by any urgency hit (+1).

    Because an urgency hit always raises the negative score, the "urgent"
    result is only produced when the positive and negative scores tie.

    Args:
        text: The user's message. Empty or None yields a neutral result.

    Returns:
        Dict with 'sentiment' ("negative", "positive", "urgent" or "neutral"),
        'intensity' (rounded to 2 decimals), 'signals' (all matched signals)
        and 'guidance' (response-mode hint for NEXO, possibly empty).
    """
    if not text:
        return {"sentiment": "neutral", "intensity": 0.5, "signals": [], "guidance": ""}

    text_lower = text.lower()

    positive_hits = [s for s in POSITIVE_SIGNALS if s in text_lower]
    negative_hits = [s for s in NEGATIVE_SIGNALS if s in text_lower]
    urgency_hits = [s for s in URGENCY_SIGNALS if s in text_lower]

    # Heuristics
    is_short = len(text) < 30
    all_caps_words = sum(1 for w in text.split() if w.isupper() and len(w) > 1)

    # Score
    pos_score = len(positive_hits)
    neg_score = len(negative_hits)

    # Caps/short boost negative
    if all_caps_words >= 2:
        neg_score += 2
    if is_short and neg_score > 0:
        neg_score += 1  # Short + negative = terse frustration

    if urgency_hits:
        neg_score += 1  # Urgency often means something is wrong

    # Determine sentiment
    if neg_score > pos_score and neg_score >= 1:
        sentiment = "negative"
        intensity = min(1.0, 0.3 + neg_score * 0.15)
        if intensity > 0.7:
            guidance = "MODE: Ultra-concise. No explanations. Resolve and show result."
        else:
            guidance = "MODE: Concise. Less context, more direct action."
    elif pos_score > neg_score and pos_score >= 1:
        sentiment = "positive"
        intensity = min(1.0, 0.3 + pos_score * 0.15)
        guidance = "MODE: Normal. Good moment to propose backlog ideas or improvements."
    elif urgency_hits:
        sentiment = "urgent"
        intensity = 0.8
        guidance = "MODE: Immediate action. No preamble."
    else:
        sentiment = "neutral"
        intensity = 0.5
        guidance = ""

    return {
        "sentiment": sentiment,
        "intensity": round(intensity, 2),
        "signals": positive_hits + negative_hits + urgency_hits,
        "guidance": guidance,
    }
|
|
1101
|
+
|
|
1102
|
+
|
|
1103
|
+
def log_sentiment(text: str) -> dict:
    """Run sentiment detection on ``text`` and persist any non-neutral result.

    Neutral detections are returned without touching the database; anything
    else is inserted into ``sentiment_log`` (signals joined with commas) and
    committed. The detection dict is returned in both cases.
    """
    detection = detect_sentiment(text)
    if detection["sentiment"] == "neutral":
        return detection

    db = _get_db()
    db.execute(
        "INSERT INTO sentiment_log (sentiment, intensity, signals) VALUES (?, ?, ?)",
        (detection["sentiment"], detection["intensity"], ",".join(detection["signals"]))
    )
    db.commit()
    return detection
|
|
1114
|
+
|
|
1115
|
+
|
|
1116
|
+
def get_trust_score() -> float:
    """Return the most recent trust score (range 0-100).

    The score is read from the newest ``trust_score`` row. When the table is
    empty, an 'init' row with score 50 is inserted and committed, and 50.0 is
    returned.
    """
    db = _get_db()
    latest = db.execute("SELECT score FROM trust_score ORDER BY id DESC LIMIT 1").fetchone()
    if latest is not None:
        return latest[0]

    # First use: seed the history with the starting score.
    db.execute(
        "INSERT INTO trust_score (score, event, delta, context) VALUES (50, 'init', 0, 'Initial trust score')"
    )
    db.commit()
    return 50.0
|
|
1128
|
+
|
|
1129
|
+
|
|
1130
|
+
def adjust_trust(event: str, context: str = "", custom_delta: float = None) -> dict:
    """Apply a trust event and record the resulting score.

    Args:
        event: Event type; looked up in TRUST_EVENTS unless ``custom_delta``
            is given.
        context: Description of what happened (first 500 chars are stored).
        custom_delta: Point value to use instead of the TRUST_EVENTS entry.

    Returns:
        Dict with old_score, delta, new_score, event. When no custom delta is
        given and the event resolves to 0 points, nothing is written and the
        dict carries ``"error": "unknown event"``.
    """
    db = _get_db()
    old_score = get_trust_score()

    if custom_delta is not None:
        delta = custom_delta
    else:
        delta = TRUST_EVENTS.get(event, 0)
        if delta == 0:
            return {"old_score": old_score, "delta": 0, "new_score": old_score, "event": event, "error": "unknown event"}

    # Clamp to the 0-100 range.
    new_score = max(0.0, min(100.0, old_score + delta))

    db.execute(
        "INSERT INTO trust_score (score, event, delta, context) VALUES (?, ?, ?, ?)",
        (new_score, event, delta, context[:500])
    )
    db.commit()

    summary = {"old_score": round(old_score, 1)}
    summary["delta"] = delta
    summary["new_score"] = round(new_score, 1)
    summary["event"] = event
    return summary
|
|
1162
|
+
|
|
1163
|
+
|
|
1164
|
+
def get_trust_history(days: int = 7) -> dict:
    """Summarise trust events and logged sentiment over the last ``days`` days.

    Returns:
        Dict with the current score, the score at the start of the period
        (first event's score minus its delta, or the current score when there
        are no events), the net change, the events themselves (context
        truncated to 100 chars) and per-sentiment count / average intensity.
    """
    db = _get_db()
    cutoff = (datetime.utcnow() - timedelta(days=days)).isoformat()

    # Trust events in chronological (id) order
    events = db.execute(
        "SELECT event, delta, score, context, created_at FROM trust_score WHERE created_at >= ? ORDER BY id",
        (cutoff,)
    ).fetchall()

    # Sentiment distribution
    sentiments = db.execute(
        "SELECT sentiment, COUNT(*) as cnt, AVG(intensity) as avg_int FROM sentiment_log WHERE created_at >= ? GROUP BY sentiment",
        (cutoff,)
    ).fetchall()

    current = get_trust_score()
    if events:
        earliest = events[0]
        start_score = earliest["score"] - earliest["delta"]
    else:
        start_score = current

    event_list = []
    for entry in events:
        event_list.append({
            "event": entry["event"],
            "delta": entry["delta"],
            "score": round(entry["score"], 1),
            "context": entry["context"][:100],
            "at": entry["created_at"],
        })

    distribution = {}
    for bucket in sentiments:
        distribution[bucket["sentiment"]] = {
            "count": bucket["cnt"],
            "avg_intensity": round(bucket["avg_int"], 2),
        }

    return {
        "current_score": round(current, 1),
        "period_start_score": round(start_score, 1),
        "net_change": round(current - start_score, 1),
        "events": event_list,
        "sentiment_distribution": distribution,
    }
|
|
1191
|
+
|
|
1192
|
+
|
|
1193
|
+
def get_stats() -> dict:
    """Collect headline numbers about the cognitive memory stores.

    Counts and average strengths cover un-promoted STM memories and
    non-dormant LTM memories; dormant LTM memories are counted separately.
    Retrieval figures come from ``retrieval_log``. The top-domain lists hold
    up to five ``(domain, count)`` pairs per store, ignoring empty domains.
    Averages default to 0.0 when there is nothing to average and are rounded
    to three decimals.
    """
    db = _get_db()

    def scalar(sql):
        # First column of the first row of a single-value query.
        return db.execute(sql).fetchone()[0]

    def domain_counts(sql):
        return [(rec["domain"], rec["cnt"]) for rec in db.execute(sql).fetchall()]

    stm_active = scalar("SELECT COUNT(*) FROM stm_memories WHERE promoted_to_ltm = 0")
    ltm_active = scalar("SELECT COUNT(*) FROM ltm_memories WHERE is_dormant = 0")
    ltm_dormant = scalar("SELECT COUNT(*) FROM ltm_memories WHERE is_dormant = 1")

    avg_stm = scalar("SELECT AVG(strength) FROM stm_memories WHERE promoted_to_ltm = 0") or 0.0
    avg_ltm = scalar("SELECT AVG(strength) FROM ltm_memories WHERE is_dormant = 0") or 0.0

    total_retrievals = scalar("SELECT COUNT(*) FROM retrieval_log")
    avg_retrieval_score = scalar("SELECT AVG(top_score) FROM retrieval_log") or 0.0

    top_domains_stm = domain_counts(
        "SELECT domain, COUNT(*) as cnt FROM stm_memories WHERE promoted_to_ltm = 0 AND domain != '' GROUP BY domain ORDER BY cnt DESC LIMIT 5"
    )
    top_domains_ltm = domain_counts(
        "SELECT domain, COUNT(*) as cnt FROM ltm_memories WHERE is_dormant = 0 AND domain != '' GROUP BY domain ORDER BY cnt DESC LIMIT 5"
    )

    return {
        "stm_active": stm_active,
        "ltm_active": ltm_active,
        "ltm_dormant": ltm_dormant,
        "avg_stm_strength": round(avg_stm, 3),
        "avg_ltm_strength": round(avg_ltm, 3),
        "total_retrievals": total_retrievals,
        "avg_retrieval_score": round(avg_retrieval_score, 3),
        "top_domains_stm": top_domains_stm,
        "top_domains_ltm": top_domains_ltm,
    }
|