ai-browser-profile 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +118 -0
- package/ai_browser_profile/__init__.py +6 -0
- package/ai_browser_profile/db.py +929 -0
- package/ai_browser_profile/embeddings.py +196 -0
- package/ai_browser_profile/extract.py +108 -0
- package/ai_browser_profile/ingestors/__init__.py +0 -0
- package/ai_browser_profile/ingestors/bookmarks.py +185 -0
- package/ai_browser_profile/ingestors/browser_detect.py +100 -0
- package/ai_browser_profile/ingestors/constants.py +208 -0
- package/ai_browser_profile/ingestors/history.py +123 -0
- package/ai_browser_profile/ingestors/indexeddb.py +203 -0
- package/ai_browser_profile/ingestors/localstorage.py +66 -0
- package/ai_browser_profile/ingestors/logins.py +46 -0
- package/ai_browser_profile/ingestors/messages.py +151 -0
- package/ai_browser_profile/ingestors/notion.py +313 -0
- package/ai_browser_profile/ingestors/webdata.py +134 -0
- package/autofill/SKILL.md +252 -0
- package/bin/cli.js +315 -0
- package/clean.py +295 -0
- package/extract.py +53 -0
- package/package.json +40 -0
- package/review/SKILL.md +171 -0
- package/review/run.sh +82 -0
- package/setup/SKILL.md +177 -0
- package/skill/SKILL.md +180 -0
- package/whatsapp/SKILL.md +321 -0
|
@@ -0,0 +1,929 @@
|
|
|
1
|
+
"""MemoryDB — schema, upsert, search, mark_accessed, stats, profile, text_search, semantic_search."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import sqlite3
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
from ai_browser_profile.embeddings import (
|
|
9
|
+
embed_text, embed_batch, setup_embeddings_table, store_embedding, cosine_search,
|
|
10
|
+
is_available as embeddings_available,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
log = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
SCHEMA = """
|
|
16
|
+
CREATE TABLE IF NOT EXISTS memories (
|
|
17
|
+
id INTEGER PRIMARY KEY,
|
|
18
|
+
key TEXT NOT NULL,
|
|
19
|
+
value TEXT NOT NULL,
|
|
20
|
+
confidence REAL DEFAULT 1.0,
|
|
21
|
+
source TEXT,
|
|
22
|
+
appeared_count INTEGER DEFAULT 0,
|
|
23
|
+
accessed_count INTEGER DEFAULT 0,
|
|
24
|
+
created_at TEXT,
|
|
25
|
+
last_appeared_at TEXT,
|
|
26
|
+
last_accessed_at TEXT,
|
|
27
|
+
superseded_by INTEGER REFERENCES memories(id),
|
|
28
|
+
superseded_at TEXT,
|
|
29
|
+
search_text TEXT,
|
|
30
|
+
UNIQUE(key, value)
|
|
31
|
+
);
|
|
32
|
+
|
|
33
|
+
CREATE TABLE IF NOT EXISTS memory_tags (
|
|
34
|
+
memory_id INTEGER REFERENCES memories(id) ON DELETE CASCADE,
|
|
35
|
+
tag TEXT NOT NULL,
|
|
36
|
+
PRIMARY KEY (memory_id, tag)
|
|
37
|
+
);
|
|
38
|
+
CREATE INDEX IF NOT EXISTS idx_tags ON memory_tags(tag);
|
|
39
|
+
|
|
40
|
+
CREATE TABLE IF NOT EXISTS memory_links (
|
|
41
|
+
source_id INTEGER REFERENCES memories(id) ON DELETE CASCADE,
|
|
42
|
+
target_id INTEGER REFERENCES memories(id) ON DELETE CASCADE,
|
|
43
|
+
relation TEXT NOT NULL,
|
|
44
|
+
created_at TEXT,
|
|
45
|
+
PRIMARY KEY (source_id, target_id, relation)
|
|
46
|
+
);
|
|
47
|
+
CREATE INDEX IF NOT EXISTS idx_links_source ON memory_links(source_id);
|
|
48
|
+
CREATE INDEX IF NOT EXISTS idx_links_target ON memory_links(target_id);
|
|
49
|
+
|
|
50
|
+
CREATE INDEX IF NOT EXISTS idx_search_text ON memories(search_text);
|
|
51
|
+
|
|
52
|
+
CREATE TABLE IF NOT EXISTS metadata (
|
|
53
|
+
key TEXT PRIMARY KEY,
|
|
54
|
+
value TEXT
|
|
55
|
+
);
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
# ── Key Schema ────────────────────────────────────────────────────────
|
|
59
|
+
|
|
60
|
+
# Cardinality of every known key prefix (the part of a key before ':').
# "single" prefixes keep one live value per key; "multi" prefixes may hold many.
KEY_SCHEMA = {
    # Identity (single-value: new value supersedes old)
    **dict.fromkeys(
        (
            "first_name", "last_name", "full_name",
            "date_of_birth", "gender", "job_title",
            "card_holder_name",
        ),
        "single",
    ),
    # Multi-value (one per suffix, e.g., account:github.com)
    **dict.fromkeys(
        (
            "email", "phone", "username", "language",
            "street_address", "address_line_2",
            "city", "state",
            "zip", "country", "company",
            "account", "tool", "contact", "linkedin", "bookmark",
            "product", "project", "interest",
            "skill", "location", "relationship",
            "work", "business", "activity",
        ),
        "multi",
    ),
}
|
|
75
|
+
|
|
76
|
+
# The tag vocabulary that TAG_MIGRATION maps legacy tags onto.
CANONICAL_TAGS = {
    "identity",
    "contact_info",
    "address",
    "payment",
    "account",
    "tool",
    "contact",
    "work",
    "knowledge",
    "communication",
    "social",
    "finance",
}
|
|
81
|
+
|
|
82
|
+
# Legacy tag -> canonical tag. Used both by the v2 data migration and to
# normalize tags passed to upsert().
TAG_MIGRATION = {
    "email": "contact_info",
    "phone": "contact_info",
    "credential": "account",
    "dev": "tool",
    "ai": "tool",
    "location": "address",
    "company": "work",
    "business": "knowledge",
    "interest": "knowledge",
    "lifestyle": "knowledge",
    "product": "knowledge",
    "project": "knowledge",
    "skill": "knowledge",
    "activity": "knowledge",
    "language": "identity",
    "relationship": "contact",
    "real_estate": "knowledge",
    "spiritual": "knowledge",
    "autofill": "identity",
}
|
|
93
|
+
|
|
94
|
+
# Profile section mapping based on KEY_SCHEMA
|
|
95
|
+
# Profile section name -> the memory keys grouped under that section.
PROFILE_SECTIONS = {
    "identity": [
        "first_name",
        "last_name",
        "full_name",
        "email",
        "phone",
        "date_of_birth",
        "gender",
        "job_title",
        "language",
    ],
    "address": [
        "street_address",
        "address_line_2",
        "city",
        "state",
        "zip",
        "country",
    ],
    "payment": ["card_holder_name"],
    "work": ["company"],
}
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class MemoryDB:
|
|
105
|
+
def __init__(self, path: str = "memories.db", defer_embeddings: bool = False):
    """Open (or create) the SQLite store at ``path`` and prepare it for use.

    Switches the connection to WAL journaling, turns on foreign-key
    enforcement, applies SCHEMA and runs pending migrations. Unless
    ``defer_embeddings`` is true, the result of
    ``setup_embeddings_table(conn)`` becomes the ``_vec_ready`` flag that
    gates every embedding-based code path; when deferred the flag is False.
    """
    self.path = path
    self._defer_embeddings = defer_embeddings

    connection = sqlite3.connect(path)
    self.conn = connection
    for pragma in ("PRAGMA journal_mode=WAL", "PRAGMA foreign_keys=ON"):
        connection.execute(pragma)
    connection.executescript(SCHEMA)
    self._migrate()

    if defer_embeddings:
        self._vec_ready = False
    else:
        self._vec_ready = setup_embeddings_table(connection)
|
|
114
|
+
|
|
115
|
+
# ── Migration ──────────────────────────────────────────────────
|
|
116
|
+
|
|
117
|
+
def _migrate(self):
|
|
118
|
+
"""Add new columns/tables to existing DBs and migrate to v2."""
|
|
119
|
+
cols = {r[1] for r in self.conn.execute("PRAGMA table_info(memories)").fetchall()}
|
|
120
|
+
if "superseded_by" not in cols:
|
|
121
|
+
self.conn.execute("ALTER TABLE memories ADD COLUMN superseded_by INTEGER REFERENCES memories(id)")
|
|
122
|
+
if "superseded_at" not in cols:
|
|
123
|
+
self.conn.execute("ALTER TABLE memories ADD COLUMN superseded_at TEXT")
|
|
124
|
+
if "search_text" not in cols:
|
|
125
|
+
self.conn.execute("ALTER TABLE memories ADD COLUMN search_text TEXT")
|
|
126
|
+
self.conn.execute("UPDATE memories SET search_text = key || ': ' || value WHERE search_text IS NULL")
|
|
127
|
+
self.conn.commit()
|
|
128
|
+
if "reviewed_at" not in cols:
|
|
129
|
+
self.conn.execute("ALTER TABLE memories ADD COLUMN reviewed_at TEXT")
|
|
130
|
+
|
|
131
|
+
# v2 migration: normalize confidence, migrate tags
|
|
132
|
+
version = self.get_meta("schema_version") or "1"
|
|
133
|
+
if version == "1":
|
|
134
|
+
self._migrate_v2()
|
|
135
|
+
|
|
136
|
+
def _migrate_v2(self):
    """Apply the v2 data migration and record schema_version '2'.

    Resets every memory's confidence to 1.0, rewrites each legacy tag listed
    in TAG_MIGRATION to its canonical name, and commits.
    """
    log.info("Migrating to schema v2: normalizing confidence, migrating tags")
    db = self.conn

    # Set all confidence to 1.0
    db.execute("UPDATE memories SET confidence = 1.0")

    # Migrate tags
    for legacy, canonical in TAG_MIGRATION.items():
        # Rename where possible; rows whose (memory_id, canonical) pair
        # already exists are left alone by OR IGNORE ...
        db.execute("""
            UPDATE OR IGNORE memory_tags SET tag = ? WHERE tag = ?
        """, (canonical, legacy))
        # ... and those leftover legacy rows are then removed.
        db.execute("DELETE FROM memory_tags WHERE tag = ?", (legacy,))

    db.execute("INSERT OR REPLACE INTO metadata (key, value) VALUES ('schema_version', '2')")
    db.commit()
    log.info("Schema v2 migration complete")
|
|
155
|
+
|
|
156
|
+
# ── Tag Normalization ─────────────────────────────────────────
|
|
157
|
+
|
|
158
|
+
def _normalize_tags(self, tags: list[str]) -> list[str]:
    """Map each tag through TAG_MIGRATION (unknown tags pass through) and drop duplicates."""
    return list({TAG_MIGRATION.get(raw, raw) for raw in tags})
|
|
164
|
+
|
|
165
|
+
def _key_prefix(self, key: str) -> str:
|
|
166
|
+
"""Extract key prefix before ':' delimiter."""
|
|
167
|
+
return key.split(":")[0] if ":" in key else key
|
|
168
|
+
|
|
169
|
+
# ── Upsert ─────────────────────────────────────────────────────
|
|
170
|
+
|
|
171
|
+
def upsert(self, key: str, value: str, tags: list[str],
           confidence: float = 1.0, source: str = "") -> Optional[int]:
    """Insert or update a memory with semantic dedup.

    Decision framework:
    1. Exact (key, value) match → bump appeared_count, merge source
    2. Semantic match (cosine >= 0.92, same key prefix) → supersede old
    3. Same exact key, different value, single-cardinality → supersede old
    4. Brand new → INSERT

    Args:
        key: Memory key, optionally ``prefix:suffix``.
        value: Memory value; surrounding whitespace is stripped.
        tags: Tags to attach (normalized through TAG_MIGRATION first).
        confidence: Accepted for backward compatibility; not used — every
            row is written with confidence 1.0.
        source: Origin label. On an exact match it is appended to the
            stored ", "-separated source list if not already listed.

    Returns:
        The id of the affected/created row, or None when ``value`` is
        empty or whitespace-only (nothing is written in that case).
    """
    if not value or not value.strip():
        return None
    value = value.strip()
    now = datetime.now(timezone.utc).isoformat()
    search_text = f"{key}: {value}"
    tags = self._normalize_tags(tags)

    # Warn on unknown key prefix (soft — doesn't block)
    prefix = self._key_prefix(key)
    if prefix not in KEY_SCHEMA and not key.startswith(("autofill:", "address_type_")):
        log.debug(f"Unknown key prefix: {prefix} (key={key})")

    # 1. Exact (key, value) match
    existing = self.conn.execute(
        "SELECT id, source, appeared_count FROM memories WHERE key=? AND value=?",
        (key, value),
    ).fetchone()

    if existing:
        mem_id, old_source, appeared = existing
        merged_source = old_source or ""
        # Compare whole ", "-delimited entries. A plain substring test would
        # treat "git" as already present in "github" and never record it.
        if source and f", {source}, " not in f", {merged_source}, ":
            merged_source = f"{merged_source}, {source}" if merged_source else source
        self.conn.execute(
            "UPDATE memories SET source=?, appeared_count=?, last_appeared_at=?, search_text=?, confidence=1.0 WHERE id=?",
            (merged_source, (appeared or 0) + 1, now, search_text, mem_id),
        )
        self._ensure_tags(mem_id, tags)
        self.conn.commit()
        return mem_id

    # 2. Semantic dedup — check for near-duplicate with same key prefix
    mem_id = self._try_semantic_supersede(key, value, search_text, tags, source, now)
    if mem_id:
        return mem_id

    # 3. Single-cardinality key supersession
    if KEY_SCHEMA.get(prefix, "multi") == "single":
        old_row = self.conn.execute(
            "SELECT id FROM memories WHERE key=? AND superseded_by IS NULL",
            (key,),
        ).fetchone()
        if old_row:
            return self._insert_and_supersede(key, value, search_text, tags, source, now, old_row[0])

    # 4. Brand new
    return self._insert_new(key, value, search_text, tags, source, now)
|
|
231
|
+
|
|
232
|
+
def _try_semantic_supersede(self, key: str, value: str, search_text: str,
|
|
233
|
+
tags: list[str], source: str, now: str) -> Optional[int]:
|
|
234
|
+
"""Check for semantic near-duplicate. Returns new mem_id if superseded, else None."""
|
|
235
|
+
if not self._vec_ready:
|
|
236
|
+
return None
|
|
237
|
+
|
|
238
|
+
vec = embed_text(search_text)
|
|
239
|
+
if vec is None:
|
|
240
|
+
return None
|
|
241
|
+
|
|
242
|
+
prefix = self._key_prefix(key)
|
|
243
|
+
matches = cosine_search(self.conn, vec, limit=5, threshold=0.92)
|
|
244
|
+
|
|
245
|
+
for old_id, similarity in matches:
|
|
246
|
+
# Check same key prefix and not already superseded
|
|
247
|
+
old_row = self.conn.execute(
|
|
248
|
+
"SELECT key, value, superseded_by FROM memories WHERE id=?", (old_id,)
|
|
249
|
+
).fetchone()
|
|
250
|
+
if not old_row or old_row[2] is not None:
|
|
251
|
+
continue
|
|
252
|
+
old_prefix = self._key_prefix(old_row[0])
|
|
253
|
+
if old_prefix != prefix:
|
|
254
|
+
continue
|
|
255
|
+
# Same key prefix, high similarity — supersede
|
|
256
|
+
log.debug(f"Semantic dedup: '{old_row[0]}: {old_row[1][:50]}' → '{key}: {value[:50]}' (sim={similarity:.3f})")
|
|
257
|
+
return self._insert_and_supersede(key, value, search_text, tags, source, now, old_id)
|
|
258
|
+
|
|
259
|
+
return None
|
|
260
|
+
|
|
261
|
+
def _insert_new(self, key: str, value: str, search_text: str,
|
|
262
|
+
tags: list[str], source: str, now: str) -> int:
|
|
263
|
+
"""Insert a brand new memory."""
|
|
264
|
+
cursor = self.conn.execute(
|
|
265
|
+
"INSERT INTO memories (key, value, confidence, source, created_at, search_text, appeared_count, last_appeared_at) "
|
|
266
|
+
"VALUES (?, ?, 1.0, ?, ?, ?, 1, ?)",
|
|
267
|
+
(key, value, source, now, search_text, now),
|
|
268
|
+
)
|
|
269
|
+
mem_id = cursor.lastrowid
|
|
270
|
+
self._ensure_tags(mem_id, tags)
|
|
271
|
+
self._auto_link(mem_id, key, value)
|
|
272
|
+
self._store_embedding(mem_id, search_text)
|
|
273
|
+
self.conn.commit()
|
|
274
|
+
return mem_id
|
|
275
|
+
|
|
276
|
+
def _insert_and_supersede(self, key: str, value: str, search_text: str,
|
|
277
|
+
tags: list[str], source: str, now: str,
|
|
278
|
+
old_id: int) -> int:
|
|
279
|
+
"""Insert new memory and supersede old one."""
|
|
280
|
+
cursor = self.conn.execute(
|
|
281
|
+
"INSERT INTO memories (key, value, confidence, source, created_at, search_text, appeared_count, last_appeared_at) "
|
|
282
|
+
"VALUES (?, ?, 1.0, ?, ?, ?, 1, ?)",
|
|
283
|
+
(key, value, source, now, search_text, now),
|
|
284
|
+
)
|
|
285
|
+
mem_id = cursor.lastrowid
|
|
286
|
+
self.conn.execute(
|
|
287
|
+
"UPDATE memories SET superseded_by=?, superseded_at=? WHERE id=?",
|
|
288
|
+
(mem_id, now, old_id),
|
|
289
|
+
)
|
|
290
|
+
self._ensure_tags(mem_id, tags)
|
|
291
|
+
self._auto_link(mem_id, key, value)
|
|
292
|
+
self._store_embedding(mem_id, search_text)
|
|
293
|
+
self.conn.commit()
|
|
294
|
+
return mem_id
|
|
295
|
+
|
|
296
|
+
def _ensure_tags(self, mem_id: int, tags: list[str]):
|
|
297
|
+
"""Ensure all tags exist for a memory."""
|
|
298
|
+
for tag in tags:
|
|
299
|
+
self.conn.execute(
|
|
300
|
+
"INSERT OR IGNORE INTO memory_tags (memory_id, tag) VALUES (?, ?)",
|
|
301
|
+
(mem_id, tag),
|
|
302
|
+
)
|
|
303
|
+
|
|
304
|
+
def _store_embedding(self, mem_id: int, search_text: str):
|
|
305
|
+
"""Compute and store embedding for a memory."""
|
|
306
|
+
if not self._vec_ready:
|
|
307
|
+
return
|
|
308
|
+
vec = embed_text(search_text)
|
|
309
|
+
if vec:
|
|
310
|
+
store_embedding(self.conn, mem_id, vec)
|
|
311
|
+
|
|
312
|
+
# ── Search ─────────────────────────────────────────────────────
|
|
313
|
+
|
|
314
|
+
def search(self, tags: list[str], limit: int = 20,
|
|
315
|
+
include_superseded: bool = False) -> list[dict]:
|
|
316
|
+
"""Search memories by tags, ranked by hit_rate then appeared/accessed counts."""
|
|
317
|
+
placeholders = ",".join("?" for _ in tags)
|
|
318
|
+
superseded_filter = "" if include_superseded else "AND m.superseded_by IS NULL"
|
|
319
|
+
rows = self.conn.execute(f"""
|
|
320
|
+
SELECT DISTINCT m.id, m.key, m.value, m.source,
|
|
321
|
+
m.appeared_count, m.accessed_count,
|
|
322
|
+
m.last_appeared_at, m.last_accessed_at, m.created_at,
|
|
323
|
+
CASE WHEN m.appeared_count = 0 THEN 0.0
|
|
324
|
+
ELSE CAST(m.accessed_count AS REAL) / m.appeared_count
|
|
325
|
+
END AS hit_rate
|
|
326
|
+
FROM memories m
|
|
327
|
+
JOIN memory_tags t ON m.id = t.memory_id
|
|
328
|
+
WHERE t.tag IN ({placeholders}) {superseded_filter}
|
|
329
|
+
ORDER BY hit_rate DESC, m.accessed_count DESC, m.appeared_count DESC
|
|
330
|
+
LIMIT ?
|
|
331
|
+
""", (*tags, limit)).fetchall()
|
|
332
|
+
|
|
333
|
+
now = datetime.now(timezone.utc).isoformat()
|
|
334
|
+
|
|
335
|
+
results = []
|
|
336
|
+
for r in rows:
|
|
337
|
+
results.append({
|
|
338
|
+
"id": r[0], "key": r[1], "value": r[2],
|
|
339
|
+
"source": r[3], "appeared_count": r[4] + 1,
|
|
340
|
+
"accessed_count": r[5], "hit_rate": r[9],
|
|
341
|
+
})
|
|
342
|
+
|
|
343
|
+
ids = [r["id"] for r in results]
|
|
344
|
+
if ids:
|
|
345
|
+
id_placeholders = ",".join("?" for _ in ids)
|
|
346
|
+
self.conn.execute(
|
|
347
|
+
f"UPDATE memories SET appeared_count = appeared_count + 1, "
|
|
348
|
+
f"accessed_count = accessed_count + 1, "
|
|
349
|
+
f"last_appeared_at = ?, last_accessed_at = ? "
|
|
350
|
+
f"WHERE id IN ({id_placeholders})",
|
|
351
|
+
(now, now, *ids),
|
|
352
|
+
)
|
|
353
|
+
self.conn.commit()
|
|
354
|
+
|
|
355
|
+
return results
|
|
356
|
+
|
|
357
|
+
# ── Semantic Search ────────────────────────────────────────────
|
|
358
|
+
|
|
359
|
+
def semantic_search(self, query: str, limit: int = 20,
|
|
360
|
+
threshold: float = 0.3) -> list[dict]:
|
|
361
|
+
"""Search memories by semantic similarity. Falls back to text_search if unavailable."""
|
|
362
|
+
if not self._vec_ready:
|
|
363
|
+
return self.text_search(query, limit)
|
|
364
|
+
|
|
365
|
+
vec = embed_text(query, prefix="search_query")
|
|
366
|
+
if vec is None:
|
|
367
|
+
return self.text_search(query, limit)
|
|
368
|
+
|
|
369
|
+
matches = cosine_search(self.conn, vec, limit=limit, threshold=threshold)
|
|
370
|
+
if not matches:
|
|
371
|
+
return self.text_search(query, limit)
|
|
372
|
+
|
|
373
|
+
now = datetime.now(timezone.utc).isoformat()
|
|
374
|
+
results = []
|
|
375
|
+
for mem_id, similarity in matches:
|
|
376
|
+
row = self.conn.execute(
|
|
377
|
+
"SELECT id, key, value, source, appeared_count, accessed_count, superseded_by "
|
|
378
|
+
"FROM memories WHERE id=?",
|
|
379
|
+
(mem_id,),
|
|
380
|
+
).fetchone()
|
|
381
|
+
if not row or row[6] is not None: # skip superseded
|
|
382
|
+
continue
|
|
383
|
+
results.append({
|
|
384
|
+
"id": row[0], "key": row[1], "value": row[2],
|
|
385
|
+
"source": row[3], "appeared_count": row[4] + 1,
|
|
386
|
+
"accessed_count": row[5] + 1, "similarity": similarity,
|
|
387
|
+
})
|
|
388
|
+
|
|
389
|
+
# Auto-bump appeared + accessed for all returned results
|
|
390
|
+
ids = [r["id"] for r in results]
|
|
391
|
+
if ids:
|
|
392
|
+
id_placeholders = ",".join("?" for _ in ids)
|
|
393
|
+
self.conn.execute(
|
|
394
|
+
f"UPDATE memories SET appeared_count = appeared_count + 1, "
|
|
395
|
+
f"accessed_count = accessed_count + 1, "
|
|
396
|
+
f"last_appeared_at = ?, last_accessed_at = ? "
|
|
397
|
+
f"WHERE id IN ({id_placeholders})",
|
|
398
|
+
(now, now, *ids),
|
|
399
|
+
)
|
|
400
|
+
self.conn.commit()
|
|
401
|
+
|
|
402
|
+
return results
|
|
403
|
+
|
|
404
|
+
# ── Text Search ────────────────────────────────────────────────
|
|
405
|
+
|
|
406
|
+
def text_search(self, query: str, limit: int = 20) -> list[dict]:
|
|
407
|
+
"""Full-text-ish search across memories using LIKE matching."""
|
|
408
|
+
words = query.lower().split()
|
|
409
|
+
if not words:
|
|
410
|
+
return []
|
|
411
|
+
conditions = " AND ".join("LOWER(m.search_text) LIKE ?" for _ in words)
|
|
412
|
+
params = [f"%{w}%" for w in words]
|
|
413
|
+
rows = self.conn.execute(f"""
|
|
414
|
+
SELECT m.id, m.key, m.value, m.source,
|
|
415
|
+
m.appeared_count, m.accessed_count,
|
|
416
|
+
CASE WHEN m.appeared_count = 0 THEN 0.0
|
|
417
|
+
ELSE CAST(m.accessed_count AS REAL) / m.appeared_count
|
|
418
|
+
END AS hit_rate
|
|
419
|
+
FROM memories m
|
|
420
|
+
WHERE {conditions} AND m.superseded_by IS NULL
|
|
421
|
+
ORDER BY hit_rate DESC, m.accessed_count DESC
|
|
422
|
+
LIMIT ?
|
|
423
|
+
""", (*params, limit)).fetchall()
|
|
424
|
+
|
|
425
|
+
now = datetime.now(timezone.utc).isoformat()
|
|
426
|
+
results = []
|
|
427
|
+
for r in rows:
|
|
428
|
+
st = f"{r[1]}: {r[2]}".lower()
|
|
429
|
+
matched = sum(1 for w in words if w in st)
|
|
430
|
+
results.append({
|
|
431
|
+
"id": r[0], "key": r[1], "value": r[2],
|
|
432
|
+
"source": r[3], "appeared_count": r[4] + 1, "accessed_count": r[5] + 1,
|
|
433
|
+
"hit_rate": r[6], "score": matched,
|
|
434
|
+
})
|
|
435
|
+
results.sort(key=lambda x: (x["score"], x["hit_rate"]), reverse=True)
|
|
436
|
+
|
|
437
|
+
# Auto-bump appeared + accessed for all returned results
|
|
438
|
+
ids = [r["id"] for r in results]
|
|
439
|
+
if ids:
|
|
440
|
+
id_placeholders = ",".join("?" for _ in ids)
|
|
441
|
+
self.conn.execute(
|
|
442
|
+
f"UPDATE memories SET appeared_count = appeared_count + 1, "
|
|
443
|
+
f"accessed_count = accessed_count + 1, "
|
|
444
|
+
f"last_appeared_at = ?, last_accessed_at = ? "
|
|
445
|
+
f"WHERE id IN ({id_placeholders})",
|
|
446
|
+
(now, now, *ids),
|
|
447
|
+
)
|
|
448
|
+
self.conn.commit()
|
|
449
|
+
|
|
450
|
+
return results
|
|
451
|
+
|
|
452
|
+
# ── Backfill Embeddings ────────────────────────────────────────
|
|
453
|
+
|
|
454
|
+
def backfill_embeddings(self) -> int:
    """Compute embeddings for live memories that do not have one yet.

    Only non-superseded memories are considered. Texts are embedded in one
    ``embed_batch`` call as ``"key: value"``; entries for which it yields
    None are skipped. The transaction is committed once at the end.

    Returns:
        Number of embeddings stored (0 when embeddings are not ready, there
        are no memories, or every memory is already embedded).
    """
    if not self._vec_ready:
        log.warning("sqlite-vec not available, cannot backfill embeddings")
        return 0

    rows = self.conn.execute(
        "SELECT id, key, value FROM memories WHERE superseded_by IS NULL"
    ).fetchall()

    if not rows:
        return 0

    # Check which already have embeddings
    existing_ids = set()
    try:
        existing_ids.update(
            mid for (mid,) in self.conn.execute("SELECT memory_id FROM memory_embeddings")
        )
    except sqlite3.Error as exc:
        # Best effort: if the table cannot be read, treat every memory as
        # un-embedded — but leave a trace instead of failing silently.
        log.debug(f"Could not read existing embeddings: {exc}")

    to_embed = [(r[0], f"{r[1]}: {r[2]}") for r in rows if r[0] not in existing_ids]
    if not to_embed:
        log.info("All memories already have embeddings")
        return 0

    log.info(f"Backfilling embeddings for {len(to_embed)} memories...")
    texts = [t[1] for t in to_embed]
    vectors = embed_batch(texts)

    count = 0
    for (mem_id, _), vec in zip(to_embed, vectors):
        if vec is not None:
            store_embedding(self.conn, mem_id, vec)
            count += 1

    self.conn.commit()
    log.info(f"Embedded {count} memories")
    return count
|
|
493
|
+
|
|
494
|
+
def regenerate_embeddings(self) -> int:
    """Delete every stored embedding and rebuild them via backfill_embeddings.

    Intended for use after the embedding model changes. Returns the number
    of embeddings recomputed, or 0 when embeddings are not ready.
    """
    if self._vec_ready:
        self.conn.execute("DELETE FROM memory_embeddings")
        self.conn.commit()
        log.info("Cleared all existing embeddings")
        return self.backfill_embeddings()

    log.warning("Embeddings table not available, cannot regenerate")
    return 0
|
|
504
|
+
|
|
505
|
+
# ── Contradiction / History ────────────────────────────────────
|
|
506
|
+
|
|
507
|
+
def history(self, key: str) -> list[dict]:
|
|
508
|
+
"""Return all values for a key ordered by created_at, showing supersession chain."""
|
|
509
|
+
rows = self.conn.execute("""
|
|
510
|
+
SELECT id, key, value, confidence, source, created_at,
|
|
511
|
+
superseded_by, superseded_at
|
|
512
|
+
FROM memories WHERE key=? ORDER BY created_at
|
|
513
|
+
""", (key,)).fetchall()
|
|
514
|
+
return [
|
|
515
|
+
{
|
|
516
|
+
"id": r[0], "key": r[1], "value": r[2], "confidence": r[3],
|
|
517
|
+
"source": r[4], "created_at": r[5],
|
|
518
|
+
"superseded_by": r[6], "superseded_at": r[7],
|
|
519
|
+
}
|
|
520
|
+
for r in rows
|
|
521
|
+
]
|
|
522
|
+
|
|
523
|
+
# ── Entity Linking ─────────────────────────────────────────────
|
|
524
|
+
|
|
525
|
+
def link(self, source_id: int, target_id: int, relation: str):
|
|
526
|
+
"""Create a link between two memories."""
|
|
527
|
+
now = datetime.now(timezone.utc).isoformat()
|
|
528
|
+
self.conn.execute(
|
|
529
|
+
"INSERT OR IGNORE INTO memory_links (source_id, target_id, relation, created_at) VALUES (?, ?, ?, ?)",
|
|
530
|
+
(source_id, target_id, relation, now),
|
|
531
|
+
)
|
|
532
|
+
|
|
533
|
+
def related(self, memory_id: int, relation: Optional[str] = None) -> list[dict]:
|
|
534
|
+
"""Return memories linked to this one."""
|
|
535
|
+
if relation:
|
|
536
|
+
rows = self.conn.execute("""
|
|
537
|
+
SELECT m.id, m.key, m.value, m.confidence, ml.relation
|
|
538
|
+
FROM memory_links ml
|
|
539
|
+
JOIN memories m ON m.id = CASE WHEN ml.source_id = ? THEN ml.target_id ELSE ml.source_id END
|
|
540
|
+
WHERE (ml.source_id = ? OR ml.target_id = ?) AND ml.relation = ?
|
|
541
|
+
""", (memory_id, memory_id, memory_id, relation)).fetchall()
|
|
542
|
+
else:
|
|
543
|
+
rows = self.conn.execute("""
|
|
544
|
+
SELECT m.id, m.key, m.value, m.confidence, ml.relation
|
|
545
|
+
FROM memory_links ml
|
|
546
|
+
JOIN memories m ON m.id = CASE WHEN ml.source_id = ? THEN ml.target_id ELSE ml.source_id END
|
|
547
|
+
WHERE ml.source_id = ? OR ml.target_id = ?
|
|
548
|
+
""", (memory_id, memory_id, memory_id)).fetchall()
|
|
549
|
+
return [
|
|
550
|
+
{"id": r[0], "key": r[1], "value": r[2], "confidence": r[3], "relation": r[4]}
|
|
551
|
+
for r in rows
|
|
552
|
+
]
|
|
553
|
+
|
|
554
|
+
def _auto_link(self, mem_id: int, key: str, value: str):
|
|
555
|
+
"""Deterministic auto-linking on upsert."""
|
|
556
|
+
if key == "email":
|
|
557
|
+
accounts = self.conn.execute(
|
|
558
|
+
"SELECT id FROM memories WHERE key LIKE 'account:%' AND value=? AND id!=?",
|
|
559
|
+
(value, mem_id),
|
|
560
|
+
).fetchall()
|
|
561
|
+
for (aid,) in accounts:
|
|
562
|
+
self.link(mem_id, aid, "belongs_to")
|
|
563
|
+
|
|
564
|
+
if key.startswith("account:"):
|
|
565
|
+
same_user = self.conn.execute(
|
|
566
|
+
"SELECT id FROM memories WHERE key LIKE 'account:%' AND value=? AND id!=?",
|
|
567
|
+
(value, mem_id),
|
|
568
|
+
).fetchall()
|
|
569
|
+
for (sid,) in same_user:
|
|
570
|
+
self.link(mem_id, sid, "same_identity")
|
|
571
|
+
|
|
572
|
+
# ── Mark Accessed ──────────────────────────────────────────────
|
|
573
|
+
|
|
574
|
+
def mark_accessed(self, memory_id: int):
|
|
575
|
+
"""Manually bump accessed_count. Kept for backward compat — search methods now auto-increment."""
|
|
576
|
+
now = datetime.now(timezone.utc).isoformat()
|
|
577
|
+
self.conn.execute(
|
|
578
|
+
"UPDATE memories SET accessed_count = accessed_count + 1, last_accessed_at = ? WHERE id = ?",
|
|
579
|
+
(now, memory_id),
|
|
580
|
+
)
|
|
581
|
+
self.conn.commit()
|
|
582
|
+
|
|
583
|
+
# ── Stats ──────────────────────────────────────────────────────
|
|
584
|
+
|
|
585
|
+
def stats(self) -> dict:
|
|
586
|
+
"""Return summary stats about the memory database."""
|
|
587
|
+
total = self.conn.execute("SELECT COUNT(*) FROM memories WHERE superseded_by IS NULL").fetchone()[0]
|
|
588
|
+
superseded = self.conn.execute("SELECT COUNT(*) FROM memories WHERE superseded_by IS NOT NULL").fetchone()[0]
|
|
589
|
+
by_tag = self.conn.execute(
|
|
590
|
+
"SELECT tag, COUNT(*) FROM memory_tags GROUP BY tag ORDER BY COUNT(*) DESC"
|
|
591
|
+
).fetchall()
|
|
592
|
+
top_accessed = self.conn.execute(
|
|
593
|
+
"SELECT key, value, accessed_count FROM memories WHERE accessed_count > 0 ORDER BY accessed_count DESC LIMIT 10"
|
|
594
|
+
).fetchall()
|
|
595
|
+
links = self.conn.execute("SELECT COUNT(*) FROM memory_links").fetchone()[0]
|
|
596
|
+
|
|
597
|
+
# Count embeddings
|
|
598
|
+
embedded = 0
|
|
599
|
+
try:
|
|
600
|
+
embedded = self.conn.execute("SELECT COUNT(*) FROM memory_embeddings").fetchone()[0]
|
|
601
|
+
except Exception:
|
|
602
|
+
pass
|
|
603
|
+
|
|
604
|
+
return {
|
|
605
|
+
"total_memories": total,
|
|
606
|
+
"superseded": superseded,
|
|
607
|
+
"links": links,
|
|
608
|
+
"embedded": embedded,
|
|
609
|
+
"by_tag": {r[0]: r[1] for r in by_tag},
|
|
610
|
+
"top_accessed": [{"key": r[0], "value": r[1], "accessed": r[2]} for r in top_accessed],
|
|
611
|
+
}
|
|
612
|
+
|
|
613
|
+
# ── Profile ────────────────────────────────────────────────────
|
|
614
|
+
|
|
615
|
+
def profile(self) -> dict:
|
|
616
|
+
"""Generate structured user profile from non-superseded memories."""
|
|
617
|
+
rows = self.conn.execute("""
|
|
618
|
+
SELECT m.key, m.value, m.appeared_count
|
|
619
|
+
FROM memories m
|
|
620
|
+
WHERE m.superseded_by IS NULL
|
|
621
|
+
ORDER BY m.appeared_count DESC, m.accessed_count DESC
|
|
622
|
+
""").fetchall()
|
|
623
|
+
|
|
624
|
+
by_key: dict[str, list[tuple]] = {}
|
|
625
|
+
for key, value, appeared in rows:
|
|
626
|
+
by_key.setdefault(key, []).append((value, appeared))
|
|
627
|
+
|
|
628
|
+
def pick(k, n=1, min_appeared=1):
|
|
629
|
+
"""Pick top n values for a key, filtered by min appeared_count."""
|
|
630
|
+
vals = [(v, a) for v, a in by_key.get(k, []) if a >= min_appeared]
|
|
631
|
+
if n == 1:
|
|
632
|
+
return vals[0][0] if vals else None
|
|
633
|
+
return [v for v, _ in vals[:n]]
|
|
634
|
+
|
|
635
|
+
def pick_prefixed(prefix, n=20, min_appeared=1):
|
|
636
|
+
"""Pick top n entries matching a key prefix."""
|
|
637
|
+
items = []
|
|
638
|
+
for k, vals in by_key.items():
|
|
639
|
+
if k.startswith(prefix):
|
|
640
|
+
suffix = k[len(prefix):]
|
|
641
|
+
top_val, top_appeared = vals[0]
|
|
642
|
+
if top_appeared >= min_appeared:
|
|
643
|
+
items.append((suffix, top_val, top_appeared))
|
|
644
|
+
items.sort(key=lambda x: x[2], reverse=True)
|
|
645
|
+
return [(name, val) for name, val, _ in items[:n]]
|
|
646
|
+
|
|
647
|
+
# Identity
|
|
648
|
+
name_parts = [pick("first_name"), pick("last_name")]
|
|
649
|
+
full_name = pick("full_name") or " ".join(n for n in name_parts if n)
|
|
650
|
+
emails = pick("email", n=10, min_appeared=5)
|
|
651
|
+
phones = pick("phone", n=5, min_appeared=2)
|
|
652
|
+
# Usernames: exclude values that look like emails
|
|
653
|
+
raw_usernames = pick("username", n=20, min_appeared=2) or []
|
|
654
|
+
usernames = [u for u in raw_usernames if "@" not in u and "." not in u]
|
|
655
|
+
|
|
656
|
+
# Addresses — deduplicate similar streets, group into full addresses
|
|
657
|
+
addresses = []
|
|
658
|
+
raw_streets = pick("street_address", n=10, min_appeared=2) or []
|
|
659
|
+
# Deduplicate: normalize by lowercasing and stripping punctuation
|
|
660
|
+
seen_streets = set()
|
|
661
|
+
unique_streets = []
|
|
662
|
+
for s in raw_streets:
|
|
663
|
+
# Clean trailing commas/spaces
|
|
664
|
+
s = s.rstrip(", ")
|
|
665
|
+
normalized = s.lower().replace(" blvd", " boulevard").replace(" st.", " street").replace(" st", " street")
|
|
666
|
+
if normalized not in seen_streets:
|
|
667
|
+
seen_streets.add(normalized)
|
|
668
|
+
unique_streets.append(s)
|
|
669
|
+
if unique_streets:
|
|
670
|
+
primary = {
|
|
671
|
+
"street": unique_streets[0],
|
|
672
|
+
"city": pick("city"), "state": pick("state"),
|
|
673
|
+
"zip": pick("zip"), "country": pick("country"),
|
|
674
|
+
}
|
|
675
|
+
addresses.append(primary)
|
|
676
|
+
for s in unique_streets[1:]:
|
|
677
|
+
addresses.append({"street": s})
|
|
678
|
+
|
|
679
|
+
# Payment — count active (non-expired) cards
|
|
680
|
+
card_holder = pick("card_holder_name")
|
|
681
|
+
expiries = pick("card_expiry", n=20)
|
|
682
|
+
from datetime import datetime
|
|
683
|
+
now_ym = datetime.now().strftime("%Y%m")
|
|
684
|
+
active_cards = []
|
|
685
|
+
for e in (expiries or []):
|
|
686
|
+
if isinstance(e, str) and "/" in e:
|
|
687
|
+
try:
|
|
688
|
+
mm, yyyy = e.split("/")
|
|
689
|
+
if f"{yyyy}{mm.zfill(2)}" >= now_ym:
|
|
690
|
+
active_cards.append(e)
|
|
691
|
+
except ValueError:
|
|
692
|
+
pass
|
|
693
|
+
|
|
694
|
+
# Work — deduplicate case-insensitive, strip suffixes
|
|
695
|
+
raw_companies = pick("company", n=10, min_appeared=2) or []
|
|
696
|
+
seen_companies = set()
|
|
697
|
+
companies = []
|
|
698
|
+
for c in (raw_companies if isinstance(raw_companies, list) else [raw_companies]):
|
|
699
|
+
# Normalize: lowercase, strip Inc/LLC/etc
|
|
700
|
+
norm = c.lower().rstrip(".,").replace(", inc", "").replace(" inc", "")
|
|
701
|
+
if norm not in seen_companies:
|
|
702
|
+
seen_companies.add(norm)
|
|
703
|
+
companies.append(c)
|
|
704
|
+
|
|
705
|
+
# Tools — sorted by appeared_count
|
|
706
|
+
tool_items = pick_prefixed("tool:", n=20)
|
|
707
|
+
|
|
708
|
+
# Accounts — group by username/email, deduplicate domains
|
|
709
|
+
acct_items = pick_prefixed("account:", n=100)
|
|
710
|
+
accounts_by_user: dict[str, list[str]] = {}
|
|
711
|
+
for domain, user in acct_items:
|
|
712
|
+
accounts_by_user.setdefault(user, []).append(domain)
|
|
713
|
+
|
|
714
|
+
# Contacts
|
|
715
|
+
contact_items = pick_prefixed("contact:", n=10000)
|
|
716
|
+
total_contacts = len(contact_items)
|
|
717
|
+
|
|
718
|
+
# Projects (Notion)
|
|
719
|
+
project_items = pick_prefixed("project:", n=20)
|
|
720
|
+
|
|
721
|
+
return {
|
|
722
|
+
"name": full_name or None,
|
|
723
|
+
"emails": emails or [],
|
|
724
|
+
"phones": phones or [],
|
|
725
|
+
"usernames": usernames or [],
|
|
726
|
+
"gender": pick("gender"),
|
|
727
|
+
"date_of_birth": pick("date_of_birth"),
|
|
728
|
+
"addresses": addresses,
|
|
729
|
+
"card_holder": card_holder,
|
|
730
|
+
"active_cards": len(active_cards),
|
|
731
|
+
"companies": companies or [],
|
|
732
|
+
"tools": [name for name, _ in tool_items],
|
|
733
|
+
"accounts": accounts_by_user,
|
|
734
|
+
"total_contacts": total_contacts,
|
|
735
|
+
"projects": [name for name, _ in project_items],
|
|
736
|
+
}
|
|
737
|
+
|
|
738
|
+
def profile_text(self) -> str:
    """Format the profile as markdown text for LLM context injection.

    Renders the dict returned by ``self.profile()`` as one markdown line
    per populated section (name, emails, phones, handles, addresses,
    payment, companies, tools, accounts, projects, contacts). Sections
    with no data are omitted entirely.

    Returns:
        The rendered lines joined with newlines. The first line is always
        the ``## User Profile`` heading.
    """
    # Labels that carry no service identity when they appear as the
    # second-level part of a host name.
    generic_labels = {
        "com", "co", "io", "ai", "org", "net", "ru",
        "gov", "ca", "us", "localhost", "localhost:3000",
    }

    def service_name(domain):
        """Return a short service label for *domain*, or None for localhost."""
        # Only a leading "www." is dropped; a plain replace() would also
        # mangle hosts that merely contain that substring.
        labels = domain.removeprefix("www.").split(".")
        short = labels[-2] if len(labels) >= 2 else labels[0]  # "mercury" from "app.mercury.com"
        if short in generic_labels:
            # For hosts like dmv.ca.gov use the leftmost label; with only
            # two labels fall back to the whole domain.
            short = labels[0] if len(labels) > 2 else domain
        return None if "localhost" in short else short

    p = self.profile()
    lines = ["## User Profile"]

    if p.get("name"):
        heading = f"**Name:** {p['name']}"
        if p.get("gender"):
            heading += f" ({p['gender']})"
        lines.append(heading)

    for label, field in (("Emails", "emails"), ("Phones", "phones"), ("Handles", "usernames")):
        if p.get(field):
            lines.append(f"**{label}:** {', '.join(p[field])}")

    # Addresses: "street, city, state zip, country" with missing pieces skipped.
    for i, addr in enumerate(p.get("addresses") or []):
        city_state = ", ".join(filter(None, [addr.get("city"), addr.get("state")]))
        parts = [part for part in (addr.get("street"), city_state) if part]
        zip_code = addr.get("zip")
        if zip_code:
            if parts:
                # The zip is glued to the last component with a space, not a comma.
                parts[-1] = f"{parts[-1]} {zip_code}"
            else:
                parts.append(str(zip_code))
        if addr.get("country"):
            parts.append(addr["country"])
        if parts:
            # The label number follows the position in the address list.
            label = "**Address:**" if i == 0 else f"**Address {i + 1}:**"
            lines.append(f"{label} {', '.join(parts)}")

    # Payment
    card_parts = []
    if p.get("card_holder"):
        card_parts.append(p["card_holder"])
    active_cards = p.get("active_cards")
    if active_cards:
        noun = "card" if active_cards == 1 else "cards"
        card_parts.append(f"{active_cards} {noun} on file")
    if card_parts:
        lines.append(f"**Payment:** {', '.join(card_parts)}")

    # Companies
    companies = p.get("companies")
    if companies:
        if isinstance(companies, list):
            lines.append(f"**Companies:** {', '.join(companies)}")
        else:
            lines.append(f"**Company:** {companies}")

    # Tools
    if p.get("tools"):
        lines.append(f"**Top Tools:** {', '.join(p['tools'][:15])}")

    # Accounts grouped by identity: the 8 identities with the most domains.
    accounts = p.get("accounts")
    if accounts:
        account_lines = []
        busiest = sorted(accounts.items(), key=lambda kv: len(kv[1]), reverse=True)[:8]
        for user, domains in busiest:
            # dict.fromkeys de-duplicates while keeping first-seen order.
            names = list(dict.fromkeys(
                name for name in map(service_name, domains) if name is not None
            ))
            if not names:
                # Every domain was localhost; nothing worth showing.
                continue
            extra = f" +{len(names) - 6}" if len(names) > 6 else ""
            account_lines.append(f"  {user}: {', '.join(names[:6])}{extra}")
        if account_lines:
            lines.append("**Accounts:**")
            lines.extend(account_lines)

    # Projects: clean up page titles and de-duplicate by core name.
    if p.get("projects"):
        seen_proj = set()
        clean = []
        for name in p["projects"]:
            title = name.rstrip(" ‣").strip()
            # Last meaningful segment, e.g. "Eugene O'Donald" from "TBD: Eugene O'Donald".
            core = title.split(" - ")[-1].split(": ")[-1].lower().strip()
            if title and core not in seen_proj:
                seen_proj.add(core)
                clean.append(title)
        if clean:
            lines.append(f"**Projects:** {', '.join(clean[:10])}")

    # Contacts
    if p.get("total_contacts"):
        lines.append(f"**Contacts:** {p['total_contacts']} total")

    return "\n".join(lines)
|
|
841
|
+
|
|
842
|
+
# ── Review Operations ─────────────────────────────────────────
|
|
843
|
+
|
|
844
|
+
def delete(self, memory_id: int):
    """Delete a memory together with its tags and links.

    Also clears ``superseded_by`` on every memory that pointed at the
    deleted row. All statements run in a single transaction: the
    connection's context manager commits on success and rolls back if any
    statement raises, so a failure cannot leave a half-deleted memory.

    Args:
        memory_id: ``memories.id`` of the row to remove. An unknown id is
            a no-op.
    """
    # NOTE(review): no embeddings row is removed here — confirm whether
    # stored embeddings for this id need cleaning up as well.
    with self.conn:
        self.conn.execute("DELETE FROM memory_tags WHERE memory_id=?", (memory_id,))
        self.conn.execute(
            "DELETE FROM memory_links WHERE source_id=? OR target_id=?",
            (memory_id, memory_id),
        )
        # Un-supersede rows that were replaced by the memory being removed.
        self.conn.execute(
            "UPDATE memories SET superseded_by=NULL WHERE superseded_by=?", (memory_id,)
        )
        self.conn.execute("DELETE FROM memories WHERE id=?", (memory_id,))
|
|
851
|
+
|
|
852
|
+
def update_memory(self, memory_id: int, key: Optional[str] = None, value: Optional[str] = None,
                  confidence: Optional[float] = None, tags: Optional[list[str]] = None):
    """Update fields on a memory. Regenerates search_text if key/value changed.

    Only arguments that are not ``None`` are written. When ``key`` or
    ``value`` changes, ``search_text`` is rebuilt as ``"<key>: <value>"``
    from the new value(s) and whichever of the two was left untouched.
    Passing ``tags`` replaces the memory's whole tag set; an empty list
    removes all tags.

    All writes happen in one transaction (committed on success, rolled back
    on error). If no memory with ``memory_id`` exists nothing is written,
    so no orphan tag rows can be created.

    Args:
        memory_id: ``memories.id`` of the row to update.
        key: New key, or ``None`` to keep the current one.
        value: New value, or ``None`` to keep the current one.
        confidence: New confidence, or ``None`` to keep the current one.
        tags: Replacement tag list, or ``None`` to leave tags untouched.
    """
    # NOTE(review): a stored embedding for this memory is not refreshed
    # when key/value change — confirm whether callers re-embed.
    with self.conn:
        row = self.conn.execute(
            "SELECT key, value FROM memories WHERE id=?", (memory_id,)
        ).fetchone()
        if row is None:
            return

        # Column name -> new value, in the order they appear in the UPDATE.
        changes = {}
        if key is not None:
            changes["key"] = key
        if value is not None:
            changes["value"] = value
        if confidence is not None:
            changes["confidence"] = confidence
        if key is not None or value is not None:
            new_key = key if key is not None else row[0]
            new_val = value if value is not None else row[1]
            changes["search_text"] = f"{new_key}: {new_val}"

        if changes:
            # Column names come from the fixed set above, never from input.
            assignments = ", ".join(f"{column}=?" for column in changes)
            self.conn.execute(
                f"UPDATE memories SET {assignments} WHERE id=?",
                (*changes.values(), memory_id),
            )

        if tags is not None:
            self.conn.execute("DELETE FROM memory_tags WHERE memory_id=?", (memory_id,))
            self.conn.executemany(
                "INSERT OR IGNORE INTO memory_tags (memory_id, tag) VALUES (?, ?)",
                [(memory_id, tag) for tag in tags],
            )
|
|
880
|
+
|
|
881
|
+
def get_unreviewed(self, limit: int = 100) -> list[dict]:
    """Return memories that have not been reviewed yet, with their tags.

    Selects rows whose ``reviewed_at`` is NULL, ordered by id and capped at
    ``limit``. Each result is a dict with the keys ``id``, ``key``,
    ``value``, ``confidence``, ``source``, ``created_at``,
    ``superseded_by`` and ``tags`` (a list of tag strings fetched per
    memory from ``memory_tags``).
    """
    columns = ("id", "key", "value", "confidence", "source", "created_at", "superseded_by")
    cursor = self.conn.execute("""
        SELECT m.id, m.key, m.value, m.confidence, m.source, m.created_at,
               m.superseded_by
        FROM memories m
        WHERE m.reviewed_at IS NULL
        ORDER BY m.id
        LIMIT ?
    """, (limit,))

    pending = []
    for record in cursor.fetchall():
        # Column order above mirrors the SELECT list.
        entry = dict(zip(columns, record))
        tag_rows = self.conn.execute(
            "SELECT tag FROM memory_tags WHERE memory_id=?", (entry["id"],)
        ).fetchall()
        entry["tags"] = [tag_row[0] for tag_row in tag_rows]
        pending.append(entry)
    return pending
|
|
902
|
+
|
|
903
|
+
def mark_reviewed(self, memory_ids: list[int]):
    """Set reviewed_at to the current UTC time for the given memory IDs.

    The same ISO-8601 timestamp is written to every listed row. The update
    is issued once per id via ``executemany`` rather than as a single
    ``IN (...)`` list, so the number of ids is not bounded by SQLite's
    limit on bound parameters per statement. All updates share one
    transaction, committed on success and rolled back on error.

    Args:
        memory_ids: ``memories.id`` values to mark. An empty list is a
            no-op; ids that match no row are ignored.
    """
    if not memory_ids:
        return
    now = datetime.now(timezone.utc).isoformat()
    with self.conn:
        self.conn.executemany(
            "UPDATE memories SET reviewed_at=? WHERE id=?",
            [(now, memory_id) for memory_id in memory_ids],
        )
|
|
914
|
+
|
|
915
|
+
def get_meta(self, key: str) -> Optional[str]:
    """Look up one entry in the ``metadata`` table.

    Returns the stored value for ``key``, or ``None`` when no row with
    that key exists.
    """
    cursor = self.conn.execute("SELECT value FROM metadata WHERE key=?", (key,))
    found = cursor.fetchone()
    if not found:
        return None
    return found[0]
|
|
919
|
+
|
|
920
|
+
def set_meta(self, key: str, value: str):
    """Store ``value`` under ``key`` in the ``metadata`` table and commit.

    Uses INSERT OR REPLACE, so writing an existing key replaces its row.
    """
    conn = self.conn
    entry = (key, value)
    conn.execute("INSERT OR REPLACE INTO metadata (key, value) VALUES (?, ?)", entry)
    conn.commit()
|
|
924
|
+
|
|
925
|
+
# ── Close ──────────────────────────────────────────────────────
|
|
926
|
+
|
|
927
|
+
def close(self):
    """Commit any pending changes, then close the database connection."""
    conn = self.conn
    conn.commit()  # flush pending writes before the handle goes away
    conn.close()
|