superlocalmemory 3.2.1 → 3.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +23 -1
- package/README.md +61 -1
- package/package.json +1 -1
- package/pyproject.toml +26 -1
- package/src/superlocalmemory/attribution/signer.py +6 -1
- package/src/superlocalmemory/core/config.py +113 -1
- package/src/superlocalmemory/core/consolidation_engine.py +595 -0
- package/src/superlocalmemory/core/embeddings.py +0 -1
- package/src/superlocalmemory/core/engine.py +164 -674
- package/src/superlocalmemory/core/engine_wiring.py +474 -0
- package/src/superlocalmemory/core/graph_analyzer.py +199 -0
- package/src/superlocalmemory/core/recall_pipeline.py +247 -0
- package/src/superlocalmemory/core/store_pipeline.py +483 -0
- package/src/superlocalmemory/core/worker_pool.py +35 -12
- package/src/superlocalmemory/encoding/auto_linker.py +308 -0
- package/src/superlocalmemory/encoding/context_generator.py +175 -0
- package/src/superlocalmemory/encoding/temporal_validator.py +513 -0
- package/src/superlocalmemory/hooks/auto_invoker.py +484 -0
- package/src/superlocalmemory/retrieval/channel_registry.py +154 -0
- package/src/superlocalmemory/retrieval/engine.py +12 -0
- package/src/superlocalmemory/retrieval/semantic_channel.py +87 -3
- package/src/superlocalmemory/retrieval/spreading_activation.py +311 -0
- package/src/superlocalmemory/retrieval/strategy.py +6 -6
- package/src/superlocalmemory/retrieval/vector_store.py +386 -0
- package/src/superlocalmemory/server/routes/v3_api.py +576 -0
- package/src/superlocalmemory/storage/access_log.py +169 -0
- package/src/superlocalmemory/storage/database.py +288 -0
- package/src/superlocalmemory/storage/schema.py +10 -0
- package/src/superlocalmemory/storage/schema_v32.py +252 -0
- package/src/superlocalmemory/storage/v2_migrator.py +24 -2
|
@@ -0,0 +1,386 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3
|
|
4
|
+
|
|
5
|
+
"""VectorStore -- sqlite-vec backed KNN search with profile isolation.
|
|
6
|
+
|
|
7
|
+
Replaces full-table-scan in SemanticChannel with native vec0 KNN.
|
|
8
|
+
Falls back to ANNIndex if sqlite-vec is unavailable (Rule 03).
|
|
9
|
+
Implements ANNSearchable protocol for GraphBuilder compatibility (Rule 07).
|
|
10
|
+
|
|
11
|
+
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
12
|
+
License: MIT
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
import sqlite3
|
|
19
|
+
import threading
|
|
20
|
+
from dataclasses import dataclass
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Any
|
|
23
|
+
|
|
24
|
+
import numpy as np
|
|
25
|
+
|
|
26
|
+
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass(frozen=True)  # Rule 10
class VectorStoreConfig:
    """Immutable settings consumed by VectorStore.

    Instances are frozen, so fields cannot be reassigned after construction.
    """

    # Length every stored / queried embedding must have; also used as the
    # width of the vec0 ``embedding`` column.
    dimension: int = 768

    # Per-profile vector count at or above which
    # ``VectorStore.needs_binary_quantization`` reports True.  (L4 fix)
    binary_quantization_threshold: int = 100_000

    # Model name recorded in embedding_metadata when ``upsert`` is called
    # without an explicit one.
    model_name: str = "nomic-embed-text-v1.5"

    # When False, VectorStore skips loading sqlite-vec and stays unavailable.
    # Ships enabled by default.
    enabled: bool = True
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class VectorStore:
    """sqlite-vec backed vector store with profile-scoped KNN search.

    - Loads sqlite-vec extension on init (try/except, Rule 03)
    - Creates vec0 virtual table with profile_id PARTITION KEY
    - Maps string fact_ids to integer rowids via embedding_metadata
    - Implements ANNSearchable protocol (Rule 07)
    - Thread-safe via lock on mutations

    If sqlite-vec is unavailable, self.available is False and all
    methods are no-ops (caller uses ANNIndex fallback).

    Every operation opens its own short-lived connection and closes it
    before returning, on both success and error paths.
    """

    # Maximum number of rowids bound into a single metadata ``IN (...)``
    # lookup. Kept well below SQLite's historical default limit of 999 bound
    # parameters per statement so large ``top_k`` values do not fail.
    _ROWID_BATCH_SIZE = 500

    def __init__(self, db_path: Path, config: VectorStoreConfig) -> None:
        """Set up the store and, if enabled, load sqlite-vec and create tables.

        Args:
            db_path: Location of the SQLite database file.
            config: Store settings; ``config.enabled=False`` leaves the store
                unavailable without touching the database.
        """
        self._db_path = Path(db_path)
        self._config = config
        self._lock = threading.Lock()
        self._available = False

        if not config.enabled:
            logger.debug("VectorStore disabled by config (enabled=False)")
            return

        self._available = self._try_load_extension()
        if self._available:
            self._ensure_vec0_table()

    @property
    def available(self) -> bool:
        """True if sqlite-vec is loaded and vec0 table exists."""
        return self._available

    # -- Extension loading (Rule 03) ----------------------------------------

    def _try_load_extension(self) -> bool:
        """Attempt to load sqlite-vec. Returns True on success.

        Catches ImportError, AttributeError, and any other exception.
        """
        try:
            import sqlite_vec  # noqa: F401
            conn = self._connect()
            conn.close()
            return True
        except ImportError:
            logger.debug("sqlite-vec not installed. VectorStore unavailable.")
            return False
        except AttributeError:
            logger.debug(
                "enable_load_extension not available (macOS default Python). "
                "VectorStore unavailable."
            )
            return False
        except Exception as exc:
            logger.debug("sqlite-vec load failed: %s", exc)
            return False

    def _connect(self) -> sqlite3.Connection:
        """Open a connection with sqlite-vec loaded.

        Every connection loads the extension fresh (per-call model).

        Raises:
            ImportError: If the ``sqlite_vec`` package is not installed.
            Exception: Whatever the setup steps raise; the half-initialised
                connection is closed before the error propagates.
        """
        import sqlite_vec

        conn = sqlite3.connect(str(self._db_path))
        try:
            conn.row_factory = sqlite3.Row
            conn.execute("PRAGMA busy_timeout = 10000")
            # FK enforcement is OFF here because VectorStore operates on its
            # own tables (fact_embeddings + embedding_metadata). The store
            # pipeline guarantees fact/profile exist before calling upsert.
            conn.enable_load_extension(True)
            sqlite_vec.load(conn)
            conn.enable_load_extension(False)
        except Exception:
            # Do not leak the connection when extension loading fails.
            conn.close()
            raise
        return conn

    def _session(self) -> Any:
        """Return a context manager yielding a connection that is always closed.

        Closing without ``commit()`` discards any pending changes, so a
        failure inside the ``with`` body rolls the operation back.
        """
        from contextlib import closing

        return closing(self._connect())

    # -- Table creation -----------------------------------------------------

    def _ensure_vec0_table(self) -> None:
        """Create the vec0 virtual table and embedding_metadata if not exist.

        On any failure the store marks itself unavailable instead of raising.
        """
        dim = self._config.dimension
        vec0_ddl = (
            f"CREATE VIRTUAL TABLE IF NOT EXISTS fact_embeddings USING vec0("
            f"profile_id TEXT PARTITION KEY, "
            f"embedding float[{dim}] distance_metric=cosine"
            f")"
        )
        meta_ddl = (
            "CREATE TABLE IF NOT EXISTS embedding_metadata ("
            "vec_rowid INTEGER PRIMARY KEY, "
            "fact_id TEXT NOT NULL UNIQUE, "
            "profile_id TEXT NOT NULL DEFAULT 'default', "
            "model_name TEXT NOT NULL DEFAULT '', "
            "dimension INTEGER NOT NULL DEFAULT 768, "
            "created_at TEXT NOT NULL DEFAULT (datetime('now'))"
            ")"
        )
        meta_idx_fact = (
            "CREATE INDEX IF NOT EXISTS idx_embmeta_fact "
            "ON embedding_metadata (fact_id)"
        )
        meta_idx_profile = (
            "CREATE INDEX IF NOT EXISTS idx_embmeta_profile "
            "ON embedding_metadata (profile_id)"
        )
        try:
            with self._session() as conn:
                for ddl in (vec0_ddl, meta_ddl, meta_idx_fact, meta_idx_profile):
                    conn.execute(ddl)
                conn.commit()
        except Exception as exc:
            logger.debug("vec0 table creation failed: %s", exc)
            self._available = False

    # -- Serialization ------------------------------------------------------

    @staticmethod
    def _serialize_f32(vector: list[float]) -> bytes:
        """Serialize float list to raw float32 bytes for sqlite-vec."""
        return np.array(vector, dtype=np.float32).tobytes()

    # -- CRUD Operations ----------------------------------------------------

    def upsert(
        self,
        fact_id: str,
        profile_id: str,
        embedding: list[float],
        model_name: str = "",
    ) -> bool:
        """Insert or update a vector in the vec0 table.

        An existing ``fact_id`` has only its embedding replaced; a new one
        gets a vec0 row plus an embedding_metadata row mapping it back.

        Thread-safe: acquires self._lock.

        Args:
            fact_id: External identifier of the fact.
            profile_id: Profile (partition) the vector belongs to.
            embedding: Vector of exactly ``config.dimension`` floats.
            model_name: Model recorded in metadata for new rows; falls back
                to ``config.model_name`` when empty.

        Returns:
            True on success, False on failure, dimension mismatch, or if
            the store is unavailable.
        """
        if not self._available:
            return False

        if len(embedding) != self._config.dimension:
            logger.debug(
                "Dimension mismatch: got %d, expected %d",
                len(embedding), self._config.dimension,
            )
            return False

        vec_bytes = self._serialize_f32(embedding)

        with self._lock:
            try:
                with self._session() as conn:
                    # Check if fact_id already exists in metadata
                    row = conn.execute(
                        "SELECT vec_rowid FROM embedding_metadata "
                        "WHERE fact_id = ?",
                        (fact_id,),
                    ).fetchone()

                    if row is not None:
                        # UPDATE existing
                        conn.execute(
                            "UPDATE fact_embeddings SET embedding = ? "
                            "WHERE rowid = ?",
                            (vec_bytes, row["vec_rowid"]),
                        )
                    else:
                        # INSERT new
                        conn.execute(
                            "INSERT INTO fact_embeddings(profile_id, embedding) "
                            "VALUES (?, ?)",
                            (profile_id, vec_bytes),
                        )
                        rowid = conn.execute(
                            "SELECT last_insert_rowid()"
                        ).fetchone()[0]
                        conn.execute(
                            "INSERT INTO embedding_metadata "
                            "(vec_rowid, fact_id, profile_id, model_name, dimension) "
                            "VALUES (?, ?, ?, ?, ?)",
                            (rowid, fact_id, profile_id,
                             model_name or self._config.model_name,
                             self._config.dimension),
                        )

                    conn.commit()
                return True
            except Exception as exc:
                logger.debug("upsert failed for fact_id=%s: %s", fact_id, exc)
                return False

    def search(
        self,
        query_embedding: list[float],
        top_k: int = 30,
        profile_id: str | None = None,
    ) -> list[tuple[str, float]]:
        """KNN search. Returns [(fact_id, similarity_score)].

        Score is cosine similarity (1.0 - distance), clamped at 0.0, and the
        list is sorted by descending score. Hits without a metadata row are
        dropped.

        Args:
            query_embedding: Query vector of ``config.dimension`` floats.
            top_k: Maximum number of neighbours to request.
            profile_id: Restrict the search to one profile; None searches
                across all profiles.

        Returns:
            Empty list if unavailable, ``top_k`` is not positive, dim
            mismatch, or error.
        """
        if not self._available:
            return []

        # Nothing can be returned for a non-positive k; skip the DB round trip.
        if top_k <= 0:
            return []

        if len(query_embedding) != self._config.dimension:
            return []

        vec_bytes = self._serialize_f32(query_embedding)

        try:
            with self._session() as conn:
                if profile_id is not None:
                    rows = conn.execute(
                        "SELECT rowid, distance "
                        "FROM fact_embeddings "
                        "WHERE embedding MATCH ? "
                        "AND profile_id = ? "
                        "AND k = ?",
                        (vec_bytes, profile_id, top_k),
                    ).fetchall()
                else:
                    rows = conn.execute(
                        "SELECT rowid, distance "
                        "FROM fact_embeddings "
                        "WHERE embedding MATCH ? "
                        "AND k = ?",
                        (vec_bytes, top_k),
                    ).fetchall()

                if not rows:
                    return []

                hits = [(r["rowid"], r["distance"]) for r in rows]

                # Map rowids -> fact_ids via embedding_metadata, in batches so
                # the number of bound parameters per statement stays small.
                fact_ids: dict[int, str] = {}
                step = self._ROWID_BATCH_SIZE
                for start in range(0, len(hits), step):
                    batch = [rid for rid, _ in hits[start:start + step]]
                    placeholders = ",".join("?" for _ in batch)
                    meta_rows = conn.execute(
                        f"SELECT vec_rowid, fact_id FROM embedding_metadata "
                        f"WHERE vec_rowid IN ({placeholders})",
                        batch,
                    ).fetchall()
                    for mr in meta_rows:
                        fact_ids[mr["vec_rowid"]] = str(mr["fact_id"])

            results: list[tuple[str, float]] = [
                (fact_ids[rid], max(0.0, 1.0 - distance))
                for rid, distance in hits
                if rid in fact_ids
            ]
            results.sort(key=lambda item: item[1], reverse=True)
            return results

        except Exception as exc:
            logger.debug("search failed: %s", exc)
            return []

    def delete(self, fact_id: str) -> bool:
        """Remove a vector from vec0 and metadata.

        Thread-safe: acquires self._lock.
        Returns True if deleted, False if not found, unavailable, or error.
        """
        if not self._available:
            return False

        with self._lock:
            try:
                with self._session() as conn:
                    row = conn.execute(
                        "SELECT vec_rowid FROM embedding_metadata "
                        "WHERE fact_id = ?",
                        (fact_id,),
                    ).fetchone()

                    if row is None:
                        return False

                    rowid = row["vec_rowid"]
                    conn.execute(
                        "DELETE FROM fact_embeddings WHERE rowid = ?",
                        (rowid,),
                    )
                    conn.execute(
                        "DELETE FROM embedding_metadata WHERE vec_rowid = ?",
                        (rowid,),
                    )
                    conn.commit()
                return True
            except Exception as exc:
                logger.debug("delete failed for fact_id=%s: %s", fact_id, exc)
                return False

    def count(self, profile_id: str | None = None) -> int:
        """Count vectors in the store, optionally for a single profile.

        The count is taken from embedding_metadata.
        Returns 0 if unavailable or on error.
        """
        if not self._available:
            return 0

        try:
            with self._session() as conn:
                if profile_id is not None:
                    row = conn.execute(
                        "SELECT COUNT(*) AS c FROM embedding_metadata "
                        "WHERE profile_id = ?",
                        (profile_id,),
                    ).fetchone()
                else:
                    row = conn.execute(
                        "SELECT COUNT(*) AS c FROM embedding_metadata",
                    ).fetchone()
            return int(row["c"]) if row else 0
        except Exception as exc:
            logger.debug("count failed: %s", exc)
            return 0

    def rebuild_from_facts(
        self,
        facts: list[tuple[str, str, list[float]]],
    ) -> int:
        """Migrate existing facts from JSON TEXT embeddings to vec0.

        Args:
            facts: List of (fact_id, profile_id, embedding) tuples.

        Returns:
            Number of vectors successfully migrated (0 if unavailable).
        """
        if not self._available:
            # Every upsert would be rejected; avoid walking the input.
            return 0

        return sum(
            1
            for fact_id, profile_id, embedding in facts
            if self.upsert(fact_id, profile_id, embedding)
        )

    def needs_binary_quantization(self, profile_id: str) -> bool:
        """Check if BQ should be enabled (count >= configured threshold)."""
        return self.count(profile_id) >= self._config.binary_quantization_threshold
|