superlocalmemory 3.2.1 → 3.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/CHANGELOG.md +23 -1
  2. package/README.md +61 -1
  3. package/package.json +1 -1
  4. package/pyproject.toml +26 -1
  5. package/src/superlocalmemory/attribution/signer.py +6 -1
  6. package/src/superlocalmemory/core/config.py +114 -1
  7. package/src/superlocalmemory/core/consolidation_engine.py +595 -0
  8. package/src/superlocalmemory/core/embeddings.py +0 -1
  9. package/src/superlocalmemory/core/engine.py +164 -674
  10. package/src/superlocalmemory/core/engine_wiring.py +474 -0
  11. package/src/superlocalmemory/core/graph_analyzer.py +199 -0
  12. package/src/superlocalmemory/core/recall_pipeline.py +247 -0
  13. package/src/superlocalmemory/core/store_pipeline.py +483 -0
  14. package/src/superlocalmemory/core/worker_pool.py +35 -12
  15. package/src/superlocalmemory/encoding/auto_linker.py +308 -0
  16. package/src/superlocalmemory/encoding/context_generator.py +175 -0
  17. package/src/superlocalmemory/encoding/temporal_validator.py +513 -0
  18. package/src/superlocalmemory/hooks/auto_invoker.py +484 -0
  19. package/src/superlocalmemory/retrieval/channel_registry.py +154 -0
  20. package/src/superlocalmemory/retrieval/engine.py +12 -0
  21. package/src/superlocalmemory/retrieval/semantic_channel.py +87 -3
  22. package/src/superlocalmemory/retrieval/spreading_activation.py +311 -0
  23. package/src/superlocalmemory/retrieval/strategy.py +6 -6
  24. package/src/superlocalmemory/retrieval/vector_store.py +386 -0
  25. package/src/superlocalmemory/server/routes/v3_api.py +576 -0
  26. package/src/superlocalmemory/storage/access_log.py +169 -0
  27. package/src/superlocalmemory/storage/database.py +288 -0
  28. package/src/superlocalmemory/storage/schema.py +10 -0
  29. package/src/superlocalmemory/storage/schema_v32.py +252 -0
  30. package/src/superlocalmemory/storage/v2_migrator.py +24 -2
@@ -0,0 +1,386 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory V3
4
+
5
+ """VectorStore -- sqlite-vec backed KNN search with profile isolation.
6
+
7
+ Replaces full-table-scan in SemanticChannel with native vec0 KNN.
8
+ Falls back to ANNIndex if sqlite-vec is unavailable (Rule 03).
9
+ Implements ANNSearchable protocol for GraphBuilder compatibility (Rule 07).
10
+
11
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
12
+ License: MIT
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import logging
18
+ import sqlite3
19
+ import threading
20
+ from dataclasses import dataclass
21
+ from pathlib import Path
22
+ from typing import Any
23
+
24
+ import numpy as np
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
@dataclass(frozen=True)  # Rule 10
class VectorStoreConfig:
    """Immutable settings consumed by VectorStore.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Length every stored and queried embedding must have.
    dimension: int = 768
    # Per-profile vector count at which binary quantization is advised (L4 fix).
    binary_quantization_threshold: int = 100_000
    # Model name recorded in metadata when the caller does not supply one.
    model_name: str = "nomic-embed-text-v1.5"
    # Rule 12: feature flag, default off.
    enabled: bool = False
36
+
37
+
38
class VectorStore:
    """sqlite-vec backed vector store with profile-scoped KNN search.

    - Loads sqlite-vec extension on init (try/except, Rule 03)
    - Creates vec0 virtual table with profile_id PARTITION KEY
    - Maps string fact_ids to integer rowids via embedding_metadata
    - Implements ANNSearchable protocol (Rule 07)
    - Thread-safe via lock on mutations

    If sqlite-vec is unavailable, self.available is False and all
    methods are no-ops (caller uses ANNIndex fallback).

    Every operation opens its own short-lived connection and always closes
    it, including on error paths, so a failing call never leaks a handle.
    """

    def __init__(self, db_path: Path, config: VectorStoreConfig) -> None:
        """Bind the store to a database file and try to enable sqlite-vec.

        Args:
            db_path: Location of the SQLite database file.
            config: Store settings; when ``config.enabled`` is False the
                database is not touched at all.
        """
        self._db_path = Path(db_path)
        self._config = config
        self._lock = threading.Lock()
        self._available = False

        if not config.enabled:
            logger.debug("VectorStore disabled by config (enabled=False)")
            return

        self._available = self._try_load_extension()
        if self._available:
            self._ensure_vec0_table()

    @property
    def available(self) -> bool:
        """True if sqlite-vec is loaded and vec0 table exists."""
        return self._available

    # -- Extension loading (Rule 03) ----------------------------------------

    def _try_load_extension(self) -> bool:
        """Attempt to load sqlite-vec. Returns True on success.

        Catches ImportError, AttributeError, and any other exception.
        """
        try:
            import sqlite_vec  # noqa: F401
            # Opening a connection proves the extension can actually be loaded.
            self._connect().close()
            return True
        except ImportError:
            logger.debug("sqlite-vec not installed. VectorStore unavailable.")
            return False
        except AttributeError:
            logger.debug(
                "enable_load_extension not available (macOS default Python). "
                "VectorStore unavailable."
            )
            return False
        except Exception as exc:
            logger.debug("sqlite-vec load failed: %s", exc)
            return False

    def _connect(self) -> sqlite3.Connection:
        """Open a connection with sqlite-vec loaded.

        Every connection loads the extension fresh (per-call model).

        Returns:
            An open connection using ``sqlite3.Row`` rows. The caller owns
            it and must close it.

        Raises:
            ImportError: If the ``sqlite_vec`` package is not installed.
            Exception: Whatever the setup steps raise; the half-initialised
                connection is closed before the error propagates.
        """
        import sqlite_vec

        conn = sqlite3.connect(str(self._db_path))
        try:
            conn.row_factory = sqlite3.Row
            conn.execute("PRAGMA busy_timeout = 10000")
            # FK enforcement is OFF here because VectorStore operates on its own
            # tables (fact_embeddings + embedding_metadata). The store pipeline
            # guarantees fact/profile exist before calling upsert.
            conn.enable_load_extension(True)
            sqlite_vec.load(conn)
            conn.enable_load_extension(False)
        except Exception:
            # Do not leak the handle when extension loading is unsupported.
            conn.close()
            raise
        return conn

    # -- Table creation -----------------------------------------------------

    def _ensure_vec0_table(self) -> None:
        """Create the vec0 virtual table and embedding_metadata if not exist.

        On any failure the store marks itself unavailable instead of raising.
        """
        dim = self._config.dimension
        vec0_ddl = (
            f"CREATE VIRTUAL TABLE IF NOT EXISTS fact_embeddings USING vec0("
            f"profile_id TEXT PARTITION KEY, "
            f"embedding float[{dim}] distance_metric=cosine"
            f")"
        )
        meta_ddl = (
            "CREATE TABLE IF NOT EXISTS embedding_metadata ("
            "vec_rowid INTEGER PRIMARY KEY, "
            "fact_id TEXT NOT NULL UNIQUE, "
            "profile_id TEXT NOT NULL DEFAULT 'default', "
            "model_name TEXT NOT NULL DEFAULT '', "
            "dimension INTEGER NOT NULL DEFAULT 768, "
            "created_at TEXT NOT NULL DEFAULT (datetime('now'))"
            ")"
        )
        meta_idx_fact = (
            "CREATE INDEX IF NOT EXISTS idx_embmeta_fact "
            "ON embedding_metadata (fact_id)"
        )
        meta_idx_profile = (
            "CREATE INDEX IF NOT EXISTS idx_embmeta_profile "
            "ON embedding_metadata (profile_id)"
        )
        conn: sqlite3.Connection | None = None
        try:
            conn = self._connect()
            for statement in (vec0_ddl, meta_ddl, meta_idx_fact, meta_idx_profile):
                conn.execute(statement)
            conn.commit()
        except Exception as exc:
            logger.debug("vec0 table creation failed: %s", exc)
            self._available = False
        finally:
            if conn is not None:
                conn.close()

    # -- Serialization ------------------------------------------------------

    @staticmethod
    def _serialize_f32(vector: list[float]) -> bytes:
        """Serialize float list to raw bytes for sqlite-vec."""
        return np.array(vector, dtype=np.float32).tobytes()

    # -- CRUD Operations ----------------------------------------------------

    def upsert(
        self,
        fact_id: str,
        profile_id: str,
        embedding: list[float],
        model_name: str = "",
    ) -> bool:
        """Insert or update a vector in the vec0 table.

        Thread-safe: acquires self._lock.

        Args:
            fact_id: String identifier of the fact owning the vector.
            profile_id: Profile partition used when a new row is inserted.
            embedding: Vector whose length must equal ``config.dimension``.
            model_name: Model recorded in metadata for new rows; falls back
                to ``config.model_name`` when empty.

        Returns:
            True on success, False on failure or if unavailable.
        """
        if not self._available:
            return False

        if len(embedding) != self._config.dimension:
            logger.debug(
                "Dimension mismatch: got %d, expected %d",
                len(embedding), self._config.dimension,
            )
            return False

        vec_bytes = self._serialize_f32(embedding)

        with self._lock:
            conn: sqlite3.Connection | None = None
            try:
                conn = self._connect()
                # Check if fact_id already exists in metadata
                row = conn.execute(
                    "SELECT vec_rowid FROM embedding_metadata "
                    "WHERE fact_id = ?",
                    (fact_id,),
                ).fetchone()

                if row is not None:
                    # UPDATE existing
                    conn.execute(
                        "UPDATE fact_embeddings SET embedding = ? "
                        "WHERE rowid = ?",
                        (vec_bytes, row["vec_rowid"]),
                    )
                else:
                    # INSERT new
                    conn.execute(
                        "INSERT INTO fact_embeddings(profile_id, embedding) "
                        "VALUES (?, ?)",
                        (profile_id, vec_bytes),
                    )
                    rowid = conn.execute(
                        "SELECT last_insert_rowid()"
                    ).fetchone()[0]
                    conn.execute(
                        "INSERT INTO embedding_metadata "
                        "(vec_rowid, fact_id, profile_id, model_name, dimension) "
                        "VALUES (?, ?, ?, ?, ?)",
                        (rowid, fact_id, profile_id,
                         model_name or self._config.model_name,
                         self._config.dimension),
                    )

                conn.commit()
                return True
            except Exception as exc:
                logger.debug("upsert failed for fact_id=%s: %s", fact_id, exc)
                return False
            finally:
                # Closing without a commit discards a half-finished write.
                if conn is not None:
                    conn.close()

    def search(
        self,
        query_embedding: list[float],
        top_k: int = 30,
        profile_id: str | None = None,
    ) -> list[tuple[str, float]]:
        """KNN search. Returns [(fact_id, similarity_score)].

        Score is cosine similarity (1.0 - distance), clamped at 0.0, and the
        result is sorted by descending score.

        Args:
            query_embedding: Query vector of length ``config.dimension``.
            top_k: Maximum number of neighbours requested.
            profile_id: Restrict the search to this profile; None searches
                without a profile filter.

        Returns:
            Empty list if unavailable, dim mismatch, non-positive ``top_k``,
            no hits, or error.
        """
        if not self._available:
            return []

        if len(query_embedding) != self._config.dimension:
            return []

        if top_k <= 0:
            # Nothing can be returned; skip the database round-trip.
            return []

        vec_bytes = self._serialize_f32(query_embedding)

        conn: sqlite3.Connection | None = None
        try:
            conn = self._connect()

            if profile_id is not None:
                rows = conn.execute(
                    "SELECT rowid, distance "
                    "FROM fact_embeddings "
                    "WHERE embedding MATCH ? "
                    "AND profile_id = ? "
                    "AND k = ?",
                    (vec_bytes, profile_id, top_k),
                ).fetchall()
            else:
                rows = conn.execute(
                    "SELECT rowid, distance "
                    "FROM fact_embeddings "
                    "WHERE embedding MATCH ? "
                    "AND k = ?",
                    (vec_bytes, top_k),
                ).fetchall()

            if not rows:
                return []

            # Map rowids -> fact_ids via embedding_metadata
            dist_map = {r["rowid"]: r["distance"] for r in rows}
            rowids = list(dist_map)

            placeholders = ",".join("?" for _ in rowids)
            meta_rows = conn.execute(
                f"SELECT vec_rowid, fact_id FROM embedding_metadata "
                f"WHERE vec_rowid IN ({placeholders})",
                rowids,
            ).fetchall()

            # Vectors without a metadata row are silently skipped.
            results: list[tuple[str, float]] = [
                (str(mr["fact_id"]), max(0.0, 1.0 - dist_map[mr["vec_rowid"]]))
                for mr in meta_rows
            ]
            results.sort(key=lambda item: item[1], reverse=True)
            return results

        except Exception as exc:
            logger.debug("search failed: %s", exc)
            return []
        finally:
            if conn is not None:
                conn.close()

    def delete(self, fact_id: str) -> bool:
        """Remove a vector from vec0 and metadata.

        Thread-safe: acquires self._lock.
        Returns True if deleted, False if not found or error.
        """
        if not self._available:
            return False

        with self._lock:
            conn: sqlite3.Connection | None = None
            try:
                conn = self._connect()
                row = conn.execute(
                    "SELECT vec_rowid FROM embedding_metadata "
                    "WHERE fact_id = ?",
                    (fact_id,),
                ).fetchone()

                if row is None:
                    return False

                rowid = row["vec_rowid"]
                conn.execute(
                    "DELETE FROM fact_embeddings WHERE rowid = ?",
                    (rowid,),
                )
                conn.execute(
                    "DELETE FROM embedding_metadata WHERE vec_rowid = ?",
                    (rowid,),
                )
                conn.commit()
                return True
            except Exception as exc:
                logger.debug("delete failed for fact_id=%s: %s", fact_id, exc)
                return False
            finally:
                if conn is not None:
                    conn.close()

    def count(self, profile_id: str | None = None) -> int:
        """Count vectors in the store, as recorded in embedding_metadata.

        Args:
            profile_id: Count only this profile's rows; None counts all rows.

        Returns:
            The row count, or 0 if unavailable or on error.
        """
        if not self._available:
            return 0

        conn: sqlite3.Connection | None = None
        try:
            conn = self._connect()
            if profile_id is not None:
                row = conn.execute(
                    "SELECT COUNT(*) AS c FROM embedding_metadata "
                    "WHERE profile_id = ?",
                    (profile_id,),
                ).fetchone()
            else:
                row = conn.execute(
                    "SELECT COUNT(*) AS c FROM embedding_metadata",
                ).fetchone()
            return int(row["c"]) if row else 0
        except Exception as exc:
            logger.debug("count failed: %s", exc)
            return 0
        finally:
            if conn is not None:
                conn.close()

    def rebuild_from_facts(
        self,
        facts: list[tuple[str, str, list[float]]],
    ) -> int:
        """Migrate existing facts from JSON TEXT embeddings to vec0.

        Each fact is written through ``upsert``, so failures are skipped
        rather than raised.

        Args:
            facts: List of (fact_id, profile_id, embedding) tuples.

        Returns:
            Number of vectors successfully migrated.
        """
        migrated = 0
        for fact_id, profile_id, embedding in facts:
            if self.upsert(fact_id, profile_id, embedding):
                migrated += 1
        return migrated

    def needs_binary_quantization(self, profile_id: str) -> bool:
        """Check if BQ should be enabled (count >= 100K threshold)."""
        return self.count(profile_id) >= self._config.binary_quantization_threshold