@psiclawops/hypermem 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. package/dist/background-indexer.d.ts +132 -0
  2. package/dist/background-indexer.d.ts.map +1 -0
  3. package/dist/background-indexer.js +1044 -0
  4. package/dist/cache.d.ts +110 -0
  5. package/dist/cache.d.ts.map +1 -0
  6. package/dist/cache.js +495 -0
  7. package/dist/compaction-fence.d.ts +89 -0
  8. package/dist/compaction-fence.d.ts.map +1 -0
  9. package/dist/compaction-fence.js +153 -0
  10. package/dist/compositor.d.ts +226 -0
  11. package/dist/compositor.d.ts.map +1 -0
  12. package/dist/compositor.js +2558 -0
  13. package/dist/content-type-classifier.d.ts +41 -0
  14. package/dist/content-type-classifier.d.ts.map +1 -0
  15. package/dist/content-type-classifier.js +181 -0
  16. package/dist/cross-agent.d.ts +62 -0
  17. package/dist/cross-agent.d.ts.map +1 -0
  18. package/dist/cross-agent.js +259 -0
  19. package/dist/db.d.ts +131 -0
  20. package/dist/db.d.ts.map +1 -0
  21. package/dist/db.js +402 -0
  22. package/dist/desired-state-store.d.ts +100 -0
  23. package/dist/desired-state-store.d.ts.map +1 -0
  24. package/dist/desired-state-store.js +222 -0
  25. package/dist/doc-chunk-store.d.ts +140 -0
  26. package/dist/doc-chunk-store.d.ts.map +1 -0
  27. package/dist/doc-chunk-store.js +391 -0
  28. package/dist/doc-chunker.d.ts +99 -0
  29. package/dist/doc-chunker.d.ts.map +1 -0
  30. package/dist/doc-chunker.js +324 -0
  31. package/dist/dreaming-promoter.d.ts +86 -0
  32. package/dist/dreaming-promoter.d.ts.map +1 -0
  33. package/dist/dreaming-promoter.js +381 -0
  34. package/dist/episode-store.d.ts +49 -0
  35. package/dist/episode-store.d.ts.map +1 -0
  36. package/dist/episode-store.js +135 -0
  37. package/dist/fact-store.d.ts +75 -0
  38. package/dist/fact-store.d.ts.map +1 -0
  39. package/dist/fact-store.js +236 -0
  40. package/dist/fleet-store.d.ts +144 -0
  41. package/dist/fleet-store.d.ts.map +1 -0
  42. package/dist/fleet-store.js +276 -0
  43. package/dist/fos-mod.d.ts +178 -0
  44. package/dist/fos-mod.d.ts.map +1 -0
  45. package/dist/fos-mod.js +416 -0
  46. package/dist/hybrid-retrieval.d.ts +64 -0
  47. package/dist/hybrid-retrieval.d.ts.map +1 -0
  48. package/dist/hybrid-retrieval.js +344 -0
  49. package/dist/image-eviction.d.ts +49 -0
  50. package/dist/image-eviction.d.ts.map +1 -0
  51. package/dist/image-eviction.js +251 -0
  52. package/dist/index.d.ts +650 -0
  53. package/dist/index.d.ts.map +1 -0
  54. package/dist/index.js +1072 -0
  55. package/dist/keystone-scorer.d.ts +51 -0
  56. package/dist/keystone-scorer.d.ts.map +1 -0
  57. package/dist/keystone-scorer.js +52 -0
  58. package/dist/knowledge-graph.d.ts +110 -0
  59. package/dist/knowledge-graph.d.ts.map +1 -0
  60. package/dist/knowledge-graph.js +305 -0
  61. package/dist/knowledge-lint.d.ts +29 -0
  62. package/dist/knowledge-lint.d.ts.map +1 -0
  63. package/dist/knowledge-lint.js +116 -0
  64. package/dist/knowledge-store.d.ts +72 -0
  65. package/dist/knowledge-store.d.ts.map +1 -0
  66. package/dist/knowledge-store.js +247 -0
  67. package/dist/library-schema.d.ts +22 -0
  68. package/dist/library-schema.d.ts.map +1 -0
  69. package/dist/library-schema.js +1038 -0
  70. package/dist/message-store.d.ts +89 -0
  71. package/dist/message-store.d.ts.map +1 -0
  72. package/dist/message-store.js +323 -0
  73. package/dist/metrics-dashboard.d.ts +114 -0
  74. package/dist/metrics-dashboard.d.ts.map +1 -0
  75. package/dist/metrics-dashboard.js +260 -0
  76. package/dist/obsidian-exporter.d.ts +57 -0
  77. package/dist/obsidian-exporter.d.ts.map +1 -0
  78. package/dist/obsidian-exporter.js +274 -0
  79. package/dist/obsidian-watcher.d.ts +147 -0
  80. package/dist/obsidian-watcher.d.ts.map +1 -0
  81. package/dist/obsidian-watcher.js +403 -0
  82. package/dist/open-domain.d.ts +46 -0
  83. package/dist/open-domain.d.ts.map +1 -0
  84. package/dist/open-domain.js +125 -0
  85. package/dist/preference-store.d.ts +54 -0
  86. package/dist/preference-store.d.ts.map +1 -0
  87. package/dist/preference-store.js +109 -0
  88. package/dist/preservation-gate.d.ts +82 -0
  89. package/dist/preservation-gate.d.ts.map +1 -0
  90. package/dist/preservation-gate.js +150 -0
  91. package/dist/proactive-pass.d.ts +63 -0
  92. package/dist/proactive-pass.d.ts.map +1 -0
  93. package/dist/proactive-pass.js +239 -0
  94. package/dist/profiles.d.ts +44 -0
  95. package/dist/profiles.d.ts.map +1 -0
  96. package/dist/profiles.js +227 -0
  97. package/dist/provider-translator.d.ts +50 -0
  98. package/dist/provider-translator.d.ts.map +1 -0
  99. package/dist/provider-translator.js +403 -0
  100. package/dist/rate-limiter.d.ts +76 -0
  101. package/dist/rate-limiter.d.ts.map +1 -0
  102. package/dist/rate-limiter.js +179 -0
  103. package/dist/repair-tool-pairs.d.ts +38 -0
  104. package/dist/repair-tool-pairs.d.ts.map +1 -0
  105. package/dist/repair-tool-pairs.js +138 -0
  106. package/dist/retrieval-policy.d.ts +51 -0
  107. package/dist/retrieval-policy.d.ts.map +1 -0
  108. package/dist/retrieval-policy.js +77 -0
  109. package/dist/schema.d.ts +15 -0
  110. package/dist/schema.d.ts.map +1 -0
  111. package/dist/schema.js +229 -0
  112. package/dist/secret-scanner.d.ts +51 -0
  113. package/dist/secret-scanner.d.ts.map +1 -0
  114. package/dist/secret-scanner.js +248 -0
  115. package/dist/seed.d.ts +108 -0
  116. package/dist/seed.d.ts.map +1 -0
  117. package/dist/seed.js +177 -0
  118. package/dist/session-flusher.d.ts +53 -0
  119. package/dist/session-flusher.d.ts.map +1 -0
  120. package/dist/session-flusher.js +69 -0
  121. package/dist/session-topic-map.d.ts +41 -0
  122. package/dist/session-topic-map.d.ts.map +1 -0
  123. package/dist/session-topic-map.js +77 -0
  124. package/dist/spawn-context.d.ts +54 -0
  125. package/dist/spawn-context.d.ts.map +1 -0
  126. package/dist/spawn-context.js +159 -0
  127. package/dist/system-store.d.ts +73 -0
  128. package/dist/system-store.d.ts.map +1 -0
  129. package/dist/system-store.js +182 -0
  130. package/dist/temporal-store.d.ts +80 -0
  131. package/dist/temporal-store.d.ts.map +1 -0
  132. package/dist/temporal-store.js +149 -0
  133. package/dist/topic-detector.d.ts +35 -0
  134. package/dist/topic-detector.d.ts.map +1 -0
  135. package/dist/topic-detector.js +249 -0
  136. package/dist/topic-store.d.ts +45 -0
  137. package/dist/topic-store.d.ts.map +1 -0
  138. package/dist/topic-store.js +136 -0
  139. package/dist/topic-synthesizer.d.ts +51 -0
  140. package/dist/topic-synthesizer.d.ts.map +1 -0
  141. package/dist/topic-synthesizer.js +315 -0
  142. package/dist/trigger-registry.d.ts +63 -0
  143. package/dist/trigger-registry.d.ts.map +1 -0
  144. package/dist/trigger-registry.js +163 -0
  145. package/dist/types.d.ts +533 -0
  146. package/dist/types.d.ts.map +1 -0
  147. package/dist/types.js +9 -0
  148. package/dist/vector-store.d.ts +170 -0
  149. package/dist/vector-store.d.ts.map +1 -0
  150. package/dist/vector-store.js +677 -0
  151. package/dist/version.d.ts +34 -0
  152. package/dist/version.d.ts.map +1 -0
  153. package/dist/version.js +34 -0
  154. package/dist/wiki-page-emitter.d.ts +65 -0
  155. package/dist/wiki-page-emitter.d.ts.map +1 -0
  156. package/dist/wiki-page-emitter.js +258 -0
  157. package/dist/work-store.d.ts +112 -0
  158. package/dist/work-store.d.ts.map +1 -0
  159. package/dist/work-store.js +273 -0
  160. package/package.json +1 -1
@@ -0,0 +1,677 @@
1
+ /**
2
+ * hypermem Vector Store — Semantic Search via sqlite-vec
3
+ *
4
+ * Provides embedding-backed KNN search over facts, knowledge, episodes,
5
+ * and session registry entries. Uses Ollama (local) for embeddings,
6
+ * sqlite-vec for vector indexing, and coexists with existing FTS5.
7
+ *
8
+ * Architecture:
9
+ * - One vec0 virtual table per indexed content type
10
+ * - Embeddings generated via local Ollama (nomic-embed-text, 768d)
11
+ * - Vectors stored alongside content in the same agent DB
12
+ * - LRU embedding cache (module-level, per-process) to avoid redundant Ollama calls
13
+ * - Precomputed embedding passthrough: callers can supply an embedding to skip Ollama
14
+ * - Batch embedding support for bulk indexing
15
+ */
16
+ import { createHash } from 'node:crypto';
17
// Default embedding configuration. These are the Ollama (local) defaults;
// OpenAI-specific overrides live in OPENAI_DEFAULTS below.
const DEFAULT_EMBEDDING_CONFIG = {
    provider: 'ollama', // 'ollama' (local) or 'openai' (hosted)
    ollamaUrl: 'http://localhost:11434', // base URL of the local Ollama server
    openaiBaseUrl: 'https://api.openai.com/v1', // base URL for the OpenAI-compatible API
    model: 'nomic-embed-text', // embedding model name
    dimensions: 768, // expected vector width; vec0 tables are created with this size
    timeout: 10000, // per-request abort timeout, milliseconds
    batchSize: 32, // max texts sent per embedding HTTP request
    cacheSize: 128, // max entries held in the module-level embedding cache
};
/** Provider-specific defaults applied when provider is 'openai' and fields are not set. */
const OPENAI_DEFAULTS = {
    model: 'text-embedding-3-small',
    dimensions: 1536,
    batchSize: 128,
};
// Module-level (per-process) embedding cache shared by all VectorStore instances.
// Key: truncated SHA-256 content hash (see simpleHash).
// Value: { embedding: Float32Array, timestamp: number (ms epoch, set on insert) }.
const _embeddingCache = new Map();
34
+ /**
35
+ * Insert an entry into the LRU cache, evicting the oldest if over capacity.
36
+ */
37
+ function cachePut(key, embedding, maxSize) {
38
+ if (_embeddingCache.has(key)) {
39
+ // Update existing entry (refresh timestamp)
40
+ _embeddingCache.delete(key);
41
+ }
42
+ else if (_embeddingCache.size >= maxSize) {
43
+ // Evict oldest entry by timestamp
44
+ let oldestKey;
45
+ let oldestTime = Infinity;
46
+ for (const [k, v] of _embeddingCache) {
47
+ if (v.timestamp < oldestTime) {
48
+ oldestTime = v.timestamp;
49
+ oldestKey = k;
50
+ }
51
+ }
52
+ if (oldestKey !== undefined) {
53
+ _embeddingCache.delete(oldestKey);
54
+ }
55
+ }
56
+ _embeddingCache.set(key, { embedding, timestamp: Date.now() });
57
+ }
58
+ /**
59
+ * Clear the embedding cache. Primarily for testing.
60
+ */
61
+ export function clearEmbeddingCache() {
62
+ _embeddingCache.clear();
63
+ }
64
+ /**
65
+ * Generate embeddings via OpenAI Embeddings API.
66
+ * Batches up to batchSize inputs per request.
67
+ */
68
+ async function generateOpenAIEmbeddings(texts, config) {
69
+ // Resolve API key: config > environment
70
+ const apiKey = config.openaiApiKey
71
+ ?? process.env.OPENROUTER_API_KEY
72
+ ?? process.env.OPENAI_API_KEY
73
+ ?? null;
74
+ if (!apiKey) {
75
+ throw new Error('[hypermem] OpenAI embedding provider requires an API key. ' +
76
+ 'Set openaiApiKey in hypermem config, or set OPENROUTER_API_KEY / OPENAI_API_KEY env var.');
77
+ }
78
+ const baseUrl = config.openaiBaseUrl ?? 'https://api.openai.com/v1';
79
+ const model = config.model;
80
+ const results = [];
81
+ for (let i = 0; i < texts.length; i += config.batchSize) {
82
+ const batch = texts.slice(i, i + config.batchSize);
83
+ const controller = new AbortController();
84
+ const timer = setTimeout(() => controller.abort(), config.timeout);
85
+ try {
86
+ const response = await fetch(`${baseUrl}/embeddings`, {
87
+ method: 'POST',
88
+ headers: {
89
+ 'Content-Type': 'application/json',
90
+ 'Authorization': `Bearer ${apiKey}`,
91
+ },
92
+ body: JSON.stringify({ model, input: batch }),
93
+ signal: controller.signal,
94
+ });
95
+ if (!response.ok) {
96
+ const body = await response.text().catch(() => '');
97
+ throw new Error(`OpenAI embedding failed: ${response.status} ${response.statusText} — ${body}`);
98
+ }
99
+ const data = await response.json();
100
+ // OpenAI returns results in order by default but may not guarantee it — sort by index.
101
+ const sorted = data.data.sort((a, b) => a.index - b.index);
102
+ for (const item of sorted) {
103
+ if (item.embedding.length !== config.dimensions) {
104
+ throw new Error(`OpenAI embedding dimension mismatch: expected ${config.dimensions}, got ${item.embedding.length}. ` +
105
+ 'If you changed models, re-index via hypermem reindex.');
106
+ }
107
+ results.push(new Float32Array(item.embedding));
108
+ }
109
+ }
110
+ finally {
111
+ clearTimeout(timer);
112
+ }
113
+ }
114
+ return results;
115
+ }
116
+ /**
117
+ * Generate embeddings via Ollama API.
118
+ * Supports single and batch embedding.
119
+ * Results are cached per text hash — cache hits skip the Ollama call entirely.
120
+ */
121
+ export async function generateEmbeddings(texts, config = DEFAULT_EMBEDDING_CONFIG) {
122
+ // Apply provider-specific defaults when provider is 'openai' and fields are at Ollama defaults
123
+ if (config.provider === 'openai') {
124
+ // Merge: OpenAI defaults fill in any unset fields, user-supplied values always win
125
+ config = {
126
+ ...DEFAULT_EMBEDDING_CONFIG,
127
+ ...config,
128
+ model: config.model !== DEFAULT_EMBEDDING_CONFIG.model ? config.model : OPENAI_DEFAULTS.model,
129
+ dimensions: config.dimensions !== DEFAULT_EMBEDDING_CONFIG.dimensions ? config.dimensions : OPENAI_DEFAULTS.dimensions,
130
+ batchSize: config.batchSize !== DEFAULT_EMBEDDING_CONFIG.batchSize ? config.batchSize : OPENAI_DEFAULTS.batchSize,
131
+ };
132
+ // OpenAI path — no LRU cache (responses are billed; caching at this layer
133
+ // adds complexity without proportional benefit given async background use).
134
+ return generateOpenAIEmbeddings(texts, config);
135
+ }
136
+ if (texts.length === 0)
137
+ return [];
138
+ const maxSize = Math.min(config.cacheSize ?? DEFAULT_EMBEDDING_CONFIG.cacheSize ?? 128, 10_000 // Hard cap: prevent unbounded memory growth from operator misconfiguration
139
+ );
140
+ const results = new Array(texts.length).fill(null);
141
+ // Check cache first — build list of texts that need Ollama calls
142
+ const uncachedIndices = [];
143
+ for (let i = 0; i < texts.length; i++) {
144
+ const key = simpleHash(texts[i]);
145
+ const cached = _embeddingCache.get(key);
146
+ if (cached) {
147
+ results[i] = cached.embedding;
148
+ }
149
+ else {
150
+ uncachedIndices.push(i);
151
+ }
152
+ }
153
+ if (uncachedIndices.length === 0) {
154
+ return results;
155
+ }
156
+ // Fetch uncached texts from Ollama in batches
157
+ const uncachedTexts = uncachedIndices.map(i => texts[i]);
158
+ const ollamaResults = [];
159
+ // Ollama /api/embed supports batch via `input` array
160
+ for (let i = 0; i < uncachedTexts.length; i += config.batchSize) {
161
+ const batch = uncachedTexts.slice(i, i + config.batchSize);
162
+ const controller = new AbortController();
163
+ const timer = setTimeout(() => controller.abort(), config.timeout);
164
+ try {
165
+ const response = await fetch(`${config.ollamaUrl}/api/embed`, {
166
+ method: 'POST',
167
+ headers: { 'Content-Type': 'application/json' },
168
+ body: JSON.stringify({
169
+ model: config.model,
170
+ input: batch,
171
+ }),
172
+ signal: controller.signal,
173
+ });
174
+ if (!response.ok) {
175
+ throw new Error(`Ollama embedding failed: ${response.status} ${response.statusText}`);
176
+ }
177
+ const data = await response.json();
178
+ for (const embedding of data.embeddings) {
179
+ if (embedding.length !== config.dimensions) {
180
+ throw new Error(`Embedding dimension mismatch: expected ${config.dimensions}, got ${embedding.length}`);
181
+ }
182
+ ollamaResults.push(new Float32Array(embedding));
183
+ }
184
+ }
185
+ finally {
186
+ clearTimeout(timer);
187
+ }
188
+ }
189
+ // Populate cache and fill results array
190
+ for (let j = 0; j < uncachedIndices.length; j++) {
191
+ const origIdx = uncachedIndices[j];
192
+ const embedding = ollamaResults[j];
193
+ results[origIdx] = embedding;
194
+ cachePut(simpleHash(texts[origIdx]), embedding, maxSize);
195
+ }
196
+ return results;
197
+ }
198
+ /**
199
+ * Serialize a Float32Array to Uint8Array for sqlite-vec binding.
200
+ */
201
+ function vecToBytes(vec) {
202
+ return new Uint8Array(vec.buffer, vec.byteOffset, vec.byteLength);
203
+ }
204
+ /**
205
+ * VectorStore — manages vector indexes in an agent's vector database.
206
+ *
207
+ * The vector DB (vectors.db) stores vec0 virtual tables and the index map.
208
+ * Source content (facts, knowledge, episodes) lives in the library DB.
209
+ * The VectorStore needs both: vectorDb for indexes, libraryDb for content.
210
+ */
211
+ export class VectorStore {
212
+ db; // vectors.db
213
+ libraryDb; // library.db for source content
214
+ config;
215
+ constructor(db, config, libraryDb) {
216
+ this.db = db;
217
+ this.libraryDb = libraryDb || null;
218
+ this.config = { ...DEFAULT_EMBEDDING_CONFIG, ...config };
219
+ }
220
+ /**
221
+ * Create vector index tables if they don't exist.
222
+ * Safe to call multiple times (idempotent).
223
+ */
224
+ ensureTables() {
225
+ const dim = this.config.dimensions;
226
+ // Vector index for facts
227
+ this.db.exec(`
228
+ CREATE VIRTUAL TABLE IF NOT EXISTS vec_facts
229
+ USING vec0(embedding float[${dim}])
230
+ `);
231
+ // Vector index for knowledge
232
+ this.db.exec(`
233
+ CREATE VIRTUAL TABLE IF NOT EXISTS vec_knowledge
234
+ USING vec0(embedding float[${dim}])
235
+ `);
236
+ // Vector index for episodes
237
+ this.db.exec(`
238
+ CREATE VIRTUAL TABLE IF NOT EXISTS vec_episodes
239
+ USING vec0(embedding float[${dim}])
240
+ `);
241
+ // Vector index for session registry (library DB)
242
+ // This is created separately via ensureSessionRegistryTable()
243
+ // Mapping table: links vec rowids to source table rows
244
+ // Using a single mapping table for all vec tables
245
+ this.db.exec(`
246
+ CREATE TABLE IF NOT EXISTS vec_index_map (
247
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
248
+ source_table TEXT NOT NULL,
249
+ source_id INTEGER NOT NULL,
250
+ vec_table TEXT NOT NULL,
251
+ content_hash TEXT NOT NULL,
252
+ indexed_at TEXT NOT NULL,
253
+ UNIQUE(source_table, source_id)
254
+ )
255
+ `);
256
+ this.db.exec('CREATE INDEX IF NOT EXISTS idx_vec_map_source ON vec_index_map(source_table, source_id)');
257
+ this.db.exec('CREATE INDEX IF NOT EXISTS idx_vec_map_vec ON vec_index_map(vec_table, id)');
258
+ }
259
+ /**
260
+ * Index a single content item. Generates embedding and stores in vec table.
261
+ * Skips if content hasn't changed (based on hash).
262
+ */
263
+ /** Allowlisted source tables for vector indexing. Prevents SQL injection via table name interpolation. */
264
+ static ALLOWED_SOURCE_TABLES = new Set(['facts', 'knowledge', 'episodes', 'sessions']);
265
+ validateSourceTable(sourceTable) {
266
+ if (!VectorStore.ALLOWED_SOURCE_TABLES.has(sourceTable)) {
267
+ throw new Error(`Invalid sourceTable: "${sourceTable}". Must be one of: ${[...VectorStore.ALLOWED_SOURCE_TABLES].join(', ')}`);
268
+ }
269
+ }
270
+ async indexItem(sourceTable, sourceId, content, domain) {
271
+ this.validateSourceTable(sourceTable);
272
+ const vecTable = `vec_${sourceTable}`;
273
+ const contentHash = simpleHash(content);
274
+ // Check if already indexed with same content
275
+ const existing = this.db
276
+ .prepare('SELECT id, content_hash FROM vec_index_map WHERE source_table = ? AND source_id = ?')
277
+ .get(sourceTable, sourceId);
278
+ if (existing && existing.content_hash === contentHash) {
279
+ return false; // Already indexed, content unchanged
280
+ }
281
+ // Generate embedding
282
+ const [embedding] = await generateEmbeddings([content], this.config);
283
+ const bytes = vecToBytes(embedding);
284
+ if (existing) {
285
+ // Update: delete old vector, insert new
286
+ this.db.prepare(`DELETE FROM ${vecTable} WHERE rowid = CAST(? AS INTEGER)`).run(existing.id);
287
+ this.db.prepare(`INSERT INTO ${vecTable}(rowid, embedding) VALUES (CAST(? AS INTEGER), ?)`).run(existing.id, bytes);
288
+ this.db
289
+ .prepare('UPDATE vec_index_map SET content_hash = ?, indexed_at = ? WHERE id = ?')
290
+ .run(contentHash, new Date().toISOString(), existing.id);
291
+ }
292
+ else {
293
+ // Insert new mapping row first to get the rowid
294
+ const mapResult = this.db
295
+ .prepare('INSERT INTO vec_index_map (source_table, source_id, vec_table, content_hash, indexed_at) VALUES (?, ?, ?, ?, ?)')
296
+ .run(sourceTable, sourceId, vecTable, contentHash, new Date().toISOString());
297
+ const mapRowId = Number(mapResult.lastInsertRowid);
298
+ // Insert vector with matching rowid
299
+ this.db.prepare(`INSERT INTO ${vecTable}(rowid, embedding) VALUES (CAST(? AS INTEGER), ?)`).run(mapRowId, bytes);
300
+ }
301
+ return true;
302
+ }
303
+ /**
304
+ * Batch index multiple items. More efficient than individual calls.
305
+ */
306
+ async indexBatch(items) {
307
+ let indexed = 0;
308
+ let skipped = 0;
309
+ // Validate all source tables before processing any items
310
+ for (const item of items) {
311
+ this.validateSourceTable(item.sourceTable);
312
+ }
313
+ // Filter out already-indexed items
314
+ const toIndex = [];
315
+ for (const item of items) {
316
+ const contentHash = simpleHash(item.content);
317
+ const existing = this.db
318
+ .prepare('SELECT content_hash FROM vec_index_map WHERE source_table = ? AND source_id = ?')
319
+ .get(item.sourceTable, item.sourceId);
320
+ if (existing && existing.content_hash === contentHash) {
321
+ skipped++;
322
+ }
323
+ else {
324
+ toIndex.push(item);
325
+ }
326
+ }
327
+ if (toIndex.length === 0)
328
+ return { indexed, skipped };
329
+ // Batch generate embeddings
330
+ const texts = toIndex.map(item => item.content);
331
+ const embeddings = await generateEmbeddings(texts, this.config);
332
+ // Insert in a transaction
333
+ this.db.exec('BEGIN');
334
+ try {
335
+ for (let i = 0; i < toIndex.length; i++) {
336
+ const item = toIndex[i];
337
+ const embedding = embeddings[i];
338
+ const vecTable = `vec_${item.sourceTable}`;
339
+ const contentHash = simpleHash(item.content);
340
+ const bytes = vecToBytes(embedding);
341
+ // Check for existing mapping (might need update vs insert)
342
+ const existing = this.db
343
+ .prepare('SELECT id FROM vec_index_map WHERE source_table = ? AND source_id = ?')
344
+ .get(item.sourceTable, item.sourceId);
345
+ if (existing) {
346
+ this.db.prepare(`DELETE FROM ${vecTable} WHERE rowid = CAST(? AS INTEGER)`).run(existing.id);
347
+ this.db.prepare(`INSERT INTO ${vecTable}(rowid, embedding) VALUES (CAST(? AS INTEGER), ?)`).run(existing.id, bytes);
348
+ this.db
349
+ .prepare('UPDATE vec_index_map SET content_hash = ?, indexed_at = ? WHERE id = ?')
350
+ .run(contentHash, new Date().toISOString(), existing.id);
351
+ }
352
+ else {
353
+ const mapResult = this.db
354
+ .prepare('INSERT INTO vec_index_map (source_table, source_id, vec_table, content_hash, indexed_at) VALUES (?, ?, ?, ?, ?)')
355
+ .run(item.sourceTable, item.sourceId, vecTable, contentHash, new Date().toISOString());
356
+ const mapRowId = Number(mapResult.lastInsertRowid);
357
+ this.db.prepare(`INSERT INTO ${vecTable}(rowid, embedding) VALUES (CAST(? AS INTEGER), ?)`).run(mapRowId, bytes);
358
+ }
359
+ indexed++;
360
+ }
361
+ this.db.exec('COMMIT');
362
+ }
363
+ catch (err) {
364
+ this.db.exec('ROLLBACK');
365
+ throw err;
366
+ }
367
+ return { indexed, skipped };
368
+ }
369
+ /**
370
+ * Semantic KNN search across one or all vector tables.
371
+ *
372
+ * @param precomputedEmbedding — optional pre-computed embedding for the query.
373
+ * When provided, skips the Ollama call entirely. The precomputed embedding
374
+ * is still inserted into the LRU cache so subsequent identical queries hit.
375
+ */
376
+ async search(query, opts) {
377
+ const limit = opts?.limit || 10;
378
+ const tables = opts?.tables || ['facts', 'knowledge', 'episodes'];
379
+ // Validate all table names before any SQL construction
380
+ for (const table of tables) {
381
+ this.validateSourceTable(table);
382
+ }
383
+ // Use precomputed embedding if provided, otherwise call Ollama
384
+ let queryEmbedding;
385
+ if (opts?.precomputedEmbedding) {
386
+ queryEmbedding = opts.precomputedEmbedding;
387
+ // Populate LRU cache so subsequent queries for the same text hit
388
+ const maxSize = this.config.cacheSize ?? 128;
389
+ cachePut(simpleHash(query), queryEmbedding, maxSize);
390
+ }
391
+ else {
392
+ [queryEmbedding] = await generateEmbeddings([query], this.config);
393
+ }
394
+ const queryBytes = vecToBytes(queryEmbedding);
395
+ const results = [];
396
+ for (const table of tables) {
397
+ const vecTable = `vec_${table}`;
398
+ // Check if the vec table exists
399
+ const tableExists = this.db
400
+ .prepare("SELECT count(*) as cnt FROM sqlite_master WHERE type='table' AND name=?")
401
+ .get(vecTable);
402
+ if (!tableExists || tableExists.cnt === 0)
403
+ continue;
404
+ // KNN query
405
+ const rows = this.db
406
+ .prepare(`SELECT rowid, distance
407
+ FROM ${vecTable}
408
+ WHERE embedding MATCH ?
409
+ ORDER BY distance
410
+ LIMIT ?`)
411
+ .all(queryBytes, limit);
412
+ for (const row of rows) {
413
+ if (opts?.maxDistance !== undefined && row.distance > opts.maxDistance)
414
+ continue;
415
+ // Look up source from mapping table
416
+ const mapping = this.db
417
+ .prepare('SELECT source_table, source_id FROM vec_index_map WHERE id = ?')
418
+ .get(row.rowid);
419
+ if (!mapping)
420
+ continue;
421
+ // Fetch actual content from source table
422
+ const sourceContent = this.getSourceContent(mapping.source_table, mapping.source_id);
423
+ if (!sourceContent)
424
+ continue;
425
+ results.push({
426
+ rowid: row.rowid,
427
+ distance: row.distance,
428
+ sourceTable: mapping.source_table,
429
+ sourceId: mapping.source_id,
430
+ content: sourceContent.content,
431
+ domain: sourceContent.domain,
432
+ agentId: sourceContent.agentId,
433
+ metadata: sourceContent.metadata,
434
+ });
435
+ }
436
+ }
437
+ // Sort all results by distance (cross-table)
438
+ results.sort((a, b) => a.distance - b.distance);
439
+ return results.slice(0, limit);
440
+ }
441
+ /**
442
+ * Get content from a source table by id.
443
+ */
444
+ getSourceContent(table, id) {
445
+ // Source content lives in the library DB (facts, knowledge, episodes)
446
+ // or in the vector DB itself (if old schema). Try library first.
447
+ const sourceDb = this.libraryDb || this.db;
448
+ switch (table) {
449
+ case 'facts': {
450
+ const row = sourceDb
451
+ .prepare('SELECT content, domain, agent_id FROM facts WHERE id = ? AND superseded_by IS NULL')
452
+ .get(id);
453
+ return row ? { content: row.content, domain: row.domain, agentId: row.agent_id } : null;
454
+ }
455
+ case 'knowledge': {
456
+ const row = sourceDb
457
+ .prepare('SELECT content, domain, agent_id, key FROM knowledge WHERE id = ? AND superseded_by IS NULL')
458
+ .get(id);
459
+ return row
460
+ ? { content: row.content, domain: row.domain, agentId: row.agent_id, metadata: row.key }
461
+ : null;
462
+ }
463
+ case 'episodes': {
464
+ const row = sourceDb
465
+ .prepare('SELECT summary, event_type, agent_id, participants FROM episodes WHERE id = ?')
466
+ .get(id);
467
+ return row
468
+ ? {
469
+ content: row.summary,
470
+ domain: row.event_type,
471
+ agentId: row.agent_id,
472
+ metadata: row.participants,
473
+ }
474
+ : null;
475
+ }
476
+ default:
477
+ return null;
478
+ }
479
+ }
480
+ /**
481
+ * Index all un-indexed content in the agent's database.
482
+ * Called by the background indexer.
483
+ */
484
+ async indexAll(agentId) {
485
+ const items = [];
486
+ const sourceDb = this.libraryDb || this.db;
487
+ // Count already-indexed items for accurate skip reporting
488
+ const alreadyIndexed = this.db
489
+ .prepare('SELECT COUNT(*) as cnt FROM vec_index_map')
490
+ .get().cnt;
491
+ // Get IDs already indexed (in vector DB)
492
+ const indexedFacts = new Set(this.db.prepare("SELECT source_id FROM vec_index_map WHERE source_table = 'facts'")
493
+ .all().map(r => r.source_id));
494
+ const indexedKnowledge = new Set(this.db.prepare("SELECT source_id FROM vec_index_map WHERE source_table = 'knowledge'")
495
+ .all().map(r => r.source_id));
496
+ const indexedEpisodes = new Set(this.db.prepare("SELECT source_id FROM vec_index_map WHERE source_table = 'episodes'")
497
+ .all().map(r => r.source_id));
498
+ // Collect un-indexed facts from library DB
499
+ const facts = sourceDb
500
+ .prepare('SELECT id, content, domain FROM facts WHERE agent_id = ? AND superseded_by IS NULL')
501
+ .all(agentId);
502
+ for (const f of facts) {
503
+ if (!indexedFacts.has(f.id)) {
504
+ items.push({ sourceTable: 'facts', sourceId: f.id, content: f.content });
505
+ }
506
+ }
507
+ // Collect un-indexed knowledge from library DB
508
+ const knowledge = sourceDb
509
+ .prepare('SELECT id, content, domain, key FROM knowledge WHERE agent_id = ? AND superseded_by IS NULL')
510
+ .all(agentId);
511
+ for (const k of knowledge) {
512
+ if (!indexedKnowledge.has(k.id)) {
513
+ items.push({
514
+ sourceTable: 'knowledge',
515
+ sourceId: k.id,
516
+ content: `${k.key}: ${k.content}`,
517
+ });
518
+ }
519
+ }
520
+ // Collect un-indexed episodes from library DB
521
+ const episodes = sourceDb
522
+ .prepare('SELECT id, summary, event_type FROM episodes WHERE agent_id = ?')
523
+ .all(agentId);
524
+ for (const e of episodes) {
525
+ if (!indexedEpisodes.has(e.id)) {
526
+ items.push({ sourceTable: 'episodes', sourceId: e.id, content: e.summary });
527
+ }
528
+ }
529
+ if (items.length === 0) {
530
+ return { indexed: 0, skipped: alreadyIndexed };
531
+ }
532
+ const result = await this.indexBatch(items);
533
+ return { indexed: result.indexed, skipped: result.skipped + alreadyIndexed };
534
+ }
535
+ /**
536
+ * Remove vector index entries for deleted source rows.
537
+ */
538
+ pruneOrphans() {
539
+ let pruned = 0;
540
+ const sourceDb = this.libraryDb || this.db;
541
+ for (const table of ['facts', 'knowledge', 'episodes']) {
542
+ // Get all indexed IDs for this table
543
+ const indexed = this.db
544
+ .prepare('SELECT id, vec_table, source_id FROM vec_index_map WHERE source_table = ?')
545
+ .all(table);
546
+ for (const entry of indexed) {
547
+ // Check if source still exists in library DB
548
+ const exists = sourceDb
549
+ .prepare(`SELECT 1 FROM ${table} WHERE id = ?`)
550
+ .get(entry.source_id);
551
+ if (!exists) {
552
+ this.db.prepare(`DELETE FROM ${entry.vec_table} WHERE rowid = CAST(? AS INTEGER)`).run(entry.id);
553
+ this.db.prepare('DELETE FROM vec_index_map WHERE id = ?').run(entry.id);
554
+ pruned++;
555
+ }
556
+ }
557
+ }
558
+ return pruned;
559
+ }
560
+ /**
561
+ * Remove the vector index entry for a single source item.
562
+ *
563
+ * Deletes both the vec table row and the vec_index_map entry for the given
564
+ * (sourceTable, sourceId) pair. Used by the background indexer for immediate
565
+ * point-in-time removal when a supersedes relationship is detected.
566
+ *
567
+ * @returns true if an entry was found and removed, false if nothing was indexed.
568
+ */
569
+ removeItem(sourceTable, sourceId) {
570
+ this.validateSourceTable(sourceTable);
571
+ const entry = this.db
572
+ .prepare('SELECT id, vec_table FROM vec_index_map WHERE source_table = ? AND source_id = ?')
573
+ .get(sourceTable, sourceId);
574
+ if (!entry)
575
+ return false;
576
+ this.db.prepare(`DELETE FROM ${entry.vec_table} WHERE rowid = CAST(? AS INTEGER)`).run(entry.id);
577
+ this.db.prepare('DELETE FROM vec_index_map WHERE id = ?').run(entry.id);
578
+ return true;
579
+ }
580
+ /**
581
+ * Check whether a source item already has a vector in the index.
582
+ * Used by the episode backfill to skip already-vectorized entries.
583
+ */
584
+ hasItem(sourceTable, sourceId) {
585
+ this.validateSourceTable(sourceTable);
586
+ const row = this.db
587
+ .prepare('SELECT 1 FROM vec_index_map WHERE source_table = ? AND source_id = ? LIMIT 1')
588
+ .get(sourceTable, sourceId);
589
+ return row !== undefined;
590
+ }
591
+ /**
592
+ * Tombstone vector entries for superseded facts and knowledge.
593
+ *
594
+ * When fact A is superseded by fact B (facts.superseded_by = B.id), the old
595
+ * vector for A should not surface in semantic recall. Without this, recalled
596
+ * context can include contradicted/outdated facts alongside their replacements.
597
+ *
598
+ * Strategy: find all indexed facts/knowledge with superseded_by IS NOT NULL
599
+ * and delete their vec_index_map entries + vec table rows. The source row
600
+ * stays in library.db (audit trail) but disappears from recall.
601
+ *
602
+ * @returns Number of vector entries tombstoned.
603
+ */
604
+ tombstoneSuperseded() {
605
+ const sourceDb = this.libraryDb || this.db;
606
+ let tombstoned = 0;
607
+ for (const table of ['facts', 'knowledge']) {
608
+ // Find all indexed entries whose source row has been superseded
609
+ const indexed = this.db
610
+ .prepare('SELECT vim.id, vim.vec_table, vim.source_id FROM vec_index_map vim WHERE vim.source_table = ?')
611
+ .all(table);
612
+ for (const entry of indexed) {
613
+ const row = sourceDb
614
+ .prepare(`SELECT superseded_by FROM ${table} WHERE id = ?`)
615
+ .get(entry.source_id);
616
+ if (row?.superseded_by != null) {
617
+ // Remove from vector table
618
+ this.db.prepare(`DELETE FROM ${entry.vec_table} WHERE rowid = CAST(? AS INTEGER)`).run(entry.id);
619
+ // Remove from index map
620
+ this.db.prepare('DELETE FROM vec_index_map WHERE id = ?').run(entry.id);
621
+ tombstoned++;
622
+ }
623
+ }
624
+ }
625
+ if (tombstoned > 0) {
626
+ console.log(`[hypermem-vector] tombstoneSuperseded: removed ${tombstoned} stale vector entries`);
627
+ }
628
+ return tombstoned;
629
+ }
630
+ /**
631
+ * Get index statistics.
632
+ */
633
+ getStats() {
634
+ const breakdown = {};
635
+ let total = 0;
636
+ for (const table of ['facts', 'knowledge', 'episodes']) {
637
+ const count = this.db
638
+ .prepare('SELECT COUNT(*) as cnt FROM vec_index_map WHERE source_table = ?')
639
+ .get(table);
640
+ breakdown[table] = count.cnt;
641
+ total += count.cnt;
642
+ }
643
+ const lastIndexed = this.db
644
+ .prepare('SELECT MAX(indexed_at) as last_at FROM vec_index_map')
645
+ .get();
646
+ return {
647
+ totalVectors: total,
648
+ tableBreakdown: breakdown,
649
+ lastIndexedAt: lastIndexed.last_at,
650
+ };
651
+ }
652
+ }
653
+ /**
654
+ * SHA-256 content hash for change detection and deduplication.
655
+ * Replaces the prior 32-bit rolling hash which had collision risk on large corpora.
656
+ */
657
+ function simpleHash(str) {
658
+ return createHash('sha256').update(str).digest('hex').slice(0, 16);
659
+ }
660
+ /**
661
+ * Create vector tables in a library database for session registry search.
662
+ */
663
+ export function ensureSessionVecTable(db, dimensions = 768) {
664
+ db.exec(`
665
+ CREATE VIRTUAL TABLE IF NOT EXISTS vec_sessions
666
+ USING vec0(embedding float[${dimensions}])
667
+ `);
668
+ db.exec(`
669
+ CREATE TABLE IF NOT EXISTS vec_session_map (
670
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
671
+ session_id TEXT NOT NULL UNIQUE,
672
+ content_hash TEXT NOT NULL,
673
+ indexed_at TEXT NOT NULL
674
+ )
675
+ `);
676
+ }
677
+ //# sourceMappingURL=vector-store.js.map