@aitytech/agentkits-memory 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +267 -149
  3. package/assets/agentkits-memory-add-memory.png +0 -0
  4. package/assets/agentkits-memory-memory-detail.png +0 -0
  5. package/assets/agentkits-memory-memory-list.png +0 -0
  6. package/assets/logo.svg +24 -0
  7. package/dist/better-sqlite3-backend.d.ts +192 -0
  8. package/dist/better-sqlite3-backend.d.ts.map +1 -0
  9. package/dist/better-sqlite3-backend.js +801 -0
  10. package/dist/better-sqlite3-backend.js.map +1 -0
  11. package/dist/cli/save.js +0 -0
  12. package/dist/cli/setup.d.ts +6 -2
  13. package/dist/cli/setup.d.ts.map +1 -1
  14. package/dist/cli/setup.js +289 -42
  15. package/dist/cli/setup.js.map +1 -1
  16. package/dist/cli/viewer.js +25 -56
  17. package/dist/cli/viewer.js.map +1 -1
  18. package/dist/cli/web-viewer.d.ts +14 -0
  19. package/dist/cli/web-viewer.d.ts.map +1 -0
  20. package/dist/cli/web-viewer.js +1769 -0
  21. package/dist/cli/web-viewer.js.map +1 -0
  22. package/dist/embeddings/embedding-cache.d.ts +131 -0
  23. package/dist/embeddings/embedding-cache.d.ts.map +1 -0
  24. package/dist/embeddings/embedding-cache.js +217 -0
  25. package/dist/embeddings/embedding-cache.js.map +1 -0
  26. package/dist/embeddings/index.d.ts +11 -0
  27. package/dist/embeddings/index.d.ts.map +1 -0
  28. package/dist/embeddings/index.js +11 -0
  29. package/dist/embeddings/index.js.map +1 -0
  30. package/dist/embeddings/local-embeddings.d.ts +140 -0
  31. package/dist/embeddings/local-embeddings.d.ts.map +1 -0
  32. package/dist/embeddings/local-embeddings.js +293 -0
  33. package/dist/embeddings/local-embeddings.js.map +1 -0
  34. package/dist/hooks/context.d.ts +6 -1
  35. package/dist/hooks/context.d.ts.map +1 -1
  36. package/dist/hooks/context.js +12 -2
  37. package/dist/hooks/context.js.map +1 -1
  38. package/dist/hooks/observation.d.ts +6 -1
  39. package/dist/hooks/observation.d.ts.map +1 -1
  40. package/dist/hooks/observation.js +12 -2
  41. package/dist/hooks/observation.js.map +1 -1
  42. package/dist/hooks/service.d.ts +1 -6
  43. package/dist/hooks/service.d.ts.map +1 -1
  44. package/dist/hooks/service.js +33 -85
  45. package/dist/hooks/service.js.map +1 -1
  46. package/dist/hooks/session-init.d.ts +6 -1
  47. package/dist/hooks/session-init.d.ts.map +1 -1
  48. package/dist/hooks/session-init.js +12 -2
  49. package/dist/hooks/session-init.js.map +1 -1
  50. package/dist/hooks/summarize.d.ts +6 -1
  51. package/dist/hooks/summarize.d.ts.map +1 -1
  52. package/dist/hooks/summarize.js +12 -2
  53. package/dist/hooks/summarize.js.map +1 -1
  54. package/dist/index.d.ts +10 -17
  55. package/dist/index.d.ts.map +1 -1
  56. package/dist/index.js +172 -94
  57. package/dist/index.js.map +1 -1
  58. package/dist/mcp/server.js +17 -3
  59. package/dist/mcp/server.js.map +1 -1
  60. package/dist/migration.js +3 -3
  61. package/dist/migration.js.map +1 -1
  62. package/dist/search/hybrid-search.d.ts +262 -0
  63. package/dist/search/hybrid-search.d.ts.map +1 -0
  64. package/dist/search/hybrid-search.js +688 -0
  65. package/dist/search/hybrid-search.js.map +1 -0
  66. package/dist/search/index.d.ts +13 -0
  67. package/dist/search/index.d.ts.map +1 -0
  68. package/dist/search/index.js +13 -0
  69. package/dist/search/index.js.map +1 -0
  70. package/dist/search/token-economics.d.ts +161 -0
  71. package/dist/search/token-economics.d.ts.map +1 -0
  72. package/dist/search/token-economics.js +239 -0
  73. package/dist/search/token-economics.js.map +1 -0
  74. package/dist/types.d.ts +0 -68
  75. package/dist/types.d.ts.map +1 -1
  76. package/dist/types.js.map +1 -1
  77. package/package.json +23 -8
  78. package/src/__tests__/better-sqlite3-backend.test.ts +1466 -0
  79. package/src/__tests__/cache-manager.test.ts +499 -0
  80. package/src/__tests__/embedding-integration.test.ts +481 -0
  81. package/src/__tests__/hnsw-index.test.ts +727 -0
  82. package/src/__tests__/index.test.ts +432 -0
  83. package/src/better-sqlite3-backend.ts +1000 -0
  84. package/src/cli/setup.ts +358 -47
  85. package/src/cli/viewer.ts +28 -63
  86. package/src/cli/web-viewer.ts +1956 -0
  87. package/src/embeddings/__tests__/embedding-cache.test.ts +269 -0
  88. package/src/embeddings/__tests__/local-embeddings.test.ts +495 -0
  89. package/src/embeddings/embedding-cache.ts +318 -0
  90. package/src/embeddings/index.ts +20 -0
  91. package/src/embeddings/local-embeddings.ts +419 -0
  92. package/src/hooks/__tests__/handlers.test.ts +58 -17
  93. package/src/hooks/__tests__/integration.test.ts +77 -26
  94. package/src/hooks/context.ts +13 -2
  95. package/src/hooks/observation.ts +13 -2
  96. package/src/hooks/service.ts +39 -100
  97. package/src/hooks/session-init.ts +13 -2
  98. package/src/hooks/summarize.ts +13 -2
  99. package/src/index.ts +210 -116
  100. package/src/mcp/server.ts +20 -3
  101. package/src/search/__tests__/hybrid-search.test.ts +669 -0
  102. package/src/search/__tests__/token-economics.test.ts +276 -0
  103. package/src/search/hybrid-search.ts +968 -0
  104. package/src/search/index.ts +29 -0
  105. package/src/search/token-economics.ts +367 -0
  106. package/src/types.ts +0 -96
  107. package/src/__tests__/sqljs-backend.test.ts +0 -410
  108. package/src/migration.ts +0 -574
  109. package/src/sql.js.d.ts +0 -70
  110. package/src/sqljs-backend.ts +0 -789
@@ -0,0 +1,688 @@
1
+ /**
2
+ * Hybrid Search Module
3
+ *
4
+ * Combines FTS5 keyword search with vector semantic search
5
+ * for improved recall (15-20% better than either alone).
6
+ *
7
+ * Features:
8
+ * - SQLite FTS5 full-text search with trigram tokenizer (CJK support)
9
+ * - Score fusion (α*keyword + β*semantic)
10
+ * - 3-layer search workflow for token efficiency
11
+ * - Token economics tracking
12
+ *
13
+ * CJK Language Support:
14
+ * Uses trigram tokenizer which works for Japanese, Chinese, Korean
15
+ * by matching substrings instead of requiring word boundaries.
16
+ *
17
+ * @module @aitytech/agentkits-memory/search
18
+ */
19
/**
 * Baseline settings for the hybrid search engine.
 *
 * Callers may override any subset; the engine constructor merges the
 * overrides on top of these values.
 */
const DEFAULT_CONFIG = {
    // Weight applied to the keyword score when fusing scores.
    keywordWeight: 0.3,
    // Weight applied to the semantic (embedding) score when fusing scores.
    semanticWeight: 0.7,
    // Compact results scoring below this value are dropped.
    minScore: 0.1,
    useBM25: true,
    // Default cap on the number of rows returned by a compact search.
    maxResultsPerLayer: 100,
    // Preferred FTS5 tokenizer; trigram is the best choice for CJK languages.
    tokenizer: 'trigram',
    // When an FTS5 query throws, retry the search with LIKE instead of failing.
    fallbackToLike: true,
};
31
/**
 * Estimate the token count of a piece of text (rough approximation).
 *
 * Uses ~4 characters per token, rounded up, as an average for English text.
 *
 * @param {string | null | undefined} text - Text to measure.
 * @returns {number} Estimated token count; 0 for empty or missing text.
 */
function estimateTokens(text) {
    // A missing value (e.g. a NULL column) carries no tokens; do not throw on it.
    if (text == null) {
        return 0;
    }
    return Math.ceil(text.length / 4);
}
38
/**
 * Matches one CJK character:
 * - CJK Unified Ideographs: \u4E00-\u9FFF
 * - Hiragana: \u3040-\u309F
 * - Katakana: \u30A0-\u30FF
 * - Hangul: \uAC00-\uD7AF
 * - CJK Extension: \u3400-\u4DBF
 */
const CJK_CHAR = /[\u4E00-\u9FFF\u3040-\u309F\u30A0-\u30FF\uAC00-\uD7AF\u3400-\u4DBF]/;
/** Global variant of CJK_CHAR, used only with String.prototype.match to count characters. */
const CJK_CHAR_ALL = /[\u4E00-\u9FFF\u3040-\u309F\u30A0-\u30FF\uAC00-\uD7AF\u3400-\u4DBF]/g;
/** Number of leading content characters kept as the snippet of a compact result. */
const SNIPPET_LENGTH = 100;
/** The trigram tokenizer cannot match query strings shorter than this many characters. */
const TRIGRAM_MIN_CHARS = 3;
/**
 * Build a compact search result from a database row.
 *
 * @param {{id: *, key: *, namespace: *, content: ?string}} row - Row with at least these columns.
 * @param {{score: number, keywordScore: number, semanticScore: number}} scores - Scores to attach.
 * @returns {object} Compact result (id, key, namespace, scores, snippet, estimatedTokens).
 */
function toCompactResult(row, scores) {
    // Treat a missing content value as empty text instead of throwing.
    const content = row.content ?? '';
    return {
        id: row.id,
        key: row.key,
        namespace: row.namespace,
        score: scores.score,
        keywordScore: scores.keywordScore,
        semanticScore: scores.semanticScore,
        snippet: content.substring(0, SNIPPET_LENGTH),
        estimatedTokens: estimateTokens(content),
    };
}
/**
 * Decode a stored embedding blob into a Float32Array.
 *
 * The bytes are copied out of the backing buffer first so the float view
 * starts at offset 0 regardless of the blob's own byteOffset.
 *
 * @param {{buffer: ArrayBuffer, byteOffset: number, byteLength: number}} blob - Binary embedding.
 * @returns {Float32Array} Decoded vector.
 */
function decodeEmbedding(blob) {
    const { buffer, byteOffset, byteLength } = blob;
    return new Float32Array(buffer.slice(byteOffset, byteOffset + byteLength));
}
/**
 * Escape LIKE wildcards so user text is matched literally.
 * Must be paired with `ESCAPE '\'` in the SQL.
 *
 * @param {string} text - Raw user text.
 * @returns {string} Text with `\`, `%` and `_` prefixed by a backslash.
 */
function escapeLikePattern(text) {
    return text.replace(/[\\%_]/g, '\\$&');
}
/**
 * Hybrid Search Engine
 *
 * Combines keyword search (FTS5, with a LIKE fallback) and semantic search
 * (cosine similarity over stored embeddings) behind a token-efficient
 * 3-layer retrieval workflow: compact results, timeline context, full entries.
 *
 * Supports CJK languages (Japanese, Chinese, Korean) via the trigram tokenizer.
 */
export class HybridSearchEngine {
    db;
    config;
    embeddingGenerator;
    ftsInitialized = false;
    ftsAvailable = false;
    /** The actual tokenizer being used (may differ from config if tokenizer not available) */
    activeTokenizer = null;
    /**
     * @param {object} db - Database handle exposing `exec(sql)` and `prepare(sql)`.
     * @param {object} [config] - Overrides merged on top of DEFAULT_CONFIG.
     * @param {Function} [embeddingGenerator] - Async function mapping text to an
     *   embedding vector; semantic search is skipped when it is omitted.
     */
    constructor(db, config = {}, embeddingGenerator) {
        this.db = db;
        this.config = { ...DEFAULT_CONFIG, ...config };
        this.embeddingGenerator = embeddingGenerator;
    }
    /**
     * Check if FTS5 is available in this SQLite build by creating and
     * dropping a throwaway FTS5 table.
     *
     * @returns {boolean} true when both statements succeed.
     */
    checkFts5Available() {
        try {
            this.db.exec(`CREATE VIRTUAL TABLE IF NOT EXISTS _fts5_check USING fts5(test)`);
            this.db.exec(`DROP TABLE IF EXISTS _fts5_check`);
            return true;
        }
        catch {
            return false;
        }
    }
    /**
     * Check if a specific tokenizer is available by creating and dropping a
     * throwaway FTS5 table that uses it.
     *
     * @param {string} tokenizer - FTS5 option text, e.g. `tokenize='trigram'`.
     *   It is interpolated into SQL, so it must come from trusted code.
     * @returns {boolean} true when both statements succeed.
     */
    checkTokenizerAvailable(tokenizer) {
        try {
            this.db.exec(`CREATE VIRTUAL TABLE IF NOT EXISTS _tokenizer_check USING fts5(test, ${tokenizer})`);
            this.db.exec(`DROP TABLE IF EXISTS _tokenizer_check`);
            return true;
        }
        catch {
            return false;
        }
    }
    /**
     * Resolve the FTS5 tokenizer option to use and record it in activeTokenizer.
     *
     * The configured tokenizer is honored when it is 'trigram' or 'porter'
     * and its probe succeeds; any other setting, or a failed probe, falls
     * back to unicode61.
     *
     * @returns {string} The `tokenize='...'` option for CREATE VIRTUAL TABLE.
     */
    getBestTokenizer() {
        const preferred = this.config.tokenizer;
        if (preferred === 'trigram' && this.checkTokenizerAvailable("tokenize='trigram'")) {
            this.activeTokenizer = 'trigram';
            return "tokenize='trigram'";
        }
        if (preferred === 'porter' && this.checkTokenizerAvailable("tokenize='porter unicode61'")) {
            this.activeTokenizer = 'porter';
            return "tokenize='porter unicode61'";
        }
        // Default to unicode61 which should always be available
        this.activeTokenizer = 'unicode61';
        return "tokenize='unicode61'";
    }
    /**
     * Initialize the FTS5 virtual table, its sync triggers, and the index.
     *
     * Runs once; later calls return immediately. When FTS5 is missing or the
     * setup fails, the engine stays usable and keyword search uses LIKE.
     *
     * @returns {Promise<void>}
     */
    async initialize() {
        if (this.ftsInitialized) {
            return;
        }
        this.ftsAvailable = this.checkFts5Available();
        if (!this.ftsAvailable) {
            console.warn('[HybridSearch] FTS5 not available in this SQLite build. ' +
                'Falling back to LIKE search.');
            this.ftsInitialized = true;
            return;
        }
        try {
            const tokenizer = this.getBestTokenizer();
            // External-content FTS5 table: the text lives in memory_entries and
            // the index is kept in sync by the triggers below.
            this.db.exec(`
                CREATE VIRTUAL TABLE IF NOT EXISTS memory_fts USING fts5(
                    key,
                    content,
                    namespace,
                    tags,
                    content=memory_entries,
                    content_rowid=rowid,
                    ${tokenizer}
                )
            `);
            this.db.exec(`
                CREATE TRIGGER IF NOT EXISTS memory_fts_insert AFTER INSERT ON memory_entries BEGIN
                    INSERT INTO memory_fts(rowid, key, content, namespace, tags)
                    VALUES (NEW.rowid, NEW.key, NEW.content, NEW.namespace, NEW.tags);
                END
            `);
            this.db.exec(`
                CREATE TRIGGER IF NOT EXISTS memory_fts_delete AFTER DELETE ON memory_entries BEGIN
                    INSERT INTO memory_fts(memory_fts, rowid, key, content, namespace, tags)
                    VALUES ('delete', OLD.rowid, OLD.key, OLD.content, OLD.namespace, OLD.tags);
                END
            `);
            this.db.exec(`
                CREATE TRIGGER IF NOT EXISTS memory_fts_update AFTER UPDATE ON memory_entries BEGIN
                    INSERT INTO memory_fts(memory_fts, rowid, key, content, namespace, tags)
                    VALUES ('delete', OLD.rowid, OLD.key, OLD.content, OLD.namespace, OLD.tags);
                    INSERT INTO memory_fts(rowid, key, content, namespace, tags)
                    VALUES (NEW.rowid, NEW.key, NEW.content, NEW.namespace, NEW.tags);
                END
            `);
            // Index rows that existed before the triggers were created.
            await this.rebuildFtsIndex();
        }
        catch (error) {
            console.warn('[HybridSearch] Failed to initialize FTS5:', error);
            this.ftsAvailable = false;
            // No tokenizer is in use once FTS5 is disabled; keep the getter honest.
            this.activeTokenizer = null;
        }
        this.ftsInitialized = true;
    }
    /**
     * Check if FTS5 is available and initialized.
     *
     * @returns {boolean}
     */
    isFtsAvailable() {
        return this.ftsAvailable;
    }
    /**
     * Get the active tokenizer being used.
     *
     * @returns {?string} Tokenizer name, or null if FTS5 is not available.
     */
    getActiveTokenizer() {
        return this.activeTokenizer;
    }
    /**
     * Check if CJK search is fully supported (requires the trigram tokenizer).
     * If not, CJK queries fall back to LIKE search.
     *
     * @returns {boolean}
     */
    isCjkOptimized() {
        return this.ftsAvailable && this.activeTokenizer === 'trigram';
    }
    /**
     * Rebuild the FTS index from the existing memory entries using the FTS5
     * 'rebuild' command. Failures are logged, not thrown.
     *
     * @returns {Promise<void>}
     */
    async rebuildFtsIndex() {
        if (!this.ftsAvailable) {
            return;
        }
        try {
            this.db.exec(`INSERT INTO memory_fts(memory_fts) VALUES('rebuild')`);
        }
        catch (error) {
            console.warn('[HybridSearch] Failed to rebuild FTS index:', error);
        }
    }
    /**
     * Layer 1: Compact Search
     *
     * Returns minimal data (ids, scores, snippet, token estimate) for initial
     * filtering. Keyword and semantic hits for the same id are merged with
     * fuseScores; results below config.minScore are dropped.
     *
     * @param {string} query - Search text.
     * @param {object} [options]
     * @param {number} [options.limit] - Max results; defaults to config.maxResultsPerLayer.
     * @param {string} [options.namespace] - Restrict the search to one namespace.
     * @param {boolean} [options.includeKeyword=true] - Run the keyword pass.
     * @param {boolean} [options.includeSemantic] - Run the semantic pass; defaults
     *   to whether an embedding generator was supplied.
     * @returns {Promise<object[]>} Compact results, best score first.
     */
    async searchCompact(query, options = {}) {
        const limit = options.limit || this.config.maxResultsPerLayer;
        const includeKeyword = options.includeKeyword ?? true;
        const includeSemantic = options.includeSemantic ?? !!this.embeddingGenerator;
        const merged = new Map();
        if (includeKeyword) {
            const keywordHits = await this.keywordSearch(query, limit, options.namespace);
            for (const hit of keywordHits) {
                merged.set(hit.id, hit);
            }
        }
        if (includeSemantic && this.embeddingGenerator) {
            const semanticHits = await this.semanticSearchCompact(query, limit, options.namespace);
            for (const hit of semanticHits) {
                const existing = merged.get(hit.id);
                if (!existing) {
                    merged.set(hit.id, hit);
                    continue;
                }
                // Found by both passes: combine the two raw scores.
                existing.semanticScore = hit.semanticScore;
                existing.score = this.fuseScores(existing.keywordScore, hit.semanticScore);
            }
        }
        return Array.from(merged.values())
            .filter((r) => r.score >= this.config.minScore)
            .sort((a, b) => b.score - a.score)
            .slice(0, limit);
    }
    /**
     * Layer 2: Timeline Search
     *
     * For each id, returns the entry plus up to `contextWindow` entries from
     * the same namespace created strictly before and strictly after it.
     * Ids that do not resolve to a row are skipped.
     *
     * @param {Iterable<*>} entryIds - Ids of the entries to build context for.
     * @param {number} [contextWindow=3] - Max neighbors on each side.
     * @returns {Promise<object[]>} One `{ entry, before, after, totalTokens }`
     *   per resolved id; `before` and `after` are in chronological order.
     */
    async searchTimeline(entryIds, contextWindow = 3) {
        const neighborScores = { score: 0.5, keywordScore: 0, semanticScore: 0 };
        const sumTokens = (items) => items.reduce((sum, r) => sum + r.estimatedTokens, 0);
        const timelines = [];
        // Prepared lazily so an empty id list touches the database no more than before,
        // and once so the statements are reused across ids.
        let statements = null;
        for (const id of entryIds) {
            statements ??= {
                target: this.db.prepare(`
                    SELECT id, key, namespace, content, created_at
                    FROM memory_entries WHERE id = ?
                `),
                before: this.db.prepare(`
                    SELECT id, key, namespace, content, created_at
                    FROM memory_entries
                    WHERE namespace = ? AND created_at < ?
                    ORDER BY created_at DESC
                    LIMIT ?
                `),
                after: this.db.prepare(`
                    SELECT id, key, namespace, content, created_at
                    FROM memory_entries
                    WHERE namespace = ? AND created_at > ?
                    ORDER BY created_at ASC
                    LIMIT ?
                `),
            };
            const targetRow = statements.target.get(id);
            if (!targetRow) {
                continue;
            }
            const entry = toCompactResult(targetRow, { score: 1.0, keywordScore: 0, semanticScore: 0 });
            const before = statements.before
                .all(targetRow.namespace, targetRow.created_at, contextWindow)
                .map((row) => toCompactResult(row, neighborScores));
            const after = statements.after
                .all(targetRow.namespace, targetRow.created_at, contextWindow)
                .map((row) => toCompactResult(row, neighborScores));
            const totalTokens = entry.estimatedTokens + sumTokens(before) + sumTokens(after);
            timelines.push({
                entry,
                // Fetched newest-first; flip into chronological order.
                before: before.reverse(),
                after,
                totalTokens,
            });
        }
        return timelines;
    }
    /**
     * Layer 3: Full Search
     *
     * Returns complete entry data for the selected ids, in the order the ids
     * were given. Ids without a matching row are omitted.
     *
     * @param {Array<*>} ids - Entry ids to fetch.
     * @returns {Promise<object[]>} Full entries.
     */
    async getFull(ids) {
        if (ids.length === 0) {
            return [];
        }
        const placeholders = ids.map(() => '?').join(', ');
        const rows = this.db.prepare(`
            SELECT * FROM memory_entries WHERE id IN (${placeholders})
        `).all(...ids);
        const position = new Map(ids.map((id, index) => [id, index]));
        return rows
            .map((row) => this.rowToEntry(row))
            .sort((a, b) => (position.get(a.id) ?? 0) - (position.get(b.id) ?? 0));
    }
    /**
     * Full hybrid search with token economics.
     *
     * Runs a compact search capped at config.maxResultsPerLayer, then (unless
     * `fetchFull` is false) loads the full entries of the top `limit` hits.
     *
     * In the returned timing, `keywordMs` covers the whole compact pass
     * (keyword and semantic) and `semanticMs` covers the full-entry fetch.
     *
     * @param {string} query - Search text.
     * @param {object} [options]
     * @param {number} [options.limit=10] - Number of top results to return.
     * @param {string} [options.namespace] - Restrict the search to one namespace.
     * @param {boolean} [options.fetchFull=true] - Set to false to skip loading full entries.
     * @returns {Promise<object>} `{ results, compact, economics, timing }`.
     */
    async search(query, options = {}) {
        const startTime = performance.now();
        const limit = options.limit || 10;
        const fetchFull = options.fetchFull !== false;
        // Layer 1: Compact search
        const compactStart = performance.now();
        const compact = await this.searchCompact(query, {
            limit: this.config.maxResultsPerLayer,
            namespace: options.namespace,
        });
        const compactMs = performance.now() - compactStart;
        const compactTokens = compact.reduce((sum, r) => sum + r.estimatedTokens, 0);
        const top = compact.slice(0, limit);
        // Layer 3: Fetch full results if requested
        const fetchStart = performance.now();
        let results = [];
        let fullTokens = 0;
        if (fetchFull) {
            // Look scores up by id: an id that no longer resolves to a row must
            // not shift the scores of the entries that follow it.
            const scoreById = new Map(top.map((r) => [r.id, r.score]));
            const fullEntries = await this.getFull(top.map((r) => r.id));
            results = fullEntries.map((entry) => {
                const score = scoreById.get(entry.id) || 0;
                return { entry, score, distance: 1 - score };
            });
            fullTokens = fullEntries.reduce((sum, e) => sum + estimateTokens(e.content ?? ''), 0);
        }
        const fetchMs = performance.now() - fetchStart;
        const totalMs = performance.now() - startTime;
        // Calculate savings
        const fullResultTokens = compactTokens;
        const actualTokens = fetchFull ? fullTokens : compactTokens / 10;
        const tokensSaved = fullResultTokens - actualTokens;
        const savingsPercent = fullResultTokens > 0 ? (tokensSaved / fullResultTokens) * 100 : 0;
        return {
            results,
            compact: top,
            economics: {
                tokensSaved: Math.max(0, tokensSaved),
                fullResultTokens,
                actualTokens,
                savingsPercent: Math.max(0, savingsPercent),
                layers: {
                    compact: compact.length,
                    timeline: 0,
                    full: results.length,
                },
            },
            timing: {
                keywordMs: compactMs,
                semanticMs: fetchMs,
                fusionMs: 0,
                totalMs,
            },
        };
    }
    /**
     * Check if text contains CJK characters.
     * CJK requires special handling (LIKE or trigram tokenizer).
     *
     * @param {string} text
     * @returns {boolean}
     */
    containsCJK(text) {
        return CJK_CHAR.test(text);
    }
    /**
     * Keyword search using FTS5 (with LIKE fallback).
     *
     * LIKE is used directly when FTS5 is unavailable, when a CJK query meets
     * a non-trigram tokenizer, and when the trigram tokenizer is given a
     * query too short for it to match. If the FTS5 query itself throws, LIKE
     * is used when config.fallbackToLike is set; otherwise the error is rethrown.
     *
     * @param {string} query - Search text.
     * @param {number} limit - Max rows to return.
     * @param {string} [namespace] - Restrict the search to one namespace.
     * @returns {Promise<object[]>} Compact results with keyword scores.
     */
    async keywordSearch(query, limit, namespace) {
        if (!this.ftsAvailable) {
            return this.likeSearch(query, limit, namespace);
        }
        const usesTrigram = this.activeTokenizer === 'trigram';
        if (this.containsCJK(query)) {
            // unicode61/porter cannot split CJK text (no word boundaries).
            if (!usesTrigram) {
                return this.likeSearch(query, limit, namespace);
            }
            // Trigram needs at least 3 characters to match.
            const cjkChars = query.match(CJK_CHAR_ALL);
            if (cjkChars && cjkChars.length < TRIGRAM_MIN_CHARS) {
                return this.likeSearch(query, limit, namespace);
            }
        }
        const sanitizedQuery = this.sanitizeFtsQuery(query);
        if (!sanitizedQuery) {
            return [];
        }
        // The same 3-character minimum applies to non-CJK text: a shorter phrase
        // can never match under trigram, so use LIKE instead of returning nothing.
        // (For trigram the sanitized query is the text wrapped in two quotes.)
        if (usesTrigram && [...sanitizedQuery].length - 2 < TRIGRAM_MIN_CHARS) {
            return this.likeSearch(query, limit, namespace);
        }
        try {
            const namespaceClause = namespace ? ' AND m.namespace = ?' : '';
            const params = namespace
                ? [sanitizedQuery, namespace, limit]
                : [sanitizedQuery, limit];
            const rows = this.db.prepare(`
                SELECT
                    m.id, m.key, m.namespace, m.content,
                    bm25(memory_fts) as rank
                FROM memory_fts f
                JOIN memory_entries m ON f.rowid = m.rowid
                WHERE memory_fts MATCH ?${namespaceClause}
                ORDER BY rank
                LIMIT ?
            `).all(...params);
            return rows.map((row) => {
                // Map the BM25 rank onto a 0-1 scale.
                // NOTE(review): SQLite documents bm25() as returning numerically
                // smaller (more negative) values for better matches, so this
                // mapping gives weaker matches the higher score. Left unchanged
                // to avoid shifting scores relative to minScore - confirm intent.
                const keywordScore = Math.min(1, Math.max(0, 1 + row.rank / 10));
                return toCompactResult(row, {
                    score: keywordScore * this.config.keywordWeight,
                    keywordScore,
                    semanticScore: 0,
                });
            });
        }
        catch (error) {
            if (this.config.fallbackToLike) {
                return this.likeSearch(query, limit, namespace);
            }
            throw error;
        }
    }
    /**
     * LIKE-based search fallback (works without FTS5).
     *
     * Matches the trimmed query as a literal substring of content, key or
     * tags; `%`, `_` and `\` in the query are escaped, not treated as
     * wildcards. Rows are returned newest first. The score falls with the
     * match position in the content, with a floor of 0.3; rows matched only
     * through key or tags score 0.5.
     *
     * @param {string} query - Search text.
     * @param {number} limit - Max rows to return.
     * @param {string} [namespace] - Restrict the search to one namespace.
     * @returns {object[]} Compact results with keyword scores.
     */
    likeSearch(query, limit, namespace) {
        const trimmedQuery = query.trim();
        if (!trimmedQuery) {
            return [];
        }
        const searchPattern = `%${escapeLikePattern(trimmedQuery)}%`;
        const namespaceClause = namespace ? ' AND namespace = ?' : '';
        const params = [searchPattern, searchPattern, searchPattern];
        if (namespace) {
            params.push(namespace);
        }
        params.push(limit);
        const rows = this.db.prepare(`
            SELECT id, key, namespace, content
            FROM memory_entries
            WHERE (content LIKE ? ESCAPE '\\' OR key LIKE ? ESCAPE '\\' OR tags LIKE ? ESCAPE '\\')${namespaceClause}
            ORDER BY created_at DESC
            LIMIT ?
        `).all(...params);
        // Score against the same (trimmed) text that was sent to the database.
        const needle = trimmedQuery.toLowerCase();
        return rows.map((row) => {
            const matchIndex = (row.content ?? '').toLowerCase().indexOf(needle);
            const keywordScore = matchIndex >= 0 ? Math.max(0.3, 1 - matchIndex / 1000) : 0.5;
            return toCompactResult(row, {
                score: keywordScore * this.config.keywordWeight,
                keywordScore,
                semanticScore: 0,
            });
        });
    }
    /**
     * Semantic search returning compact results.
     *
     * Embeds the query, compares it with every stored embedding (optionally
     * within one namespace) by cosine similarity, and keeps the best `limit`.
     *
     * @param {string} query - Search text.
     * @param {number} limit - Max results to return.
     * @param {string} [namespace] - Restrict the search to one namespace.
     * @returns {Promise<object[]>} Compact results with semantic scores;
     *   empty when no embedding generator is configured.
     */
    async semanticSearchCompact(query, limit, namespace) {
        if (!this.embeddingGenerator) {
            return [];
        }
        const queryEmbedding = await this.embeddingGenerator(query);
        const namespaceClause = namespace ? ' AND namespace = ?' : '';
        const params = namespace ? [namespace] : [];
        const rows = this.db.prepare(`
            SELECT id, key, namespace, content, embedding
            FROM memory_entries
            WHERE embedding IS NOT NULL${namespaceClause}
        `).all(...params);
        const candidates = rows
            .filter((row) => row.embedding)
            .map((row) => ({
                row,
                similarity: this.cosineSimilarity(queryEmbedding, decodeEmbedding(row.embedding)),
            }));
        candidates.sort((a, b) => b.similarity - a.similarity);
        return candidates.slice(0, limit).map(({ row, similarity }) => toCompactResult(row, {
            score: similarity * this.config.semanticWeight,
            keywordScore: 0,
            semanticScore: similarity,
        }));
    }
    /**
     * Fuse keyword and semantic scores as a weighted sum using
     * config.keywordWeight and config.semanticWeight.
     *
     * @param {number} keywordScore
     * @param {number} semanticScore
     * @returns {number}
     */
    fuseScores(keywordScore, semanticScore) {
        const { keywordWeight, semanticWeight } = this.config;
        return keywordScore * keywordWeight + semanticScore * semanticWeight;
    }
    /**
     * Calculate cosine similarity between two vectors.
     *
     * @param {ArrayLike<number>} a
     * @param {ArrayLike<number>} b
     * @returns {number} Similarity; 0 when the lengths differ or either
     *   vector has zero magnitude.
     */
    cosineSimilarity(a, b) {
        if (a.length !== b.length) {
            return 0;
        }
        let dotProduct = 0;
        let normA = 0;
        let normB = 0;
        for (let i = 0; i < a.length; i++) {
            dotProduct += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        const denominator = Math.sqrt(normA) * Math.sqrt(normB);
        return denominator === 0 ? 0 : dotProduct / denominator;
    }
    /**
     * Sanitize a query for FTS5.
     *
     * Replaces the characters `* ^ : " ( )` and the words AND/OR/NOT/NEAR
     * (in any letter case) with spaces. Under the trigram tokenizer the
     * remainder is returned as one quoted phrase; otherwise each
     * whitespace-separated term is quoted and the terms are joined with OR.
     *
     * @param {string} query - Raw user query.
     * @returns {string} FTS5 MATCH expression, or '' when nothing is left.
     */
    sanitizeFtsQuery(query) {
        const sanitized = query
            .replace(/[*^:"()]/g, ' ')
            .replace(/\bAND\b|\bOR\b|\bNOT\b|\bNEAR\b/gi, ' ')
            .trim();
        if (!sanitized) {
            return '';
        }
        if (this.activeTokenizer === 'trigram') {
            // With trigram, wrap the entire query in quotes for phrase matching
            return `"${sanitized}"`;
        }
        return sanitized
            .split(/\s+/)
            .filter((term) => term.length > 0)
            .map((term) => `"${term}"`)
            .join(' OR ');
    }
    /**
     * Convert a database row to a MemoryEntry: snake_case columns become
     * camelCase fields, JSON text columns are parsed, and the embedding blob
     * (if any) is decoded.
     *
     * @param {object} row - Row selected from memory_entries.
     * @returns {object} Memory entry.
     * @throws {SyntaxError} When tags, metadata or references hold invalid JSON.
     */
    rowToEntry(row) {
        const embedding = row.embedding ? decodeEmbedding(row.embedding) : undefined;
        return {
            id: row.id,
            key: row.key,
            content: row.content,
            embedding,
            type: row.type,
            namespace: row.namespace,
            tags: JSON.parse(row.tags || '[]'),
            metadata: JSON.parse(row.metadata || '{}'),
            sessionId: row.session_id,
            ownerId: row.owner_id,
            accessLevel: row.access_level,
            createdAt: row.created_at,
            updatedAt: row.updated_at,
            expiresAt: row.expires_at,
            version: row.version,
            references: JSON.parse(row.references || '[]'),
            accessCount: row.access_count,
            lastAccessedAt: row.last_accessed_at,
        };
    }
    /**
     * Get a shallow copy of the current configuration.
     *
     * @returns {object}
     */
    getConfig() {
        return { ...this.config };
    }
    /**
     * Merge the given settings into the current configuration.
     *
     * @param {object} config - Partial configuration to apply.
     */
    updateConfig(config) {
        this.config = { ...this.config, ...config };
    }
}
681
/**
 * Factory for HybridSearchEngine.
 *
 * @param {object} db - Database handle passed to the engine.
 * @param {object} [config] - Configuration overrides passed to the engine.
 * @param {Function} [embeddingGenerator] - Embedding function passed to the engine.
 * @returns {HybridSearchEngine} A new engine built from the three arguments.
 */
export function createHybridSearchEngine(db, config, embeddingGenerator) {
    const engine = new HybridSearchEngine(db, config, embeddingGenerator);
    return engine;
}
687
+ export default HybridSearchEngine;
688
+ //# sourceMappingURL=hybrid-search.js.map