@aitytech/agentkits-memory 1.0.1 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +54 -5
- package/dist/better-sqlite3-backend.d.ts +192 -0
- package/dist/better-sqlite3-backend.d.ts.map +1 -0
- package/dist/better-sqlite3-backend.js +801 -0
- package/dist/better-sqlite3-backend.js.map +1 -0
- package/dist/cli/save.js +0 -0
- package/dist/cli/setup.d.ts +6 -2
- package/dist/cli/setup.d.ts.map +1 -1
- package/dist/cli/setup.js +289 -42
- package/dist/cli/setup.js.map +1 -1
- package/dist/cli/viewer.js +25 -56
- package/dist/cli/viewer.js.map +1 -1
- package/dist/cli/web-viewer.d.ts +2 -1
- package/dist/cli/web-viewer.d.ts.map +1 -1
- package/dist/cli/web-viewer.js +791 -141
- package/dist/cli/web-viewer.js.map +1 -1
- package/dist/embeddings/embedding-cache.d.ts +131 -0
- package/dist/embeddings/embedding-cache.d.ts.map +1 -0
- package/dist/embeddings/embedding-cache.js +217 -0
- package/dist/embeddings/embedding-cache.js.map +1 -0
- package/dist/embeddings/index.d.ts +11 -0
- package/dist/embeddings/index.d.ts.map +1 -0
- package/dist/embeddings/index.js +11 -0
- package/dist/embeddings/index.js.map +1 -0
- package/dist/embeddings/local-embeddings.d.ts +140 -0
- package/dist/embeddings/local-embeddings.d.ts.map +1 -0
- package/dist/embeddings/local-embeddings.js +293 -0
- package/dist/embeddings/local-embeddings.js.map +1 -0
- package/dist/hooks/context.d.ts +6 -1
- package/dist/hooks/context.d.ts.map +1 -1
- package/dist/hooks/context.js +12 -2
- package/dist/hooks/context.js.map +1 -1
- package/dist/hooks/observation.d.ts +6 -1
- package/dist/hooks/observation.d.ts.map +1 -1
- package/dist/hooks/observation.js +12 -2
- package/dist/hooks/observation.js.map +1 -1
- package/dist/hooks/service.d.ts +1 -6
- package/dist/hooks/service.d.ts.map +1 -1
- package/dist/hooks/service.js +33 -85
- package/dist/hooks/service.js.map +1 -1
- package/dist/hooks/session-init.d.ts +6 -1
- package/dist/hooks/session-init.d.ts.map +1 -1
- package/dist/hooks/session-init.js +12 -2
- package/dist/hooks/session-init.js.map +1 -1
- package/dist/hooks/summarize.d.ts +6 -1
- package/dist/hooks/summarize.d.ts.map +1 -1
- package/dist/hooks/summarize.js +12 -2
- package/dist/hooks/summarize.js.map +1 -1
- package/dist/index.d.ts +10 -17
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +172 -94
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.js +17 -3
- package/dist/mcp/server.js.map +1 -1
- package/dist/migration.js +3 -3
- package/dist/migration.js.map +1 -1
- package/dist/search/hybrid-search.d.ts +262 -0
- package/dist/search/hybrid-search.d.ts.map +1 -0
- package/dist/search/hybrid-search.js +688 -0
- package/dist/search/hybrid-search.js.map +1 -0
- package/dist/search/index.d.ts +13 -0
- package/dist/search/index.d.ts.map +1 -0
- package/dist/search/index.js +13 -0
- package/dist/search/index.js.map +1 -0
- package/dist/search/token-economics.d.ts +161 -0
- package/dist/search/token-economics.d.ts.map +1 -0
- package/dist/search/token-economics.js +239 -0
- package/dist/search/token-economics.js.map +1 -0
- package/dist/types.d.ts +0 -68
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +6 -4
- package/src/__tests__/better-sqlite3-backend.test.ts +1466 -0
- package/src/__tests__/cache-manager.test.ts +499 -0
- package/src/__tests__/embedding-integration.test.ts +481 -0
- package/src/__tests__/hnsw-index.test.ts +727 -0
- package/src/__tests__/index.test.ts +432 -0
- package/src/better-sqlite3-backend.ts +1000 -0
- package/src/cli/setup.ts +358 -47
- package/src/cli/viewer.ts +28 -63
- package/src/cli/web-viewer.ts +936 -182
- package/src/embeddings/__tests__/embedding-cache.test.ts +269 -0
- package/src/embeddings/__tests__/local-embeddings.test.ts +495 -0
- package/src/embeddings/embedding-cache.ts +318 -0
- package/src/embeddings/index.ts +20 -0
- package/src/embeddings/local-embeddings.ts +419 -0
- package/src/hooks/__tests__/handlers.test.ts +58 -17
- package/src/hooks/__tests__/integration.test.ts +77 -26
- package/src/hooks/context.ts +13 -2
- package/src/hooks/observation.ts +13 -2
- package/src/hooks/service.ts +39 -100
- package/src/hooks/session-init.ts +13 -2
- package/src/hooks/summarize.ts +13 -2
- package/src/index.ts +210 -116
- package/src/mcp/server.ts +20 -3
- package/src/search/__tests__/hybrid-search.test.ts +669 -0
- package/src/search/__tests__/token-economics.test.ts +276 -0
- package/src/search/hybrid-search.ts +968 -0
- package/src/search/index.ts +29 -0
- package/src/search/token-economics.ts +367 -0
- package/src/types.ts +0 -96
- package/src/__tests__/sqljs-backend.test.ts +0 -410
- package/src/migration.ts +0 -574
- package/src/sql.js.d.ts +0 -70
- package/src/sqljs-backend.ts +0 -789
|
@@ -0,0 +1,968 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hybrid Search Module
|
|
3
|
+
*
|
|
4
|
+
* Combines FTS5 keyword search with vector semantic search
|
|
5
|
+
* for improved recall (15-20% better than either alone).
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - SQLite FTS5 full-text search with trigram tokenizer (CJK support)
|
|
9
|
+
* - Score fusion (α*keyword + β*semantic)
|
|
10
|
+
* - 3-layer search workflow for token efficiency
|
|
11
|
+
* - Token economics tracking
|
|
12
|
+
*
|
|
13
|
+
* CJK Language Support:
|
|
14
|
+
* Uses trigram tokenizer which works for Japanese, Chinese, Korean
|
|
15
|
+
* by matching substrings instead of requiring word boundaries.
|
|
16
|
+
*
|
|
17
|
+
* @module @aitytech/agentkits-memory/search
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import type { Database as BetterDatabase } from 'better-sqlite3';
|
|
21
|
+
import type { MemoryEntry, SearchResult, EmbeddingGenerator } from '../types.js';
|
|
22
|
+
|
|
23
|
+
/**
 * Tuning options accepted by the hybrid (keyword + semantic) search engine.
 */
export interface HybridSearchConfig {
  /** Share of the fused score taken from the keyword/FTS5 match (0-1, default: 0.3). */
  keywordWeight: number;

  /** Share of the fused score taken from the semantic/vector match (0-1, default: 0.7). */
  semanticWeight: number;

  /** Results whose combined score is below this value are dropped (0-1, default: 0.1). */
  minScore: number;

  /** Whether FTS5 results should be scored with BM25 (default: true). */
  useBM25: boolean;

  /** Upper bound on the number of results produced by one search layer (default: 100). */
  maxResultsPerLayer: number;

  /**
   * Preferred FTS5 tokenizer (default: 'trigram').
   * - 'trigram': substring matching; best for CJK (Japanese, Chinese, Korean)
   * - 'unicode61': standard tokenizer, English/Latin only
   * - 'porter': stemming for English
   */
  tokenizer: 'trigram' | 'unicode61' | 'porter';

  /** Use a LIKE query when the FTS5 query cannot be used (default: true). */
  fallbackToLike: boolean;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Layer 1 result: a lightweight hit used for initial filtering.
 * Carries identifiers, scores and a short preview instead of the full entry
 * so that callers spend fewer tokens.
 */
export interface CompactSearchResult {
  /** ID of the matched entry. */
  id: string;

  /** Key of the matched entry. */
  key: string;

  /** Namespace the entry belongs to. */
  namespace: string;

  /** Combined relevance score (0-1). */
  score: number;

  /** Score contributed by the keyword match. */
  keywordScore: number;

  /** Score contributed by semantic similarity. */
  semanticScore: number;

  /** Preview of the content (first 100 chars). */
  snippet: string;

  /** Estimated token count. */
  estimatedTokens: number;
}
|
|
83
|
+
|
|
84
|
+
/**
 * Layer 2 result: one matched entry together with its chronological neighbours.
 */
export interface TimelineResult {
  /** The entry the timeline is centred on. */
  entry: CompactSearchResult;

  /** Entries created earlier than the target (chronological order). */
  before: CompactSearchResult[];

  /** Entries created later than the target (chronological order). */
  after: CompactSearchResult[];

  /** Sum of the estimated tokens of the target and all neighbours. */
  totalTokens: number;
}
|
|
101
|
+
|
|
102
|
+
/**
 * Token accounting for one search operation.
 */
export interface TokenEconomics {
  /** Tokens avoided by not returning every candidate in full. */
  tokensSaved: number;

  /** Tokens that returning every candidate in full would have cost. */
  fullResultTokens: number;

  /** Tokens actually spent on the returned data. */
  actualTokens: number;

  /** `tokensSaved` expressed as a percentage of `fullResultTokens`. */
  savingsPercent: number;

  /** Number of results produced by each layer. */
  layers: {
    compact: number;
    timeline: number;
    full: number;
  };
}
|
|
125
|
+
|
|
126
|
+
/**
 * Outcome of a full hybrid search: results, compact hits, token economics and timing.
 */
export interface HybridSearchResult {
  /** Full entries with their scores (empty when full fetch was skipped). */
  results: SearchResult[];

  /** Compact (layer 1) hits. */
  compact: CompactSearchResult[];

  /** Token accounting for this search. */
  economics: TokenEconomics;

  /**
   * Wall-clock timings in milliseconds.
   * NOTE(review): as populated by `HybridSearchEngine.search`, `keywordMs`
   * covers the whole compact search (keyword and semantic together),
   * `semanticMs` covers the full-entry fetch, and `fusionMs` is always 0.
   */
  timing: {
    keywordMs: number;
    semanticMs: number;
    fusionMs: number;
    totalMs: number;
  };
}
|
|
147
|
+
|
|
148
|
+
/**
 * Baseline configuration; caller-supplied options are layered on top of it.
 */
const DEFAULT_CONFIG: HybridSearchConfig = {
  // Score fusion: semantic similarity dominates, keyword match refines.
  keywordWeight: 0.3,
  semanticWeight: 0.7,
  minScore: 0.1,

  // Full-text search behaviour.
  useBM25: true,
  maxResultsPerLayer: 100,
  tokenizer: 'trigram', // Best for CJK languages
  fallbackToLike: true,
};
|
|
160
|
+
|
|
161
|
+
/**
 * Rough token estimate for a piece of text.
 *
 * Assumes an average of about 4 characters per token (typical for English)
 * and rounds up, so any non-empty text counts as at least one token.
 */
function estimateTokens(text: string): number {
  const charsPerToken = 4;
  const fractionalTokens = text.length / charsPerToken;
  return Math.ceil(fractionalTokens);
}
|
|
168
|
+
|
|
169
|
+
/**
|
|
170
|
+
* Hybrid Search Engine
|
|
171
|
+
*
|
|
172
|
+
* Provides enterprise-grade search combining keyword and semantic search
|
|
173
|
+
* with token-efficient 3-layer retrieval workflow.
|
|
174
|
+
*
|
|
175
|
+
* Supports CJK languages (Japanese, Chinese, Korean) via trigram tokenizer.
|
|
176
|
+
*/
|
|
177
|
+
export class HybridSearchEngine {
|
|
178
|
+
private db: BetterDatabase;
|
|
179
|
+
private config: HybridSearchConfig;
|
|
180
|
+
private embeddingGenerator?: EmbeddingGenerator;
|
|
181
|
+
private ftsInitialized = false;
|
|
182
|
+
private ftsAvailable = false;
|
|
183
|
+
/** The actual tokenizer being used (may differ from config if tokenizer not available) */
|
|
184
|
+
private activeTokenizer: 'trigram' | 'unicode61' | 'porter' | null = null;
|
|
185
|
+
|
|
186
|
+
constructor(
  db: BetterDatabase,
  config: Partial<HybridSearchConfig> = {},
  embeddingGenerator?: EmbeddingGenerator
) {
  // Caller-supplied options override the defaults key by key.
  const mergedConfig: HybridSearchConfig = Object.assign({}, DEFAULT_CONFIG, config);

  this.db = db;
  this.embeddingGenerator = embeddingGenerator;
  this.config = mergedConfig;
}
|
|
195
|
+
|
|
196
|
+
/**
 * Probe whether this SQLite build ships the FTS5 extension.
 *
 * Creates and immediately drops a throw-away FTS5 table; any error from
 * either statement is interpreted as "FTS5 not available".
 */
private checkFts5Available(): boolean {
  const probeStatements = [
    `CREATE VIRTUAL TABLE IF NOT EXISTS _fts5_check USING fts5(test)`,
    `DROP TABLE IF EXISTS _fts5_check`,
  ];

  try {
    for (const sql of probeStatements) {
      this.db.exec(sql);
    }
  } catch {
    return false;
  }
  return true;
}
|
|
209
|
+
|
|
210
|
+
/**
 * Probe whether FTS5 accepts the given tokenizer option.
 *
 * @param tokenizer - Complete FTS5 option text (for example `tokenize='trigram'`);
 *   it is spliced into the CREATE statement verbatim.
 * @returns true when a scratch table using that option could be created and dropped.
 */
private checkTokenizerAvailable(tokenizer: string): boolean {
  const createProbe = `CREATE VIRTUAL TABLE IF NOT EXISTS _tokenizer_check USING fts5(test, ${tokenizer})`;
  const dropProbe = `DROP TABLE IF EXISTS _tokenizer_check`;

  try {
    this.db.exec(createProbe);
    this.db.exec(dropProbe);
  } catch {
    return false;
  }
  return true;
}
|
|
222
|
+
|
|
223
|
+
/**
 * Resolve the FTS5 tokenizer option to use and record it in `activeTokenizer`.
 *
 * The configured tokenizer is used when it is 'trigram' or 'porter' and the
 * probe confirms it works; in every other case (including a configured
 * 'unicode61') the result is 'unicode61', which is not probed.
 *
 * @returns The `tokenize='...'` option text for the CREATE VIRTUAL TABLE statement.
 */
private getBestTokenizer(): string {
  const requested = this.config.tokenizer;

  if (requested === 'trigram') {
    const trigramOption = "tokenize='trigram'";
    if (this.checkTokenizerAvailable(trigramOption)) {
      this.activeTokenizer = 'trigram';
      return trigramOption;
    }
  } else if (requested === 'porter') {
    const porterOption = "tokenize='porter unicode61'";
    if (this.checkTokenizerAvailable(porterOption)) {
      this.activeTokenizer = 'porter';
      return porterOption;
    }
  }

  // Default to unicode61 which should always be available
  this.activeTokenizer = 'unicode61';
  return "tokenize='unicode61'";
}
|
|
242
|
+
|
|
243
|
+
/**
 * Initialize FTS5 full-text search for `memory_entries`.
 *
 * Safe to call repeatedly: only the first call does any work. When FTS5 is
 * missing, or setting it up fails, the engine stays usable and keyword
 * search falls back to LIKE queries; in both cases `ftsAvailable` is false
 * and `activeTokenizer` is null.
 *
 * Note: For best CJK support, use better-sqlite3 which includes trigram tokenizer.
 */
async initialize(): Promise<void> {
  if (this.ftsInitialized) return;

  // Check if FTS5 is available
  this.ftsAvailable = this.checkFts5Available();

  if (!this.ftsAvailable) {
    console.warn(
      '[HybridSearch] FTS5 not available in this SQLite build. ' +
        'Falling back to LIKE search.'
    );
    this.ftsInitialized = true;
    return;
  }

  try {
    // Get the best available tokenizer (also records it in activeTokenizer)
    const tokenizer = this.getBestTokenizer();

    // External-content FTS5 table: the index reads its text from
    // memory_entries (content=) instead of storing a second copy.
    // trigram tokenizer provides substring matching for CJK languages
    this.db.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS memory_fts USING fts5(
        key,
        content,
        namespace,
        tags,
        content=memory_entries,
        content_rowid=rowid,
        ${tokenizer}
      )
    `);

    // Triggers keep the FTS index in sync with the main table.
    this.db.exec(`
      CREATE TRIGGER IF NOT EXISTS memory_fts_insert AFTER INSERT ON memory_entries BEGIN
        INSERT INTO memory_fts(rowid, key, content, namespace, tags)
        VALUES (NEW.rowid, NEW.key, NEW.content, NEW.namespace, NEW.tags);
      END
    `);

    // External-content tables are told about removals with the special
    // 'delete' command, which needs the old column values.
    this.db.exec(`
      CREATE TRIGGER IF NOT EXISTS memory_fts_delete AFTER DELETE ON memory_entries BEGIN
        INSERT INTO memory_fts(memory_fts, rowid, key, content, namespace, tags)
        VALUES ('delete', OLD.rowid, OLD.key, OLD.content, OLD.namespace, OLD.tags);
      END
    `);

    // An update is a 'delete' of the old values followed by an insert of the new ones.
    this.db.exec(`
      CREATE TRIGGER IF NOT EXISTS memory_fts_update AFTER UPDATE ON memory_entries BEGIN
        INSERT INTO memory_fts(memory_fts, rowid, key, content, namespace, tags)
        VALUES ('delete', OLD.rowid, OLD.key, OLD.content, OLD.namespace, OLD.tags);
        INSERT INTO memory_fts(rowid, key, content, namespace, tags)
        VALUES (NEW.rowid, NEW.key, NEW.content, NEW.namespace, NEW.tags);
      END
    `);

    // Rebuild FTS index from existing data
    await this.rebuildFtsIndex();
  } catch (error) {
    console.warn('[HybridSearch] Failed to initialize FTS5:', error);
    this.ftsAvailable = false;
    // getBestTokenizer() may already have recorded a tokenizer; clear it so
    // the engine never reports an active tokenizer while FTS5 is unusable.
    this.activeTokenizer = null;
  }

  this.ftsInitialized = true;
}
|
|
314
|
+
|
|
315
|
+
/**
 * Report whether FTS5 is usable.
 * The flag is false until `initialize()` has run and found FTS5 working.
 */
isFtsAvailable(): boolean {
  const { ftsAvailable } = this;
  return ftsAvailable;
}
|
|
321
|
+
|
|
322
|
+
/**
 * Get the active tokenizer being used.
 *
 * @returns The tokenizer selected during initialization, or null if FTS5 is
 *   not available (never initialized, unsupported, or its setup failed).
 */
getActiveTokenizer(): 'trigram' | 'unicode61' | 'porter' | null {
  // A tokenizer may have been selected before FTS5 setup failed; without a
  // usable FTS index there is no active tokenizer to report.
  if (!this.ftsAvailable) return null;
  return this.activeTokenizer;
}
|
|
329
|
+
|
|
330
|
+
/**
 * Report whether CJK queries are served by the FTS index.
 * That requires FTS5 with the trigram tokenizer active; otherwise CJK
 * queries are answered through the LIKE fallback.
 */
isCjkOptimized(): boolean {
  if (!this.ftsAvailable) {
    return false;
  }
  return this.activeTokenizer === 'trigram';
}
|
|
337
|
+
|
|
338
|
+
/**
 * Regenerate the FTS index from the rows currently in `memory_entries`.
 *
 * Does nothing when FTS5 is unavailable. A failure is logged as a warning
 * and not rethrown.
 */
async rebuildFtsIndex(): Promise<void> {
  if (this.ftsAvailable) {
    // memory_fts is an external-content table (content=memory_entries); the
    // special 'rebuild' command makes FTS5 re-read every row from it.
    const rebuildCommand = `INSERT INTO memory_fts(memory_fts) VALUES('rebuild')`;
    try {
      this.db.exec(rebuildCommand);
    } catch (error) {
      console.warn('[HybridSearch] Failed to rebuild FTS index:', error);
    }
  }
}
|
|
353
|
+
|
|
354
|
+
/**
 * Layer 1: Compact Search
 *
 * Runs the keyword search and, when an embedding generator is configured,
 * the semantic search, then merges both hit lists by entry ID. An entry
 * found by both gets a fused score; an entry found by only one keeps the
 * score that search assigned.
 *
 * @param query - Search text.
 * @param options.limit - Maximum hits to return (falls back to `maxResultsPerLayer`).
 * @param options.namespace - Restrict the search to one namespace.
 * @param options.includeKeyword - Run the keyword search (default: true).
 * @param options.includeSemantic - Run the semantic search (default: true when
 *   an embedding generator is configured).
 * @returns Hits scoring at least `minScore`, best first, at most `limit` of them.
 */
async searchCompact(
  query: string,
  options: {
    limit?: number;
    namespace?: string;
    includeKeyword?: boolean;
    includeSemantic?: boolean;
  } = {}
): Promise<CompactSearchResult[]> {
  const maxHits = options.limit || this.config.maxResultsPerLayer;
  const wantKeyword = options.includeKeyword ?? true;
  const wantSemantic = options.includeSemantic ?? !!this.embeddingGenerator;

  const merged = new Map<string, CompactSearchResult>();

  // Keyword search with FTS5
  if (wantKeyword) {
    const keywordHits = await this.keywordSearch(query, maxHits, options.namespace);
    keywordHits.forEach((hit) => merged.set(hit.id, hit));
  }

  // Semantic search with embeddings
  if (wantSemantic && this.embeddingGenerator) {
    const semanticHits = await this.semanticSearchCompact(query, maxHits, options.namespace);
    for (const hit of semanticHits) {
      const keywordHit = merged.get(hit.id);
      if (!keywordHit) {
        merged.set(hit.id, hit);
        continue;
      }
      // Found by both searches: keep the keyword hit and fuse the two scores.
      keywordHit.semanticScore = hit.semanticScore;
      keywordHit.score = this.fuseScores(keywordHit.keywordScore, hit.semanticScore);
    }
  }

  // Drop weak hits, order best-first, then cap.
  const ranked = [...merged.values()].filter((hit) => hit.score >= this.config.minScore);
  ranked.sort((left, right) => right.score - left.score);
  return ranked.slice(0, maxHits);
}
|
|
404
|
+
|
|
405
|
+
/**
 * Layer 2: Timeline Search
 *
 * For each requested entry, returns the entry together with up to
 * `contextWindow` entries from the same namespace created before it and up
 * to `contextWindow` created after it. IDs that do not exist are skipped.
 *
 * @param entryIds - IDs of the entries to build timelines for.
 * @param contextWindow - Neighbours to fetch on each side (default: 3).
 *   Fractions are rounded down; negative or non-finite values are treated
 *   as 0 (no neighbours).
 * @returns One timeline per existing entry, in the order of `entryIds`.
 */
async searchTimeline(
  entryIds: string[],
  contextWindow: number = 3
): Promise<TimelineResult[]> {
  type TimelineRow = {
    id: string;
    key: string;
    namespace: string;
    content: string;
    created_at: number;
  };

  // SQLite treats a negative LIMIT as "no limit" and rejects a value that is
  // not an integer, so normalise the window before binding it.
  const windowSize = Number.isFinite(contextWindow)
    ? Math.max(0, Math.floor(contextWindow))
    : 0;

  // Prepare each statement once instead of once per requested entry.
  const targetStmt = this.db.prepare(`
    SELECT id, key, namespace, content, created_at
    FROM memory_entries WHERE id = ?
  `);
  const beforeStmt = this.db.prepare(`
    SELECT id, key, namespace, content, created_at
    FROM memory_entries
    WHERE namespace = ? AND created_at < ?
    ORDER BY created_at DESC
    LIMIT ?
  `);
  const afterStmt = this.db.prepare(`
    SELECT id, key, namespace, content, created_at
    FROM memory_entries
    WHERE namespace = ? AND created_at > ?
    ORDER BY created_at ASC
    LIMIT ?
  `);

  // The target scores 1.0 and neighbours 0.5; neither carries keyword or
  // semantic scores because no query is involved at this layer.
  const toCompact = (row: TimelineRow, score: number): CompactSearchResult => ({
    id: row.id,
    key: row.key,
    namespace: row.namespace,
    score,
    keywordScore: 0,
    semanticScore: 0,
    snippet: row.content.substring(0, 100),
    estimatedTokens: estimateTokens(row.content),
  });

  const sumTokens = (items: CompactSearchResult[]): number =>
    items.reduce((sum, item) => sum + item.estimatedTokens, 0);

  const results: TimelineResult[] = [];

  for (const id of entryIds) {
    const targetRow = targetStmt.get(id) as TimelineRow | undefined;
    if (!targetRow) continue;

    const entry = toCompact(targetRow, 1.0);

    // Fetched newest-first so LIMIT keeps the closest neighbours, then
    // reversed to present them in chronological order.
    const beforeRows = beforeStmt.all(
      targetRow.namespace,
      targetRow.created_at,
      windowSize
    ) as TimelineRow[];
    const before = beforeRows.map((row) => toCompact(row, 0.5)).reverse();

    const afterRows = afterStmt.all(
      targetRow.namespace,
      targetRow.created_at,
      windowSize
    ) as TimelineRow[];
    const after = afterRows.map((row) => toCompact(row, 0.5));

    results.push({
      entry,
      before,
      after,
      totalTokens: entry.estimatedTokens + sumTokens(before) + sumTokens(after),
    });
  }

  return results;
}
|
|
498
|
+
|
|
499
|
+
/**
 * Layer 3: Full Search
 *
 * Loads the complete entries for the given IDs, returned in the order the
 * IDs were supplied. IDs without a matching row are simply absent.
 *
 * @param ids - Entry IDs to load; an empty list yields an empty result
 *   without touching the database.
 */
async getFull(ids: string[]): Promise<MemoryEntry[]> {
  if (!ids.length) {
    return [];
  }

  // One bind marker per ID.
  const bindMarkers = new Array<string>(ids.length).fill('?').join(', ');
  const statement = this.db.prepare(`
    SELECT * FROM memory_entries WHERE id IN (${bindMarkers})
  `);
  const rawRows = statement.all(...ids) as Record<string, unknown>[];

  // Position of each ID in the request; a repeated ID keeps its last position.
  const requestedPosition = new Map<string, number>();
  ids.forEach((id, index) => {
    requestedPosition.set(id, index);
  });
  const positionOf = (entry: MemoryEntry): number => requestedPosition.get(entry.id) || 0;

  // SQL returns rows in arbitrary order; restore the caller's order.
  return rawRows
    .map((row) => this.rowToEntry(row))
    .sort((left, right) => positionOf(left) - positionOf(right));
}
|
|
521
|
+
|
|
522
|
+
/**
 * Full hybrid search with token economics
 *
 * Runs the compact search over up to `maxResultsPerLayer` candidates, then
 * (unless `fetchFull` is false) loads the full entries for the top `limit`
 * hits and reports how many tokens that saved compared with returning every
 * candidate in full.
 *
 * @param query - Search text.
 * @param options.limit - Maximum number of results to return (default: 10).
 * @param options.namespace - Restrict the search to one namespace.
 * @param options.fetchFull - Load full entries for the top hits (default: true).
 * @returns Results, compact hits, token economics and timings. In `timing`,
 *   `keywordMs` covers the whole compact search (keyword and semantic),
 *   `semanticMs` covers the full-entry fetch, and `fusionMs` is always 0.
 */
async search(
  query: string,
  options: {
    limit?: number;
    namespace?: string;
    fetchFull?: boolean;
  } = {}
): Promise<HybridSearchResult> {
  const startTime = performance.now();
  const limit = options.limit || 10;
  const fetchFull = options.fetchFull !== false;

  // Layer 1: Compact search
  const keywordStart = performance.now();
  const compact = await this.searchCompact(query, {
    limit: this.config.maxResultsPerLayer,
    namespace: options.namespace,
  });
  const keywordTime = performance.now() - keywordStart;

  // Each compact hit's estimate is for its full content, so this sum is the
  // cost of returning every candidate in full.
  const compactTokens = compact.reduce((sum, r) => sum + r.estimatedTokens, 0);
  const topCompact = compact.slice(0, limit);

  // Layer 3: Fetch full results if requested
  const semanticStart = performance.now();
  let results: SearchResult[] = [];
  let fullTokens = 0;

  if (fetchFull) {
    const fullEntries = await this.getFull(topCompact.map((r) => r.id));

    // Pair entries with scores by ID, not by position: getFull() omits IDs
    // that no longer exist, which would shift a positional pairing and
    // attach the wrong score to every later entry.
    const scoreById = new Map<string, number>();
    for (const hit of topCompact) {
      scoreById.set(hit.id, hit.score);
    }

    results = fullEntries.map((entry) => {
      const score = scoreById.get(entry.id) || 0;
      return { entry, score, distance: 1 - score };
    });

    fullTokens = fullEntries.reduce((sum, e) => sum + estimateTokens(e.content), 0);
  }
  const semanticTime = performance.now() - semanticStart;

  const totalTime = performance.now() - startTime;

  // Calculate savings. Without a full fetch, the compact payload is assumed
  // to cost a tenth of the full content.
  const fullResultTokens = compactTokens;
  const actualTokens = fetchFull ? fullTokens : compactTokens / 10;
  const tokensSaved = fullResultTokens - actualTokens;
  const savingsPercent = fullResultTokens > 0 ? (tokensSaved / fullResultTokens) * 100 : 0;

  return {
    results,
    compact: topCompact,
    economics: {
      tokensSaved: Math.max(0, tokensSaved),
      fullResultTokens,
      actualTokens,
      savingsPercent: Math.max(0, savingsPercent),
      layers: {
        compact: compact.length,
        timeline: 0,
        full: results.length,
      },
    },
    timing: {
      keywordMs: keywordTime,
      semanticMs: semanticTime,
      fusionMs: 0,
      totalMs: totalTime,
    },
  };
}
|
|
598
|
+
|
|
599
|
+
/**
 * Tell whether the text contains at least one CJK character.
 * Such text needs special handling (LIKE or the trigram tokenizer).
 *
 * Ranges checked:
 * - CJK Unified Ideographs: \u4E00-\u9FFF
 * - Hiragana: \u3040-\u309F
 * - Katakana: \u30A0-\u30FF
 * - Hangul: \uAC00-\uD7AF
 * - CJK Extension: \u3400-\u4DBF
 */
private containsCJK(text: string): boolean {
  const cjkCharacter = /[\u4E00-\u9FFF\u3040-\u309F\u30A0-\u30FF\uAC00-\uD7AF\u3400-\u4DBF]/;
  return text.search(cjkCharacter) !== -1;
}
|
|
612
|
+
|
|
613
|
+
/**
 * Keyword search using FTS5 (with LIKE fallback)
 *
 * The FTS index is bypassed in favour of a LIKE query when:
 * - FTS5 is not available;
 * - the query contains CJK text and the trigram tokenizer is not active
 *   (the other tokenizers need word boundaries);
 * - the trigram tokenizer is active and the query has fewer than 3 CJK
 *   characters;
 * - the trigram tokenizer is active, the query is shorter than 3 characters
 *   and `fallbackToLike` is enabled (trigram cannot match such a query);
 * - the FTS query throws and `fallbackToLike` is enabled.
 *
 * @param query - Raw user query.
 * @param limit - Maximum number of rows to return.
 * @param namespace - Optional namespace filter.
 * @returns Compact hits. `keywordScore` is in [0.5, 1) and grows with BM25
 *   relevance; `score` is `keywordScore * keywordWeight`.
 * @throws Rethrows the FTS error when `fallbackToLike` is disabled.
 */
private async keywordSearch(
  query: string,
  limit: number,
  namespace?: string
): Promise<CompactSearchResult[]> {
  // Use LIKE fallback if FTS5 not available
  if (!this.ftsAvailable) {
    return this.likeSearch(query, limit, namespace);
  }

  const trigramActive = this.activeTokenizer === 'trigram';

  // For CJK queries, use LIKE fallback unless trigram tokenizer is actually active
  // (unicode61 tokenizer doesn't work with CJK - no word boundaries)
  if (!trigramActive && this.containsCJK(query)) {
    return this.likeSearch(query, limit, namespace);
  }

  if (trigramActive) {
    // Trigram needs at least 3 characters to produce a match.
    const cjkChars = query.match(/[\u4E00-\u9FFF\u3040-\u309F\u30A0-\u30FF\uAC00-\uD7AF\u3400-\u4DBF]/g);
    const tooFewCjkChars = cjkChars !== null && cjkChars.length < 3;
    // Count code points rather than UTF-16 units so astral characters count once.
    const tooShortOverall =
      this.config.fallbackToLike && Array.from(query.trim()).length < 3;

    if (tooFewCjkChars || tooShortOverall) {
      return this.likeSearch(query, limit, namespace);
    }
  }

  // Sanitize query for FTS5
  const sanitizedQuery = this.sanitizeFtsQuery(query);
  if (!sanitizedQuery) return [];

  try {
    // One statement serves both cases; the namespace filter is optional.
    const namespaceClause = namespace ? 'AND m.namespace = ?' : '';
    const params: (string | number)[] = namespace
      ? [sanitizedQuery, namespace, limit]
      : [sanitizedQuery, limit];

    const rows = this.db.prepare(`
      SELECT
        m.id, m.key, m.namespace, m.content,
        bm25(memory_fts) as rank
      FROM memory_fts f
      JOIN memory_entries m ON f.rowid = m.rowid
      WHERE memory_fts MATCH ? ${namespaceClause}
      ORDER BY rank
      LIMIT ?
    `).all(...params) as {
      id: string;
      key: string;
      namespace: string;
      content: string;
      rank: number;
    }[];

    return rows.map((row) => {
      // FTS5 bm25() is never positive and is MORE negative for BETTER
      // matches, so relevance is the negated rank. Map it onto [0.5, 1):
      // every matched row keeps a baseline score (BM25 magnitudes are tiny
      // in small corpora) and stronger matches approach 1.
      const relevance = Math.max(0, -row.rank);
      const keywordScore = 0.5 + 0.5 * (relevance / (1 + relevance));

      return {
        id: row.id,
        key: row.key,
        namespace: row.namespace,
        score: keywordScore * this.config.keywordWeight,
        keywordScore,
        semanticScore: 0,
        snippet: row.content.substring(0, 100),
        estimatedTokens: estimateTokens(row.content),
      };
    });
  } catch (error) {
    // Fall back to LIKE search on error
    if (this.config.fallbackToLike) {
      return this.likeSearch(query, limit, namespace);
    }
    throw error;
  }
}
|
|
705
|
+
|
|
706
|
+
/**
 * LIKE-based search fallback (works without FTS5)
 *
 * Matches the trimmed query as a literal substring of `content`, `key` or
 * `tags`. Less efficient than FTS5 but supports all languages.
 *
 * @param query - Raw user query; surrounding whitespace is ignored.
 * @param limit - Maximum number of rows to return.
 * @param namespace - Optional namespace filter.
 * @returns Compact hits, newest first. `keywordScore` falls from 1 towards a
 *   floor of 0.3 the later the match sits in the content, and is 0.5 when
 *   the row matched only through `key` or `tags`. An empty query yields [].
 */
private likeSearch(
  query: string,
  limit: number,
  namespace?: string
): CompactSearchResult[] {
  // Handle empty query
  const trimmedQuery = query.trim();
  if (!trimmedQuery) return [];

  // Escape LIKE metacharacters so '%' and '_' typed by the user are matched
  // literally instead of acting as wildcards.
  const escapedQuery = trimmedQuery.replace(/[\\%_]/g, '\\$&');
  const searchPattern = `%${escapedQuery}%`;

  // One statement serves both cases; the namespace filter is optional.
  const namespaceClause = namespace ? 'AND namespace = ?' : '';
  const params: (string | number)[] = [searchPattern, searchPattern, searchPattern];
  if (namespace) params.push(namespace);
  params.push(limit);

  const rows = this.db.prepare(`
    SELECT id, key, namespace, content
    FROM memory_entries
    WHERE (content LIKE ? ESCAPE '\\' OR key LIKE ? ESCAPE '\\' OR tags LIKE ? ESCAPE '\\')
    ${namespaceClause}
    ORDER BY created_at DESC
    LIMIT ?
  `).all(...params) as {
    id: string;
    key: string;
    namespace: string;
    content: string;
  }[];

  // Score against the same trimmed text that was searched for; the raw query
  // may carry padding that never occurs in the content.
  const lowerQuery = trimmedQuery.toLowerCase();

  return rows.map((row) => {
    // Simple scoring based on match position
    const matchIndex = row.content.toLowerCase().indexOf(lowerQuery);
    const keywordScore = matchIndex >= 0 ? Math.max(0.3, 1 - matchIndex / 1000) : 0.5;

    return {
      id: row.id,
      key: row.key,
      namespace: row.namespace,
      score: keywordScore * this.config.keywordWeight,
      keywordScore,
      semanticScore: 0,
      snippet: row.content.substring(0, 100),
      estimatedTokens: estimateTokens(row.content),
    };
  });
}
|
|
766
|
+
|
|
767
|
+
/**
 * Embedding-based search that yields compact results.
 *
 * Embeds the query with the configured generator, loads every stored entry
 * that has an embedding (optionally restricted to one namespace), scores each
 * one by cosine similarity in memory and keeps the `limit` best matches.
 *
 * @param query - Text to embed and compare against stored embeddings.
 * @param limit - Maximum number of results to return.
 * @param namespace - When truthy, only entries in this namespace are loaded.
 * @returns Results sorted by similarity, highest first; an empty array when
 *   no embedding generator is configured.
 */
private async semanticSearchCompact(
  query: string,
  limit: number,
  namespace?: string
): Promise<CompactSearchResult[]> {
  type EmbeddedRow = {
    id: string;
    key: string;
    namespace: string;
    content: string;
    embedding: Buffer;
  };

  if (!this.embeddingGenerator) {
    return [];
  }

  // Embed the query first; the stored rows are only read afterwards.
  const queryVector = await this.embeddingGenerator(query);

  // Load every entry that carries an embedding.
  let storedRows: EmbeddedRow[];
  if (namespace) {
    storedRows = this.db.prepare(`
      SELECT id, key, namespace, content, embedding
      FROM memory_entries
      WHERE embedding IS NOT NULL AND namespace = ?
    `).all(namespace) as EmbeddedRow[];
  } else {
    storedRows = this.db.prepare(`
      SELECT id, key, namespace, content, embedding
      FROM memory_entries
      WHERE embedding IS NOT NULL
    `).all() as EmbeddedRow[];
  }

  const ranked = storedRows
    .filter((stored) => Boolean(stored.embedding))
    .map((stored) => {
      const blob = stored.embedding;
      const start = blob.byteOffset;
      // Copy exactly the blob's bytes into a fresh buffer before viewing
      // them as 32-bit floats.
      const vector = new Float32Array(blob.buffer.slice(start, start + blob.byteLength));
      return {
        stored,
        similarity: this.cosineSimilarity(queryVector, vector),
      };
    })
    .sort((left, right) => right.similarity - left.similarity);

  // Keep the top matches and project them into the compact shape.
  return ranked.slice(0, limit).map(({ stored, similarity }) => ({
    id: stored.id,
    key: stored.key,
    namespace: stored.namespace,
    score: similarity * this.config.semanticWeight,
    keywordScore: 0,
    semanticScore: similarity,
    snippet: stored.content.substring(0, 100),
    estimatedTokens: estimateTokens(stored.content),
  }));
}
|
|
843
|
+
|
|
844
|
+
/**
 * Combine a keyword score and a semantic score into one value.
 *
 * @param keywordScore - Score from keyword matching.
 * @param semanticScore - Score from embedding similarity.
 * @returns The sum of both scores, each multiplied by its configured weight.
 */
private fuseScores(keywordScore: number, semanticScore: number): number {
  const { keywordWeight, semanticWeight } = this.config;
  const weightedKeyword = keywordScore * keywordWeight;
  const weightedSemantic = semanticScore * semanticWeight;
  return weightedKeyword + weightedSemantic;
}
|
|
853
|
+
|
|
854
|
+
/**
 * Cosine similarity of two vectors.
 *
 * @param a - First vector.
 * @param b - Second vector.
 * @returns The dot product divided by the product of both magnitudes, or 0
 *   when the lengths differ or either magnitude is zero.
 */
private cosineSimilarity(a: Float32Array, b: Float32Array): number {
  const dimensions = a.length;
  if (dimensions !== b.length) {
    return 0;
  }

  let dot = 0;
  let squaredA = 0;
  let squaredB = 0;

  let index = 0;
  while (index < dimensions) {
    const x = a[index];
    const y = b[index];
    dot += x * y;
    squaredA += x * x;
    squaredB += y * y;
    index += 1;
  }

  const magnitude = Math.sqrt(squaredA) * Math.sqrt(squaredB);
  if (magnitude === 0) {
    return 0;
  }
  return dot / magnitude;
}
|
|
873
|
+
|
|
874
|
+
/**
 * Turn free-form user text into a safe FTS5 MATCH expression.
 *
 * The characters `* ^ : " ( )` are replaced with spaces and the words
 * AND / OR / NOT / NEAR (any letter case) are dropped; every other character,
 * including CJK text, is left untouched.
 *
 * @param query - Raw user query.
 * @returns An empty string when nothing remains after cleaning. With the
 *   trigram tokenizer, the whole cleaned text as one double-quoted phrase.
 *   Otherwise each whitespace-separated term double-quoted and joined with
 *   ` OR `.
 */
private sanitizeFtsQuery(query: string): string {
  // Neutralise FTS5 syntax characters first, then the operator keywords.
  const withoutSyntax = query.replace(/[*^:"()]/g, ' ');
  const withoutOperators = withoutSyntax.replace(/\bAND\b|\bOR\b|\bNOT\b|\bNEAR\b/gi, ' ');
  const cleaned = withoutOperators.trim();

  if (cleaned.length === 0) {
    return '';
  }

  // Trigram tokenizer: hand over the cleaned text as a single quoted phrase.
  if (this.activeTokenizer === 'trigram') {
    return '"' + cleaned + '"';
  }

  // Any other tokenizer: quote each term on its own and OR them together.
  const quotedTerms: string[] = [];
  for (const term of cleaned.split(/\s+/)) {
    if (term.length > 0) {
      quotedTerms.push('"' + term + '"');
    }
  }
  return quotedTerms.join(' OR ');
}
|
|
904
|
+
|
|
905
|
+
/**
 * Build a MemoryEntry from a raw database row.
 *
 * Snake_case columns are mapped to camelCase fields. The `tags`, `metadata`
 * and `references` columns are parsed as JSON, defaulting to `[]`, `{}` and
 * `[]` respectively when the column is falsy. A truthy `embedding` column is
 * copied into a Float32Array.
 *
 * @param row - Row object keyed by column name.
 * @returns The corresponding MemoryEntry.
 */
private rowToEntry(row: Record<string, unknown>): MemoryEntry {
  // Copy exactly the blob's bytes before viewing them as 32-bit floats.
  const toVector = (blob: Buffer): Float32Array => {
    const start = blob.byteOffset;
    return new Float32Array(blob.buffer.slice(start, start + blob.byteLength));
  };

  // Parse a JSON text column, substituting `fallback` when it is falsy.
  const parseJson = (raw: unknown, fallback: string) =>
    JSON.parse((raw as string) || fallback);

  const embedding: Float32Array | undefined = row.embedding
    ? toVector(row.embedding as Buffer)
    : undefined;

  return {
    id: row.id as string,
    key: row.key as string,
    content: row.content as string,
    embedding,
    type: row.type as MemoryEntry['type'],
    namespace: row.namespace as string,
    tags: parseJson(row.tags, '[]'),
    metadata: parseJson(row.metadata, '{}'),
    sessionId: row.session_id as string | undefined,
    ownerId: row.owner_id as string | undefined,
    accessLevel: row.access_level as MemoryEntry['accessLevel'],
    createdAt: row.created_at as number,
    updatedAt: row.updated_at as number,
    expiresAt: row.expires_at as number | undefined,
    version: row.version as number,
    references: parseJson(row.references, '[]'),
    accessCount: row.access_count as number,
    lastAccessedAt: row.last_accessed_at as number,
  };
}
|
|
941
|
+
|
|
942
|
+
/**
 * Read the current configuration.
 *
 * @returns A shallow copy, so changes to the returned object do not affect
 *   the engine's own configuration object.
 */
getConfig(): HybridSearchConfig {
  const snapshot = Object.assign({}, this.config);
  return snapshot;
}
|
|
948
|
+
|
|
949
|
+
/**
 * Apply configuration overrides.
 *
 * @param config - Settings to change. They are layered over the current
 *   configuration and the result replaces it as a new object.
 */
updateConfig(config: Partial<HybridSearchConfig>): void {
  const merged = Object.assign({}, this.config, config);
  this.config = merged;
}
|
|
955
|
+
}
|
|
956
|
+
|
|
957
|
+
/**
 * Factory for a hybrid search engine.
 *
 * @param db - Database handle passed to the engine.
 * @param config - Optional configuration overrides.
 * @param embeddingGenerator - Optional function that produces embeddings.
 * @returns A new HybridSearchEngine built from the given arguments.
 */
export function createHybridSearchEngine(
  db: BetterDatabase,
  config?: Partial<HybridSearchConfig>,
  embeddingGenerator?: EmbeddingGenerator
): HybridSearchEngine {
  const engine = new HybridSearchEngine(db, config, embeddingGenerator);
  return engine;
}
|
|
967
|
+
|
|
968
|
+
// Expose the engine class as the module's default export.
export default HybridSearchEngine;
|