cozo-memory 1.1.2 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,602 @@
1
+ "use strict";
2
+ /**
3
+ * Dynamic Fusion Framework for CozoDB Memory
4
+ *
5
+ * Inspired by Allan-Poe (arXiv:2511.00855) - All-in-one Graph-based Indexing
6
+ *
7
+ * Combines 4 retrieval paths with dynamic weighting:
8
+ * 1. Dense Vector Search (HNSW semantic similarity)
9
+ * 2. Sparse Vector Search (TF-IDF/BM25 keyword matching)
10
+ * 3. Full-Text Search (FTS exact/fuzzy matching)
11
+ * 4. Graph Traversal (relationship-based retrieval)
12
+ *
13
+ * Key Innovation: Dynamic fusion without index reconstruction
14
+ */
15
+ Object.defineProperty(exports, "__esModule", { value: true });
16
+ exports.DynamicFusionSearch = exports.DEFAULT_FUSION_CONFIG = void 0;
17
+ /**
18
+ * Default fusion configuration
19
+ */
20
+ exports.DEFAULT_FUSION_CONFIG = {
21
+ vector: {
22
+ enabled: true,
23
+ weight: 0.4,
24
+ topK: 20,
25
+ efSearch: 100
26
+ },
27
+ sparse: {
28
+ enabled: true,
29
+ weight: 0.3,
30
+ topK: 20,
31
+ minScore: 0.1
32
+ },
33
+ fts: {
34
+ enabled: true,
35
+ weight: 0.2,
36
+ topK: 20,
37
+ fuzzy: true
38
+ },
39
+ graph: {
40
+ enabled: true,
41
+ weight: 0.1,
42
+ maxDepth: 2,
43
+ maxResults: 20
44
+ },
45
+ fusion: {
46
+ strategy: 'rrf',
47
+ rrfK: 60,
48
+ minScore: 0.0,
49
+ deduplication: true
50
+ }
51
+ };
52
/**
 * Dynamic Fusion Search Engine
 *
 * Runs up to four retrieval paths (dense vector, sparse keyword, full-text,
 * graph traversal) against the database and merges their rankings with a
 * configurable strategy. Weights are applied at query time, so changing them
 * never requires rebuilding an index.
 *
 * Every path method resolves to `{ results, time }` and never rejects: a
 * failing path is logged and contributes an empty result list.
 */
class DynamicFusionSearch {
    db;
    embeddings;
    /**
     * @param db         Database handle exposing `run(query, params)` that resolves to `{ rows }`.
     * @param embeddings Embedding service exposing `embed(text)`.
     */
    constructor(db, embeddings) {
        this.db = db;
        this.embeddings = embeddings;
    }
    /**
     * Execute dynamic fusion search.
     *
     * @param query  Free-text query.
     * @param config Partial configuration; missing keys fall back to the defaults.
     * @returns `{ results, stats }` where `results` is sorted by descending fused score.
     */
    async search(query, config = {}) {
        const startTime = Date.now();
        // Merge with defaults
        const fullConfig = this.mergeConfig(config);
        console.log('[DynamicFusion] Starting search with config:', {
            query,
            enabledPaths: this.getEnabledPaths(fullConfig)
        });
        // Execute all enabled paths in parallel
        const pathResults = await this.executeAllPaths(query, fullConfig);
        // Fuse results based on strategy
        const fusedResults = this.fuseResults(pathResults, fullConfig);
        // Calculate statistics
        const stats = this.calculateStats(pathResults, fusedResults, startTime);
        console.log('[DynamicFusion] Search completed:', {
            totalResults: fusedResults.length,
            pathContributions: stats.pathContributions,
            fusionTime: stats.fusionTime
        });
        return { results: fusedResults, stats };
    }
    /**
     * Execute all enabled retrieval paths in parallel.
     *
     * @returns Object keyed by path name (`vector`, `sparse`, `fts`, `graph`)
     *          holding that path's `{ results, time }`.
     */
    async executeAllPaths(query, config) {
        const runners = {
            vector: (pathConfig) => this.executeVectorSearch(query, pathConfig),
            sparse: (pathConfig) => this.executeSparseSearch(query, pathConfig),
            fts: (pathConfig) => this.executeFTSSearch(query, pathConfig),
            graph: (pathConfig) => this.executeGraphSearch(query, pathConfig)
        };
        const pathNames = this.getEnabledPaths(config);
        const outcomes = await Promise.all(pathNames.map((name) => runners[name](config[name])));
        const pathResults = {};
        pathNames.forEach((name, idx) => {
            pathResults[name] = outcomes[idx];
        });
        return pathResults;
    }
    /**
     * Run one retrieval path with timing and best-effort error handling.
     *
     * @param label    Capitalised path name used in the error log line.
     * @param producer Async function resolving to the path's result array.
     * @returns `{ results, time }`; `results` is empty when `producer` throws.
     */
    async runTimedPath(label, producer) {
        const startTime = Date.now();
        try {
            const results = await producer();
            return { results, time: Date.now() - startTime };
        }
        catch (error) {
            console.error(`[DynamicFusion] ${label} search error:`, error);
            return { results: [], time: Date.now() - startTime };
        }
    }
    /**
     * Coerce a config value to an integer >= 1 that is safe to splice into
     * query text; anything that is not a finite number yields `fallback`.
     */
    toBoundedInt(value, fallback) {
        const parsed = typeof value === 'string' ? Number.parseInt(value, 10) : value;
        if (typeof parsed !== 'number' || !Number.isFinite(parsed)) {
            return fallback;
        }
        return Math.max(1, Math.trunc(parsed));
    }
    /**
     * Convert a `[id, name, type, score, metadata]` row into a path result.
     */
    rowToResult(row, source) {
        return {
            id: row[0],
            name: row[1],
            type: row[2],
            entity_id: row[0],
            score: row[3],
            metadata: row[4],
            source,
            rawScore: row[3]
        };
    }
    /**
     * Execute dense vector search (HNSW).
     *
     * Embeds the query, asks the `entity:semantic` index for the nearest
     * entities and scores each as `1.0 - distance`.
     */
    async executeVectorSearch(query, config) {
        return this.runTimedPath('Vector', async () => {
            const topK = this.toBoundedInt(config?.topK, 20);
            const efSearch = this.toBoundedInt(config?.efSearch, 100);
            // Generate query embedding
            const embedding = await this.embeddings.embed(query);
            const datalogQuery = `
        ?[id, name, type, score, metadata] :=
          ~entity:semantic{
            id |
            query: vec($embedding),
            k: ${topK},
            ef: ${efSearch},
            bind_distance: dist
          },
          *entity{
            id,
            name,
            type,
            metadata,
            @ "NOW"
          },
          score = 1.0 - dist

        :order -score
        :limit ${topK}
      `;
            const dbResult = await this.db.run(datalogQuery, { embedding });
            return dbResult.rows.map((row) => this.rowToResult(row, 'vector'));
        });
    }
    /**
     * Execute sparse keyword search.
     *
     * Tokenises the query on whitespace, keeps tokens longer than two
     * characters (at most the first three) and scores each entity by
     * `matched keywords / name length`. Rows below `config.minScore`
     * (default 0.1; an explicit 0 is honoured) are dropped.
     */
    async executeSparseSearch(query, config) {
        return this.runTimedPath('Sparse', async () => {
            // Extract keywords (simple tokenization)
            const keywords = String(query ?? '')
                .toLowerCase()
                .split(/\s+/)
                .filter((word) => word.length > 2)
                .slice(0, 3);
            if (keywords.length === 0) {
                return [];
            }
            // Only real keywords take part in the match expression. Padding
            // with '' would make str_includes() true for every entity.
            const params = {};
            const matchTerms = keywords.map((keyword, idx) => {
                const paramName = `kw${idx + 1}`;
                params[paramName] = keyword;
                return `if(str_includes(name_lower, $${paramName}), 1, 0)`;
            });
            const topK = this.toBoundedInt(config?.topK, 20);
            const datalogQuery = `
        ?[id, name, type, score, metadata] :=
          *entity{
            id,
            name,
            type,
            metadata,
            @ "NOW"
          },
          name_lower = lowercase(name),
          match_count = ${matchTerms.join(' + ')},
          match_count > 0,
          score = to_float(match_count) / to_float(length(name_lower))

        :order -score
        :limit ${topK}
      `;
            const dbResult = await this.db.run(datalogQuery, params);
            const minScore = config?.minScore ?? 0.1;
            return dbResult.rows
                .filter((row) => row[3] >= minScore)
                .map((row) => this.rowToResult(row, 'sparse'));
        });
    }
    /**
     * Execute full-text search on the `entity:fts` index using TF-IDF scoring.
     */
    async executeFTSSearch(query, config) {
        return this.runTimedPath('FTS', async () => {
            const topK = this.toBoundedInt(config?.topK, 20);
            // According to CozoDB v0.7 docs: bind_score (not bind_score_bm_25)
            // score_kind can be 'tf_idf' or 'tf' (default is 'tf_idf')
            const datalogQuery = `
        ?[id, name, type, score, metadata] :=
          ~entity:fts{
            id |
            query: $query,
            k: ${topK},
            score_kind: 'tf_idf',
            bind_score: score
          },
          *entity{
            id,
            name,
            type,
            metadata,
            @ "NOW"
          }

        :order -score
        :limit ${topK}
      `;
            const dbResult = await this.db.run(datalogQuery, { query });
            return dbResult.rows.map((row) => this.rowToResult(row, 'fts'));
        });
    }
    /**
     * Execute graph traversal search.
     *
     * Picks up to five seed entities via the vector index, then follows
     * `relationship` edges outward up to `config.maxDepth` hops. Each reached
     * entity is scored `1 / depth` using its shortest depth.
     *
     * Seed ids and `config.relationTypes` are passed as bound parameters, so
     * values containing quotes cannot alter the query text.
     */
    async executeGraphSearch(query, config) {
        return this.runTimedPath('Graph', async () => {
            // First, find seed nodes via vector search
            const embedding = await this.embeddings.embed(query);
            // HNSW index returns 'id' not 'entity_id'
            const seedQuery = `
        ?[id] :=
          ~entity:semantic{
            id |
            query: vec($embedding),
            k: 5,
            ef: 100
          }
      `;
            const seedResult = await this.db.run(seedQuery, { embedding });
            if (seedResult.rows.length === 0) {
                return [];
            }
            const params = { seed_ids: seedResult.rows.map((row) => row[0]) };
            let relationFilter = 'true';
            if (Array.isArray(config?.relationTypes) && config.relationTypes.length > 0) {
                params.relation_types = config.relationTypes;
                relationFilter = 'is_in(relation_type, $relation_types)';
            }
            const maxDepth = this.toBoundedInt(config?.maxDepth, 2);
            const maxResults = this.toBoundedInt(config?.maxResults, 20);
            // min(depth) keeps one row per reachable node (its shortest depth),
            // so a node reached along several routes is not returned twice.
            const graphQuery = `
        seed[id] := id in $seed_ids

        reachable[to_id, min(depth)] :=
          seed[from_id],
          *relationship{from_id, to_id, relation_type, @ "NOW"},
          ${relationFilter},
          depth = 1

        reachable[to_id, min(depth)] :=
          reachable[from_id, prev_depth],
          prev_depth < ${maxDepth},
          *relationship{from_id, to_id, relation_type, @ "NOW"},
          ${relationFilter},
          depth = prev_depth + 1

        ?[id, name, type, score, metadata] :=
          reachable[id, depth],
          *entity{
            id,
            name,
            type,
            metadata,
            @ "NOW"
          },
          score = 1.0 / to_float(depth)

        :order -score
        :limit ${maxResults}
      `;
            const graphResult = await this.db.run(graphQuery, params);
            return graphResult.rows.map((row) => this.rowToResult(row, 'graph'));
        });
    }
    /**
     * Fuse results from multiple paths using `config.fusion.strategy`
     * (`rrf`, `weighted_sum`, `max` or `adaptive`); unknown values use RRF.
     */
    fuseResults(pathResults, config) {
        const strategy = config.fusion?.strategy || 'rrf';
        switch (strategy) {
            case 'weighted_sum':
                return this.fuseWeightedSum(pathResults, config);
            case 'max':
                return this.fuseMax(pathResults, config);
            case 'adaptive':
                return this.fuseAdaptive(pathResults, config);
            case 'rrf':
            default:
                return this.fuseRRF(pathResults, config);
        }
    }
    /**
     * Fold every path's results into one entry per entity.
     *
     * @param contributionOf `(result, rank, weight) => number`, the score one
     *                       path contributes for one entity; `rank` is the
     *                       zero-based position among that path's distinct entities.
     * @param combine        `(current, contribution) => number`, merges a further
     *                       path's contribution into the running score.
     * @returns Map of entity id to `{ score, sources, pathScores, entity }`.
     */
    accumulateScores(pathResults, config, contributionOf, combine) {
        const entityScores = new Map();
        for (const [pathName, pathData] of Object.entries(pathResults)) {
            if (!pathData || !pathData.results) {
                continue;
            }
            // `??` rather than `||`: an explicit weight of 0 must mute the path.
            const weight = config[pathName]?.weight ?? 1.0;
            // Paths return best-first, so only the first occurrence of an
            // entity within a path counts; later repeats are ignored.
            const seenInPath = new Set();
            for (const result of pathData.results) {
                const entityId = result.entity_id;
                if (seenInPath.has(entityId)) {
                    continue;
                }
                const contribution = contributionOf(result, seenInPath.size, weight);
                seenInPath.add(entityId);
                const entry = entityScores.get(entityId);
                if (entry === undefined) {
                    entityScores.set(entityId, {
                        score: contribution,
                        sources: new Set([pathName]),
                        pathScores: { [pathName]: contribution },
                        entity: result
                    });
                }
                else {
                    entry.score = combine(entry.score, contribution);
                    entry.sources.add(pathName);
                    entry.pathScores[pathName] = contribution;
                }
            }
        }
        return entityScores;
    }
    /**
     * Turn accumulated entries into the sorted, filtered result list.
     *
     * @param label Prefix for each result's `explanation` text.
     */
    finalizeFusion(entityScores, config, label) {
        const results = Array.from(entityScores.entries())
            .map(([entityId, data]) => ({
            id: data.entity.id,
            entity_id: entityId,
            name: data.entity.name,
            type: data.entity.type,
            score: data.score,
            source: Array.from(data.sources).join('+'),
            metadata: data.entity.metadata,
            pathScores: data.pathScores,
            explanation: `${label} from ${data.sources.size} path(s)`
        }))
            .sort((a, b) => b.score - a.score);
        // Apply min score filter and optional deduplication
        const minScore = config.fusion?.minScore ?? 0.0;
        const filtered = results.filter((r) => r.score >= minScore);
        return config.fusion?.deduplication
            ? this.deduplicateResults(filtered)
            : filtered;
    }
    /**
     * Reciprocal Rank Fusion (RRF): each path contributes
     * `weight / (rrfK + rank + 1)` per entity and contributions are summed.
     */
    fuseRRF(pathResults, config) {
        const k = config.fusion?.rrfK ?? 60;
        const entityScores = this.accumulateScores(pathResults, config, (_result, rank, weight) => weight / (k + rank + 1), (current, contribution) => current + contribution);
        return this.finalizeFusion(entityScores, config, 'RRF fusion');
    }
    /**
     * Weighted sum fusion: sums `rawScore * weight` over all paths.
     */
    fuseWeightedSum(pathResults, config) {
        const entityScores = this.accumulateScores(pathResults, config, (result, _rank, weight) => result.rawScore * weight, (current, contribution) => current + contribution);
        return this.finalizeFusion(entityScores, config, 'Weighted sum');
    }
    /**
     * Max score fusion: keeps the largest `rawScore * weight` over all paths.
     */
    fuseMax(pathResults, config) {
        const entityScores = this.accumulateScores(pathResults, config, (result, _rank, weight) => result.rawScore * weight, (current, contribution) => (contribution > current ? contribution : current));
        return this.finalizeFusion(entityScores, config, 'Max score');
    }
    /**
     * Adaptive fusion (query-dependent weighting)
     */
    fuseAdaptive(pathResults, config) {
        // Analyze query characteristics to adjust weights
        // For now, fall back to RRF
        // TODO: Implement adaptive weighting based on query analysis
        console.log('[DynamicFusion] Adaptive fusion not yet implemented, using RRF');
        return this.fuseRRF(pathResults, config);
    }
    /**
     * Remove duplicate results, keeping the first occurrence of each `entity_id`.
     */
    deduplicateResults(results) {
        const seen = new Set();
        return results.filter((r) => {
            if (seen.has(r.entity_id)) {
                return false;
            }
            seen.add(r.entity_id);
            return true;
        });
    }
    /**
     * Calculate search statistics.
     *
     * @returns `{ totalResults, pathContributions, fusionTime, pathTimes }`;
     *          `fusionTime` is the time elapsed since `startTime` in milliseconds.
     */
    calculateStats(pathResults, fusedResults, startTime) {
        const pathContributions = {
            vector: 0,
            sparse: 0,
            fts: 0,
            graph: 0
        };
        const pathTimes = {};
        // Count contributions
        for (const [pathName, pathData] of Object.entries(pathResults)) {
            if (pathData && pathData.results) {
                pathContributions[pathName] = pathData.results.length;
                pathTimes[pathName] = pathData.time;
            }
        }
        return {
            totalResults: fusedResults.length,
            pathContributions,
            fusionTime: Date.now() - startTime,
            pathTimes
        };
    }
    /**
     * Merge user config with defaults (shallow, per section).
     * A `null` or `undefined` config yields the defaults.
     */
    mergeConfig(config) {
        const overrides = config ?? {};
        const defaults = exports.DEFAULT_FUSION_CONFIG;
        return {
            vector: { ...defaults.vector, ...overrides.vector },
            sparse: { ...defaults.sparse, ...overrides.sparse },
            fts: { ...defaults.fts, ...overrides.fts },
            graph: { ...defaults.graph, ...overrides.graph },
            fusion: { ...defaults.fusion, ...overrides.fusion }
        };
    }
    /**
     * Get list of enabled paths, in the fixed order vector, sparse, fts, graph.
     */
    getEnabledPaths(config) {
        return ['vector', 'sparse', 'fts', 'graph'].filter((name) => config[name]?.enabled);
    }
}
602
+ exports.DynamicFusionSearch = DynamicFusionSearch;
@@ -204,7 +204,7 @@ class HybridSearch {
204
204
  semanticCall += `, filter: ${hnswFilters.join(" && ")}`;
205
205
  }
206
206
  semanticCall += `}`;
207
- let bodyConstraints = [semanticCall, `*entity{id, name, type, metadata, created_at}`];
207
+ let bodyConstraints = [semanticCall, `*entity{id, name, type, metadata, created_at, @ "NOW"}`];
208
208
  if (metaJoins.length > 0) {
209
209
  bodyConstraints.push(...metaJoins);
210
210
  }
@@ -232,10 +232,10 @@ class HybridSearch {
232
232
  `rank_val[id, r] := *entity{id, @ "NOW"}, not *entity_rank{entity_id: id}, r = 0.0`
233
233
  ];
234
234
  if (graphConstraints?.requiredRelations && graphConstraints.requiredRelations.length > 0) {
235
- helperRules.push(`rel_match[id, rel_type] := *relationship{from_id: id, relation_type: rel_type}`, `rel_match[id, rel_type] := *relationship{to_id: id, relation_type: rel_type}`);
235
+ helperRules.push(`rel_match[id, rel_type] := *relationship{from_id: id, relation_type: rel_type, @ "NOW"}`, `rel_match[id, rel_type] := *relationship{to_id: id, relation_type: rel_type, @ "NOW"}`);
236
236
  }
237
237
  if (graphConstraints?.targetEntityIds && graphConstraints.targetEntityIds.length > 0) {
238
- helperRules.push(`target_match[id, target_id] := *relationship{from_id: id, to_id: target_id}`, `target_match[id, target_id] := *relationship{to_id: id, from_id: target_id}`);
238
+ helperRules.push(`target_match[id, target_id] := *relationship{from_id: id, to_id: target_id, @ "NOW"}`, `target_match[id, target_id] := *relationship{to_id: id, from_id: target_id, @ "NOW"}`);
239
239
  }
240
240
  const datalogQuery = [
241
241
  ...helperRules,
@@ -360,7 +360,7 @@ class HybridSearch {
360
360
 
361
361
  result_entities[id, final_score, depth] := path[seed_id, id, depth], seeds[seed_id, seed_score], rank_val[id, pr], final_score = seed_score * (1.0 - 0.2 * depth)
362
362
 
363
- ?[id, name, type, metadata, created_at, score, source, text] := result_entities[id, score, depth], *entity{id, name, type, metadata, created_at}, source = 'graph_rag_entity', text = ''
363
+ ?[id, name, type, metadata, created_at, score, source, text] := result_entities[id, score, depth], *entity{id, name, type, metadata, created_at, @ "NOW"}, source = 'graph_rag_entity', text = ''
364
364
 
365
365
  :sort -score
366
366
  :limit $limit