cozo-memory 1.1.2 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +356 -5
- package/dist/adaptive-retrieval.js +520 -0
- package/dist/db-inspect.js +25 -0
- package/dist/dynamic-fusion.js +602 -0
- package/dist/hybrid-search.js +4 -4
- package/dist/index.js +699 -23
- package/dist/inference-engine.js +104 -76
- package/dist/logical-edges-service.js +316 -0
- package/dist/multi-hop-vector-pivot.js +390 -0
- package/dist/temporal-embedding-service.js +313 -0
- package/dist/test-adaptive-integration.js +84 -0
- package/dist/test-adaptive-retrieval.js +135 -0
- package/dist/test-compaction.js +91 -0
- package/dist/test-dynamic-fusion.js +231 -0
- package/dist/test-fact-lifecycle.js +82 -0
- package/dist/test-logical-edges.js +282 -0
- package/dist/test-manual-compact.js +95 -0
- package/dist/test-multi-hop-vector-pivot-v2.js +239 -0
- package/dist/test-multi-hop-vector-pivot.js +240 -0
- package/dist/test-temporal-embeddings.js +123 -0
- package/dist/test-validity-retract.js +45 -0
- package/dist/test-validity-rm.js +49 -0
- package/package.json +1 -1
|
@@ -0,0 +1,602 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Dynamic Fusion Framework for CozoDB Memory
|
|
4
|
+
*
|
|
5
|
+
* Inspired by Allan-Poe (arXiv:2511.00855) - All-in-one Graph-based Indexing
|
|
6
|
+
*
|
|
7
|
+
* Combines 4 retrieval paths with dynamic weighting:
|
|
8
|
+
* 1. Dense Vector Search (HNSW semantic similarity)
|
|
9
|
+
* 2. Sparse Vector Search (TF-IDF/BM25 keyword matching)
|
|
10
|
+
* 3. Full-Text Search (FTS exact/fuzzy matching)
|
|
11
|
+
* 4. Graph Traversal (relationship-based retrieval)
|
|
12
|
+
*
|
|
13
|
+
* Key Innovation: Dynamic fusion without index reconstruction
|
|
14
|
+
*/
|
|
15
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
16
|
+
exports.DynamicFusionSearch = exports.DEFAULT_FUSION_CONFIG = void 0;
|
|
17
|
+
/**
 * Baseline settings applied to every fusion search.
 *
 * Each retrieval path (vector, sparse, fts, graph) carries an on/off switch,
 * a fusion weight and its own result-size knobs; `fusion` selects how the
 * per-path rankings are merged. The four path weights sum to 1.0.
 */
const DEFAULT_FUSION_CONFIG = {
    // Dense (HNSW) semantic search.
    vector: { enabled: true, weight: 0.4, topK: 20, efSearch: 100 },
    // Keyword-overlap search on entity names.
    sparse: { enabled: true, weight: 0.3, topK: 20, minScore: 0.1 },
    // Full-text index search.
    fts: { enabled: true, weight: 0.2, topK: 20, fuzzy: true },
    // Relationship traversal starting from vector-selected seeds.
    graph: { enabled: true, weight: 0.1, maxDepth: 2, maxResults: 20 },
    // How the per-path result lists are combined.
    fusion: { strategy: 'rrf', rrfK: 60, minScore: 0.0, deduplication: true }
};
exports.DEFAULT_FUSION_CONFIG = DEFAULT_FUSION_CONFIG;
|
|
52
|
+
/** Maximum number of query keywords the sparse path scores against. */
const MAX_SPARSE_KEYWORDS = 3;
/**
 * Coerce a caller-supplied numeric option to an integer that is safe to
 * splice into a query string.
 *
 * @param {*} value - Raw option value (may be undefined, a string, etc.).
 * @param {number} fallback - Returned when `value` is missing, not finite,
 *   or below `min`.
 * @param {number} [min=1] - Smallest accepted value.
 * @returns {number} A truncated integer >= `min`, or `fallback`.
 */
function toInt(value, fallback, min = 1) {
    if (value == null) {
        return fallback;
    }
    const n = Math.trunc(Number(value));
    return Number.isFinite(n) && n >= min ? n : fallback;
}
/**
 * Render a value as a double-quoted string literal with JSON escaping, so
 * quotes or backslashes inside it cannot terminate the literal early.
 *
 * @param {*} value - Value to quote; coerced with `String()`.
 * @returns {string} The quoted, escaped literal.
 */
function toQuotedLiteral(value) {
    return JSON.stringify(String(value));
}
/**
 * Convert one `[id, name, type, score, metadata]` row into a path result.
 *
 * @param {Array} row - Row as returned by `db.run(...).rows`.
 * @param {string} source - Name of the retrieval path that produced the row.
 * @returns {object} Result with `entity_id` mirroring `id` and `rawScore`
 *   mirroring `score`.
 */
function rowToResult(row, source) {
    const [id, name, type, score, metadata] = row;
    return {
        id,
        name,
        type,
        entity_id: id,
        score,
        metadata,
        source,
        rawScore: score
    };
}
/**
 * Shared core of the fusion strategies: group path results by `entity_id`,
 * combine their contributions, sort descending and drop low scores.
 *
 * @param {object} pathResults - Map of path name to `{ results, time }`.
 * @param {object} config - Full fusion config (path weights, `fusion.minScore`).
 * @param {string} label - Prefix used in each result's `explanation`.
 * @param {function(object, number, number): number} contributionOf - Score
 *   contributed by one result, given `(result, rank, weight)`.
 * @param {function(number, number): number} combine - Merges the running
 *   score with a new contribution for an already-seen entity.
 * @returns {Array<object>} Fused results, best first, filtered by min score.
 */
function fuseByEntity(pathResults, config, label, contributionOf, combine) {
    const byEntity = new Map();
    for (const [pathName, pathData] of Object.entries(pathResults)) {
        if (!pathData || !pathData.results) {
            continue;
        }
        // `??` so that an explicit weight of 0 silences a path instead of
        // silently becoming 1.0.
        const weight = config[pathName]?.weight ?? 1.0;
        pathData.results.forEach((result, rank) => {
            const contribution = contributionOf(result, rank, weight);
            const entry = byEntity.get(result.entity_id);
            if (entry === undefined) {
                // First sighting: keep this result as the entity's
                // representative record.
                byEntity.set(result.entity_id, {
                    score: contribution,
                    sources: new Set([pathName]),
                    pathScores: { [pathName]: contribution },
                    entity: result
                });
                return;
            }
            entry.score = combine(entry.score, contribution);
            entry.sources.add(pathName);
            entry.pathScores[pathName] = contribution;
        });
    }
    const minScore = config.fusion?.minScore ?? 0.0;
    return Array.from(byEntity.entries())
        .map(([entityId, data]) => ({
            id: data.entity.id,
            entity_id: entityId,
            name: data.entity.name,
            type: data.entity.type,
            score: data.score,
            source: Array.from(data.sources).join('+'),
            metadata: data.entity.metadata,
            pathScores: data.pathScores,
            explanation: `${label} from ${data.sources.size} path(s)`
        }))
        .sort((a, b) => b.score - a.score)
        .filter((r) => r.score >= minScore);
}
/**
 * Dynamic Fusion Search Engine
 *
 * Runs up to four retrieval paths (vector, sparse, fts, graph) against the
 * database and merges their rankings with a configurable strategy and
 * per-path weights.
 */
class DynamicFusionSearch {
    db;
    embeddings;
    /**
     * @param {object} db - Database handle; must expose
     *   `run(script, params)` resolving to `{ rows }`.
     * @param {object} embeddings - Embedding provider; must expose
     *   `embed(text)` resolving to a vector.
     */
    constructor(db, embeddings) {
        this.db = db;
        this.embeddings = embeddings;
    }
    /**
     * Execute dynamic fusion search.
     *
     * @param {string} query - Free-text query.
     * @param {object} [config={}] - Partial config merged over the defaults.
     * @returns {Promise<{results: Array<object>, stats: object}>} Fused
     *   results (best first) plus timing/contribution statistics.
     */
    async search(query, config = {}) {
        const startTime = Date.now();
        const fullConfig = this.mergeConfig(config);
        console.log('[DynamicFusion] Starting search with config:', {
            query,
            enabledPaths: this.getEnabledPaths(fullConfig)
        });
        const pathResults = await this.executeAllPaths(query, fullConfig);
        const fusedResults = this.fuseResults(pathResults, fullConfig);
        const stats = this.calculateStats(pathResults, fusedResults, startTime);
        console.log('[DynamicFusion] Search completed:', {
            totalResults: fusedResults.length,
            pathContributions: stats.pathContributions,
            fusionTime: stats.fusionTime
        });
        return { results: fusedResults, stats };
    }
    /**
     * Execute all enabled retrieval paths in parallel.
     *
     * @param {string} query - Free-text query.
     * @param {object} config - Full (merged) config.
     * @returns {Promise<object>} Map of enabled path name to its
     *   `{ results, time }` outcome.
     */
    async executeAllPaths(query, config) {
        const runners = {
            vector: (q, c) => this.executeVectorSearch(q, c),
            sparse: (q, c) => this.executeSparseSearch(q, c),
            fts: (q, c) => this.executeFTSSearch(q, c),
            graph: (q, c) => this.executeGraphSearch(q, c)
        };
        const pathNames = this.getEnabledPaths(config);
        // Every runner catches its own errors, so Promise.all cannot reject
        // because of a single failing path.
        const outcomes = await Promise.all(pathNames.map((name) => runners[name](query, config[name])));
        const pathResults = {};
        pathNames.forEach((name, idx) => {
            pathResults[name] = outcomes[idx];
        });
        return pathResults;
    }
    /**
     * Execute dense vector search (HNSW).
     *
     * @param {string} query - Text to embed and search for.
     * @param {object} config - Vector path config (`topK`, `efSearch`).
     * @returns {Promise<{results: Array<object>, time: number}>} Matches
     *   scored as `1 - distance`; empty on error (the error is logged).
     */
    async executeVectorSearch(query, config) {
        const startTime = Date.now();
        try {
            const embedding = await this.embeddings.embed(query);
            // Numeric options are spliced into the script, so force them
            // to plain integers first.
            const topK = toInt(config.topK, 20);
            const efSearch = toInt(config.efSearch, 100);
            const datalogQuery = `
                ?[id, name, type, score, metadata] :=
                    ~entity:semantic{
                        id |
                        query: vec($embedding),
                        k: ${topK},
                        ef: ${efSearch},
                        bind_distance: dist
                    },
                    *entity{
                        id,
                        name,
                        type,
                        metadata,
                        @ "NOW"
                    },
                    score = 1.0 - dist

                :order -score
                :limit ${topK}
            `;
            const dbResult = await this.db.run(datalogQuery, { embedding });
            return {
                results: dbResult.rows.map((row) => rowToResult(row, 'vector')),
                time: Date.now() - startTime
            };
        }
        catch (error) {
            console.error('[DynamicFusion] Vector search error:', error);
            return { results: [], time: Date.now() - startTime };
        }
    }
    /**
     * Execute sparse vector search (keyword-based).
     *
     * Scores each entity by how many of the query keywords (at most
     * MAX_SPARSE_KEYWORDS, each longer than two characters) occur in its
     * lower-cased name, divided by the name length.
     *
     * @param {string} query - Free-text query.
     * @param {object} config - Sparse path config (`topK`, `minScore`).
     * @returns {Promise<{results: Array<object>, time: number}>} Matches at
     *   or above `minScore`; empty on error or when no keyword qualifies.
     */
    async executeSparseSearch(query, config) {
        const startTime = Date.now();
        try {
            const keywords = query.toLowerCase()
                .split(/\s+/)
                .filter((w) => w.length > 2)
                .slice(0, MAX_SPARSE_KEYWORDS);
            if (keywords.length === 0) {
                return { results: [], time: Date.now() - startTime };
            }
            // Emit one term per real keyword. Padding with '' would make
            // str_includes true for every name and match the whole table.
            const params = {};
            const matchTerms = keywords.map((keyword, idx) => {
                const paramName = `kw${idx + 1}`;
                params[paramName] = keyword;
                return `if(str_includes(name_lower, $${paramName}), 1, 0)`;
            });
            const topK = toInt(config.topK, 20);
            const datalogQuery = `
                ?[id, name, type, score, metadata] :=
                    *entity{
                        id,
                        name,
                        type,
                        metadata,
                        @ "NOW"
                    },
                    name_lower = lowercase(name),
                    match_count = ${matchTerms.join(' + ')},
                    match_count > 0,
                    score = to_float(match_count) / to_float(length(name_lower))

                :order -score
                :limit ${topK}
            `;
            const dbResult = await this.db.run(datalogQuery, params);
            // `??` keeps an explicit minScore of 0 instead of raising it to 0.1.
            const minScore = config.minScore ?? 0.1;
            return {
                results: dbResult.rows
                    .filter((row) => row[3] >= minScore)
                    .map((row) => rowToResult(row, 'sparse')),
                time: Date.now() - startTime
            };
        }
        catch (error) {
            console.error('[DynamicFusion] Sparse search error:', error);
            return { results: [], time: Date.now() - startTime };
        }
    }
    /**
     * Execute full-text search.
     *
     * @param {string} query - Text passed to the `entity:fts` index.
     * @param {object} config - FTS path config (`topK`).
     * @returns {Promise<{results: Array<object>, time: number}>} Matches
     *   with their tf-idf score; empty on error (the error is logged).
     */
    async executeFTSSearch(query, config) {
        const startTime = Date.now();
        try {
            const topK = toInt(config.topK, 20);
            // Uses `bind_score` with score_kind 'tf_idf' on the entity FTS index.
            const datalogQuery = `
                ?[id, name, type, score, metadata] :=
                    ~entity:fts{
                        id |
                        query: $query,
                        k: ${topK},
                        score_kind: 'tf_idf',
                        bind_score: score
                    },
                    *entity{
                        id,
                        name,
                        type,
                        metadata,
                        @ "NOW"
                    }

                :order -score
                :limit ${topK}
            `;
            const dbResult = await this.db.run(datalogQuery, { query });
            return {
                results: dbResult.rows.map((row) => rowToResult(row, 'fts')),
                time: Date.now() - startTime
            };
        }
        catch (error) {
            console.error('[DynamicFusion] FTS search error:', error);
            return { results: [], time: Date.now() - startTime };
        }
    }
    /**
     * Execute graph traversal search.
     *
     * Picks the 5 nearest entities by vector search as seeds, follows
     * `relationship` edges outward up to `maxDepth` hops and scores each
     * reached entity as `1 / depth`.
     *
     * @param {string} query - Text used to select the seed entities.
     * @param {object} config - Graph path config (`maxDepth`, `maxResults`,
     *   optional `relationTypes` whitelist).
     * @returns {Promise<{results: Array<object>, time: number}>} One result
     *   per reached entity (best depth wins); empty on error or no seeds.
     */
    async executeGraphSearch(query, config) {
        const startTime = Date.now();
        try {
            const embedding = await this.embeddings.embed(query);
            const seedQuery = `
                ?[id] :=
                    ~entity:semantic{
                        id |
                        query: vec($embedding),
                        k: 5,
                        ef: 100
                    }
            `;
            const seedResult = await this.db.run(seedQuery, { embedding });
            if (seedResult.rows.length === 0) {
                return { results: [], time: Date.now() - startTime };
            }
            // Seed ids and relation types end up inside the script text,
            // so each one is emitted as an escaped string literal.
            const relationTypes = config.relationTypes ?? [];
            const relationFilter = relationTypes.length > 0
                ? `is_in(relation_type, [${relationTypes.map(toQuotedLiteral).join(', ')}])`
                : 'true';
            const seedIds = seedResult.rows.map((row) => toQuotedLiteral(row[0])).join(', ');
            // A minimum of 0 is accepted: like 1, it yields direct neighbours only.
            const maxDepth = toInt(config.maxDepth, 2, 0);
            const maxResults = toInt(config.maxResults, 20);
            const graphQuery = `
                seed[id] := id in [${seedIds}]

                reachable[to_id, depth] :=
                    seed[from_id],
                    *relationship{from_id, to_id, relation_type, @ "NOW"},
                    ${relationFilter},
                    depth = 1

                reachable[to_id, depth] :=
                    reachable[from_id, prev_depth],
                    prev_depth < ${maxDepth},
                    *relationship{from_id, to_id, relation_type, @ "NOW"},
                    ${relationFilter},
                    depth = prev_depth + 1

                ?[id, name, type, score, metadata] :=
                    reachable[id, depth],
                    *entity{
                        id,
                        name,
                        type,
                        metadata,
                        @ "NOW"
                    },
                    score = 1.0 / to_float(depth)

                :order -score
                :limit ${maxResults}
            `;
            const graphResult = await this.db.run(graphQuery, {});
            // An entity reachable at several depths comes back once per
            // depth. Rows are ordered best-first, so keep only the first
            // row per id; otherwise fusion counts the entity repeatedly.
            const seenIds = new Set();
            const results = [];
            for (const row of graphResult.rows) {
                if (seenIds.has(row[0])) {
                    continue;
                }
                seenIds.add(row[0]);
                results.push(rowToResult(row, 'graph'));
            }
            return { results, time: Date.now() - startTime };
        }
        catch (error) {
            console.error('[DynamicFusion] Graph search error:', error);
            return { results: [], time: Date.now() - startTime };
        }
    }
    /**
     * Fuse results from multiple paths using `config.fusion.strategy`
     * ('rrf', 'weighted_sum', 'max' or 'adaptive'); unknown or missing
     * strategies use RRF.
     *
     * @param {object} pathResults - Map of path name to `{ results, time }`.
     * @param {object} config - Full (merged) config.
     * @returns {Array<object>} Fused results, best first.
     */
    fuseResults(pathResults, config) {
        const strategy = config.fusion?.strategy || 'rrf';
        switch (strategy) {
            case 'weighted_sum':
                return this.fuseWeightedSum(pathResults, config);
            case 'max':
                return this.fuseMax(pathResults, config);
            case 'adaptive':
                return this.fuseAdaptive(pathResults, config);
            case 'rrf':
            default:
                return this.fuseRRF(pathResults, config);
        }
    }
    /**
     * Reciprocal Rank Fusion (RRF): every hit contributes
     * `weight / (rrfK + rank + 1)`, summed per entity.
     *
     * @param {object} pathResults - Map of path name to `{ results, time }`.
     * @param {object} config - Full (merged) config.
     * @returns {Array<object>} Fused results, best first.
     */
    fuseRRF(pathResults, config) {
        const k = config.fusion?.rrfK ?? 60;
        const fused = fuseByEntity(pathResults, config, 'RRF fusion', (_result, rank, weight) => weight / (k + rank + 1), (total, contribution) => total + contribution);
        return config.fusion?.deduplication
            ? this.deduplicateResults(fused)
            : fused;
    }
    /**
     * Weighted sum fusion: every hit contributes `rawScore * weight`,
     * summed per entity.
     *
     * @param {object} pathResults - Map of path name to `{ results, time }`.
     * @param {object} config - Full (merged) config.
     * @returns {Array<object>} Fused results, best first.
     */
    fuseWeightedSum(pathResults, config) {
        const fused = fuseByEntity(pathResults, config, 'Weighted sum', (result, _rank, weight) => result.rawScore * weight, (total, contribution) => total + contribution);
        return config.fusion?.deduplication
            ? this.deduplicateResults(fused)
            : fused;
    }
    /**
     * Max score fusion: an entity's score is its largest single
     * `rawScore * weight` across all paths.
     *
     * @param {object} pathResults - Map of path name to `{ results, time }`.
     * @param {object} config - Full (merged) config.
     * @returns {Array<object>} Fused results, best first.
     */
    fuseMax(pathResults, config) {
        const fused = fuseByEntity(pathResults, config, 'Max score', (result, _rank, weight) => result.rawScore * weight, (best, contribution) => (contribution > best ? contribution : best));
        return config.fusion?.deduplication
            ? this.deduplicateResults(fused)
            : fused;
    }
    /**
     * Adaptive fusion (query-dependent weighting).
     *
     * Not implemented yet: logs a notice and delegates to RRF.
     *
     * @param {object} pathResults - Map of path name to `{ results, time }`.
     * @param {object} config - Full (merged) config.
     * @returns {Array<object>} Fused results, best first.
     */
    fuseAdaptive(pathResults, config) {
        // TODO: Implement adaptive weighting based on query analysis
        console.log('[DynamicFusion] Adaptive fusion not yet implemented, using RRF');
        return this.fuseRRF(pathResults, config);
    }
    /**
     * Remove duplicate results, keeping the first occurrence of each
     * `entity_id`.
     *
     * @param {Array<object>} results - Results in priority order.
     * @returns {Array<object>} Results with later duplicates removed.
     */
    deduplicateResults(results) {
        const seen = new Set();
        return results.filter((r) => {
            if (seen.has(r.entity_id)) {
                return false;
            }
            seen.add(r.entity_id);
            return true;
        });
    }
    /**
     * Calculate search statistics.
     *
     * @param {object} pathResults - Map of path name to `{ results, time }`.
     * @param {Array<object>} fusedResults - Output of the fusion step.
     * @param {number} startTime - `Date.now()` value taken when the search began.
     * @returns {object} `totalResults`, per-path result counts
     *   (`pathContributions`), elapsed milliseconds since `startTime`
     *   (`fusionTime`) and per-path durations (`pathTimes`).
     */
    calculateStats(pathResults, fusedResults, startTime) {
        const pathContributions = {
            vector: 0,
            sparse: 0,
            fts: 0,
            graph: 0
        };
        const pathTimes = {};
        for (const [pathName, pathData] of Object.entries(pathResults)) {
            if (pathData && pathData.results) {
                pathContributions[pathName] = pathData.results.length;
                pathTimes[pathName] = pathData.time;
            }
        }
        return {
            totalResults: fusedResults.length,
            pathContributions,
            fusionTime: Date.now() - startTime,
            pathTimes
        };
    }
    /**
     * Merge user config with defaults, section by section (shallow).
     *
     * @param {object} [config] - Partial config; `null`/`undefined` means
     *   "defaults only".
     * @returns {object} Config with all five sections populated.
     */
    mergeConfig(config) {
        const overrides = config ?? {};
        const defaults = exports.DEFAULT_FUSION_CONFIG;
        return {
            vector: { ...defaults.vector, ...overrides.vector },
            sparse: { ...defaults.sparse, ...overrides.sparse },
            fts: { ...defaults.fts, ...overrides.fts },
            graph: { ...defaults.graph, ...overrides.graph },
            fusion: { ...defaults.fusion, ...overrides.fusion }
        };
    }
    /**
     * Get list of enabled paths, always in the order
     * vector, sparse, fts, graph.
     *
     * @param {object} config - Full or partial config.
     * @returns {Array<string>} Names of paths whose `enabled` flag is truthy.
     */
    getEnabledPaths(config) {
        return ['vector', 'sparse', 'fts', 'graph'].filter((name) => config[name]?.enabled);
    }
}
|
|
602
|
+
exports.DynamicFusionSearch = DynamicFusionSearch;
|
package/dist/hybrid-search.js
CHANGED
|
@@ -204,7 +204,7 @@ class HybridSearch {
|
|
|
204
204
|
semanticCall += `, filter: ${hnswFilters.join(" && ")}`;
|
|
205
205
|
}
|
|
206
206
|
semanticCall += `}`;
|
|
207
|
-
let bodyConstraints = [semanticCall, `*entity{id, name, type, metadata, created_at}`];
|
|
207
|
+
let bodyConstraints = [semanticCall, `*entity{id, name, type, metadata, created_at, @ "NOW"}`];
|
|
208
208
|
if (metaJoins.length > 0) {
|
|
209
209
|
bodyConstraints.push(...metaJoins);
|
|
210
210
|
}
|
|
@@ -232,10 +232,10 @@ class HybridSearch {
|
|
|
232
232
|
`rank_val[id, r] := *entity{id, @ "NOW"}, not *entity_rank{entity_id: id}, r = 0.0`
|
|
233
233
|
];
|
|
234
234
|
if (graphConstraints?.requiredRelations && graphConstraints.requiredRelations.length > 0) {
|
|
235
|
-
helperRules.push(`rel_match[id, rel_type] := *relationship{from_id: id, relation_type: rel_type}`, `rel_match[id, rel_type] := *relationship{to_id: id, relation_type: rel_type}`);
|
|
235
|
+
helperRules.push(`rel_match[id, rel_type] := *relationship{from_id: id, relation_type: rel_type, @ "NOW"}`, `rel_match[id, rel_type] := *relationship{to_id: id, relation_type: rel_type, @ "NOW"}`);
|
|
236
236
|
}
|
|
237
237
|
if (graphConstraints?.targetEntityIds && graphConstraints.targetEntityIds.length > 0) {
|
|
238
|
-
helperRules.push(`target_match[id, target_id] := *relationship{from_id: id, to_id: target_id}`, `target_match[id, target_id] := *relationship{to_id: id, from_id: target_id}`);
|
|
238
|
+
helperRules.push(`target_match[id, target_id] := *relationship{from_id: id, to_id: target_id, @ "NOW"}`, `target_match[id, target_id] := *relationship{to_id: id, from_id: target_id, @ "NOW"}`);
|
|
239
239
|
}
|
|
240
240
|
const datalogQuery = [
|
|
241
241
|
...helperRules,
|
|
@@ -360,7 +360,7 @@ class HybridSearch {
|
|
|
360
360
|
|
|
361
361
|
result_entities[id, final_score, depth] := path[seed_id, id, depth], seeds[seed_id, seed_score], rank_val[id, pr], final_score = seed_score * (1.0 - 0.2 * depth)
|
|
362
362
|
|
|
363
|
-
?[id, name, type, metadata, created_at, score, source, text] := result_entities[id, score, depth], *entity{id, name, type, metadata, created_at}, source = 'graph_rag_entity', text = ''
|
|
363
|
+
?[id, name, type, metadata, created_at, score, source, text] := result_entities[id, score, depth], *entity{id, name, type, metadata, created_at, @ "NOW"}, source = 'graph_rag_entity', text = ''
|
|
364
364
|
|
|
365
365
|
:sort -score
|
|
366
366
|
:limit $limit
|