cozo-memory 1.1.6 → 1.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/memory-activation.js +295 -0
- package/dist/query-aware-traversal.js +291 -0
- package/dist/query-pipeline.js +355 -0
- package/dist/test-memory-activation.js +222 -0
- package/dist/test-query-aware-traversal.js +226 -0
- package/dist/test-query-pipeline.js +151 -0
- package/package.json +1 -1
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.MemoryActivationService = void 0;
|
|
4
|
+
/**
 * Ebbinghaus-style memory activation over Cozo `observation` rows.
 *
 * Retention follows R = base^(-t/S): `t` is the time since last access
 * (days or hours, per config) and `S` is a strength that grows with each
 * recorded access. Observations whose activation drops below
 * `retentionThreshold` become pruning candidates.
 */
class MemoryActivationService {
    db;     // Cozo database handle exposing async `run(query, params)`
    config; // fully-resolved decay configuration (defaults applied)
    /**
     * @param {object} db - Database handle with an async `run(query, params)`.
     * @param {object} [config] - Partial overrides for the decay model.
     */
    constructor(db, config = {}) {
        this.db = db;
        this.config = {
            initialStrength: config.initialStrength ?? 1.0,
            strengthIncrement: config.strengthIncrement ?? 1.0,
            maxStrength: config.maxStrength ?? 20.0,
            retentionThreshold: config.retentionThreshold ?? 0.15,
            decayBase: config.decayBase ?? Math.E,
            timeUnit: config.timeUnit ?? 'days',
        };
    }
    /**
     * Calculate activation score for an observation.
     * Formula: R = base^(-t/S), clamped to [0, 1].
     * @param {number} timeSinceAccess - Elapsed time units since last access.
     * @param {number} strength - Memory strength S (should be > 0).
     * @returns {number} Activation in [0, 1].
     */
    calculateActivation(timeSinceAccess, strength) {
        if (timeSinceAccess === 0)
            return 1.0;
        // Fix: a non-positive strength must mean "fully decayed". Previously a
        // negative strength flipped the exponent sign and the clamp below
        // reported full activation (1.0) instead of none.
        if (!(strength > 0))
            return 0;
        const exponent = -timeSinceAccess / strength;
        const activation = Math.pow(this.config.decayBase, exponent);
        return Math.max(0, Math.min(1, activation));
    }
    /**
     * Current memory strength for an observation.
     * S = initialStrength + accessCount * strengthIncrement, capped at maxStrength.
     * @param {number} accessCount - Recorded accesses (may be fractional after
     *   priming boosts from `boostRelatedMemories`).
     * @returns {number} Strength S.
     */
    calculateStrength(accessCount) {
        const strength = this.config.initialStrength +
            (accessCount * this.config.strengthIncrement);
        return Math.min(strength, this.config.maxStrength);
    }
    /**
     * Convert a last-access timestamp (epoch ms) to elapsed time units.
     * @param {number} lastAccessTime - Epoch milliseconds of last access.
     * @returns {number} Elapsed hours or days, per `config.timeUnit`.
     */
    getTimeSinceAccess(lastAccessTime) {
        const now = Date.now();
        const diffMs = now - lastAccessTime;
        if (this.config.timeUnit === 'hours') {
            return diffMs / (1000 * 60 * 60);
        }
        else {
            return diffMs / (1000 * 60 * 60 * 24);
        }
    }
    /**
     * Fetch a single observation row by id.
     * @returns {Promise<Array|null>} The row tuple, or null when not found.
     */
    async #getObservation(observationId) {
        const result = await this.db.run(`
        ?[id, entity_id, text, metadata, created_at] :=
          *observation{id, entity_id, text, metadata, created_at},
          id == $id
      `, { id: observationId });
        return result.rows.length > 0 ? result.rows[0] : null;
    }
    /**
     * Upsert an observation row (used to persist updated access metadata).
     */
    async #putObservation(id, entity_id, text, metadata, created_at) {
        await this.db.run(`
        ?[id, entity_id, text, metadata, created_at] <- [
          [$id, $entity_id, $text, $metadata, $created_at]
        ]
        :put observation {id, entity_id, text => metadata, created_at}
      `, {
            id,
            entity_id,
            text,
            metadata,
            created_at
        });
    }
    /**
     * Calculate activation scores for all observations (optionally scoped to
     * one entity), sorted by activation descending.
     * @param {string} [entityId] - Restrict to observations of this entity.
     * @returns {Promise<Array>} Score records; empty array on query failure.
     */
    async calculateActivationScores(entityId) {
        try {
            // Query observations with access metadata.
            const query = entityId
                ? `
          ?[id, entity_id, text, metadata, created_at] :=
            *observation{id, entity_id, text, metadata, created_at},
            entity_id == $entity_id
        `
                : `
          ?[id, entity_id, text, metadata, created_at] :=
            *observation{id, entity_id, text, metadata, created_at}
        `;
            const result = await this.db.run(query, entityId ? { entity_id: entityId } : {});
            const scores = [];
            for (const [id, entity_id, text, metadata, created_at] of result.rows) {
                // Never-accessed observations fall back to their creation time.
                const accessCount = (metadata?.access_count || 0);
                const lastAccessTime = (metadata?.last_access_time || created_at);
                const timeSinceAccess = this.getTimeSinceAccess(lastAccessTime);
                const strength = this.calculateStrength(accessCount);
                const activation = this.calculateActivation(timeSinceAccess, strength);
                const shouldRetain = activation >= this.config.retentionThreshold;
                const reason = shouldRetain
                    ? `Active memory (activation: ${activation.toFixed(3)}, strength: ${strength.toFixed(1)})`
                    : `Below threshold (activation: ${activation.toFixed(3)} < ${this.config.retentionThreshold})`;
                scores.push({
                    observationId: id,
                    entityId: entity_id,
                    activation,
                    strength,
                    timeSinceAccess,
                    accessCount,
                    shouldRetain,
                    reason
                });
            }
            return scores.sort((a, b) => b.activation - a.activation);
        }
        catch (error) {
            console.error('[MemoryActivation] Error calculating activation scores:', error);
            return [];
        }
    }
    /**
     * Record a retrieval of an observation: bump access_count and stamp
     * last_access_time, strengthening the memory. No-op when id is unknown.
     * @param {string} observationId
     */
    async recordAccess(observationId) {
        try {
            const row = await this.#getObservation(observationId);
            if (row === null) {
                console.warn(`[MemoryActivation] Observation ${observationId} not found`);
                return;
            }
            const [id, entity_id, text, metadata, created_at] = row;
            const currentMetadata = (metadata || {});
            const updatedMetadata = {
                ...currentMetadata,
                access_count: (currentMetadata.access_count || 0) + 1,
                last_access_time: Date.now(),
            };
            await this.#putObservation(id, entity_id, text, updatedMetadata, created_at);
        }
        catch (error) {
            console.error('[MemoryActivation] Error recording access:', error);
        }
    }
    /**
     * Prune observations whose activation fell below the retention threshold.
     * @param {boolean} [dryRun=true] - When true, only report candidates.
     * @param {string} [entityId] - Restrict pruning to one entity.
     * @returns {Promise<{pruned:number,preserved:number,candidates:Array}>}
     */
    async pruneWeakMemories(dryRun = true, entityId) {
        try {
            const scores = await this.calculateActivationScores(entityId);
            const candidates = scores.filter(s => !s.shouldRetain);
            if (dryRun) {
                // Logged to stderr, keeping stdout clean for protocol traffic.
                console.error(`[MemoryActivation] Dry run: ${candidates.length} observations below threshold`);
                return {
                    pruned: 0,
                    preserved: scores.length,
                    candidates,
                };
            }
            // Actually delete weak memories, one row per query.
            let pruned = 0;
            for (const candidate of candidates) {
                await this.db.run(`
          ?[id, entity_id, text, metadata, created_at] :=
            *observation{id, entity_id, text, metadata, created_at},
            id == $id
          :rm observation {id, entity_id, text => metadata, created_at}
        `, { id: candidate.observationId });
                pruned++;
            }
            return {
                pruned,
                preserved: scores.length - pruned,
                candidates,
            };
        }
        catch (error) {
            console.error('[MemoryActivation] Error pruning weak memories:', error);
            return { pruned: 0, preserved: 0, candidates: [] };
        }
    }
    /** Zeroed stats object returned when there is no data or on error. */
    #emptyStats() {
        return {
            totalObservations: 0,
            averageActivation: 0,
            averageStrength: 0,
            belowThreshold: 0,
            aboveThreshold: 0,
            distribution: {
                veryWeak: 0,
                weak: 0,
                moderate: 0,
                strong: 0,
                veryStrong: 0,
            },
        };
    }
    /**
     * Aggregate activation statistics, optionally scoped to one entity.
     * @param {string} [entityId]
     * @returns {Promise<object>} Totals, averages, and a banded histogram.
     */
    async getActivationStats(entityId) {
        try {
            const scores = await this.calculateActivationScores(entityId);
            if (scores.length === 0) {
                return this.#emptyStats();
            }
            const totalActivation = scores.reduce((sum, s) => sum + s.activation, 0);
            const totalStrength = scores.reduce((sum, s) => sum + s.strength, 0);
            // Histogram over fixed activation bands.
            const distribution = {
                veryWeak: scores.filter(s => s.activation < 0.1).length,
                weak: scores.filter(s => s.activation >= 0.1 && s.activation < 0.3).length,
                moderate: scores.filter(s => s.activation >= 0.3 && s.activation < 0.6).length,
                strong: scores.filter(s => s.activation >= 0.6 && s.activation < 0.9).length,
                veryStrong: scores.filter(s => s.activation >= 0.9).length,
            };
            return {
                totalObservations: scores.length,
                averageActivation: totalActivation / scores.length,
                averageStrength: totalStrength / scores.length,
                belowThreshold: scores.filter(s => !s.shouldRetain).length,
                aboveThreshold: scores.filter(s => s.shouldRetain).length,
                distribution,
            };
        }
        catch (error) {
            console.error('[MemoryActivation] Error getting activation stats:', error);
            return this.#emptyStats();
        }
    }
    /**
     * Boost the activation of sibling observations on the same entity
     * (spreading-activation / priming). Each sibling's access_count is raised
     * by `boostFactor`, i.e. credited with a partial access.
     * @param {string} observationId - The observation that was just accessed.
     * @param {number} [boostFactor=0.5] - Partial access credit per sibling.
     * @returns {Promise<number>} Number of sibling observations boosted.
     */
    async boostRelatedMemories(observationId, boostFactor = 0.5) {
        try {
            const row = await this.#getObservation(observationId);
            if (row === null) {
                console.warn(`[MemoryActivation] Observation ${observationId} not found`);
                return 0;
            }
            const entity_id = row[1];
            // All other observations attached to the same entity.
            const relatedResult = await this.db.run(`
        ?[id, entity_id, text, metadata, created_at] :=
          *observation{id, entity_id, text, metadata, created_at},
          entity_id == $entity_id,
          id != $id
      `, { entity_id, id: observationId });
            let boosted = 0;
            for (const [relId, relEntityId, relText, relMetadata, relCreatedAt] of relatedResult.rows) {
                const currentMetadata = (relMetadata || {});
                const updatedMetadata = {
                    ...currentMetadata,
                    // Simulate a partial access (priming effect).
                    access_count: (currentMetadata.access_count || 0) + boostFactor,
                };
                await this.#putObservation(relId, relEntityId, relText, updatedMetadata, relCreatedAt);
                boosted++;
            }
            return boosted;
        }
        catch (error) {
            console.error('[MemoryActivation] Error boosting related memories:', error);
            return 0;
        }
    }
}
|
|
295
|
+
exports.MemoryActivationService = MemoryActivationService;
|
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Query-Aware Flow Diffusion for Graph Traversal
|
|
4
|
+
*
|
|
5
|
+
* Based on QAFD-RAG (ICLR 2026):
|
|
6
|
+
* - Dynamically weights edges based on query-node semantic alignment
|
|
7
|
+
* - Uses flow diffusion similar to Personalized PageRank
|
|
8
|
+
* - Provides statistical guarantees for subgraph retrieval
|
|
9
|
+
* - Training-free approach using cosine similarity
|
|
10
|
+
*
|
|
11
|
+
* Reference: "Query-Aware Flow Diffusion for Graph-Based RAG with Retrieval Guarantees"
|
|
12
|
+
*/
|
|
13
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
14
|
+
exports.QueryAwareTraversal = void 0;
|
|
15
|
+
/**
|
|
16
|
+
* Query-Aware Graph Traversal Service
|
|
17
|
+
*
|
|
18
|
+
* Implements flow diffusion with dynamic edge weighting based on query semantics.
|
|
19
|
+
*/
|
|
20
|
+
/**
 * Query-Aware Graph Traversal Service
 *
 * Implements flow diffusion with dynamic edge weighting based on query
 * semantics (QAFD-RAG style): edge weights are the cosine similarity between
 * the target node's embedding and the query embedding, and scores diffuse
 * outward from seed entities with damping.
 */
class QueryAwareTraversal {
    db;               // Cozo database handle exposing async `run(query, params)`
    embeddingService; // service exposing async `embed(text) -> number[]`
    /**
     * @param {object} db - Cozo database handle.
     * @param {object} embeddingService - Embedding provider for query text.
     */
    constructor(db, embeddingService) {
        this.db = db;
        this.embeddingService = embeddingService;
    }
    /**
     * Escape a value for inlining inside a double-quoted CozoScript string
     * literal. Fix: values were previously interpolated raw, so an id or
     * relation type containing `"` or `\` would corrupt (or inject into)
     * the generated query.
     */
    #quote(value) {
        return `"${String(value).replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"`;
    }
    /** Build the relation-type filter clause, or 'true' when unfiltered. */
    #relationFilter(relationTypes) {
        return relationTypes && relationTypes.length > 0
            ? `is_in(relation_type, [${relationTypes.map(t => this.#quote(t)).join(', ')}])`
            : 'true';
    }
    /**
     * Map raw result rows into result objects.
     * Row layout: [id, name, type, sum(flow_score), min(hop), metadata].
     */
    #mapRows(rows, source) {
        return rows.map((row) => ({
            id: row[0],
            entity_id: row[0],
            name: row[1],
            type: row[2],
            score: row[3],
            hops: row[4],
            path_score: row[3],
            metadata: row[5],
            source
        }));
    }
    /**
     * Performs query-aware graph traversal from a start entity.
     *
     * @param {string} startEntityId - Starting entity ID.
     * @param {string} query - Query string for semantic alignment.
     * @param {object} [options] - maxHops, dampingFactor, minScore, topK,
     *   relationTypes. (`convergenceThreshold` is accepted but currently unused.)
     * @returns {Promise<Array>} Ranked query-aware results.
     * @throws Rethrows any embedding or database error.
     */
    async traverse(startEntityId, query, options = {}) {
        const { maxHops = 3, dampingFactor = 0.85, minScore = 0.1, topK = 20, relationTypes } = options;
        console.log('[QueryAwareTraversal] Starting traversal:', {
            startEntityId,
            query,
            maxHops,
            dampingFactor
        });
        try {
            const queryEmbedding = await this.embeddingService.embed(query);
            const relationFilter = this.#relationFilter(relationTypes);
            // Query-Aware Flow Diffusion expressed in Datalog
            // (a simplified version of QAFD-RAG's flow diffusion).
            const datalogQuery = `
        # Initialize: Start entity has score 1.0
        flow[entity_id, hop, score] :=
          entity_id = $start_id,
          hop = 0,
          score = 1.0

        # Edge weights from query-node semantic alignment:
        # weight = 1 - cos_dist(node_embedding, query_embedding)
        edge_weight[from_id, to_id, weight, relation_type] :=
          *relationship{
            from_id,
            to_id,
            relation_type,
            @ "NOW"
          },
          ${relationFilter},
          *entity{
            id: to_id,
            embedding: to_emb,
            @ "NOW"
          },
          similarity = cos_dist(to_emb, vec($query_emb)),
          weight = 1.0 - similarity,
          weight > 0.0

        # Flow diffusion: score_new = damping * score * weight
        flow[to_id, hop_new, score_new] :=
          flow[from_id, hop, score],
          hop < $max_hops,
          edge_weight[from_id, to_id, weight, _],
          hop_new = hop + 1,
          score_new = $damping * score * weight,
          score_new >= $min_score

        # Join with entity data; non-aggregated vars are grouping keys
        ?[id, name, type, sum(flow_score), min(hop), metadata] :=
          flow[id, hop, flow_score],
          *entity{
            id,
            name,
            type,
            metadata,
            @ "NOW"
          }

        :order -sum(flow_score)
        :limit $top_k
      `;
            const result = await this.db.run(datalogQuery, {
                start_id: startEntityId,
                query_emb: queryEmbedding,
                max_hops: maxHops,
                damping: dampingFactor,
                min_score: minScore,
                top_k: topK
            });
            const results = this.#mapRows(result.rows, 'query_aware_traversal');
            console.log('[QueryAwareTraversal] Traversal completed:', {
                resultsCount: results.length,
                topScore: results[0]?.score || 0
            });
            return results;
        }
        catch (error) {
            console.error('[QueryAwareTraversal] Error during traversal:', error);
            throw error;
        }
    }
    /**
     * Performs query-aware traversal from multiple seed entities.
     *
     * Useful for expanding from vector-search hits in a query-aware manner.
     * Each seed starts with flow 1/N so the initial mass sums to 1.
     *
     * @param {string[]} seedEntityIds - Starting entity IDs.
     * @param {string} query - Query string for semantic alignment.
     * @param {object} [options] - Same options as `traverse`.
     * @returns {Promise<Array>} Ranked query-aware results.
     * @throws Rethrows any embedding or database error.
     */
    async traverseFromSeeds(seedEntityIds, query, options = {}) {
        const { maxHops = 3, dampingFactor = 0.85, minScore = 0.1, topK = 20, relationTypes } = options;
        console.log('[QueryAwareTraversal] Starting multi-seed traversal:', {
            seedCount: seedEntityIds.length,
            query,
            maxHops
        });
        try {
            const queryEmbedding = await this.embeddingService.embed(query);
            const relationFilter = this.#relationFilter(relationTypes);
            // Seed ids are escaped before inlining (see #quote).
            const seedList = seedEntityIds.map(id => this.#quote(id)).join(', ');
            const datalogQuery = `
        # Initialize: All seed entities start with equal score
        seeds[entity_id] := entity_id in [${seedList}]

        flow[entity_id, hop, score] :=
          seeds[entity_id],
          hop = 0,
          score = 1.0 / to_float(${seedEntityIds.length})

        # Compute query-aware edge weights
        edge_weight[from_id, to_id, weight, relation_type] :=
          *relationship{
            from_id,
            to_id,
            relation_type,
            @ "NOW"
          },
          ${relationFilter},
          *entity{
            id: to_id,
            embedding: to_emb,
            @ "NOW"
          },
          similarity = cos_dist(to_emb, vec($query_emb)),
          weight = 1.0 - similarity,
          weight > 0.0

        # Flow diffusion with query-aware weighting
        flow[to_id, hop_new, score_new] :=
          flow[from_id, hop, score],
          hop < $max_hops,
          edge_weight[from_id, to_id, weight, _],
          hop_new = hop + 1,
          score_new = $damping * score * weight,
          score_new >= $min_score

        # Join with entity data and aggregate scores
        ?[id, name, type, sum(flow_score), min(hop), metadata] :=
          flow[id, hop, flow_score],
          *entity{
            id,
            name,
            type,
            metadata,
            @ "NOW"
          }

        :order -sum(flow_score)
        :limit $top_k
      `;
            const result = await this.db.run(datalogQuery, {
                query_emb: queryEmbedding,
                max_hops: maxHops,
                damping: dampingFactor,
                min_score: minScore,
                top_k: topK
            });
            const results = this.#mapRows(result.rows, 'query_aware_multi_seed');
            console.log('[QueryAwareTraversal] Multi-seed traversal completed:', {
                resultsCount: results.length,
                topScore: results[0]?.score || 0
            });
            return results;
        }
        catch (error) {
            console.error('[QueryAwareTraversal] Error during multi-seed traversal:', error);
            throw error;
        }
    }
    /**
     * Hybrid approach: HNSW vector search finds seed entities, then
     * query-aware traversal expands the subgraph from those seeds.
     *
     * @param {string} query - Query string.
     * @param {object} [options] - Traversal options plus `seedTopK`
     *   (number of vector-search seeds, default 5).
     * @returns {Promise<Array>} Ranked results; empty array when no seeds found.
     * @throws Rethrows any embedding or database error.
     */
    async hybridSearch(query, options = {}) {
        const { seedTopK = 5, ...traversalOptions } = options;
        console.log('[QueryAwareTraversal] Starting hybrid search:', {
            query,
            seedTopK
        });
        try {
            // Step 1: Vector search (HNSW index `entity:semantic`) for seeds.
            const queryEmbedding = await this.embeddingService.embed(query);
            const seedQuery = `
        ?[id] :=
          ~entity:semantic{
            id |
            query: vec($embedding),
            k: $seed_k,
            ef: 100
          }
      `;
            const seedResult = await this.db.run(seedQuery, {
                embedding: queryEmbedding,
                seed_k: seedTopK
            });
            if (seedResult.rows.length === 0) {
                console.log('[QueryAwareTraversal] No seed entities found');
                return [];
            }
            const seedIds = seedResult.rows.map((row) => row[0]);
            console.log('[QueryAwareTraversal] Found seed entities:', seedIds.length);
            // Step 2: Query-aware traversal from seeds.
            return await this.traverseFromSeeds(seedIds, query, traversalOptions);
        }
        catch (error) {
            console.error('[QueryAwareTraversal] Error during hybrid search:', error);
            throw error;
        }
    }
}
|
|
291
|
+
exports.QueryAwareTraversal = QueryAwareTraversal;
|