persyst-mcp 2.1.3 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/init.js +7 -0
- package/index.js +41 -0
- package/package.json +2 -2
- package/src/attestation.js +7 -1
- package/src/database.js +973 -877
- package/src/extractor-heuristic.js +324 -250
- package/src/git.js +7 -1
- package/src/search.js +597 -456
- package/src/server.js +72 -67
- package/src/tools.js +157 -20
- package/src/watcher.js +306 -0
package/src/search.js
CHANGED
|
@@ -1,456 +1,597 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* search.js — Hybrid Search & Context Optimization Engine
|
|
3
|
-
*
|
|
4
|
-
* Combines keyword and semantic searches, integrates temporal decay,
|
|
5
|
-
* applies agent reputation scores, generates cryptographic search attestations,
|
|
6
|
-
* builds graph-hopped optimized LLM context prompts, and applies MMR
|
|
7
|
-
* for diverse result retrieval.
|
|
8
|
-
*/
|
|
9
|
-
|
|
10
|
-
import db, {
|
|
11
|
-
searchKeyword,
|
|
12
|
-
searchVector,
|
|
13
|
-
getMemoryById,
|
|
14
|
-
boostMemory,
|
|
15
|
-
getProvenance,
|
|
16
|
-
getMemoriesByEntity
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
import {
|
|
20
|
-
import {
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
*
|
|
27
|
-
*
|
|
28
|
-
*
|
|
29
|
-
* @param {
|
|
30
|
-
* @param {
|
|
31
|
-
* @param {string|null}
|
|
32
|
-
* @
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
//
|
|
48
|
-
|
|
49
|
-
const
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
const
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
const
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
let
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
*
|
|
161
|
-
*
|
|
162
|
-
*
|
|
163
|
-
*
|
|
164
|
-
*
|
|
165
|
-
* @param {
|
|
166
|
-
* @
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
}
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
*
|
|
216
|
-
*
|
|
217
|
-
*
|
|
218
|
-
* @
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
*
|
|
237
|
-
*
|
|
238
|
-
* @param {string
|
|
239
|
-
* @param {
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
const
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
}
|
|
449
|
-
}
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
}
|
|
1
|
+
/**
|
|
2
|
+
* search.js — Hybrid Search & Context Optimization Engine
|
|
3
|
+
*
|
|
4
|
+
* Combines keyword and semantic searches, integrates temporal decay,
|
|
5
|
+
* applies agent reputation scores, generates cryptographic search attestations,
|
|
6
|
+
* builds graph-hopped optimized LLM context prompts, and applies MMR
|
|
7
|
+
* for diverse result retrieval.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import db, {
|
|
11
|
+
searchKeyword,
|
|
12
|
+
searchVector,
|
|
13
|
+
getMemoryById,
|
|
14
|
+
boostMemory,
|
|
15
|
+
getProvenance,
|
|
16
|
+
getMemoriesByEntity,
|
|
17
|
+
getAllEntities
|
|
18
|
+
} from './database.js';
|
|
19
|
+
import { generateEmbedding } from './embeddings.js';
|
|
20
|
+
import { createAttestation } from './attestation.js';
|
|
21
|
+
import { searchCache, LRUCache } from './cache.js';
|
|
22
|
+
|
|
23
|
+
// Last SQLite `data_version` value observed by searchHybrid. When the pragma
// reports a different value, searchHybrid invalidates the search cache and
// stores the new value here.
let lastDataVersion = 0;
|
|
24
|
+
|
|
25
|
+
/**
 * Search memories using both keyword and semantic strategies.
 *
 * Ranked results are cached in the LRU cache for repeated queries. The
 * attestation is never cached: it is bound to the calling agent/session, so it
 * is generated for every call (cache hit or miss) unless `skipAttestation` is set.
 *
 * @param {string} queryText - What to search for
 * @param {number} limit - Max results to return (default: 5)
 * @param {string|null} agentId - Identifying string for the querying agent
 * @param {string|null} sessionId - Session identifier
 * @param {string|null} namespace - Namespace handed to getMemoryById to filter results (null = no filter)
 * @param {boolean} skipAttestation - When true, no attestation is created or attached
 * @returns {Promise<Array>} Ranked search results (with .attestation property attached unless skipped)
 */
export async function searchHybrid(queryText, limit = 5, agentId = null, sessionId = null, namespace = null, skipAttestation = false) {
  // Sync in-memory cache with external DB changes using sqlite data_version
  try {
    const currentDataVersion = db.pragma('data_version', { simple: true });
    if (currentDataVersion !== lastDataVersion) {
      searchCache.invalidate();
      lastDataVersion = currentDataVersion;
    }
  } catch (_) {
    // Deliberate best effort: if the pragma fails we keep serving from the cache as-is.
  }

  // Returns a fresh array so callers never hold (or mutate) the cached instance,
  // and so the attestation always reflects THIS caller's agentId/sessionId.
  const finalize = (results) => {
    const out = [...results];
    if (!skipAttestation) {
      out.attestation = createAttestation(queryText, out, agentId, sessionId);
    }
    return out;
  };

  // --- Check LRU cache first (Feature 1) ---
  // Include namespace in cache key to prevent cross-namespace cache hits
  const cacheKey = LRUCache.key(`${namespace || 'all'}:${queryText}`, limit);
  const cached = searchCache.get(cacheKey);
  if (cached) {
    console.error(`[persyst-cache] Cache HIT for query: "${queryText.slice(0, 50)}..."`);
    return finalize(cached);
  }

  // --- Step 1: Keyword search (fast, exact matches) ---
  const keywordHits = searchKeyword(queryText, limit * 2);
  const keywordIds = new Set(keywordHits.map(r => r.id));

  // --- Step 2: Semantic search (meaning-based) ---
  const queryEmbedding = await generateEmbedding(queryText);
  const vecHits = searchVector(queryEmbedding, limit * 2);

  const semanticResults = vecHits.map(r => ({
    id: r.rowid,
    distance: r.distance,
    // Convert L2 distance to 0-1 similarity score
    similarity: Math.max(0, 1 - (r.distance * r.distance) / 2)
  }));

  // --- Step 3: Merge results with keyword boost ---
  const combined = semanticResults
    .map(r => {
      const isKeywordMatch = keywordIds.has(r.id);
      return {
        id: r.id,
        similarity: r.similarity,
        hybrid_score: r.similarity + (isKeywordMatch ? 0.2 : 0),
        keyword_match: isKeywordMatch
      };
    })
    // Filter out low similarity semantic matches if they have no keyword match (threshold 0.30)
    .filter(r => r.keyword_match || r.similarity >= 0.30);

  // Add keyword-only hits that semantic search missed
  const semanticIds = new Set(semanticResults.map(r => r.id));
  for (const id of keywordIds) {
    if (!semanticIds.has(id)) {
      combined.push({
        id,
        similarity: 0,
        hybrid_score: 0.2, // Keyword-only base score
        keyword_match: true
      });
    }
  }

  // Reputation lookup: the statement is prepared lazily (only if some result has
  // agent provenance) and each agent is queried at most once per search.
  let reputationStmt = null;
  const agentRows = new Map();
  const lookupAgentRow = (sourceId) => {
    if (!agentRows.has(sourceId)) {
      if (!reputationStmt) {
        reputationStmt = db.prepare('SELECT reputation_score FROM agent_stats WHERE agent_id = ?');
      }
      agentRows.set(sourceId, reputationStmt.get(sourceId));
    }
    return agentRows.get(sourceId);
  };

  // --- Step 4: Fetch full details, apply namespace filter, reputation adjust, sort and return top N ---
  const finalResults = combined
    .map(r => {
      // Use namespace-aware getMemoryById to filter by agent namespace
      const memory = getMemoryById(r.id, namespace);
      if (!memory) return null; // Memory was archived, deleted, or not in namespace

      // Boost memory access metrics
      // NOTE(review): this runs for every candidate, including ones MMR later drops — confirm intended.
      boostMemory(r.id);

      // Fetch reputation stats for weighting
      let reputationScore = 1.0;
      let reputationWarning = false;
      const prov = memory.provenance;
      if (prov && prov.source_type === 'agent' && prov.source_id) {
        const agentRow = lookupAgentRow(prov.source_id);
        if (agentRow) {
          reputationScore = agentRow.reputation_score;
          if (reputationScore < 0.5) {
            reputationWarning = true;
          }
        }
      }

      // Final score formula: base_score * agent_reputation
      const finalScore = r.hybrid_score * reputationScore;

      return {
        id: memory.id,
        content: memory.content,
        importance_score: memory.importance_score,
        created_at: memory.created_at,
        last_accessed: memory.last_accessed,
        similarity: r.similarity.toFixed(4),
        hybrid_score: finalScore.toFixed(4),
        keyword_match: r.keyword_match,
        reputation_warning: reputationWarning,
        provenance: prov
      };
    })
    .filter(Boolean);

  // Sort by final score descending
  finalResults.sort((a, b) => parseFloat(b.hybrid_score) - parseFloat(a.hybrid_score));

  // --- Step 5: Apply MMR for diverse retrieval (Feature 3) ---
  const mmrResults = applyMMR(finalResults, limit);

  // --- Store in LRU cache (Feature 1) ---
  // Only the plain ranked results are cached; see finalize() for the attestation.
  searchCache.set(cacheKey, mmrResults);

  // Generate cryptographic attestation for audit trails (skip if called internally)
  return finalize(mmrResults);
}
|
|
158
|
+
|
|
159
|
+
/**
 * Re-rank scored results with Maximal Marginal Relevance (MMR).
 *
 * The first candidate is always kept. Every further pick maximises
 * `lambda * relevance - (1 - lambda) * overlap`, where relevance is the parsed
 * `hybrid_score` and overlap is the highest word-level Jaccard similarity
 * between the candidate's content and any result already picked.
 *
 * @param {Array} candidates - Scored search results (expected best-first)
 * @param {number} limit - Max results to return
 * @param {number} lambda - Trade-off parameter (0.7 = 70% relevance, 30% diversity)
 * @returns {Array} MMR-reranked results; the input array itself when it already fits within `limit`
 */
function applyMMR(candidates, limit, lambda = 0.7) {
  if (candidates.length <= limit) {
    return candidates;
  }

  // Head of the list is taken unconditionally; the rest compete for the remaining slots.
  const [top, ...pool] = candidates;
  const picked = [top];
  const diversityWeight = 1 - lambda;

  while (picked.length < limit && pool.length > 0) {
    let winner = -1;
    let winnerScore = -Infinity;

    pool.forEach((entry, idx) => {
      // Highest content overlap with anything already picked
      const overlap = picked.reduce((highest, chosen) => {
        const sim = jaccardSimilarity(entry.content, chosen.content);
        return sim > highest ? sim : highest;
      }, 0);

      const score = lambda * parseFloat(entry.hybrid_score) - diversityWeight * overlap;
      if (score > winnerScore) {
        winnerScore = score;
        winner = idx;
      }
    });

    // No candidate produced a comparable score (e.g. all NaN): stop early
    if (winner < 0) {
      break;
    }
    picked.push(pool.splice(winner, 1)[0]);
  }

  return picked;
}
|
|
213
|
+
|
|
214
|
+
/**
 * Compute Jaccard similarity between two text strings.
 * Uses case-insensitive, whitespace-delimited word tokens. Empty tokens (from
 * empty strings or leading/trailing whitespace) are discarded so padding does
 * not count as a word.
 *
 * @param {string} a - First text (null/undefined is treated as empty)
 * @param {string} b - Second text (null/undefined is treated as empty)
 * @returns {number} Similarity score between 0 and 1; 0 when neither text has any word
 */
function jaccardSimilarity(a, b) {
  const tokenize = (text) => {
    const normalized = text == null ? '' : String(text).toLowerCase();
    // filter(Boolean) drops the '' tokens split() yields for empty/padded input
    return new Set(normalized.split(/\s+/).filter(Boolean));
  };

  const wordsA = tokenize(a);
  const wordsB = tokenize(b);

  let intersection = 0;
  for (const word of wordsA) {
    if (wordsB.has(word)) intersection++;
  }

  const union = wordsA.size + wordsB.size - intersection;
  return union === 0 ? 0 : intersection / union;
}
|
|
234
|
+
|
|
235
|
+
/**
 * Optimizes the retrieved context by walking the knowledge graph and compressing content to fit max_tokens.
 *
 * Pipeline: seed with entities named in the query and the top hybrid-search
 * hits, traverse `edges` breadth-first, score the reached memories, then
 * greedily pack the best-scoring memories into the token budget.
 *
 * @param {string} queryText - User's query
 * @param {number} maxTokens - Hard limit of tokens for context prompt
 * @param {string|null} agentId - Querying agent identifier
 * @param {string|null} sessionId - Current session ID
 * @param {string|null} namespace - Namespace passed to searchHybrid/getMemoryById (null = no filter)
 * @returns {Promise<{context: string, memories: Array, attestation: *}>} Formatted context block, the
 *   memories included in it, and an attestation (null when nothing was included)
 */
export async function getOptimizedContext(queryText, maxTokens, agentId = null, sessionId = null, namespace = null) {
  // Extract entities mentioned in the query text to seed the graph search directly
  const queryLower = queryText.toLowerCase();
  const matchedEntityIds = new Set();
  for (const ent of getAllEntities(100)) {
    const entNameLower = ent.name.toLowerCase();
    // An empty name is a substring of every string; never treat it as a match
    if (entNameLower && queryLower.includes(entNameLower)) {
      matchedEntityIds.add(ent.id);
    }
  }

  // 1. Run hybrid search to fetch top 5 memories as seeds (skip attestation to avoid double-write)
  const searchHits = await searchHybrid(queryText, 5, agentId, sessionId, namespace, true);
  const candidates = new Map();

  for (const hit of searchHits) {
    candidates.set(hit.id, {
      id: hit.id,
      content: hit.content,
      importance_score: hit.importance_score,
      created_at: hit.created_at,
      last_accessed: hit.last_accessed,
      score: parseFloat(hit.hybrid_score),
      provenance: hit.provenance,
      source: 'search'
    });
  }

  // 2. Perform Graph Hop (multi-hop traversal) globally
  const hopQueue = [];
  const visitedNodes = new Set(); // Stores "type:id" keys

  const enqueue = (id, type, depth) => {
    const key = `${type}:${id}`;
    if (!visitedNodes.has(key)) {
      visitedNodes.add(key);
      hopQueue.push({ id, type, depth });
    }
  };

  // Seed with matched entities from query text
  for (const entId of matchedEntityIds) {
    enqueue(entId, 'entity', 0);
  }

  // Seed with search hit memories
  for (const hit of searchHits) {
    enqueue(hit.id, 'memory', 0);
  }

  // Prepared once for the whole traversal, and only when there is something to traverse
  const edgeStmt = hopQueue.length > 0
    ? db.prepare(`
      SELECT * FROM edges
      WHERE (source_id = ? AND source_type = ?)
      OR (target_id = ? AND target_type = ?)
    `)
    : null;

  // BFS to traverse memories and entities uniformly up to depth 4.
  // A moving head index is used instead of shift() so each dequeue is O(1).
  for (let head = 0; head < hopQueue.length; head++) {
    const { id, type, depth } = hopQueue[head];
    if (depth >= 4) continue;

    const connectedEdges = edgeStmt.all(id, type, id, type);

    for (const edge of connectedEdges) {
      // Edges are followed in both directions: step to whichever end is not the current node
      const outgoing = edge.source_id === id && edge.source_type === type;
      const nextId = outgoing ? edge.target_id : edge.source_id;
      const nextType = outgoing ? edge.target_type : edge.source_type;
      enqueue(nextId, nextType, depth + 1);
    }
  }

  // Hopped memories start at half the best search score (0.4 when the search found nothing)
  const hopBaseScore = searchHits.length > 0
    ? Math.max(...searchHits.map(h => parseFloat(h.hybrid_score))) * 0.5
    : 0.4;

  // Now collect all hopped memories from the visited nodes
  for (const key of visitedNodes) {
    const [type, idStr] = key.split(':');
    if (type !== 'memory') continue;

    const memId = Number(idStr);
    if (candidates.has(memId)) continue; // Keep search hit info

    // Check namespace filter if present
    const other = getMemoryById(memId, namespace);
    if (!other) continue;

    candidates.set(memId, {
      id: other.id,
      content: other.content,
      importance_score: other.importance_score,
      created_at: other.created_at,
      last_accessed: other.last_accessed,
      score: hopBaseScore,
      provenance: getProvenance(memId),
      source: 'hop'
    });
  }

  // 3. Apply Scoring Adjustments
  const now = Math.floor(Date.now() / 1000);
  const list = Array.from(candidates.values());

  // Reputation statement is prepared lazily and each agent is looked up once
  let reputationStmt = null;
  const agentRows = new Map();
  const lookupAgentRow = (sourceId) => {
    if (!agentRows.has(sourceId)) {
      if (!reputationStmt) {
        reputationStmt = db.prepare('SELECT reputation_score FROM agent_stats WHERE agent_id = ?');
      }
      agentRows.set(sourceId, reputationStmt.get(sourceId));
    }
    return agentRows.get(sourceId);
  };

  for (const c of list) {
    // 3a. Temporal decay: score *= exp(-0.01 * hours_since_accessed)
    const hours = Math.max(0, (now - c.last_accessed) / 3600);
    c.score *= Math.exp(-0.01 * hours);

    // 3b. Agent reputation weighting.
    // Search hits are skipped: their hybrid_score from searchHybrid is already
    // multiplied by the agent's reputation, so weighting again would square it.
    if (c.source !== 'hop') continue;

    let reputationScore = 1.0;
    if (c.provenance && c.provenance.source_type === 'agent' && c.provenance.source_id) {
      const agentRow = lookupAgentRow(c.provenance.source_id);
      if (agentRow) {
        reputationScore = agentRow.reputation_score;
      }
    }
    c.score *= reputationScore;
  }

  // 4. Sort candidates
  list.sort((a, b) => b.score - a.score);

  // 5. Compress context to fit maxTokens
  let currentTokens = 0;
  const accepted = [];

  for (const c of list) {
    // Heuristic: ~4 characters per token + format headers (~15 tokens)
    const estimatedTokens = Math.max(1, Math.ceil(c.content.length / 4) + 15);
    if (currentTokens + estimatedTokens > maxTokens) {
      // Too big for what is left; a smaller, lower-ranked memory may still fit
      continue;
    }
    currentTokens += estimatedTokens;
    accepted.push(c);
  }

  // 6. Format LLM injection context string
  let context = '=== RETRIEVED AGENT MEMORY CONTEXT ===\n';
  if (accepted.length === 0) {
    context += 'No relevant memories retrieved.\n';
  } else {
    for (const a of accepted) {
      let sourceTag = 'Source: manual';
      if (a.provenance) {
        sourceTag = `Source: ${a.provenance.source_type}${a.provenance.source_id ? ` (${a.provenance.source_id})` : ''}`;
      }
      context += `[Memory #${a.id}] (Score: ${a.score.toFixed(4)}, ${sourceTag})\n${a.content}\n---\n`;
    }
  }
  context += '=== END OF CONTEXT ===';

  // Bug 8 fix: Skip attestation when no results to avoid audit noise
  let attestation = null;
  if (accepted.length > 0) {
    attestation = createAttestation(queryText, accepted, agentId, sessionId);
  }

  return {
    context,
    memories: accepted,
    attestation
  };
}
|
|
417
|
+
|
|
418
|
+
/**
 * Analyze relationship between two similar memories based on token sets.
 *
 * Words are lower-cased, split on whitespace and stripped of common
 * punctuation. Classification order:
 *   1. either side has no words, or the word sets are identical -> duplicate (keep 'a')
 *   2. one side covers > 85% of the other's words and is larger -> subset (keep the larger)
 *   3. Jaccard distance strictly between 0.15 and 0.5           -> contradiction
 *   4. Jaccard distance below 0.25                              -> duplicate (keep 'canonical')
 *   5. otherwise                                                -> different
 *
 * The subset test runs before the contradiction test on purpose: a text that
 * merely adds detail to the other usually lands in the 0.15-0.5 distance band
 * and must not be reported as contradicting it.
 *
 * @param {string} a - Content of memory A
 * @param {string} b - Content of memory B
 * @returns {{ type: 'duplicate'|'subset'|'contradiction'|'different', keep?: 'a'|'b'|'canonical' }}
 */
function checkRelationship(a, b) {
  const getWords = (text) => new Set(text.toLowerCase().split(/\s+/).map(w => w.replace(/[.,\/#!$%\^&\*;:{}=\-_`~()]/g, "")).filter(Boolean));
  const wordsA = getWords(a);
  const wordsB = getWords(b);

  // Nothing to compare against: treated as a duplicate
  if (wordsA.size === 0 || wordsB.size === 0) return { type: 'duplicate', keep: 'a' };

  let intersection = 0;
  for (const w of wordsA) {
    if (wordsB.has(w)) intersection++;
  }

  // Share of each side's vocabulary that also appears in the other side
  const overlapA = intersection / wordsA.size;
  const overlapB = intersection / wordsB.size;

  // Jaccard DISTANCE (0 = identical word sets, 1 = disjoint)
  const union = wordsA.size + wordsB.size - intersection;
  const distance = 1 - (intersection / union);

  if (distance === 0) {
    return { type: 'duplicate', keep: 'a' };
  }

  // Subset check: the larger text subsumes (almost) all words of the smaller one
  if (overlapA > 0.85 && wordsB.size > wordsA.size) {
    return { type: 'subset', keep: 'b' };
  }
  if (overlapB > 0.85 && wordsA.size > wordsB.size) {
    return { type: 'subset', keep: 'a' };
  }

  // Contradiction: similar topic, differing key terms
  if (distance > 0.15 && distance < 0.5) {
    return { type: 'contradiction' };
  }

  // Duplicate
  if (distance < 0.25) {
    return { type: 'duplicate', keep: 'canonical' };
  }

  return { type: 'different' };
}
|
|
466
|
+
|
|
467
|
+
/**
 * Performs memory consolidation by merging highly similar memories.
 *
 * For every active memory, its nearest vector neighbours (k = 10) with
 * similarity > 0.80 form a group. The group is ranked by trust and resolved
 * against a running canonical memory: contradictions, subsets and duplicates
 * are archived (valid_until set) and recorded in `contradictions`.
 *
 * Bug 6 fix: the DB mutations for each group are wrapped in a transaction, so a
 * failure part-way through a group rolls that group back instead of leaving
 * archived rows without their audit/reputation updates.
 *
 * @param {string|null} namespace - When set, only memories in this namespace or in
 *   'shared' are considered, both as seeds and as merge/archive targets
 * @returns {Promise<{success: boolean, consolidated_groups: number, details: Array}>}
 *   `details` holds one `{ canonical_id, merged_content, archived_ids }` entry per group that changed
 */
export async function consolidateMemories(namespace = null) {
  // Lazily prepared, memoised statements: each SQL string is prepared at most
  // once per run, and only if the code path that needs it is actually reached.
  const statements = new Map();
  const stmt = (sql) => {
    let prepared = statements.get(sql);
    if (!prepared) {
      prepared = db.prepare(sql);
      statements.set(sql, prepared);
    }
    return prepared;
  };

  const NEIGHBOURS_SQL = `
    SELECT rowid AS id, distance
    FROM memories_vec
    WHERE embedding MATCH ?
    AND k = 10
  `;
  const REPUTATION_SQL = 'UPDATE agent_stats SET reputation_score = (memories_confirmed + 1.0) / (memories_contradicted + 1.0) WHERE agent_id = ?';

  const query = namespace
    ? "SELECT * FROM memories WHERE valid_until IS NULL AND (namespace = ? OR namespace = 'shared')"
    : 'SELECT * FROM memories WHERE valid_until IS NULL';
  const activeMemories = namespace
    ? db.prepare(query).all(namespace)
    : db.prepare(query).all();
  const consolidated = [];
  const visited = new Set();

  // Same scope rule as the seed query above. Vector neighbours come from the
  // whole index, so without this a namespaced run could archive foreign memories.
  const inScope = (memory) => !namespace || memory.namespace === namespace || memory.namespace === 'shared';

  // Trust = provenance confidence * source agent reputation (1.0 when unknown)
  const getTrust = (m) => {
    const prov = getProvenance(m.id);
    let reputation = 1.0;
    if (prov && prov.source_type === 'agent' && prov.source_id) {
      const agentRow = stmt('SELECT reputation_score FROM agent_stats WHERE agent_id = ?').get(prov.source_id);
      if (agentRow) reputation = agentRow.reputation_score;
    }
    return (prov ? prov.confidence : 1.0) * reputation;
  };

  // Archive `oldId` and record that `newId` supersedes it
  const archive = (oldId, newId, reason) => {
    stmt('UPDATE memories SET valid_until = unixepoch() WHERE id = ?').run(oldId);
    stmt('INSERT INTO contradictions (old_memory_id, new_memory_id, resolution_reason) VALUES (?, ?, ?)')
      .run(oldId, newId, reason);
  };

  // Cross-agent contradiction: the losing agent gets a contradiction, the winning agent a confirmation
  const applyReputation = (loserId, winnerId) => {
    const oldProv = getProvenance(loserId);
    const newProv = getProvenance(winnerId);
    if (!(oldProv && oldProv.source_type === 'agent' && oldProv.source_id)) return;

    const isSelf = newProv && newProv.source_type === 'agent' && newProv.source_id === oldProv.source_id;
    if (isSelf) return;

    stmt('UPDATE agent_stats SET memories_contradicted = memories_contradicted + 1 WHERE agent_id = ?').run(oldProv.source_id);
    stmt(REPUTATION_SQL).run(oldProv.source_id);
    if (newProv && newProv.source_type === 'agent' && newProv.source_id) {
      stmt('UPDATE agent_stats SET memories_confirmed = memories_confirmed + 1 WHERE agent_id = ?').run(newProv.source_id);
      stmt(REPUTATION_SQL).run(newProv.source_id);
    }
  };

  // Resolve one ranked group atomically. Members judged 'different' are left
  // untouched (and unvisited) so they can seed their own group later.
  const resolveGroup = db.transaction((ranked) => {
    let canonical = ranked[0];
    const archivedIds = [];
    visited.add(canonical.id);

    for (let i = 1; i < ranked.length; i++) {
      const current = ranked[i];
      const rel = checkRelationship(canonical.content, current.content);

      if (rel.type === 'contradiction') {
        // Resolve contradiction: keep canonical, archive current
        archive(current.id, canonical.id, `Consolidated contradiction: resolved in favor of canonical #${canonical.id}`);
        applyReputation(current.id, canonical.id);

        archivedIds.push(current.id);
        visited.add(current.id);
      } else if (rel.type === 'subset') {
        if (rel.keep === 'b') {
          // current (B) is a superset of canonical (A). Swap them
          archive(canonical.id, current.id, `Consolidated subset: replaced by more detailed #${current.id}`);

          archivedIds.push(canonical.id);
          canonical = current;
        } else {
          // canonical is superset
          archive(current.id, canonical.id, `Consolidated subset: subsumed by more detailed #${canonical.id}`);

          archivedIds.push(current.id);
        }
        visited.add(current.id);
      } else if (rel.type === 'duplicate') {
        archive(current.id, canonical.id, `Consolidated duplicate of #${canonical.id}`);

        archivedIds.push(current.id);
        visited.add(current.id);
      }
    }

    return { canonical, archivedIds };
  });

  for (const mem of activeMemories) {
    if (visited.has(mem.id)) continue;

    // Search for similar memories
    const embedding = stmt('SELECT embedding FROM memories_vec WHERE rowid = ?').get(mem.id);
    if (!embedding) continue;

    const hits = stmt(NEIGHBOURS_SQL).all(embedding.embedding);

    const group = [];
    for (const hit of hits) {
      const hitId = Number(hit.id);
      if (visited.has(hitId)) continue;

      // Same L2-distance -> similarity conversion as searchHybrid
      const sim = Math.max(0, 1 - (hit.distance * hit.distance) / 2);
      if (!(sim > 0.80)) continue;

      const other = stmt('SELECT * FROM memories WHERE id = ? AND valid_until IS NULL').get(hitId);
      if (other && inScope(other)) {
        group.push(other);
      }
    }

    if (group.length <= 1) continue;

    // Sort group by trust score (confidence * reputation) desc, then importance_score desc, then id asc
    const groupWithTrust = group.map(m => ({ ...m, trust: getTrust(m) }));
    groupWithTrust.sort((a, b) => b.trust - a.trust || b.importance_score - a.importance_score || a.id - b.id);

    // Resolve the group sequentially
    const { canonical, archivedIds } = resolveGroup(groupWithTrust);

    if (archivedIds.length > 0) {
      consolidated.push({
        canonical_id: canonical.id,
        merged_content: canonical.content,
        archived_ids: archivedIds
      });
    }
  }

  // PRAGMA data_version does not change for commits made on this same
  // connection, so searchHybrid would not notice the rows archived above.
  if (consolidated.length > 0) {
    searchCache.invalidate();
  }

  return {
    success: true,
    consolidated_groups: consolidated.length,
    details: consolidated
  };
}
|