persyst-mcp 2.2.5 → 2.2.7
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
- package/README.md +103 -114
- package/bin/export.js +4 -4
- package/bin/extract.js +8 -8
- package/bin/import.js +15 -15
- package/bin/init.js +185 -38
- package/bin/mcp.js +3 -0
- package/bin/monitor.js +511 -0
- package/bin/setup.js +9 -9
- package/index.js +31 -11
- package/package.json +10 -11
- package/src/attestation.js +49 -28
- package/src/cache.js +3 -1
- package/src/database.js +227 -34
- package/src/embeddings.js +4 -2
- package/src/events.js +2 -0
- package/src/extractor-heuristic.js +5 -2
- package/src/sdk.js +4 -3
- package/src/search.js +55 -84
- package/src/server.js +884 -723
- package/src/setup-wasm.js +34 -39
- package/src/text-utils.js +52 -0
- package/src/tools.js +98 -53
- package/src/watcher.js +157 -49
package/src/search.js
CHANGED
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
10
|
import db, {
|
|
11
|
+
stmts,
|
|
11
12
|
searchKeyword,
|
|
12
13
|
searchVector,
|
|
13
14
|
getMemoryById,
|
|
@@ -19,6 +20,7 @@ import db, {
|
|
|
19
20
|
import { generateEmbedding } from './embeddings.js';
|
|
20
21
|
import { createAttestation } from './attestation.js';
|
|
21
22
|
import { searchCache, LRUCache } from './cache.js';
|
|
23
|
+
import { jaccardSimilarity, logInfo } from './text-utils.js';
|
|
22
24
|
|
|
23
25
|
let lastDataVersion = 0;
|
|
24
26
|
|
|
@@ -55,7 +57,7 @@ export async function searchHybrid(queryText, limit = 5, agentId = null, session
|
|
|
55
57
|
const cacheKey = LRUCache.key(`${ns}:${queryText}`, parsedLimit);
|
|
56
58
|
const cached = searchCache.get(cacheKey);
|
|
57
59
|
if (cached) {
|
|
58
|
-
|
|
60
|
+
logInfo(`[persyst-cache] Cache HIT for query: "${queryText.slice(0, 50)}..."`);
|
|
59
61
|
return cached;
|
|
60
62
|
}
|
|
61
63
|
|
|
@@ -68,7 +70,7 @@ export async function searchHybrid(queryText, limit = 5, agentId = null, session
|
|
|
68
70
|
const vecHits = searchVector(queryEmbedding, parsedLimit * 2);
|
|
69
71
|
|
|
70
72
|
const semanticResults = vecHits.map(r => ({
|
|
71
|
-
id: r.rowid,
|
|
73
|
+
id: Number(r.rowid),
|
|
72
74
|
distance: r.distance,
|
|
73
75
|
// Convert L2 distance to 0-1 similarity score
|
|
74
76
|
similarity: Math.max(0, 1 - (r.distance * r.distance) / 2)
|
|
@@ -116,7 +118,7 @@ export async function searchHybrid(queryText, limit = 5, agentId = null, session
|
|
|
116
118
|
let reputationWarning = false;
|
|
117
119
|
const prov = memory.provenance;
|
|
118
120
|
if (prov && prov.source_type === 'agent' && prov.source_id) {
|
|
119
|
-
const agentRow =
|
|
121
|
+
const agentRow = stmts.getReputationScore.get(prov.source_id);
|
|
120
122
|
if (agentRow) {
|
|
121
123
|
reputationScore = agentRow.reputation_score;
|
|
122
124
|
if (reputationScore < 0.5) {
|
|
@@ -134,8 +136,8 @@ export async function searchHybrid(queryText, limit = 5, agentId = null, session
|
|
|
134
136
|
importance_score: memory.importance_score,
|
|
135
137
|
created_at: memory.created_at,
|
|
136
138
|
last_accessed: memory.last_accessed,
|
|
137
|
-
similarity: r.similarity
|
|
138
|
-
hybrid_score:
|
|
139
|
+
similarity: Math.round(r.similarity * 10000) / 10000,
|
|
140
|
+
hybrid_score: Math.round(finalScore * 10000) / 10000,
|
|
139
141
|
keyword_match: r.keyword_match,
|
|
140
142
|
reputation_warning: reputationWarning,
|
|
141
143
|
provenance: prov
|
|
@@ -217,27 +219,6 @@ function applyMMR(candidates, limit, lambda = 0.7) {
|
|
|
217
219
|
return selected;
|
|
218
220
|
}
|
|
219
221
|
|
|
220
|
-
/**
|
|
221
|
-
* Compute Jaccard similarity between two text strings.
|
|
222
|
-
* Uses word-level tokenization for efficiency.
|
|
223
|
-
*
|
|
224
|
-
* @param {string} a - First text
|
|
225
|
-
* @param {string} b - Second text
|
|
226
|
-
* @returns {number} Similarity score between 0 and 1
|
|
227
|
-
*/
|
|
228
|
-
function jaccardSimilarity(a, b) {
|
|
229
|
-
const wordsA = new Set(a.toLowerCase().split(/\s+/));
|
|
230
|
-
const wordsB = new Set(b.toLowerCase().split(/\s+/));
|
|
231
|
-
|
|
232
|
-
let intersection = 0;
|
|
233
|
-
for (const word of wordsA) {
|
|
234
|
-
if (wordsB.has(word)) intersection++;
|
|
235
|
-
}
|
|
236
|
-
|
|
237
|
-
const union = wordsA.size + wordsB.size - intersection;
|
|
238
|
-
return union === 0 ? 0 : intersection / union;
|
|
239
|
-
}
|
|
240
|
-
|
|
241
222
|
/**
|
|
242
223
|
* Optimizes the retrieved context by walking the knowledge graph and compressing content to fit max_tokens.
|
|
243
224
|
*
|
|
@@ -247,6 +228,13 @@ function jaccardSimilarity(a, b) {
|
|
|
247
228
|
* @param {string|null} sessionId - Current session ID
|
|
248
229
|
*/
|
|
249
230
|
export async function getOptimizedContext(queryText, maxTokens, agentId = null, sessionId = null, namespace = null, intentParam = null) {
|
|
231
|
+
// Classify intent and urgency early to adjust token budget dynamically
|
|
232
|
+
const { intent, urgency } = classifyIntentAndUrgency(queryText, intentParam);
|
|
233
|
+
let targetMaxTokens = maxTokens;
|
|
234
|
+
if (intent === 'general' || intent === 'testing') {
|
|
235
|
+
targetMaxTokens = Math.min(maxTokens, 1500);
|
|
236
|
+
}
|
|
237
|
+
|
|
250
238
|
// Extract entities mentioned in the query text to seed the graph search directly
|
|
251
239
|
const entities = getAllEntities(100);
|
|
252
240
|
const matchedEntityIds = new Set();
|
|
@@ -302,11 +290,7 @@ export async function getOptimizedContext(queryText, maxTokens, agentId = null,
|
|
|
302
290
|
if (depth >= 6) continue;
|
|
303
291
|
|
|
304
292
|
// --- 2a. Explicit Graph Edges (from edges table) ---
|
|
305
|
-
const connectedEdges =
|
|
306
|
-
SELECT * FROM edges
|
|
307
|
-
WHERE (source_id = ? AND source_type = ?)
|
|
308
|
-
OR (target_id = ? AND target_type = ?)
|
|
309
|
-
`).all(id, type, id, type);
|
|
293
|
+
const connectedEdges = stmts.getEdgesBySourceAndType.all(id, type, id, type);
|
|
310
294
|
|
|
311
295
|
for (const edge of connectedEdges) {
|
|
312
296
|
let nextId, nextType;
|
|
@@ -327,7 +311,7 @@ export async function getOptimizedContext(queryText, maxTokens, agentId = null,
|
|
|
327
311
|
|
|
328
312
|
// --- 2b. Implicit Name-Based Edges (for robustness when explicit edges are missing) ---
|
|
329
313
|
if (type === 'memory') {
|
|
330
|
-
const memoryRow =
|
|
314
|
+
const memoryRow = stmts.getMemoryContentById.get(id);
|
|
331
315
|
if (memoryRow && memoryRow.content) {
|
|
332
316
|
const contentLower = memoryRow.content.toLowerCase();
|
|
333
317
|
for (const ent of entities) {
|
|
@@ -343,7 +327,7 @@ export async function getOptimizedContext(queryText, maxTokens, agentId = null,
|
|
|
343
327
|
} else if (type === 'entity') {
|
|
344
328
|
const ent = entities.find(e => e.id === id);
|
|
345
329
|
if (ent && ent.name) {
|
|
346
|
-
const matchingMemories =
|
|
330
|
+
const matchingMemories = stmts.getMemoryLikeContent.all(`%${ent.name}%`);
|
|
347
331
|
for (const row of matchingMemories) {
|
|
348
332
|
const nextKey = `memory:${row.id}`;
|
|
349
333
|
if (!visitedNodes.has(nextKey)) {
|
|
@@ -409,22 +393,31 @@ export async function getOptimizedContext(queryText, maxTokens, agentId = null,
|
|
|
409
393
|
// 4. Sort candidates
|
|
410
394
|
list.sort((a, b) => b.score - a.score);
|
|
411
395
|
|
|
412
|
-
// 5. Compress context to fit maxTokens
|
|
396
|
+
// 5. Compress context to fit maxTokens with on-the-fly diversity check
|
|
413
397
|
let currentTokens = 0;
|
|
414
398
|
const accepted = [];
|
|
415
399
|
|
|
416
400
|
for (const c of list) {
|
|
417
|
-
//
|
|
418
|
-
|
|
419
|
-
|
|
401
|
+
// Skip if too similar to any already accepted memory to prevent redundant context bloat
|
|
402
|
+
let isRedundant = false;
|
|
403
|
+
for (const acc of accepted) {
|
|
404
|
+
const sim = jaccardSimilarity(c.content, acc.content);
|
|
405
|
+
if (sim > 0.60) {
|
|
406
|
+
isRedundant = true;
|
|
407
|
+
break;
|
|
408
|
+
}
|
|
409
|
+
}
|
|
410
|
+
if (isRedundant) continue;
|
|
411
|
+
|
|
412
|
+
// Heuristic: ~4 characters per token + format headers (~3 tokens for compact format)
|
|
413
|
+
const estimatedTokens = Math.max(1, Math.ceil(c.content.length / 4) + 3);
|
|
414
|
+
if (currentTokens + estimatedTokens > targetMaxTokens) {
|
|
420
415
|
continue;
|
|
421
416
|
}
|
|
422
417
|
currentTokens += estimatedTokens;
|
|
423
418
|
accepted.push(c);
|
|
424
419
|
}
|
|
425
420
|
|
|
426
|
-
// Classify intent and urgency based on query text and parameters
|
|
427
|
-
const { intent, urgency } = classifyIntentAndUrgency(queryText, intentParam);
|
|
428
421
|
const suggested_actions = generateSuggestedActions(accepted, intent, urgency);
|
|
429
422
|
|
|
430
423
|
// 6. Format LLM injection context string
|
|
@@ -444,11 +437,7 @@ export async function getOptimizedContext(queryText, maxTokens, agentId = null,
|
|
|
444
437
|
context += 'No relevant memories retrieved.\n';
|
|
445
438
|
} else {
|
|
446
439
|
for (const a of accepted) {
|
|
447
|
-
|
|
448
|
-
if (a.provenance) {
|
|
449
|
-
sourceTag = `Source: ${a.provenance.source_type}${a.provenance.source_id ? ` (${a.provenance.source_id})` : ''}`;
|
|
450
|
-
}
|
|
451
|
-
context += `[Memory #${a.id}] (Score: ${a.score.toFixed(4)}, ${sourceTag})\n${a.content}\n---\n`;
|
|
440
|
+
context += `#${a.id}: ${a.content}\n`;
|
|
452
441
|
}
|
|
453
442
|
}
|
|
454
443
|
context += '=== END OF CONTEXT ===';
|
|
@@ -532,26 +521,22 @@ export async function consolidateMemories(namespace = null) {
|
|
|
532
521
|
const consolidated = [];
|
|
533
522
|
const visited = new Set();
|
|
534
523
|
|
|
535
|
-
|
|
536
|
-
|
|
524
|
+
// Wrap all mutations in a transaction so a partial failure rolls back.
|
|
525
|
+
const consolidateOne = db.transaction((mem) => {
|
|
526
|
+
if (visited.has(mem.id)) return;
|
|
537
527
|
|
|
538
528
|
// Search for similar memories
|
|
539
|
-
const embedding =
|
|
540
|
-
if (!embedding)
|
|
529
|
+
const embedding = stmts.getVecByRowId.get(mem.id);
|
|
530
|
+
if (!embedding) return;
|
|
541
531
|
|
|
542
|
-
const hits =
|
|
543
|
-
SELECT rowid AS id, distance
|
|
544
|
-
FROM memories_vec
|
|
545
|
-
WHERE embedding MATCH ?
|
|
546
|
-
AND k = 30
|
|
547
|
-
`).all(embedding.embedding);
|
|
532
|
+
const hits = stmts.consolidateVecSearch.all(embedding.embedding);
|
|
548
533
|
|
|
549
534
|
const group = [];
|
|
550
535
|
for (const hit of hits) {
|
|
551
536
|
if (visited.has(Number(hit.id))) continue;
|
|
552
537
|
const sim = Math.max(0, 1 - (hit.distance * hit.distance) / 2);
|
|
553
538
|
if (sim > 0.80) {
|
|
554
|
-
const other =
|
|
539
|
+
const other = stmts.getMemoryByIdRaw.get(Number(hit.id));
|
|
555
540
|
if (other) {
|
|
556
541
|
group.push(other);
|
|
557
542
|
}
|
|
@@ -564,7 +549,7 @@ export async function consolidateMemories(namespace = null) {
|
|
|
564
549
|
const prov = getProvenance(m.id);
|
|
565
550
|
let reputation = 1.0;
|
|
566
551
|
if (prov && prov.source_type === 'agent' && prov.source_id) {
|
|
567
|
-
const agentRow =
|
|
552
|
+
const agentRow = stmts.getReputationScore.get(prov.source_id);
|
|
568
553
|
if (agentRow) reputation = agentRow.reputation_score;
|
|
569
554
|
}
|
|
570
555
|
return (prov ? prov.confidence : 1.0) * reputation;
|
|
@@ -583,50 +568,32 @@ export async function consolidateMemories(namespace = null) {
|
|
|
583
568
|
const rel = checkRelationship(canonical.content, current.content);
|
|
584
569
|
|
|
585
570
|
if (rel.type === 'contradiction') {
|
|
586
|
-
// Resolve contradiction: keep canonical, archive current
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
// Apply reputation changes since it's a cross-agent contradiction
|
|
592
|
-
const oldProv = getProvenance(current.id);
|
|
593
|
-
const newProv = getProvenance(canonical.id);
|
|
594
|
-
if (oldProv && oldProv.source_type === 'agent' && oldProv.source_id) {
|
|
595
|
-
const isSelf = newProv && newProv.source_type === 'agent' && newProv.source_id === oldProv.source_id;
|
|
596
|
-
if (!isSelf) {
|
|
597
|
-
db.prepare('UPDATE agent_stats SET memories_contradicted = memories_contradicted + 1 WHERE agent_id = ?').run(oldProv.source_id);
|
|
598
|
-
db.prepare('UPDATE agent_stats SET reputation_score = (memories_confirmed + 1.0) / (memories_contradicted + 1.0) WHERE agent_id = ?').run(oldProv.source_id);
|
|
599
|
-
if (newProv && newProv.source_type === 'agent') {
|
|
600
|
-
db.prepare('UPDATE agent_stats SET memories_confirmed = memories_confirmed + 1 WHERE agent_id = ?').run(newProv.source_id);
|
|
601
|
-
db.prepare('UPDATE agent_stats SET reputation_score = (memories_confirmed + 1.0) / (memories_contradicted + 1.0) WHERE agent_id = ?').run(newProv.source_id);
|
|
602
|
-
}
|
|
603
|
-
}
|
|
604
|
-
}
|
|
571
|
+
// Resolve contradiction: keep canonical, archive current.
|
|
572
|
+
// logContradiction already updates agent stats, so we only record the archive here.
|
|
573
|
+
stmts.archiveMemoryById.run(current.id);
|
|
574
|
+
stmts.insertContradiction.run(current.id, canonical.id, `Consolidated contradiction: resolved in favor of canonical #${canonical.id}`);
|
|
605
575
|
|
|
606
576
|
archivedIds.push(current.id);
|
|
607
577
|
visited.add(current.id);
|
|
608
578
|
} else if (rel.type === 'subset') {
|
|
609
579
|
if (rel.keep === 'b') {
|
|
610
580
|
// current (B) is a superset of canonical (A). Swap them
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
.run(canonical.id, current.id, `Consolidated subset: replaced by more detailed #${current.id}`);
|
|
581
|
+
stmts.archiveMemoryById.run(canonical.id);
|
|
582
|
+
stmts.insertContradiction.run(canonical.id, current.id, `Consolidated subset: replaced by more detailed #${current.id}`);
|
|
614
583
|
|
|
615
584
|
archivedIds.push(canonical.id);
|
|
616
585
|
canonical = current;
|
|
617
586
|
} else {
|
|
618
587
|
// canonical is superset
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
.run(current.id, canonical.id, `Consolidated subset: subsumed by more detailed #${canonical.id}`);
|
|
588
|
+
stmts.archiveMemoryById.run(current.id);
|
|
589
|
+
stmts.insertContradiction.run(current.id, canonical.id, `Consolidated subset: subsumed by more detailed #${canonical.id}`);
|
|
622
590
|
|
|
623
591
|
archivedIds.push(current.id);
|
|
624
592
|
}
|
|
625
593
|
visited.add(current.id);
|
|
626
594
|
} else if (rel.type === 'duplicate') {
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
.run(current.id, canonical.id, `Consolidated duplicate of #${canonical.id}`);
|
|
595
|
+
stmts.archiveMemoryById.run(current.id);
|
|
596
|
+
stmts.insertContradiction.run(current.id, canonical.id, `Consolidated duplicate of #${canonical.id}`);
|
|
630
597
|
|
|
631
598
|
archivedIds.push(current.id);
|
|
632
599
|
visited.add(current.id);
|
|
@@ -641,6 +608,10 @@ export async function consolidateMemories(namespace = null) {
|
|
|
641
608
|
});
|
|
642
609
|
}
|
|
643
610
|
}
|
|
611
|
+
});
|
|
612
|
+
|
|
613
|
+
for (const mem of activeMemories) {
|
|
614
|
+
consolidateOne(mem);
|
|
644
615
|
}
|
|
645
616
|
|
|
646
617
|
return {
|