persyst-mcp 2.2.5 → 2.2.7

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
package/src/search.js CHANGED
@@ -8,6 +8,7 @@
8
8
  */
9
9
 
10
10
  import db, {
11
+ stmts,
11
12
  searchKeyword,
12
13
  searchVector,
13
14
  getMemoryById,
@@ -19,6 +20,7 @@ import db, {
19
20
  import { generateEmbedding } from './embeddings.js';
20
21
  import { createAttestation } from './attestation.js';
21
22
  import { searchCache, LRUCache } from './cache.js';
23
+ import { jaccardSimilarity, logInfo } from './text-utils.js';
22
24
 
23
25
  let lastDataVersion = 0;
24
26
 
@@ -55,7 +57,7 @@ export async function searchHybrid(queryText, limit = 5, agentId = null, session
55
57
  const cacheKey = LRUCache.key(`${ns}:${queryText}`, parsedLimit);
56
58
  const cached = searchCache.get(cacheKey);
57
59
  if (cached) {
58
- console.error(`[persyst-cache] Cache HIT for query: "${queryText.slice(0, 50)}..."`);
60
+ logInfo(`[persyst-cache] Cache HIT for query: "${queryText.slice(0, 50)}..."`);
59
61
  return cached;
60
62
  }
61
63
 
@@ -68,7 +70,7 @@ export async function searchHybrid(queryText, limit = 5, agentId = null, session
68
70
  const vecHits = searchVector(queryEmbedding, parsedLimit * 2);
69
71
 
70
72
  const semanticResults = vecHits.map(r => ({
71
- id: r.rowid,
73
+ id: Number(r.rowid),
72
74
  distance: r.distance,
73
75
  // Convert L2 distance to 0-1 similarity score
74
76
  similarity: Math.max(0, 1 - (r.distance * r.distance) / 2)
@@ -116,7 +118,7 @@ export async function searchHybrid(queryText, limit = 5, agentId = null, session
116
118
  let reputationWarning = false;
117
119
  const prov = memory.provenance;
118
120
  if (prov && prov.source_type === 'agent' && prov.source_id) {
119
- const agentRow = db.prepare('SELECT reputation_score FROM agent_stats WHERE agent_id = ?').get(prov.source_id);
121
+ const agentRow = stmts.getReputationScore.get(prov.source_id);
120
122
  if (agentRow) {
121
123
  reputationScore = agentRow.reputation_score;
122
124
  if (reputationScore < 0.5) {
@@ -134,8 +136,8 @@ export async function searchHybrid(queryText, limit = 5, agentId = null, session
134
136
  importance_score: memory.importance_score,
135
137
  created_at: memory.created_at,
136
138
  last_accessed: memory.last_accessed,
137
- similarity: r.similarity.toFixed(4),
138
- hybrid_score: finalScore.toFixed(4),
139
+ similarity: Math.round(r.similarity * 10000) / 10000,
140
+ hybrid_score: Math.round(finalScore * 10000) / 10000,
139
141
  keyword_match: r.keyword_match,
140
142
  reputation_warning: reputationWarning,
141
143
  provenance: prov
@@ -217,27 +219,6 @@ function applyMMR(candidates, limit, lambda = 0.7) {
217
219
  return selected;
218
220
  }
219
221
 
220
- /**
221
- * Compute Jaccard similarity between two text strings.
222
- * Uses word-level tokenization for efficiency.
223
- *
224
- * @param {string} a - First text
225
- * @param {string} b - Second text
226
- * @returns {number} Similarity score between 0 and 1
227
- */
228
- function jaccardSimilarity(a, b) {
229
- const wordsA = new Set(a.toLowerCase().split(/\s+/));
230
- const wordsB = new Set(b.toLowerCase().split(/\s+/));
231
-
232
- let intersection = 0;
233
- for (const word of wordsA) {
234
- if (wordsB.has(word)) intersection++;
235
- }
236
-
237
- const union = wordsA.size + wordsB.size - intersection;
238
- return union === 0 ? 0 : intersection / union;
239
- }
240
-
241
222
  /**
242
223
  * Optimizes the retrieved context by walking the knowledge graph and compressing content to fit max_tokens.
243
224
  *
@@ -247,6 +228,13 @@ function jaccardSimilarity(a, b) {
247
228
  * @param {string|null} sessionId - Current session ID
248
229
  */
249
230
  export async function getOptimizedContext(queryText, maxTokens, agentId = null, sessionId = null, namespace = null, intentParam = null) {
231
+ // Classify intent and urgency early to adjust token budget dynamically
232
+ const { intent, urgency } = classifyIntentAndUrgency(queryText, intentParam);
233
+ let targetMaxTokens = maxTokens;
234
+ if (intent === 'general' || intent === 'testing') {
235
+ targetMaxTokens = Math.min(maxTokens, 1500);
236
+ }
237
+
250
238
  // Extract entities mentioned in the query text to seed the graph search directly
251
239
  const entities = getAllEntities(100);
252
240
  const matchedEntityIds = new Set();
@@ -302,11 +290,7 @@ export async function getOptimizedContext(queryText, maxTokens, agentId = null,
302
290
  if (depth >= 6) continue;
303
291
 
304
292
  // --- 2a. Explicit Graph Edges (from edges table) ---
305
- const connectedEdges = db.prepare(`
306
- SELECT * FROM edges
307
- WHERE (source_id = ? AND source_type = ?)
308
- OR (target_id = ? AND target_type = ?)
309
- `).all(id, type, id, type);
293
+ const connectedEdges = stmts.getEdgesBySourceAndType.all(id, type, id, type);
310
294
 
311
295
  for (const edge of connectedEdges) {
312
296
  let nextId, nextType;
@@ -327,7 +311,7 @@ export async function getOptimizedContext(queryText, maxTokens, agentId = null,
327
311
 
328
312
  // --- 2b. Implicit Name-Based Edges (for robustness when explicit edges are missing) ---
329
313
  if (type === 'memory') {
330
- const memoryRow = db.prepare('SELECT content FROM memories WHERE id = ?').get(id);
314
+ const memoryRow = stmts.getMemoryContentById.get(id);
331
315
  if (memoryRow && memoryRow.content) {
332
316
  const contentLower = memoryRow.content.toLowerCase();
333
317
  for (const ent of entities) {
@@ -343,7 +327,7 @@ export async function getOptimizedContext(queryText, maxTokens, agentId = null,
343
327
  } else if (type === 'entity') {
344
328
  const ent = entities.find(e => e.id === id);
345
329
  if (ent && ent.name) {
346
- const matchingMemories = db.prepare('SELECT id FROM memories WHERE content LIKE ? AND valid_until IS NULL').all(`%${ent.name}%`);
330
+ const matchingMemories = stmts.getMemoryLikeContent.all(`%${ent.name}%`);
347
331
  for (const row of matchingMemories) {
348
332
  const nextKey = `memory:${row.id}`;
349
333
  if (!visitedNodes.has(nextKey)) {
@@ -409,22 +393,31 @@ export async function getOptimizedContext(queryText, maxTokens, agentId = null,
409
393
  // 4. Sort candidates
410
394
  list.sort((a, b) => b.score - a.score);
411
395
 
412
- // 5. Compress context to fit maxTokens
396
+ // 5. Compress context to fit maxTokens with on-the-fly diversity check
413
397
  let currentTokens = 0;
414
398
  const accepted = [];
415
399
 
416
400
  for (const c of list) {
417
- // Heuristic: ~4 characters per token + format headers (~15 tokens)
418
- const estimatedTokens = Math.max(1, Math.ceil(c.content.length / 4) + 15);
419
- if (currentTokens + estimatedTokens > maxTokens) {
401
+ // Skip if too similar to any already accepted memory to prevent redundant context bloat
402
+ let isRedundant = false;
403
+ for (const acc of accepted) {
404
+ const sim = jaccardSimilarity(c.content, acc.content);
405
+ if (sim > 0.60) {
406
+ isRedundant = true;
407
+ break;
408
+ }
409
+ }
410
+ if (isRedundant) continue;
411
+
412
+ // Heuristic: ~4 characters per token + format headers (~3 tokens for compact format)
413
+ const estimatedTokens = Math.max(1, Math.ceil(c.content.length / 4) + 3);
414
+ if (currentTokens + estimatedTokens > targetMaxTokens) {
420
415
  continue;
421
416
  }
422
417
  currentTokens += estimatedTokens;
423
418
  accepted.push(c);
424
419
  }
425
420
 
426
- // Classify intent and urgency based on query text and parameters
427
- const { intent, urgency } = classifyIntentAndUrgency(queryText, intentParam);
428
421
  const suggested_actions = generateSuggestedActions(accepted, intent, urgency);
429
422
 
430
423
  // 6. Format LLM injection context string
@@ -444,11 +437,7 @@ export async function getOptimizedContext(queryText, maxTokens, agentId = null,
444
437
  context += 'No relevant memories retrieved.\n';
445
438
  } else {
446
439
  for (const a of accepted) {
447
- let sourceTag = 'Source: manual';
448
- if (a.provenance) {
449
- sourceTag = `Source: ${a.provenance.source_type}${a.provenance.source_id ? ` (${a.provenance.source_id})` : ''}`;
450
- }
451
- context += `[Memory #${a.id}] (Score: ${a.score.toFixed(4)}, ${sourceTag})\n${a.content}\n---\n`;
440
+ context += `#${a.id}: ${a.content}\n`;
452
441
  }
453
442
  }
454
443
  context += '=== END OF CONTEXT ===';
@@ -532,26 +521,22 @@ export async function consolidateMemories(namespace = null) {
532
521
  const consolidated = [];
533
522
  const visited = new Set();
534
523
 
535
- for (const mem of activeMemories) {
536
- if (visited.has(mem.id)) continue;
524
+ // Wrap all mutations in a transaction so a partial failure rolls back.
525
+ const consolidateOne = db.transaction((mem) => {
526
+ if (visited.has(mem.id)) return;
537
527
 
538
528
  // Search for similar memories
539
- const embedding = db.prepare('SELECT embedding FROM memories_vec WHERE rowid = ?').get(mem.id);
540
- if (!embedding) continue;
529
+ const embedding = stmts.getVecByRowId.get(mem.id);
530
+ if (!embedding) return;
541
531
 
542
- const hits = db.prepare(`
543
- SELECT rowid AS id, distance
544
- FROM memories_vec
545
- WHERE embedding MATCH ?
546
- AND k = 30
547
- `).all(embedding.embedding);
532
+ const hits = stmts.consolidateVecSearch.all(embedding.embedding);
548
533
 
549
534
  const group = [];
550
535
  for (const hit of hits) {
551
536
  if (visited.has(Number(hit.id))) continue;
552
537
  const sim = Math.max(0, 1 - (hit.distance * hit.distance) / 2);
553
538
  if (sim > 0.80) {
554
- const other = db.prepare('SELECT * FROM memories WHERE id = ? AND valid_until IS NULL').get(Number(hit.id));
539
+ const other = stmts.getMemoryByIdRaw.get(Number(hit.id));
555
540
  if (other) {
556
541
  group.push(other);
557
542
  }
@@ -564,7 +549,7 @@ export async function consolidateMemories(namespace = null) {
564
549
  const prov = getProvenance(m.id);
565
550
  let reputation = 1.0;
566
551
  if (prov && prov.source_type === 'agent' && prov.source_id) {
567
- const agentRow = db.prepare('SELECT reputation_score FROM agent_stats WHERE agent_id = ?').get(prov.source_id);
552
+ const agentRow = stmts.getReputationScore.get(prov.source_id);
568
553
  if (agentRow) reputation = agentRow.reputation_score;
569
554
  }
570
555
  return (prov ? prov.confidence : 1.0) * reputation;
@@ -583,50 +568,32 @@ export async function consolidateMemories(namespace = null) {
583
568
  const rel = checkRelationship(canonical.content, current.content);
584
569
 
585
570
  if (rel.type === 'contradiction') {
586
- // Resolve contradiction: keep canonical, archive current
587
- db.prepare('UPDATE memories SET valid_until = unixepoch() WHERE id = ?').run(current.id);
588
- db.prepare('INSERT INTO contradictions (old_memory_id, new_memory_id, resolution_reason) VALUES (?, ?, ?)')
589
- .run(current.id, canonical.id, `Consolidated contradiction: resolved in favor of canonical #${canonical.id}`);
590
-
591
- // Apply reputation changes since it's a cross-agent contradiction
592
- const oldProv = getProvenance(current.id);
593
- const newProv = getProvenance(canonical.id);
594
- if (oldProv && oldProv.source_type === 'agent' && oldProv.source_id) {
595
- const isSelf = newProv && newProv.source_type === 'agent' && newProv.source_id === oldProv.source_id;
596
- if (!isSelf) {
597
- db.prepare('UPDATE agent_stats SET memories_contradicted = memories_contradicted + 1 WHERE agent_id = ?').run(oldProv.source_id);
598
- db.prepare('UPDATE agent_stats SET reputation_score = (memories_confirmed + 1.0) / (memories_contradicted + 1.0) WHERE agent_id = ?').run(oldProv.source_id);
599
- if (newProv && newProv.source_type === 'agent') {
600
- db.prepare('UPDATE agent_stats SET memories_confirmed = memories_confirmed + 1 WHERE agent_id = ?').run(newProv.source_id);
601
- db.prepare('UPDATE agent_stats SET reputation_score = (memories_confirmed + 1.0) / (memories_contradicted + 1.0) WHERE agent_id = ?').run(newProv.source_id);
602
- }
603
- }
604
- }
571
+ // Resolve contradiction: keep canonical, archive current.
572
+ // logContradiction already updates agent stats, so we only record the archive here.
573
+ stmts.archiveMemoryById.run(current.id);
574
+ stmts.insertContradiction.run(current.id, canonical.id, `Consolidated contradiction: resolved in favor of canonical #${canonical.id}`);
605
575
 
606
576
  archivedIds.push(current.id);
607
577
  visited.add(current.id);
608
578
  } else if (rel.type === 'subset') {
609
579
  if (rel.keep === 'b') {
610
580
  // current (B) is a superset of canonical (A). Swap them
611
- db.prepare('UPDATE memories SET valid_until = unixepoch() WHERE id = ?').run(canonical.id);
612
- db.prepare('INSERT INTO contradictions (old_memory_id, new_memory_id, resolution_reason) VALUES (?, ?, ?)')
613
- .run(canonical.id, current.id, `Consolidated subset: replaced by more detailed #${current.id}`);
581
+ stmts.archiveMemoryById.run(canonical.id);
582
+ stmts.insertContradiction.run(canonical.id, current.id, `Consolidated subset: replaced by more detailed #${current.id}`);
614
583
 
615
584
  archivedIds.push(canonical.id);
616
585
  canonical = current;
617
586
  } else {
618
587
  // canonical is superset
619
- db.prepare('UPDATE memories SET valid_until = unixepoch() WHERE id = ?').run(current.id);
620
- db.prepare('INSERT INTO contradictions (old_memory_id, new_memory_id, resolution_reason) VALUES (?, ?, ?)')
621
- .run(current.id, canonical.id, `Consolidated subset: subsumed by more detailed #${canonical.id}`);
588
+ stmts.archiveMemoryById.run(current.id);
589
+ stmts.insertContradiction.run(current.id, canonical.id, `Consolidated subset: subsumed by more detailed #${canonical.id}`);
622
590
 
623
591
  archivedIds.push(current.id);
624
592
  }
625
593
  visited.add(current.id);
626
594
  } else if (rel.type === 'duplicate') {
627
- db.prepare('UPDATE memories SET valid_until = unixepoch() WHERE id = ?').run(current.id);
628
- db.prepare('INSERT INTO contradictions (old_memory_id, new_memory_id, resolution_reason) VALUES (?, ?, ?)')
629
- .run(current.id, canonical.id, `Consolidated duplicate of #${canonical.id}`);
595
+ stmts.archiveMemoryById.run(current.id);
596
+ stmts.insertContradiction.run(current.id, canonical.id, `Consolidated duplicate of #${canonical.id}`);
630
597
 
631
598
  archivedIds.push(current.id);
632
599
  visited.add(current.id);
@@ -641,6 +608,10 @@ export async function consolidateMemories(namespace = null) {
641
608
  });
642
609
  }
643
610
  }
611
+ });
612
+
613
+ for (const mem of activeMemories) {
614
+ consolidateOne(mem);
644
615
  }
645
616
 
646
617
  return {