npm - persyst-mcp - Versions diffs - 1.1.0 → 2.1.0 - Mend

persyst-mcp 1.1.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/src/cache.js ADDED Viewed

@@ -0,0 +1,122 @@
+/**
+ * cache.js — LRU Query Result Cache
+ *
+ * In-memory LRU cache for search results to avoid
+ * re-computing embeddings for repeated queries.
+ *
+ * - Configurable max size (default: 100 entries)
+ * - Configurable TTL (default: 5 minutes)
+ * - Automatic eviction of oldest entries when full
+ * - Full invalidation on write operations
+ */
+/**
+ * Simple LRU (Least Recently Used) cache with TTL support.
+ *
+ * Entries are kept in a Map, which iterates in insertion order: the first key
+ * is the least recently used entry and the last key is the most recently used.
+ * Recency is refreshed by deleting a key and inserting it again.
+ *
+ * Expiry is lazy: an expired entry is only removed when `get` is called for
+ * its key (or when it is evicted / the cache is invalidated).
+ */
+export class LRUCache {
+  /**
+   * Create an empty cache with zeroed hit/miss counters.
+   *
+   * @param {number} maxSize - Maximum number of entries (default: 100)
+   * @param {number} ttlMs - Time-to-live in milliseconds (default: 300000 = 5 min)
+   */
+  constructor(maxSize = 100, ttlMs = 300000) {
+    this.maxSize = maxSize;
+    this.ttlMs = ttlMs;
+    this.cache = new Map(); // key -> { value, timestamp }
+    this.hits = 0;
+    this.misses = 0;
+  }
+  /**
+   * Generate a cache key from query parameters.
+   * The key is built from the query text and the limit only; no other
+   * search parameter takes part in it.
+   *
+   * @param {string} query - The search query
+   * @param {number} limit - The result limit
+   * @returns {string} Cache key
+   */
+  static key(query, limit) {
+    return `${query}::${limit}`;
+  }
+  /**
+   * Get a cached value if it exists and hasn't expired.
+   * Moves the entry to the "most recently used" position.
+   *
+   * Every call increments either `hits` or `misses`. An expired entry is
+   * deleted and counted as a miss. A hit re-inserts the same entry object,
+   * so its timestamp is NOT refreshed: the TTL is measured from the moment
+   * the value was stored by `set`, not from the last read.
+   *
+   * A stored value of null is returned as null and counted as a hit, so
+   * callers cannot tell it apart from a miss by the return value alone.
+   *
+   * @param {string} key - Cache key
+   * @returns {*|null} Cached value or null if miss/expired
+   */
+  get(key) {
+    const entry = this.cache.get(key);
+    if (!entry) {
+      this.misses++;
+      return null;
+    }
+    // Check TTL expiry (strictly older than ttlMs; an entry exactly ttlMs old is still served)
+    if (Date.now() - entry.timestamp > this.ttlMs) {
+      this.cache.delete(key);
+      this.misses++;
+      return null;
+    }
+    // Move to end (most recently used) by re-inserting
+    this.cache.delete(key);
+    this.cache.set(key, entry);
+    this.hits++;
+    return entry.value;
+  }
+  /**
+   * Store a value in the cache. Evicts oldest entry if at capacity.
+   *
+   * Overwriting an existing key replaces its value, resets its timestamp to
+   * now and moves it to the most recently used position; because the old
+   * entry is removed first, overwriting never evicts another key unless the
+   * cache is still at capacity afterwards.
+   *
+   * NOTE(review): with maxSize <= 0 the eviction step removes at most one
+   * entry and the new entry is still stored, so the cache ends up holding
+   * one entry rather than none.
+   *
+   * @param {string} key - Cache key
+   * @param {*} value - Value to cache
+   */
+  set(key, value) {
+    // If key already exists, delete it first (to update position)
+    if (this.cache.has(key)) {
+      this.cache.delete(key);
+    }
+    // Evict oldest (first) entry if at capacity
+    if (this.cache.size >= this.maxSize) {
+      const oldestKey = this.cache.keys().next().value; // first key in insertion order
+      this.cache.delete(oldestKey);
+    }
+    this.cache.set(key, {
+      value,
+      timestamp: Date.now()
+    });
+  }
+  /**
+   * Invalidate the entire cache. Called on write operations
+   * (add_memory, update_memory, delete_memory) to ensure
+   * search results are always fresh.
+   *
+   * Logs the number of removed entries through console.error, but only when
+   * at least one entry was present. The hit/miss counters are left untouched.
+   */
+  invalidate() {
+    const size = this.cache.size;
+    this.cache.clear();
+    if (size > 0) {
+      console.error(`[persyst-cache] Invalidated ${size} cached entries`);
+    }
+  }
+  /**
+   * Get cache statistics for monitoring.
+   * `hitRate` is a percentage string with one decimal place (for example
+   * "66.7%"), or the literal "0%" when no lookup has been recorded yet.
+   *
+   * @returns {{ size: number, maxSize: number, ttlMs: number, hits: number, misses: number, hitRate: string }}
+   */
+  stats() {
+    const total = this.hits + this.misses;
+    return {
+      size: this.cache.size,
+      maxSize: this.maxSize,
+      ttlMs: this.ttlMs,
+      hits: this.hits,
+      misses: this.misses,
+      hitRate: total > 0 ? `${((this.hits / total) * 100).toFixed(1)}%` : '0%'
+    };
+  }
+}
+// Singleton instance for search results (at most 100 entries, 300000 ms = 5 minute TTL)
+export const searchCache = new LRUCache(100, 300000);

package/src/database.js CHANGED Viewed

@@ -33,6 +33,7 @@ const DB_PATH = process.env.NODE_ENV === 'test' ? ':memory:' : join(DB_DIR, 'per
 const db = new Database(DB_PATH);
 db.pragma('journal_mode = WAL');   // Better performance for concurrent reads
 db.pragma('foreign_keys = ON');    // Enforce referential integrity
+db.pragma('mmap_size = 268435456'); // 256MB memory-mapped I/O for faster reads
 // Load sqlite-vec BEFORE creating any vec0 tables
 sqliteVec.load(db);
@@ -106,6 +107,11 @@ db.exec(`
   )
 `);
+// --- Migration: add domain column to agent_stats (TEXT, default "general") ---
+try {
+  db.exec('ALTER TABLE agent_stats ADD COLUMN domain TEXT DEFAULT "general"');
+} catch (e) { /* Column already exists. NOTE(review): every other ALTER TABLE failure is silently ignored here as well. */ }
 // --- Attestations table ---
 db.exec(`
   CREATE TABLE IF NOT EXISTS attestations (
@@ -346,6 +352,24 @@ const stmts = {
   // -- Dedup --
   findMemoryByContent: db.prepare(
     'SELECT id FROM memories WHERE content = ? AND valid_until IS NULL LIMIT 1'
+  ),
+  // -- Hash-prefix lookup for git dedup (Bug 1 fix) --
+  findMemoryByHashPrefix: db.prepare(
+    'SELECT id FROM memories WHERE content LIKE ? AND valid_until IS NULL LIMIT 1'
+  ),
+  // -- Active memory count --
+  getActiveMemoryCount: db.prepare(
+    'SELECT COUNT(*) as count FROM memories WHERE valid_until IS NULL'
+  ),
+  // -- Memory History Chain (Feature 6: prepared statements) --
+  getContradictionAncestors: db.prepare(
+    'SELECT old_memory_id FROM contradictions WHERE new_memory_id = ?'
+  ),
+  getContradictionDescendants: db.prepare(
+    'SELECT new_memory_id FROM contradictions WHERE old_memory_id = ?'
   )
 };
@@ -593,13 +617,31 @@ export function getMemoriesByEntity(entityId) {
 }
 /**
- * Check if a memory with similar content already exists.
- * Used for deduplication during git ingestion.
- * @param {string} pattern - SQL LIKE pattern to match
+ * Check if a memory with exact content already exists.
+ * Used for deduplication.
+ * The lookup uses the findMemoryByContent statement (content = ? AND
+ * valid_until IS NULL), so only memories without a valid_until value match.
+ * @param {string} content - Exact content to match
  * @returns {boolean}
  */
-export function memoryExists(pattern) {
-  return stmts.findMemoryByContent.get(pattern) !== undefined;
+export function memoryExists(content) {
+  return stmts.findMemoryByContent.get(content) !== undefined;
+}
+/**
+ * Check if a memory exists by hash prefix pattern (LIKE query).
+ * Used for git commit deduplication where we match `[hashPrefix]%`.
+ *
+ * The pattern is bound unchanged to the findMemoryByHashPrefix statement
+ * (content LIKE ? AND valid_until IS NULL), so the caller must supply the
+ * wildcard itself and only memories without a valid_until value match.
+ * Any `%` or `_` inside the pattern acts as a LIKE wildcard.
+ *
+ * @param {string} pattern - SQL LIKE pattern to match (e.g. '[abc1234]%')
+ * @returns {boolean}
+ */
+export function memoryExistsByHashPrefix(pattern) {
+  return stmts.findMemoryByHashPrefix.get(pattern) !== undefined;
+}
+/**
+ * Get count of active (non-archived) memories.
+ * "Active" means the row's valid_until column is NULL, as selected by the
+ * getActiveMemoryCount prepared statement (SELECT COUNT(*) as count ...).
+ * @returns {number}
+ */
+export function getActiveMemoryCount() {
+  return stmts.getActiveMemoryCount.get().count;
 }
 // ============================================================
@@ -713,14 +755,14 @@ export function getMemoryHistoryChain(memoryId) {
     if (versions.has(currentId)) continue;
     versions.add(currentId);
-    // Find ancestors (replaced by current)
-    const ancestors = db.prepare('SELECT old_memory_id FROM contradictions WHERE new_memory_id = ?').all(currentId);
+    // Find ancestors (replaced by current) — using prepared statement
+    const ancestors = stmts.getContradictionAncestors.all(currentId);
     ancestors.forEach(a => {
       if (!versions.has(a.old_memory_id)) queue.push(a.old_memory_id);
     });
-    // Find descendants (replaces current)
-    const descendants = db.prepare('SELECT new_memory_id FROM contradictions WHERE old_memory_id = ?').all(currentId);
+    // Find descendants (replaces current) — using prepared statement
+    const descendants = stmts.getContradictionDescendants.all(currentId);
     descendants.forEach(d => {
       if (!versions.has(d.new_memory_id)) queue.push(d.new_memory_id);
     });

package/src/git.js CHANGED Viewed

@@ -3,30 +3,36 @@
  *
  * Reads git log from a repository and converts commits into memories.
  * Performs commit categorization, file diff analysis, and imports notes.
+ *
+ * IMPORTANT: Uses async execFile instead of execSync to avoid blocking
+ * the Node.js event loop during git operations (Bug 4 fix).
  */
-import { execSync } from 'child_process';
+import { execFile } from 'child_process';
+import { promisify } from 'util';
+const execFileAsync = promisify(execFile);
 /**
  * Read the N most recent git commits from a repository.
  *
  * @param {string} repoPath - Absolute path to the git repo
  * @param {number} count - Number of commits to read (default: 20)
- * @returns {Array<{hash: string, message: string, author: string, date: string, fullText: string, files: string[], importance: number}>}
+ * @returns {Promise<Array<{hash: string, message: string, author: string, date: string, fullText: string, files: string[], importance: number}>>}
  */
-export function getRecentCommits(repoPath, count = 20) {
+export async function getRecentCommits(repoPath, count = 20) {
   try {
     // Use a delimiter to split commits reliably
     const DELIM = '---PERSYST-COMMIT---';
     const format = `${DELIM}%n%H%n%an%n%ai%n%s%n%b`;
-    const output = execSync(
-      `git log -n ${count} --pretty=format:"${format}"`,
+    const { stdout: output } = await execFileAsync(
+      'git',
+      ['log', `-n`, `${count}`, `--pretty=format:${format}`],
       {
         cwd: repoPath,
         encoding: 'utf-8',
         timeout: 10000,      // 10s timeout
-        stdio: ['pipe', 'pipe', 'pipe']  // Suppress stderr
       }
     );
@@ -45,7 +51,7 @@ export function getRecentCommits(repoPath, count = 20) {
       const body = lines.slice(4).join(' ').trim();
       // Fetch git notes if available (represents PR metadata)
-      const notes = getGitNotes(repoPath, hash);
+      const notes = await getGitNotes(repoPath, hash);
       // Build a readable memory string
       let fullText = body
@@ -57,7 +63,7 @@ export function getRecentCommits(repoPath, count = 20) {
       }
       // Fetch files touched
-      const files = getCommitFiles(repoPath, hash);
+      const files = await getCommitFiles(repoPath, hash);
       // Classify importance based on message
       const classification = classifyCommit(subject);
@@ -92,17 +98,17 @@ export function getRecentCommits(repoPath, count = 20) {
  *
  * @param {string} repoPath - Absolute path to the git repo
  * @param {string} hash - Full commit hash
- * @returns {string[]} List of changed file paths
+ * @returns {Promise<string[]>} List of changed file paths
  */
-export function getCommitFiles(repoPath, hash) {
+export async function getCommitFiles(repoPath, hash) {
   try {
-    const output = execSync(
-      `git diff-tree --no-commit-id --name-only -r ${hash}`,
+    const { stdout: output } = await execFileAsync(
+      'git',
+      ['diff-tree', '--no-commit-id', '--name-only', '-r', hash],
       {
         cwd: repoPath,
         encoding: 'utf-8',
         timeout: 5000,
-        stdio: ['pipe', 'pipe', 'pipe']
       }
     );
     return output.trim().split('\n').filter(Boolean);
@@ -114,15 +120,15 @@ export function getCommitFiles(repoPath, hash) {
 /**
  * Fetch git notes (representing PR metadata or additional annotations).
  */
-export function getGitNotes(repoPath, hash) {
+export async function getGitNotes(repoPath, hash) {
   try {
-    const output = execSync(
-      `git notes show ${hash}`,
+    const { stdout: output } = await execFileAsync(
+      'git',
+      ['notes', 'show', hash],
       {
         cwd: repoPath,
         encoding: 'utf-8',
         timeout: 3000,
-        stdio: ['pipe', 'pipe', 'pipe']
       }
     );
     return output.trim();

package/src/search.js CHANGED Viewed

@@ -3,7 +3,8 @@
  *
  * Combines keyword and semantic searches, integrates temporal decay,
  * applies agent reputation scores, generates cryptographic search attestations,
- * and builds graph-hopped optimized LLM context prompts.
+ * builds graph-hopped optimized LLM context prompts, and applies MMR
+ * for diverse result retrieval.
  */
 import db, {
@@ -16,9 +17,11 @@ import db, {
 } from './database.js';
 import { generateEmbedding } from './embeddings.js';
 import { createAttestation } from './attestation.js';
+import { searchCache, LRUCache } from './cache.js';
 /**
  * Search memories using both keyword and semantic strategies.
+ * Results are cached in the LRU cache for repeated queries.
  *
  * @param {string} queryText - What to search for
  * @param {number} limit - Max results to return (default: 5)
@@ -27,6 +30,14 @@ import { createAttestation } from './attestation.js';
  * @returns {Promise<Array>} Ranked search results (with .attestation property attached)
  */
 export async function searchHybrid(queryText, limit = 5, agentId = null, sessionId = null) {
+  // --- Check LRU cache first (Feature 1) ---
+  const cacheKey = LRUCache.key(queryText, limit);
+  const cached = searchCache.get(cacheKey);
+  if (cached) {
+    console.error(`[persyst-cache] Cache HIT for query: "${queryText.slice(0, 50)}..."`);
+    return cached;
+  }
   // --- Step 1: Keyword search (fast, exact matches) ---
   const keywordHits = searchKeyword(queryText, limit * 2);
   const keywordIds = new Set(keywordHits.map(r => r.id));
@@ -53,8 +64,8 @@ export async function searchHybrid(queryText, limit = 5, agentId = null, session
         keyword_match: isKeywordMatch
       };
     })
-    // Filter out low similarity semantic matches if they have no keyword match (threshold 0.35)
-    .filter(r => r.keyword_match || r.similarity >= 0.35);
+    // Filter out low similarity semantic matches if they have no keyword match (threshold 0.30)
+    .filter(r => r.keyword_match || r.similarity >= 0.30);
   // Add keyword-only hits that semantic search missed
   const semanticIds = new Set(semanticResults.map(r => r.id));
@@ -112,15 +123,96 @@ export async function searchHybrid(queryText, limit = 5, agentId = null, session
   // Sort by final score descending
   finalResults.sort((a, b) => parseFloat(b.hybrid_score) - parseFloat(a.hybrid_score));
-  const topResults = finalResults.slice(0, limit);
+  // --- Step 5: Apply MMR for diverse retrieval (Feature 3) ---
+  const mmrResults = applyMMR(finalResults, limit);
   // Generate cryptographic attestation for audit trails
-  const attestation = createAttestation(queryText, topResults, agentId, sessionId);
+  const attestation = createAttestation(queryText, mmrResults, agentId, sessionId);
   // Attach attestation object directly to the array to preserve compatibility with existing tests
-  topResults.attestation = attestation;
+  mmrResults.attestation = attestation;
+  // --- Store in LRU cache (Feature 1) ---
+  searchCache.set(cacheKey, mmrResults);
+  return mmrResults;
+}
+/**
+ * Apply Maximal Marginal Relevance (MMR) re-ranking for diverse results.
+ *
+ * MMR balances relevance with diversity by penalizing candidates that
+ * are too similar to already-selected results.
+ *
+ * Each candidate is read for two properties: `hybrid_score` (parsed with
+ * parseFloat) and `content` (compared with jaccardSimilarity).
+ *
+ * The first element of `candidates` is always selected first, so the input
+ * is expected to be sorted by score in descending order already.
+ *
+ * When candidates.length <= limit the input array itself is returned
+ * unchanged (no re-ranking, same array reference). Otherwise a new array is
+ * returned and the input array is not modified; the elements are the same
+ * object references.
+ *
+ * Ties on the MMR score go to the candidate that appears earlier in the
+ * remaining list. A candidate whose hybrid_score parses to NaN can never be
+ * selected; if only such candidates remain, fewer than `limit` results are
+ * returned.
+ *
+ * NOTE(review): for limit <= 0 with a non-empty input, the first candidate
+ * is still selected before the limit is checked, so one result is returned.
+ *
+ * @param {Array} candidates - Scored search results
+ * @param {number} limit - Max results to return
+ * @param {number} lambda - Trade-off parameter (0.7 = 70% relevance, 30% diversity)
+ * @returns {Array} MMR-reranked results
+ */
+function applyMMR(candidates, limit, lambda = 0.7) {
+  if (candidates.length <= limit) return candidates;
+  const selected = [];
+  const remaining = [...candidates]; // shallow copy so the caller's array is left intact
+  // Always pick the top-scored result first
+  selected.push(remaining.shift());
+  while (selected.length < limit && remaining.length > 0) {
+    let bestIdx = -1;
+    let bestMMRScore = -Infinity;
+    for (let i = 0; i < remaining.length; i++) {
+      const candidate = remaining[i];
+      const relevance = parseFloat(candidate.hybrid_score);
+      // Calculate max similarity to any already-selected result
+      // Using content-based Jaccard similarity as a proxy
+      let maxSimToSelected = 0;
+      for (const sel of selected) {
+        const sim = jaccardSimilarity(candidate.content, sel.content);
+        if (sim > maxSimToSelected) maxSimToSelected = sim;
+      }
+      // MMR score = λ * relevance - (1 - λ) * max_similarity_to_selected
+      const mmrScore = lambda * relevance - (1 - lambda) * maxSimToSelected;
+      if (mmrScore > bestMMRScore) { // strict comparison: NaN never wins, earlier candidate wins ties
+        bestMMRScore = mmrScore;
+        bestIdx = i;
+      }
+    }
+    if (bestIdx >= 0) {
+      selected.push(remaining.splice(bestIdx, 1)[0]);
+    } else {
+      break; // no candidate produced a comparable score
+    }
+  }
-  return topResults;
+  return selected;
+}
+/**
+ * Compute Jaccard similarity between two text strings.
+ * Uses word-level tokenization for efficiency.
+ *
+ * Both texts are lower-cased and split on runs of whitespace; the result is
+ * |A ∩ B| / |A ∪ B| over the two sets of distinct tokens. Punctuation is not
+ * stripped, so "cache" and "cache." are different tokens.
+ *
+ * Splitting an empty string yields a single empty token, so two empty
+ * strings compare as 1, and leading/trailing whitespace contributes an
+ * empty token to the set. For string inputs each set therefore has at least
+ * one token and the `union === 0` guard is not expected to trigger.
+ *
+ * Both arguments must be strings: toLowerCase is called on them directly,
+ * so null or undefined throws a TypeError.
+ *
+ * @param {string} a - First text
+ * @param {string} b - Second text
+ * @returns {number} Similarity score between 0 and 1
+ */
+function jaccardSimilarity(a, b) {
+  const wordsA = new Set(a.toLowerCase().split(/\s+/));
+  const wordsB = new Set(b.toLowerCase().split(/\s+/));
+  let intersection = 0;
+  for (const word of wordsA) {
+    if (wordsB.has(word)) intersection++;
+  }
+  const union = wordsA.size + wordsB.size - intersection; // inclusion-exclusion on the two sets
+  return union === 0 ? 0 : intersection / union;
 }
 /**
@@ -234,8 +326,11 @@ export async function getOptimizedContext(queryText, maxTokens, agentId = null,
   }
   context += '=== END OF CONTEXT ===';
-  // Log retrieval attestation for this prompt generation
-  const attestation = createAttestation(queryText, accepted, agentId, sessionId);
+  // Bug 8 fix: Skip attestation when no results to avoid audit noise
+  let attestation = null;
+  if (accepted.length > 0) {
+    attestation = createAttestation(queryText, accepted, agentId, sessionId);
+  }
   return {
     context,
@@ -246,12 +341,26 @@ export async function getOptimizedContext(queryText, maxTokens, agentId = null,
 /**
  * Performs memory consolidation by merging highly similar memories.
+ * Bug 6 fix: DB mutations are wrapped in a transaction for atomicity.
  */
 export async function consolidateMemories() {
   const activeMemories = db.prepare('SELECT * FROM memories WHERE valid_until IS NULL').all();
   const consolidated = [];
   const visited = new Set();
+  // Pre-compile the transaction for atomic DB operations (Bug 6 fix)
+  const archiveAndMerge = db.transaction((canonicalId, mergedContent, dupIds) => {
+    // Update canonical memory with merged content
+    db.prepare('UPDATE memories SET content = ?, last_accessed = unixepoch() WHERE id = ?').run(mergedContent, canonicalId);
+    // Archive duplicates
+    for (const dupId of dupIds) {
+      db.prepare('UPDATE memories SET valid_until = unixepoch() WHERE id = ?').run(dupId);
+      db.prepare('INSERT INTO contradictions (old_memory_id, new_memory_id, resolution_reason) VALUES (?, ?, ?)')
+        .run(dupId, canonicalId, `Consolidated into canonical memory #${canonicalId}`);
+    }
+  });
   for (const mem of activeMemories) {
     if (visited.has(mem.id)) continue;
@@ -295,20 +404,17 @@ export async function consolidateMemories() {
       const uniqueContents = Array.from(new Set(contents));
       const mergedContent = uniqueContents.join('. ').replace(/\.\./g, '.');
-      // Update canonical memory with merged content
-      db.prepare('UPDATE memories SET content = ?, last_accessed = unixepoch() WHERE id = ?').run(mergedContent, canonical.id);
-      // Update vector embedding for canonical
+      // Generate new embedding OUTSIDE the transaction (async operation)
       const newEmbedding = await generateEmbedding(mergedContent);
+      // Run atomic DB transaction for all mutations (Bug 6 fix)
+      archiveAndMerge(canonical.id, mergedContent, dupesToArchive.map(d => d.id));
+      // Update vector embedding (also outside transaction since vec0 tables have their own handling)
       db.prepare('DELETE FROM memories_vec WHERE rowid = ?').run(canonical.id);
       db.prepare('INSERT INTO memories_vec (rowid, embedding) VALUES (?, ?)').run(BigInt(canonical.id), Buffer.from(newEmbedding.buffer));
-      // Archive duplicates
       for (const dup of dupesToArchive) {
-        db.prepare('UPDATE memories SET valid_until = unixepoch() WHERE id = ?').run(dup.id);
-        db.prepare('INSERT INTO contradictions (old_memory_id, new_memory_id, resolution_reason) VALUES (?, ?, ?)')
-          .run(dup.id, canonical.id, `Consolidated into canonical memory #${canonical.id}`);
         visited.add(dup.id);
       }

package/src/server.js CHANGED Viewed

@@ -11,7 +11,7 @@
 import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
 import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
-import { registerTools } from './tools.js';
+import { registerTools, cleanupWatchers } from './tools.js';
 import { applyTemporalDecay, closeDatabase } from './database.js';
 import { consolidateMemories } from './search.js';
@@ -23,7 +23,7 @@ export async function startServer() {
   // --- Create MCP server ---
   const server = new McpServer({
     name: 'persyst',
-    version: '1.1.0'
+    version: '2.1.0'
   });
   // --- Register all tools ---
@@ -46,11 +46,12 @@ export async function startServer() {
     }
   }, 86400000);
-  // --- Graceful shutdown ---
+  // --- Graceful shutdown (Bug 3 fix: also cleans up git watchers) ---
   const shutdown = () => {
     console.error('[persyst] Shutting down...');
     clearInterval(decayTimer);
     clearInterval(consolidationTimer);
+    cleanupWatchers();  // Bug 3 fix: stop all git repo watchers
     closeDatabase();
     process.exit(0);
   };