npm - persyst-mcp - Versions diffs - 2.1.1 → 2.1.2 - Mend

persyst-mcp 2.1.1 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/bin/extract-worker.js +387 -0
package/bin/extract.js +185 -0
package/bin/init.js +2 -1
package/bin/setup.js +9 -4
package/hooks/persyst-hook.js +195 -10
package/index.js +7 -0
package/package.json +7 -3
package/src/database.js +84 -16
package/src/extractor-heuristic.js +250 -0
package/src/search.js +18 -10
package/src/server.js +1 -1
package/src/tools.js +40 -26

package/src/extractor-heuristic.js ADDED Viewed

@@ -0,0 +1,250 @@
+/**
+ * extractor-heuristic.js — Tier 2: Zero-Cost Regex-Based Fact Extractor
+ *
+ * Scans raw conversation text for explicit developer preference signals:
+ *   "I prefer...", "we decided...", "always use...", "stack includes..."
+ *
+ * Design decisions:
+ *   - Runs synchronously — zero latency overhead on the hot path
+ *   - Conservative extraction: high-precision, low-recall
+ *   - Returns structured facts with confidence scores (0.0 - 1.0)
+ *   - Deduplication-ready: facts are normalized before output
+ *
+ * This is NOT the primary extraction tier. It's a lightweight safety net
+ * that catches the most obvious signals when Tier 3 (LLM) is unavailable
+ * or still processing asynchronously.
+ */
+// ============================================================
+// PATTERN DEFINITIONS
+// Ordered by specificity — most specific patterns first
+// Each pattern has: regex, category, confidence, and a template
+// to normalize the matched text into a clean fact statement.
+//
+// Conventions shared by the regexes below:
+//   - Every regex carries the `m` flag so `$` can end a capture at the
+//     end of its own line. Without it, `$` only matches at the very end
+//     of the input and an unpunctuated sentence on any earlier line is
+//     never matched (`.` cannot cross a newline).
+//   - A period ends a capture only when followed by whitespace or end of
+//     line, so dotted tokens ("Node.js", "v2.1", "127.0.0.1") stay whole.
+//   - Patterns that start with a short word (we / i / using / call / set …)
+//     are anchored with `\b` so they cannot start in the middle of a
+//     longer word (e.g. "confusing" must not be read as "using").
+//   - All regexes are global (`g`) and therefore stateful: callers must
+//     reset `lastIndex` before each use.
+// ============================================================
+const PATTERNS = [
+  // --- Decision patterns (highest confidence) ---
+  {
+    regex: /\b(?:we|i|the team)\s+(?:have\s+)?decided\s+(?:to\s+)?(?:use|go\s+with|adopt|switch\s+to|move\s+to)\s+(.+?)(?:\.(?=\s|$)|$)/gim,
+    category: 'decision',
+    confidence: 0.85,
+    template: (match) => `Decision: ${cleanFact(match[1])}`
+  },
+  {
+    regex: /\b(?:we(?:'re|\s+are)?\s+)?(?:going|moving)\s+(?:to\s+)?(?:use|adopt|switch\s+to|migrate\s+to)\s+(.+?)(?:\s+(?:for|because|since|as)\b|\.(?=\s|$)|$)/gim,
+    category: 'decision',
+    confidence: 0.80,
+    template: (match) => `Decision: Moving to ${cleanFact(match[1])}`
+  },
+  // --- Explicit preference patterns ---
+  {
+    regex: /\bi\s+(?:always\s+)?prefer\s+(.+?)(?:\s+(?:over|instead\s+of|rather\s+than)\s+(.+?))?(?:\.(?=\s|$)|$)/gim,
+    category: 'preference',
+    confidence: 0.80,
+    template: (match) => {
+      const pref = cleanFact(match[1]);
+      // match[2] is only set when an "over / instead of / rather than" clause was present
+      const alt = match[2] ? ` over ${cleanFact(match[2])}` : '';
+      return `Preference: ${pref}${alt}`;
+    }
+  },
+  {
+    regex: /\b(?:we|i)\s+(?:should\s+)?(?:always|never)\s+(?:use|avoid|include|add|write|create)\s+(.+?)(?:\.(?=\s|$)|$)/gim,
+    category: 'preference',
+    confidence: 0.75,
+    // Uses the whole match so the always/never polarity is kept in the fact
+    template: (match) => `Rule: ${cleanFact(match[0])}`
+  },
+  // --- Stack / technology patterns ---
+  {
+    regex: /(?:our|the|my)\s+(?:tech\s+)?stack\s+(?:includes?|uses?|is|has)\s+(.+?)(?:\.\s|\.$|$)/gim,
+    category: 'stack',
+    confidence: 0.85,
+    template: (match) => `Stack: ${cleanFact(match[1])}`
+  },
+  {
+    regex: /\b(?:we(?:'re|\s+are)?\s+)?using\s+(.+?)\s+(?:for|as)\s+(?:our|the)\s+(.+?)(?:\.(?=\s|$)|$)/gim,
+    category: 'stack',
+    confidence: 0.80,
+    template: (match) => `Stack: Using ${cleanFact(match[1])} for ${cleanFact(match[2])}`
+  },
+  {
+    regex: /(?:our|the)\s+(?:backend|frontend|database|api|server|client|infra(?:structure)?)\s+(?:is|uses?|runs?\s+on)\s+(.+?)(?:\.(?=\s|$)|$)/gim,
+    category: 'stack',
+    confidence: 0.80,
+    // Uses the whole match so the component name (backend, database, …) is kept
+    template: (match) => `Stack: ${cleanFact(match[0])}`
+  },
+  // --- Naming / convention patterns ---
+  {
+    regex: /\b(?:name|call|rename)\s+(?:it|this|the\s+\w+)\s+["'`]?(\w[\w\-\.]+)["'`]?/gim,
+    category: 'naming',
+    confidence: 0.70,
+    template: (match) => `Naming: ${cleanFact(match[0])}`
+  },
+  // --- Architecture patterns ---
+  {
+    regex: /(?:the\s+)?(?:project|app|application|system|architecture)\s+(?:follows?|uses?|is\s+based\s+on|implements?)\s+(.+?)(?:\s+pattern|\s+architecture)?(?:\.(?=\s|$)|$)/gim,
+    category: 'architecture',
+    confidence: 0.80,
+    template: (match) => `Architecture: ${cleanFact(match[1])}`
+  },
+  // --- Coding rule / style patterns ---
+  {
+    regex: /(?:always|never|must|should|don't|do\s+not)\s+(?:use|write|create|add|include|put|place|keep)\s+(.+?)(?:\.(?=\s|$)|$)/gim,
+    category: 'rule',
+    confidence: 0.70,
+    template: (match) => `Rule: ${cleanFact(match[0])}`
+  },
+  // --- Config / env patterns ---
+  {
+    regex: /\b(?:set|change|update|configure)\s+(?:the\s+)?(?:port|host|env|environment|config|setting)\s+(?:to|=|:)\s*["'`]?(.+?)["'`]?(?:\.(?=\s|$)|$)/gim,
+    category: 'config',
+    confidence: 0.75,
+    template: (match) => `Config: ${cleanFact(match[0])}`
+  }
+];
+// ============================================================
+// NOISE FILTERS
+// Line-anchored regexes: a line matching any one of them is skipped as code, errors, or system output
+// ============================================================
+const NOISE_PATTERNS = [
+  /^[\s]*(?:import|export|const|let|var|function|class|if|else|for|while|return|throw|try|catch)\s/, // leading JS keyword + whitespace (case-sensitive)
+  /^[\s]*[{}\[\]();]/,      // line opens with a brace, bracket, paren or semicolon
+  /^[\s]*\/\//,             // `//` line comments
+  /^[\s]*\*/,               // leading `*` (block-comment continuation; also matches `*` bullet lines)
+  /^[\s]*```/,              // code-fence marker line itself (not the lines between fences)
+  /^\s*$/,                  // empty or whitespace-only lines
+  /^(?:error|warning|info|debug|trace):/i, // log-level prefix at column 0, any case
+  /^\s*at\s+\w+/,           // stack trace lines
+  /^[A-Z_]{2,}=/,           // ENV variable assignments
+  /^\d{4}-\d{2}-\d{2}/,     // timestamp lines
+];
+/**
+ * Decide whether a single line should be discarded before extraction.
+ * The line counts as noise as soon as one entry of NOISE_PATTERNS matches it.
+ * @param {string} line - One line of the input text
+ * @returns {boolean} true when at least one noise pattern matches
+ */
+function isNoiseLine(line) {
+  for (const noiseRegex of NOISE_PATTERNS) {
+    if (noiseRegex.test(line)) {
+      return true;
+    }
+  }
+  return false;
+}
+// ============================================================
+// FACT NORMALIZATION
+// ============================================================
+/**
+ * Clean and normalize an extracted fact string.
+ * Collapses whitespace, removes trailing punctuation (including the
+ * sentence-ending period that whole-match templates carry along), strips
+ * surrounding quotes, and truncates to 200 characters.
+ *
+ * Stripping the trailing period matters for deduplication: the same
+ * sentence with and without a final "." must normalize to one fact.
+ * Periods inside the text ("Node.js") are left untouched.
+ *
+ * @param {string} raw - Matched text or capture group; falsy values yield ''
+ * @returns {string} Normalized fact text, at most 200 characters long
+ */
+function cleanFact(raw) {
+  if (!raw) return '';
+  return raw
+    .trim()
+    .replace(/[\s]+/g, ' ')        // collapse whitespace
+    .replace(/[.,;:]+$/, '')       // strip trailing punctuation, sentence period included
+    .replace(/^["'`]+|["'`]+$/g, '') // strip quotes
+    .slice(0, 200);                // hard max fact length
+}
+// ============================================================
+// MAIN EXTRACTION FUNCTION
+// ============================================================
+/**
+ * Extract facts from raw conversation text using regex heuristics.
+ *
+ * Noise lines (see isNoiseLine) are dropped first, then every pattern in
+ * PATTERNS whose confidence meets the threshold is run over the remaining
+ * text. Facts are de-duplicated by case- and whitespace-insensitive
+ * content, ranked by confidence, and only then truncated to maxFacts, so
+ * the budget keeps the highest-confidence facts rather than the ones
+ * produced by whichever patterns are listed first.
+ *
+ * @param {string} text - Raw conversation text (user prompt or full turn)
+ * @param {Object} [options={}]
+ * @param {number} [options.minConfidence=0.65] - Minimum confidence to include a fact
+ * @param {number} [options.maxFacts=10] - Maximum facts to return per call; values <= 0 yield []
+ * @returns {Array<{content: string, category: string, confidence: number}>}
+ *   Facts sorted by confidence, highest first. Empty for non-string or
+ *   very short (< 10 chars) input.
+ *
+ * @example
+ *   const facts = extractHeuristic("I prefer Postgres over SQLite.");
+ *   // => [{ content: "Preference: Postgres over SQLite", category: "preference", confidence: 0.80 }]
+ */
+export function extractHeuristic(text, options = {}) {
+  const {
+    minConfidence = 0.65,
+    maxFacts = 10
+  } = options;
+  if (!text || typeof text !== 'string' || text.length < 10) {
+    return [];
+  }
+  // A non-positive budget can never hold a fact; bail out before scanning
+  if (!(maxFacts > 0)) {
+    return [];
+  }
+  const facts = [];
+  const seen = new Set(); // dedup by normalized content
+  // Process line-by-line to filter noise
+  const cleanText = text
+    .split('\n')
+    .filter(line => !isNoiseLine(line))
+    .join('\n');
+  for (const pattern of PATTERNS) {
+    // A pattern below the threshold can never contribute — skip the scan
+    if (!(pattern.confidence >= minConfidence)) continue;
+    // Reset regex state for global matching
+    pattern.regex.lastIndex = 0;
+    let match;
+    while ((match = pattern.regex.exec(cleanText)) !== null) {
+      // Skip matches that are too short to be meaningful
+      if (match[0].length < 8) continue;
+      try {
+        const content = pattern.template(match);
+        if (!content || content.length < 5) continue;
+        // Normalize for dedup
+        const key = content.toLowerCase().replace(/\s+/g, ' ').trim();
+        if (seen.has(key)) continue;
+        seen.add(key);
+        facts.push({
+          content,
+          category: pattern.category,
+          confidence: pattern.confidence
+        });
+      } catch (_) {
+        // Template execution failed — skip this match (best-effort extractor)
+        continue;
+      }
+    }
+  }
+  // Rank first, truncate second: cutting off during the scan would keep
+  // facts in PATTERNS order and could drop a higher-confidence fact that a
+  // later pattern produces. The sort is stable, so ties keep scan order.
+  facts.sort((a, b) => b.confidence - a.confidence);
+  return facts.slice(0, maxFacts);
+}
+/**
+ * Quick check: does this text contain any extractable signals?
+ * Cheaper than running full extraction — use as a gate.
+ *
+ * The check runs the PATTERNS regexes against the raw text only: no noise
+ * filtering, confidence threshold, or minimum match length is applied, so
+ * `true` means "worth running extractHeuristic", not "facts guaranteed".
+ *
+ * @param {string} text
+ * @returns {boolean} false for non-string or very short (< 10 chars) input
+ */
+export function hasExtractableSignals(text) {
+  if (!text || typeof text !== 'string' || text.length < 10) return false;
+  return PATTERNS.some((pattern) => {
+    // The regexes are global and shared module-wide: test() advances
+    // lastIndex on a hit, so reset before and after to leave no state behind.
+    pattern.regex.lastIndex = 0;
+    const found = pattern.regex.test(text);
+    pattern.regex.lastIndex = 0;
+    return found;
+  });
+}

package/src/search.js CHANGED Viewed

@@ -31,7 +31,7 @@ let lastDataVersion = 0;
  * @param {string|null} sessionId - Session identifier
  * @returns {Promise<Array>} Ranked search results (with .attestation property attached)
  */
-export async function searchHybrid(queryText, limit = 5, agentId = null, sessionId = null) {
+export async function searchHybrid(queryText, limit = 5, agentId = null, sessionId = null, namespace = null) {
   // Sync in-memory cache with external DB changes using sqlite data_version
   try {
     const currentDataVersion = db.pragma('data_version', { simple: true });
@@ -44,7 +44,8 @@ export async function searchHybrid(queryText, limit = 5, agentId = null, session
   }
   // --- Check LRU cache first (Feature 1) ---
-  const cacheKey = LRUCache.key(queryText, limit);
+  // Include namespace in cache key to prevent cross-namespace cache hits
+  const cacheKey = LRUCache.key(`${namespace || 'all'}:${queryText}`, limit);
   const cached = searchCache.get(cacheKey);
   if (cached) {
     console.error(`[persyst-cache] Cache HIT for query: "${queryText.slice(0, 50)}..."`);
@@ -93,11 +94,12 @@ export async function searchHybrid(queryText, limit = 5, agentId = null, session
     }
   }
-  // --- Step 4: Fetch full details, apply reputation adjust, sort and return top N ---
+  // --- Step 4: Fetch full details, apply namespace filter, reputation adjust, sort and return top N ---
   const finalResults = combined
     .map(r => {
-      const memory = getMemoryById(r.id);
-      if (!memory) return null; // Memory was archived or deleted
+      // Use namespace-aware getMemoryById to filter by agent namespace
+      const memory = getMemoryById(r.id, namespace);
+      if (!memory) return null; // Memory was archived, deleted, or not in namespace
       // Boost memory access metrics
       boostMemory(r.id);
@@ -236,9 +238,9 @@ function jaccardSimilarity(a, b) {
  * @param {string|null} agentId - Querying agent identifier
  * @param {string|null} sessionId - Current session ID
  */
-export async function getOptimizedContext(queryText, maxTokens, agentId = null, sessionId = null) {
-  // 1. Run hybrid search to fetch top 20 memories
-  const searchHits = await searchHybrid(queryText, 20, agentId, sessionId);
+export async function getOptimizedContext(queryText, maxTokens, agentId = null, sessionId = null, namespace = null) {
+  // 1. Run hybrid search to fetch top 20 memories (namespace-aware)
+  const searchHits = await searchHybrid(queryText, 20, agentId, sessionId, namespace);
   const candidates = new Map();
   for (const hit of searchHits) {
@@ -356,8 +358,14 @@ export async function getOptimizedContext(queryText, maxTokens, agentId = null,
  * Performs memory consolidation by merging highly similar memories.
  * Bug 6 fix: DB mutations are wrapped in a transaction for atomicity.
  */
-export async function consolidateMemories() {
-  const activeMemories = db.prepare('SELECT * FROM memories WHERE valid_until IS NULL').all();
+export async function consolidateMemories(namespace = null) {
+  // Only consolidate within namespace boundaries to prevent cross-agent merging
+  const query = namespace
+    ? "SELECT * FROM memories WHERE valid_until IS NULL AND (namespace = ? OR namespace = 'shared')"
+    : 'SELECT * FROM memories WHERE valid_until IS NULL';
+  const activeMemories = namespace
+    ? db.prepare(query).all(namespace)
+    : db.prepare(query).all();
   const consolidated = [];
   const visited = new Set();

package/src/server.js CHANGED Viewed

@@ -23,7 +23,7 @@ export async function startServer() {
   // --- Create MCP server ---
   const server = new McpServer({
     name: 'persyst',
-    version: '2.1.1'
+    version: '2.1.2'
   });
   // --- Register all tools ---

package/src/tools.js CHANGED Viewed

@@ -39,7 +39,8 @@ import {
   getAnyMemoryById,
   searchVector,
   getMemoryById,
-  getActiveMemoryCount
+  getActiveMemoryCount,
+  getNamespaceStats
 } from './database.js';
 import { searchHybrid, getOptimizedContext, consolidateMemories } from './search.js';
 import { getRecentCommits } from './git.js';
@@ -117,14 +118,15 @@ export function registerTools(server) {
   // 1. ADD MEMORY
   server.tool(
     'add_memory',
-    'Store a new memory. It will be searchable by both keywords and meaning.',
+    'Store a new memory. It will be searchable by both keywords and meaning. Use shared=true to make it visible to all agents.',
     {
       content: z.string().describe('The memory content to store'),
       importance: z.number().min(0).max(1).default(1.0).describe('Importance score from 0 (low) to 1 (high)'),
-      agent_id: z.string().optional().describe('Agent ID for provenance tracking'),
-      session_id: z.string().optional().describe('Session ID')
+      agent_id: z.string().optional().describe('Agent ID for provenance tracking and namespace isolation'),
+      session_id: z.string().optional().describe('Session ID'),
+      shared: z.boolean().default(true).describe('If true, memory is visible to all agents. If false, only visible to this agent.')
     },
-    async ({ content, importance, agent_id, session_id }) => {
+    async ({ content, importance, agent_id, session_id, shared }) => {
       try {
         // Bug 7 + Feature 4: Validate content size
         const validation = validateMemoryContent(content);
@@ -132,13 +134,17 @@ export function registerTools(server) {
           return text({ error: validation.error });
         }
-        // Deduplication check
-        const existing = getMemoryByContent(content);
+        // Derive namespace from agent_id and shared flag
+        const namespace = (shared || !agent_id) ? 'shared' : agent_id;
+        // Deduplication check (namespace-aware)
+        const existing = getMemoryByContent(content, namespace);
         if (existing) {
           boostMemory(existing.id);
           return text({
             success: true,
             id: existing.id,
+            namespace,
             message: `Memory #${existing.id} already exists. Boosted importance.`
           });
         }
@@ -147,7 +153,7 @@ export function registerTools(server) {
           source_type: agent_id ? 'agent' : 'manual',
           source_id: agent_id || null,
           confidence: 1.0
-        });
+        }, namespace);
         const embedding = await generateEmbedding(content);
         insertVector(id, embedding);
@@ -165,7 +171,7 @@ export function registerTools(server) {
             const sim = Math.max(0, 1 - (hit.distance * hit.distance) / 2);
             if (sim > 0.75) {
-              const existingMemory = getMemoryById(hitId);
+              const existingMemory = getMemoryById(hitId, namespace);
               if (!existingMemory) continue;
               // Check if content is substantially different (Jaccard distance > 0.5)
@@ -187,7 +193,7 @@ export function registerTools(server) {
           console.error(`[persyst] Contradiction detection error: ${e.message}`);
         }
-        const result = { success: true, id, message: `Memory #${id} stored` };
+        const result = { success: true, id, namespace, message: `Memory #${id} stored` };
         if (contradictions.length > 0) {
           result.contradictions_detected = contradictions;
           result.message += `. Detected ${contradictions.length} contradiction(s) — older memories archived.`;
@@ -203,19 +209,22 @@ export function registerTools(server) {
   // 2. SEARCH MEMORIES
   server.tool(
     'search_memories',
-    'Search memories using hybrid keyword + semantic search with cryptographic attestation.',
+    'Search memories using hybrid keyword + semantic search with cryptographic attestation. Results are filtered by agent namespace.',
     {
       query: z.string().describe('What to search for'),
       limit: z.number().default(5).describe('Max results (default: 5)'),
-      agent_id: z.string().optional().describe('Agent ID calling this search'),
+      agent_id: z.string().optional().describe('Agent ID — filters results to this agent\'s namespace + shared'),
       session_id: z.string().optional().describe('Session ID')
     },
     async ({ query, limit, agent_id, session_id }) => {
       try {
-        const results = await searchHybrid(query, limit, agent_id, session_id);
+        // Derive namespace from agent_id (null = search all)
+        const namespace = agent_id || null;
+        const results = await searchHybrid(query, limit, agent_id, session_id, namespace);
         return text({
           results,
           count: results.length,
+          namespace: namespace || 'all',
           attestation: results.attestation
         });
       } catch (err) {
@@ -314,14 +323,16 @@ export function registerTools(server) {
   // 6. GET RECENT MEMORIES
   server.tool(
     'get_recent_memories',
-    'Get the most recently created memories, newest first.',
+    'Get the most recently created memories, newest first. Filtered by agent namespace if agent_id is provided.',
     {
-      limit: z.number().default(10).describe('How many to return (default: 10)')
+      limit: z.number().default(10).describe('How many to return (default: 10)'),
+      agent_id: z.string().optional().describe('Agent ID — filters to this agent\'s namespace + shared')
     },
-    async ({ limit }) => {
+    async ({ limit, agent_id }) => {
       try {
-        const memories = getRecentMemories(limit);
-        return text({ memories, count: memories.length });
+        const namespace = agent_id || null;
+        const memories = getRecentMemories(limit, namespace);
+        return text({ memories, count: memories.length, namespace: namespace || 'all' });
       } catch (err) {
         return text({ error: err.message });
       }
@@ -331,14 +342,16 @@ export function registerTools(server) {
   // 7. GET IMPORTANT MEMORIES
   server.tool(
     'get_important_memories',
-    'Get memories ranked by importance score, highest first.',
+    'Get memories ranked by importance score, highest first. Filtered by agent namespace if agent_id is provided.',
     {
-      limit: z.number().default(10).describe('How many to return (default: 10)')
+      limit: z.number().default(10).describe('How many to return (default: 10)'),
+      agent_id: z.string().optional().describe('Agent ID — filters to this agent\'s namespace + shared')
     },
-    async ({ limit }) => {
+    async ({ limit, agent_id }) => {
       try {
-        const memories = getImportantMemories(limit);
-        return text({ memories, count: memories.length });
+        const namespace = agent_id || null;
+        const memories = getImportantMemories(limit, namespace);
+        return text({ memories, count: memories.length, namespace: namespace || 'all' });
       } catch (err) {
         return text({ error: err.message });
       }
@@ -634,16 +647,17 @@ export function registerTools(server) {
   // 18. GET OPTIMIZED CONTEXT
   server.tool(
     'get_optimized_context',
-    'Compile a condensed context prompt within a token budget by hopping the knowledge graph and ranking by temporal decay + agent reputation.',
+    'Compile a condensed context prompt within a token budget by hopping the knowledge graph and ranking by temporal decay + agent reputation. Results filtered by agent namespace.',
     {
       query: z.string().describe('The search query context'),
       max_tokens: z.number().default(4000).describe('Token budget for LLM context compression (default: 4000)'),
-      agent_id: z.string().optional().describe('Agent ID requesting context'),
+      agent_id: z.string().optional().describe('Agent ID requesting context — filters to this agent\'s namespace + shared'),
       session_id: z.string().optional().describe('Session ID')
     },
     async ({ query, max_tokens, agent_id, session_id }) => {
       try {
-        const contextData = await getOptimizedContext(query, max_tokens, agent_id, session_id);
+        const namespace = agent_id || null;
+        const contextData = await getOptimizedContext(query, max_tokens, agent_id, session_id, namespace);
         return text(contextData);
       } catch (err) {
         return text({ error: err.message });