npm - persyst-mcp - Versions diffs - 2.1.0 → 2.1.2 - Mend

persyst-mcp 2.1.0 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/bin/extract-worker.js +387 -0
package/bin/extract.js +185 -0
package/bin/ingest.js +82 -0
package/bin/init.js +174 -0
package/bin/setup.js +9 -4
package/hooks/persyst-hook.js +195 -10
package/index.js +20 -0
package/package.json +9 -3
package/src/database.js +84 -16
package/src/extractor-heuristic.js +250 -0
package/src/search.js +31 -10
package/src/server.js +1 -1
package/src/tools.js +40 -26

package/src/database.js CHANGED Viewed

@@ -72,6 +72,16 @@ try {
   db.exec('ALTER TABLE memories ADD COLUMN assertion_time INTEGER DEFAULT (unixepoch())');
 } catch (e) { /* Column already exists */ }
+// --- Migration: add namespace column for per-agent isolation ---
+try {
+  db.exec("ALTER TABLE memories ADD COLUMN namespace TEXT DEFAULT 'shared'");
+} catch (e) { /* Column already exists */ }
+// --- Index on namespace for fast filtered queries ---
+try {
+  db.exec('CREATE INDEX IF NOT EXISTS idx_memories_namespace ON memories (namespace)');
+} catch (e) { /* Index already exists */ }
 // --- Contradictions table ---
 db.exec(`
   CREATE TABLE IF NOT EXISTS contradictions (
@@ -208,7 +218,7 @@ console.error('[persyst] Schema initialized ✓');
 const stmts = {
   // -- Insert --
   insertMemory: db.prepare(
-    'INSERT INTO memories (content, importance_score) VALUES (?, ?)'
+    'INSERT INTO memories (content, importance_score, namespace) VALUES (?, ?, ?)'
   ),
   insertVec: db.prepare(
     'INSERT INTO memories_vec (rowid, embedding) VALUES (?, ?)'
@@ -246,15 +256,24 @@ const stmts = {
   getById: db.prepare(
     'SELECT * FROM memories WHERE id = ? AND valid_until IS NULL'
   ),
+  getByIdNs: db.prepare(
+    "SELECT * FROM memories WHERE id = ? AND (namespace = ? OR namespace = 'shared') AND valid_until IS NULL"
+  ),
   getAnyById: db.prepare(
     'SELECT * FROM memories WHERE id = ?'
   ),
   getRecent: db.prepare(
     'SELECT * FROM memories WHERE valid_until IS NULL ORDER BY created_at DESC LIMIT ?'
   ),
+  getRecentNs: db.prepare(
+    "SELECT * FROM memories WHERE (namespace = ? OR namespace = 'shared') AND valid_until IS NULL ORDER BY created_at DESC LIMIT ?"
+  ),
   getImportant: db.prepare(
     'SELECT * FROM memories WHERE valid_until IS NULL ORDER BY importance_score DESC LIMIT ?'
   ),
+  getImportantNs: db.prepare(
+    "SELECT * FROM memories WHERE (namespace = ? OR namespace = 'shared') AND valid_until IS NULL ORDER BY importance_score DESC LIMIT ?"
+  ),
   getProvenance: db.prepare(
     'SELECT * FROM provenance WHERE memory_id = ?'
   ),
@@ -353,6 +372,9 @@ const stmts = {
   findMemoryByContent: db.prepare(
     'SELECT id FROM memories WHERE content = ? AND valid_until IS NULL LIMIT 1'
   ),
+  findMemoryByContentNs: db.prepare(
+    "SELECT id FROM memories WHERE content = ? AND (namespace = ? OR namespace = 'shared') AND valid_until IS NULL LIMIT 1"
+  ),
   // -- Hash-prefix lookup for git dedup (Bug 1 fix) --
   findMemoryByHashPrefix: db.prepare(
@@ -363,6 +385,14 @@ const stmts = {
   getActiveMemoryCount: db.prepare(
     'SELECT COUNT(*) as count FROM memories WHERE valid_until IS NULL'
   ),
+  getActiveMemoryCountNs: db.prepare(
+    "SELECT COUNT(*) as count FROM memories WHERE (namespace = ? OR namespace = 'shared') AND valid_until IS NULL"
+  ),
+  // -- Namespace stats --
+  getNamespaceStats: db.prepare(
+    'SELECT namespace, COUNT(*) as count FROM memories WHERE valid_until IS NULL GROUP BY namespace ORDER BY count DESC'
+  ),
   // -- Memory History Chain (Feature 6: prepared statements) --
   getContradictionAncestors: db.prepare(
@@ -380,10 +410,14 @@ const stmts = {
 /**
  * Insert a new memory into the memories table and log its provenance.
+ * @param {string} content - Memory content
+ * @param {number} importance - Importance score (0-1)
+ * @param {Object} provenanceInfo - Provenance metadata
+ * @param {string} namespace - Namespace for agent isolation (default: 'shared')
  * @returns {number} The new memory's ID
  */
-export function insertMemory(content, importance = 1.0, provenanceInfo = null) {
-  const result = stmts.insertMemory.run(content, importance);
+export function insertMemory(content, importance = 1.0, provenanceInfo = null, namespace = 'shared') {
+  const result = stmts.insertMemory.run(content, importance, namespace || 'shared');
   const id = Number(result.lastInsertRowid);
   // Provenance Info handling
@@ -412,13 +446,16 @@ export function insertVector(id, embedding) {
 /**
  * Get a memory by ID. Boosts its importance on access.
+ * @param {number} id - Memory ID
+ * @param {string|null} namespace - Namespace filter (null = no filter)
  * @returns {object|null} The memory row, or null if not found
  */
-export function getMemory(id) {
-  const memory = stmts.getById.get(id);
+export function getMemory(id, namespace = null) {
+  const memory = namespace
+    ? stmts.getByIdNs.get(id, namespace)
+    : stmts.getById.get(id);
   if (memory) {
     boostMemory(id);
-    // Fetch and link provenance info
     const prov = getProvenance(id);
     memory.provenance = prov;
   }
@@ -439,10 +476,14 @@ export function getAnyMemoryById(id) {
 /**
  * Get a memory by ID WITHOUT boosting. Used internally for search results.
+ * @param {number} id - Memory ID
+ * @param {string|null} namespace - Namespace filter (null = no filter)
  * @returns {object|null} The memory row, or null if not found
  */
-export function getMemoryById(id) {
-  const memory = stmts.getById.get(id);
+export function getMemoryById(id, namespace = null) {
+  const memory = namespace
+    ? stmts.getByIdNs.get(id, namespace)
+    : stmts.getById.get(id);
   if (memory) {
     memory.provenance = getProvenance(id);
   }
@@ -480,9 +521,13 @@ export function deleteMemory(id) {
 /**
  * Get the N most recently created memories.
+ * @param {number} limit - Max results
+ * @param {string|null} namespace - Namespace filter (null = all)
  */
-export function getRecentMemories(limit = 10) {
-  const rows = stmts.getRecent.all(limit);
+export function getRecentMemories(limit = 10, namespace = null) {
+  const rows = namespace
+    ? stmts.getRecentNs.all(namespace, limit)
+    : stmts.getRecent.all(limit);
   rows.forEach(r => {
     r.provenance = getProvenance(r.id);
   });
@@ -491,9 +536,13 @@ export function getRecentMemories(limit = 10) {
 /**
  * Get the N most important memories (by importance_score).
+ * @param {number} limit - Max results
+ * @param {string|null} namespace - Namespace filter (null = all)
  */
-export function getImportantMemories(limit = 10) {
-  const rows = stmts.getImportant.all(limit);
+export function getImportantMemories(limit = 10, namespace = null) {
+  const rows = namespace
+    ? stmts.getImportantNs.all(namespace, limit)
+    : stmts.getImportant.all(limit);
   rows.forEach(r => {
     r.provenance = getProvenance(r.id);
   });
@@ -620,9 +669,13 @@ export function getMemoriesByEntity(entityId) {
  * Check if a memory with exact content already exists.
  * Used for deduplication.
  * @param {string} content - Exact content to match
+ * @param {string|null} namespace - Namespace filter (null = global dedup)
  * @returns {boolean}
  */
-export function memoryExists(content) {
+export function memoryExists(content, namespace = null) {
+  if (namespace) {
+    return stmts.findMemoryByContentNs.get(content, namespace) !== undefined;
+  }
   return stmts.findMemoryByContent.get(content) !== undefined;
 }
@@ -638,12 +691,24 @@ export function memoryExistsByHashPrefix(pattern) {
 /**
  * Get count of active (non-archived) memories.
+ * @param {string|null} namespace - Namespace filter; any falsy value (null, '') counts all namespaces. When set, rows in the 'shared' namespace are counted together with the given namespace.
  * @returns {number}
  */
-export function getActiveMemoryCount() {
+export function getActiveMemoryCount(namespace = null) {
+  if (namespace) {
+    return stmts.getActiveMemoryCountNs.get(namespace).count;
+  }
   return stmts.getActiveMemoryCount.get().count;
 }
+/**
+ * Get namespace breakdown stats.
+ * One row per namespace with its number of active memories
+ * (valid_until IS NULL), ordered by count, largest first.
+ * @returns {Array<{namespace: string, count: number}>}
+ */
+export function getNamespaceStats() {
+  return stmts.getNamespaceStats.all();
+}
 // ============================================================
 // DEDUPLICATION BY EXACT CONTENT
 // ============================================================
@@ -651,10 +716,13 @@ export function getActiveMemoryCount() {
 /**
  * Find memory by exact content.
  * @param {string} content
+ * @param {string|null} namespace - Namespace filter (null or any falsy value = global). When set, an active memory in either the given namespace or 'shared' matches.
  * @returns {object|null} The memory row, or null if not found
  */
-export function getMemoryByContent(content) {
-  const row = stmts.findMemoryByContent.get(content);
+export function getMemoryByContent(content, namespace = null) {
+  const row = namespace
+    ? stmts.findMemoryByContentNs.get(content, namespace)
+    : stmts.findMemoryByContent.get(content);
   return row ? getMemoryById(row.id) : null;
 }

package/src/extractor-heuristic.js ADDED Viewed

@@ -0,0 +1,250 @@
+/**
+ * extractor-heuristic.js — Tier 2: Zero-Cost Regex-Based Fact Extractor
+ *
+ * Scans raw conversation text for explicit developer preference signals:
+ *   "I prefer...", "we decided...", "always use...", "stack includes..."
+ *
+ * Design decisions:
+ *   - Runs synchronously — zero latency overhead on the hot path
+ *   - Conservative extraction: high-precision, low-recall
+ *   - Returns structured facts with confidence scores (0.0 - 1.0)
+ *   - Deduplication-ready: facts are normalized before output
+ *
+ * This is NOT the primary extraction tier. It's a lightweight safety net
+ * that catches the most obvious signals when Tier 3 (LLM) is unavailable
+ * or still processing asynchronously.
+ */
+// ============================================================
+// PATTERN DEFINITIONS
+// Ordered by specificity — most specific patterns first. Each entry has: regex (global, so lastIndex is stateful), category, confidence, and a template(match) that turns the exec() match array into a clean fact statement.
+// NOTE(review): only the "stack includes" regex carries the m flag. In every other regex that uses $, it matches only at the end of the whole input, so a sentence without a closing period on a non-final line is not matched.
+// NOTE(review): the leading alternations (we|i|our|the|going|...) have no \b in front, so they can also match the tail of a longer word.
+// ============================================================
+const PATTERNS = [
+  // --- Decision patterns (highest confidence) ---
+  {
+    regex: /(?:we|i|the team)\s+(?:have\s+)?decided\s+(?:to\s+)?(?:use|go\s+with|adopt|switch\s+to|move\s+to)\s+(.+?)(?:\.|$)/gi,
+    category: 'decision',
+    confidence: 0.85,
+    template: (match) => `Decision: ${cleanFact(match[1])}`
+  },
+  {
+    regex: /(?:we(?:'re|\s+are)?\s+)?(?:going|moving)\s+(?:to\s+)?(?:use|adopt|switch\s+to|migrate\s+to)\s+(.+?)(?:\s+(?:for|because|since|as)\b|\.|$)/gi,
+    category: 'decision',
+    confidence: 0.80,
+    template: (match) => `Decision: Moving to ${cleanFact(match[1])}`
+  },
+  // --- Explicit preference patterns ---
+  {
+    regex: /i\s+(?:always\s+)?prefer\s+(.+?)(?:\s+(?:over|instead\s+of|rather\s+than)\s+(.+?))?(?:\.|$)/gi,
+    category: 'preference',
+    confidence: 0.80,
+    template: (match) => {
+      const pref = cleanFact(match[1]);
+      const alt = match[2] ? ` over ${cleanFact(match[2])}` : '';
+      return `Preference: ${pref}${alt}`;
+    }
+  },
+  {
+    regex: /(?:we|i)\s+(?:should\s+)?(?:always|never)\s+(?:use|avoid|include|add|write|create)\s+(.+?)(?:\.|$)/gi,
+    category: 'preference',
+    confidence: 0.75,
+    template: (match) => `Rule: ${cleanFact(match[0])}`
+  },
+  // --- Stack / technology patterns ---
+  {
+    regex: /(?:our|the|my)\s+(?:tech\s+)?stack\s+(?:includes?|uses?|is|has)\s+(.+?)(?:\.\s|\.$|$)/gim,
+    category: 'stack',
+    confidence: 0.85,
+    template: (match) => `Stack: ${cleanFact(match[1])}`
+  },
+  {
+    regex: /(?:we(?:'re|\s+are)?\s+)?using\s+(.+?)\s+(?:for|as)\s+(?:our|the)\s+(.+?)(?:\.|$)/gi,
+    category: 'stack',
+    confidence: 0.80,
+    template: (match) => `Stack: Using ${cleanFact(match[1])} for ${cleanFact(match[2])}`
+  },
+  {
+    regex: /(?:our|the)\s+(?:backend|frontend|database|api|server|client|infra(?:structure)?)\s+(?:is|uses?|runs?\s+on)\s+(.+?)(?:\.|$)/gi,
+    category: 'stack',
+    confidence: 0.80,
+    template: (match) => `Stack: ${cleanFact(match[0])}`
+  },
+  // --- Naming / convention patterns ---
+  {
+    regex: /(?:name|call|rename)\s+(?:it|this|the\s+\w+)\s+["'`]?(\w[\w\-\.]+)["'`]?/gi,
+    category: 'naming',
+    confidence: 0.70,
+    template: (match) => `Naming: ${cleanFact(match[0])}`
+  },
+  // --- Architecture patterns ---
+  {
+    regex: /(?:the\s+)?(?:project|app|application|system|architecture)\s+(?:follows?|uses?|is\s+based\s+on|implements?)\s+(.+?)(?:\s+pattern|\s+architecture)?(?:\.|$)/gi,
+    category: 'architecture',
+    confidence: 0.80,
+    template: (match) => `Architecture: ${cleanFact(match[1])}`
+  },
+  // --- Coding rule / style patterns ---
+  {
+    regex: /(?:always|never|must|should|don't|do\s+not)\s+(?:use|write|create|add|include|put|place|keep)\s+(.+?)(?:\.|$)/gi,
+    category: 'rule',
+    confidence: 0.70,
+    template: (match) => `Rule: ${cleanFact(match[0])}`
+  },
+  // --- Config / env patterns ---
+  {
+    regex: /(?:set|change|update|configure)\s+(?:the\s+)?(?:port|host|env|environment|config|setting)\s+(?:to|=|:)\s*["'`]?(.+?)["'`]?(?:\.|$)/gi,
+    category: 'config',
+    confidence: 0.75,
+    template: (match) => `Config: ${cleanFact(match[0])}`
+  }
+];
+// ============================================================
+// NOISE FILTERS
+// A line matching any of these is skipped: code-like lines, code-fence marker lines, blank lines, log-level prefixes, stack traces, ENV assignments, timestamps. NOTE(review): only the fence marker lines themselves match; lines between two fences are kept unless another rule hits them.
+// ============================================================
+const NOISE_PATTERNS = [
+  /^[\s]*(?:import|export|const|let|var|function|class|if|else|for|while|return|throw|try|catch)\s/, // line starts with a JS keyword
+  /^[\s]*[{}\[\]();]/,      // line starts with a bracket, brace, paren, or semicolon
+  /^[\s]*\/\//,             // line comments
+  /^[\s]*\*/,               // leading asterisk (block-comment bodies); NOTE(review): also drops Markdown "* item" bullets
+  /^[\s]*```/,
+  /^\s*$/,
+  /^(?:error|warning|info|debug|trace):/i,
+  /^\s*at\s+\w+/,           // stack trace lines
+  /^[A-Z_]{2,}=/,           // ENV variable assignments
+  /^\d{4}-\d{2}-\d{2}/,     // timestamp lines
+];
+/**
+ * Check if a line looks like noise (code, logs, etc.)
+ * A line is noise when at least one NOISE_PATTERNS regex matches it.
+ * Those regexes carry no g or y flag, so repeated .test() calls keep no state.
+ * @param {string} line - A single line of text
+ * @returns {boolean} true when the line should be skipped
+ */
+function isNoiseLine(line) {
+  return NOISE_PATTERNS.some(p => p.test(line));
+}
+// ============================================================
+// FACT NORMALIZATION
+// ============================================================
+/**
+ * Clean and normalize an extracted fact string.
+ * Steps, in order: trim, collapse each whitespace run to one space, strip
+ * trailing commas/semicolons/colons, strip leading and trailing quote
+ * characters (double, single, backtick), then cut to 200 characters.
+ * A trailing period is NOT removed by this function.
+ * @param {string} raw
+ * @returns {string} The cleaned text; '' when raw is falsy
+ */
+function cleanFact(raw) {
+  if (!raw) return '';
+  return raw
+    .trim()
+    .replace(/[\s]+/g, ' ')        // collapse whitespace
+    .replace(/[,;:]+$/, '')        // strip trailing punctuation
+    .replace(/^["'`]+|["'`]+$/g, '') // strip quotes
+    .slice(0, 200);                // hard max fact length
+}
+// ============================================================
+// MAIN EXTRACTION FUNCTION
+// ============================================================
+/**
+ * Extract facts from raw conversation text using regex heuristics.
+ *
+ * Noise lines (see isNoiseLine) are removed, the remaining lines are re-joined
+ * with '\n', and every PATTERNS entry is run over that text in declaration
+ * order. Facts are de-duplicated on their lower-cased, whitespace-collapsed
+ * content and returned sorted by confidence, highest first.
+ *
+ * NOTE(review): the maxFacts cap is applied while scanning, before the sort,
+ * so later patterns are never tried once earlier ones have filled the quota.
+ *
+ * @param {string} text - Raw conversation text (user prompt or full turn)
+ * @param {Object} [options={}]
+ * @param {number} [options.minConfidence=0.65] - Minimum confidence to include a fact
+ * @param {number} [options.maxFacts=10] - Maximum facts to extract per call
+ * @returns {Array<{content: string, category: string, confidence: number}>}
+ *   Empty array when text is not a string or is shorter than 10 characters.
+ *
+ * @example
+ *   const facts = extractHeuristic("I prefer Postgres over SQLite for our backend database.");
+ *   // => [{ content: "Preference: Postgres over SQLite for our backend database", category: "preference", confidence: 0.80 }]
+ */
+export function extractHeuristic(text, options = {}) {
+  const {
+    minConfidence = 0.65,
+    maxFacts = 10
+  } = options;
+  if (!text || typeof text !== 'string' || text.length < 10) {
+    return [];
+  }
+  const facts = [];
+  const seen = new Set(); // dedup by normalized content
+  // Process line-by-line to filter noise
+  const lines = text.split('\n');
+  const cleanLines = lines.filter(line => !isNoiseLine(line));
+  const cleanText = cleanLines.join('\n');
+  for (const pattern of PATTERNS) {
+    // Reset regex state for global matching
+    pattern.regex.lastIndex = 0;
+    let match;
+    while ((match = pattern.regex.exec(cleanText)) !== null) {
+      // Skip matches that are too short to be meaningful
+      if (match[0].length < 8) continue;
+      try {
+        const content = pattern.template(match);
+        if (!content || content.length < 5) continue;
+        // Normalize for dedup
+        const key = content.toLowerCase().replace(/\s+/g, ' ').trim();
+        if (seen.has(key)) continue;
+        seen.add(key);
+        if (pattern.confidence >= minConfidence) {
+          facts.push({
+            content,
+            category: pattern.category,
+            confidence: pattern.confidence
+          });
+        }
+        if (facts.length >= maxFacts) break;
+      } catch (_) {
+        // Template execution failed — skip this match
+        continue;
+      }
+    }
+    if (facts.length >= maxFacts) break;
+  }
+  // Sort by confidence descending
+  facts.sort((a, b) => b.confidence - a.confidence);
+  return facts;
+}
+/**
+ * Quick check: does this text contain any extractable signals?
+ * Cheaper than running full extraction — use as a gate.
+ * Returns true as soon as any PATTERNS regex matches the raw text.
+ *
+ * NOTE(review): unlike extractHeuristic, this does not drop noise lines and
+ * does not check that text is a string, so it can return true for input from
+ * which extraction yields no facts.
+ * A successful .test() leaves that regex's lastIndex advanced; this function
+ * and extractHeuristic both reset lastIndex to 0 before using a pattern.
+ *
+ * @param {string} text
+ * @returns {boolean}
+ */
+export function hasExtractableSignals(text) {
+  if (!text || text.length < 10) return false;
+  for (const pattern of PATTERNS) {
+    pattern.regex.lastIndex = 0;
+    if (pattern.regex.test(text)) return true;
+  }
+  return false;
+}

package/src/search.js CHANGED Viewed

@@ -19,6 +19,8 @@ import { generateEmbedding } from './embeddings.js';
 import { createAttestation } from './attestation.js';
 import { searchCache, LRUCache } from './cache.js';
+let lastDataVersion = 0;
 /**
  * Search memories using both keyword and semantic strategies.
  * Results are cached in the LRU cache for repeated queries.
@@ -29,9 +31,21 @@ import { searchCache, LRUCache } from './cache.js';
  * @param {string|null} sessionId - Session identifier
  * @returns {Promise<Array>} Ranked search results (with .attestation property attached)
  */
-export async function searchHybrid(queryText, limit = 5, agentId = null, sessionId = null) {
+export async function searchHybrid(queryText, limit = 5, agentId = null, sessionId = null, namespace = null) {
+  // Sync in-memory cache with external DB changes using sqlite data_version
+  try {
+    const currentDataVersion = db.pragma('data_version', { simple: true });
+    if (currentDataVersion !== lastDataVersion) {
+      searchCache.invalidate();
+      lastDataVersion = currentDataVersion;
+    }
+  } catch (_) {
+    // Fallback if pragma fails
+  }
   // --- Check LRU cache first (Feature 1) ---
-  const cacheKey = LRUCache.key(queryText, limit);
+  // Include namespace in cache key to prevent cross-namespace cache hits
+  const cacheKey = LRUCache.key(`${namespace || 'all'}:${queryText}`, limit);
   const cached = searchCache.get(cacheKey);
   if (cached) {
     console.error(`[persyst-cache] Cache HIT for query: "${queryText.slice(0, 50)}..."`);
@@ -80,11 +94,12 @@ export async function searchHybrid(queryText, limit = 5, agentId = null, session
     }
   }
-  // --- Step 4: Fetch full details, apply reputation adjust, sort and return top N ---
+  // --- Step 4: Fetch full details, apply namespace filter, reputation adjust, sort and return top N ---
   const finalResults = combined
     .map(r => {
-      const memory = getMemoryById(r.id);
-      if (!memory) return null; // Memory was archived or deleted
+      // Use namespace-aware getMemoryById to filter by agent namespace
+      const memory = getMemoryById(r.id, namespace);
+      if (!memory) return null; // Memory was archived, deleted, or not in namespace
       // Boost memory access metrics
       boostMemory(r.id);
@@ -223,9 +238,9 @@ function jaccardSimilarity(a, b) {
  * @param {string|null} agentId - Querying agent identifier
  * @param {string|null} sessionId - Current session ID
  */
-export async function getOptimizedContext(queryText, maxTokens, agentId = null, sessionId = null) {
-  // 1. Run hybrid search to fetch top 20 memories
-  const searchHits = await searchHybrid(queryText, 20, agentId, sessionId);
+export async function getOptimizedContext(queryText, maxTokens, agentId = null, sessionId = null, namespace = null) {
+  // 1. Run hybrid search to fetch top 20 memories (namespace-aware)
+  const searchHits = await searchHybrid(queryText, 20, agentId, sessionId, namespace);
   const candidates = new Map();
   for (const hit of searchHits) {
@@ -343,8 +358,14 @@ export async function getOptimizedContext(queryText, maxTokens, agentId = null,
  * Performs memory consolidation by merging highly similar memories.
  * Bug 6 fix: DB mutations are wrapped in a transaction for atomicity.
  */
-export async function consolidateMemories() {
-  const activeMemories = db.prepare('SELECT * FROM memories WHERE valid_until IS NULL').all();
+export async function consolidateMemories(namespace = null) {
+  // Only consolidate within namespace boundaries to prevent cross-agent merging
+  const query = namespace
+    ? "SELECT * FROM memories WHERE valid_until IS NULL AND (namespace = ? OR namespace = 'shared')"
+    : 'SELECT * FROM memories WHERE valid_until IS NULL';
+  const activeMemories = namespace
+    ? db.prepare(query).all(namespace)
+    : db.prepare(query).all();
   const consolidated = [];
   const visited = new Set();

package/src/server.js CHANGED Viewed

@@ -23,7 +23,7 @@ export async function startServer() {
   // --- Create MCP server ---
   const server = new McpServer({
     name: 'persyst',
-    version: '2.1.0'
+    version: '2.1.2'
   });
   // --- Register all tools ---