persyst-mcp 2.1.1 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,250 @@
1
+ /**
2
+ * extractor-heuristic.js — Tier 2: Zero-Cost Regex-Based Fact Extractor
3
+ *
4
+ * Scans raw conversation text for explicit developer preference signals:
5
+ * "I prefer...", "we decided...", "always use...", "stack includes..."
6
+ *
7
+ * Design decisions:
8
+ * - Runs synchronously — zero latency overhead on the hot path
9
+ * - Conservative extraction: high-precision, low-recall
10
+ * - Returns structured facts with confidence scores (0.0 - 1.0)
11
+ * - Deduplication-ready: facts are normalized before output
12
+ *
13
+ * This is NOT the primary extraction tier. It's a lightweight safety net
14
+ * that catches the most obvious signals when Tier 3 (LLM) is unavailable
15
+ * or still processing asynchronously.
16
+ */
17
+
18
+ // ============================================================
19
+ // PATTERN DEFINITIONS
20
+ // Ordered by specificity — most specific patterns first
21
+ // Each pattern has: regex, category, confidence, and a template
22
+ // to normalize the matched text into a clean fact statement.
23
+ // ============================================================
24
+
25
/**
 * Extraction rule table, scanned in array order by the extractor.
 *
 * Each entry has:
 *   - regex:      global, case-insensitive matcher. Because it carries the `g`
 *                 flag it is stateful — callers must reset `lastIndex` first.
 *   - category:   label copied onto every fact the rule produces.
 *   - confidence: fixed score copied onto every fact the rule produces.
 *   - template:   turns an exec() match array into the fact statement.
 *
 * Conventions shared by the sentence-style rules:
 *   - A leading `\b` stops a rule from firing in the middle of a word
 *     ("your stack ..." must not be read as "our stack ...", nor
 *     "causing ... for the ..." as "using ...").
 *   - The `m` flag makes `$` match at the end of every line, so a statement
 *     without a trailing period is still recognised when more lines follow.
 *   - A period only terminates the capture when whitespace or end-of-line
 *     follows it, so dotted names such as "Node.js" or "127.0.0.1" are kept
 *     whole.
 */
const PATTERNS = [
  // --- Decision patterns (highest confidence) ---
  {
    regex: /\b(?:we|i|the team)\s+(?:have\s+)?decided\s+(?:to\s+)?(?:use|go\s+with|adopt|switch\s+to|move\s+to)\s+(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'decision',
    confidence: 0.85,
    template: (match) => `Decision: ${cleanFact(match[1])}`
  },
  {
    regex: /\b(?:we(?:'re|\s+are)?\s+)?(?:going|moving)\s+(?:to\s+)?(?:use|adopt|switch\s+to|migrate\s+to)\s+(.+?)(?:\s+(?:for|because|since|as)\b|\.(?=\s|$)|$)/gim,
    category: 'decision',
    confidence: 0.80,
    template: (match) => `Decision: Moving to ${cleanFact(match[1])}`
  },

  // --- Explicit preference patterns ---
  {
    regex: /\bi\s+(?:always\s+)?prefer\s+(.+?)(?:\s+(?:over|instead\s+of|rather\s+than)\s+(.+?))?(?:\.(?=\s|$)|$)/gim,
    category: 'preference',
    confidence: 0.80,
    template: (match) => {
      const pref = cleanFact(match[1]);
      // match[2] is only set when an "over / instead of / rather than" clause exists
      const alt = match[2] ? ` over ${cleanFact(match[2])}` : '';
      return `Preference: ${pref}${alt}`;
    }
  },
  {
    regex: /\b(?:we|i)\s+(?:should\s+)?(?:always|never)\s+(?:use|avoid|include|add|write|create)\s+(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'preference',
    confidence: 0.75,
    template: (match) => `Rule: ${cleanFact(match[0])}`
  },

  // --- Stack / technology patterns ---
  {
    regex: /\b(?:our|the|my)\s+(?:tech\s+)?stack\s+(?:includes?|uses?|is|has)\s+(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'stack',
    confidence: 0.85,
    template: (match) => `Stack: ${cleanFact(match[1])}`
  },
  {
    regex: /\b(?:we(?:'re|\s+are)?\s+)?using\s+(.+?)\s+(?:for|as)\s+(?:our|the)\s+(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'stack',
    confidence: 0.80,
    template: (match) => `Stack: Using ${cleanFact(match[1])} for ${cleanFact(match[2])}`
  },
  {
    regex: /\b(?:our|the)\s+(?:backend|frontend|database|api|server|client|infra(?:structure)?)\s+(?:is|uses?|runs?\s+on)\s+(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'stack',
    confidence: 0.80,
    template: (match) => `Stack: ${cleanFact(match[0])}`
  },

  // --- Naming / convention patterns ---
  {
    // No sentence terminator here: the capture is a single identifier-like token.
    regex: /\b(?:name|call|rename)\s+(?:it|this|the\s+\w+)\s+["'`]?(\w[\w\-\.]+)["'`]?/gi,
    category: 'naming',
    confidence: 0.70,
    template: (match) => `Naming: ${cleanFact(match[0])}`
  },

  // --- Architecture patterns ---
  {
    regex: /\b(?:the\s+)?(?:project|app|application|system|architecture)\s+(?:follows?|uses?|is\s+based\s+on|implements?)\s+(.+?)(?:\s+pattern|\s+architecture)?(?:\.(?=\s|$)|$)/gim,
    category: 'architecture',
    confidence: 0.80,
    template: (match) => `Architecture: ${cleanFact(match[1])}`
  },

  // --- Coding rule / style patterns ---
  {
    regex: /\b(?:always|never|must|should|don't|do\s+not)\s+(?:use|write|create|add|include|put|place|keep)\s+(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'rule',
    confidence: 0.70,
    template: (match) => `Rule: ${cleanFact(match[0])}`
  },

  // --- Config / env patterns ---
  {
    // "reset" is listed explicitly: before the leading \b was added it matched
    // only by accident, through the "set" inside it.
    regex: /\b(?:set|reset|change|update|configure)\s+(?:the\s+)?(?:port|host|env|environment|config|setting)\s+(?:to|=|:)\s*["'`]?(.+?)["'`]?(?:\.(?=\s|$)|$)/gim,
    category: 'config',
    confidence: 0.75,
    template: (match) => `Config: ${cleanFact(match[0])}`
  }
];
110
+
111
+ // ============================================================
112
+ // NOISE FILTERS
113
+ // Skip lines that look like code, errors, or system output
114
+ // ============================================================
115
+
116
/**
 * Line signatures that mark a line as non-conversational (source code,
 * log output, stack traces, ...). None of these regexes is global, so
 * calling `.test()` on them carries no state between calls.
 */
const NOISE_PATTERNS = [
  // line opens with a JavaScript keyword followed by whitespace
  /^[\s]*(?:import|export|const|let|var|function|class|if|else|for|while|return|throw|try|catch)\s/,
  // line opens with a bracket, paren or semicolon
  /^[\s]*[{}\[\]();]/,
  // line comment
  /^[\s]*\/\//,
  // line opens with an asterisk (e.g. block-comment continuation)
  /^[\s]*\*/,
  // fenced code block delimiter
  /^[\s]*```/,
  // empty or whitespace-only line
  /^\s*$/,
  // log-level prefix
  /^(?:error|warning|info|debug|trace):/i,
  // stack trace lines
  /^\s*at\s+\w+/,
  // ENV variable assignments
  /^[A-Z_]{2,}=/,
  // timestamp lines
  /^\d{4}-\d{2}-\d{2}/,
];

/**
 * Decide whether a single line should be treated as noise (code, logs, etc.).
 * @param {string} line - One line of text, without its newline
 * @returns {boolean} true as soon as any noise signature matches
 */
function isNoiseLine(line) {
  for (const signature of NOISE_PATTERNS) {
    if (signature.test(line)) {
      return true;
    }
  }
  return false;
}
137
+
138
+ // ============================================================
139
+ // FACT NORMALIZATION
140
+ // ============================================================
141
+
142
/**
 * Clean and normalize an extracted fact string.
 *
 * Steps, in order: collapse whitespace runs to a single space, trim, strip
 * trailing punctuation (. , ; : ! ?), strip surrounding quotes/backticks,
 * strip punctuation that was sitting inside the closing quote, then cap the
 * result at 200 characters.
 *
 * Sentence-ending periods are removed as well so that the same statement
 * captured with and without its final "." normalizes to the same text.
 *
 * @param {string} raw - Captured text; any non-string value yields ''
 * @returns {string} Normalized fact text, at most 200 characters
 */
function cleanFact(raw) {
  if (typeof raw !== 'string') return '';

  const trailingPunctuation = /[\s.,;:!?]+$/;

  return raw
    .replace(/\s+/g, ' ')                 // collapse whitespace
    .trim()
    .replace(trailingPunctuation, '')     // strip trailing punctuation
    .replace(/^["'`]+|["'`]+$/g, '')      // strip quotes
    .trim()
    .replace(trailingPunctuation, '')     // punctuation that was inside the quotes
    .slice(0, 200)                        // hard max fact length
    .trimEnd();                           // the cut may land right after a space
}
157
+
158
+ // ============================================================
159
+ // MAIN EXTRACTION FUNCTION
160
+ // ============================================================
161
+
162
/**
 * Extract facts from raw conversation text using regex heuristics.
 *
 * The text is split on '\n', lines flagged by isNoiseLine() are dropped, and
 * every entry of PATTERNS is then run over the remaining text in table order.
 * Facts are de-duplicated on their lower-cased, whitespace-collapsed content.
 * Collection stops as soon as `maxFacts` facts have been gathered; the
 * confidence sort happens afterwards, so the cap is applied in pattern order,
 * not in confidence order.
 *
 * @param {string} text - Raw conversation text (user prompt or full turn)
 * @param {Object} [options={}] - null is treated like an empty object
 * @param {number} [options.minConfidence=0.65] - Minimum confidence to include a fact
 * @param {number} [options.maxFacts=10] - Maximum facts to extract per call; values <= 0 yield []
 * @returns {Array<{content: string, category: string, confidence: number}>}
 *   Facts sorted by confidence, highest first. Empty when `text` is not a
 *   string or is shorter than 10 characters.
 *
 * @example
 * const facts = extractHeuristic("I prefer Postgres over SQLite for our backend database.");
 * // => [{ content: "Preference: Postgres over SQLite for our backend database", category: "preference", confidence: 0.80 }]
 */
export function extractHeuristic(text, options = {}) {
  // The default parameter only covers `undefined`; tolerate an explicit null too.
  const {
    minConfidence = 0.65,
    maxFacts = 10
  } = options || {};

  if (!text || typeof text !== 'string' || text.length < 10 || maxFacts <= 0) {
    return [];
  }

  const facts = [];
  const seen = new Set(); // dedup by normalized content

  // Process line-by-line to filter noise
  const cleanText = text
    .split('\n')
    .filter((line) => !isNoiseLine(line))
    .join('\n');

  for (const pattern of PATTERNS) {
    // A rule below the threshold can never emit a fact: skip the scan, and
    // keep its would-be facts out of the dedup set.
    if (!(pattern.confidence >= minConfidence)) continue;

    // Reset regex state for global matching (the regex objects are shared)
    pattern.regex.lastIndex = 0;

    let match;
    while ((match = pattern.regex.exec(cleanText)) !== null) {
      // A zero-length match does not advance lastIndex on its own; step past
      // it so a permissive rule can never spin forever.
      if (match[0] === '') {
        pattern.regex.lastIndex += 1;
        continue;
      }

      // Skip matches that are too short to be meaningful
      if (match[0].length < 8) continue;

      let content;
      try {
        content = pattern.template(match);
      } catch (_) {
        // Best-effort tier: a failing template only costs this one match
        continue;
      }
      if (!content || content.length < 5) continue;

      // Normalize for dedup
      const key = content.toLowerCase().replace(/\s+/g, ' ').trim();
      if (seen.has(key)) continue;
      seen.add(key);

      facts.push({
        content,
        category: pattern.category,
        confidence: pattern.confidence
      });

      if (facts.length >= maxFacts) break;
    }

    if (facts.length >= maxFacts) break;
  }

  // Sort by confidence descending
  facts.sort((a, b) => b.confidence - a.confidence);

  return facts;
}
234
+
235
/**
 * Quick check: does this text contain any extractable signals?
 * Cheaper than running full extraction — use as a gate.
 *
 * Unlike extractHeuristic(), the text is tested as-is, with no noise-line
 * filtering, so a `true` result does not guarantee that full extraction
 * will return facts.
 *
 * @param {string} text
 * @returns {boolean} true when at least one entry of PATTERNS matches;
 *   false for non-strings and for strings shorter than 10 characters
 */
export function hasExtractableSignals(text) {
  // Same input contract as extractHeuristic: only real strings are scanned.
  if (typeof text !== 'string' || text.length < 10) return false;

  return PATTERNS.some(({ regex }) => {
    // The regexes are global and shared, so reset before and after testing
    // to avoid leaking a scan position to the next user.
    regex.lastIndex = 0;
    const found = regex.test(text);
    regex.lastIndex = 0;
    return found;
  });
}
package/src/search.js CHANGED
@@ -31,7 +31,7 @@ let lastDataVersion = 0;
31
31
  * @param {string|null} sessionId - Session identifier
32
32
  * @returns {Promise<Array>} Ranked search results (with .attestation property attached)
33
33
  */
34
- export async function searchHybrid(queryText, limit = 5, agentId = null, sessionId = null) {
34
+ export async function searchHybrid(queryText, limit = 5, agentId = null, sessionId = null, namespace = null) {
35
35
  // Sync in-memory cache with external DB changes using sqlite data_version
36
36
  try {
37
37
  const currentDataVersion = db.pragma('data_version', { simple: true });
@@ -44,7 +44,8 @@ export async function searchHybrid(queryText, limit = 5, agentId = null, session
44
44
  }
45
45
 
46
46
  // --- Check LRU cache first (Feature 1) ---
47
- const cacheKey = LRUCache.key(queryText, limit);
47
+ // Include namespace in cache key to prevent cross-namespace cache hits
48
+ const cacheKey = LRUCache.key(`${namespace || 'all'}:${queryText}`, limit);
48
49
  const cached = searchCache.get(cacheKey);
49
50
  if (cached) {
50
51
  console.error(`[persyst-cache] Cache HIT for query: "${queryText.slice(0, 50)}..."`);
@@ -93,11 +94,12 @@ export async function searchHybrid(queryText, limit = 5, agentId = null, session
93
94
  }
94
95
  }
95
96
 
96
- // --- Step 4: Fetch full details, apply reputation adjust, sort and return top N ---
97
+ // --- Step 4: Fetch full details, apply namespace filter, reputation adjust, sort and return top N ---
97
98
  const finalResults = combined
98
99
  .map(r => {
99
- const memory = getMemoryById(r.id);
100
- if (!memory) return null; // Memory was archived or deleted
100
+ // Use namespace-aware getMemoryById to filter by agent namespace
101
+ const memory = getMemoryById(r.id, namespace);
102
+ if (!memory) return null; // Memory was archived, deleted, or not in namespace
101
103
 
102
104
  // Boost memory access metrics
103
105
  boostMemory(r.id);
@@ -236,9 +238,9 @@ function jaccardSimilarity(a, b) {
236
238
  * @param {string|null} agentId - Querying agent identifier
237
239
  * @param {string|null} sessionId - Current session ID
238
240
  */
239
- export async function getOptimizedContext(queryText, maxTokens, agentId = null, sessionId = null) {
240
- // 1. Run hybrid search to fetch top 20 memories
241
- const searchHits = await searchHybrid(queryText, 20, agentId, sessionId);
241
+ export async function getOptimizedContext(queryText, maxTokens, agentId = null, sessionId = null, namespace = null) {
242
+ // 1. Run hybrid search to fetch top 20 memories (namespace-aware)
243
+ const searchHits = await searchHybrid(queryText, 20, agentId, sessionId, namespace);
242
244
  const candidates = new Map();
243
245
 
244
246
  for (const hit of searchHits) {
@@ -356,8 +358,14 @@ export async function getOptimizedContext(queryText, maxTokens, agentId = null,
356
358
  * Performs memory consolidation by merging highly similar memories.
357
359
  * Bug 6 fix: DB mutations are wrapped in a transaction for atomicity.
358
360
  */
359
- export async function consolidateMemories() {
360
- const activeMemories = db.prepare('SELECT * FROM memories WHERE valid_until IS NULL').all();
361
+ export async function consolidateMemories(namespace = null) {
362
+ // Only consolidate within namespace boundaries to prevent cross-agent merging
363
+ const query = namespace
364
+ ? "SELECT * FROM memories WHERE valid_until IS NULL AND (namespace = ? OR namespace = 'shared')"
365
+ : 'SELECT * FROM memories WHERE valid_until IS NULL';
366
+ const activeMemories = namespace
367
+ ? db.prepare(query).all(namespace)
368
+ : db.prepare(query).all();
361
369
  const consolidated = [];
362
370
  const visited = new Set();
363
371
 
package/src/server.js CHANGED
@@ -23,7 +23,7 @@ export async function startServer() {
23
23
  // --- Create MCP server ---
24
24
  const server = new McpServer({
25
25
  name: 'persyst',
26
- version: '2.1.1'
26
+ version: '2.1.2'
27
27
  });
28
28
 
29
29
  // --- Register all tools ---
package/src/tools.js CHANGED
@@ -39,7 +39,8 @@ import {
39
39
  getAnyMemoryById,
40
40
  searchVector,
41
41
  getMemoryById,
42
- getActiveMemoryCount
42
+ getActiveMemoryCount,
43
+ getNamespaceStats
43
44
  } from './database.js';
44
45
  import { searchHybrid, getOptimizedContext, consolidateMemories } from './search.js';
45
46
  import { getRecentCommits } from './git.js';
@@ -117,14 +118,15 @@ export function registerTools(server) {
117
118
  // 1. ADD MEMORY
118
119
  server.tool(
119
120
  'add_memory',
120
- 'Store a new memory. It will be searchable by both keywords and meaning.',
121
+ 'Store a new memory. It will be searchable by both keywords and meaning. Use shared=true to make it visible to all agents.',
121
122
  {
122
123
  content: z.string().describe('The memory content to store'),
123
124
  importance: z.number().min(0).max(1).default(1.0).describe('Importance score from 0 (low) to 1 (high)'),
124
- agent_id: z.string().optional().describe('Agent ID for provenance tracking'),
125
- session_id: z.string().optional().describe('Session ID')
125
+ agent_id: z.string().optional().describe('Agent ID for provenance tracking and namespace isolation'),
126
+ session_id: z.string().optional().describe('Session ID'),
127
+ shared: z.boolean().default(true).describe('If true, memory is visible to all agents. If false, only visible to this agent.')
126
128
  },
127
- async ({ content, importance, agent_id, session_id }) => {
129
+ async ({ content, importance, agent_id, session_id, shared }) => {
128
130
  try {
129
131
  // Bug 7 + Feature 4: Validate content size
130
132
  const validation = validateMemoryContent(content);
@@ -132,13 +134,17 @@ export function registerTools(server) {
132
134
  return text({ error: validation.error });
133
135
  }
134
136
 
135
- // Deduplication check
136
- const existing = getMemoryByContent(content);
137
+ // Derive namespace from agent_id and shared flag
138
+ const namespace = (shared || !agent_id) ? 'shared' : agent_id;
139
+
140
+ // Deduplication check (namespace-aware)
141
+ const existing = getMemoryByContent(content, namespace);
137
142
  if (existing) {
138
143
  boostMemory(existing.id);
139
144
  return text({
140
145
  success: true,
141
146
  id: existing.id,
147
+ namespace,
142
148
  message: `Memory #${existing.id} already exists. Boosted importance.`
143
149
  });
144
150
  }
@@ -147,7 +153,7 @@ export function registerTools(server) {
147
153
  source_type: agent_id ? 'agent' : 'manual',
148
154
  source_id: agent_id || null,
149
155
  confidence: 1.0
150
- });
156
+ }, namespace);
151
157
 
152
158
  const embedding = await generateEmbedding(content);
153
159
  insertVector(id, embedding);
@@ -165,7 +171,7 @@ export function registerTools(server) {
165
171
 
166
172
  const sim = Math.max(0, 1 - (hit.distance * hit.distance) / 2);
167
173
  if (sim > 0.75) {
168
- const existingMemory = getMemoryById(hitId);
174
+ const existingMemory = getMemoryById(hitId, namespace);
169
175
  if (!existingMemory) continue;
170
176
 
171
177
  // Check if content is substantially different (Jaccard distance > 0.5)
@@ -187,7 +193,7 @@ export function registerTools(server) {
187
193
  console.error(`[persyst] Contradiction detection error: ${e.message}`);
188
194
  }
189
195
 
190
- const result = { success: true, id, message: `Memory #${id} stored` };
196
+ const result = { success: true, id, namespace, message: `Memory #${id} stored` };
191
197
  if (contradictions.length > 0) {
192
198
  result.contradictions_detected = contradictions;
193
199
  result.message += `. Detected ${contradictions.length} contradiction(s) — older memories archived.`;
@@ -203,19 +209,22 @@ export function registerTools(server) {
203
209
  // 2. SEARCH MEMORIES
204
210
  server.tool(
205
211
  'search_memories',
206
- 'Search memories using hybrid keyword + semantic search with cryptographic attestation.',
212
+ 'Search memories using hybrid keyword + semantic search with cryptographic attestation. Results are filtered by agent namespace.',
207
213
  {
208
214
  query: z.string().describe('What to search for'),
209
215
  limit: z.number().default(5).describe('Max results (default: 5)'),
210
- agent_id: z.string().optional().describe('Agent ID calling this search'),
216
+ agent_id: z.string().optional().describe('Agent ID filters results to this agent\'s namespace + shared'),
211
217
  session_id: z.string().optional().describe('Session ID')
212
218
  },
213
219
  async ({ query, limit, agent_id, session_id }) => {
214
220
  try {
215
- const results = await searchHybrid(query, limit, agent_id, session_id);
221
+ // Derive namespace from agent_id (null = search all)
222
+ const namespace = agent_id || null;
223
+ const results = await searchHybrid(query, limit, agent_id, session_id, namespace);
216
224
  return text({
217
225
  results,
218
226
  count: results.length,
227
+ namespace: namespace || 'all',
219
228
  attestation: results.attestation
220
229
  });
221
230
  } catch (err) {
@@ -314,14 +323,16 @@ export function registerTools(server) {
314
323
  // 6. GET RECENT MEMORIES
315
324
  server.tool(
316
325
  'get_recent_memories',
317
- 'Get the most recently created memories, newest first.',
326
+ 'Get the most recently created memories, newest first. Filtered by agent namespace if agent_id is provided.',
318
327
  {
319
- limit: z.number().default(10).describe('How many to return (default: 10)')
328
+ limit: z.number().default(10).describe('How many to return (default: 10)'),
329
+ agent_id: z.string().optional().describe('Agent ID — filters to this agent\'s namespace + shared')
320
330
  },
321
- async ({ limit }) => {
331
+ async ({ limit, agent_id }) => {
322
332
  try {
323
- const memories = getRecentMemories(limit);
324
- return text({ memories, count: memories.length });
333
+ const namespace = agent_id || null;
334
+ const memories = getRecentMemories(limit, namespace);
335
+ return text({ memories, count: memories.length, namespace: namespace || 'all' });
325
336
  } catch (err) {
326
337
  return text({ error: err.message });
327
338
  }
@@ -331,14 +342,16 @@ export function registerTools(server) {
331
342
  // 7. GET IMPORTANT MEMORIES
332
343
  server.tool(
333
344
  'get_important_memories',
334
- 'Get memories ranked by importance score, highest first.',
345
+ 'Get memories ranked by importance score, highest first. Filtered by agent namespace if agent_id is provided.',
335
346
  {
336
- limit: z.number().default(10).describe('How many to return (default: 10)')
347
+ limit: z.number().default(10).describe('How many to return (default: 10)'),
348
+ agent_id: z.string().optional().describe('Agent ID — filters to this agent\'s namespace + shared')
337
349
  },
338
- async ({ limit }) => {
350
+ async ({ limit, agent_id }) => {
339
351
  try {
340
- const memories = getImportantMemories(limit);
341
- return text({ memories, count: memories.length });
352
+ const namespace = agent_id || null;
353
+ const memories = getImportantMemories(limit, namespace);
354
+ return text({ memories, count: memories.length, namespace: namespace || 'all' });
342
355
  } catch (err) {
343
356
  return text({ error: err.message });
344
357
  }
@@ -634,16 +647,17 @@ export function registerTools(server) {
634
647
  // 18. GET OPTIMIZED CONTEXT
635
648
  server.tool(
636
649
  'get_optimized_context',
637
- 'Compile a condensed context prompt within a token budget by hopping the knowledge graph and ranking by temporal decay + agent reputation.',
650
+ 'Compile a condensed context prompt within a token budget by hopping the knowledge graph and ranking by temporal decay + agent reputation. Results filtered by agent namespace.',
638
651
  {
639
652
  query: z.string().describe('The search query context'),
640
653
  max_tokens: z.number().default(4000).describe('Token budget for LLM context compression (default: 4000)'),
641
- agent_id: z.string().optional().describe('Agent ID requesting context'),
654
+ agent_id: z.string().optional().describe('Agent ID requesting context — filters to this agent\'s namespace + shared'),
642
655
  session_id: z.string().optional().describe('Session ID')
643
656
  },
644
657
  async ({ query, max_tokens, agent_id, session_id }) => {
645
658
  try {
646
- const contextData = await getOptimizedContext(query, max_tokens, agent_id, session_id);
659
+ const namespace = agent_id || null;
660
+ const contextData = await getOptimizedContext(query, max_tokens, agent_id, session_id, namespace);
647
661
  return text(contextData);
648
662
  } catch (err) {
649
663
  return text({ error: err.message });