persyst-mcp 1.1.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "persyst-mcp",
3
- "version": "1.1.0",
3
+ "version": "2.0.0",
4
4
  "description": "Local-first MCP memory server with hybrid keyword + semantic search for coding agents",
5
5
  "type": "module",
6
6
  "main": "index.js",
@@ -180,7 +180,6 @@ export function verifyChainIntegrity(attestationId) {
180
180
 
181
181
  // If there's a previous link, check it
182
182
  if (att.previous_hash) {
183
- const db = getLastAttestation(); // Wait, we can run raw query on DB or use prepared stmt
184
183
  const prevAtt = getAttestationByHash(att.previous_hash);
185
184
  if (!prevAtt) {
186
185
  return { valid: false, error: `Broken chain: Previous attestation with hash ${att.previous_hash} not found` };
package/src/cache.js ADDED
@@ -0,0 +1,122 @@
1
+ /**
2
+ * cache.js — LRU Query Result Cache
3
+ *
4
+ * In-memory LRU cache for search results to avoid
5
+ * re-computing embeddings for repeated queries.
6
+ *
7
+ * - Configurable max size (default: 100 entries)
8
+ * - Configurable TTL (default: 5 minutes)
9
+ * - Automatic eviction of oldest entries when full
10
+ * - Full invalidation on write operations
11
+ */
12
+
13
/**
 * Least-recently-used cache with a per-entry time-to-live.
 *
 * Entries live in a Map, whose iteration order is insertion order: the first
 * key is always the least recently used one and the last key the most
 * recently used one. Reads and writes re-insert the key to keep that order.
 */
export class LRUCache {
  /**
   * @param {number} maxSize - Maximum number of entries (default: 100).
   *   A value below 1 (or a non-number) disables storage entirely.
   * @param {number} ttlMs - Time-to-live in milliseconds (default: 300000 = 5 min)
   */
  constructor(maxSize = 100, ttlMs = 300000) {
    this.maxSize = maxSize;
    this.ttlMs = ttlMs;
    this.cache = new Map();
    this.hits = 0;
    this.misses = 0;
  }

  /**
   * Build a cache key from query parameters.
   *
   * Only the query text and the limit take part in the key, so two lookups
   * that differ in anything else share one entry.
   *
   * @param {string} query - The search query
   * @param {number} limit - The result limit
   * @returns {string} Cache key
   */
  static key(query, limit) {
    return `${query}::${limit}`;
  }

  /**
   * Look up a key. A live entry is promoted to "most recently used";
   * an expired entry is removed and reported as a miss.
   *
   * @param {string} key - Cache key
   * @returns {*|null} Cached value, or null on a miss or an expired entry
   */
  get(key) {
    const entry = this.cache.get(key);
    if (!entry) {
      this.misses++;
      return null;
    }

    // Expired entries are dropped eagerly so they stop occupying a slot.
    if (Date.now() - entry.timestamp > this.ttlMs) {
      this.cache.delete(key);
      this.misses++;
      return null;
    }

    // Re-insert to move the key to the end of the Map (most recently used).
    this.cache.delete(key);
    this.cache.set(key, entry);
    this.hits++;
    return entry.value;
  }

  /**
   * Store a value, evicting least-recently-used entries as needed so the
   * cache never holds more than `maxSize` entries.
   *
   * @param {string} key - Cache key
   * @param {*} value - Value to cache
   */
  set(key, value) {
    // Drop any previous entry first so the key is re-inserted at the end.
    this.cache.delete(key);

    // A capacity below one means "do not cache". Without this guard the
    // eviction below would remove nothing and the entry would still be stored.
    if (!(this.maxSize >= 1)) {
      return;
    }

    // Loop rather than a single check: if maxSize was lowered after entries
    // were added, one eviction would not restore the bound.
    while (this.cache.size >= this.maxSize) {
      const oldestKey = this.cache.keys().next().value;
      this.cache.delete(oldestKey);
    }

    this.cache.set(key, {
      value,
      timestamp: Date.now()
    });
  }

  /**
   * Drop every entry. Intended to be called after write operations so that
   * later searches are recomputed instead of served from stale results.
   * Hit/miss counters are left untouched.
   */
  invalidate() {
    const size = this.cache.size;
    this.cache.clear();
    if (size > 0) {
      console.error(`[persyst-cache] Invalidated ${size} cached entries`);
    }
  }

  /**
   * Snapshot of cache statistics for monitoring.
   * @returns {{ size: number, maxSize: number, ttlMs: number, hits: number, misses: number, hitRate: string }}
   */
  stats() {
    const total = this.hits + this.misses;
    return {
      size: this.cache.size,
      maxSize: this.maxSize,
      ttlMs: this.ttlMs,
      hits: this.hits,
      misses: this.misses,
      hitRate: total > 0 ? `${((this.hits / total) * 100).toFixed(1)}%` : '0%'
    };
  }
}
120
+
121
+ // Singleton instance for search results
122
+ export const searchCache = new LRUCache(100, 300000);
package/src/database.js CHANGED
@@ -33,6 +33,7 @@ const DB_PATH = process.env.NODE_ENV === 'test' ? ':memory:' : join(DB_DIR, 'per
33
33
  const db = new Database(DB_PATH);
34
34
  db.pragma('journal_mode = WAL'); // Better performance for concurrent reads
35
35
  db.pragma('foreign_keys = ON'); // Enforce referential integrity
36
+ db.pragma('mmap_size = 268435456'); // 256MB memory-mapped I/O for faster reads
36
37
 
37
38
  // Load sqlite-vec BEFORE creating any vec0 tables
38
39
  sqliteVec.load(db);
@@ -106,6 +107,11 @@ db.exec(`
106
107
  )
107
108
  `);
108
109
 
110
// --- Migration: add domain column to agent_stats ---
// The default value is written as a single-quoted SQL string literal.
// Double quotes denote an identifier in SQL; SQLite only tolerates them as
// strings through a legacy quirk that can be disabled at compile time, in
// which case the ALTER would fail and the column would never be added.
try {
  db.exec("ALTER TABLE agent_stats ADD COLUMN domain TEXT DEFAULT 'general'");
} catch (e) {
  // Re-running the migration on an already-upgraded database is expected to
  // fail with "duplicate column name". Anything else is a real problem and is
  // reported instead of being silently discarded.
  const message = String(e && e.message);
  if (!/duplicate column name/i.test(message)) {
    console.error(`[persyst] agent_stats.domain migration failed: ${message}`);
  }
}
114
+
109
115
  // --- Attestations table ---
110
116
  db.exec(`
111
117
  CREATE TABLE IF NOT EXISTS attestations (
@@ -346,6 +352,24 @@ const stmts = {
346
352
  // -- Dedup --
347
353
  findMemoryByContent: db.prepare(
348
354
  'SELECT id FROM memories WHERE content = ? AND valid_until IS NULL LIMIT 1'
355
+ ),
356
+
357
+ // -- Hash-prefix lookup for git dedup (Bug 1 fix) --
358
+ findMemoryByHashPrefix: db.prepare(
359
+ 'SELECT id FROM memories WHERE content LIKE ? AND valid_until IS NULL LIMIT 1'
360
+ ),
361
+
362
+ // -- Active memory count --
363
+ getActiveMemoryCount: db.prepare(
364
+ 'SELECT COUNT(*) as count FROM memories WHERE valid_until IS NULL'
365
+ ),
366
+
367
+ // -- Memory History Chain (Feature 6: prepared statements) --
368
+ getContradictionAncestors: db.prepare(
369
+ 'SELECT old_memory_id FROM contradictions WHERE new_memory_id = ?'
370
+ ),
371
+ getContradictionDescendants: db.prepare(
372
+ 'SELECT new_memory_id FROM contradictions WHERE old_memory_id = ?'
349
373
  )
350
374
  };
351
375
 
@@ -593,13 +617,31 @@ export function getMemoriesByEntity(entityId) {
593
617
  }
594
618
 
595
619
  /**
596
- * Check if a memory with similar content already exists.
597
- * Used for deduplication during git ingestion.
598
- * @param {string} pattern - SQL LIKE pattern to match
620
+ * Check if a memory with exact content already exists.
621
+ * Used for deduplication.
622
+ * @param {string} content - Exact content to match
599
623
  * @returns {boolean}
600
624
  */
601
- export function memoryExists(pattern) {
602
- return stmts.findMemoryByContent.get(pattern) !== undefined;
625
export function memoryExists(content) {
  // The statement returns undefined when no active row has exactly this content.
  const match = stmts.findMemoryByContent.get(content);
  return match !== undefined;
}
628
+
629
/**
 * Report whether any active memory's content matches a SQL LIKE pattern.
 * Git ingestion uses this to skip commits that were already imported, by
 * matching on the `[hashPrefix]` tag at the start of the content.
 * @param {string} pattern - SQL LIKE pattern (e.g. '[abc1234]%')
 * @returns {boolean} true when at least one matching row exists
 */
export function memoryExistsByHashPrefix(pattern) {
  const match = stmts.findMemoryByHashPrefix.get(pattern);
  return match !== undefined;
}
638
+
639
/**
 * Count the memories that are still active, i.e. whose valid_until is NULL.
 * @returns {number} Number of active memories
 */
export function getActiveMemoryCount() {
  const { count } = stmts.getActiveMemoryCount.get();
  return count;
}
604
646
 
605
647
  // ============================================================
@@ -713,14 +755,14 @@ export function getMemoryHistoryChain(memoryId) {
713
755
  if (versions.has(currentId)) continue;
714
756
  versions.add(currentId);
715
757
 
716
- // Find ancestors (replaced by current)
717
- const ancestors = db.prepare('SELECT old_memory_id FROM contradictions WHERE new_memory_id = ?').all(currentId);
758
+ // Find ancestors (replaced by current) — using prepared statement
759
+ const ancestors = stmts.getContradictionAncestors.all(currentId);
718
760
  ancestors.forEach(a => {
719
761
  if (!versions.has(a.old_memory_id)) queue.push(a.old_memory_id);
720
762
  });
721
763
 
722
- // Find descendants (replaces current)
723
- const descendants = db.prepare('SELECT new_memory_id FROM contradictions WHERE old_memory_id = ?').all(currentId);
764
+ // Find descendants (replaces current) — using prepared statement
765
+ const descendants = stmts.getContradictionDescendants.all(currentId);
724
766
  descendants.forEach(d => {
725
767
  if (!versions.has(d.new_memory_id)) queue.push(d.new_memory_id);
726
768
  });
package/src/git.js CHANGED
@@ -3,30 +3,36 @@
3
3
  *
4
4
  * Reads git log from a repository and converts commits into memories.
5
5
  * Performs commit categorization, file diff analysis, and imports notes.
6
+ *
7
+ * IMPORTANT: Uses async execFile instead of execSync to avoid blocking
8
+ * the Node.js event loop during git operations (Bug 4 fix).
6
9
  */
7
10
 
8
- import { execSync } from 'child_process';
11
+ import { execFile } from 'child_process';
12
+ import { promisify } from 'util';
13
+
14
+ const execFileAsync = promisify(execFile);
9
15
 
10
16
  /**
11
17
  * Read the N most recent git commits from a repository.
12
18
  *
13
19
  * @param {string} repoPath - Absolute path to the git repo
14
20
  * @param {number} count - Number of commits to read (default: 20)
15
- * @returns {Array<{hash: string, message: string, author: string, date: string, fullText: string, files: string[], importance: number}>}
21
+ * @returns {Promise<Array<{hash: string, message: string, author: string, date: string, fullText: string, files: string[], importance: number}>>}
16
22
  */
17
- export function getRecentCommits(repoPath, count = 20) {
23
+ export async function getRecentCommits(repoPath, count = 20) {
18
24
  try {
19
25
  // Use a delimiter to split commits reliably
20
26
  const DELIM = '---PERSYST-COMMIT---';
21
27
  const format = `${DELIM}%n%H%n%an%n%ai%n%s%n%b`;
22
28
 
23
- const output = execSync(
24
- `git log -n ${count} --pretty=format:"${format}"`,
29
+ const { stdout: output } = await execFileAsync(
30
+ 'git',
31
+ ['log', `-n`, `${count}`, `--pretty=format:${format}`],
25
32
  {
26
33
  cwd: repoPath,
27
34
  encoding: 'utf-8',
28
35
  timeout: 10000, // 10s timeout
29
- stdio: ['pipe', 'pipe', 'pipe'] // Suppress stderr
30
36
  }
31
37
  );
32
38
 
@@ -45,7 +51,7 @@ export function getRecentCommits(repoPath, count = 20) {
45
51
  const body = lines.slice(4).join(' ').trim();
46
52
 
47
53
  // Fetch git notes if available (represents PR metadata)
48
- const notes = getGitNotes(repoPath, hash);
54
+ const notes = await getGitNotes(repoPath, hash);
49
55
 
50
56
  // Build a readable memory string
51
57
  let fullText = body
@@ -57,7 +63,7 @@ export function getRecentCommits(repoPath, count = 20) {
57
63
  }
58
64
 
59
65
  // Fetch files touched
60
- const files = getCommitFiles(repoPath, hash);
66
+ const files = await getCommitFiles(repoPath, hash);
61
67
 
62
68
  // Classify importance based on message
63
69
  const classification = classifyCommit(subject);
@@ -92,17 +98,17 @@ export function getRecentCommits(repoPath, count = 20) {
92
98
  *
93
99
  * @param {string} repoPath - Absolute path to the git repo
94
100
  * @param {string} hash - Full commit hash
95
- * @returns {string[]} List of changed file paths
101
+ * @returns {Promise<string[]>} List of changed file paths
96
102
  */
97
- export function getCommitFiles(repoPath, hash) {
103
+ export async function getCommitFiles(repoPath, hash) {
98
104
  try {
99
- const output = execSync(
100
- `git diff-tree --no-commit-id --name-only -r ${hash}`,
105
+ const { stdout: output } = await execFileAsync(
106
+ 'git',
107
+ ['diff-tree', '--no-commit-id', '--name-only', '-r', hash],
101
108
  {
102
109
  cwd: repoPath,
103
110
  encoding: 'utf-8',
104
111
  timeout: 5000,
105
- stdio: ['pipe', 'pipe', 'pipe']
106
112
  }
107
113
  );
108
114
  return output.trim().split('\n').filter(Boolean);
@@ -114,15 +120,15 @@ export function getCommitFiles(repoPath, hash) {
114
120
  /**
115
121
  * Fetch git notes (representing PR metadata or additional annotations).
116
122
  */
117
- export function getGitNotes(repoPath, hash) {
123
+ export async function getGitNotes(repoPath, hash) {
118
124
  try {
119
- const output = execSync(
120
- `git notes show ${hash}`,
125
+ const { stdout: output } = await execFileAsync(
126
+ 'git',
127
+ ['notes', 'show', hash],
121
128
  {
122
129
  cwd: repoPath,
123
130
  encoding: 'utf-8',
124
131
  timeout: 3000,
125
- stdio: ['pipe', 'pipe', 'pipe']
126
132
  }
127
133
  );
128
134
  return output.trim();
package/src/search.js CHANGED
@@ -3,7 +3,8 @@
3
3
  *
4
4
  * Combines keyword and semantic searches, integrates temporal decay,
5
5
  * applies agent reputation scores, generates cryptographic search attestations,
6
- * and builds graph-hopped optimized LLM context prompts.
6
+ * builds graph-hopped optimized LLM context prompts, and applies MMR
7
+ * for diverse result retrieval.
7
8
  */
8
9
 
9
10
  import db, {
@@ -16,9 +17,11 @@ import db, {
16
17
  } from './database.js';
17
18
  import { generateEmbedding } from './embeddings.js';
18
19
  import { createAttestation } from './attestation.js';
20
+ import { searchCache, LRUCache } from './cache.js';
19
21
 
20
22
  /**
21
23
  * Search memories using both keyword and semantic strategies.
24
+ * Results are cached in the LRU cache for repeated queries.
22
25
  *
23
26
  * @param {string} queryText - What to search for
24
27
  * @param {number} limit - Max results to return (default: 5)
@@ -27,6 +30,14 @@ import { createAttestation } from './attestation.js';
27
30
  * @returns {Promise<Array>} Ranked search results (with .attestation property attached)
28
31
  */
29
32
  export async function searchHybrid(queryText, limit = 5, agentId = null, sessionId = null) {
33
+ // --- Check LRU cache first (Feature 1) ---
34
+ const cacheKey = LRUCache.key(queryText, limit);
35
+ const cached = searchCache.get(cacheKey);
36
+ if (cached) {
37
+ console.error(`[persyst-cache] Cache HIT for query: "${queryText.slice(0, 50)}..."`);
38
+ return cached;
39
+ }
40
+
30
41
  // --- Step 1: Keyword search (fast, exact matches) ---
31
42
  const keywordHits = searchKeyword(queryText, limit * 2);
32
43
  const keywordIds = new Set(keywordHits.map(r => r.id));
@@ -112,15 +123,96 @@ export async function searchHybrid(queryText, limit = 5, agentId = null, session
112
123
 
113
124
  // Sort by final score descending
114
125
  finalResults.sort((a, b) => parseFloat(b.hybrid_score) - parseFloat(a.hybrid_score));
115
- const topResults = finalResults.slice(0, limit);
126
+
127
+ // --- Step 5: Apply MMR for diverse retrieval (Feature 3) ---
128
+ const mmrResults = applyMMR(finalResults, limit);
116
129
 
117
130
  // Generate cryptographic attestation for audit trails
118
- const attestation = createAttestation(queryText, topResults, agentId, sessionId);
131
+ const attestation = createAttestation(queryText, mmrResults, agentId, sessionId);
119
132
 
120
133
  // Attach attestation object directly to the array to preserve compatibility with existing tests
121
- topResults.attestation = attestation;
134
+ mmrResults.attestation = attestation;
135
+
136
+ // --- Store in LRU cache (Feature 1) ---
137
+ searchCache.set(cacheKey, mmrResults);
138
+
139
+ return mmrResults;
140
+ }
141
+
142
/**
 * Split text into a set of lower-cased, whitespace-separated words.
 * Empty tokens (from empty or whitespace-padded text) are discarded, and a
 * non-string input yields an empty set instead of throwing.
 *
 * @param {*} text - Text to tokenize
 * @returns {Set<string>} Distinct words
 */
function tokenizeWords(text) {
  if (typeof text !== 'string') return new Set();
  return new Set(text.toLowerCase().split(/\s+/).filter(Boolean));
}

/**
 * Jaccard similarity of two word sets: |A ∩ B| / |A ∪ B|.
 *
 * @param {Set<string>} setA - First word set
 * @param {Set<string>} setB - Second word set
 * @returns {number} Similarity between 0 and 1 (0 when both sets are empty)
 */
function jaccardOfSets(setA, setB) {
  let intersection = 0;
  for (const word of setA) {
    if (setB.has(word)) intersection++;
  }

  const union = setA.size + setB.size - intersection;
  return union === 0 ? 0 : intersection / union;
}

/**
 * Apply Maximal Marginal Relevance (MMR) re-ranking for diverse results.
 *
 * MMR balances relevance with diversity by penalizing candidates that are
 * too similar to already-selected results. Similarity is word-level Jaccard
 * similarity of the candidates' `content`.
 *
 * The input array is never mutated. When it already fits within `limit` it is
 * returned as-is (same array object).
 *
 * @param {Array} candidates - Scored search results, best first; each item
 *   carries `hybrid_score` (parsed with parseFloat) and `content`
 * @param {number} limit - Max results to return; a non-positive limit yields []
 * @param {number} lambda - Trade-off parameter (0.7 = 70% relevance, 30% diversity)
 * @returns {Array} MMR-reranked results
 */
function applyMMR(candidates, limit, lambda = 0.7) {
  // Without this guard the "always pick the top result" step below would
  // return one item even when the caller asked for none.
  if (!(limit > 0)) return [];
  if (candidates.length <= limit) return candidates;

  // Tokenize and parse each candidate once instead of on every comparison.
  const pool = candidates.map((item) => {
    const score = parseFloat(item.hybrid_score);
    return {
      item,
      // An unparsable score would make every MMR comparison false (NaN) and
      // end selection early; treat it as zero relevance instead.
      relevance: Number.isFinite(score) ? score : 0,
      words: tokenizeWords(item.content),
      maxSim: 0
    };
  });

  // Always pick the top-scored result first
  let latest = pool.shift();
  const selected = [latest.item];

  while (selected.length < limit && pool.length > 0) {
    let bestIdx = 0;
    let bestMMRScore = -Infinity;

    for (let i = 0; i < pool.length; i++) {
      const entry = pool[i];

      // maxSim already holds the maximum against all earlier picks, so only
      // the most recent pick needs to be compared.
      const sim = jaccardOfSets(entry.words, latest.words);
      if (sim > entry.maxSim) entry.maxSim = sim;

      // MMR score = λ * relevance - (1 - λ) * max_similarity_to_selected
      const mmrScore = lambda * entry.relevance - (1 - lambda) * entry.maxSim;

      // Strict comparison keeps the earlier (higher-ranked) candidate on ties.
      if (mmrScore > bestMMRScore) {
        bestMMRScore = mmrScore;
        bestIdx = i;
      }
    }

    latest = pool.splice(bestIdx, 1)[0];
    selected.push(latest.item);
  }

  return selected;
}

/**
 * Compute Jaccard similarity between two text strings.
 * Uses word-level tokenization: lower-cased, split on whitespace.
 *
 * @param {string} a - First text
 * @param {string} b - Second text
 * @returns {number} Similarity score between 0 and 1 (0 when neither text has words)
 */
function jaccardSimilarity(a, b) {
  return jaccardOfSets(tokenizeWords(a), tokenizeWords(b));
}
125
217
 
126
218
  /**
@@ -234,8 +326,11 @@ export async function getOptimizedContext(queryText, maxTokens, agentId = null,
234
326
  }
235
327
  context += '=== END OF CONTEXT ===';
236
328
 
237
- // Log retrieval attestation for this prompt generation
238
- const attestation = createAttestation(queryText, accepted, agentId, sessionId);
329
+ // Bug 8 fix: Skip attestation when no results to avoid audit noise
330
+ let attestation = null;
331
+ if (accepted.length > 0) {
332
+ attestation = createAttestation(queryText, accepted, agentId, sessionId);
333
+ }
239
334
 
240
335
  return {
241
336
  context,
@@ -246,12 +341,26 @@ export async function getOptimizedContext(queryText, maxTokens, agentId = null,
246
341
 
247
342
  /**
248
343
  * Performs memory consolidation by merging highly similar memories.
344
+ * Bug 6 fix: DB mutations are wrapped in a transaction for atomicity.
249
345
  */
250
346
  export async function consolidateMemories() {
251
347
  const activeMemories = db.prepare('SELECT * FROM memories WHERE valid_until IS NULL').all();
252
348
  const consolidated = [];
253
349
  const visited = new Set();
254
350
 
351
+ // Pre-compile the transaction for atomic DB operations (Bug 6 fix)
352
+ const archiveAndMerge = db.transaction((canonicalId, mergedContent, dupIds) => {
353
+ // Update canonical memory with merged content
354
+ db.prepare('UPDATE memories SET content = ?, last_accessed = unixepoch() WHERE id = ?').run(mergedContent, canonicalId);
355
+
356
+ // Archive duplicates
357
+ for (const dupId of dupIds) {
358
+ db.prepare('UPDATE memories SET valid_until = unixepoch() WHERE id = ?').run(dupId);
359
+ db.prepare('INSERT INTO contradictions (old_memory_id, new_memory_id, resolution_reason) VALUES (?, ?, ?)')
360
+ .run(dupId, canonicalId, `Consolidated into canonical memory #${canonicalId}`);
361
+ }
362
+ });
363
+
255
364
  for (const mem of activeMemories) {
256
365
  if (visited.has(mem.id)) continue;
257
366
 
@@ -295,20 +404,17 @@ export async function consolidateMemories() {
295
404
  const uniqueContents = Array.from(new Set(contents));
296
405
  const mergedContent = uniqueContents.join('. ').replace(/\.\./g, '.');
297
406
 
298
- // Update canonical memory with merged content
299
- db.prepare('UPDATE memories SET content = ?, last_accessed = unixepoch() WHERE id = ?').run(mergedContent, canonical.id);
300
-
301
- // Update vector embedding for canonical
407
+ // Generate new embedding OUTSIDE the transaction (async operation)
302
408
  const newEmbedding = await generateEmbedding(mergedContent);
409
+
410
+ // Run atomic DB transaction for all mutations (Bug 6 fix)
411
+ archiveAndMerge(canonical.id, mergedContent, dupesToArchive.map(d => d.id));
412
+
413
+ // Update vector embedding (also outside transaction since vec0 tables have their own handling)
303
414
  db.prepare('DELETE FROM memories_vec WHERE rowid = ?').run(canonical.id);
304
415
  db.prepare('INSERT INTO memories_vec (rowid, embedding) VALUES (?, ?)').run(BigInt(canonical.id), Buffer.from(newEmbedding.buffer));
305
416
 
306
- // Archive duplicates
307
417
  for (const dup of dupesToArchive) {
308
- db.prepare('UPDATE memories SET valid_until = unixepoch() WHERE id = ?').run(dup.id);
309
- db.prepare('INSERT INTO contradictions (old_memory_id, new_memory_id, resolution_reason) VALUES (?, ?, ?)')
310
- .run(dup.id, canonical.id, `Consolidated into canonical memory #${canonical.id}`);
311
-
312
418
  visited.add(dup.id);
313
419
  }
314
420
 
package/src/server.js CHANGED
@@ -11,7 +11,7 @@
11
11
 
12
12
  import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
13
13
  import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
14
- import { registerTools } from './tools.js';
14
+ import { registerTools, cleanupWatchers } from './tools.js';
15
15
  import { applyTemporalDecay, closeDatabase } from './database.js';
16
16
  import { consolidateMemories } from './search.js';
17
17
 
@@ -23,7 +23,7 @@ export async function startServer() {
23
23
  // --- Create MCP server ---
24
24
  const server = new McpServer({
25
25
  name: 'persyst',
26
- version: '1.1.0'
26
+ version: '2.0.0'
27
27
  });
28
28
 
29
29
  // --- Register all tools ---
@@ -46,11 +46,12 @@ export async function startServer() {
46
46
  }
47
47
  }, 86400000);
48
48
 
49
- // --- Graceful shutdown ---
49
+ // --- Graceful shutdown (Bug 3 fix: also cleans up git watchers) ---
50
50
  const shutdown = () => {
51
51
  console.error('[persyst] Shutting down...');
52
52
  clearInterval(decayTimer);
53
53
  clearInterval(consolidationTimer);
54
+ cleanupWatchers(); // Bug 3 fix: stop all git repo watchers
54
55
  closeDatabase();
55
56
  process.exit(0);
56
57
  };
package/src/tools.js CHANGED
@@ -2,6 +2,13 @@
2
2
  * tools.js — MCP Tool Definitions & Handlers
3
3
  *
4
4
  * Defines all 19 tools that AI agents can call via MCP.
5
+ *
6
+ * v2.0 changes:
7
+ * - Bug 1: Uses memoryExistsByHashPrefix for git dedup
8
+ * - Bug 3: Exports cleanupWatchers for graceful shutdown
9
+ * - Bug 7 + Feature 4: Memory content size validation
10
+ * - Feature 1: Cache invalidation on write operations
11
+ * - Feature 2: Contradiction detection on add_memory
5
12
  */
6
13
 
7
14
  import { z } from 'zod';
@@ -21,6 +28,7 @@ import {
21
28
  getMemoriesByEntity,
22
29
  getAllEntities,
23
30
  memoryExists,
31
+ memoryExistsByHashPrefix,
24
32
  getMemoryByContent,
25
33
  boostMemory,
26
34
  logContradiction,
@@ -28,15 +36,67 @@ import {
28
36
  getAttestationsByDateRange,
29
37
  getMemoryHistoryChain,
30
38
  searchAllMemoriesFts,
31
- getAnyMemoryById
39
+ getAnyMemoryById,
40
+ searchVector,
41
+ getMemoryById,
42
+ getActiveMemoryCount
32
43
  } from './database.js';
33
44
  import { searchHybrid, getOptimizedContext, consolidateMemories } from './search.js';
34
45
  import { getRecentCommits } from './git.js';
35
46
  import { verifyChainIntegrity } from './attestation.js';
47
+ import { searchCache } from './cache.js';
48
+
49
+ // ============================================================
50
+ // CONSTANTS
51
+ // ============================================================
52
+
53
+ /** Maximum allowed memory content length (50,000 characters) */
54
+ const MAX_MEMORY_CONTENT_LENGTH = 50000;
55
+
56
+ /** Minimum content length (must have actual content) */
57
+ const MIN_MEMORY_CONTENT_LENGTH = 1;
58
+
59
+ // ============================================================
60
+ // WATCHER REGISTRY
61
+ // ============================================================
36
62
 
37
63
// Registry of active git watchers: repository path -> interval handle
const watchers = new Map();

/**
 * Stop every registered git watcher and empty the registry.
 * Called during graceful shutdown so that no interval timers are left
 * running (Bug 3 fix).
 */
export function cleanupWatchers() {
  watchers.forEach((intervalId, repoPath) => {
    clearInterval(intervalId);
    console.error(`[persyst-watcher] Stopped watching: ${repoPath}`);
  });
  watchers.clear();
}
77
+
78
+ // ============================================================
79
+ // VALIDATION HELPERS
80
+ // ============================================================
81
+
82
/**
 * Check that memory content is non-blank and within the size limit.
 *
 * Blankness is judged on the trimmed text; the size limit is applied to the
 * untrimmed length.
 *
 * @param {string} content - The content to validate
 * @returns {{ valid: boolean, error?: string }} `error` is present only when invalid
 */
function validateMemoryContent(content) {
  const isBlank = !content || content.trim().length < MIN_MEMORY_CONTENT_LENGTH;
  if (isBlank) {
    return { valid: false, error: 'Memory content cannot be empty or whitespace-only.' };
  }

  const tooLong = content.length > MAX_MEMORY_CONTENT_LENGTH;
  if (!tooLong) {
    return { valid: true };
  }

  const error = `Memory content exceeds maximum length of ${MAX_MEMORY_CONTENT_LENGTH} characters (got ${content.length}). Please split into smaller memories.`;
  return { valid: false, error };
}
99
+
40
100
  /**
41
101
  * Register all MCP tools on the server.
42
102
  * @param {McpServer} server - The MCP server instance
@@ -66,6 +126,13 @@ export function registerTools(server) {
66
126
  },
67
127
  async ({ content, importance, agent_id, session_id }) => {
68
128
  try {
129
+ // Bug 7 + Feature 4: Validate content size
130
+ const validation = validateMemoryContent(content);
131
+ if (!validation.valid) {
132
+ return text({ error: validation.error });
133
+ }
134
+
135
+ // Deduplication check
69
136
  const existing = getMemoryByContent(content);
70
137
  if (existing) {
71
138
  boostMemory(existing.id);
@@ -85,7 +152,48 @@ export function registerTools(server) {
85
152
  const embedding = await generateEmbedding(content);
86
153
  insertVector(id, embedding);
87
154
 
88
- return text({ success: true, id, message: `Memory #${id} stored` });
155
+ // Feature 1: Invalidate search cache on write
156
+ searchCache.invalidate();
157
+
158
+ // Feature 2: Contradiction Detection
159
+ let contradictions = [];
160
+ try {
161
+ const similarHits = searchVector(embedding, 3);
162
+ for (const hit of similarHits) {
163
+ const hitId = Number(hit.rowid);
164
+ if (hitId === id) continue; // Skip self
165
+
166
+ const sim = Math.max(0, 1 - (hit.distance * hit.distance) / 2);
167
+ if (sim > 0.75) {
168
+ const existingMemory = getMemoryById(hitId);
169
+ if (!existingMemory) continue;
170
+
171
+ // Check if content is substantially different (Jaccard distance > 0.5)
172
+ const jaccard = jaccardDistance(content, existingMemory.content);
173
+ if (jaccard > 0.5) {
174
+ // This is a contradiction: similar topic, different content
175
+ logContradiction(hitId, id, `Auto-detected contradiction (similarity: ${sim.toFixed(3)}, content_diff: ${jaccard.toFixed(3)})`);
176
+ contradictions.push({
177
+ old_memory_id: hitId,
178
+ old_content_preview: existingMemory.content.slice(0, 100),
179
+ similarity: sim.toFixed(4),
180
+ content_difference: jaccard.toFixed(4)
181
+ });
182
+ }
183
+ }
184
+ }
185
+ } catch (e) {
186
+ // Contradiction detection is best-effort, don't fail the memory insertion
187
+ console.error(`[persyst] Contradiction detection error: ${e.message}`);
188
+ }
189
+
190
+ const result = { success: true, id, message: `Memory #${id} stored` };
191
+ if (contradictions.length > 0) {
192
+ result.contradictions_detected = contradictions;
193
+ result.message += `. Detected ${contradictions.length} contradiction(s) — older memories archived.`;
194
+ }
195
+
196
+ return text(result);
89
197
  } catch (err) {
90
198
  return text({ error: err.message });
91
199
  }
@@ -145,6 +253,12 @@ export function registerTools(server) {
145
253
  },
146
254
  async ({ id, content, agent_id }) => {
147
255
  try {
256
+ // Bug 7 + Feature 4: Validate content size
257
+ const validation = validateMemoryContent(content);
258
+ if (!validation.valid) {
259
+ return text({ error: validation.error });
260
+ }
261
+
148
262
  const oldMemory = getMemory(id);
149
263
  if (!oldMemory) return text({ error: `Memory #${id} not found` });
150
264
 
@@ -161,6 +275,9 @@ export function registerTools(server) {
161
275
  // Record contradiction and archive the old one
162
276
  logContradiction(id, newId, 'Content updated via update_memory');
163
277
 
278
+ // Feature 1: Invalidate search cache on write
279
+ searchCache.invalidate();
280
+
164
281
  return text({
165
282
  success: true,
166
283
  id: newId,
@@ -183,6 +300,10 @@ export function registerTools(server) {
183
300
  try {
184
301
  const deleted = deleteMemory(id);
185
302
  if (!deleted) return text({ error: `Memory #${id} not found` });
303
+
304
+ // Feature 1: Invalidate search cache on write
305
+ searchCache.invalidate();
306
+
186
307
  return text({ success: true, id, message: `Memory #${id} deleted` });
187
308
  } catch (err) {
188
309
  return text({ error: err.message });
@@ -234,13 +355,14 @@ export function registerTools(server) {
234
355
  },
235
356
  async ({ repo_path, count }) => {
236
357
  try {
237
- const commits = getRecentCommits(repo_path, count);
358
+ const commits = await getRecentCommits(repo_path, count);
238
359
  let added = 0;
239
360
  let skipped = 0;
240
361
 
241
362
  for (const commit of commits) {
242
363
  const hashPrefix = commit.hash.slice(0, 7);
243
- if (memoryExists(`[${hashPrefix}]%`)) {
364
+ // Bug 1 fix: use LIKE-based query for hash prefix matching
365
+ if (memoryExistsByHashPrefix(`[${hashPrefix}]%`)) {
244
366
  skipped++;
245
367
  continue;
246
368
  }
@@ -272,6 +394,9 @@ export function registerTools(server) {
272
394
  added++;
273
395
  }
274
396
 
397
+ // Feature 1: Invalidate search cache after git ingestion
398
+ if (added > 0) searchCache.invalidate();
399
+
275
400
  return text({
276
401
  success: true,
277
402
  added,
@@ -465,11 +590,12 @@ export function registerTools(server) {
465
590
  const intervalId = setInterval(async () => {
466
591
  console.error(`[persyst-watcher] Running scheduled ingestion for: ${repo_path}`);
467
592
  try {
468
- const result = getRecentCommits(repo_path, 10);
593
+ const result = await getRecentCommits(repo_path, 10);
469
594
  let added = 0;
470
595
  for (const commit of result) {
471
596
  const hashPrefix = commit.hash.slice(0, 7);
472
- if (memoryExists(`[${hashPrefix}]%`)) continue;
597
+ // Bug 1 fix: use LIKE-based query for hash prefix matching
598
+ if (memoryExistsByHashPrefix(`[${hashPrefix}]%`)) continue;
473
599
 
474
600
  const id = insertMemory(commit.fullText, commit.importance, {
475
601
  source_type: 'git',
@@ -489,6 +615,7 @@ export function registerTools(server) {
489
615
  added++;
490
616
  }
491
617
  if (added > 0) {
618
+ searchCache.invalidate();
492
619
  console.error(`[persyst-watcher] Ingested ${added} new commits from ${repo_path}`);
493
620
  }
494
621
  } catch (e) {
@@ -545,7 +672,7 @@ export function registerTools(server) {
545
672
  }
546
673
 
547
674
  // ============================================================
548
- // HELPER
675
+ // HELPERS
549
676
  // ============================================================
550
677
 
551
678
  /** Format a response as MCP text content */
@@ -554,3 +681,24 @@ function text(data) {
554
681
  content: [{ type: 'text', text: JSON.stringify(data, null, 2) }]
555
682
  };
556
683
  }
684
+
685
/**
 * Compute Jaccard distance between two text strings.
 * Used for contradiction detection — higher distance means more different content.
 *
 * Texts are lower-cased and split on whitespace. Empty tokens are discarded,
 * so leading or trailing whitespace does not count as a word of its own, and
 * a non-string input is treated as having no words.
 *
 * @param {string} a - First text
 * @param {string} b - Second text
 * @returns {number} Distance score between 0 (identical word sets, or no words
 *   at all) and 1 (no words in common)
 */
function jaccardDistance(a, b) {
  const toWordSet = (value) =>
    new Set(typeof value === 'string' ? value.toLowerCase().split(/\s+/).filter(Boolean) : []);

  const wordsA = toWordSet(a);
  const wordsB = toWordSet(b);

  let intersection = 0;
  for (const word of wordsA) {
    if (wordsB.has(word)) intersection++;
  }

  const union = wordsA.size + wordsB.size - intersection;
  if (union === 0) return 0;
  return 1 - (intersection / union);
}