persyst-mcp 1.1.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/attestation.js +0 -1
- package/src/cache.js +122 -0
- package/src/database.js +51 -9
- package/src/git.js +23 -17
- package/src/search.js +122 -16
- package/src/server.js +4 -3
- package/src/tools.js +155 -7
package/package.json
CHANGED
package/src/attestation.js
CHANGED
|
@@ -180,7 +180,6 @@ export function verifyChainIntegrity(attestationId) {
|
|
|
180
180
|
|
|
181
181
|
// If there's a previous link, check it
|
|
182
182
|
if (att.previous_hash) {
|
|
183
|
-
const db = getLastAttestation(); // Wait, we can run raw query on DB or use prepared stmt
|
|
184
183
|
const prevAtt = getAttestationByHash(att.previous_hash);
|
|
185
184
|
if (!prevAtt) {
|
|
186
185
|
return { valid: false, error: `Broken chain: Previous attestation with hash ${att.previous_hash} not found` };
|
package/src/cache.js
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* cache.js — LRU Query Result Cache
|
|
3
|
+
*
|
|
4
|
+
* In-memory LRU cache for search results to avoid
|
|
5
|
+
* re-computing embeddings for repeated queries.
|
|
6
|
+
*
|
|
7
|
+
* - Configurable max size (default: 100 entries)
|
|
8
|
+
* - Configurable TTL (default: 5 minutes)
|
|
9
|
+
* - Automatic eviction of oldest entries when full
|
|
10
|
+
* - Full invalidation on write operations
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
 * Simple LRU (Least Recently Used) cache with TTL support.
 *
 * Entries are kept in a Map, whose insertion order doubles as the recency
 * order: the first key is the least recently used entry and the last key is
 * the most recently used one.
 */
export class LRUCache {
  /**
   * @param {number} maxSize - Maximum number of entries (default: 100).
   *   A value below 1 disables storage entirely.
   * @param {number} ttlMs - Time-to-live in milliseconds (default: 300000 = 5 min)
   */
  constructor(maxSize = 100, ttlMs = 300000) {
    this.maxSize = maxSize;
    this.ttlMs = ttlMs;
    this.cache = new Map();
    this.hits = 0;
    this.misses = 0;
  }

  /**
   * Generate a cache key from query parameters.
   * @param {string} query - The search query
   * @param {number} limit - The result limit
   * @returns {string} Cache key
   */
  static key(query, limit) {
    return `${query}::${limit}`;
  }

  /**
   * Get a cached value if it exists and hasn't expired.
   * Moves the entry to the "most recently used" position.
   *
   * A miss and an expired entry both return null, so a stored null value
   * cannot be told apart from a miss.
   *
   * @param {string} key - Cache key
   * @returns {*|null} Cached value or null if miss/expired
   */
  get(key) {
    const entry = this.cache.get(key);
    if (!entry) {
      this.misses++;
      return null;
    }

    // Expired entries are dropped lazily, on the first read after the TTL.
    if (Date.now() - entry.timestamp > this.ttlMs) {
      this.cache.delete(key);
      this.misses++;
      return null;
    }

    // Re-insert to move the entry to the end (most recently used).
    // The original timestamp is kept, so a read does not extend the TTL.
    this.cache.delete(key);
    this.cache.set(key, entry);
    this.hits++;
    return entry.value;
  }

  /**
   * Store a value in the cache, evicting the oldest entries if at capacity.
   *
   * @param {string} key - Cache key
   * @param {*} value - Value to cache
   */
  set(key, value) {
    // Drop any previous entry first so the key is re-inserted at the
    // "most recently used" end and does not count against capacity.
    this.cache.delete(key);

    // A capacity below 1 (0, negative, NaN) means "cache nothing". Without
    // this guard the eviction below could never make room and the cache
    // would still end up holding one entry.
    if (!(this.maxSize >= 1)) {
      return;
    }

    // Evict from the front (least recently used) until there is room.
    // A loop rather than a single eviction keeps the size bound intact
    // even if maxSize was lowered after entries were stored.
    while (this.cache.size >= this.maxSize) {
      const oldestKey = this.cache.keys().next().value;
      this.cache.delete(oldestKey);
    }

    this.cache.set(key, {
      value,
      timestamp: Date.now()
    });
  }

  /**
   * Invalidate the entire cache. Intended to be called on write operations
   * (add_memory, update_memory, delete_memory) so that search results are
   * always fresh. Logs to stderr only when something was actually removed.
   */
  invalidate() {
    const size = this.cache.size;
    this.cache.clear();
    if (size > 0) {
      console.error(`[persyst-cache] Invalidated ${size} cached entries`);
    }
  }

  /**
   * Get cache statistics for monitoring.
   * @returns {{ size: number, maxSize: number, ttlMs: number, hits: number, misses: number, hitRate: string }}
   */
  stats() {
    const total = this.hits + this.misses;
    return {
      size: this.cache.size,
      maxSize: this.maxSize,
      ttlMs: this.ttlMs,
      hits: this.hits,
      misses: this.misses,
      // Report '0%' before any lookup to avoid dividing by zero.
      hitRate: total > 0 ? `${((this.hits / total) * 100).toFixed(1)}%` : '0%'
    };
  }
}
|
|
120
|
+
|
|
121
|
+
// Singleton instance for search results
|
|
122
|
+
export const searchCache = new LRUCache(100, 300000);
|
package/src/database.js
CHANGED
|
@@ -33,6 +33,7 @@ const DB_PATH = process.env.NODE_ENV === 'test' ? ':memory:' : join(DB_DIR, 'per
|
|
|
33
33
|
const db = new Database(DB_PATH);
|
|
34
34
|
db.pragma('journal_mode = WAL'); // Better performance for concurrent reads
|
|
35
35
|
db.pragma('foreign_keys = ON'); // Enforce referential integrity
|
|
36
|
+
db.pragma('mmap_size = 268435456'); // 256MB memory-mapped I/O for faster reads
|
|
36
37
|
|
|
37
38
|
// Load sqlite-vec BEFORE creating any vec0 tables
|
|
38
39
|
sqliteVec.load(db);
|
|
@@ -106,6 +107,11 @@ db.exec(`
|
|
|
106
107
|
)
|
|
107
108
|
`);
|
|
108
109
|
|
|
110
|
+
// --- Migration: add domain column to agent_stats ---
// The ALTER is attempted on every startup. Once the column exists, SQLite
// rejects it with a "duplicate column name" error, which is the expected
// outcome and is ignored. Any other failure is reported instead of being
// silently dropped.
try {
  // 'general' is single-quoted so it is a proper SQL string literal;
  // double quotes denote an identifier and are only accepted as a string
  // by SQLite as a legacy quirk.
  db.exec("ALTER TABLE agent_stats ADD COLUMN domain TEXT DEFAULT 'general'");
} catch (e) {
  const reason = e instanceof Error ? e.message : String(e);
  if (!/duplicate column name/i.test(reason)) {
    // Best-effort migration: log and continue rather than abort startup.
    console.error(`[persyst] agent_stats.domain migration failed: ${reason}`);
  }
}
|
|
114
|
+
|
|
109
115
|
// --- Attestations table ---
|
|
110
116
|
db.exec(`
|
|
111
117
|
CREATE TABLE IF NOT EXISTS attestations (
|
|
@@ -346,6 +352,24 @@ const stmts = {
|
|
|
346
352
|
// -- Dedup --
|
|
347
353
|
findMemoryByContent: db.prepare(
|
|
348
354
|
'SELECT id FROM memories WHERE content = ? AND valid_until IS NULL LIMIT 1'
|
|
355
|
+
),
|
|
356
|
+
|
|
357
|
+
// -- Hash-prefix lookup for git dedup (Bug 1 fix) --
|
|
358
|
+
findMemoryByHashPrefix: db.prepare(
|
|
359
|
+
'SELECT id FROM memories WHERE content LIKE ? AND valid_until IS NULL LIMIT 1'
|
|
360
|
+
),
|
|
361
|
+
|
|
362
|
+
// -- Active memory count --
|
|
363
|
+
getActiveMemoryCount: db.prepare(
|
|
364
|
+
'SELECT COUNT(*) as count FROM memories WHERE valid_until IS NULL'
|
|
365
|
+
),
|
|
366
|
+
|
|
367
|
+
// -- Memory History Chain (Feature 6: prepared statements) --
|
|
368
|
+
getContradictionAncestors: db.prepare(
|
|
369
|
+
'SELECT old_memory_id FROM contradictions WHERE new_memory_id = ?'
|
|
370
|
+
),
|
|
371
|
+
getContradictionDescendants: db.prepare(
|
|
372
|
+
'SELECT new_memory_id FROM contradictions WHERE old_memory_id = ?'
|
|
349
373
|
)
|
|
350
374
|
};
|
|
351
375
|
|
|
@@ -593,13 +617,31 @@ export function getMemoriesByEntity(entityId) {
|
|
|
593
617
|
}
|
|
594
618
|
|
|
595
619
|
/**
 * Report whether an active memory (valid_until IS NULL) with exactly this
 * content is already stored. Used for deduplication.
 * @param {string} content - Exact content to match
 * @returns {boolean} true when a matching row was found
 */
export function memoryExists(content) {
  const row = stmts.findMemoryByContent.get(content);
  return row !== undefined;
}
|
|
628
|
+
|
|
629
|
+
/**
 * Report whether an active memory whose content matches a SQL LIKE pattern
 * exists. Used for git commit deduplication, where the caller passes a
 * pattern of the form `[hashPrefix]%`.
 * @param {string} pattern - SQL LIKE pattern to match (e.g. '[abc1234]%')
 * @returns {boolean} true when at least one row matches
 */
export function memoryExistsByHashPrefix(pattern) {
  const match = stmts.findMemoryByHashPrefix.get(pattern);
  return match !== undefined;
}
|
|
638
|
+
|
|
639
|
+
/**
 * Count the memories that are still active (valid_until IS NULL).
 * @returns {number} Number of active memories
 */
export function getActiveMemoryCount() {
  const row = stmts.getActiveMemoryCount.get();
  return row.count;
}
|
|
604
646
|
|
|
605
647
|
// ============================================================
|
|
@@ -713,14 +755,14 @@ export function getMemoryHistoryChain(memoryId) {
|
|
|
713
755
|
if (versions.has(currentId)) continue;
|
|
714
756
|
versions.add(currentId);
|
|
715
757
|
|
|
716
|
-
// Find ancestors (replaced by current)
|
|
717
|
-
const ancestors =
|
|
758
|
+
// Find ancestors (replaced by current) — using prepared statement
|
|
759
|
+
const ancestors = stmts.getContradictionAncestors.all(currentId);
|
|
718
760
|
ancestors.forEach(a => {
|
|
719
761
|
if (!versions.has(a.old_memory_id)) queue.push(a.old_memory_id);
|
|
720
762
|
});
|
|
721
763
|
|
|
722
|
-
// Find descendants (replaces current)
|
|
723
|
-
const descendants =
|
|
764
|
+
// Find descendants (replaces current) — using prepared statement
|
|
765
|
+
const descendants = stmts.getContradictionDescendants.all(currentId);
|
|
724
766
|
descendants.forEach(d => {
|
|
725
767
|
if (!versions.has(d.new_memory_id)) queue.push(d.new_memory_id);
|
|
726
768
|
});
|
package/src/git.js
CHANGED
|
@@ -3,30 +3,36 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Reads git log from a repository and converts commits into memories.
|
|
5
5
|
* Performs commit categorization, file diff analysis, and imports notes.
|
|
6
|
+
*
|
|
7
|
+
* IMPORTANT: Uses async execFile instead of execSync to avoid blocking
|
|
8
|
+
* the Node.js event loop during git operations (Bug 4 fix).
|
|
6
9
|
*/
|
|
7
10
|
|
|
8
|
-
import {
|
|
11
|
+
import { execFile } from 'child_process';
|
|
12
|
+
import { promisify } from 'util';
|
|
13
|
+
|
|
14
|
+
const execFileAsync = promisify(execFile);
|
|
9
15
|
|
|
10
16
|
/**
|
|
11
17
|
* Read the N most recent git commits from a repository.
|
|
12
18
|
*
|
|
13
19
|
* @param {string} repoPath - Absolute path to the git repo
|
|
14
20
|
* @param {number} count - Number of commits to read (default: 20)
|
|
15
|
-
* @returns {Array<{hash: string, message: string, author: string, date: string, fullText: string, files: string[], importance: number}
|
|
21
|
+
* @returns {Promise<Array<{hash: string, message: string, author: string, date: string, fullText: string, files: string[], importance: number}>>}
|
|
16
22
|
*/
|
|
17
|
-
export function getRecentCommits(repoPath, count = 20) {
|
|
23
|
+
export async function getRecentCommits(repoPath, count = 20) {
|
|
18
24
|
try {
|
|
19
25
|
// Use a delimiter to split commits reliably
|
|
20
26
|
const DELIM = '---PERSYST-COMMIT---';
|
|
21
27
|
const format = `${DELIM}%n%H%n%an%n%ai%n%s%n%b`;
|
|
22
28
|
|
|
23
|
-
const output =
|
|
24
|
-
|
|
29
|
+
const { stdout: output } = await execFileAsync(
|
|
30
|
+
'git',
|
|
31
|
+
['log', `-n`, `${count}`, `--pretty=format:${format}`],
|
|
25
32
|
{
|
|
26
33
|
cwd: repoPath,
|
|
27
34
|
encoding: 'utf-8',
|
|
28
35
|
timeout: 10000, // 10s timeout
|
|
29
|
-
stdio: ['pipe', 'pipe', 'pipe'] // Suppress stderr
|
|
30
36
|
}
|
|
31
37
|
);
|
|
32
38
|
|
|
@@ -45,7 +51,7 @@ export function getRecentCommits(repoPath, count = 20) {
|
|
|
45
51
|
const body = lines.slice(4).join(' ').trim();
|
|
46
52
|
|
|
47
53
|
// Fetch git notes if available (represents PR metadata)
|
|
48
|
-
const notes = getGitNotes(repoPath, hash);
|
|
54
|
+
const notes = await getGitNotes(repoPath, hash);
|
|
49
55
|
|
|
50
56
|
// Build a readable memory string
|
|
51
57
|
let fullText = body
|
|
@@ -57,7 +63,7 @@ export function getRecentCommits(repoPath, count = 20) {
|
|
|
57
63
|
}
|
|
58
64
|
|
|
59
65
|
// Fetch files touched
|
|
60
|
-
const files = getCommitFiles(repoPath, hash);
|
|
66
|
+
const files = await getCommitFiles(repoPath, hash);
|
|
61
67
|
|
|
62
68
|
// Classify importance based on message
|
|
63
69
|
const classification = classifyCommit(subject);
|
|
@@ -92,17 +98,17 @@ export function getRecentCommits(repoPath, count = 20) {
|
|
|
92
98
|
*
|
|
93
99
|
* @param {string} repoPath - Absolute path to the git repo
|
|
94
100
|
* @param {string} hash - Full commit hash
|
|
95
|
-
* @returns {string[]} List of changed file paths
|
|
101
|
+
* @returns {Promise<string[]>} List of changed file paths
|
|
96
102
|
*/
|
|
97
|
-
export function getCommitFiles(repoPath, hash) {
|
|
103
|
+
export async function getCommitFiles(repoPath, hash) {
|
|
98
104
|
try {
|
|
99
|
-
const output =
|
|
100
|
-
|
|
105
|
+
const { stdout: output } = await execFileAsync(
|
|
106
|
+
'git',
|
|
107
|
+
['diff-tree', '--no-commit-id', '--name-only', '-r', hash],
|
|
101
108
|
{
|
|
102
109
|
cwd: repoPath,
|
|
103
110
|
encoding: 'utf-8',
|
|
104
111
|
timeout: 5000,
|
|
105
|
-
stdio: ['pipe', 'pipe', 'pipe']
|
|
106
112
|
}
|
|
107
113
|
);
|
|
108
114
|
return output.trim().split('\n').filter(Boolean);
|
|
@@ -114,15 +120,15 @@ export function getCommitFiles(repoPath, hash) {
|
|
|
114
120
|
/**
|
|
115
121
|
* Fetch git notes (representing PR metadata or additional annotations).
|
|
116
122
|
*/
|
|
117
|
-
export function getGitNotes(repoPath, hash) {
|
|
123
|
+
export async function getGitNotes(repoPath, hash) {
|
|
118
124
|
try {
|
|
119
|
-
const output =
|
|
120
|
-
|
|
125
|
+
const { stdout: output } = await execFileAsync(
|
|
126
|
+
'git',
|
|
127
|
+
['notes', 'show', hash],
|
|
121
128
|
{
|
|
122
129
|
cwd: repoPath,
|
|
123
130
|
encoding: 'utf-8',
|
|
124
131
|
timeout: 3000,
|
|
125
|
-
stdio: ['pipe', 'pipe', 'pipe']
|
|
126
132
|
}
|
|
127
133
|
);
|
|
128
134
|
return output.trim();
|
package/src/search.js
CHANGED
|
@@ -3,7 +3,8 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Combines keyword and semantic searches, integrates temporal decay,
|
|
5
5
|
* applies agent reputation scores, generates cryptographic search attestations,
|
|
6
|
-
*
|
|
6
|
+
* builds graph-hopped optimized LLM context prompts, and applies MMR
|
|
7
|
+
* for diverse result retrieval.
|
|
7
8
|
*/
|
|
8
9
|
|
|
9
10
|
import db, {
|
|
@@ -16,9 +17,11 @@ import db, {
|
|
|
16
17
|
} from './database.js';
|
|
17
18
|
import { generateEmbedding } from './embeddings.js';
|
|
18
19
|
import { createAttestation } from './attestation.js';
|
|
20
|
+
import { searchCache, LRUCache } from './cache.js';
|
|
19
21
|
|
|
20
22
|
/**
|
|
21
23
|
* Search memories using both keyword and semantic strategies.
|
|
24
|
+
* Results are cached in the LRU cache for repeated queries.
|
|
22
25
|
*
|
|
23
26
|
* @param {string} queryText - What to search for
|
|
24
27
|
* @param {number} limit - Max results to return (default: 5)
|
|
@@ -27,6 +30,14 @@ import { createAttestation } from './attestation.js';
|
|
|
27
30
|
* @returns {Promise<Array>} Ranked search results (with .attestation property attached)
|
|
28
31
|
*/
|
|
29
32
|
export async function searchHybrid(queryText, limit = 5, agentId = null, sessionId = null) {
|
|
33
|
+
// --- Check LRU cache first (Feature 1) ---
|
|
34
|
+
const cacheKey = LRUCache.key(queryText, limit);
|
|
35
|
+
const cached = searchCache.get(cacheKey);
|
|
36
|
+
if (cached) {
|
|
37
|
+
console.error(`[persyst-cache] Cache HIT for query: "${queryText.slice(0, 50)}..."`);
|
|
38
|
+
return cached;
|
|
39
|
+
}
|
|
40
|
+
|
|
30
41
|
// --- Step 1: Keyword search (fast, exact matches) ---
|
|
31
42
|
const keywordHits = searchKeyword(queryText, limit * 2);
|
|
32
43
|
const keywordIds = new Set(keywordHits.map(r => r.id));
|
|
@@ -112,15 +123,96 @@ export async function searchHybrid(queryText, limit = 5, agentId = null, session
|
|
|
112
123
|
|
|
113
124
|
// Sort by final score descending
|
|
114
125
|
finalResults.sort((a, b) => parseFloat(b.hybrid_score) - parseFloat(a.hybrid_score));
|
|
115
|
-
|
|
126
|
+
|
|
127
|
+
// --- Step 5: Apply MMR for diverse retrieval (Feature 3) ---
|
|
128
|
+
const mmrResults = applyMMR(finalResults, limit);
|
|
116
129
|
|
|
117
130
|
// Generate cryptographic attestation for audit trails
|
|
118
|
-
const attestation = createAttestation(queryText,
|
|
131
|
+
const attestation = createAttestation(queryText, mmrResults, agentId, sessionId);
|
|
119
132
|
|
|
120
133
|
// Attach attestation object directly to the array to preserve compatibility with existing tests
|
|
121
|
-
|
|
134
|
+
mmrResults.attestation = attestation;
|
|
135
|
+
|
|
136
|
+
// --- Store in LRU cache (Feature 1) ---
|
|
137
|
+
searchCache.set(cacheKey, mmrResults);
|
|
138
|
+
|
|
139
|
+
return mmrResults;
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
/**
 * Apply Maximal Marginal Relevance (MMR) re-ranking for diverse results.
 *
 * MMR balances relevance with diversity by penalizing candidates that
 * are too similar to already-selected results. Similarity is measured with
 * word-level Jaccard similarity of the candidates' `content`.
 *
 * When the candidate list already fits within `limit` it is returned as-is
 * (same array, same order). Otherwise a new array is returned and the input
 * is left untouched.
 *
 * @param {Array} candidates - Scored search results, sorted by descending
 *   `hybrid_score`; each needs `content` and a numeric or numeric-string
 *   `hybrid_score`
 * @param {number} limit - Max results to return
 * @param {number} lambda - Trade-off parameter (0.7 = 70% relevance, 30% diversity)
 * @returns {Array} MMR-reranked results
 */
function applyMMR(candidates, limit, lambda = 0.7) {
  if (candidates.length <= limit) return candidates;

  // A non-positive (or NaN) limit asks for nothing; without this guard the
  // unconditional first pick below would still return one result.
  if (!(limit > 0)) return [];

  const selected = [];
  const remaining = [...candidates];

  // Always pick the top-scored result first
  selected.push(remaining.shift());

  while (selected.length < limit && remaining.length > 0) {
    let bestIdx = -1;
    let bestMMRScore = -Infinity;

    for (let i = 0; i < remaining.length; i++) {
      const candidate = remaining[i];

      // A missing or unparsable score would make the MMR score NaN, which
      // never compares greater than anything and used to end selection
      // early. Treat it as zero relevance instead.
      const parsedScore = parseFloat(candidate.hybrid_score);
      const relevance = Number.isFinite(parsedScore) ? parsedScore : 0;

      // Max similarity to any already-selected result, using content-based
      // Jaccard similarity as a proxy.
      let maxSimToSelected = 0;
      for (const sel of selected) {
        const sim = jaccardSimilarity(candidate.content, sel.content);
        if (sim > maxSimToSelected) maxSimToSelected = sim;
      }

      // MMR score = λ * relevance - (1 - λ) * max_similarity_to_selected
      const mmrScore = lambda * relevance - (1 - lambda) * maxSimToSelected;

      // Strict '>' keeps the earlier (higher-ranked) candidate on ties.
      if (mmrScore > bestMMRScore) {
        bestMMRScore = mmrScore;
        bestIdx = i;
      }
    }

    if (bestIdx < 0) break;
    selected.push(remaining.splice(bestIdx, 1)[0]);
  }

  return selected;
}
|
|
196
|
+
|
|
197
|
+
/**
 * Compute Jaccard similarity between two text strings.
 * Uses case-insensitive, whitespace-delimited word tokens.
 *
 * Empty tokens produced by leading/trailing whitespace are discarded so
 * they cannot count as a shared "word". Null/undefined input is treated as
 * empty text. If neither text contains a word the result is 0.
 *
 * @param {string} a - First text
 * @param {string} b - Second text
 * @returns {number} Similarity score between 0 and 1
 */
function jaccardSimilarity(a, b) {
  // filter(Boolean) removes the '' token that split() yields for empty
  // input or for text that starts/ends with whitespace.
  const toWordSet = (text) =>
    new Set(String(text == null ? '' : text).toLowerCase().split(/\s+/).filter(Boolean));

  const wordsA = toWordSet(a);
  const wordsB = toWordSet(b);

  let intersection = 0;
  for (const word of wordsA) {
    if (wordsB.has(word)) intersection++;
  }

  // |A ∪ B| = |A| + |B| - |A ∩ B|
  const union = wordsA.size + wordsB.size - intersection;
  return union === 0 ? 0 : intersection / union;
}
|
|
125
217
|
|
|
126
218
|
/**
|
|
@@ -234,8 +326,11 @@ export async function getOptimizedContext(queryText, maxTokens, agentId = null,
|
|
|
234
326
|
}
|
|
235
327
|
context += '=== END OF CONTEXT ===';
|
|
236
328
|
|
|
237
|
-
//
|
|
238
|
-
|
|
329
|
+
// Bug 8 fix: Skip attestation when no results to avoid audit noise
|
|
330
|
+
let attestation = null;
|
|
331
|
+
if (accepted.length > 0) {
|
|
332
|
+
attestation = createAttestation(queryText, accepted, agentId, sessionId);
|
|
333
|
+
}
|
|
239
334
|
|
|
240
335
|
return {
|
|
241
336
|
context,
|
|
@@ -246,12 +341,26 @@ export async function getOptimizedContext(queryText, maxTokens, agentId = null,
|
|
|
246
341
|
|
|
247
342
|
/**
|
|
248
343
|
* Performs memory consolidation by merging highly similar memories.
|
|
344
|
+
* Bug 6 fix: DB mutations are wrapped in a transaction for atomicity.
|
|
249
345
|
*/
|
|
250
346
|
export async function consolidateMemories() {
|
|
251
347
|
const activeMemories = db.prepare('SELECT * FROM memories WHERE valid_until IS NULL').all();
|
|
252
348
|
const consolidated = [];
|
|
253
349
|
const visited = new Set();
|
|
254
350
|
|
|
351
|
+
// Pre-compile the transaction for atomic DB operations (Bug 6 fix)
|
|
352
|
+
const archiveAndMerge = db.transaction((canonicalId, mergedContent, dupIds) => {
|
|
353
|
+
// Update canonical memory with merged content
|
|
354
|
+
db.prepare('UPDATE memories SET content = ?, last_accessed = unixepoch() WHERE id = ?').run(mergedContent, canonicalId);
|
|
355
|
+
|
|
356
|
+
// Archive duplicates
|
|
357
|
+
for (const dupId of dupIds) {
|
|
358
|
+
db.prepare('UPDATE memories SET valid_until = unixepoch() WHERE id = ?').run(dupId);
|
|
359
|
+
db.prepare('INSERT INTO contradictions (old_memory_id, new_memory_id, resolution_reason) VALUES (?, ?, ?)')
|
|
360
|
+
.run(dupId, canonicalId, `Consolidated into canonical memory #${canonicalId}`);
|
|
361
|
+
}
|
|
362
|
+
});
|
|
363
|
+
|
|
255
364
|
for (const mem of activeMemories) {
|
|
256
365
|
if (visited.has(mem.id)) continue;
|
|
257
366
|
|
|
@@ -295,20 +404,17 @@ export async function consolidateMemories() {
|
|
|
295
404
|
const uniqueContents = Array.from(new Set(contents));
|
|
296
405
|
const mergedContent = uniqueContents.join('. ').replace(/\.\./g, '.');
|
|
297
406
|
|
|
298
|
-
//
|
|
299
|
-
db.prepare('UPDATE memories SET content = ?, last_accessed = unixepoch() WHERE id = ?').run(mergedContent, canonical.id);
|
|
300
|
-
|
|
301
|
-
// Update vector embedding for canonical
|
|
407
|
+
// Generate new embedding OUTSIDE the transaction (async operation)
|
|
302
408
|
const newEmbedding = await generateEmbedding(mergedContent);
|
|
409
|
+
|
|
410
|
+
// Run atomic DB transaction for all mutations (Bug 6 fix)
|
|
411
|
+
archiveAndMerge(canonical.id, mergedContent, dupesToArchive.map(d => d.id));
|
|
412
|
+
|
|
413
|
+
// Update vector embedding (also outside transaction since vec0 tables have their own handling)
|
|
303
414
|
db.prepare('DELETE FROM memories_vec WHERE rowid = ?').run(canonical.id);
|
|
304
415
|
db.prepare('INSERT INTO memories_vec (rowid, embedding) VALUES (?, ?)').run(BigInt(canonical.id), Buffer.from(newEmbedding.buffer));
|
|
305
416
|
|
|
306
|
-
// Archive duplicates
|
|
307
417
|
for (const dup of dupesToArchive) {
|
|
308
|
-
db.prepare('UPDATE memories SET valid_until = unixepoch() WHERE id = ?').run(dup.id);
|
|
309
|
-
db.prepare('INSERT INTO contradictions (old_memory_id, new_memory_id, resolution_reason) VALUES (?, ?, ?)')
|
|
310
|
-
.run(dup.id, canonical.id, `Consolidated into canonical memory #${canonical.id}`);
|
|
311
|
-
|
|
312
418
|
visited.add(dup.id);
|
|
313
419
|
}
|
|
314
420
|
|
package/src/server.js
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
|
|
12
12
|
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
13
13
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
14
|
-
import { registerTools } from './tools.js';
|
|
14
|
+
import { registerTools, cleanupWatchers } from './tools.js';
|
|
15
15
|
import { applyTemporalDecay, closeDatabase } from './database.js';
|
|
16
16
|
import { consolidateMemories } from './search.js';
|
|
17
17
|
|
|
@@ -23,7 +23,7 @@ export async function startServer() {
|
|
|
23
23
|
// --- Create MCP server ---
|
|
24
24
|
const server = new McpServer({
|
|
25
25
|
name: 'persyst',
|
|
26
|
-
version: '
|
|
26
|
+
version: '2.0.0'
|
|
27
27
|
});
|
|
28
28
|
|
|
29
29
|
// --- Register all tools ---
|
|
@@ -46,11 +46,12 @@ export async function startServer() {
|
|
|
46
46
|
}
|
|
47
47
|
}, 86400000);
|
|
48
48
|
|
|
49
|
-
// --- Graceful shutdown ---
|
|
49
|
+
// --- Graceful shutdown (Bug 3 fix: also cleans up git watchers) ---
|
|
50
50
|
const shutdown = () => {
|
|
51
51
|
console.error('[persyst] Shutting down...');
|
|
52
52
|
clearInterval(decayTimer);
|
|
53
53
|
clearInterval(consolidationTimer);
|
|
54
|
+
cleanupWatchers(); // Bug 3 fix: stop all git repo watchers
|
|
54
55
|
closeDatabase();
|
|
55
56
|
process.exit(0);
|
|
56
57
|
};
|
package/src/tools.js
CHANGED
|
@@ -2,6 +2,13 @@
|
|
|
2
2
|
* tools.js — MCP Tool Definitions & Handlers
|
|
3
3
|
*
|
|
4
4
|
* Defines all 19 tools that AI agents can call via MCP.
|
|
5
|
+
*
|
|
6
|
+
* v2.0 changes:
|
|
7
|
+
* - Bug 1: Uses memoryExistsByHashPrefix for git dedup
|
|
8
|
+
* - Bug 3: Exports cleanupWatchers for graceful shutdown
|
|
9
|
+
* - Bug 7 + Feature 4: Memory content size validation
|
|
10
|
+
* - Feature 1: Cache invalidation on write operations
|
|
11
|
+
* - Feature 2: Contradiction detection on add_memory
|
|
5
12
|
*/
|
|
6
13
|
|
|
7
14
|
import { z } from 'zod';
|
|
@@ -21,6 +28,7 @@ import {
|
|
|
21
28
|
getMemoriesByEntity,
|
|
22
29
|
getAllEntities,
|
|
23
30
|
memoryExists,
|
|
31
|
+
memoryExistsByHashPrefix,
|
|
24
32
|
getMemoryByContent,
|
|
25
33
|
boostMemory,
|
|
26
34
|
logContradiction,
|
|
@@ -28,15 +36,67 @@ import {
|
|
|
28
36
|
getAttestationsByDateRange,
|
|
29
37
|
getMemoryHistoryChain,
|
|
30
38
|
searchAllMemoriesFts,
|
|
31
|
-
getAnyMemoryById
|
|
39
|
+
getAnyMemoryById,
|
|
40
|
+
searchVector,
|
|
41
|
+
getMemoryById,
|
|
42
|
+
getActiveMemoryCount
|
|
32
43
|
} from './database.js';
|
|
33
44
|
import { searchHybrid, getOptimizedContext, consolidateMemories } from './search.js';
|
|
34
45
|
import { getRecentCommits } from './git.js';
|
|
35
46
|
import { verifyChainIntegrity } from './attestation.js';
|
|
47
|
+
import { searchCache } from './cache.js';
|
|
48
|
+
|
|
49
|
+
// ============================================================
|
|
50
|
+
// CONSTANTS
|
|
51
|
+
// ============================================================
|
|
52
|
+
|
|
53
|
+
/** Maximum allowed memory content length (50,000 characters) */
|
|
54
|
+
const MAX_MEMORY_CONTENT_LENGTH = 50000;
|
|
55
|
+
|
|
56
|
+
/** Minimum content length (must have actual content) */
|
|
57
|
+
const MIN_MEMORY_CONTENT_LENGTH = 1;
|
|
58
|
+
|
|
59
|
+
// ============================================================
|
|
60
|
+
// WATCHER REGISTRY
|
|
61
|
+
// ============================================================
|
|
36
62
|
|
|
37
63
|
// In-memory registry of active git watchers
|
|
38
64
|
const watchers = new Map();
|
|
39
65
|
|
|
66
|
+
/**
 * Stop every registered git watcher and empty the registry.
 * Meant to run during graceful shutdown so that no watcher interval
 * outlives the server (Bug 3 fix: orphaned setInterval handles).
 */
export function cleanupWatchers() {
  // Map#forEach passes (value, key): the interval handle, then the repo path.
  watchers.forEach((intervalId, repoPath) => {
    clearInterval(intervalId);
    console.error(`[persyst-watcher] Stopped watching: ${repoPath}`);
  });
  watchers.clear();
}
|
|
77
|
+
|
|
78
|
+
// ============================================================
|
|
79
|
+
// VALIDATION HELPERS
|
|
80
|
+
// ============================================================
|
|
81
|
+
|
|
82
|
+
/**
 * Check that memory content is neither blank nor oversized.
 *
 * Blankness is judged on the trimmed text, while the upper bound is
 * checked against the untrimmed length.
 *
 * @param {string} content - The content to validate
 * @returns {{ valid: boolean, error?: string }} `valid: true`, or
 *   `valid: false` together with a human-readable `error`
 */
function validateMemoryContent(content) {
  const isBlank = !content || content.trim().length < MIN_MEMORY_CONTENT_LENGTH;
  if (isBlank) {
    return { valid: false, error: 'Memory content cannot be empty or whitespace-only.' };
  }

  const withinLimit = !(content.length > MAX_MEMORY_CONTENT_LENGTH);
  if (withinLimit) {
    return { valid: true };
  }

  return {
    valid: false,
    error: `Memory content exceeds maximum length of ${MAX_MEMORY_CONTENT_LENGTH} characters (got ${content.length}). Please split into smaller memories.`
  };
}
|
|
99
|
+
|
|
40
100
|
/**
|
|
41
101
|
* Register all MCP tools on the server.
|
|
42
102
|
* @param {McpServer} server - The MCP server instance
|
|
@@ -66,6 +126,13 @@ export function registerTools(server) {
|
|
|
66
126
|
},
|
|
67
127
|
async ({ content, importance, agent_id, session_id }) => {
|
|
68
128
|
try {
|
|
129
|
+
// Bug 7 + Feature 4: Validate content size
|
|
130
|
+
const validation = validateMemoryContent(content);
|
|
131
|
+
if (!validation.valid) {
|
|
132
|
+
return text({ error: validation.error });
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
// Deduplication check
|
|
69
136
|
const existing = getMemoryByContent(content);
|
|
70
137
|
if (existing) {
|
|
71
138
|
boostMemory(existing.id);
|
|
@@ -85,7 +152,48 @@ export function registerTools(server) {
|
|
|
85
152
|
const embedding = await generateEmbedding(content);
|
|
86
153
|
insertVector(id, embedding);
|
|
87
154
|
|
|
88
|
-
|
|
155
|
+
// Feature 1: Invalidate search cache on write
|
|
156
|
+
searchCache.invalidate();
|
|
157
|
+
|
|
158
|
+
// Feature 2: Contradiction Detection
|
|
159
|
+
let contradictions = [];
|
|
160
|
+
try {
|
|
161
|
+
const similarHits = searchVector(embedding, 3);
|
|
162
|
+
for (const hit of similarHits) {
|
|
163
|
+
const hitId = Number(hit.rowid);
|
|
164
|
+
if (hitId === id) continue; // Skip self
|
|
165
|
+
|
|
166
|
+
const sim = Math.max(0, 1 - (hit.distance * hit.distance) / 2);
|
|
167
|
+
if (sim > 0.75) {
|
|
168
|
+
const existingMemory = getMemoryById(hitId);
|
|
169
|
+
if (!existingMemory) continue;
|
|
170
|
+
|
|
171
|
+
// Check if content is substantially different (Jaccard distance > 0.5)
|
|
172
|
+
const jaccard = jaccardDistance(content, existingMemory.content);
|
|
173
|
+
if (jaccard > 0.5) {
|
|
174
|
+
// This is a contradiction: similar topic, different content
|
|
175
|
+
logContradiction(hitId, id, `Auto-detected contradiction (similarity: ${sim.toFixed(3)}, content_diff: ${jaccard.toFixed(3)})`);
|
|
176
|
+
contradictions.push({
|
|
177
|
+
old_memory_id: hitId,
|
|
178
|
+
old_content_preview: existingMemory.content.slice(0, 100),
|
|
179
|
+
similarity: sim.toFixed(4),
|
|
180
|
+
content_difference: jaccard.toFixed(4)
|
|
181
|
+
});
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
} catch (e) {
|
|
186
|
+
// Contradiction detection is best-effort, don't fail the memory insertion
|
|
187
|
+
console.error(`[persyst] Contradiction detection error: ${e.message}`);
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
const result = { success: true, id, message: `Memory #${id} stored` };
|
|
191
|
+
if (contradictions.length > 0) {
|
|
192
|
+
result.contradictions_detected = contradictions;
|
|
193
|
+
result.message += `. Detected ${contradictions.length} contradiction(s) — older memories archived.`;
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
return text(result);
|
|
89
197
|
} catch (err) {
|
|
90
198
|
return text({ error: err.message });
|
|
91
199
|
}
|
|
@@ -145,6 +253,12 @@ export function registerTools(server) {
|
|
|
145
253
|
},
|
|
146
254
|
async ({ id, content, agent_id }) => {
|
|
147
255
|
try {
|
|
256
|
+
// Bug 7 + Feature 4: Validate content size
|
|
257
|
+
const validation = validateMemoryContent(content);
|
|
258
|
+
if (!validation.valid) {
|
|
259
|
+
return text({ error: validation.error });
|
|
260
|
+
}
|
|
261
|
+
|
|
148
262
|
const oldMemory = getMemory(id);
|
|
149
263
|
if (!oldMemory) return text({ error: `Memory #${id} not found` });
|
|
150
264
|
|
|
@@ -161,6 +275,9 @@ export function registerTools(server) {
|
|
|
161
275
|
// Record contradiction and archive the old one
|
|
162
276
|
logContradiction(id, newId, 'Content updated via update_memory');
|
|
163
277
|
|
|
278
|
+
// Feature 1: Invalidate search cache on write
|
|
279
|
+
searchCache.invalidate();
|
|
280
|
+
|
|
164
281
|
return text({
|
|
165
282
|
success: true,
|
|
166
283
|
id: newId,
|
|
@@ -183,6 +300,10 @@ export function registerTools(server) {
|
|
|
183
300
|
try {
|
|
184
301
|
const deleted = deleteMemory(id);
|
|
185
302
|
if (!deleted) return text({ error: `Memory #${id} not found` });
|
|
303
|
+
|
|
304
|
+
// Feature 1: Invalidate search cache on write
|
|
305
|
+
searchCache.invalidate();
|
|
306
|
+
|
|
186
307
|
return text({ success: true, id, message: `Memory #${id} deleted` });
|
|
187
308
|
} catch (err) {
|
|
188
309
|
return text({ error: err.message });
|
|
@@ -234,13 +355,14 @@ export function registerTools(server) {
|
|
|
234
355
|
},
|
|
235
356
|
async ({ repo_path, count }) => {
|
|
236
357
|
try {
|
|
237
|
-
const commits = getRecentCommits(repo_path, count);
|
|
358
|
+
const commits = await getRecentCommits(repo_path, count);
|
|
238
359
|
let added = 0;
|
|
239
360
|
let skipped = 0;
|
|
240
361
|
|
|
241
362
|
for (const commit of commits) {
|
|
242
363
|
const hashPrefix = commit.hash.slice(0, 7);
|
|
243
|
-
|
|
364
|
+
// Bug 1 fix: use LIKE-based query for hash prefix matching
|
|
365
|
+
if (memoryExistsByHashPrefix(`[${hashPrefix}]%`)) {
|
|
244
366
|
skipped++;
|
|
245
367
|
continue;
|
|
246
368
|
}
|
|
@@ -272,6 +394,9 @@ export function registerTools(server) {
|
|
|
272
394
|
added++;
|
|
273
395
|
}
|
|
274
396
|
|
|
397
|
+
// Feature 1: Invalidate search cache after git ingestion
|
|
398
|
+
if (added > 0) searchCache.invalidate();
|
|
399
|
+
|
|
275
400
|
return text({
|
|
276
401
|
success: true,
|
|
277
402
|
added,
|
|
@@ -465,11 +590,12 @@ export function registerTools(server) {
|
|
|
465
590
|
const intervalId = setInterval(async () => {
|
|
466
591
|
console.error(`[persyst-watcher] Running scheduled ingestion for: ${repo_path}`);
|
|
467
592
|
try {
|
|
468
|
-
const result = getRecentCommits(repo_path, 10);
|
|
593
|
+
const result = await getRecentCommits(repo_path, 10);
|
|
469
594
|
let added = 0;
|
|
470
595
|
for (const commit of result) {
|
|
471
596
|
const hashPrefix = commit.hash.slice(0, 7);
|
|
472
|
-
|
|
597
|
+
// Bug 1 fix: use LIKE-based query for hash prefix matching
|
|
598
|
+
if (memoryExistsByHashPrefix(`[${hashPrefix}]%`)) continue;
|
|
473
599
|
|
|
474
600
|
const id = insertMemory(commit.fullText, commit.importance, {
|
|
475
601
|
source_type: 'git',
|
|
@@ -489,6 +615,7 @@ export function registerTools(server) {
|
|
|
489
615
|
added++;
|
|
490
616
|
}
|
|
491
617
|
if (added > 0) {
|
|
618
|
+
searchCache.invalidate();
|
|
492
619
|
console.error(`[persyst-watcher] Ingested ${added} new commits from ${repo_path}`);
|
|
493
620
|
}
|
|
494
621
|
} catch (e) {
|
|
@@ -545,7 +672,7 @@ export function registerTools(server) {
|
|
|
545
672
|
}
|
|
546
673
|
|
|
547
674
|
// ============================================================
|
|
548
|
-
//
|
|
675
|
+
// HELPERS
|
|
549
676
|
// ============================================================
|
|
550
677
|
|
|
551
678
|
/** Format a response as MCP text content */
|
|
@@ -554,3 +681,24 @@ function text(data) {
|
|
|
554
681
|
content: [{ type: 'text', text: JSON.stringify(data, null, 2) }]
|
|
555
682
|
};
|
|
556
683
|
}
|
|
684
|
+
|
|
685
|
+
/**
 * Compute the Jaccard distance between the word sets of two text strings.
 * Used for contradiction detection — higher distance means more different content.
 *
 * Both inputs are lower-cased and split on runs of whitespace. Empty tokens
 * (which `split` yields for empty input or for leading/trailing whitespace)
 * are discarded so that padding alone never counts as a shared or differing
 * "word".
 *
 * @param {string} a - First text
 * @param {string} b - Second text
 * @returns {number} Distance score between 0 (identical word sets) and 1 (no words in common);
 *   0 when neither text contains any word.
 */
function jaccardDistance(a, b) {
  // filter(Boolean) removes the '' token that split() emits for '' / padded input.
  const toWordSet = (value) => new Set(value.toLowerCase().split(/\s+/).filter(Boolean));

  const wordsA = toWordSet(a);
  const wordsB = toWordSet(b);

  let intersection = 0;
  for (const word of wordsA) {
    if (wordsB.has(word)) intersection++;
  }

  // |A ∪ B| = |A| + |B| - |A ∩ B|
  const union = wordsA.size + wordsB.size - intersection;
  // Two texts without any words are treated as identical rather than dividing by zero.
  if (union === 0) return 0;
  return 1 - (intersection / union);
}
|