persyst-mcp 1.1.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/aider.js +204 -0
- package/bin/setup.js +168 -0
- package/hooks/persyst-hook.js +234 -0
- package/index.js +16 -7
- package/package.json +6 -2
- package/src/attestation.js +0 -1
- package/src/cache.js +122 -0
- package/src/database.js +51 -9
- package/src/git.js +23 -17
- package/src/search.js +124 -18
- package/src/server.js +4 -3
- package/src/tools.js +155 -7
package/src/cache.js
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* cache.js — LRU Query Result Cache
|
|
3
|
+
*
|
|
4
|
+
* In-memory LRU cache for search results to avoid
|
|
5
|
+
* re-computing embeddings for repeated queries.
|
|
6
|
+
*
|
|
7
|
+
* - Configurable max size (default: 100 entries)
|
|
8
|
+
* - Configurable TTL (default: 5 minutes)
|
|
9
|
+
* - Automatic eviction of oldest entries when full
|
|
10
|
+
* - Full invalidation on write operations
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* Simple LRU (Least Recently Used) cache with TTL support.
|
|
15
|
+
*/
|
|
16
|
+
export class LRUCache {
  /**
   * @param {number} maxSize - Maximum number of entries (default: 100).
   *   A value that is not greater than zero (0, negative, NaN) disables
   *   caching: set() stores nothing.
   * @param {number} ttlMs - Time-to-live in milliseconds (default: 300000 = 5 min)
   */
  constructor(maxSize = 100, ttlMs = 300000) {
    this.maxSize = maxSize;
    this.ttlMs = ttlMs;
    // A Map iterates in insertion order, so the first key is always the
    // least recently used entry and the last key the most recently used.
    this.cache = new Map();
    this.hits = 0;
    this.misses = 0;
  }

  /**
   * Generate a cache key from query parameters.
   * @param {string} query - The search query
   * @param {number} limit - The result limit
   * @returns {string} Cache key
   */
  static key(query, limit) {
    return `${query}::${limit}`;
  }

  /**
   * Get a cached value if it exists and hasn't expired.
   * Moves the entry to the "most recently used" position and updates the
   * hit/miss counters.
   *
   * @param {string} key - Cache key
   * @returns {*|null} Cached value or null if miss/expired
   */
  get(key) {
    const entry = this.cache.get(key);
    if (!entry) {
      this.misses++;
      return null;
    }

    // Expired entries are dropped lazily, on first access after the TTL.
    if (Date.now() - entry.timestamp > this.ttlMs) {
      this.cache.delete(key);
      this.misses++;
      return null;
    }

    // Re-insert to move the entry to the end (most recently used).
    // The original timestamp is kept, so a hit does not extend the TTL.
    this.cache.delete(key);
    this.cache.set(key, entry);
    this.hits++;
    return entry.value;
  }

  /**
   * Store a value in the cache, evicting least recently used entries
   * until there is room for it.
   *
   * @param {string} key - Cache key
   * @param {*} value - Value to cache
   */
  set(key, value) {
    // Remove any previous entry first so the key moves to the most recently
    // used position and does not count against the capacity check below.
    this.cache.delete(key);

    // A capacity that is not a positive number means "do not cache".
    // Without this guard a maxSize of 0 would still hold one entry and a
    // NaN maxSize would let the cache grow without bound.
    if (!(this.maxSize > 0)) {
      return;
    }

    // Loop rather than evict once, so the bound is restored even if
    // maxSize was lowered after entries were stored.
    while (this.cache.size >= this.maxSize) {
      const oldestKey = this.cache.keys().next().value;
      this.cache.delete(oldestKey);
    }

    this.cache.set(key, {
      value,
      timestamp: Date.now()
    });
  }

  /**
   * Invalidate the entire cache. Called on write operations
   * (add_memory, update_memory, delete_memory) to ensure
   * search results are always fresh. Hit/miss counters are kept.
   */
  invalidate() {
    const size = this.cache.size;
    this.cache.clear();
    if (size > 0) {
      // Logged via console.error (stderr), not stdout.
      console.error(`[persyst-cache] Invalidated ${size} cached entries`);
    }
  }

  /**
   * Get cache statistics for monitoring.
   * `size` counts stored entries, including expired ones that have not
   * been accessed (and therefore not dropped) yet.
   * @returns {{ size: number, maxSize: number, ttlMs: number, hits: number, misses: number, hitRate: string }}
   */
  stats() {
    const total = this.hits + this.misses;
    return {
      size: this.cache.size,
      maxSize: this.maxSize,
      ttlMs: this.ttlMs,
      hits: this.hits,
      misses: this.misses,
      hitRate: total > 0 ? `${((this.hits / total) * 100).toFixed(1)}%` : '0%'
    };
  }
}
|
|
120
|
+
|
|
121
|
+
// Singleton instance for search results, shared by every importer of this
// module: at most 100 entries, each valid for 300000 ms (5 minutes).
export const searchCache = new LRUCache(100, 300000);
|
package/src/database.js
CHANGED
|
@@ -33,6 +33,7 @@ const DB_PATH = process.env.NODE_ENV === 'test' ? ':memory:' : join(DB_DIR, 'per
|
|
|
33
33
|
const db = new Database(DB_PATH);
|
|
34
34
|
db.pragma('journal_mode = WAL'); // Better performance for concurrent reads
|
|
35
35
|
db.pragma('foreign_keys = ON'); // Enforce referential integrity
|
|
36
|
+
db.pragma('mmap_size = 268435456'); // 256MB memory-mapped I/O for faster reads
|
|
36
37
|
|
|
37
38
|
// Load sqlite-vec BEFORE creating any vec0 tables
|
|
38
39
|
sqliteVec.load(db);
|
|
@@ -106,6 +107,11 @@ db.exec(`
|
|
|
106
107
|
)
|
|
107
108
|
`);
|
|
108
109
|
|
|
110
|
+
// --- Migration: add domain column to agent_stats ---
|
|
111
|
+
try {
  // 'general' uses single quotes: the standard SQL string-literal form.
  db.exec("ALTER TABLE agent_stats ADD COLUMN domain TEXT DEFAULT 'general'");
} catch (e) {
  // The ALTER runs on every start, so "duplicate column name" is the
  // expected outcome once the column exists and is ignored. Anything else
  // is reported on stderr instead of being silently swallowed; startup
  // still continues, keeping the migration best-effort.
  const message = e instanceof Error ? e.message : String(e);
  if (!/duplicate column name/i.test(message)) {
    console.error(`[persyst] Migration failed (agent_stats.domain): ${message}`);
  }
}
|
|
114
|
+
|
|
109
115
|
// --- Attestations table ---
|
|
110
116
|
db.exec(`
|
|
111
117
|
CREATE TABLE IF NOT EXISTS attestations (
|
|
@@ -346,6 +352,24 @@ const stmts = {
|
|
|
346
352
|
// -- Dedup --
|
|
347
353
|
findMemoryByContent: db.prepare(
|
|
348
354
|
'SELECT id FROM memories WHERE content = ? AND valid_until IS NULL LIMIT 1'
|
|
355
|
+
),
|
|
356
|
+
|
|
357
|
+
// -- Hash-prefix lookup for git dedup (Bug 1 fix) --
|
|
358
|
+
findMemoryByHashPrefix: db.prepare(
|
|
359
|
+
'SELECT id FROM memories WHERE content LIKE ? AND valid_until IS NULL LIMIT 1'
|
|
360
|
+
),
|
|
361
|
+
|
|
362
|
+
// -- Active memory count --
|
|
363
|
+
getActiveMemoryCount: db.prepare(
|
|
364
|
+
'SELECT COUNT(*) as count FROM memories WHERE valid_until IS NULL'
|
|
365
|
+
),
|
|
366
|
+
|
|
367
|
+
// -- Memory History Chain (Feature 6: prepared statements) --
|
|
368
|
+
getContradictionAncestors: db.prepare(
|
|
369
|
+
'SELECT old_memory_id FROM contradictions WHERE new_memory_id = ?'
|
|
370
|
+
),
|
|
371
|
+
getContradictionDescendants: db.prepare(
|
|
372
|
+
'SELECT new_memory_id FROM contradictions WHERE old_memory_id = ?'
|
|
349
373
|
)
|
|
350
374
|
};
|
|
351
375
|
|
|
@@ -593,13 +617,31 @@ export function getMemoriesByEntity(entityId) {
|
|
|
593
617
|
}
|
|
594
618
|
|
|
595
619
|
/**
|
|
596
|
-
* Check if a memory with
|
|
597
|
-
* Used for deduplication
|
|
598
|
-
* @param {string}
|
|
620
|
+
* Check if a memory with exact content already exists.
|
|
621
|
+
* Used for deduplication.
|
|
622
|
+
* @param {string} content - Exact content to match
|
|
599
623
|
* @returns {boolean}
|
|
600
624
|
*/
|
|
601
|
-
export function memoryExists(
|
|
602
|
-
return stmts.findMemoryByContent.get(
|
|
625
|
+
export function memoryExists(content) {
  // The statement selects at most one row (valid_until IS NULL) whose
  // content equals the argument; no row comes back as undefined.
  const match = stmts.findMemoryByContent.get(content);
  return match !== undefined;
}
|
|
628
|
+
|
|
629
|
+
/**
|
|
630
|
+
* Check if a memory exists by hash prefix pattern (LIKE query).
|
|
631
|
+
* Used for git commit deduplication where we match `[hashPrefix]%`.
|
|
632
|
+
* @param {string} pattern - SQL LIKE pattern to match (e.g. '[abc1234]%')
|
|
633
|
+
* @returns {boolean}
|
|
634
|
+
*/
|
|
635
|
+
export function memoryExistsByHashPrefix(pattern) {
  // `pattern` is bound as the right-hand side of a SQL LIKE, so any % or _
  // it contains acts as a wildcard.
  const match = stmts.findMemoryByHashPrefix.get(pattern);
  return match !== undefined;
}
|
|
638
|
+
|
|
639
|
+
/**
|
|
640
|
+
* Get count of active (non-archived) memories.
|
|
641
|
+
* @returns {number}
|
|
642
|
+
*/
|
|
643
|
+
export function getActiveMemoryCount() {
  // The query is a bare COUNT(*) aliased to `count` over rows whose
  // valid_until is NULL.
  const { count } = stmts.getActiveMemoryCount.get();
  return count;
}
|
|
604
646
|
|
|
605
647
|
// ============================================================
|
|
@@ -713,14 +755,14 @@ export function getMemoryHistoryChain(memoryId) {
|
|
|
713
755
|
if (versions.has(currentId)) continue;
|
|
714
756
|
versions.add(currentId);
|
|
715
757
|
|
|
716
|
-
// Find ancestors (replaced by current)
|
|
717
|
-
const ancestors =
|
|
758
|
+
// Find ancestors (replaced by current) — using prepared statement
|
|
759
|
+
const ancestors = stmts.getContradictionAncestors.all(currentId);
|
|
718
760
|
ancestors.forEach(a => {
|
|
719
761
|
if (!versions.has(a.old_memory_id)) queue.push(a.old_memory_id);
|
|
720
762
|
});
|
|
721
763
|
|
|
722
|
-
// Find descendants (replaces current)
|
|
723
|
-
const descendants =
|
|
764
|
+
// Find descendants (replaces current) — using prepared statement
|
|
765
|
+
const descendants = stmts.getContradictionDescendants.all(currentId);
|
|
724
766
|
descendants.forEach(d => {
|
|
725
767
|
if (!versions.has(d.new_memory_id)) queue.push(d.new_memory_id);
|
|
726
768
|
});
|
package/src/git.js
CHANGED
|
@@ -3,30 +3,36 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Reads git log from a repository and converts commits into memories.
|
|
5
5
|
* Performs commit categorization, file diff analysis, and imports notes.
|
|
6
|
+
*
|
|
7
|
+
* IMPORTANT: Uses async execFile instead of execSync to avoid blocking
|
|
8
|
+
* the Node.js event loop during git operations (Bug 4 fix).
|
|
6
9
|
*/
|
|
7
10
|
|
|
8
|
-
import {
|
|
11
|
+
import { execFile } from 'child_process';
|
|
12
|
+
import { promisify } from 'util';
|
|
13
|
+
|
|
14
|
+
const execFileAsync = promisify(execFile);
|
|
9
15
|
|
|
10
16
|
/**
|
|
11
17
|
* Read the N most recent git commits from a repository.
|
|
12
18
|
*
|
|
13
19
|
* @param {string} repoPath - Absolute path to the git repo
|
|
14
20
|
* @param {number} count - Number of commits to read (default: 20)
|
|
15
|
-
* @returns {Array<{hash: string, message: string, author: string, date: string, fullText: string, files: string[], importance: number}
|
|
21
|
+
* @returns {Promise<Array<{hash: string, message: string, author: string, date: string, fullText: string, files: string[], importance: number}>>}
|
|
16
22
|
*/
|
|
17
|
-
export function getRecentCommits(repoPath, count = 20) {
|
|
23
|
+
export async function getRecentCommits(repoPath, count = 20) {
|
|
18
24
|
try {
|
|
19
25
|
// Use a delimiter to split commits reliably
|
|
20
26
|
const DELIM = '---PERSYST-COMMIT---';
|
|
21
27
|
const format = `${DELIM}%n%H%n%an%n%ai%n%s%n%b`;
|
|
22
28
|
|
|
23
|
-
const output =
|
|
24
|
-
|
|
29
|
+
const { stdout: output } = await execFileAsync(
|
|
30
|
+
'git',
|
|
31
|
+
['log', `-n`, `${count}`, `--pretty=format:${format}`],
|
|
25
32
|
{
|
|
26
33
|
cwd: repoPath,
|
|
27
34
|
encoding: 'utf-8',
|
|
28
35
|
timeout: 10000, // 10s timeout
|
|
29
|
-
stdio: ['pipe', 'pipe', 'pipe'] // Suppress stderr
|
|
30
36
|
}
|
|
31
37
|
);
|
|
32
38
|
|
|
@@ -45,7 +51,7 @@ export function getRecentCommits(repoPath, count = 20) {
|
|
|
45
51
|
const body = lines.slice(4).join(' ').trim();
|
|
46
52
|
|
|
47
53
|
// Fetch git notes if available (represents PR metadata)
|
|
48
|
-
const notes = getGitNotes(repoPath, hash);
|
|
54
|
+
const notes = await getGitNotes(repoPath, hash);
|
|
49
55
|
|
|
50
56
|
// Build a readable memory string
|
|
51
57
|
let fullText = body
|
|
@@ -57,7 +63,7 @@ export function getRecentCommits(repoPath, count = 20) {
|
|
|
57
63
|
}
|
|
58
64
|
|
|
59
65
|
// Fetch files touched
|
|
60
|
-
const files = getCommitFiles(repoPath, hash);
|
|
66
|
+
const files = await getCommitFiles(repoPath, hash);
|
|
61
67
|
|
|
62
68
|
// Classify importance based on message
|
|
63
69
|
const classification = classifyCommit(subject);
|
|
@@ -92,17 +98,17 @@ export function getRecentCommits(repoPath, count = 20) {
|
|
|
92
98
|
*
|
|
93
99
|
* @param {string} repoPath - Absolute path to the git repo
|
|
94
100
|
* @param {string} hash - Full commit hash
|
|
95
|
-
* @returns {string[]} List of changed file paths
|
|
101
|
+
* @returns {Promise<string[]>} List of changed file paths
|
|
96
102
|
*/
|
|
97
|
-
export function getCommitFiles(repoPath, hash) {
|
|
103
|
+
export async function getCommitFiles(repoPath, hash) {
|
|
98
104
|
try {
|
|
99
|
-
const output =
|
|
100
|
-
|
|
105
|
+
const { stdout: output } = await execFileAsync(
|
|
106
|
+
'git',
|
|
107
|
+
['diff-tree', '--no-commit-id', '--name-only', '-r', hash],
|
|
101
108
|
{
|
|
102
109
|
cwd: repoPath,
|
|
103
110
|
encoding: 'utf-8',
|
|
104
111
|
timeout: 5000,
|
|
105
|
-
stdio: ['pipe', 'pipe', 'pipe']
|
|
106
112
|
}
|
|
107
113
|
);
|
|
108
114
|
return output.trim().split('\n').filter(Boolean);
|
|
@@ -114,15 +120,15 @@ export function getCommitFiles(repoPath, hash) {
|
|
|
114
120
|
/**
|
|
115
121
|
* Fetch git notes (representing PR metadata or additional annotations).
|
|
116
122
|
*/
|
|
117
|
-
export function getGitNotes(repoPath, hash) {
|
|
123
|
+
export async function getGitNotes(repoPath, hash) {
|
|
118
124
|
try {
|
|
119
|
-
const output =
|
|
120
|
-
|
|
125
|
+
const { stdout: output } = await execFileAsync(
|
|
126
|
+
'git',
|
|
127
|
+
['notes', 'show', hash],
|
|
121
128
|
{
|
|
122
129
|
cwd: repoPath,
|
|
123
130
|
encoding: 'utf-8',
|
|
124
131
|
timeout: 3000,
|
|
125
|
-
stdio: ['pipe', 'pipe', 'pipe']
|
|
126
132
|
}
|
|
127
133
|
);
|
|
128
134
|
return output.trim();
|
package/src/search.js
CHANGED
|
@@ -3,7 +3,8 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Combines keyword and semantic searches, integrates temporal decay,
|
|
5
5
|
* applies agent reputation scores, generates cryptographic search attestations,
|
|
6
|
-
*
|
|
6
|
+
* builds graph-hopped optimized LLM context prompts, and applies MMR
|
|
7
|
+
* for diverse result retrieval.
|
|
7
8
|
*/
|
|
8
9
|
|
|
9
10
|
import db, {
|
|
@@ -16,9 +17,11 @@ import db, {
|
|
|
16
17
|
} from './database.js';
|
|
17
18
|
import { generateEmbedding } from './embeddings.js';
|
|
18
19
|
import { createAttestation } from './attestation.js';
|
|
20
|
+
import { searchCache, LRUCache } from './cache.js';
|
|
19
21
|
|
|
20
22
|
/**
|
|
21
23
|
* Search memories using both keyword and semantic strategies.
|
|
24
|
+
* Results are cached in the LRU cache for repeated queries.
|
|
22
25
|
*
|
|
23
26
|
* @param {string} queryText - What to search for
|
|
24
27
|
* @param {number} limit - Max results to return (default: 5)
|
|
@@ -27,6 +30,14 @@ import { createAttestation } from './attestation.js';
|
|
|
27
30
|
* @returns {Promise<Array>} Ranked search results (with .attestation property attached)
|
|
28
31
|
*/
|
|
29
32
|
export async function searchHybrid(queryText, limit = 5, agentId = null, sessionId = null) {
|
|
33
|
+
// --- Check LRU cache first (Feature 1) ---
|
|
34
|
+
const cacheKey = LRUCache.key(queryText, limit);
|
|
35
|
+
const cached = searchCache.get(cacheKey);
|
|
36
|
+
if (cached) {
|
|
37
|
+
console.error(`[persyst-cache] Cache HIT for query: "${queryText.slice(0, 50)}..."`);
|
|
38
|
+
return cached;
|
|
39
|
+
}
|
|
40
|
+
|
|
30
41
|
// --- Step 1: Keyword search (fast, exact matches) ---
|
|
31
42
|
const keywordHits = searchKeyword(queryText, limit * 2);
|
|
32
43
|
const keywordIds = new Set(keywordHits.map(r => r.id));
|
|
@@ -53,8 +64,8 @@ export async function searchHybrid(queryText, limit = 5, agentId = null, session
|
|
|
53
64
|
keyword_match: isKeywordMatch
|
|
54
65
|
};
|
|
55
66
|
})
|
|
56
|
-
// Filter out low similarity semantic matches if they have no keyword match (threshold 0.
|
|
57
|
-
.filter(r => r.keyword_match || r.similarity >= 0.
|
|
67
|
+
// Filter out low similarity semantic matches if they have no keyword match (threshold 0.30)
|
|
68
|
+
.filter(r => r.keyword_match || r.similarity >= 0.30);
|
|
58
69
|
|
|
59
70
|
// Add keyword-only hits that semantic search missed
|
|
60
71
|
const semanticIds = new Set(semanticResults.map(r => r.id));
|
|
@@ -112,15 +123,96 @@ export async function searchHybrid(queryText, limit = 5, agentId = null, session
|
|
|
112
123
|
|
|
113
124
|
// Sort by final score descending
|
|
114
125
|
finalResults.sort((a, b) => parseFloat(b.hybrid_score) - parseFloat(a.hybrid_score));
|
|
115
|
-
|
|
126
|
+
|
|
127
|
+
// --- Step 5: Apply MMR for diverse retrieval (Feature 3) ---
|
|
128
|
+
const mmrResults = applyMMR(finalResults, limit);
|
|
116
129
|
|
|
117
130
|
// Generate cryptographic attestation for audit trails
|
|
118
|
-
const attestation = createAttestation(queryText,
|
|
131
|
+
const attestation = createAttestation(queryText, mmrResults, agentId, sessionId);
|
|
119
132
|
|
|
120
133
|
// Attach attestation object directly to the array to preserve compatibility with existing tests
|
|
121
|
-
|
|
134
|
+
mmrResults.attestation = attestation;
|
|
135
|
+
|
|
136
|
+
// --- Store in LRU cache (Feature 1) ---
|
|
137
|
+
searchCache.set(cacheKey, mmrResults);
|
|
138
|
+
|
|
139
|
+
return mmrResults;
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
/**
|
|
143
|
+
* Apply Maximal Marginal Relevance (MMR) re-ranking for diverse results.
|
|
144
|
+
*
|
|
145
|
+
* MMR balances relevance with diversity by penalizing candidates that
|
|
146
|
+
* are too similar to already-selected results.
|
|
147
|
+
*
|
|
148
|
+
* @param {Array} candidates - Scored search results
|
|
149
|
+
* @param {number} limit - Max results to return
|
|
150
|
+
* @param {number} lambda - Trade-off parameter (0.7 = 70% relevance, 30% diversity)
|
|
151
|
+
* @returns {Array} MMR-reranked results
|
|
152
|
+
*/
|
|
153
|
+
function applyMMR(candidates, limit, lambda = 0.7) {
  // Nothing to trim: hand the caller's array back untouched.
  if (candidates.length <= limit) return candidates;

  // A zero, negative or NaN limit asks for no results. Without this guard
  // the unconditional "pick the top result" step below would return one.
  if (!(limit > 0)) return [];

  // An unparsable score counts as zero relevance. Left as NaN it could never
  // win a comparison, and the loop could stop before `limit` results.
  const relevanceOf = (item) => {
    const score = parseFloat(item.hybrid_score);
    return Number.isNaN(score) ? 0 : score;
  };

  const remaining = [...candidates];

  // Always pick the first candidate; the caller is expected to pass the
  // list sorted best-first.
  let newest = remaining.shift();
  const selected = [newest];

  // maxSimToSelected[i] is the highest content similarity between
  // remaining[i] and any already-selected result. It is kept index-aligned
  // with `remaining` and only compared against the newest pick each round,
  // which gives the same maximum as rescanning every selected result.
  const maxSimToSelected = remaining.map(() => 0);

  while (selected.length < limit && remaining.length > 0) {
    let bestIdx = -1;
    let bestMMRScore = -Infinity;

    for (let i = 0; i < remaining.length; i++) {
      const candidate = remaining[i];

      // Content-based Jaccard similarity is used as the redundancy proxy.
      const sim = jaccardSimilarity(candidate.content, newest.content);
      if (sim > maxSimToSelected[i]) maxSimToSelected[i] = sim;

      // MMR score = λ * relevance - (1 - λ) * max_similarity_to_selected
      const mmrScore = lambda * relevanceOf(candidate) - (1 - lambda) * maxSimToSelected[i];

      // Strict ">" keeps the earlier (higher-ranked) candidate on ties.
      if (mmrScore > bestMMRScore) {
        bestMMRScore = mmrScore;
        bestIdx = i;
      }
    }

    // No candidate produced a comparable score (e.g. lambda is NaN).
    if (bestIdx < 0) break;

    newest = remaining[bestIdx];
    selected.push(newest);
    remaining.splice(bestIdx, 1);
    maxSimToSelected.splice(bestIdx, 1);
  }

  return selected;
}
|
|
196
|
+
|
|
197
|
+
/**
|
|
198
|
+
* Compute Jaccard similarity between two text strings.
|
|
199
|
+
* Uses word-level tokenization for efficiency.
|
|
200
|
+
*
|
|
201
|
+
* @param {string} a - First text
|
|
202
|
+
* @param {string} b - Second text
|
|
203
|
+
* @returns {number} Similarity score between 0 and 1
|
|
204
|
+
*/
|
|
205
|
+
function jaccardSimilarity(a, b) {
  // Lower-cased, whitespace-separated unique words. Non-string input is
  // treated as empty text instead of throwing. filter(Boolean) removes the
  // '' token that split() yields for empty strings and for leading or
  // trailing whitespace, which would otherwise count as a shared "word".
  const tokenize = (text) =>
    new Set(
      (typeof text === 'string' ? text : '')
        .toLowerCase()
        .split(/\s+/)
        .filter(Boolean)
    );

  const wordsA = tokenize(a);
  const wordsB = tokenize(b);

  let intersection = 0;
  for (const word of wordsA) {
    if (wordsB.has(word)) intersection++;
  }

  // |A ∪ B| = |A| + |B| - |A ∩ B|; two empty texts share nothing, so score 0.
  const union = wordsA.size + wordsB.size - intersection;
  return union === 0 ? 0 : intersection / union;
}
|
|
125
217
|
|
|
126
218
|
/**
|
|
@@ -234,8 +326,11 @@ export async function getOptimizedContext(queryText, maxTokens, agentId = null,
|
|
|
234
326
|
}
|
|
235
327
|
context += '=== END OF CONTEXT ===';
|
|
236
328
|
|
|
237
|
-
//
|
|
238
|
-
|
|
329
|
+
// Bug 8 fix: Skip attestation when no results to avoid audit noise
|
|
330
|
+
let attestation = null;
|
|
331
|
+
if (accepted.length > 0) {
|
|
332
|
+
attestation = createAttestation(queryText, accepted, agentId, sessionId);
|
|
333
|
+
}
|
|
239
334
|
|
|
240
335
|
return {
|
|
241
336
|
context,
|
|
@@ -246,12 +341,26 @@ export async function getOptimizedContext(queryText, maxTokens, agentId = null,
|
|
|
246
341
|
|
|
247
342
|
/**
|
|
248
343
|
* Performs memory consolidation by merging highly similar memories.
|
|
344
|
+
* Bug 6 fix: DB mutations are wrapped in a transaction for atomicity.
|
|
249
345
|
*/
|
|
250
346
|
export async function consolidateMemories() {
|
|
251
347
|
const activeMemories = db.prepare('SELECT * FROM memories WHERE valid_until IS NULL').all();
|
|
252
348
|
const consolidated = [];
|
|
253
349
|
const visited = new Set();
|
|
254
350
|
|
|
351
|
+
// Pre-compile the transaction for atomic DB operations (Bug 6 fix)
|
|
352
|
+
const archiveAndMerge = db.transaction((canonicalId, mergedContent, dupIds) => {
|
|
353
|
+
// Update canonical memory with merged content
|
|
354
|
+
db.prepare('UPDATE memories SET content = ?, last_accessed = unixepoch() WHERE id = ?').run(mergedContent, canonicalId);
|
|
355
|
+
|
|
356
|
+
// Archive duplicates
|
|
357
|
+
for (const dupId of dupIds) {
|
|
358
|
+
db.prepare('UPDATE memories SET valid_until = unixepoch() WHERE id = ?').run(dupId);
|
|
359
|
+
db.prepare('INSERT INTO contradictions (old_memory_id, new_memory_id, resolution_reason) VALUES (?, ?, ?)')
|
|
360
|
+
.run(dupId, canonicalId, `Consolidated into canonical memory #${canonicalId}`);
|
|
361
|
+
}
|
|
362
|
+
});
|
|
363
|
+
|
|
255
364
|
for (const mem of activeMemories) {
|
|
256
365
|
if (visited.has(mem.id)) continue;
|
|
257
366
|
|
|
@@ -295,20 +404,17 @@ export async function consolidateMemories() {
|
|
|
295
404
|
const uniqueContents = Array.from(new Set(contents));
|
|
296
405
|
const mergedContent = uniqueContents.join('. ').replace(/\.\./g, '.');
|
|
297
406
|
|
|
298
|
-
//
|
|
299
|
-
db.prepare('UPDATE memories SET content = ?, last_accessed = unixepoch() WHERE id = ?').run(mergedContent, canonical.id);
|
|
300
|
-
|
|
301
|
-
// Update vector embedding for canonical
|
|
407
|
+
// Generate new embedding OUTSIDE the transaction (async operation)
|
|
302
408
|
const newEmbedding = await generateEmbedding(mergedContent);
|
|
409
|
+
|
|
410
|
+
// Run atomic DB transaction for all mutations (Bug 6 fix)
|
|
411
|
+
archiveAndMerge(canonical.id, mergedContent, dupesToArchive.map(d => d.id));
|
|
412
|
+
|
|
413
|
+
// Update vector embedding (also outside transaction since vec0 tables have their own handling)
|
|
303
414
|
db.prepare('DELETE FROM memories_vec WHERE rowid = ?').run(canonical.id);
|
|
304
415
|
db.prepare('INSERT INTO memories_vec (rowid, embedding) VALUES (?, ?)').run(BigInt(canonical.id), Buffer.from(newEmbedding.buffer));
|
|
305
416
|
|
|
306
|
-
// Archive duplicates
|
|
307
417
|
for (const dup of dupesToArchive) {
|
|
308
|
-
db.prepare('UPDATE memories SET valid_until = unixepoch() WHERE id = ?').run(dup.id);
|
|
309
|
-
db.prepare('INSERT INTO contradictions (old_memory_id, new_memory_id, resolution_reason) VALUES (?, ?, ?)')
|
|
310
|
-
.run(dup.id, canonical.id, `Consolidated into canonical memory #${canonical.id}`);
|
|
311
|
-
|
|
312
418
|
visited.add(dup.id);
|
|
313
419
|
}
|
|
314
420
|
|
package/src/server.js
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
|
|
12
12
|
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
13
13
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
14
|
-
import { registerTools } from './tools.js';
|
|
14
|
+
import { registerTools, cleanupWatchers } from './tools.js';
|
|
15
15
|
import { applyTemporalDecay, closeDatabase } from './database.js';
|
|
16
16
|
import { consolidateMemories } from './search.js';
|
|
17
17
|
|
|
@@ -23,7 +23,7 @@ export async function startServer() {
|
|
|
23
23
|
// --- Create MCP server ---
|
|
24
24
|
const server = new McpServer({
|
|
25
25
|
name: 'persyst',
|
|
26
|
-
version: '
|
|
26
|
+
version: '2.1.0'
|
|
27
27
|
});
|
|
28
28
|
|
|
29
29
|
// --- Register all tools ---
|
|
@@ -46,11 +46,12 @@ export async function startServer() {
|
|
|
46
46
|
}
|
|
47
47
|
}, 86400000);
|
|
48
48
|
|
|
49
|
-
// --- Graceful shutdown ---
|
|
49
|
+
// --- Graceful shutdown (Bug 3 fix: also cleans up git watchers) ---
|
|
50
50
|
const shutdown = () => {
|
|
51
51
|
console.error('[persyst] Shutting down...');
|
|
52
52
|
clearInterval(decayTimer);
|
|
53
53
|
clearInterval(consolidationTimer);
|
|
54
|
+
cleanupWatchers(); // Bug 3 fix: stop all git repo watchers
|
|
54
55
|
closeDatabase();
|
|
55
56
|
process.exit(0);
|
|
56
57
|
};
|