persyst-mcp 1.0.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +85 -62
- package/index.js +4 -1
- package/package.json +16 -2
- package/src/attestation.js +206 -0
- package/src/cache.js +122 -0
- package/src/database.js +369 -33
- package/src/git.js +87 -20
- package/src/search.js +375 -49
- package/src/server.js +19 -4
- package/src/tools.js +502 -98
package/src/git.js
CHANGED
|
@@ -1,37 +1,38 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* git.js — Git Commit Ingestion
|
|
2
|
+
* git.js — Git Commit Ingestion & Analysis
|
|
3
3
|
*
|
|
4
4
|
* Reads git log from a repository and converts commits into memories.
|
|
5
|
-
*
|
|
5
|
+
* Performs commit categorization, file diff analysis, and imports notes.
|
|
6
6
|
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
* Deduplicates by commit hash so you can ingest safely multiple times.
|
|
7
|
+
* IMPORTANT: Uses async execFile instead of execSync to avoid blocking
|
|
8
|
+
* the Node.js event loop during git operations (Bug 4 fix).
|
|
11
9
|
*/
|
|
12
10
|
|
|
13
|
-
import {
|
|
11
|
+
import { execFile } from 'child_process';
|
|
12
|
+
import { promisify } from 'util';
|
|
13
|
+
|
|
14
|
+
const execFileAsync = promisify(execFile);
|
|
14
15
|
|
|
15
16
|
/**
|
|
16
17
|
* Read the N most recent git commits from a repository.
|
|
17
18
|
*
|
|
18
19
|
* @param {string} repoPath - Absolute path to the git repo
|
|
19
20
|
* @param {number} count - Number of commits to read (default: 20)
|
|
20
|
-
* @returns {Array<{hash: string, message: string, author: string, date: string, fullText: string}
|
|
21
|
+
* @returns {Promise<Array<{hash: string, message: string, author: string, date: string, fullText: string, files: string[], importance: number}>>}
|
|
21
22
|
*/
|
|
22
|
-
export function getRecentCommits(repoPath, count = 20) {
|
|
23
|
+
export async function getRecentCommits(repoPath, count = 20) {
|
|
23
24
|
try {
|
|
24
25
|
// Use a delimiter to split commits reliably
|
|
25
26
|
const DELIM = '---PERSYST-COMMIT---';
|
|
26
27
|
const format = `${DELIM}%n%H%n%an%n%ai%n%s%n%b`;
|
|
27
28
|
|
|
28
|
-
const output =
|
|
29
|
-
|
|
29
|
+
const { stdout: output } = await execFileAsync(
|
|
30
|
+
'git',
|
|
31
|
+
['log', `-n`, `${count}`, `--pretty=format:${format}`],
|
|
30
32
|
{
|
|
31
33
|
cwd: repoPath,
|
|
32
34
|
encoding: 'utf-8',
|
|
33
35
|
timeout: 10000, // 10s timeout
|
|
34
|
-
stdio: ['pipe', 'pipe', 'pipe'] // Suppress stderr
|
|
35
36
|
}
|
|
36
37
|
);
|
|
37
38
|
|
|
@@ -49,17 +50,37 @@ export function getRecentCommits(repoPath, count = 20) {
|
|
|
49
50
|
const subject = lines[3].trim();
|
|
50
51
|
const body = lines.slice(4).join(' ').trim();
|
|
51
52
|
|
|
53
|
+
// Fetch git notes if available (represents PR metadata)
|
|
54
|
+
const notes = await getGitNotes(repoPath, hash);
|
|
55
|
+
|
|
52
56
|
// Build a readable memory string
|
|
53
|
-
|
|
57
|
+
let fullText = body
|
|
54
58
|
? `[${hash.slice(0, 7)}] ${subject} — by ${author} on ${date}. ${body}`
|
|
55
59
|
: `[${hash.slice(0, 7)}] ${subject} — by ${author} on ${date}`;
|
|
56
60
|
|
|
57
|
-
|
|
61
|
+
if (notes) {
|
|
62
|
+
fullText += ` [PR Notes] ${notes}`;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
// Fetch files touched
|
|
66
|
+
const files = await getCommitFiles(repoPath, hash);
|
|
67
|
+
|
|
68
|
+
// Classify importance based on message
|
|
69
|
+
const classification = classifyCommit(subject);
|
|
70
|
+
|
|
71
|
+
commits.push({
|
|
72
|
+
hash,
|
|
73
|
+
message: subject,
|
|
74
|
+
author,
|
|
75
|
+
date,
|
|
76
|
+
fullText,
|
|
77
|
+
files,
|
|
78
|
+
importance: classification.importance
|
|
79
|
+
});
|
|
58
80
|
}
|
|
59
81
|
|
|
60
82
|
return commits;
|
|
61
83
|
} catch (err) {
|
|
62
|
-
// Not a git repo, or git not installed
|
|
63
84
|
const message = err.message || String(err);
|
|
64
85
|
if (message.includes('not a git repository')) {
|
|
65
86
|
throw new Error(`Not a git repository: ${repoPath}`);
|
|
@@ -77,17 +98,17 @@ export function getRecentCommits(repoPath, count = 20) {
|
|
|
77
98
|
*
|
|
78
99
|
* @param {string} repoPath - Absolute path to the git repo
|
|
79
100
|
* @param {string} hash - Full commit hash
|
|
80
|
-
* @returns {string[]} List of changed file paths
|
|
101
|
+
* @returns {Promise<string[]>} List of changed file paths
|
|
81
102
|
*/
|
|
82
|
-
export function getCommitFiles(repoPath, hash) {
|
|
103
|
+
export async function getCommitFiles(repoPath, hash) {
|
|
83
104
|
try {
|
|
84
|
-
const output =
|
|
85
|
-
|
|
105
|
+
const { stdout: output } = await execFileAsync(
|
|
106
|
+
'git',
|
|
107
|
+
['diff-tree', '--no-commit-id', '--name-only', '-r', hash],
|
|
86
108
|
{
|
|
87
109
|
cwd: repoPath,
|
|
88
110
|
encoding: 'utf-8',
|
|
89
111
|
timeout: 5000,
|
|
90
|
-
stdio: ['pipe', 'pipe', 'pipe']
|
|
91
112
|
}
|
|
92
113
|
);
|
|
93
114
|
return output.trim().split('\n').filter(Boolean);
|
|
@@ -95,3 +116,49 @@ export function getCommitFiles(repoPath, hash) {
|
|
|
95
116
|
return [];
|
|
96
117
|
}
|
|
97
118
|
}
|
|
119
|
+
|
|
120
|
+
/**
 * Read the git note attached to a commit (used here to carry PR metadata or
 * other annotations alongside the commit itself).
 *
 * This is a best-effort lookup: whenever the `git notes show` invocation
 * fails for any reason, an empty string is returned instead of throwing.
 *
 * @param {string} repoPath - Directory used as git's working directory
 * @param {string} hash - Commit whose note should be read
 * @returns {Promise<string>} Trimmed note text, or '' when none could be read
 */
export async function getGitNotes(repoPath, hash) {
  const gitArgs = ['notes', 'show', hash];
  const gitOptions = {
    cwd: repoPath,
    encoding: 'utf-8',
    timeout: 3000,
  };

  try {
    const result = await execFileAsync('git', gitArgs, gitOptions);
    return result.stdout.trim();
  } catch {
    // Failure is not treated as an error; callers check for an empty string.
    return '';
  }
}
|
|
139
|
+
|
|
140
|
+
/**
 * Categorize a commit by the type prefix of its subject line and assign an
 * importance weight.
 *
 * The prefix is parsed as `type[(scope)][!]:`, so scoped subjects such as
 * `feat(api): ...` are classified by their type, and a `!` before the colon
 * marks a breaking change regardless of type. Matching is case-insensitive
 * and ignores surrounding whitespace.
 *
 * @param {string} subject - Commit subject line; non-string values are
 *   treated as an empty subject
 * @returns {{type: string, importance: number}}
 *   - `architectural` / 0.9 for feat, fix, refactor, breaking, decision, or
 *     any type flagged with `!`
 *   - `chore` / 0.4 for chore, docs, test, style, ci
 *   - `other` / 0.6 for everything else
 */
export function classifyCommit(subject) {
  const architecturalTypes = ['feat', 'fix', 'refactor', 'breaking', 'decision'];
  const choreTypes = ['chore', 'docs', 'test', 'style', 'ci'];

  const normalized = (typeof subject === 'string' ? subject : '').toLowerCase().trim();

  // Groups: 1 = commit type, 2 = "!" breaking-change marker (if present).
  const prefix = /^([a-z]+)(?:\([^)]*\))?(!)?:/.exec(normalized);
  if (!prefix) {
    return { type: 'other', importance: 0.6 };
  }

  const [, type, breakingMarker] = prefix;
  if (breakingMarker || architecturalTypes.includes(type)) {
    return { type: 'architectural', importance: 0.9 };
  }
  if (choreTypes.includes(type)) {
    return { type: 'chore', importance: 0.4 };
  }
  return { type: 'other', importance: 0.6 };
}
|
package/src/search.js
CHANGED
|
@@ -1,49 +1,43 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* search.js — Hybrid Search Engine
|
|
2
|
+
* search.js — Hybrid Search & Context Optimization Engine
|
|
3
3
|
*
|
|
4
|
-
* Combines
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
* 2. SEMANTIC SEARCH (sqlite-vec + embeddings)
|
|
10
|
-
* → Finds by meaning. "dark mode" matches "night theme".
|
|
11
|
-
*
|
|
12
|
-
* 3. HYBRID = keyword + semantic merged
|
|
13
|
-
* → Keyword matches get a +0.2 score boost on top of semantic score.
|
|
14
|
-
* → Best of both worlds.
|
|
4
|
+
* Combines keyword and semantic searches, integrates temporal decay,
|
|
5
|
+
* applies agent reputation scores, generates cryptographic search attestations,
|
|
6
|
+
* builds graph-hopped optimized LLM context prompts, and applies MMR
|
|
7
|
+
* for diverse result retrieval.
|
|
15
8
|
*/
|
|
16
9
|
|
|
17
|
-
import {
|
|
18
|
-
import {
|
|
10
|
+
import db, {
|
|
19
11
|
searchKeyword,
|
|
20
12
|
searchVector,
|
|
21
13
|
getMemoryById,
|
|
22
|
-
boostMemory
|
|
14
|
+
boostMemory,
|
|
15
|
+
getProvenance,
|
|
16
|
+
getMemoriesByEntity
|
|
23
17
|
} from './database.js';
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
// ============================================================
|
|
18
|
+
import { generateEmbedding } from './embeddings.js';
|
|
19
|
+
import { createAttestation } from './attestation.js';
|
|
20
|
+
import { searchCache, LRUCache } from './cache.js';
|
|
28
21
|
|
|
29
22
|
/**
|
|
30
23
|
* Search memories using both keyword and semantic strategies.
|
|
31
|
-
*
|
|
32
|
-
* How it works:
|
|
33
|
-
* 1. Run FTS5 keyword search → get matching memory IDs
|
|
34
|
-
* 2. Run vector semantic search → get memories ranked by meaning
|
|
35
|
-
* 3. If a memory appears in BOTH, boost its score by +0.2
|
|
36
|
-
* 4. Sort by combined score, return top N
|
|
24
|
+
* Results are cached in the LRU cache for repeated queries.
|
|
37
25
|
*
|
|
38
26
|
* @param {string} queryText - What to search for
|
|
39
27
|
* @param {number} limit - Max results to return (default: 5)
|
|
40
|
-
* @
|
|
41
|
-
*
|
|
42
|
-
* @
|
|
43
|
-
* const results = await searchHybrid("night theme", 5);
|
|
44
|
-
* // Will find memories about "dark mode" via semantic match
|
|
28
|
+
* @param {string|null} agentId - Identifying string for the querying agent
|
|
29
|
+
* @param {string|null} sessionId - Session identifier
|
|
30
|
+
* @returns {Promise<Array>} Ranked search results (with .attestation property attached)
|
|
45
31
|
*/
|
|
46
|
-
export async function searchHybrid(queryText, limit = 5) {
|
|
32
|
+
export async function searchHybrid(queryText, limit = 5, agentId = null, sessionId = null) {
|
|
33
|
+
// --- Check LRU cache first (Feature 1) ---
|
|
34
|
+
const cacheKey = LRUCache.key(queryText, limit);
|
|
35
|
+
const cached = searchCache.get(cacheKey);
|
|
36
|
+
if (cached) {
|
|
37
|
+
console.error(`[persyst-cache] Cache HIT for query: "${queryText.slice(0, 50)}..."`);
|
|
38
|
+
return cached;
|
|
39
|
+
}
|
|
40
|
+
|
|
47
41
|
// --- Step 1: Keyword search (fast, exact matches) ---
|
|
48
42
|
const keywordHits = searchKeyword(queryText, limit * 2);
|
|
49
43
|
const keywordIds = new Set(keywordHits.map(r => r.id));
|
|
@@ -56,17 +50,22 @@ export async function searchHybrid(queryText, limit = 5) {
|
|
|
56
50
|
id: r.rowid,
|
|
57
51
|
distance: r.distance,
|
|
58
52
|
// Convert L2 distance to 0-1 similarity score
|
|
59
|
-
// For normalized vectors: cosine_sim = 1 - (L2_distance² / 2)
|
|
60
53
|
similarity: Math.max(0, 1 - (r.distance * r.distance) / 2)
|
|
61
54
|
}));
|
|
62
55
|
|
|
63
56
|
// --- Step 3: Merge results with keyword boost ---
|
|
64
|
-
const combined = semanticResults
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
57
|
+
const combined = semanticResults
|
|
58
|
+
.map(r => {
|
|
59
|
+
const isKeywordMatch = keywordIds.has(r.id);
|
|
60
|
+
return {
|
|
61
|
+
id: r.id,
|
|
62
|
+
similarity: r.similarity,
|
|
63
|
+
hybrid_score: r.similarity + (isKeywordMatch ? 0.2 : 0),
|
|
64
|
+
keyword_match: isKeywordMatch
|
|
65
|
+
};
|
|
66
|
+
})
|
|
67
|
+
// Filter out low similarity semantic matches if they have no keyword match (threshold 0.35)
|
|
68
|
+
.filter(r => r.keyword_match || r.similarity >= 0.35);
|
|
70
69
|
|
|
71
70
|
// Add keyword-only hits that semantic search missed
|
|
72
71
|
const semanticIds = new Set(semanticResults.map(r => r.id));
|
|
@@ -81,29 +80,356 @@ export async function searchHybrid(queryText, limit = 5) {
|
|
|
81
80
|
}
|
|
82
81
|
}
|
|
83
82
|
|
|
84
|
-
// --- Step 4:
|
|
85
|
-
|
|
86
|
-
const topResults = combined.slice(0, limit);
|
|
87
|
-
|
|
88
|
-
const results = topResults
|
|
83
|
+
// --- Step 4: Fetch full details, apply reputation adjust, sort and return top N ---
|
|
84
|
+
const finalResults = combined
|
|
89
85
|
.map(r => {
|
|
90
86
|
const memory = getMemoryById(r.id);
|
|
91
|
-
if (!memory) return null;
|
|
87
|
+
if (!memory) return null; // Memory was archived or deleted
|
|
92
88
|
|
|
93
|
-
// Boost
|
|
89
|
+
// Boost memory access metrics
|
|
94
90
|
boostMemory(r.id);
|
|
95
91
|
|
|
92
|
+
// Fetch reputation stats for weighting
|
|
93
|
+
let reputationScore = 1.0;
|
|
94
|
+
let reputationWarning = false;
|
|
95
|
+
const prov = memory.provenance;
|
|
96
|
+
if (prov && prov.source_type === 'agent' && prov.source_id) {
|
|
97
|
+
const agentRow = db.prepare('SELECT reputation_score FROM agent_stats WHERE agent_id = ?').get(prov.source_id);
|
|
98
|
+
if (agentRow) {
|
|
99
|
+
reputationScore = agentRow.reputation_score;
|
|
100
|
+
if (reputationScore < 0.5) {
|
|
101
|
+
reputationWarning = true;
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
// Final score formula: base_score * agent_reputation
|
|
107
|
+
const finalScore = r.hybrid_score * reputationScore;
|
|
108
|
+
|
|
96
109
|
return {
|
|
97
110
|
id: memory.id,
|
|
98
111
|
content: memory.content,
|
|
99
112
|
importance_score: memory.importance_score,
|
|
100
113
|
created_at: memory.created_at,
|
|
114
|
+
last_accessed: memory.last_accessed,
|
|
101
115
|
similarity: r.similarity.toFixed(4),
|
|
102
|
-
hybrid_score:
|
|
103
|
-
keyword_match: r.keyword_match
|
|
116
|
+
hybrid_score: finalScore.toFixed(4),
|
|
117
|
+
keyword_match: r.keyword_match,
|
|
118
|
+
reputation_warning: reputationWarning,
|
|
119
|
+
provenance: prov
|
|
104
120
|
};
|
|
105
121
|
})
|
|
106
|
-
.filter(Boolean);
|
|
122
|
+
.filter(Boolean);
|
|
123
|
+
|
|
124
|
+
// Sort by final score descending
|
|
125
|
+
finalResults.sort((a, b) => parseFloat(b.hybrid_score) - parseFloat(a.hybrid_score));
|
|
126
|
+
|
|
127
|
+
// --- Step 5: Apply MMR for diverse retrieval (Feature 3) ---
|
|
128
|
+
const mmrResults = applyMMR(finalResults, limit);
|
|
129
|
+
|
|
130
|
+
// Generate cryptographic attestation for audit trails
|
|
131
|
+
const attestation = createAttestation(queryText, mmrResults, agentId, sessionId);
|
|
132
|
+
|
|
133
|
+
// Attach attestation object directly to the array to preserve compatibility with existing tests
|
|
134
|
+
mmrResults.attestation = attestation;
|
|
135
|
+
|
|
136
|
+
// --- Store in LRU cache (Feature 1) ---
|
|
137
|
+
searchCache.set(cacheKey, mmrResults);
|
|
138
|
+
|
|
139
|
+
return mmrResults;
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
/**
 * Apply Maximal Marginal Relevance (MMR) re-ranking for diverse results.
 *
 * The first candidate is always kept; every following pick maximizes
 *   lambda * relevance - (1 - lambda) * maxSimilarityToAlreadySelected
 * where relevance is the numeric value of `hybrid_score` and similarity is
 * the word-level Jaccard similarity of the `content` strings.
 *
 * @param {Array} candidates - Scored search results; the first element is
 *   taken as the top-scored one, so callers should pass them sorted by
 *   `hybrid_score` descending
 * @param {number} limit - Max results to return; a non-positive (or
 *   non-numeric) limit yields an empty array
 * @param {number} lambda - Trade-off parameter (0.7 = 70% relevance, 30% diversity)
 * @returns {Array} MMR-reranked results. When there are no more candidates
 *   than `limit`, the input array itself is returned unchanged.
 */
function applyMMR(candidates, limit, lambda = 0.7) {
  // Without this guard the unconditional first pick below would return one
  // result even though none were requested.
  if (!(limit > 0)) return [];
  if (candidates.length <= limit) return candidates;

  const selected = [];
  const remaining = [...candidates];

  // Always pick the top-scored result first
  selected.push(remaining.shift());

  while (selected.length < limit && remaining.length > 0) {
    let bestIdx = -1;
    let bestMMRScore = -Infinity;

    for (let i = 0; i < remaining.length; i++) {
      const candidate = remaining[i];
      const relevance = parseFloat(candidate.hybrid_score);

      // Redundancy = highest content similarity to anything already chosen
      let maxSimToSelected = 0;
      for (const sel of selected) {
        const sim = jaccardSimilarity(candidate.content, sel.content);
        if (sim > maxSimToSelected) maxSimToSelected = sim;
      }

      const mmrScore = lambda * relevance - (1 - lambda) * maxSimToSelected;

      if (mmrScore > bestMMRScore) {
        bestMMRScore = mmrScore;
        bestIdx = i;
      }
    }

    // bestIdx stays -1 only when every remaining score is NaN; stop then.
    if (bestIdx < 0) break;
    selected.push(remaining.splice(bestIdx, 1)[0]);
  }

  return selected;
}
|
|
196
|
+
|
|
197
|
+
/**
 * Compute the word-level Jaccard similarity between two text strings.
 *
 * Each text is lower-cased and split on whitespace into a set of distinct
 * words; the result is |A ∩ B| / |A ∪ B|. Empty strings and leading or
 * trailing whitespace contribute no tokens, so two texts are never
 * considered similar merely because both are empty or padded.
 *
 * @param {string} a - First text (null/undefined is treated as empty)
 * @param {string} b - Second text (null/undefined is treated as empty)
 * @returns {number} Similarity score between 0 and 1; 0 when neither text
 *   contains any word
 */
function jaccardSimilarity(a, b) {
  // match(/\S+/g) never yields '' tokens, unlike split(/\s+/) on empty or
  // whitespace-padded input.
  const toWordSet = (text) =>
    new Set((text == null ? '' : String(text)).toLowerCase().match(/\S+/g) || []);

  const wordsA = toWordSet(a);
  const wordsB = toWordSet(b);

  let intersection = 0;
  for (const word of wordsA) {
    if (wordsB.has(word)) intersection++;
  }

  const union = wordsA.size + wordsB.size - intersection;
  return union === 0 ? 0 : intersection / union;
}
|
|
217
|
+
|
|
218
|
+
/**
 * Build a token-budgeted context block for LLM prompt injection.
 *
 * Pipeline:
 *   1. Run a hybrid search for up to 20 memories.
 *   2. For each hit, follow its edges to entities and pull in the other
 *      memories attached to those entities ("graph hop") at half the hit's score.
 *   3. Adjust scores: exponential decay by hours since last access, plus
 *      agent reputation weighting for hop candidates.
 *   4. Sort by score and greedily accept candidates that still fit the budget
 *      (a candidate that does not fit is skipped, later smaller ones may fit).
 *   5. Format the accepted memories and attest the result.
 *
 * @param {string} queryText - User's query
 * @param {number} maxTokens - Hard limit of (estimated) tokens for the context prompt
 * @param {string|null} agentId - Querying agent identifier
 * @param {string|null} sessionId - Current session ID
 * @returns {Promise<{context: string, memories: Array, attestation: (Object|null)}>}
 *   `attestation` is null when no memory was accepted.
 */
export async function getOptimizedContext(queryText, maxTokens, agentId = null, sessionId = null) {
  // 1. Run hybrid search to fetch top 20 memories
  const searchHits = await searchHybrid(queryText, 20, agentId, sessionId);
  const candidates = new Map();

  // Prepared once and reused for every hit instead of once per iteration.
  const edgesOfMemory = db.prepare(`
    SELECT * FROM edges
    WHERE (source_id = ? AND source_type = 'memory')
       OR (target_id = ? AND target_type = 'memory')
  `);

  for (const hit of searchHits) {
    const hitScore = parseFloat(hit.hybrid_score);

    candidates.set(hit.id, {
      id: hit.id,
      content: hit.content,
      importance_score: hit.importance_score,
      created_at: hit.created_at,
      last_accessed: hit.last_accessed,
      score: hitScore,
      provenance: hit.provenance,
      source: 'search'
    });

    // 2. Perform Graph Hop: collect the entities on the other end of this
    //    memory's edges.
    const entityIds = [];
    for (const edge of edgesOfMemory.all(hit.id, hit.id)) {
      if (edge.source_type === 'entity') entityIds.push(edge.source_id);
      if (edge.target_type === 'entity') entityIds.push(edge.target_id);
    }

    for (const entId of entityIds) {
      for (const other of getMemoriesByEntity(entId)) {
        if (other.id === hit.id) continue;
        if (candidates.has(other.id)) continue;

        candidates.set(other.id, {
          id: other.id,
          content: other.content,
          importance_score: other.importance_score,
          created_at: other.created_at,
          last_accessed: other.last_accessed,
          score: hitScore * 0.5, // 50% graph-hop penalty
          provenance: getProvenance(other.id),
          source: 'hop'
        });
      }
    }
  }

  // 3. Apply Scoring Adjustments
  const now = Math.floor(Date.now() / 1000);
  const list = Array.from(candidates.values());
  const reputationOfAgent = db.prepare('SELECT reputation_score FROM agent_stats WHERE agent_id = ?');

  for (const c of list) {
    // 3a. Temporal decay: score *= exp(-0.01 * hours_since_accessed)
    // A memory without a last_accessed value falls back to created_at
    // (assumes both are unix-epoch seconds, like `now` — TODO confirm).
    // If neither is usable the decay is skipped rather than turning the
    // score into NaN or 0.
    const lastTouched = c.last_accessed != null ? c.last_accessed : c.created_at;
    const hours = lastTouched != null ? Math.max(0, (now - lastTouched) / 3600) : NaN;
    if (Number.isFinite(hours)) {
      c.score *= Math.exp(-0.01 * hours);
    }

    // 3b. Agent reputation weighting.
    // Search hits already carry the reputation factor inside hybrid_score
    // (searchHybrid multiplies it in), so it is applied here only to
    // graph-hop candidates to avoid weighting the same memory twice.
    if (c.source !== 'hop') continue;

    const prov = c.provenance;
    if (prov && prov.source_type === 'agent' && prov.source_id) {
      const agentRow = reputationOfAgent.get(prov.source_id);
      if (agentRow) {
        c.score *= agentRow.reputation_score;
      }
    }
  }

  // 4. Sort candidates
  list.sort((a, b) => b.score - a.score);

  // 5. Compress context to fit maxTokens
  let currentTokens = 0;
  const accepted = [];

  for (const c of list) {
    // Heuristic: ~4 characters per token + format headers (~15 tokens)
    const estimatedTokens = Math.max(1, Math.ceil(c.content.length / 4) + 15);
    if (currentTokens + estimatedTokens > maxTokens) {
      continue;
    }
    currentTokens += estimatedTokens;
    accepted.push(c);
  }

  // 6. Format LLM injection context string
  let context = '=== RETRIEVED AGENT MEMORY CONTEXT ===\n';
  if (accepted.length === 0) {
    context += 'No relevant memories retrieved.\n';
  } else {
    for (const a of accepted) {
      let sourceTag = 'Source: manual';
      if (a.provenance) {
        sourceTag = `Source: ${a.provenance.source_type}${a.provenance.source_id ? ` (${a.provenance.source_id})` : ''}`;
      }
      context += `[Memory #${a.id}] (Score: ${a.score.toFixed(4)}, ${sourceTag})\n${a.content}\n---\n`;
    }
  }
  context += '=== END OF CONTEXT ===';

  // Skip attestation when no results to avoid audit noise
  let attestation = null;
  if (accepted.length > 0) {
    attestation = createAttestation(queryText, accepted, agentId, sessionId);
  }

  return {
    context,
    memories: accepted,
    attestation
  };
}
|
|
341
|
+
|
|
342
|
+
/**
 * Consolidate near-duplicate memories.
 *
 * For every active memory (valid_until IS NULL) the 10 nearest vectors are
 * fetched; active neighbours with similarity above 0.85 form a group. The
 * group member with the highest importance_score becomes the canonical
 * memory and receives the merged content plus a fresh embedding; the other
 * members are archived (valid_until set) and a row is written to
 * `contradictions` linking each of them to the canonical memory.
 *
 * The row updates for one group run inside a single transaction. The
 * embedding is generated before that transaction because it is async, and
 * the vector row is replaced after it.
 *
 * @returns {Promise<{success: boolean, consolidated_groups: number, details: Array<{canonical_id: number, merged_content: string, archived_ids: number[]}>}>}
 */
export async function consolidateMemories() {
  const activeMemories = db.prepare('SELECT * FROM memories WHERE valid_until IS NULL').all();
  const consolidated = [];
  const visited = new Set();

  // All statements are prepared once up front and reused for every memory.
  const selectEmbedding = db.prepare('SELECT embedding FROM memories_vec WHERE rowid = ?');
  const selectNeighbours = db.prepare(`
    SELECT rowid AS id, distance
    FROM memories_vec
    WHERE embedding MATCH ?
      AND k = 10
  `);
  const selectActiveById = db.prepare('SELECT * FROM memories WHERE id = ? AND valid_until IS NULL');
  const updateContent = db.prepare('UPDATE memories SET content = ?, last_accessed = unixepoch() WHERE id = ?');
  const archiveMemory = db.prepare('UPDATE memories SET valid_until = unixepoch() WHERE id = ?');
  const recordMerge = db.prepare('INSERT INTO contradictions (old_memory_id, new_memory_id, resolution_reason) VALUES (?, ?, ?)');
  const deleteVector = db.prepare('DELETE FROM memories_vec WHERE rowid = ?');
  const insertVector = db.prepare('INSERT INTO memories_vec (rowid, embedding) VALUES (?, ?)');

  // Atomic row updates for one group: rewrite the canonical memory, then
  // archive each duplicate and record where it was merged.
  const archiveAndMerge = db.transaction((canonicalId, mergedContent, dupIds) => {
    updateContent.run(mergedContent, canonicalId);

    for (const dupId of dupIds) {
      archiveMemory.run(dupId);
      recordMerge.run(dupId, canonicalId, `Consolidated into canonical memory #${canonicalId}`);
    }
  });

  for (const mem of activeMemories) {
    // Already merged into (or chosen as) a canonical memory during this sweep
    if (visited.has(mem.id)) continue;

    const embedding = selectEmbedding.get(mem.id);
    if (!embedding) continue;

    // sqlite-vec similarity search
    const hits = selectNeighbours.all(embedding.embedding);

    const duplicates = [];
    for (const hit of hits) {
      const hitId = Number(hit.id);
      if (hitId === mem.id) continue;
      if (visited.has(hitId)) continue;

      // Same distance-to-similarity conversion as the hybrid search
      const sim = Math.max(0, 1 - (hit.distance * hit.distance) / 2);
      if (sim > 0.85) {
        // Only still-active memories may join the group
        const dupMemory = selectActiveById.get(hitId);
        if (dupMemory) {
          duplicates.push(dupMemory);
        }
      }
    }

    if (duplicates.length === 0) continue;

    const allMemoriesInGroup = [mem, ...duplicates];

    // Sort by importance to pick canonical
    allMemoriesInGroup.sort((a, b) => b.importance_score - a.importance_score);
    const canonical = allMemoriesInGroup[0];
    const archivedIds = allMemoriesInGroup.slice(1).map(d => d.id);

    // Merge contents: distinct texts joined with '. ', doubled periods collapsed
    const contents = allMemoriesInGroup.map(m => m.content.trim());
    const uniqueContents = Array.from(new Set(contents));
    const mergedContent = uniqueContents.join('. ').replace(/\.\./g, '.');

    // Generate new embedding OUTSIDE the transaction (async operation)
    const newEmbedding = await generateEmbedding(mergedContent);

    archiveAndMerge(canonical.id, mergedContent, archivedIds);

    // Replace the canonical vector. Only the bytes that belong to the typed
    // array are copied: a view over a larger ArrayBuffer would otherwise
    // store unrelated data. (Assumes generateEmbedding returns a typed array
    // such as Float32Array — TODO confirm.)
    const vectorBytes = Buffer.from(newEmbedding.buffer, newEmbedding.byteOffset, newEmbedding.byteLength);
    deleteVector.run(canonical.id);
    insertVector.run(BigInt(canonical.id), vectorBytes);

    for (const dupId of archivedIds) {
      visited.add(dupId);
    }
    visited.add(canonical.id);

    consolidated.push({
      canonical_id: canonical.id,
      merged_content: mergedContent,
      archived_ids: archivedIds
    });
  }

  return {
    success: true,
    consolidated_groups: consolidated.length,
    details: consolidated
  };
}
|
package/src/server.js
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Creates the MCP server, registers all tools, and connects
|
|
5
5
|
* via stdio transport (the standard MCP communication method).
|
|
6
|
+
* Sets up hourly temporal decay and daily consolidation background tasks.
|
|
6
7
|
*
|
|
7
8
|
* IMPORTANT: Never write to stdout — it's reserved for MCP protocol.
|
|
8
9
|
* All logging goes to stderr via console.error().
|
|
@@ -10,8 +11,9 @@
|
|
|
10
11
|
|
|
11
12
|
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
12
13
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
13
|
-
import { registerTools } from './tools.js';
|
|
14
|
+
import { registerTools, cleanupWatchers } from './tools.js';
|
|
14
15
|
import { applyTemporalDecay, closeDatabase } from './database.js';
|
|
16
|
+
import { consolidateMemories } from './search.js';
|
|
15
17
|
|
|
16
18
|
/**
|
|
17
19
|
* Start the Persyst MCP server.
|
|
@@ -21,7 +23,7 @@ export async function startServer() {
|
|
|
21
23
|
// --- Create MCP server ---
|
|
22
24
|
const server = new McpServer({
|
|
23
25
|
name: 'persyst',
|
|
24
|
-
version: '
|
|
26
|
+
version: '2.0.0'
|
|
25
27
|
});
|
|
26
28
|
|
|
27
29
|
// --- Register all tools ---
|
|
@@ -32,10 +34,24 @@ export async function startServer() {
|
|
|
32
34
|
// Runs every hour: reduces importance of memories not accessed in 7+ days
|
|
33
35
|
const decayTimer = setInterval(applyTemporalDecay, 3600000);
|
|
34
36
|
|
|
35
|
-
// ---
|
|
37
|
+
// --- Start daily consolidation sweep ---
|
|
38
|
+
// Runs every 24 hours: merges similar memories (similarity > 0.85)
|
|
39
|
+
const consolidationTimer = setInterval(async () => {
|
|
40
|
+
console.error('[persyst] Running scheduled daily memory consolidation sweep...');
|
|
41
|
+
try {
|
|
42
|
+
const report = await consolidateMemories();
|
|
43
|
+
console.error(`[persyst] Consolidation sweep completed: consolidated ${report.consolidated_groups} duplicate groups.`);
|
|
44
|
+
} catch (err) {
|
|
45
|
+
console.error('[persyst] Daily consolidation sweep failed:', err.message);
|
|
46
|
+
}
|
|
47
|
+
}, 86400000);
|
|
48
|
+
|
|
49
|
+
// --- Graceful shutdown (Bug 3 fix: also cleans up git watchers) ---
|
|
36
50
|
const shutdown = () => {
|
|
37
51
|
console.error('[persyst] Shutting down...');
|
|
38
52
|
clearInterval(decayTimer);
|
|
53
|
+
clearInterval(consolidationTimer);
|
|
54
|
+
cleanupWatchers(); // Bug 3 fix: stop all git repo watchers
|
|
39
55
|
closeDatabase();
|
|
40
56
|
process.exit(0);
|
|
41
57
|
};
|
|
@@ -43,7 +59,6 @@ export async function startServer() {
|
|
|
43
59
|
process.on('SIGTERM', shutdown);
|
|
44
60
|
|
|
45
61
|
// --- Connect via stdio ---
|
|
46
|
-
// This is how Claude Code, Cursor, and Aider communicate with us
|
|
47
62
|
const transport = new StdioServerTransport();
|
|
48
63
|
await server.connect(transport);
|
|
49
64
|
|