persyst-mcp 2.2.5 → 2.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -2
- package/bin/init.js +168 -32
- package/bin/mcp.js +7 -0
- package/index.js +31 -11
- package/package.json +6 -11
- package/src/attestation.js +49 -28
- package/src/database.js +223 -32
- package/src/extractor-heuristic.js +5 -2
- package/src/sdk.js +4 -3
- package/src/search.js +54 -83
- package/src/server.js +856 -723
- package/src/setup-wasm.js +34 -39
- package/src/text-utils.js +41 -0
- package/src/tools.js +49 -45
- package/src/watcher.js +147 -38
package/src/setup-wasm.js
CHANGED
|
@@ -9,62 +9,57 @@ const require = createRequire(import.meta.url);
|
|
|
9
9
|
const onnxWebPath = require.resolve('onnxruntime-web');
|
|
10
10
|
const wasmDir = path.dirname(onnxWebPath);
|
|
11
11
|
|
|
12
|
-
// Redirect native Node session creation to WebAssembly session creation
|
|
13
12
|
ONNX_NODE.InferenceSession.create = ONNX_WEB.InferenceSession.create;
|
|
14
13
|
|
|
15
|
-
// Override URL.createObjectURL to return file URL of the existing local file
|
|
16
14
|
const originalCreateObjectURL = URL.createObjectURL;
|
|
17
|
-
|
|
15
|
+
const patchedCreateObjectURL = (blob) => {
|
|
18
16
|
const type = blob.type || '';
|
|
19
17
|
if (type.includes('javascript') || type.includes('mjs')) {
|
|
20
18
|
const filePath = path.join(wasmDir, 'ort-wasm-simd-threaded.asyncify.mjs');
|
|
21
|
-
|
|
22
|
-
return fileUrl;
|
|
19
|
+
return pathToFileURL(filePath).href;
|
|
23
20
|
}
|
|
24
21
|
return originalCreateObjectURL(blob);
|
|
25
22
|
};
|
|
23
|
+
URL.createObjectURL = patchedCreateObjectURL;
|
|
24
|
+
|
|
25
|
+
function readLocalFile(filePath, urlStr) {
|
|
26
|
+
const normalized = path.normalize(filePath);
|
|
27
|
+
const buffer = fs.readFileSync(normalized);
|
|
28
|
+
let contentType = 'application/octet-stream';
|
|
29
|
+
if (normalized.endsWith('.wasm')) contentType = 'application/wasm';
|
|
30
|
+
else if (normalized.endsWith('.mjs') || normalized.endsWith('.js')) contentType = 'text/javascript';
|
|
31
|
+
else if (normalized.endsWith('.onnx') || normalized.endsWith('.ort')) contentType = 'application/octet-stream';
|
|
32
|
+
return new Response(buffer, {
|
|
33
|
+
status: 200,
|
|
34
|
+
statusText: 'OK',
|
|
35
|
+
headers: { 'Content-Type': contentType }
|
|
36
|
+
});
|
|
37
|
+
}
|
|
26
38
|
|
|
27
|
-
// Override global fetch to load ONNX WASM binaries and model files from local disk
|
|
28
39
|
const originalFetch = globalThis.fetch;
|
|
29
|
-
|
|
40
|
+
const patchedFetch = async (url, options) => {
|
|
30
41
|
const urlStr = typeof url === 'string' ? url : url.url;
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
let filePath = '';
|
|
34
|
-
|
|
35
|
-
if (urlStr.startsWith('file://')) {
|
|
36
|
-
isLocal = true;
|
|
37
|
-
filePath = fileURLToPath(urlStr);
|
|
38
|
-
} else if (!urlStr.startsWith('http://') && !urlStr.startsWith('https://') && !urlStr.startsWith('data:')) {
|
|
39
|
-
isLocal = true;
|
|
40
|
-
filePath = urlStr;
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
// Intercept onnxruntime-web CDN URLs and route them to node_modules/onnxruntime-web/dist
|
|
42
|
+
|
|
43
|
+
// onnxruntime-web WASM binaries — resolve from node_modules
|
|
44
44
|
if (urlStr.includes('onnxruntime-web') || urlStr.includes('ort-wasm')) {
|
|
45
|
-
isLocal = true;
|
|
46
45
|
const filename = urlStr.split('/').pop().split('?')[0].split('#')[0];
|
|
47
|
-
|
|
46
|
+
return readLocalFile(path.join(wasmDir, filename), urlStr);
|
|
48
47
|
}
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
48
|
+
|
|
49
|
+
// file:// URLs — Node.js fetch does not support them natively
|
|
50
|
+
if (urlStr.startsWith('file://')) {
|
|
51
|
+
return readLocalFile(fileURLToPath(urlStr), urlStr);
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
// fallback for any non-http/https/data URL (onnxruntime internal schemes, bare paths)
|
|
55
|
+
if (!urlStr.startsWith('http://') && !urlStr.startsWith('https://') && !urlStr.startsWith('data:')) {
|
|
52
56
|
try {
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
else if (filePath.endsWith('.mjs') || filePath.endsWith('.js')) contentType = 'text/javascript';
|
|
57
|
-
|
|
58
|
-
return new Response(buffer, {
|
|
59
|
-
status: 200,
|
|
60
|
-
statusText: 'OK',
|
|
61
|
-
headers: { 'Content-Type': contentType }
|
|
62
|
-
});
|
|
63
|
-
} catch (err) {
|
|
64
|
-
console.error('[persyst] Failed to read local file:', filePath, err.message);
|
|
65
|
-
throw err;
|
|
57
|
+
return readLocalFile(urlStr, urlStr);
|
|
58
|
+
} catch (e) {
|
|
59
|
+
throw e;
|
|
66
60
|
}
|
|
67
61
|
}
|
|
68
|
-
|
|
62
|
+
|
|
69
63
|
return originalFetch(url, options);
|
|
70
64
|
};
|
|
65
|
+
globalThis.fetch = patchedFetch;
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* text-utils.js — Shared text-processing helpers used across Persyst.
|
|
3
|
+
*
|
|
4
|
+
* Keeping these in one place avoids duplicated logic and divergent behavior
|
|
5
|
+
* between modules.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Compute Jaccard similarity between two text strings.
|
|
10
|
+
* Uses word-level tokenization for efficiency.
|
|
11
|
+
*
|
|
12
|
+
* @param {string} a - First text
|
|
13
|
+
* @param {string} b - Second text
|
|
14
|
+
* @returns {number} Similarity score between 0 and 1
|
|
15
|
+
*/
|
|
16
|
+
export function jaccardSimilarity(a, b) {
|
|
17
|
+
if (!a || !b) return 0;
|
|
18
|
+
|
|
19
|
+
const wordsA = new Set(a.toLowerCase().split(/\s+/));
|
|
20
|
+
const wordsB = new Set(b.toLowerCase().split(/\s+/));
|
|
21
|
+
|
|
22
|
+
let intersection = 0;
|
|
23
|
+
for (const word of wordsA) {
|
|
24
|
+
if (wordsB.has(word)) intersection++;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
const union = wordsA.size + wordsB.size - intersection;
|
|
28
|
+
return union === 0 ? 0 : intersection / union;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
/**
|
|
32
|
+
* Compute Jaccard distance between two text strings.
|
|
33
|
+
* Distance = 1 - similarity, so 0 means identical and 1 means completely different.
|
|
34
|
+
*
|
|
35
|
+
* @param {string} a - First text
|
|
36
|
+
* @param {string} b - Second text
|
|
37
|
+
* @returns {number} Distance score between 0 and 1
|
|
38
|
+
*/
|
|
39
|
+
export function jaccardDistance(a, b) {
|
|
40
|
+
return 1 - jaccardSimilarity(a, b);
|
|
41
|
+
}
|
package/src/tools.js
CHANGED
|
@@ -14,8 +14,10 @@
|
|
|
14
14
|
import { z } from 'zod';
|
|
15
15
|
import { generateEmbedding } from './embeddings.js';
|
|
16
16
|
import db, {
|
|
17
|
+
stmts,
|
|
17
18
|
insertMemory,
|
|
18
19
|
insertVector,
|
|
20
|
+
redactSecrets,
|
|
19
21
|
getMemory,
|
|
20
22
|
updateMemoryContent,
|
|
21
23
|
deleteMemory,
|
|
@@ -45,6 +47,7 @@ import db, {
|
|
|
45
47
|
getNamespaceStats
|
|
46
48
|
} from './database.js';
|
|
47
49
|
import { searchHybrid, getOptimizedContext, consolidateMemories } from './search.js';
|
|
50
|
+
import { jaccardDistance } from './text-utils.js';
|
|
48
51
|
import { getRecentCommits } from './git.js';
|
|
49
52
|
import { verifyChainIntegrity } from './attestation.js';
|
|
50
53
|
import { searchCache } from './cache.js';
|
|
@@ -109,8 +112,11 @@ export async function addMemoryInternal({ content, importance = 1.0, agent_id, s
|
|
|
109
112
|
try {
|
|
110
113
|
const normalizedAgentId = agent_id ? agent_id.toLowerCase() : null;
|
|
111
114
|
|
|
115
|
+
// Redact secrets/credentials on write
|
|
116
|
+
const redactedContent = redactSecrets(content);
|
|
117
|
+
|
|
112
118
|
// Bug 7 + Feature 4: Validate content size
|
|
113
|
-
const validation = validateMemoryContent(
|
|
119
|
+
const validation = validateMemoryContent(redactedContent);
|
|
114
120
|
if (!validation.valid) {
|
|
115
121
|
return { error: validation.error };
|
|
116
122
|
}
|
|
@@ -119,14 +125,13 @@ export async function addMemoryInternal({ content, importance = 1.0, agent_id, s
|
|
|
119
125
|
const namespace = (shared || !normalizedAgentId) ? 'shared' : normalizedAgentId;
|
|
120
126
|
|
|
121
127
|
// Deduplication check (namespace-aware)
|
|
122
|
-
const existing = getMemoryByContent(
|
|
128
|
+
const existing = getMemoryByContent(redactedContent, namespace);
|
|
123
129
|
if (existing) {
|
|
124
130
|
// Re-attribute provenance to the calling agent if it was previously auto-attributed to log-watcher
|
|
125
131
|
const prov = getProvenance(existing.id);
|
|
126
132
|
if (prov && (prov.source_id === 'antigravity-worker' || prov.source_id === 'user-dialogue') && normalizedAgentId) {
|
|
127
133
|
try {
|
|
128
|
-
|
|
129
|
-
.run(normalizedAgentId, existing.id);
|
|
134
|
+
stmts.updateProvenanceOwner.run(normalizedAgentId, existing.id);
|
|
130
135
|
incrementAgentStat(normalizedAgentId, 'created');
|
|
131
136
|
} catch (e) {
|
|
132
137
|
console.error(`[persyst] Re-attribute provenance error: ${e.message}`);
|
|
@@ -141,20 +146,20 @@ export async function addMemoryInternal({ content, importance = 1.0, agent_id, s
|
|
|
141
146
|
};
|
|
142
147
|
}
|
|
143
148
|
|
|
144
|
-
const id = insertMemory(
|
|
149
|
+
const id = insertMemory(redactedContent, importance, {
|
|
145
150
|
source_type: normalizedAgentId ? 'agent' : 'manual',
|
|
146
151
|
source_id: normalizedAgentId,
|
|
147
152
|
confidence: 1.0
|
|
148
153
|
}, namespace);
|
|
149
154
|
|
|
150
|
-
const embedding = await generateEmbedding(
|
|
155
|
+
const embedding = await generateEmbedding(redactedContent);
|
|
151
156
|
insertVector(id, embedding);
|
|
152
157
|
|
|
153
158
|
// Feature 1: Invalidate search cache on write
|
|
154
159
|
searchCache.invalidate();
|
|
155
160
|
|
|
156
161
|
// Broadcast to SSE subscribers (HTTP gateway + SSE clients)
|
|
157
|
-
memoryEventBus.emit('memory_added', { id, content, namespace, source: normalizedAgentId || 'manual' });
|
|
162
|
+
memoryEventBus.emit('memory_added', { id, content: redactedContent, namespace, source: normalizedAgentId || 'manual' });
|
|
158
163
|
|
|
159
164
|
// Feature 2: Contradiction Detection
|
|
160
165
|
let contradictions = [];
|
|
@@ -169,20 +174,20 @@ export async function addMemoryInternal({ content, importance = 1.0, agent_id, s
|
|
|
169
174
|
const existingMemory = getMemoryById(hitId, namespace);
|
|
170
175
|
if (!existingMemory) continue;
|
|
171
176
|
|
|
172
|
-
const jaccard = jaccardDistance(
|
|
177
|
+
const jaccard = jaccardDistance(redactedContent, existingMemory.content);
|
|
173
178
|
// Contradiction: similar topic (high similarity), but differing key terms
|
|
174
179
|
if (jaccard > 0 && jaccard < 0.65) {
|
|
175
180
|
// Fetch provenances for trust calculation
|
|
176
181
|
const oldProv = getProvenance(hitId);
|
|
177
182
|
let oldReputation = 1.0;
|
|
178
183
|
if (oldProv && oldProv.source_type === 'agent' && oldProv.source_id) {
|
|
179
|
-
const agentRow =
|
|
184
|
+
const agentRow = stmts.getReputationScore.get(oldProv.source_id);
|
|
180
185
|
if (agentRow) oldReputation = agentRow.reputation_score;
|
|
181
186
|
}
|
|
182
187
|
|
|
183
188
|
let newReputation = 1.0;
|
|
184
189
|
if (normalizedAgentId) {
|
|
185
|
-
const agentRow =
|
|
190
|
+
const agentRow = stmts.getReputationScore.get(normalizedAgentId);
|
|
186
191
|
if (agentRow) newReputation = agentRow.reputation_score;
|
|
187
192
|
}
|
|
188
193
|
|
|
@@ -191,7 +196,11 @@ export async function addMemoryInternal({ content, importance = 1.0, agent_id, s
|
|
|
191
196
|
|
|
192
197
|
const isSelfUpdate = oldProv && oldProv.source_type === 'agent' && oldProv.source_id === normalizedAgentId;
|
|
193
198
|
|
|
194
|
-
if (isSelfUpdate
|
|
199
|
+
if (isSelfUpdate) {
|
|
200
|
+
continue; // Same agent: treat as complementary, not contradictory
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
if (trustNew > trustOld) {
|
|
195
204
|
// New is preferred
|
|
196
205
|
logContradiction(hitId, id, `Auto-detected contradiction: new memory is more trustworthy (similarity: ${sim.toFixed(3)}, content_diff: ${jaccard.toFixed(3)})`);
|
|
197
206
|
contradictions.push({
|
|
@@ -320,11 +329,13 @@ export function registerTools(server) {
|
|
|
320
329
|
'get_memory',
|
|
321
330
|
'Get a specific memory by its ID. Boosts its importance automatically.',
|
|
322
331
|
{
|
|
323
|
-
id: z.number().describe('Memory ID to retrieve')
|
|
332
|
+
id: z.number().describe('Memory ID to retrieve'),
|
|
333
|
+
agent_id: z.string().optional().describe('Agent ID — restricts access to this agent\'s namespace + shared')
|
|
324
334
|
},
|
|
325
|
-
async ({ id }) => {
|
|
335
|
+
async ({ id, agent_id }) => {
|
|
326
336
|
try {
|
|
327
|
-
const
|
|
337
|
+
const namespace = agent_id ? agent_id.toLowerCase() : null;
|
|
338
|
+
const memory = getMemory(id, namespace);
|
|
328
339
|
if (!memory) return text({ error: `Memory #${id} not found` });
|
|
329
340
|
return text(memory);
|
|
330
341
|
} catch (err) {
|
|
@@ -346,13 +357,17 @@ export function registerTools(server) {
|
|
|
346
357
|
try {
|
|
347
358
|
const normalizedAgentId = agent_id ? agent_id.toLowerCase() : null;
|
|
348
359
|
|
|
360
|
+
// Redact secrets/credentials on update
|
|
361
|
+
const redactedContent = redactSecrets(content);
|
|
362
|
+
|
|
349
363
|
// Bug 7 + Feature 4: Validate content size
|
|
350
|
-
const validation = validateMemoryContent(
|
|
364
|
+
const validation = validateMemoryContent(redactedContent);
|
|
351
365
|
if (!validation.valid) {
|
|
352
366
|
return text({ error: validation.error });
|
|
353
367
|
}
|
|
354
368
|
|
|
355
|
-
const
|
|
369
|
+
const namespace = normalizedAgentId;
|
|
370
|
+
const oldMemory = getMemory(id, namespace);
|
|
356
371
|
if (!oldMemory) return text({ error: `Memory #${id} not found` });
|
|
357
372
|
|
|
358
373
|
// Retrieve old agent_id from provenance
|
|
@@ -361,7 +376,7 @@ export function registerTools(server) {
|
|
|
361
376
|
|
|
362
377
|
// Insert new version
|
|
363
378
|
const newId = insertMemory(
|
|
364
|
-
|
|
379
|
+
redactedContent,
|
|
365
380
|
oldMemory.importance_score,
|
|
366
381
|
{
|
|
367
382
|
source_type: resolvedAgentId ? 'agent' : 'manual',
|
|
@@ -372,7 +387,7 @@ export function registerTools(server) {
|
|
|
372
387
|
id
|
|
373
388
|
);
|
|
374
389
|
|
|
375
|
-
const embedding = await generateEmbedding(
|
|
390
|
+
const embedding = await generateEmbedding(redactedContent);
|
|
376
391
|
insertVector(newId, embedding);
|
|
377
392
|
|
|
378
393
|
// Record contradiction and archive the old one
|
|
@@ -381,6 +396,9 @@ export function registerTools(server) {
|
|
|
381
396
|
// Feature 1: Invalidate search cache on write
|
|
382
397
|
searchCache.invalidate();
|
|
383
398
|
|
|
399
|
+
// Broadcast update to SSE subscribers
|
|
400
|
+
memoryEventBus.emit('memory_updated', { old_id: id, new_id: newId, namespace: oldMemory.namespace || 'shared' });
|
|
401
|
+
|
|
384
402
|
return text({
|
|
385
403
|
success: true,
|
|
386
404
|
id: newId,
|
|
@@ -397,10 +415,15 @@ export function registerTools(server) {
|
|
|
397
415
|
'delete_memory',
|
|
398
416
|
'Permanently delete a memory by its ID.',
|
|
399
417
|
{
|
|
400
|
-
id: z.number().describe('Memory ID to delete')
|
|
418
|
+
id: z.number().describe('Memory ID to delete'),
|
|
419
|
+
agent_id: z.string().optional().describe('Agent ID — restricts deletion to this agent\'s namespace + shared')
|
|
401
420
|
},
|
|
402
|
-
async ({ id }) => {
|
|
421
|
+
async ({ id, agent_id }) => {
|
|
403
422
|
try {
|
|
423
|
+
const namespace = agent_id ? agent_id.toLowerCase() : null;
|
|
424
|
+
const memory = getMemory(id, namespace);
|
|
425
|
+
if (!memory) return text({ error: `Memory #${id} not found` });
|
|
426
|
+
|
|
404
427
|
const deleted = deleteMemory(id);
|
|
405
428
|
if (!deleted) return text({ error: `Memory #${id} not found` });
|
|
406
429
|
|
|
@@ -408,7 +431,7 @@ export function registerTools(server) {
|
|
|
408
431
|
searchCache.invalidate();
|
|
409
432
|
|
|
410
433
|
// Broadcast deletion to SSE subscribers
|
|
411
|
-
memoryEventBus.emit('memory_deleted', { id });
|
|
434
|
+
memoryEventBus.emit('memory_deleted', { id, namespace: memory.namespace || 'shared' });
|
|
412
435
|
|
|
413
436
|
return text({ success: true, id, message: `Memory #${id} deleted` });
|
|
414
437
|
} catch (err) {
|
|
@@ -545,14 +568,16 @@ export function registerTools(server) {
|
|
|
545
568
|
{
|
|
546
569
|
entity_name: z.string().describe('Name of the entity'),
|
|
547
570
|
memory_id: z.number().describe('ID of the memory to link'),
|
|
548
|
-
relation: z.string().default('mentions').describe('Relationship type')
|
|
571
|
+
relation: z.string().default('mentions').describe('Relationship type'),
|
|
572
|
+
agent_id: z.string().optional().describe('Agent ID — restricts linking to this agent\'s namespace + shared')
|
|
549
573
|
},
|
|
550
|
-
async ({ entity_name, memory_id, relation }) => {
|
|
574
|
+
async ({ entity_name, memory_id, relation, agent_id }) => {
|
|
551
575
|
try {
|
|
576
|
+
const namespace = agent_id ? agent_id.toLowerCase() : null;
|
|
552
577
|
const entity = getEntityByName(entity_name);
|
|
553
578
|
if (!entity) return text({ error: `Entity "${entity_name}" not found.` });
|
|
554
579
|
|
|
555
|
-
const memory = getMemory(memory_id);
|
|
580
|
+
const memory = getMemory(memory_id, namespace);
|
|
556
581
|
if (!memory) return text({ error: `Memory #${memory_id} not found` });
|
|
557
582
|
|
|
558
583
|
insertEdge(entity.id, memory_id, relation, 'entity', 'memory');
|
|
@@ -830,27 +855,6 @@ function text(data) {
|
|
|
830
855
|
};
|
|
831
856
|
}
|
|
832
857
|
|
|
833
|
-
/**
|
|
834
|
-
* Compute Jaccard distance between two text strings.
|
|
835
|
-
* Used for contradiction detection — higher distance means more different content.
|
|
836
|
-
* @param {string} a - First text
|
|
837
|
-
* @param {string} b - Second text
|
|
838
|
-
* @returns {number} Distance score between 0 (identical) and 1 (completely different)
|
|
839
|
-
*/
|
|
840
|
-
function jaccardDistance(a, b) {
|
|
841
|
-
const wordsA = new Set(a.toLowerCase().split(/\s+/));
|
|
842
|
-
const wordsB = new Set(b.toLowerCase().split(/\s+/));
|
|
843
|
-
|
|
844
|
-
let intersection = 0;
|
|
845
|
-
for (const word of wordsA) {
|
|
846
|
-
if (wordsB.has(word)) intersection++;
|
|
847
|
-
}
|
|
848
|
-
|
|
849
|
-
const union = wordsA.size + wordsB.size - intersection;
|
|
850
|
-
if (union === 0) return 0;
|
|
851
|
-
return 1 - (intersection / union);
|
|
852
|
-
}
|
|
853
|
-
|
|
854
858
|
/**
|
|
855
859
|
* Compute word-level diff between two text strings using dynamic programming.
|
|
856
860
|
* Highlights additions as [+added+] and deletions as [-deleted-].
|
package/src/watcher.js
CHANGED
|
@@ -14,18 +14,20 @@ import {
|
|
|
14
14
|
upsertWatchPosition,
|
|
15
15
|
insertMemory,
|
|
16
16
|
insertVector,
|
|
17
|
-
memoryExists
|
|
17
|
+
memoryExists,
|
|
18
|
+
deleteMemory
|
|
18
19
|
} from './database.js';
|
|
19
20
|
import { generateEmbedding } from './embeddings.js';
|
|
20
|
-
import { extractHeuristic } from './extractor-heuristic.js';
|
|
21
|
+
import { extractHeuristic, hasExtractableSignals } from './extractor-heuristic.js';
|
|
21
22
|
import { searchHybrid } from './search.js';
|
|
22
23
|
import { searchCache } from './cache.js';
|
|
23
24
|
import { memoryEventBus } from './events.js';
|
|
25
|
+
import chokidar from 'chokidar';
|
|
24
26
|
|
|
25
|
-
// Config path: ~/.persyst/config.json
|
|
26
|
-
const CONFIG_FILE = join(homedir(), '.persyst', 'config.json');
|
|
27
|
+
// Config path: ~/.persyst/config.json (overridable for tests)
|
|
28
|
+
const CONFIG_FILE = process.env.PERSYST_CONFIG_FILE || join(homedir(), '.persyst', 'config.json');
|
|
27
29
|
|
|
28
|
-
let
|
|
30
|
+
let chokidarWatcher = null;
|
|
29
31
|
const DEDUP_THRESHOLD = 0.80;
|
|
30
32
|
|
|
31
33
|
/**
|
|
@@ -103,18 +105,31 @@ async function processJsonlFile(filePath) {
|
|
|
103
105
|
|
|
104
106
|
const lines = text.split('\n');
|
|
105
107
|
let addedCount = 0;
|
|
108
|
+
let processedOffset = lastPos;
|
|
106
109
|
|
|
107
|
-
for (
|
|
108
|
-
|
|
110
|
+
for (let i = 0; i < lines.length; i++) {
|
|
111
|
+
const line = lines[i];
|
|
112
|
+
const isLastLine = i === lines.length - 1;
|
|
113
|
+
|
|
114
|
+
// Empty trailing line after a newline is expected; skip it without treating it as partial.
|
|
115
|
+
if (!line.trim()) {
|
|
116
|
+
if (!isLastLine) processedOffset += line.length + 1;
|
|
117
|
+
continue;
|
|
118
|
+
}
|
|
109
119
|
|
|
110
120
|
let record;
|
|
111
121
|
try {
|
|
112
122
|
record = JSON.parse(line);
|
|
113
123
|
} catch (_) {
|
|
114
|
-
//
|
|
124
|
+
// If the last line fails to parse, it may be partially written. Leave processedOffset
|
|
125
|
+
// before this line so the next scan re-reads it from the start.
|
|
126
|
+
if (!isLastLine) processedOffset += line.length + 1;
|
|
115
127
|
continue;
|
|
116
128
|
}
|
|
117
129
|
|
|
130
|
+
// Commit the bytes for this line (including the newline that produced the split).
|
|
131
|
+
processedOffset += line.length + 1;
|
|
132
|
+
|
|
118
133
|
// Check if it's user prompt or assistant response
|
|
119
134
|
if (
|
|
120
135
|
record.content &&
|
|
@@ -122,7 +137,7 @@ async function processJsonlFile(filePath) {
|
|
|
122
137
|
) {
|
|
123
138
|
// Strip XML/markdown wrapper tags (like <USER_REQUEST> or <ADDITIONAL_METADATA>)
|
|
124
139
|
const cleanText = record.content.replace(/<[^>]+>[\s\S]*?<\/[^>]+>/g, '').trim();
|
|
125
|
-
if (cleanText.length < 15) continue;
|
|
140
|
+
if (cleanText.length < 15 || !hasExtractableSignals(cleanText)) continue;
|
|
126
141
|
|
|
127
142
|
const facts = extractHeuristic(cleanText);
|
|
128
143
|
for (const fact of facts) {
|
|
@@ -142,8 +157,16 @@ async function processJsonlFile(filePath) {
|
|
|
142
157
|
confidence: fact.confidence
|
|
143
158
|
});
|
|
144
159
|
|
|
145
|
-
|
|
146
|
-
|
|
160
|
+
try {
|
|
161
|
+
const embedding = await generateEmbedding(fact.content);
|
|
162
|
+
insertVector(id, embedding);
|
|
163
|
+
} catch (embedErr) {
|
|
164
|
+
console.error(`[persyst-watcher] Embedding failed for fact #${id}: ${embedErr.message}`);
|
|
165
|
+
// Clean up: delete the memory so we don't have orphaned entries
|
|
166
|
+
try { deleteMemory(id); } catch (_) {}
|
|
167
|
+
continue;
|
|
168
|
+
}
|
|
169
|
+
|
|
147
170
|
addedCount++;
|
|
148
171
|
console.error(`[persyst-watcher] Auto-extracted fact: "${fact.content}" (Memory #${id})`);
|
|
149
172
|
memoryEventBus.emit('memory_added', { id, content: fact.content, namespace: 'shared', source: 'watcher-antigravity' });
|
|
@@ -155,10 +178,13 @@ async function processJsonlFile(filePath) {
|
|
|
155
178
|
searchCache.invalidate();
|
|
156
179
|
}
|
|
157
180
|
|
|
158
|
-
// Persist
|
|
159
|
-
|
|
181
|
+
// Persist the byte offset up to the last successfully parsed complete line.
|
|
182
|
+
// Do not advance past an incomplete trailing line so it is re-read on the next scan.
|
|
183
|
+
upsertWatchPosition(filePath, processedOffset);
|
|
184
|
+
return addedCount;
|
|
160
185
|
} catch (err) {
|
|
161
186
|
console.error(`[persyst-watcher] Failed to process JSONL file ${filePath}: ${err.message}`);
|
|
187
|
+
return 0;
|
|
162
188
|
}
|
|
163
189
|
}
|
|
164
190
|
|
|
@@ -186,7 +212,7 @@ async function processJsonFile(filePath) {
|
|
|
186
212
|
// Process only newly added messages
|
|
187
213
|
for (let i = lastMsgCount; i < history.length; i++) {
|
|
188
214
|
const msg = history[i];
|
|
189
|
-
if (!msg.content || typeof msg.content !== 'string') continue;
|
|
215
|
+
if (!msg.content || typeof msg.content !== 'string' || !hasExtractableSignals(msg.content)) continue;
|
|
190
216
|
|
|
191
217
|
// Filter out system message structures
|
|
192
218
|
if (msg.role === 'user' || msg.role === 'assistant') {
|
|
@@ -208,8 +234,16 @@ async function processJsonFile(filePath) {
|
|
|
208
234
|
confidence: fact.confidence
|
|
209
235
|
});
|
|
210
236
|
|
|
211
|
-
|
|
212
|
-
|
|
237
|
+
try {
|
|
238
|
+
const embedding = await generateEmbedding(fact.content);
|
|
239
|
+
insertVector(id, embedding);
|
|
240
|
+
} catch (embedErr) {
|
|
241
|
+
console.error(`[persyst-watcher] Embedding failed for fact #${id}: ${embedErr.message}`);
|
|
242
|
+
// Clean up: delete the memory so we don't have orphaned entries
|
|
243
|
+
try { deleteMemory(id); } catch (_) {}
|
|
244
|
+
continue;
|
|
245
|
+
}
|
|
246
|
+
|
|
213
247
|
addedCount++;
|
|
214
248
|
console.error(`[persyst-watcher] Auto-extracted fact: "${fact.content}" (Memory #${id})`);
|
|
215
249
|
memoryEventBus.emit('memory_added', { id, content: fact.content, namespace: 'shared', source: 'watcher-roo' });
|
|
@@ -223,8 +257,10 @@ async function processJsonFile(filePath) {
|
|
|
223
257
|
|
|
224
258
|
// Persist message count index
|
|
225
259
|
upsertWatchPosition(filePath, history.length);
|
|
260
|
+
return addedCount;
|
|
226
261
|
} catch (err) {
|
|
227
262
|
console.error(`[persyst-watcher] Failed to process JSON file ${filePath}: ${err.message}`);
|
|
263
|
+
return 0;
|
|
228
264
|
}
|
|
229
265
|
}
|
|
230
266
|
|
|
@@ -263,6 +299,7 @@ function findFiles(dir, ext, depth = 3) {
|
|
|
263
299
|
*/
|
|
264
300
|
export async function scanDirectories() {
|
|
265
301
|
const watchDirs = loadWatchedDirs();
|
|
302
|
+
let totalAdded = 0;
|
|
266
303
|
|
|
267
304
|
for (const dir of watchDirs) {
|
|
268
305
|
if (!existsSync(dir)) continue;
|
|
@@ -270,7 +307,7 @@ export async function scanDirectories() {
|
|
|
270
307
|
// Scan for JSONL (Antigravity transcripts)
|
|
271
308
|
const jsonlFiles = findFiles(dir, 'transcript.jsonl', 3);
|
|
272
309
|
for (const file of jsonlFiles) {
|
|
273
|
-
await processJsonlFile(file);
|
|
310
|
+
totalAdded += await processJsonlFile(file);
|
|
274
311
|
}
|
|
275
312
|
|
|
276
313
|
// Scan for JSON (Roo Code / Cline task files)
|
|
@@ -278,44 +315,116 @@ export async function scanDirectories() {
|
|
|
278
315
|
for (const file of jsonFiles) {
|
|
279
316
|
// Avoid processing general configurations/settings files
|
|
280
317
|
if (file.includes('tasks')) {
|
|
281
|
-
await processJsonFile(file);
|
|
318
|
+
totalAdded += await processJsonFile(file);
|
|
282
319
|
}
|
|
283
320
|
}
|
|
284
321
|
}
|
|
322
|
+
|
|
323
|
+
// Auto-consolidate memories if new ones were added to keep prompt context slim
|
|
324
|
+
if (totalAdded > 0) {
|
|
325
|
+
try {
|
|
326
|
+
console.error(`[persyst-watcher] Running automatic memory consolidation sweep...`);
|
|
327
|
+
const { consolidateMemories } = await import('./search.js');
|
|
328
|
+
const report = await consolidateMemories();
|
|
329
|
+
console.error(`[persyst-watcher] Auto-consolidation complete: merged ${report.consolidated_groups} duplicate groups.`);
|
|
330
|
+
} catch (e) {
|
|
331
|
+
console.error(`[persyst-watcher] Auto-consolidation failed: ${e.message}`);
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
// Run periodic auto-expiry check on every folder scan (fast query)
|
|
336
|
+
try {
|
|
337
|
+
const { archiveExpiredMemories } = await import('./database.js');
|
|
338
|
+
archiveExpiredMemories();
|
|
339
|
+
} catch (e) {
|
|
340
|
+
console.error(`[persyst-watcher] Auto-expiry execution failed: ${e.message}`);
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
/**
|
|
345
|
+
* Handle a file addition or modification event from Chokidar.
|
|
346
|
+
* @param {string} filePath
|
|
347
|
+
*/
|
|
348
|
+
async function handleFileChange(filePath) {
|
|
349
|
+
const normalizedPath = filePath.replace(/\\/g, '/');
|
|
350
|
+
let addedCount = 0;
|
|
351
|
+
|
|
352
|
+
if (normalizedPath.endsWith('transcript.jsonl')) {
|
|
353
|
+
addedCount = await processJsonlFile(filePath);
|
|
354
|
+
} else if (normalizedPath.endsWith('.json') && normalizedPath.includes('tasks')) {
|
|
355
|
+
addedCount = await processJsonFile(filePath);
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
if (addedCount > 0) {
|
|
359
|
+
try {
|
|
360
|
+
console.error(`[persyst-watcher] Running automatic memory consolidation sweep...`);
|
|
361
|
+
const { consolidateMemories } = await import('./search.js');
|
|
362
|
+
const report = await consolidateMemories();
|
|
363
|
+
console.error(`[persyst-watcher] Auto-consolidation complete: merged ${report.consolidated_groups} duplicate groups.`);
|
|
364
|
+
} catch (e) {
|
|
365
|
+
console.error(`[persyst-watcher] Auto-consolidation failed: ${e.message}`);
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
// Run periodic auto-expiry check on every change (fast query)
|
|
370
|
+
try {
|
|
371
|
+
const { archiveExpiredMemories } = await import('./database.js');
|
|
372
|
+
archiveExpiredMemories();
|
|
373
|
+
} catch (e) {
|
|
374
|
+
console.error(`[persyst-watcher] Auto-expiry execution failed: ${e.message}`);
|
|
375
|
+
}
|
|
285
376
|
}
|
|
286
377
|
|
|
287
378
|
/**
|
|
288
379
|
* Start the background log watcher daemon.
|
|
289
380
|
*/
|
|
290
381
|
export function startWatcher() {
|
|
291
|
-
if (
|
|
292
|
-
|
|
293
|
-
console.error('[persyst-watcher] Starting background log watcher daemon...');
|
|
294
|
-
// Warm up config/paths
|
|
295
|
-
loadWatchedDirs();
|
|
382
|
+
if (chokidarWatcher) return;
|
|
296
383
|
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
console.error(`[persyst-watcher] Initial scan failed: ${err.message}`);
|
|
300
|
-
});
|
|
384
|
+
console.error('[persyst-watcher] Starting background log watcher daemon (Chokidar)...');
|
|
385
|
+
const watchDirs = loadWatchedDirs();
|
|
301
386
|
|
|
302
|
-
//
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
}
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
387
|
+
// Run initial scan, then start watching
|
|
388
|
+
scanDirectories()
|
|
389
|
+
.catch(err => {
|
|
390
|
+
console.error(`[persyst-watcher] Initial scan failed: ${err.message}`);
|
|
391
|
+
})
|
|
392
|
+
.then(() => {
|
|
393
|
+
if (chokidarWatcher) return;
|
|
394
|
+
chokidarWatcher = chokidar.watch(watchDirs, {
|
|
395
|
+
persistent: true,
|
|
396
|
+
ignoreInitial: true, // we already ran scanDirectories
|
|
397
|
+
awaitWriteFinish: {
|
|
398
|
+
stabilityThreshold: 300,
|
|
399
|
+
pollInterval: 100
|
|
400
|
+
}
|
|
401
|
+
});
|
|
402
|
+
|
|
403
|
+
chokidarWatcher.on('add', filePath => {
|
|
404
|
+
handleFileChange(filePath).catch(err => {
|
|
405
|
+
console.error(`[persyst-watcher] Error handling added file ${filePath}:`, err);
|
|
406
|
+
});
|
|
407
|
+
});
|
|
408
|
+
|
|
409
|
+
chokidarWatcher.on('change', filePath => {
|
|
410
|
+
handleFileChange(filePath).catch(err => {
|
|
411
|
+
console.error(`[persyst-watcher] Error handling changed file ${filePath}:`, err);
|
|
412
|
+
});
|
|
413
|
+
});
|
|
414
|
+
|
|
415
|
+
chokidarWatcher.on('error', err => {
|
|
416
|
+
console.error(`[persyst-watcher] Chokidar watcher error: ${err.message}`);
|
|
417
|
+
});
|
|
418
|
+
});
|
|
310
419
|
}
|
|
311
420
|
|
|
312
421
|
/**
|
|
313
422
|
* Stop the background log watcher daemon.
|
|
314
423
|
*/
|
|
315
424
|
export function stopWatcher() {
|
|
316
|
-
if (
|
|
317
|
-
|
|
318
|
-
|
|
425
|
+
if (chokidarWatcher) {
|
|
426
|
+
chokidarWatcher.close().catch(() => {});
|
|
427
|
+
chokidarWatcher = null;
|
|
319
428
|
console.error('[persyst-watcher] Background log watcher daemon stopped.');
|
|
320
429
|
}
|
|
321
430
|
}
|