persyst-mcp 2.2.5 → 2.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/watcher.js CHANGED
@@ -14,18 +14,21 @@ import {
14
14
  upsertWatchPosition,
15
15
  insertMemory,
16
16
  insertVector,
17
- memoryExists
17
+ memoryExists,
18
+ deleteMemory
18
19
  } from './database.js';
19
20
  import { generateEmbedding } from './embeddings.js';
20
- import { extractHeuristic } from './extractor-heuristic.js';
21
+ import { extractHeuristic, hasExtractableSignals } from './extractor-heuristic.js';
21
22
  import { searchHybrid } from './search.js';
22
23
  import { searchCache } from './cache.js';
23
24
  import { memoryEventBus } from './events.js';
25
+ import { logInfo } from './text-utils.js';
26
+ import chokidar from 'chokidar';
24
27
 
25
- // Config path: ~/.persyst/config.json
26
- const CONFIG_FILE = join(homedir(), '.persyst', 'config.json');
28
+ // Config path: ~/.persyst/config.json (overridable for tests)
29
+ const CONFIG_FILE = process.env.PERSYST_CONFIG_FILE || join(homedir(), '.persyst', 'config.json');
27
30
 
28
- let intervalId = null;
31
+ let chokidarWatcher = null;
29
32
  const DEDUP_THRESHOLD = 0.80;
30
33
 
31
34
  /**
@@ -103,18 +106,31 @@ async function processJsonlFile(filePath) {
103
106
 
104
107
  const lines = text.split('\n');
105
108
  let addedCount = 0;
109
+ let processedOffset = lastPos;
106
110
 
107
- for (const line of lines) {
108
- if (!line.trim()) continue;
111
+ for (let i = 0; i < lines.length; i++) {
112
+ const line = lines[i];
113
+ const isLastLine = i === lines.length - 1;
114
+
115
+ // Empty trailing line after a newline is expected; skip it without treating it as partial.
116
+ if (!line.trim()) {
117
+ if (!isLastLine) processedOffset += line.length + 1;
118
+ continue;
119
+ }
109
120
 
110
121
  let record;
111
122
  try {
112
123
  record = JSON.parse(line);
113
124
  } catch (_) {
114
- // Line might be incomplete/partially written skip and parse next time
125
+ // If the last line fails to parse, it may be partially written. Leave processedOffset
126
+ // before this line so the next scan re-reads it from the start.
127
+ if (!isLastLine) processedOffset += line.length + 1;
115
128
  continue;
116
129
  }
117
130
 
131
+ // Commit the bytes for this line (including the newline that produced the split).
132
+ processedOffset += line.length + 1;
133
+
118
134
  // Check if it's user prompt or assistant response
119
135
  if (
120
136
  record.content &&
@@ -122,7 +138,7 @@ async function processJsonlFile(filePath) {
122
138
  ) {
123
139
  // Strip XML/markdown wrapper tags (like <USER_REQUEST> or <ADDITIONAL_METADATA>)
124
140
  const cleanText = record.content.replace(/<[^>]+>[\s\S]*?<\/[^>]+>/g, '').trim();
125
- if (cleanText.length < 15) continue;
141
+ if (cleanText.length < 15 || !hasExtractableSignals(cleanText)) continue;
126
142
 
127
143
  const facts = extractHeuristic(cleanText);
128
144
  for (const fact of facts) {
@@ -135,18 +151,27 @@ async function processJsonlFile(filePath) {
135
151
  continue;
136
152
  }
137
153
 
138
- // Insert memory with provenance (written to 'shared' by default)
154
+ // Insert memory with provenance (written to project namespace or 'shared')
155
+ const watcherNs = process.env.PERSYST_PROJECT || 'shared';
139
156
  const id = insertMemory(fact.content, fact.confidence, {
140
157
  source_type: 'agent',
141
158
  source_id: record.source === 'MODEL' ? 'antigravity-worker' : 'user-dialogue',
142
159
  confidence: fact.confidence
143
- });
160
+ }, watcherNs);
161
+
162
+ try {
163
+ const embedding = await generateEmbedding(fact.content);
164
+ insertVector(id, embedding);
165
+ } catch (embedErr) {
166
+ console.error(`[persyst-watcher] Embedding failed for fact #${id}: ${embedErr.message}`);
167
+ // Clean up: delete the memory so we don't have orphaned entries
168
+ try { deleteMemory(id); } catch (_) {}
169
+ continue;
170
+ }
144
171
 
145
- const embedding = await generateEmbedding(fact.content);
146
- insertVector(id, embedding);
147
172
  addedCount++;
148
173
  console.error(`[persyst-watcher] Auto-extracted fact: "${fact.content}" (Memory #${id})`);
149
- memoryEventBus.emit('memory_added', { id, content: fact.content, namespace: 'shared', source: 'watcher-antigravity' });
174
+ memoryEventBus.emit('memory_added', { id, content: fact.content, namespace: watcherNs, source: 'watcher-antigravity' });
150
175
  }
151
176
  }
152
177
  }
@@ -155,10 +180,13 @@ async function processJsonlFile(filePath) {
155
180
  searchCache.invalidate();
156
181
  }
157
182
 
158
- // Persist new byte offset position
159
- upsertWatchPosition(filePath, stat.size);
183
+ // Persist the byte offset up to the last successfully parsed complete line.
184
+ // Do not advance past an incomplete trailing line so it is re-read on the next scan.
185
+ upsertWatchPosition(filePath, processedOffset);
186
+ return addedCount;
160
187
  } catch (err) {
161
188
  console.error(`[persyst-watcher] Failed to process JSONL file ${filePath}: ${err.message}`);
189
+ return 0;
162
190
  }
163
191
  }
164
192
 
@@ -186,33 +214,38 @@ async function processJsonFile(filePath) {
186
214
  // Process only newly added messages
187
215
  for (let i = lastMsgCount; i < history.length; i++) {
188
216
  const msg = history[i];
189
- if (!msg.content || typeof msg.content !== 'string') continue;
217
+ if (!msg.content || typeof msg.content !== 'string' || !hasExtractableSignals(msg.content)) continue;
190
218
 
191
219
  // Filter out system message structures
192
220
  if (msg.role === 'user' || msg.role === 'assistant') {
193
221
  const facts = extractHeuristic(msg.content);
194
222
  for (const fact of facts) {
195
- // Verify against exact duplicate (Bug A fix: check namespace 'shared')
196
- if (memoryExists(fact.content, 'shared')) continue;
223
+ const watcherNs = process.env.PERSYST_PROJECT || 'shared';
224
+ if (memoryExists(fact.content, watcherNs)) continue;
197
225
 
198
- // Verify against semantic similarity (Bug B fix: check namespace 'shared')
199
- const similar = await searchHybrid(fact.content, 1, null, null, 'shared');
226
+ const similar = await searchHybrid(fact.content, 1, null, null, watcherNs);
200
227
  if (similar.length > 0 && parseFloat(similar[0].similarity) >= DEDUP_THRESHOLD) {
201
228
  continue;
202
229
  }
203
230
 
204
- // Insert memory with provenance (written to 'shared' by default)
205
231
  const id = insertMemory(fact.content, fact.confidence, {
206
232
  source_type: 'agent',
207
233
  source_id: msg.role === 'assistant' ? 'roo-worker' : 'user-dialogue',
208
234
  confidence: fact.confidence
209
- });
235
+ }, watcherNs);
236
+
237
+ try {
238
+ const embedding = await generateEmbedding(fact.content);
239
+ insertVector(id, embedding);
240
+ } catch (embedErr) {
241
+ console.error(`[persyst-watcher] Embedding failed for fact #${id}: ${embedErr.message}`);
242
+ try { deleteMemory(id); } catch (_) {}
243
+ continue;
244
+ }
210
245
 
211
- const embedding = await generateEmbedding(fact.content);
212
- insertVector(id, embedding);
213
246
  addedCount++;
214
247
  console.error(`[persyst-watcher] Auto-extracted fact: "${fact.content}" (Memory #${id})`);
215
- memoryEventBus.emit('memory_added', { id, content: fact.content, namespace: 'shared', source: 'watcher-roo' });
248
+ memoryEventBus.emit('memory_added', { id, content: fact.content, namespace: watcherNs, source: 'watcher-roo' });
216
249
  }
217
250
  }
218
251
  }
@@ -223,8 +256,10 @@ async function processJsonFile(filePath) {
223
256
 
224
257
  // Persist message count index
225
258
  upsertWatchPosition(filePath, history.length);
259
+ return addedCount;
226
260
  } catch (err) {
227
261
  console.error(`[persyst-watcher] Failed to process JSON file ${filePath}: ${err.message}`);
262
+ return 0;
228
263
  }
229
264
  }
230
265
 
@@ -263,6 +298,7 @@ function findFiles(dir, ext, depth = 3) {
263
298
  */
264
299
  export async function scanDirectories() {
265
300
  const watchDirs = loadWatchedDirs();
301
+ let totalAdded = 0;
266
302
 
267
303
  for (const dir of watchDirs) {
268
304
  if (!existsSync(dir)) continue;
@@ -270,7 +306,7 @@ export async function scanDirectories() {
270
306
  // Scan for JSONL (Antigravity transcripts)
271
307
  const jsonlFiles = findFiles(dir, 'transcript.jsonl', 3);
272
308
  for (const file of jsonlFiles) {
273
- await processJsonlFile(file);
309
+ totalAdded += await processJsonlFile(file);
274
310
  }
275
311
 
276
312
  // Scan for JSON (Roo Code / Cline task files)
@@ -278,44 +314,116 @@ export async function scanDirectories() {
278
314
  for (const file of jsonFiles) {
279
315
  // Avoid processing general configurations/settings files
280
316
  if (file.includes('tasks')) {
281
- await processJsonFile(file);
317
+ totalAdded += await processJsonFile(file);
282
318
  }
283
319
  }
284
320
  }
321
+
322
+ // Auto-consolidate memories if new ones were added to keep prompt context slim
323
+ if (totalAdded > 0) {
324
+ try {
325
+ console.error(`[persyst-watcher] Running automatic memory consolidation sweep...`);
326
+ const { consolidateMemories } = await import('./search.js');
327
+ const report = await consolidateMemories();
328
+ console.error(`[persyst-watcher] Auto-consolidation complete: merged ${report.consolidated_groups} duplicate groups.`);
329
+ } catch (e) {
330
+ console.error(`[persyst-watcher] Auto-consolidation failed: ${e.message}`);
331
+ }
332
+ }
333
+
334
+ // Run periodic auto-expiry check on every folder scan (fast query)
335
+ try {
336
+ const { archiveExpiredMemories } = await import('./database.js');
337
+ archiveExpiredMemories();
338
+ } catch (e) {
339
+ console.error(`[persyst-watcher] Auto-expiry execution failed: ${e.message}`);
340
+ }
341
+ }
342
+
343
+ /**
344
+ * Handle a file addition or modification event from Chokidar.
345
+ * @param {string} filePath
346
+ */
347
+ async function handleFileChange(filePath) {
348
+ const normalizedPath = filePath.replace(/\\/g, '/');
349
+ let addedCount = 0;
350
+
351
+ if (normalizedPath.endsWith('transcript.jsonl')) {
352
+ addedCount = await processJsonlFile(filePath);
353
+ } else if (normalizedPath.endsWith('.json') && normalizedPath.includes('tasks')) {
354
+ addedCount = await processJsonFile(filePath);
355
+ }
356
+
357
+ if (addedCount > 0) {
358
+ try {
359
+ console.error(`[persyst-watcher] Running automatic memory consolidation sweep...`);
360
+ const { consolidateMemories } = await import('./search.js');
361
+ const report = await consolidateMemories();
362
+ console.error(`[persyst-watcher] Auto-consolidation complete: merged ${report.consolidated_groups} duplicate groups.`);
363
+ } catch (e) {
364
+ console.error(`[persyst-watcher] Auto-consolidation failed: ${e.message}`);
365
+ }
366
+ }
367
+
368
+ // Run periodic auto-expiry check on every change (fast query)
369
+ try {
370
+ const { archiveExpiredMemories } = await import('./database.js');
371
+ archiveExpiredMemories();
372
+ } catch (e) {
373
+ console.error(`[persyst-watcher] Auto-expiry execution failed: ${e.message}`);
374
+ }
285
375
  }
286
376
 
287
377
  /**
288
378
  * Start the background log watcher daemon.
289
379
  */
290
380
  export function startWatcher() {
291
- if (intervalId) return;
292
-
293
- console.error('[persyst-watcher] Starting background log watcher daemon...');
294
- // Warm up config/paths
295
- loadWatchedDirs();
381
+ if (chokidarWatcher) return;
296
382
 
297
- // Run initial scan
298
- scanDirectories().catch(err => {
299
- console.error(`[persyst-watcher] Initial scan failed: ${err.message}`);
300
- });
383
+ logInfo('[persyst-watcher] Starting background log watcher daemon (Chokidar)...');
384
+ const watchDirs = loadWatchedDirs();
301
385
 
302
- // Polling directory scan every 5 seconds
303
- intervalId = setInterval(async () => {
304
- try {
305
- await scanDirectories();
306
- } catch (err) {
307
- console.error(`[persyst-watcher] Folder scan failed: ${err.message}`);
308
- }
309
- }, 5000);
386
+ // Run initial scan, then start watching
387
+ scanDirectories()
388
+ .catch(err => {
389
+ console.error(`[persyst-watcher] Initial scan failed: ${err.message}`);
390
+ })
391
+ .then(() => {
392
+ if (chokidarWatcher) return;
393
+ chokidarWatcher = chokidar.watch(watchDirs, {
394
+ persistent: true,
395
+ ignoreInitial: true, // we already ran scanDirectories
396
+ awaitWriteFinish: {
397
+ stabilityThreshold: 300,
398
+ pollInterval: 100
399
+ }
400
+ });
401
+
402
+ chokidarWatcher.on('add', filePath => {
403
+ handleFileChange(filePath).catch(err => {
404
+ console.error(`[persyst-watcher] Error handling added file ${filePath}:`, err);
405
+ });
406
+ });
407
+
408
+ chokidarWatcher.on('change', filePath => {
409
+ handleFileChange(filePath).catch(err => {
410
+ console.error(`[persyst-watcher] Error handling changed file ${filePath}:`, err);
411
+ });
412
+ });
413
+
414
+ chokidarWatcher.on('error', err => {
415
+ console.error(`[persyst-watcher] Chokidar watcher error: ${err.message}`);
416
+ });
417
+ });
310
418
  }
311
419
 
312
420
  /**
313
421
  * Stop the background log watcher daemon.
314
422
  */
315
423
  export function stopWatcher() {
316
- if (intervalId) {
317
- clearInterval(intervalId);
318
- intervalId = null;
319
- console.error('[persyst-watcher] Background log watcher daemon stopped.');
424
+ if (chokidarWatcher) {
425
+ chokidarWatcher.close().catch(() => {});
426
+ chokidarWatcher = null;
427
+ logInfo('[persyst-watcher] Background log watcher daemon stopped.');
320
428
  }
321
429
  }