@softerist/heuristic-mcp 3.0.15 → 3.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/README.md +104 -104
  2. package/config.jsonc +173 -173
  3. package/features/ann-config.js +131 -0
  4. package/features/clear-cache.js +84 -0
  5. package/features/find-similar-code.js +291 -0
  6. package/features/hybrid-search.js +544 -0
  7. package/features/index-codebase.js +3268 -0
  8. package/features/lifecycle.js +1189 -0
  9. package/features/package-version.js +302 -0
  10. package/features/register.js +408 -0
  11. package/features/resources.js +156 -0
  12. package/features/set-workspace.js +265 -0
  13. package/index.js +96 -96
  14. package/lib/cache-ops.js +22 -22
  15. package/lib/cache-utils.js +565 -565
  16. package/lib/cache.js +1870 -1870
  17. package/lib/call-graph.js +396 -396
  18. package/lib/cli.js +1 -1
  19. package/lib/config.js +517 -517
  20. package/lib/constants.js +39 -39
  21. package/lib/embed-query-process.js +7 -7
  22. package/lib/embedding-process.js +7 -7
  23. package/lib/embedding-worker.js +299 -299
  24. package/lib/ignore-patterns.js +316 -316
  25. package/lib/json-worker.js +14 -14
  26. package/lib/json-writer.js +337 -337
  27. package/lib/logging.js +164 -164
  28. package/lib/memory-logger.js +13 -13
  29. package/lib/onnx-backend.js +193 -193
  30. package/lib/project-detector.js +84 -84
  31. package/lib/server-lifecycle.js +165 -165
  32. package/lib/settings-editor.js +754 -754
  33. package/lib/tokenizer.js +256 -256
  34. package/lib/utils.js +428 -428
  35. package/lib/vector-store-binary.js +627 -627
  36. package/lib/vector-store-sqlite.js +95 -95
  37. package/lib/workspace-env.js +28 -28
  38. package/mcp_config.json +9 -9
  39. package/package.json +86 -75
  40. package/scripts/clear-cache.js +20 -0
  41. package/scripts/download-model.js +43 -0
  42. package/scripts/mcp-launcher.js +49 -0
  43. package/scripts/postinstall.js +12 -0
  44. package/search-configs.js +36 -36
  45. package/.prettierrc +0 -7
  46. package/debug-pids.js +0 -30
  47. package/eslint.config.js +0 -36
  48. package/specs/plan.md +0 -23
  49. package/vitest.config.js +0 -39
@@ -1,565 +1,565 @@
1
- import fs from 'fs/promises';
2
- import path from 'path';
3
- import os from 'os';
4
- import { getGlobalCacheDir } from './config.js';
5
-
6
- /**
7
- * Check if a process is running
8
- */
9
- function isProcessRunning(pid) {
10
- try {
11
- process.kill(pid, 0);
12
- return true;
13
- } catch (err) {
14
- // On Windows, EPERM can happen even if process exists
15
- if (err && err.code === 'EPERM') {
16
- return true;
17
- }
18
- return false;
19
- }
20
- }
21
-
22
- /**
23
- * Normalize workspace path for comparison
24
- * - Resolves symlinks
25
- * - Lowercase on Windows
26
- * - Normalizes path separators
27
- */
28
- async function normalizeWorkspacePath(workspacePath) {
29
- if (!workspacePath) return null;
30
-
31
- try {
32
- let normalized = workspacePath;
33
-
34
- // Resolve symlinks if path exists
35
- try {
36
- normalized = await fs.realpath(normalized);
37
- } catch {
38
- // Path doesn't exist, use as-is
39
- }
40
-
41
- // Lowercase on Windows for case-insensitive comparison
42
- if (process.platform === 'win32') {
43
- normalized = normalized.toLowerCase();
44
- }
45
-
46
- // Normalize path separators
47
- normalized = path.normalize(normalized);
48
-
49
- return normalized;
50
- } catch {
51
- return workspacePath; // Return original if normalization fails
52
- }
53
- }
54
-
55
- /**
56
- * Check if workspace path indicates a temporary/test workspace
57
- */
58
- function isTemporaryWorkspace(workspacePath) {
59
- if (!workspacePath) return false;
60
-
61
- const normalized = workspacePath.toLowerCase();
62
- const patterns = ['temp-workspace', '.tmp', '\\temp\\', '/tmp/', '\\\\temp\\\\'];
63
-
64
- if (patterns.some((p) => normalized.includes(p))) {
65
- return true;
66
- }
67
-
68
- // Check if under OS temp directory
69
- try {
70
- const tempDir = os.tmpdir().toLowerCase();
71
- return normalized.startsWith(tempDir);
72
- } catch {
73
- return false;
74
- }
75
- }
76
-
77
- /**
78
- * Get timestamp from progress data
79
- */
80
- function getProgressTimestamp(cacheInfo) {
81
- if (cacheInfo.progress?.updatedAt) {
82
- const timestamp = Date.parse(cacheInfo.progress.updatedAt);
83
- if (Number.isFinite(timestamp)) {
84
- return timestamp;
85
- }
86
- }
87
-
88
- // Fallback to file mtime
89
- if (cacheInfo.stats.progressFile) {
90
- return cacheInfo.stats.progressFile.mtimeMs;
91
- }
92
-
93
- return 0;
94
- }
95
-
96
- /**
97
- * Check if progress was updated recently
98
- */
99
- function hasRecentProgress(cacheInfo, thresholdMs = 5 * 60 * 1000) {
100
- if (!cacheInfo.progress) return false;
101
-
102
- const progressTime = getProgressTimestamp(cacheInfo);
103
- return Date.now() - progressTime < thresholdMs;
104
- }
105
-
106
- /**
107
- * Safe file stat - returns null on error
108
- */
109
- async function safeStat(filePath) {
110
- try {
111
- return await fs.stat(filePath);
112
- } catch {
113
- return null;
114
- }
115
- }
116
-
117
- /**
118
- * Safe JSON parse - returns null on error
119
- */
120
- async function safeReadJson(filePath) {
121
- try {
122
- const content = await fs.readFile(filePath, 'utf-8');
123
- return JSON.parse(content);
124
- } catch {
125
- return null;
126
- }
127
- }
128
-
129
- /**
130
- * Collect comprehensive cache information
131
- */
132
- async function collectCacheInfo(cacheDir) {
133
- const cacheId = path.basename(cacheDir);
134
- const errors = [];
135
-
136
- // File paths
137
- const metaPath = path.join(cacheDir, 'meta.json');
138
- const progressPath = path.join(cacheDir, 'progress.json');
139
- const lockPath = path.join(cacheDir, 'server.lock.json');
140
- const embeddingsJsonPath = path.join(cacheDir, 'embeddings.json');
141
- const vectorsBinPath = path.join(cacheDir, 'vectors.bin');
142
- const vectorsSqlitePath = path.join(cacheDir, 'vectors.sqlite');
143
- const annIndexPath = path.join(cacheDir, 'ann-index.bin');
144
-
145
- // Read metadata
146
- const [meta, progress, lock] = await Promise.all([
147
- safeReadJson(metaPath),
148
- safeReadJson(progressPath),
149
- safeReadJson(lockPath),
150
- ]);
151
-
152
- // Gather file stats
153
- const [
154
- cacheDirStat,
155
- metaFileStat,
156
- progressFileStat,
157
- lockFileStat,
158
- embeddingsJsonStat,
159
- vectorsBinStat,
160
- vectorsSqliteStat,
161
- annIndexStat,
162
- ] = await Promise.all([
163
- safeStat(cacheDir),
164
- safeStat(metaPath),
165
- safeStat(progressPath),
166
- safeStat(lockPath),
167
- safeStat(embeddingsJsonPath),
168
- safeStat(vectorsBinPath),
169
- safeStat(vectorsSqlitePath),
170
- safeStat(annIndexPath),
171
- ]);
172
-
173
- // Determine embeddings file (prefer binary/sqlite over JSON)
174
- const embeddingsFileStat = vectorsBinStat || vectorsSqliteStat || embeddingsJsonStat;
175
-
176
- // Extract workspace path
177
- const workspacePath = meta?.workspace || null;
178
-
179
- // Check workspace existence
180
- let workspaceExists = false;
181
- if (workspacePath) {
182
- try {
183
- await fs.access(workspacePath);
184
- workspaceExists = true;
185
- } catch {
186
- workspaceExists = false;
187
- }
188
- }
189
-
190
- // Normalize workspace path
191
- const workspacePathNormalized = await normalizeWorkspacePath(workspacePath);
192
-
193
- // Calculate lastActivityMs from all known timestamps
194
- const timestamps = [
195
- meta?.lastSaveTime ? Date.parse(meta.lastSaveTime) : null,
196
- getProgressTimestamp({ progress, stats: { progressFile: progressFileStat } }),
197
- lock?.startedAt ? Date.parse(lock.startedAt) : null,
198
- embeddingsFileStat?.mtimeMs,
199
- annIndexStat?.mtimeMs,
200
- ].filter((t) => t && Number.isFinite(t));
201
-
202
- const lastActivityMs =
203
- timestamps.length > 0
204
- ? Math.max(...timestamps)
205
- : cacheDirStat?.mtimeMs || 0;
206
-
207
- // Determine if cache is active
208
- let isActive = false;
209
-
210
- // Check lock file with valid PID
211
- if (lock && Number.isInteger(lock.pid)) {
212
- isActive = isProcessRunning(lock.pid);
213
- }
214
-
215
- return {
216
- cacheDir,
217
- cacheId,
218
- meta,
219
- progress,
220
- lock,
221
- stats: {
222
- cacheDir: cacheDirStat,
223
- metaFile: metaFileStat,
224
- progressFile: progressFileStat,
225
- lockFile: lockFileStat,
226
- embeddingsFile: embeddingsFileStat,
227
- annIndexFile: annIndexStat,
228
- },
229
- workspacePath,
230
- workspacePathNormalized,
231
- workspaceExists,
232
- lastActivityMs,
233
- isActive,
234
- errors,
235
- };
236
- }
237
-
238
- /**
239
- * Evaluate a single cache using the safe decision pipeline
240
- */
241
- function evaluateCache(cacheInfo, thresholds) {
242
- const now = Date.now();
243
- const age = now - cacheInfo.lastActivityMs;
244
-
245
- // Step 1: Keep if active or within safety window
246
- if (cacheInfo.isActive || age < thresholds.safetyWindowMs) {
247
- return {
248
- action: 'KEEP',
249
- reason: cacheInfo.isActive ? 'active_lock' : 'recent_activity',
250
- details: {
251
- isActive: cacheInfo.isActive,
252
- lockPid: cacheInfo.lock?.pid,
253
- ageMs: age,
254
- },
255
- };
256
- }
257
-
258
- // Step 2: No meta.json
259
- if (!cacheInfo.meta) {
260
- const dirAge = now - (cacheInfo.stats.cacheDir?.mtimeMs || 0);
261
- if (dirAge > thresholds.staleNoMetaMs && !hasRecentProgress(cacheInfo)) {
262
- return {
263
- action: 'REMOVE',
264
- reason: 'no_meta_stale',
265
- details: { ageMs: dirAge },
266
- };
267
- }
268
- return {
269
- action: 'KEEP',
270
- reason: 'initializing',
271
- details: { ageMs: dirAge },
272
- };
273
- }
274
-
275
- // Step 3: Temporary workspace (check BEFORE missing workspace - shorter threshold!)
276
- // This ensures temp workspaces use 24h threshold instead of 7-day grace period
277
- if (isTemporaryWorkspace(cacheInfo.workspacePath)) {
278
- if (age > thresholds.tempThresholdMs) {
279
- return {
280
- action: 'REMOVE',
281
- reason: 'temp_workspace',
282
- details: {
283
- workspace: cacheInfo.workspacePath,
284
- ageMs: age,
285
- },
286
- };
287
- }
288
- // Recent temp workspace - keep for now
289
- return {
290
- action: 'KEEP',
291
- reason: 'recent_temp_workspace',
292
- details: {
293
- workspace: cacheInfo.workspacePath,
294
- ageMs: age,
295
- },
296
- };
297
- }
298
-
299
- // Step 4: Empty cache
300
- const filesIndexed = cacheInfo.meta.filesIndexed ?? 0;
301
- const chunksStored = cacheInfo.meta.chunksStored ?? 0;
302
-
303
- if (filesIndexed === 0 || chunksStored === 0) {
304
- if (age > thresholds.emptyThresholdMs) {
305
- return {
306
- action: 'REMOVE',
307
- reason: 'empty_cache',
308
- details: {
309
- filesIndexed,
310
- chunksStored,
311
- ageMs: age,
312
- },
313
- };
314
- }
315
- return {
316
- action: 'KEEP',
317
- reason: 'recent_empty',
318
- details: {
319
- filesIndexed,
320
- chunksStored,
321
- ageMs: age,
322
- },
323
- };
324
- }
325
-
326
- // Step 5: Missing workspace (non-temp workspaces only, due to Step 3 early return)
327
- if (cacheInfo.workspacePath && !cacheInfo.workspaceExists) {
328
- if (age > thresholds.workspaceGraceMs) {
329
- return {
330
- action: 'REMOVE',
331
- reason: 'workspace_missing',
332
- details: {
333
- workspace: cacheInfo.workspacePath,
334
- ageMs: age,
335
- },
336
- };
337
- }
338
- return {
339
- action: 'KEEP',
340
- reason: 'workspace_grace_period',
341
- details: {
342
- workspace: cacheInfo.workspacePath,
343
- ageMs: age,
344
- },
345
- };
346
- }
347
-
348
- // Step 6: Stuck indexing
349
- if (cacheInfo.progress && !hasRecentProgress(cacheInfo, thresholds.safetyWindowMs)) {
350
- const progressAge = now - getProgressTimestamp(cacheInfo);
351
- if (progressAge > thresholds.staleProgressMs) {
352
- return {
353
- action: 'REMOVE',
354
- reason: 'stuck_indexing',
355
- details: {
356
- progressAgeMs: progressAge,
357
- lastProgress: cacheInfo.progress,
358
- },
359
- };
360
- }
361
- }
362
-
363
- // Step 7: Long unused
364
- if (age > thresholds.maxUnusedMs) {
365
- return {
366
- action: 'REMOVE',
367
- reason: 'long_unused',
368
- details: { ageMs: age },
369
- };
370
- }
371
-
372
- // Step 8: Default - keep
373
- return {
374
- action: 'KEEP',
375
- reason: 'valid_cache',
376
- details: {
377
- filesIndexed,
378
- chunksStored,
379
- ageMs: age,
380
- },
381
- };
382
- }
383
-
384
- /**
385
- * Find duplicate workspace caches
386
- */
387
- function findDuplicateWorkspaces(cacheInfos) {
388
- const workspaceMap = new Map(); // normalized workspace -> [cacheInfos]
389
-
390
- for (const info of cacheInfos) {
391
- if (!info.workspacePathNormalized) continue;
392
-
393
- // Key includes embedding model + dimension to avoid deleting caches for different embeddings
394
- const dimLabel = info.meta?.embeddingDimension ?? 'default';
395
- const key = `${info.workspacePathNormalized}::${info.meta?.embeddingModel || 'default'}::${dimLabel}`;
396
-
397
- if (!workspaceMap.has(key)) {
398
- workspaceMap.set(key, []);
399
- }
400
- workspaceMap.get(key).push(info);
401
- }
402
-
403
- const duplicates = [];
404
- for (const [key, infos] of workspaceMap) {
405
- if (infos.length > 1) {
406
- // Sort by lastActivityMs descending
407
- infos.sort((a, b) => b.lastActivityMs - a.lastActivityMs);
408
-
409
- // Keep newest, mark others for removal (if not active)
410
- for (let i = 1; i < infos.length; i++) {
411
- if (!infos[i].isActive) {
412
- duplicates.push({
413
- info: infos[i],
414
- action: 'REMOVE',
415
- reason: 'duplicate_workspace',
416
- details: {
417
- newestCache: infos[0].cacheId,
418
- workspace: key,
419
- ageMs: Date.now() - infos[i].lastActivityMs,
420
- },
421
- });
422
- }
423
- }
424
- }
425
- }
426
-
427
- return duplicates;
428
- }
429
-
430
- /**
431
- * Main cache cleanup function with intelligent sanitization
432
- */
433
- export async function clearStaleCaches(options = {}) {
434
- const config = {
435
- staleNoMetaHours: 6,
436
- emptyThresholdHours: 24,
437
- workspaceGraceDays: 7,
438
- maxUnusedDays: 30,
439
- tempThresholdHours: 24,
440
- staleProgressHours: 6,
441
- safetyWindowMinutes: 10,
442
- removeDuplicates: true,
443
- dryRun: false,
444
- logger: console,
445
- ...options,
446
- };
447
-
448
- // Convert to milliseconds
449
- const thresholds = {
450
- staleNoMetaMs: config.staleNoMetaHours * 60 * 60 * 1000,
451
- emptyThresholdMs: config.emptyThresholdHours * 60 * 60 * 1000,
452
- workspaceGraceMs: config.workspaceGraceDays * 24 * 60 * 60 * 1000,
453
- maxUnusedMs: config.maxUnusedDays * 24 * 60 * 60 * 1000,
454
- tempThresholdMs: config.tempThresholdHours * 60 * 60 * 1000,
455
- staleProgressMs: config.staleProgressHours * 60 * 60 * 1000,
456
- safetyWindowMs: config.safetyWindowMinutes * 60 * 1000,
457
- };
458
-
459
- const globalCacheRoot = path.join(getGlobalCacheDir(), 'heuristic-mcp');
460
- const cacheDirs = await fs.readdir(globalCacheRoot).catch(() => []);
461
-
462
- if (cacheDirs.length === 0) {
463
- return { removed: 0, kept: 0, dryRun: config.dryRun, decisions: [] };
464
- }
465
-
466
- // Step 1: Collect all cache info
467
- const cacheInfos = await Promise.all(
468
- cacheDirs.map((dir) => collectCacheInfo(path.join(globalCacheRoot, dir)))
469
- );
470
-
471
- // Step 2: Evaluate each cache individually
472
- const decisions = cacheInfos.map((info) => {
473
- const evaluation = evaluateCache(info, thresholds);
474
- return {
475
- cacheDir: info.cacheDir,
476
- cacheId: info.cacheId,
477
- info,
478
- ...evaluation,
479
- };
480
- });
481
-
482
- // Step 3: Find duplicates
483
- if (config.removeDuplicates) {
484
- const duplicates = findDuplicateWorkspaces(cacheInfos);
485
- for (const dup of duplicates) {
486
- // Override decision if not already marked for removal
487
- const existing = decisions.find((d) => d.cacheId === dup.info.cacheId);
488
- if (existing && existing.action === 'KEEP') {
489
- existing.action = dup.action;
490
- existing.reason = dup.reason;
491
- existing.details = dup.details;
492
- }
493
- }
494
- }
495
-
496
- // Step 4: Execute removals
497
- let removed = 0;
498
- let kept = 0;
499
-
500
- for (const decision of decisions) {
501
- if (decision.action === 'REMOVE') {
502
- if (!config.dryRun) {
503
- try {
504
- await fs.rm(decision.cacheDir, { recursive: true, force: true });
505
- removed++;
506
- if (config.logger) {
507
- config.logger.info(
508
- `[Cache] Removed ${decision.cacheId}: ${decision.reason} (${formatAge(decision.details.ageMs)})`
509
- );
510
- }
511
- } catch (err) {
512
- if (config.logger) {
513
- config.logger.warn(
514
- `[Cache] Failed to remove ${decision.cacheId}: ${err.message}`
515
- );
516
- }
517
- // Count as kept if removal failed
518
- kept++;
519
- decision.action = 'KEEP';
520
- decision.reason = 'removal_failed';
521
- decision.details.error = err.message;
522
- }
523
- } else {
524
- removed++;
525
- if (config.logger) {
526
- config.logger.info(
527
- `[Cache] Would remove ${decision.cacheId}: ${decision.reason} (${formatAge(decision.details.ageMs)})`
528
- );
529
- }
530
- }
531
- } else {
532
- kept++;
533
- }
534
- }
535
-
536
- if (removed > 0 && config.logger) {
537
- config.logger.info(
538
- `[Cache] ${config.dryRun ? 'Would remove' : 'Removed'} ${removed} stale cache ${removed === 1 ? 'directory' : 'directories'}.`
539
- );
540
- }
541
-
542
- return {
543
- removed,
544
- kept,
545
- dryRun: config.dryRun,
546
- decisions,
547
- };
548
- }
549
-
550
- /**
551
- * Format age in human-readable form
552
- */
553
- function formatAge(ms) {
554
- if (!Number.isFinite(ms)) return 'unknown';
555
-
556
- const seconds = Math.floor(ms / 1000);
557
- const minutes = Math.floor(seconds / 60);
558
- const hours = Math.floor(minutes / 60);
559
- const days = Math.floor(hours / 24);
560
-
561
- if (days > 0) return `${days}d ${hours % 24}h`;
562
- if (hours > 0) return `${hours}h ${minutes % 60}m`;
563
- if (minutes > 0) return `${minutes}m`;
564
- return `${seconds}s`;
565
- }
1
+ import fs from 'fs/promises';
2
+ import path from 'path';
3
+ import os from 'os';
4
+ import { getGlobalCacheDir } from './config.js';
5
+
/**
 * Check whether a process with the given PID is currently running.
 *
 * Probes the process with signal 0 via `process.kill`, which performs the
 * existence/permission check without actually delivering a signal.
 *
 * @param {number} pid - Process ID to probe.
 * @returns {boolean} `true` when the process exists (including when the probe
 *   is rejected with EPERM), `false` otherwise or when `pid` is not a positive
 *   integer.
 */
function isProcessRunning(pid) {
  // PID 0 and negative PIDs address process groups rather than one process,
  // so the probe could succeed for a value that cannot identify a lock owner.
  if (!Number.isInteger(pid) || pid <= 0) {
    return false;
  }

  try {
    process.kill(pid, 0);
    return true;
  } catch (err) {
    // On Windows, EPERM can happen even if the process exists.
    return err?.code === 'EPERM';
  }
}
21
+
/**
 * Produce a canonical form of a workspace path so two paths can be compared.
 *
 * The path is resolved through symlinks when it exists, lowercased on Windows,
 * and passed through `path.normalize`.
 *
 * @param {string} workspacePath - Raw workspace path.
 * @returns {Promise<string|null>} The canonical path, `null` for a falsy
 *   input, or the input unchanged when normalization throws.
 */
async function normalizeWorkspacePath(workspacePath) {
  if (!workspacePath) {
    return null;
  }

  try {
    // A path that cannot be resolved (e.g. it does not exist) is used as-is.
    const resolved = await fs.realpath(workspacePath).catch(() => workspacePath);

    // Windows paths are compared case-insensitively.
    const caseFolded = process.platform === 'win32' ? resolved.toLowerCase() : resolved;

    return path.normalize(caseFolded);
  } catch {
    // Normalization failed; hand back the original value.
    return workspacePath;
  }
}
54
+
/**
 * Check whether a workspace path looks like a temporary/test workspace.
 *
 * A path counts as temporary when it contains one of the known marker
 * substrings, or when it is the OS temp directory itself or lies beneath it.
 *
 * @param {string} workspacePath - Workspace path to classify.
 * @returns {boolean} `true` for temporary workspaces; `false` otherwise,
 *   including for empty or non-string input.
 */
function isTemporaryWorkspace(workspacePath) {
  if (typeof workspacePath !== 'string' || workspacePath === '') {
    return false;
  }

  const normalized = workspacePath.toLowerCase();

  // A doubled-backslash variant of `\temp\` is not listed separately: any
  // string containing it also contains the single-backslash marker.
  const markers = ['temp-workspace', '.tmp', '\\temp\\', '/tmp/'];
  if (markers.some((marker) => normalized.includes(marker))) {
    return true;
  }

  // Check if under the OS temp directory.
  try {
    const tempDir = os.tmpdir().toLowerCase().replace(/[\\/]+$/, '');
    if (tempDir === '') {
      return false;
    }
    if (normalized === tempDir) {
      return true;
    }
    // Require a separator after the prefix so that a sibling such as
    // "/tmp-backup/project" is not mistaken for a child of "/tmp".
    return normalized.startsWith(`${tempDir}/`) || normalized.startsWith(`${tempDir}\\`);
  } catch {
    return false;
  }
}
76
+
/**
 * Determine when indexing progress was last recorded.
 *
 * Prefers the `updatedAt` value stored in the progress data; if that is absent
 * or unparseable, falls back to the progress file's modification time.
 *
 * @param {object} cacheInfo - Object with `progress` and `stats.progressFile`.
 * @returns {number} Epoch milliseconds, or 0 when no timestamp is available.
 */
function getProgressTimestamp(cacheInfo) {
  const updatedAt = cacheInfo.progress?.updatedAt;
  const parsed = updatedAt ? Date.parse(updatedAt) : Number.NaN;
  if (Number.isFinite(parsed)) {
    return parsed;
  }

  // Fall back to the file's mtime, then to zero.
  const progressFile = cacheInfo.stats.progressFile;
  return progressFile ? progressFile.mtimeMs : 0;
}
95
+
/**
 * Tell whether the cache's progress data was updated within a time window.
 *
 * @param {object} cacheInfo - Object with `progress` and `stats.progressFile`.
 * @param {number} [thresholdMs=300000] - Window size in milliseconds.
 * @returns {boolean} `true` when progress data exists and its timestamp is
 *   newer than `thresholdMs` ago; `false` otherwise.
 */
function hasRecentProgress(cacheInfo, thresholdMs = 5 * 60 * 1000) {
  if (cacheInfo.progress) {
    const lastUpdateMs = getProgressTimestamp(cacheInfo);
    const elapsedMs = Date.now() - lastUpdateMs;
    return elapsedMs < thresholdMs;
  }

  return false;
}
105
+
/**
 * Stat a path without throwing.
 *
 * @param {string} filePath - Path to stat.
 * @returns {Promise<import('fs').Stats|null>} The stats object, or `null` when
 *   the stat call fails for any reason.
 */
async function safeStat(filePath) {
  let stats = null;

  try {
    stats = await fs.stat(filePath);
  } catch {
    // Any failure is reported to the caller as "no stats".
  }

  return stats;
}
116
+
/**
 * Read and parse a JSON file without throwing.
 *
 * @param {string} filePath - Path of the JSON file.
 * @returns {Promise<*>} The parsed value, or `null` when the file cannot be
 *   read or does not contain valid JSON.
 */
async function safeReadJson(filePath) {
  let parsed = null;

  try {
    const text = await fs.readFile(filePath, 'utf-8');
    parsed = JSON.parse(text);
  } catch {
    // Read and parse failures both collapse to null.
    parsed = null;
  }

  return parsed;
}
128
+
/**
 * Gather everything known about a single cache directory.
 *
 * Reads the metadata, progress and lock JSON files, stats the directory and
 * its known data files, checks whether the recorded workspace is accessible,
 * and derives the last-activity time and the active flag.
 *
 * @param {string} cacheDir - Absolute path of the cache directory.
 * @returns {Promise<object>} Record with `cacheDir`, `cacheId`, `meta`,
 *   `progress`, `lock`, `stats`, `workspacePath`, `workspacePathNormalized`,
 *   `workspaceExists`, `lastActivityMs`, `isActive` and `errors`.
 */
async function collectCacheInfo(cacheDir) {
  const inCache = (fileName) => path.join(cacheDir, fileName);

  const metaPath = inCache('meta.json');
  const progressPath = inCache('progress.json');
  const lockPath = inCache('server.lock.json');

  // Parsed JSON documents (null when missing or invalid).
  const [meta, progress, lock] = await Promise.all(
    [metaPath, progressPath, lockPath].map((jsonPath) => safeReadJson(jsonPath))
  );

  // Stats for the directory and every file of interest (null when absent).
  const statTargets = [
    cacheDir,
    metaPath,
    progressPath,
    lockPath,
    inCache('embeddings.json'),
    inCache('vectors.bin'),
    inCache('vectors.sqlite'),
    inCache('ann-index.bin'),
  ];
  const [
    cacheDirStat,
    metaFileStat,
    progressFileStat,
    lockFileStat,
    embeddingsJsonStat,
    vectorsBinStat,
    vectorsSqliteStat,
    annIndexStat,
  ] = await Promise.all(statTargets.map((target) => safeStat(target)));

  // Binary and SQLite stores take precedence over the JSON embeddings file.
  const embeddingsFileStat = vectorsBinStat || vectorsSqliteStat || embeddingsJsonStat;

  const workspacePath = meta?.workspace || null;

  let workspaceExists = false;
  if (workspacePath) {
    workspaceExists = await fs.access(workspacePath).then(
      () => true,
      () => false
    );
  }

  const workspacePathNormalized = await normalizeWorkspacePath(workspacePath);

  // Most recent of all known activity timestamps; zero, null, undefined and
  // non-finite candidates are discarded.
  const candidateTimes = [
    meta?.lastSaveTime ? Date.parse(meta.lastSaveTime) : null,
    getProgressTimestamp({ progress, stats: { progressFile: progressFileStat } }),
    lock?.startedAt ? Date.parse(lock.startedAt) : null,
    embeddingsFileStat?.mtimeMs,
    annIndexStat?.mtimeMs,
  ];
  const timestamps = candidateTimes.filter((t) => t && Number.isFinite(t));

  let lastActivityMs;
  if (timestamps.length > 0) {
    lastActivityMs = Math.max(...timestamps);
  } else {
    lastActivityMs = cacheDirStat?.mtimeMs || 0;
  }

  // Active only when the lock file names an integer PID that is still running.
  const hasLockPid = Boolean(lock) && Number.isInteger(lock.pid);
  const isActive = hasLockPid ? isProcessRunning(lock.pid) : false;

  return {
    cacheDir,
    cacheId: path.basename(cacheDir),
    meta,
    progress,
    lock,
    stats: {
      cacheDir: cacheDirStat,
      metaFile: metaFileStat,
      progressFile: progressFileStat,
      lockFile: lockFileStat,
      embeddingsFile: embeddingsFileStat,
      annIndexFile: annIndexStat,
    },
    workspacePath,
    workspacePathNormalized,
    workspaceExists,
    lastActivityMs,
    isActive,
    errors: [],
  };
}
237
+
/**
 * Decide whether a single cache should be kept or removed.
 *
 * The checks run in a fixed order and the first one that applies wins:
 *   1. active lock or activity inside the safety window -> KEEP
 *   2. no meta.json                                      -> REMOVE when stale
 *   3. temporary workspace                               -> REMOVE when old
 *   4. empty cache (no files or no chunks)               -> REMOVE when old
 *   5. workspace path no longer accessible               -> REMOVE after grace
 *   6. progress data not updated for too long            -> REMOVE
 *   7. unused for longer than the maximum                -> REMOVE
 *   8. otherwise                                         -> KEEP
 *
 * Every result's `details` carries `ageMs`, so callers can report an age
 * regardless of which rule produced the decision.
 *
 * @param {object} cacheInfo - Cache record (see `collectCacheInfo`).
 * @param {object} thresholds - Millisecond limits: `safetyWindowMs`,
 *   `staleNoMetaMs`, `tempThresholdMs`, `emptyThresholdMs`,
 *   `workspaceGraceMs`, `staleProgressMs`, `maxUnusedMs`.
 * @returns {{action: 'KEEP'|'REMOVE', reason: string, details: object}}
 */
function evaluateCache(cacheInfo, thresholds) {
  const now = Date.now();
  const age = now - cacheInfo.lastActivityMs;

  const keep = (reason, details) => ({ action: 'KEEP', reason, details });
  const remove = (reason, details) => ({ action: 'REMOVE', reason, details });

  // Step 1: Keep if active or within the safety window.
  if (cacheInfo.isActive || age < thresholds.safetyWindowMs) {
    return keep(cacheInfo.isActive ? 'active_lock' : 'recent_activity', {
      isActive: cacheInfo.isActive,
      lockPid: cacheInfo.lock?.pid,
      ageMs: age,
    });
  }

  // Step 2: No meta.json - judged by the directory's own age.
  if (!cacheInfo.meta) {
    const dirAge = now - (cacheInfo.stats.cacheDir?.mtimeMs || 0);
    if (dirAge > thresholds.staleNoMetaMs && !hasRecentProgress(cacheInfo)) {
      return remove('no_meta_stale', { ageMs: dirAge });
    }
    return keep('initializing', { ageMs: dirAge });
  }

  // Step 3: Temporary workspace. Checked BEFORE the missing-workspace rule so
  // temp workspaces use the shorter temp threshold instead of the grace period.
  if (isTemporaryWorkspace(cacheInfo.workspacePath)) {
    const details = { workspace: cacheInfo.workspacePath, ageMs: age };
    return age > thresholds.tempThresholdMs
      ? remove('temp_workspace', details)
      : keep('recent_temp_workspace', details);
  }

  // Step 4: Empty cache.
  const filesIndexed = cacheInfo.meta.filesIndexed ?? 0;
  const chunksStored = cacheInfo.meta.chunksStored ?? 0;

  if (filesIndexed === 0 || chunksStored === 0) {
    const details = { filesIndexed, chunksStored, ageMs: age };
    return age > thresholds.emptyThresholdMs
      ? remove('empty_cache', details)
      : keep('recent_empty', details);
  }

  // Step 5: Missing workspace (non-temp only, because Step 3 returned early).
  if (cacheInfo.workspacePath && !cacheInfo.workspaceExists) {
    const details = { workspace: cacheInfo.workspacePath, ageMs: age };
    return age > thresholds.workspaceGraceMs
      ? remove('workspace_missing', details)
      : keep('workspace_grace_period', details);
  }

  // Step 6: Stuck indexing.
  if (cacheInfo.progress && !hasRecentProgress(cacheInfo, thresholds.safetyWindowMs)) {
    const progressAge = now - getProgressTimestamp(cacheInfo);
    if (progressAge > thresholds.staleProgressMs) {
      return remove('stuck_indexing', {
        // ageMs is included like in every other branch so the removal log
        // can print an age instead of "unknown".
        ageMs: age,
        progressAgeMs: progressAge,
        lastProgress: cacheInfo.progress,
      });
    }
  }

  // Step 7: Long unused.
  if (age > thresholds.maxUnusedMs) {
    return remove('long_unused', { ageMs: age });
  }

  // Step 8: Default - keep.
  return keep('valid_cache', { filesIndexed, chunksStored, ageMs: age });
}
383
+
/**
 * Find caches that duplicate another cache of the same workspace.
 *
 * Caches are grouped by normalized workspace path, embedding model and
 * embedding dimension, so caches built with different embeddings never count
 * as duplicates of each other. Within each group the most recently active
 * cache is retained and every other cache that is not active is reported.
 *
 * @param {object[]} cacheInfos - Cache records (see `collectCacheInfo`).
 * @returns {object[]} One `{ info, action, reason, details }` entry per cache
 *   proposed for removal.
 */
function findDuplicateWorkspaces(cacheInfos) {
  // group key -> cache records sharing that workspace/model/dimension
  const groups = new Map();

  for (const info of cacheInfos) {
    if (!info.workspacePathNormalized) {
      continue;
    }

    const model = info.meta?.embeddingModel || 'default';
    const dimension = info.meta?.embeddingDimension ?? 'default';
    const groupKey = `${info.workspacePathNormalized}::${model}::${dimension}`;

    const members = groups.get(groupKey);
    if (members) {
      members.push(info);
    } else {
      groups.set(groupKey, [info]);
    }
  }

  const duplicates = [];

  groups.forEach((members, groupKey) => {
    if (members.length < 2) {
      return;
    }

    // Most recently active first; that one is retained.
    members.sort((a, b) => b.lastActivityMs - a.lastActivityMs);
    const [newest, ...older] = members;

    for (const stale of older) {
      // Never propose an active cache for removal.
      if (stale.isActive) {
        continue;
      }

      duplicates.push({
        info: stale,
        action: 'REMOVE',
        reason: 'duplicate_workspace',
        details: {
          newestCache: newest.cacheId,
          workspace: groupKey,
          ageMs: Date.now() - stale.lastActivityMs,
        },
      });
    }
  });

  return duplicates;
}
429
+
/**
 * Scan the global cache root and remove stale cache directories.
 *
 * Each cache is first evaluated on its own (`evaluateCache`). Duplicate
 * detection then runs only over the caches that survived that evaluation, so
 * the copy retained for a workspace is never one that is itself about to be
 * deleted. Caches kept because of very recent activity are never removed as
 * duplicates.
 *
 * @param {object} [options] - Overrides for the defaults below.
 * @param {number} [options.staleNoMetaHours=6]
 * @param {number} [options.emptyThresholdHours=24]
 * @param {number} [options.workspaceGraceDays=7]
 * @param {number} [options.maxUnusedDays=30]
 * @param {number} [options.tempThresholdHours=24]
 * @param {number} [options.staleProgressHours=6]
 * @param {number} [options.safetyWindowMinutes=10]
 * @param {boolean} [options.removeDuplicates=true]
 * @param {boolean} [options.dryRun=false] - Report removals without deleting.
 * @param {object|null} [options.logger=console] - Needs `info` and `warn`;
 *   pass `null` to silence logging.
 * @returns {Promise<{removed: number, kept: number, dryRun: boolean, decisions: object[]}>}
 */
export async function clearStaleCaches(options = {}) {
  const config = {
    staleNoMetaHours: 6,
    emptyThresholdHours: 24,
    workspaceGraceDays: 7,
    maxUnusedDays: 30,
    tempThresholdHours: 24,
    staleProgressHours: 6,
    safetyWindowMinutes: 10,
    removeDuplicates: true,
    dryRun: false,
    logger: console,
    ...options,
  };

  // Convert to milliseconds.
  const MINUTE_MS = 60 * 1000;
  const HOUR_MS = 60 * MINUTE_MS;
  const DAY_MS = 24 * HOUR_MS;
  const thresholds = {
    staleNoMetaMs: config.staleNoMetaHours * HOUR_MS,
    emptyThresholdMs: config.emptyThresholdHours * HOUR_MS,
    workspaceGraceMs: config.workspaceGraceDays * DAY_MS,
    maxUnusedMs: config.maxUnusedDays * DAY_MS,
    tempThresholdMs: config.tempThresholdHours * HOUR_MS,
    staleProgressMs: config.staleProgressHours * HOUR_MS,
    safetyWindowMs: config.safetyWindowMinutes * MINUTE_MS,
  };

  const globalCacheRoot = path.join(getGlobalCacheDir(), 'heuristic-mcp');
  // A missing or unreadable root simply means there is nothing to clean.
  const cacheDirs = await fs.readdir(globalCacheRoot).catch(() => []);

  if (cacheDirs.length === 0) {
    return { removed: 0, kept: 0, dryRun: config.dryRun, decisions: [] };
  }

  // Step 1: Collect all cache info.
  const cacheInfos = await Promise.all(
    cacheDirs.map((dir) => collectCacheInfo(path.join(globalCacheRoot, dir)))
  );

  // Step 2: Evaluate each cache individually.
  const decisions = cacheInfos.map((info) => ({
    cacheDir: info.cacheDir,
    cacheId: info.cacheId,
    info,
    ...evaluateCache(info, thresholds),
  }));

  // Step 3: Find duplicates among the caches that are still being kept.
  if (config.removeDuplicates) {
    const decisionsById = new Map(decisions.map((d) => [d.cacheId, d]));
    const survivors = decisions.filter((d) => d.action === 'KEEP').map((d) => d.info);

    for (const dup of findDuplicateWorkspaces(survivors)) {
      const existing = decisionsById.get(dup.info.cacheId);
      if (!existing || existing.action !== 'KEEP') {
        continue;
      }
      // The safety window protects caches that may still be in use or being
      // written; being a duplicate must not override it.
      if (existing.reason === 'recent_activity') {
        continue;
      }
      existing.action = dup.action;
      existing.reason = dup.reason;
      existing.details = dup.details;
    }
  }

  // Step 4: Execute removals.
  let removed = 0;
  let kept = 0;

  for (const decision of decisions) {
    if (decision.action !== 'REMOVE') {
      kept++;
      continue;
    }

    if (config.dryRun) {
      removed++;
      if (config.logger) {
        config.logger.info(
          `[Cache] Would remove ${decision.cacheId}: ${decision.reason} (${formatAge(decision.details.ageMs)})`
        );
      }
      continue;
    }

    try {
      await fs.rm(decision.cacheDir, { recursive: true, force: true });
      removed++;
      if (config.logger) {
        config.logger.info(
          `[Cache] Removed ${decision.cacheId}: ${decision.reason} (${formatAge(decision.details.ageMs)})`
        );
      }
    } catch (err) {
      if (config.logger) {
        config.logger.warn(`[Cache] Failed to remove ${decision.cacheId}: ${err.message}`);
      }
      // Count as kept if removal failed.
      kept++;
      decision.action = 'KEEP';
      decision.reason = 'removal_failed';
      decision.details.error = err.message;
    }
  }

  if (removed > 0 && config.logger) {
    config.logger.info(
      `[Cache] ${config.dryRun ? 'Would remove' : 'Removed'} ${removed} stale cache ${removed === 1 ? 'directory' : 'directories'}.`
    );
  }

  return {
    removed,
    kept,
    dryRun: config.dryRun,
    decisions,
  };
}
549
+
/**
 * Format a duration in milliseconds in a short human-readable form.
 *
 * Output uses the two most significant units down to minutes ("3d 4h",
 * "2h 15m"), then "Nm" or "Ns" for shorter durations.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Formatted duration, or 'unknown' when `ms` is not a
 *   finite number.
 */
function formatAge(ms) {
  if (!Number.isFinite(ms)) {
    return 'unknown';
  }

  // A timestamp in the future (clock skew) yields a negative age; report it
  // as zero instead of a misleading value such as "-5400s".
  const seconds = Math.floor(Math.max(0, ms) / 1000);
  const minutes = Math.floor(seconds / 60);
  const hours = Math.floor(minutes / 60);
  const days = Math.floor(hours / 24);

  if (days > 0) {
    return `${days}d ${hours % 24}h`;
  }
  if (hours > 0) {
    return `${hours}h ${minutes % 60}m`;
  }
  if (minutes > 0) {
    return `${minutes}m`;
  }
  return `${seconds}s`;
}