@softerist/heuristic-mcp 2.1.46 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. package/.agent/workflows/code-review.md +60 -0
  2. package/.prettierrc +7 -0
  3. package/ARCHITECTURE.md +105 -170
  4. package/CONTRIBUTING.md +32 -113
  5. package/GEMINI.md +73 -0
  6. package/LICENSE +21 -21
  7. package/README.md +161 -54
  8. package/config.json +876 -76
  9. package/debug-pids.js +27 -0
  10. package/eslint.config.js +36 -0
  11. package/features/ann-config.js +37 -26
  12. package/features/clear-cache.js +28 -19
  13. package/features/find-similar-code.js +142 -66
  14. package/features/hybrid-search.js +253 -93
  15. package/features/index-codebase.js +1455 -394
  16. package/features/lifecycle.js +813 -180
  17. package/features/register.js +58 -52
  18. package/index.js +450 -306
  19. package/lib/cache-ops.js +22 -0
  20. package/lib/cache-utils.js +68 -0
  21. package/lib/cache.js +1392 -587
  22. package/lib/call-graph.js +165 -50
  23. package/lib/cli.js +154 -0
  24. package/lib/config.js +462 -121
  25. package/lib/embedding-process.js +77 -0
  26. package/lib/embedding-worker.js +545 -30
  27. package/lib/ignore-patterns.js +61 -59
  28. package/lib/json-worker.js +14 -0
  29. package/lib/json-writer.js +344 -0
  30. package/lib/logging.js +88 -0
  31. package/lib/memory-logger.js +13 -0
  32. package/lib/project-detector.js +13 -17
  33. package/lib/server-lifecycle.js +38 -0
  34. package/lib/settings-editor.js +645 -0
  35. package/lib/tokenizer.js +207 -104
  36. package/lib/utils.js +273 -198
  37. package/lib/vector-store-binary.js +592 -0
  38. package/mcp_config.example.json +13 -0
  39. package/package.json +13 -2
  40. package/scripts/clear-cache.js +6 -17
  41. package/scripts/download-model.js +14 -9
  42. package/scripts/postinstall.js +5 -5
  43. package/search-configs.js +36 -0
  44. package/test/ann-config.test.js +179 -0
  45. package/test/ann-fallback.test.js +6 -6
  46. package/test/binary-store.test.js +69 -0
  47. package/test/cache-branches.test.js +120 -0
  48. package/test/cache-errors.test.js +264 -0
  49. package/test/cache-extra.test.js +300 -0
  50. package/test/cache-helpers.test.js +205 -0
  51. package/test/cache-hnsw-failure.test.js +40 -0
  52. package/test/cache-json-worker.test.js +190 -0
  53. package/test/cache-worker.test.js +102 -0
  54. package/test/cache.test.js +443 -0
  55. package/test/call-graph.test.js +103 -4
  56. package/test/clear-cache.test.js +69 -68
  57. package/test/code-review-workflow.test.js +50 -0
  58. package/test/config.test.js +418 -0
  59. package/test/coverage-gap.test.js +497 -0
  60. package/test/coverage-maximizer.test.js +236 -0
  61. package/test/debug-analysis.js +107 -0
  62. package/test/embedding-model.test.js +173 -103
  63. package/test/embedding-worker-extra.test.js +272 -0
  64. package/test/embedding-worker.test.js +158 -0
  65. package/test/features.test.js +139 -0
  66. package/test/final-boost.test.js +271 -0
  67. package/test/final-polish.test.js +183 -0
  68. package/test/final.test.js +95 -0
  69. package/test/find-similar-code.test.js +191 -0
  70. package/test/helpers.js +92 -11
  71. package/test/helpers.test.js +46 -0
  72. package/test/hybrid-search-basic.test.js +62 -0
  73. package/test/hybrid-search-branch.test.js +202 -0
  74. package/test/hybrid-search-callgraph.test.js +229 -0
  75. package/test/hybrid-search-extra.test.js +81 -0
  76. package/test/hybrid-search.test.js +484 -71
  77. package/test/index-cli.test.js +520 -0
  78. package/test/index-codebase-batch.test.js +119 -0
  79. package/test/index-codebase-branches.test.js +585 -0
  80. package/test/index-codebase-core.test.js +1032 -0
  81. package/test/index-codebase-edge-cases.test.js +254 -0
  82. package/test/index-codebase-errors.test.js +132 -0
  83. package/test/index-codebase-gap.test.js +239 -0
  84. package/test/index-codebase-lines.test.js +151 -0
  85. package/test/index-codebase-watcher.test.js +259 -0
  86. package/test/index-codebase-zone.test.js +259 -0
  87. package/test/index-codebase.test.js +371 -69
  88. package/test/index-memory.test.js +220 -0
  89. package/test/indexer-detailed.test.js +176 -0
  90. package/test/integration.test.js +148 -92
  91. package/test/json-worker.test.js +50 -0
  92. package/test/lifecycle.test.js +541 -0
  93. package/test/master.test.js +198 -0
  94. package/test/perfection.test.js +349 -0
  95. package/test/project-detector.test.js +65 -0
  96. package/test/register.test.js +262 -0
  97. package/test/tokenizer.test.js +55 -93
  98. package/test/ultra-maximizer.test.js +116 -0
  99. package/test/utils-branches.test.js +161 -0
  100. package/test/utils-extra.test.js +116 -0
  101. package/test/utils.test.js +131 -0
  102. package/test/verify_fixes.js +76 -0
  103. package/test/worker-errors.test.js +96 -0
  104. package/test/worker-init.test.js +102 -0
  105. package/test/worker_throttling.test.js +93 -0
  106. package/tools/scripts/benchmark-search.js +95 -0
  107. package/tools/scripts/cache-stats.js +71 -0
  108. package/tools/scripts/manual-search.js +34 -0
  109. package/vitest.config.js +19 -9
@@ -0,0 +1,236 @@
1
+ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
2
+ import { CodebaseIndexer, handleToolCall } from '../features/index-codebase.js';
3
+ import fs from 'fs/promises';
4
+ import path from 'path';
5
+
6
+ // Mock dependencies
7
+ vi.mock('fs/promises');
8
+ vi.mock('../lib/call-graph.js', () => ({
9
+ extractCallData: vi.fn(),
10
+ }));
11
+ vi.mock('../lib/utils.js', async () => {
12
+ const actual = await vi.importActual('../lib/utils.js');
13
+ return {
14
+ ...actual,
15
+ hashContent: vi.fn().mockReturnValue('fixed-hash'), // predictable hash
16
+ smartChunk: actual.smartChunk,
17
+ };
18
+ });
19
+ vi.mock('worker_threads', async () => {
20
+ const { EventEmitter } = await import('events');
21
+ class Worker extends EventEmitter {
22
+ constructor() {
23
+ super();
24
+ setTimeout(() => this.emit('message', { type: 'ready' }), 1);
25
+ }
26
+ terminate() {
27
+ return Promise.resolve();
28
+ }
29
+ postMessage(msg) {
30
+ if (msg.type === 'process') {
31
+ this.emit('message', { type: 'results', results: [], batchId: msg.batchId });
32
+ }
33
+ }
34
+ }
35
+ return { Worker };
36
+ });
37
+
38
+ vi.mock('os', async () => {
39
+ return {
40
+ default: { cpus: () => [{}, {}, {}, {}] },
41
+ cpus: () => [{}, {}, {}, {}],
42
+ };
43
+ });
44
+
45
+ describe('CodebaseIndexer Coverage Maximizer', () => {
46
+ let indexer;
47
+ let config;
48
+ let cache;
49
+ let embedder;
50
+ let extractCallDataMock;
51
+
52
+ beforeEach(async () => {
53
+ const callGraph = await import('../lib/call-graph.js');
54
+ extractCallDataMock = callGraph.extractCallData;
55
+ extractCallDataMock.mockReturnValue({}); // Default success
56
+
57
+ config = {
58
+ workerThreads: 2,
59
+ verbose: true, // Important for logging branches
60
+ embeddingModel: 'test-model',
61
+ searchDirectory: '/test',
62
+ maxFileSize: 100,
63
+ fileExtensions: ['js'],
64
+ excludePatterns: [],
65
+ callGraphEnabled: true,
66
+ };
67
+
68
+ const cacheMock = {
69
+ save: vi.fn(),
70
+ getVectorStore: vi.fn().mockReturnValue([]),
71
+ setVectorStore: vi.fn(),
72
+ reset: vi.fn(),
73
+ fileHashes: new Map(),
74
+ fileCallData: new Map(),
75
+ getFileHash: vi.fn(),
76
+ setFileHash: vi.fn(),
77
+ removeFileFromStore: vi.fn(),
78
+ addToStore: vi.fn(),
79
+ setFileCallData: vi.fn(),
80
+ setFileCallDataEntries: vi.fn((entries) => {
81
+ if (entries instanceof Map) {
82
+ cacheMock.fileCallData = entries;
83
+ } else {
84
+ cacheMock.fileCallData = new Map(Object.entries(entries || {}));
85
+ }
86
+ }),
87
+ clearFileCallData: vi.fn(() => {
88
+ cacheMock.fileCallData = new Map();
89
+ }),
90
+ clearCallGraphData: vi.fn(),
91
+ pruneCallGraphData: vi.fn().mockReturnValue(5), // Cover line 612 (if > 0)
92
+ rebuildCallGraph: vi.fn(),
93
+ ensureAnnIndex: vi.fn().mockResolvedValue(),
94
+ deleteFileHash: vi.fn(),
95
+ setLastIndexDuration: vi.fn(),
96
+ setLastIndexStats: vi.fn(),
97
+ setFileHashes: vi.fn((map) => { cacheMock.fileHashes = map; }),
98
+ getFileHashKeys: vi.fn().mockImplementation(() => [...cacheMock.fileHashes.keys()]),
99
+ getFileCallDataKeys: vi.fn().mockImplementation(() => [...cacheMock.fileCallData.keys()]),
100
+ getFileMeta: vi.fn(),
101
+ };
102
+ cache = cacheMock;
103
+
104
+ embedder = vi.fn().mockResolvedValue({ data: [] });
105
+
106
+ indexer = new CodebaseIndexer(embedder, cache, config);
107
+ // Mock discoverFiles to control input
108
+ indexer.discoverFiles = vi.fn().mockResolvedValue(['/test/file1.js']);
109
+ });
110
+
111
+ afterEach(() => {
112
+ vi.restoreAllMocks();
113
+ });
114
+
115
+ it('Line 146: Worker initialization failure catch block', async () => {
116
+ // Actually, let's verify line 343: indexFile error handling.
117
+ const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
118
+ const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
119
+ vi.spyOn(fs, 'stat').mockRejectedValue(new Error('Stat failed'));
120
+
121
+ await indexer.indexFile('/test/bad.js');
122
+
123
+ // Expect 2 args
124
+ expect(warnSpy.mock.calls.length + errorSpy.mock.calls.length).toBeGreaterThan(0);
125
+ });
126
+
127
+ it('Line 357 & 362: indexFile size and directory checks', async () => {
128
+ const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
129
+ vi.spyOn(console, 'error').mockImplementation(() => {});
130
+
131
+ // 357: isDirectory
132
+ vi.spyOn(fs, 'stat').mockResolvedValue({
133
+ isDirectory: () => true,
134
+ size: 50,
135
+ });
136
+ await indexer.indexFile('/test/dir');
137
+
138
+ // 362: maxFileSize
139
+ vi.spyOn(fs, 'stat').mockResolvedValue({
140
+ isDirectory: () => false,
141
+ size: 1000, // > 100
142
+ });
143
+ await indexer.indexFile('/test/large.js');
144
+
145
+ expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('too large'));
146
+ });
147
+
148
+ it('Lines 515-516: preFilterFiles error handling', async () => {
149
+ vi.spyOn(fs, 'stat').mockRejectedValue(new Error('PreFilter Fail'));
150
+
151
+ // Trigger preFilterFiles via indexAll or directly
152
+ const files = ['/test/bad.js'];
153
+ const results = await indexer.preFilterFiles(files);
154
+
155
+ // Should catch error and return null (filtered out)
156
+ expect(results.length).toBe(0);
157
+ });
158
+
159
+ it('Lines 603 & 612: indexAll pruning branches', async () => {
160
+ const consoleSpy = vi.spyOn(console, 'info').mockImplementation(() => {});
161
+
162
+ // Mock cached files that are NOT in discovered files
163
+ cache.setFileHashes(new Map([['/test/deleted.js', 'hash']]));
164
+ cache.setFileCallDataEntries(new Map([['/test/deleted.js', {}]]));
165
+
166
+ // discoverFiles returns ["/test/file1.js"] (mocked in beforeEach)
167
+
168
+ await indexer.indexAll(false); // force=false to enable pruning
169
+
170
+ expect(cache.removeFileFromStore).toHaveBeenCalledWith('/test/deleted.js');
171
+ expect(consoleSpy).toHaveBeenCalledWith(
172
+ expect.stringContaining('Pruned 1 deleted/excluded files')
173
+ );
174
+ expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('Pruned 5 call-graph entries'));
175
+ });
176
+
177
+ it('Line 662: indexAll missing call graph data re-indexing', async () => {
178
+ const consoleSpy = vi.spyOn(console, 'info').mockImplementation(() => {});
179
+
180
+ // Setup state
181
+ cache.getVectorStore.mockReturnValue([{ file: '/test/file1.js' }]);
182
+ cache.clearFileCallData(); // Empty, so file1.js is missing data
183
+ // Use fixed-hash to match mock
184
+ cache.setFileHashes(new Map([['/test/file1.js', 'fixed-hash']]));
185
+ cache.getFileHash.mockReturnValue('fixed-hash');
186
+ cache.getFileMeta.mockReturnValue({ mtimeMs: 123, size: 50 });
187
+
188
+ // Mock fs for re-indexing check
189
+ vi.spyOn(fs, 'stat').mockResolvedValue({
190
+ isDirectory: () => false,
191
+ size: 50,
192
+ mtimeMs: 123,
193
+ });
194
+ vi.spyOn(fs, 'readFile').mockResolvedValue('content');
195
+
196
+ await indexer.indexAll(false);
197
+
198
+ expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('missing call graph data'));
199
+ // Should have processed file1.js
200
+ expect(cache.setFileCallData).toHaveBeenCalled();
201
+ });
202
+
203
+ it('Line 746 & 773: indexAll loop and call graph extraction error', async () => {
204
+ // Force extractCallData to throw
205
+ extractCallDataMock.mockImplementation(() => {
206
+ throw new Error('Parse Error');
207
+ });
208
+
209
+ // Must ensure filesToProcess is NOT empty
210
+ vi.spyOn(fs, 'stat').mockResolvedValue({ isDirectory: () => false, size: 50, mtimeMs: 123 });
211
+ vi.spyOn(fs, 'readFile').mockResolvedValue('content');
212
+ // Ensure hash mismatch so it processes
213
+ cache.getFileHash.mockReturnValue('old-hash');
214
+
215
+ await indexer.indexAll(true);
216
+
217
+ expect(extractCallDataMock).toHaveBeenCalled();
218
+ expect(cache.setFileCallData).not.toHaveBeenCalled();
219
+ });
220
+
221
+ it('Line 992: handleToolCall stats', async () => {
222
+ const request = { params: { arguments: { force: true } } };
223
+
224
+ // Mock indexAll to return specific stats
225
+ indexer.indexAll = vi.fn().mockResolvedValue({
226
+ skipped: false,
227
+ filesProcessed: 5,
228
+ chunksCreated: 10,
229
+ totalFiles: 5,
230
+ totalChunks: 10,
231
+ });
232
+
233
+ const result = await handleToolCall(request, indexer);
234
+ expect(result.content[0].text).toContain('Files processed this run: 5');
235
+ });
236
+ });
@@ -0,0 +1,107 @@
1
+
2
+ import { smartChunk } from './lib/utils.js';
3
+
4
+ const mockConfig = { embeddingModel: 'mock-model' };
5
+
6
+ // Mock tokenizer
7
+ import { vi } from 'vitest';
8
+ const estimateTokens = (str) => str.length;
9
+ const getChunkingParams = () => ({
10
+ maxTokens: 50,
11
+ targetTokens: 30,
12
+ overlapTokens: 5
13
+ });
14
+
15
+ // Mocking dependencies manually since we are running with node directly
16
+ // We need to overwrite the imports in utils.js or mock them.
17
+ // Since utils.js imports from tokenizer.js, we can't easily mock that with just node unless we use a loader or modify utils.js.
18
+ // So instead, let's create a temporary modified version of utils.js or just run the test file with console.info and capture output.
19
+
20
+ // Actually, I can use the existing test file but add logging there and run with `npm test ...` and look closer at output?
21
+ // The previous run captured stderr, but maybe I missed it?
22
+ // The output showed "Failed Tests 1", but no console.error output from my previous change.
23
+
24
+ // Wait, I see "stderr | test/index-codebase-phase2.test.js" in previous logs, but not for utils-branches.test.js.
25
+ // Vitest might suppress console output if test fails? Or implies it.
26
+
27
+ // Let's rely on reading the code again manually.
28
+
29
+ // Code:
30
+ /*
31
+ 196: if (inComment) {
32
+ 197: // Look for end of block comment
33
+ 198: if (line.includes('*\u002f')) {
34
+ ...
35
+ 200: // If there's content after the comment, process it (simplified)
36
+ 201: if (parts[parts.length - 1].trim().length > 0) {
37
+ 202: inComment = false;
38
+ 204: // We just assume the line is mixed and skip granular checks
39
+ 205: } else {
40
+ 206: inComment = false;
41
+ 207: }
42
+ 208: }
43
+ 209: }
44
+ */
45
+
46
+ // If `inComment` is true and line does NOT include `*/`, it goes to... nowhere?
47
+ // line 209 ends the `if (inComment)` block.
48
+ // Then line 251: `// Split lines that are too large...`
49
+ // Then line 336: `currentChunk.push(line);`
50
+
51
+ // Wait. If `inComment` is true, we just skip the character analysis (lines 210-249).
52
+ // We DO fall through to `currentChunk.push(line)` (line 336).
53
+
54
+ // So "middle line" SHOULD be added to `currentChunk`.
55
+
56
+ // Why is it not in the output?
57
+ // Maybe `estimateTokens` is returning 0 or small number, and it gets flushed/dropped?
58
+ // `smartChunk` calls `estimateTokens(line)` (line 192).
59
+
60
+ // In my test: `expect(chunks[0].text).toContain('middle line');`
61
+ // `chunks` length IS > 0 (checked).
62
+ // But text doesn't contain it.
63
+
64
+ // Maybe it was put in a chunk that was then dropped?
65
+ // `chunkText.trim().length > 20` check?
66
+ // content was `/*\n middle line \n*/\n` + "x".repeat(40)
67
+ // Line 1: `/*` -> pushed.
68
+ // Line 2: ` middle line ` -> pushed.
69
+ // Line 3: `*/` -> pushed.
70
+ // Line 4: `xxxxxxxx...` -> pushed?
71
+
72
+ // Wait, line tokens.
73
+ // "middle line" has spaces. `trim()` length is ~11 chars.
74
+ // If it's pushed to `currentChunk`.
75
+ // Then we hit the oversized line (x*40).
76
+ // Line 252: `if (lineTokens > maxTokens)`
77
+ // x*40 is 40 tokens. maxTokens is 50. So it is NOT oversized.
78
+
79
+ // Wait, input setup: `const content = "/*\n middle line \n*/\n" + "x".repeat(40);`
80
+ // Line 4 is "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" (40 chars).
81
+
82
+ // currentChunk has: "/*", " middle line ", "*/", "xxxx..."
83
+ // Total tokens?
84
+ // 2 + 13 + 2 + 40 = 57.
85
+ // targetTokens is 30.
86
+ // So `wouldExceedLimit` (line 283) might be true?
87
+
88
+ // Line 283: `currentChunk` (17 tokens) + `line` (40 tokens) = 57 > 30.
89
+ // `wouldExceedLimit` = true.
90
+
91
+ // `shouldSplit` = true (line 301).
92
+ // `safeToSplit` = true (line 305).
93
+
94
+ // Line 307: `if (shouldSplit && safeToSplit && currentChunk.length > 0)`
95
+ // -> Flush currentChunk ("/*", " middle line ", "*/").
96
+ // -> Text: "/*\n middle line \n*/". Length ~ 20.
97
+
98
+ // "/*" (2) + "\n" (1) + " middle line " (13) + "\n" (1) + "*/" (2) = 19 chars.
99
+ // 20 chars? "/*\n middle line \n*/" has length 2 + 1 + 13 + 1 + 2 = 19.
100
+ // `chunkText.trim().length > 20` (line 255/309) -> 19 <= 20 -> FALSE.
101
+ // CHUNK DROPPED!
102
+
103
+ // That explains it. The chunk containing the comment is being dropped because it's too small.
104
+
105
+ // Fix: Make the comment content longer!
106
+
107
+ console.info('Analysis complete: Middle line chunk is dropped because total size is < 20 chars.');