@softerist/heuristic-mcp 2.1.47 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent/workflows/code-review.md +60 -0
- package/.prettierrc +7 -0
- package/ARCHITECTURE.md +105 -170
- package/CONTRIBUTING.md +32 -113
- package/GEMINI.md +73 -0
- package/LICENSE +21 -21
- package/README.md +161 -54
- package/config.json +876 -75
- package/debug-pids.js +27 -0
- package/eslint.config.js +36 -0
- package/features/ann-config.js +37 -26
- package/features/clear-cache.js +28 -19
- package/features/find-similar-code.js +142 -66
- package/features/hybrid-search.js +253 -93
- package/features/index-codebase.js +1455 -394
- package/features/lifecycle.js +813 -180
- package/features/register.js +58 -52
- package/index.js +450 -306
- package/lib/cache-ops.js +22 -0
- package/lib/cache-utils.js +68 -0
- package/lib/cache.js +1392 -587
- package/lib/call-graph.js +165 -50
- package/lib/cli.js +154 -0
- package/lib/config.js +462 -121
- package/lib/embedding-process.js +77 -0
- package/lib/embedding-worker.js +545 -30
- package/lib/ignore-patterns.js +61 -59
- package/lib/json-worker.js +14 -0
- package/lib/json-writer.js +344 -0
- package/lib/logging.js +88 -0
- package/lib/memory-logger.js +13 -0
- package/lib/project-detector.js +13 -17
- package/lib/server-lifecycle.js +38 -0
- package/lib/settings-editor.js +645 -0
- package/lib/tokenizer.js +207 -104
- package/lib/utils.js +273 -198
- package/lib/vector-store-binary.js +592 -0
- package/mcp_config.example.json +13 -0
- package/package.json +13 -2
- package/scripts/clear-cache.js +6 -17
- package/scripts/download-model.js +14 -9
- package/scripts/postinstall.js +5 -5
- package/search-configs.js +36 -0
- package/test/ann-config.test.js +179 -0
- package/test/ann-fallback.test.js +6 -6
- package/test/binary-store.test.js +69 -0
- package/test/cache-branches.test.js +120 -0
- package/test/cache-errors.test.js +264 -0
- package/test/cache-extra.test.js +300 -0
- package/test/cache-helpers.test.js +205 -0
- package/test/cache-hnsw-failure.test.js +40 -0
- package/test/cache-json-worker.test.js +190 -0
- package/test/cache-worker.test.js +102 -0
- package/test/cache.test.js +443 -0
- package/test/call-graph.test.js +103 -4
- package/test/clear-cache.test.js +69 -68
- package/test/code-review-workflow.test.js +50 -0
- package/test/config.test.js +418 -0
- package/test/coverage-gap.test.js +497 -0
- package/test/coverage-maximizer.test.js +236 -0
- package/test/debug-analysis.js +107 -0
- package/test/embedding-model.test.js +173 -103
- package/test/embedding-worker-extra.test.js +272 -0
- package/test/embedding-worker.test.js +158 -0
- package/test/features.test.js +139 -0
- package/test/final-boost.test.js +271 -0
- package/test/final-polish.test.js +183 -0
- package/test/final.test.js +95 -0
- package/test/find-similar-code.test.js +191 -0
- package/test/helpers.js +92 -11
- package/test/helpers.test.js +46 -0
- package/test/hybrid-search-basic.test.js +62 -0
- package/test/hybrid-search-branch.test.js +202 -0
- package/test/hybrid-search-callgraph.test.js +229 -0
- package/test/hybrid-search-extra.test.js +81 -0
- package/test/hybrid-search.test.js +484 -71
- package/test/index-cli.test.js +520 -0
- package/test/index-codebase-batch.test.js +119 -0
- package/test/index-codebase-branches.test.js +585 -0
- package/test/index-codebase-core.test.js +1032 -0
- package/test/index-codebase-edge-cases.test.js +254 -0
- package/test/index-codebase-errors.test.js +132 -0
- package/test/index-codebase-gap.test.js +239 -0
- package/test/index-codebase-lines.test.js +151 -0
- package/test/index-codebase-watcher.test.js +259 -0
- package/test/index-codebase-zone.test.js +259 -0
- package/test/index-codebase.test.js +371 -69
- package/test/index-memory.test.js +220 -0
- package/test/indexer-detailed.test.js +176 -0
- package/test/integration.test.js +148 -92
- package/test/json-worker.test.js +50 -0
- package/test/lifecycle.test.js +541 -0
- package/test/master.test.js +198 -0
- package/test/perfection.test.js +349 -0
- package/test/project-detector.test.js +65 -0
- package/test/register.test.js +262 -0
- package/test/tokenizer.test.js +55 -93
- package/test/ultra-maximizer.test.js +116 -0
- package/test/utils-branches.test.js +161 -0
- package/test/utils-extra.test.js +116 -0
- package/test/utils.test.js +131 -0
- package/test/verify_fixes.js +76 -0
- package/test/worker-errors.test.js +96 -0
- package/test/worker-init.test.js +102 -0
- package/test/worker_throttling.test.js +93 -0
- package/tools/scripts/benchmark-search.js +95 -0
- package/tools/scripts/cache-stats.js +71 -0
- package/tools/scripts/manual-search.js +34 -0
- package/vitest.config.js +19 -9
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
// Note: This test file cannot achieve 100% coverage because some code paths
|
|
2
|
+
// in register.js are specific to Windows, macOS, and Linux. The tests are
|
|
3
|
+
// running on a single platform, so the other platform-specific code is not
|
|
4
|
+
// executed.
|
|
5
|
+
|
|
6
|
+
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
|
7
|
+
|
|
8
|
+
const fsPromisesMock = {};
|
|
9
|
+
const fsMock = {};
|
|
10
|
+
|
|
11
|
+
vi.mock('fs/promises', () => ({ default: fsPromisesMock }));
|
|
12
|
+
vi.mock('fs', () => fsMock);
|
|
13
|
+
|
|
14
|
+
const setPlatform = (value) => {
|
|
15
|
+
Object.defineProperty(process, 'platform', { value, configurable: true });
|
|
16
|
+
};
|
|
17
|
+
|
|
18
|
+
describe('register', () => {
|
|
19
|
+
const originalEnv = { ...process.env };
|
|
20
|
+
const originalPlatform = process.platform;
|
|
21
|
+
let consoleError;
|
|
22
|
+
|
|
23
|
+
beforeEach(() => {
|
|
24
|
+
fsPromisesMock.access = vi.fn();
|
|
25
|
+
fsPromisesMock.mkdir = vi.fn();
|
|
26
|
+
fsPromisesMock.readFile = vi.fn();
|
|
27
|
+
fsPromisesMock.writeFileSync = vi.fn();
|
|
28
|
+
fsMock.writeFileSync = vi.fn();
|
|
29
|
+
fsMock.existsSync = vi.fn();
|
|
30
|
+
fsMock.statSync = vi.fn();
|
|
31
|
+
consoleError = vi.spyOn(console, 'error').mockImplementation(() => {});
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
afterEach(() => {
|
|
35
|
+
process.env = { ...originalEnv };
|
|
36
|
+
setPlatform(originalPlatform);
|
|
37
|
+
consoleError.mockRestore();
|
|
38
|
+
vi.resetModules();
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
it('creates a config when Antigravity is detected', async () => {
|
|
42
|
+
process.env.ANTIGRAVITY_AGENT = '1';
|
|
43
|
+
setPlatform('win32');
|
|
44
|
+
fsPromisesMock.access.mockRejectedValue(new Error('missing'));
|
|
45
|
+
fsPromisesMock.mkdir.mockResolvedValue();
|
|
46
|
+
|
|
47
|
+
const { register } = await import('../features/register.js');
|
|
48
|
+
|
|
49
|
+
await register();
|
|
50
|
+
|
|
51
|
+
expect(fsPromisesMock.mkdir).toHaveBeenCalled();
|
|
52
|
+
expect(fsMock.writeFileSync).toHaveBeenCalled();
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
it('prints manual config when no IDE configs are writable', async () => {
|
|
56
|
+
delete process.env.ANTIGRAVITY_AGENT;
|
|
57
|
+
delete process.env.CURSOR_AGENT;
|
|
58
|
+
setPlatform('win32');
|
|
59
|
+
fsMock.existsSync.mockReturnValue(false);
|
|
60
|
+
fsMock.statSync.mockImplementation(() => {
|
|
61
|
+
throw new Error('missing');
|
|
62
|
+
});
|
|
63
|
+
fsPromisesMock.access.mockRejectedValue(new Error('missing'));
|
|
64
|
+
|
|
65
|
+
const { register } = await import('../features/register.js');
|
|
66
|
+
|
|
67
|
+
await register();
|
|
68
|
+
|
|
69
|
+
expect(consoleError).toHaveBeenCalledWith(expect.stringContaining('Manual Config'));
|
|
70
|
+
});
|
|
71
|
+
|
|
72
|
+
it('detects Antigravity via fallback directory check', async () => {
|
|
73
|
+
delete process.env.ANTIGRAVITY_AGENT;
|
|
74
|
+
delete process.env.CURSOR_AGENT;
|
|
75
|
+
setPlatform('linux');
|
|
76
|
+
fsMock.existsSync.mockReturnValue(true);
|
|
77
|
+
fsMock.statSync.mockReturnValue({ isDirectory: () => true });
|
|
78
|
+
fsPromisesMock.access.mockRejectedValue(new Error('missing'));
|
|
79
|
+
fsPromisesMock.mkdir.mockResolvedValue();
|
|
80
|
+
|
|
81
|
+
const { register } = await import('../features/register.js');
|
|
82
|
+
|
|
83
|
+
await register();
|
|
84
|
+
|
|
85
|
+
expect(fsPromisesMock.mkdir).toHaveBeenCalled();
|
|
86
|
+
expect(fsMock.writeFileSync).toHaveBeenCalled();
|
|
87
|
+
});
|
|
88
|
+
|
|
89
|
+
it('uses darwin config paths for Claude Desktop and Cursor', async () => {
|
|
90
|
+
process.env.ANTIGRAVITY_AGENT = '1';
|
|
91
|
+
setPlatform('darwin');
|
|
92
|
+
fsPromisesMock.access.mockRejectedValue(new Error('missing'));
|
|
93
|
+
fsPromisesMock.mkdir.mockResolvedValue();
|
|
94
|
+
|
|
95
|
+
const { register } = await import('../features/register.js');
|
|
96
|
+
|
|
97
|
+
await register();
|
|
98
|
+
|
|
99
|
+
expect(fsPromisesMock.mkdir).toHaveBeenCalled();
|
|
100
|
+
expect(fsMock.writeFileSync).toHaveBeenCalled();
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
it('handles corrupt config files gracefully', async () => {
|
|
104
|
+
delete process.env.ANTIGRAVITY_AGENT;
|
|
105
|
+
process.env.CURSOR_AGENT = '1';
|
|
106
|
+
setPlatform('win32');
|
|
107
|
+
fsPromisesMock.access.mockResolvedValue();
|
|
108
|
+
fsPromisesMock.readFile.mockResolvedValue('{not-json');
|
|
109
|
+
|
|
110
|
+
const { register } = await import('../features/register.js');
|
|
111
|
+
|
|
112
|
+
await register();
|
|
113
|
+
|
|
114
|
+
expect(consoleError).toHaveBeenCalledWith(expect.stringContaining('Warning'));
|
|
115
|
+
expect(fsMock.writeFileSync).not.toHaveBeenCalled();
|
|
116
|
+
});
|
|
117
|
+
|
|
118
|
+
it('handles empty config files as new', async () => {
|
|
119
|
+
delete process.env.ANTIGRAVITY_AGENT;
|
|
120
|
+
process.env.CURSOR_AGENT = '1';
|
|
121
|
+
setPlatform('win32');
|
|
122
|
+
fsPromisesMock.access.mockResolvedValue();
|
|
123
|
+
fsPromisesMock.readFile.mockResolvedValue(' ');
|
|
124
|
+
|
|
125
|
+
const { register } = await import('../features/register.js');
|
|
126
|
+
|
|
127
|
+
await register();
|
|
128
|
+
|
|
129
|
+
expect(fsMock.writeFileSync).toHaveBeenCalled();
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
it('skips non-matching IDEs when filter is provided', async () => {
|
|
133
|
+
process.env.ANTIGRAVITY_AGENT = '1';
|
|
134
|
+
setPlatform('win32');
|
|
135
|
+
fsPromisesMock.access.mockRejectedValue(new Error('missing'));
|
|
136
|
+
fsPromisesMock.mkdir.mockResolvedValue();
|
|
137
|
+
|
|
138
|
+
const { register } = await import('../features/register.js');
|
|
139
|
+
|
|
140
|
+
await register('cursor');
|
|
141
|
+
|
|
142
|
+
expect(fsMock.writeFileSync).not.toHaveBeenCalled();
|
|
143
|
+
});
|
|
144
|
+
|
|
145
|
+
it('logs when config directory cannot be created', async () => {
|
|
146
|
+
process.env.ANTIGRAVITY_AGENT = '1';
|
|
147
|
+
setPlatform('win32');
|
|
148
|
+
fsPromisesMock.access.mockRejectedValue(new Error('missing'));
|
|
149
|
+
fsPromisesMock.mkdir.mockRejectedValue(new Error('nope'));
|
|
150
|
+
|
|
151
|
+
const { register } = await import('../features/register.js');
|
|
152
|
+
|
|
153
|
+
await register();
|
|
154
|
+
|
|
155
|
+
expect(consoleError).toHaveBeenCalledWith(
|
|
156
|
+
expect.stringContaining('Cannot create config directory')
|
|
157
|
+
);
|
|
158
|
+
});
|
|
159
|
+
|
|
160
|
+
it('logs registration failures when writing config fails', async () => {
|
|
161
|
+
process.env.ANTIGRAVITY_AGENT = '1';
|
|
162
|
+
setPlatform('win32');
|
|
163
|
+
fsPromisesMock.access.mockResolvedValue();
|
|
164
|
+
fsPromisesMock.readFile.mockResolvedValue('{}');
|
|
165
|
+
fsMock.writeFileSync.mockImplementation(() => {
|
|
166
|
+
throw new Error('write failed');
|
|
167
|
+
});
|
|
168
|
+
|
|
169
|
+
const { register } = await import('../features/register.js');
|
|
170
|
+
|
|
171
|
+
await register();
|
|
172
|
+
|
|
173
|
+
expect(consoleError).toHaveBeenCalledWith(expect.stringContaining('Failed to register'));
|
|
174
|
+
});
|
|
175
|
+
|
|
176
|
+
it('registers config on non-win32 platforms', async () => {
|
|
177
|
+
process.env.ANTIGRAVITY_AGENT = '1';
|
|
178
|
+
setPlatform('linux');
|
|
179
|
+
fsPromisesMock.access.mockRejectedValue(new Error('missing'));
|
|
180
|
+
fsPromisesMock.mkdir.mockResolvedValue();
|
|
181
|
+
|
|
182
|
+
const { register } = await import('../features/register.js');
|
|
183
|
+
|
|
184
|
+
await register();
|
|
185
|
+
|
|
186
|
+
expect(fsMock.writeFileSync).toHaveBeenCalled();
|
|
187
|
+
});
|
|
188
|
+
|
|
189
|
+
it('falls back to console.error when tty logging fails', async () => {
|
|
190
|
+
process.env.ANTIGRAVITY_AGENT = '1';
|
|
191
|
+
setPlatform('linux');
|
|
192
|
+
fsPromisesMock.access.mockRejectedValue(new Error('missing'));
|
|
193
|
+
fsPromisesMock.mkdir.mockResolvedValue();
|
|
194
|
+
fsMock.writeFileSync.mockImplementation((targetPath) => {
|
|
195
|
+
if (targetPath === '/dev/tty') {
|
|
196
|
+
throw new Error('tty denied');
|
|
197
|
+
}
|
|
198
|
+
});
|
|
199
|
+
|
|
200
|
+
const { register } = await import('../features/register.js');
|
|
201
|
+
|
|
202
|
+
await register();
|
|
203
|
+
|
|
204
|
+
expect(consoleError).toHaveBeenCalled();
|
|
205
|
+
});
|
|
206
|
+
|
|
207
|
+
it('handles missing APPDATA on Windows', async () => {
|
|
208
|
+
process.env.ANTIGRAVITY_AGENT = '1';
|
|
209
|
+
setPlatform('win32');
|
|
210
|
+
delete process.env.APPDATA;
|
|
211
|
+
fsPromisesMock.access.mockRejectedValue(new Error('missing'));
|
|
212
|
+
fsPromisesMock.mkdir.mockResolvedValue();
|
|
213
|
+
|
|
214
|
+
const { register } = await import('../features/register.js');
|
|
215
|
+
await register();
|
|
216
|
+
|
|
217
|
+
expect(fsMock.writeFileSync).toHaveBeenCalled();
|
|
218
|
+
});
|
|
219
|
+
|
|
220
|
+
it('handles missing INIT_CWD for Antigravity', async () => {
|
|
221
|
+
process.env.ANTIGRAVITY_AGENT = '1';
|
|
222
|
+
setPlatform('win32');
|
|
223
|
+
delete process.env.INIT_CWD;
|
|
224
|
+
fsPromisesMock.access.mockResolvedValue();
|
|
225
|
+
fsPromisesMock.readFile.mockResolvedValue('{}');
|
|
226
|
+
|
|
227
|
+
const { register } = await import('../features/register.js');
|
|
228
|
+
await register();
|
|
229
|
+
|
|
230
|
+
expect(fsMock.writeFileSync).toHaveBeenCalled();
|
|
231
|
+
});
|
|
232
|
+
|
|
233
|
+
it('handles existing mcpServers object', async () => {
|
|
234
|
+
delete process.env.ANTIGRAVITY_AGENT;
|
|
235
|
+
process.env.CURSOR_AGENT = '1';
|
|
236
|
+
setPlatform('win32');
|
|
237
|
+
fsPromisesMock.access.mockResolvedValue();
|
|
238
|
+
fsPromisesMock.readFile.mockResolvedValue(JSON.stringify({ mcpServers: { other: {} } }));
|
|
239
|
+
|
|
240
|
+
const { register } = await import('../features/register.js');
|
|
241
|
+
await register();
|
|
242
|
+
|
|
243
|
+
const written = JSON.parse(fsMock.writeFileSync.mock.calls[0][1]);
|
|
244
|
+
expect(written.mcpServers.other).toBeDefined();
|
|
245
|
+
expect(written.mcpServers['heuristic-mcp']).toBeDefined();
|
|
246
|
+
});
|
|
247
|
+
|
|
248
|
+
it('handles missing LOCALAPPDATA on Windows', async () => {
|
|
249
|
+
// This targets line 205
|
|
250
|
+
process.env.ANTIGRAVITY_AGENT = '1';
|
|
251
|
+
setPlatform('win32');
|
|
252
|
+
delete process.env.LOCALAPPDATA;
|
|
253
|
+
// Ensure registerCount > 0 to hit the block
|
|
254
|
+
fsPromisesMock.access.mockResolvedValue();
|
|
255
|
+
fsPromisesMock.readFile.mockResolvedValue('{}');
|
|
256
|
+
|
|
257
|
+
const { register } = await import('../features/register.js');
|
|
258
|
+
await register();
|
|
259
|
+
|
|
260
|
+
expect(fsMock.writeFileSync).toHaveBeenCalled();
|
|
261
|
+
});
|
|
262
|
+
});
|
package/test/tokenizer.test.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Tests for Tokenizer utilities
|
|
3
|
-
*
|
|
3
|
+
*
|
|
4
4
|
* Tests the token estimation and model-specific limits including:
|
|
5
5
|
* - Token estimation for various text types
|
|
6
6
|
* - Model token limits lookup
|
|
@@ -9,12 +9,11 @@
|
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
11
|
import { describe, it, expect } from 'vitest';
|
|
12
|
-
import {
|
|
13
|
-
estimateTokens,
|
|
14
|
-
getModelTokenLimit,
|
|
12
|
+
import {
|
|
13
|
+
estimateTokens,
|
|
14
|
+
getModelTokenLimit,
|
|
15
15
|
getChunkingParams,
|
|
16
|
-
|
|
17
|
-
MODEL_TOKEN_LIMITS
|
|
16
|
+
MODEL_TOKEN_LIMITS,
|
|
18
17
|
} from '../lib/tokenizer.js';
|
|
19
18
|
|
|
20
19
|
describe('Token Estimation', () => {
|
|
@@ -24,30 +23,30 @@ describe('Token Estimation', () => {
|
|
|
24
23
|
expect(estimateTokens(null)).toBe(0);
|
|
25
24
|
expect(estimateTokens(undefined)).toBe(0);
|
|
26
25
|
});
|
|
27
|
-
|
|
26
|
+
|
|
28
27
|
it('should count simple words correctly', () => {
|
|
29
28
|
// Simple words get ~1 token each + 2 for CLS/SEP
|
|
30
29
|
const result = estimateTokens('hello world');
|
|
31
30
|
expect(result).toBeGreaterThanOrEqual(4); // 2 words + 2 special tokens
|
|
32
31
|
expect(result).toBeLessThanOrEqual(6);
|
|
33
32
|
});
|
|
34
|
-
|
|
33
|
+
|
|
35
34
|
it('should add extra tokens for long words', () => {
|
|
36
35
|
const shortWord = estimateTokens('cat');
|
|
37
36
|
const longWord = estimateTokens('internationalization');
|
|
38
|
-
|
|
37
|
+
|
|
39
38
|
// Long words should have more tokens due to subword splitting
|
|
40
39
|
expect(longWord).toBeGreaterThan(shortWord);
|
|
41
40
|
});
|
|
42
|
-
|
|
41
|
+
|
|
43
42
|
it('should count special characters', () => {
|
|
44
43
|
const withoutSpecial = estimateTokens('hello world');
|
|
45
44
|
const withSpecial = estimateTokens('hello(); world{}');
|
|
46
|
-
|
|
45
|
+
|
|
47
46
|
// Special characters add to token count
|
|
48
47
|
expect(withSpecial).toBeGreaterThan(withoutSpecial);
|
|
49
48
|
});
|
|
50
|
-
|
|
49
|
+
|
|
51
50
|
it('should handle code snippets', () => {
|
|
52
51
|
const code = `
|
|
53
52
|
function test() {
|
|
@@ -55,18 +54,18 @@ describe('Token Estimation', () => {
|
|
|
55
54
|
return x * 2;
|
|
56
55
|
}
|
|
57
56
|
`;
|
|
58
|
-
|
|
57
|
+
|
|
59
58
|
const tokens = estimateTokens(code);
|
|
60
|
-
|
|
59
|
+
|
|
61
60
|
// Code has many special chars, should have reasonable token count
|
|
62
61
|
expect(tokens).toBeGreaterThan(10);
|
|
63
62
|
expect(tokens).toBeLessThan(100);
|
|
64
63
|
});
|
|
65
|
-
|
|
64
|
+
|
|
66
65
|
it('should handle multiline text', () => {
|
|
67
66
|
const multiline = 'line one\nline two\nline three';
|
|
68
67
|
const tokens = estimateTokens(multiline);
|
|
69
|
-
|
|
68
|
+
|
|
70
69
|
expect(tokens).toBeGreaterThan(5);
|
|
71
70
|
});
|
|
72
71
|
});
|
|
@@ -76,117 +75,80 @@ describe('Model Token Limits', () => {
|
|
|
76
75
|
describe('MODEL_TOKEN_LIMITS', () => {
|
|
77
76
|
it('should have default limit', () => {
|
|
78
77
|
expect(MODEL_TOKEN_LIMITS['default']).toBeDefined();
|
|
79
|
-
expect(MODEL_TOKEN_LIMITS['default']).toBe(
|
|
80
|
-
});
|
|
81
|
-
|
|
82
|
-
it('should have limits for MiniLM models', () => {
|
|
83
|
-
expect(MODEL_TOKEN_LIMITS['Xenova/all-MiniLM-L6-v2']).toBe(256);
|
|
84
|
-
expect(MODEL_TOKEN_LIMITS['Xenova/all-MiniLM-L12-v2']).toBe(256);
|
|
78
|
+
expect(MODEL_TOKEN_LIMITS['default']).toBe(512);
|
|
85
79
|
});
|
|
86
|
-
|
|
87
|
-
it('should have limits for
|
|
88
|
-
expect(MODEL_TOKEN_LIMITS['
|
|
89
|
-
expect(MODEL_TOKEN_LIMITS['Xenova/graphcodebert-base']).toBe(512);
|
|
90
|
-
});
|
|
91
|
-
|
|
92
|
-
it('should have limits for E5 and BGE models', () => {
|
|
93
|
-
expect(MODEL_TOKEN_LIMITS['Xenova/e5-small-v2']).toBe(512);
|
|
94
|
-
expect(MODEL_TOKEN_LIMITS['Xenova/bge-base-en-v1.5']).toBe(512);
|
|
80
|
+
|
|
81
|
+
it('should have limits for Jina models', () => {
|
|
82
|
+
expect(MODEL_TOKEN_LIMITS['jinaai/jina-embeddings-v2-base-code']).toBe(8192);
|
|
95
83
|
});
|
|
96
84
|
});
|
|
97
85
|
|
|
98
86
|
describe('getModelTokenLimit', () => {
|
|
99
87
|
it('should return correct limit for known models', () => {
|
|
100
|
-
expect(getModelTokenLimit('
|
|
101
|
-
expect(getModelTokenLimit('Xenova/codebert-base')).toBe(512);
|
|
88
|
+
expect(getModelTokenLimit('jinaai/jina-embeddings-v2-base-code')).toBe(8192);
|
|
102
89
|
});
|
|
103
|
-
|
|
90
|
+
|
|
104
91
|
it('should return default for unknown models', () => {
|
|
105
|
-
expect(getModelTokenLimit('unknown/model-name')).toBe(
|
|
92
|
+
expect(getModelTokenLimit('unknown/model-name')).toBe(512);
|
|
106
93
|
});
|
|
107
|
-
|
|
94
|
+
|
|
108
95
|
it('should return default for null/undefined', () => {
|
|
109
|
-
expect(getModelTokenLimit(null)).toBe(
|
|
110
|
-
expect(getModelTokenLimit(undefined)).toBe(
|
|
96
|
+
expect(getModelTokenLimit(null)).toBe(512);
|
|
97
|
+
expect(getModelTokenLimit(undefined)).toBe(512);
|
|
111
98
|
});
|
|
112
|
-
|
|
99
|
+
|
|
113
100
|
it('should be case-insensitive', () => {
|
|
114
101
|
const normalCase = getModelTokenLimit('Xenova/all-MiniLM-L6-v2');
|
|
115
102
|
const lowerCase = getModelTokenLimit('xenova/all-minilm-l6-v2');
|
|
116
|
-
|
|
103
|
+
|
|
117
104
|
expect(lowerCase).toBe(normalCase);
|
|
118
105
|
});
|
|
106
|
+
|
|
107
|
+
it('should match known models case-insensitively', () => {
|
|
108
|
+
const mixedCase = getModelTokenLimit('JINAAI/JINA-EMBEDDINGS-V2-BASE-CODE');
|
|
109
|
+
expect(mixedCase).toBe(8192);
|
|
110
|
+
});
|
|
119
111
|
});
|
|
120
112
|
});
|
|
121
113
|
|
|
122
114
|
describe('Chunking Parameters', () => {
|
|
123
115
|
describe('getChunkingParams', () => {
|
|
124
116
|
it('should return correct params for default model', () => {
|
|
125
|
-
const params = getChunkingParams('
|
|
126
|
-
|
|
127
|
-
expect(params.maxTokens).toBe(
|
|
128
|
-
expect(params.targetTokens).toBeLessThan(
|
|
129
|
-
expect(params.targetTokens).toBeGreaterThan(
|
|
117
|
+
const params = getChunkingParams('jinaai/jina-embeddings-v2-base-code');
|
|
118
|
+
|
|
119
|
+
expect(params.maxTokens).toBe(8192);
|
|
120
|
+
expect(params.targetTokens).toBeLessThan(8192); // 85% of max
|
|
121
|
+
expect(params.targetTokens).toBeGreaterThan(6000);
|
|
130
122
|
expect(params.overlapTokens).toBeLessThan(params.targetTokens);
|
|
131
123
|
});
|
|
132
|
-
|
|
124
|
+
|
|
133
125
|
it('should calculate ~85% for target tokens', () => {
|
|
134
|
-
const params = getChunkingParams('
|
|
135
|
-
|
|
136
|
-
// 85% of
|
|
137
|
-
expect(params.targetTokens).toBe(Math.floor(
|
|
126
|
+
const params = getChunkingParams('jinaai/jina-embeddings-v2-base-code'); // 8192 limit
|
|
127
|
+
|
|
128
|
+
// 85% of 8192 = 6963.2 -> floor = 6963
|
|
129
|
+
expect(params.targetTokens).toBe(Math.floor(8192 * 0.85));
|
|
138
130
|
});
|
|
139
|
-
|
|
131
|
+
|
|
140
132
|
it('should calculate ~18% overlap', () => {
|
|
141
|
-
const params = getChunkingParams('
|
|
142
|
-
|
|
133
|
+
const params = getChunkingParams('jinaai/jina-embeddings-v2-base-code');
|
|
134
|
+
|
|
143
135
|
const expectedOverlap = Math.floor(params.targetTokens * 0.18);
|
|
144
136
|
expect(params.overlapTokens).toBe(expectedOverlap);
|
|
145
137
|
});
|
|
146
|
-
|
|
138
|
+
|
|
147
139
|
it('should return all three parameters', () => {
|
|
148
140
|
const params = getChunkingParams('Xenova/all-MiniLM-L6-v2');
|
|
149
|
-
|
|
141
|
+
|
|
150
142
|
expect(params).toHaveProperty('maxTokens');
|
|
151
143
|
expect(params).toHaveProperty('targetTokens');
|
|
152
144
|
expect(params).toHaveProperty('overlapTokens');
|
|
153
145
|
});
|
|
154
|
-
|
|
146
|
+
|
|
155
147
|
it('should handle unknown models with defaults', () => {
|
|
156
148
|
const params = getChunkingParams('unknown/model');
|
|
157
|
-
|
|
158
|
-
expect(params.maxTokens).toBe(256);
|
|
159
|
-
expect(params.targetTokens).toBeLessThan(256);
|
|
160
|
-
});
|
|
161
|
-
});
|
|
162
|
-
});
|
|
163
149
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
it('should return false for short text', () => {
|
|
167
|
-
const shortText = 'hello world';
|
|
168
|
-
expect(exceedsTokenLimit(shortText, 'Xenova/all-MiniLM-L6-v2')).toBe(false);
|
|
169
|
-
});
|
|
170
|
-
|
|
171
|
-
it('should return true for very long text', () => {
|
|
172
|
-
// Create text that definitely exceeds 256 tokens
|
|
173
|
-
const longText = 'word '.repeat(500);
|
|
174
|
-
expect(exceedsTokenLimit(longText, 'Xenova/all-MiniLM-L6-v2')).toBe(true);
|
|
175
|
-
});
|
|
176
|
-
|
|
177
|
-
it('should consider different model limits', () => {
|
|
178
|
-
// Create text that exceeds 256 but not 512
|
|
179
|
-
const mediumText = 'word '.repeat(300);
|
|
180
|
-
|
|
181
|
-
// Should exceed small model limit
|
|
182
|
-
expect(exceedsTokenLimit(mediumText, 'Xenova/all-MiniLM-L6-v2')).toBe(true);
|
|
183
|
-
|
|
184
|
-
// Should not exceed large model limit
|
|
185
|
-
expect(exceedsTokenLimit(mediumText, 'Xenova/codebert-base')).toBe(false);
|
|
186
|
-
});
|
|
187
|
-
|
|
188
|
-
it('should handle empty text', () => {
|
|
189
|
-
expect(exceedsTokenLimit('', 'Xenova/all-MiniLM-L6-v2')).toBe(false);
|
|
150
|
+
expect(params.maxTokens).toBe(512);
|
|
151
|
+
expect(params.targetTokens).toBeLessThan(512);
|
|
190
152
|
});
|
|
191
153
|
});
|
|
192
154
|
});
|
|
@@ -208,18 +170,18 @@ describe('Integration: Token Estimation Accuracy', () => {
|
|
|
208
170
|
}
|
|
209
171
|
}
|
|
210
172
|
`;
|
|
211
|
-
|
|
173
|
+
|
|
212
174
|
const tokens = estimateTokens(typicalCodeChunk);
|
|
213
|
-
|
|
175
|
+
|
|
214
176
|
// Should be within typical chunk size
|
|
215
177
|
expect(tokens).toBeGreaterThan(30);
|
|
216
178
|
expect(tokens).toBeLessThan(200);
|
|
217
179
|
});
|
|
218
|
-
|
|
180
|
+
|
|
219
181
|
it('should keep small code chunks under model limits', () => {
|
|
220
182
|
// A small chunk should definitely be under the limit
|
|
221
183
|
const safeChunk = 'const x = 1;\n'.repeat(10);
|
|
222
|
-
|
|
223
|
-
expect(
|
|
184
|
+
const limit = getModelTokenLimit('jinaai/jina-embeddings-v2-base-code');
|
|
185
|
+
expect(estimateTokens(safeChunk)).toBeLessThanOrEqual(limit);
|
|
224
186
|
});
|
|
225
187
|
});
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
|
2
|
+
import { CodebaseIndexer, handleToolCall } from '../features/index-codebase.js';
|
|
3
|
+
import { EmbeddingsCache } from '../lib/cache.js';
|
|
4
|
+
import fs from 'fs/promises';
|
|
5
|
+
import path from 'path';
|
|
6
|
+
|
|
7
|
+
// Mock dependencies
|
|
8
|
+
vi.mock('fs/promises');
|
|
9
|
+
vi.mock('worker_threads', async () => {
|
|
10
|
+
const { EventEmitter } = await import('events');
|
|
11
|
+
class Worker extends EventEmitter {
|
|
12
|
+
constructor() {
|
|
13
|
+
super();
|
|
14
|
+
// Don't emit ready automatically to allow manual control in tests
|
|
15
|
+
}
|
|
16
|
+
terminate() {
|
|
17
|
+
return Promise.resolve();
|
|
18
|
+
}
|
|
19
|
+
postMessage(msg) {
|
|
20
|
+
if (msg.type === 'process') {
|
|
21
|
+
this.emit('message', { type: 'results', results: [], batchId: msg.batchId });
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
return { Worker };
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
vi.mock('os', async () => {
|
|
29
|
+
return {
|
|
30
|
+
default: { cpus: () => [{}, {}, {}, {}] },
|
|
31
|
+
cpus: () => [{}, {}, {}, {}],
|
|
32
|
+
};
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
describe('Ultra Maximizer', () => {
|
|
36
|
+
afterEach(() => {
|
|
37
|
+
vi.restoreAllMocks();
|
|
38
|
+
});
|
|
39
|
+
|
|
40
|
+
describe('lib/cache.js Internals', () => {
|
|
41
|
+
it('Line 673: logs call-graph load in verbose mode', async () => {
|
|
42
|
+
const config = {
|
|
43
|
+
enableCache: true,
|
|
44
|
+
cacheDirectory: '/cache',
|
|
45
|
+
embeddingModel: 'test',
|
|
46
|
+
fileExtensions: ['js'],
|
|
47
|
+
verbose: true, // Crucial for line 673
|
|
48
|
+
};
|
|
49
|
+
const cache = new EmbeddingsCache(config);
|
|
50
|
+
const consoleSpy = vi.spyOn(console, 'info').mockImplementation(() => {});
|
|
51
|
+
|
|
52
|
+
// Mock file system for load()
|
|
53
|
+
vi.spyOn(fs, 'mkdir').mockResolvedValue();
|
|
54
|
+
vi.spyOn(fs, 'readFile').mockImplementation(async (p) => {
|
|
55
|
+
if (p.endsWith('meta.json')) return JSON.stringify({ version: 1, embeddingModel: 'test' });
|
|
56
|
+
if (p.endsWith('embeddings.json')) return '[]';
|
|
57
|
+
if (p.endsWith('file-hashes.json')) return '{}';
|
|
58
|
+
if (p.endsWith('call-graph.json')) return JSON.stringify({ 'f.js': {} });
|
|
59
|
+
return null;
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
await cache.load();
|
|
63
|
+
|
|
64
|
+
expect(cache.getFileCallDataCount()).toBe(1);
|
|
65
|
+
expect(cache.hasFileCallData('f.js')).toBe(true);
|
|
66
|
+
});
|
|
67
|
+
});
|
|
68
|
+
|
|
69
|
+
describe('features/index-codebase.js Worker Path', () => {
|
|
70
|
+
it('Line 146: covers initializeWorkers failure and termination', async () => {
|
|
71
|
+
const config = { workerThreads: 2, verbose: true, embeddingModel: 'test' };
|
|
72
|
+
const embedder = vi.fn();
|
|
73
|
+
const cache = { save: vi.fn(), getVectorStore: () => [] };
|
|
74
|
+
const indexer = new CodebaseIndexer(embedder, cache, config);
|
|
75
|
+
|
|
76
|
+
// Mock Worker to fail immediately/emit error
|
|
77
|
+
const { Worker } = await import('worker_threads');
|
|
78
|
+
// We can't change the class constructor behavior easily here.
|
|
79
|
+
// But we can emit error on the worker instances after creation?
|
|
80
|
+
// initializeWorkers creates workers and waits for "ready".
|
|
81
|
+
|
|
82
|
+
const consoleSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
|
|
83
|
+
const terminateSpy = vi.spyOn(indexer, 'terminateWorkers');
|
|
84
|
+
|
|
85
|
+
// We need to trigger the "error" event on the worker.
|
|
86
|
+
// We can spy on the workers array push?
|
|
87
|
+
// Or wait a tick?
|
|
88
|
+
|
|
89
|
+
// Let's rely on the timeout? No, timeout takes too long.
|
|
90
|
+
// We need to get access to the worker instance.
|
|
91
|
+
|
|
92
|
+
// Better strategy: Mock the Worker constructor to return a specific instance we control.
|
|
93
|
+
// But vi.mock is hoisted.
|
|
94
|
+
// We can modify prototype?
|
|
95
|
+
|
|
96
|
+
// Actually, we can just run initializeWorkers, then manually emit error on indexer.workers[0].
|
|
97
|
+
|
|
98
|
+
const initPromise = indexer.initializeWorkers();
|
|
99
|
+
|
|
100
|
+
// Wait a tick for workers to be created
|
|
101
|
+
await new Promise((r) => setTimeout(r, 0));
|
|
102
|
+
|
|
103
|
+
if (indexer.workers.length > 0) {
|
|
104
|
+
indexer.workers[0].emit('message', { type: 'error', error: 'Init Fail' });
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
await initPromise;
|
|
108
|
+
|
|
109
|
+
// initializeWorkers catches the error and calls terminateWorkers (Line 146)
|
|
110
|
+
expect(terminateSpy).toHaveBeenCalled();
|
|
111
|
+
expect(consoleSpy).toHaveBeenCalledWith(
|
|
112
|
+
expect.stringContaining('Worker initialization failed')
|
|
113
|
+
);
|
|
114
|
+
});
|
|
115
|
+
});
|
|
116
|
+
});
|