@softerist/heuristic-mcp 2.1.47 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent/workflows/code-review.md +60 -0
- package/.prettierrc +7 -0
- package/ARCHITECTURE.md +105 -170
- package/CONTRIBUTING.md +32 -113
- package/GEMINI.md +73 -0
- package/LICENSE +21 -21
- package/README.md +161 -54
- package/config.json +876 -75
- package/debug-pids.js +27 -0
- package/eslint.config.js +36 -0
- package/features/ann-config.js +37 -26
- package/features/clear-cache.js +28 -19
- package/features/find-similar-code.js +142 -66
- package/features/hybrid-search.js +253 -93
- package/features/index-codebase.js +1455 -394
- package/features/lifecycle.js +813 -180
- package/features/register.js +58 -52
- package/index.js +450 -306
- package/lib/cache-ops.js +22 -0
- package/lib/cache-utils.js +68 -0
- package/lib/cache.js +1392 -587
- package/lib/call-graph.js +165 -50
- package/lib/cli.js +154 -0
- package/lib/config.js +462 -121
- package/lib/embedding-process.js +77 -0
- package/lib/embedding-worker.js +545 -30
- package/lib/ignore-patterns.js +61 -59
- package/lib/json-worker.js +14 -0
- package/lib/json-writer.js +344 -0
- package/lib/logging.js +88 -0
- package/lib/memory-logger.js +13 -0
- package/lib/project-detector.js +13 -17
- package/lib/server-lifecycle.js +38 -0
- package/lib/settings-editor.js +645 -0
- package/lib/tokenizer.js +207 -104
- package/lib/utils.js +273 -198
- package/lib/vector-store-binary.js +592 -0
- package/mcp_config.example.json +13 -0
- package/package.json +13 -2
- package/scripts/clear-cache.js +6 -17
- package/scripts/download-model.js +14 -9
- package/scripts/postinstall.js +5 -5
- package/search-configs.js +36 -0
- package/test/ann-config.test.js +179 -0
- package/test/ann-fallback.test.js +6 -6
- package/test/binary-store.test.js +69 -0
- package/test/cache-branches.test.js +120 -0
- package/test/cache-errors.test.js +264 -0
- package/test/cache-extra.test.js +300 -0
- package/test/cache-helpers.test.js +205 -0
- package/test/cache-hnsw-failure.test.js +40 -0
- package/test/cache-json-worker.test.js +190 -0
- package/test/cache-worker.test.js +102 -0
- package/test/cache.test.js +443 -0
- package/test/call-graph.test.js +103 -4
- package/test/clear-cache.test.js +69 -68
- package/test/code-review-workflow.test.js +50 -0
- package/test/config.test.js +418 -0
- package/test/coverage-gap.test.js +497 -0
- package/test/coverage-maximizer.test.js +236 -0
- package/test/debug-analysis.js +107 -0
- package/test/embedding-model.test.js +173 -103
- package/test/embedding-worker-extra.test.js +272 -0
- package/test/embedding-worker.test.js +158 -0
- package/test/features.test.js +139 -0
- package/test/final-boost.test.js +271 -0
- package/test/final-polish.test.js +183 -0
- package/test/final.test.js +95 -0
- package/test/find-similar-code.test.js +191 -0
- package/test/helpers.js +92 -11
- package/test/helpers.test.js +46 -0
- package/test/hybrid-search-basic.test.js +62 -0
- package/test/hybrid-search-branch.test.js +202 -0
- package/test/hybrid-search-callgraph.test.js +229 -0
- package/test/hybrid-search-extra.test.js +81 -0
- package/test/hybrid-search.test.js +484 -71
- package/test/index-cli.test.js +520 -0
- package/test/index-codebase-batch.test.js +119 -0
- package/test/index-codebase-branches.test.js +585 -0
- package/test/index-codebase-core.test.js +1032 -0
- package/test/index-codebase-edge-cases.test.js +254 -0
- package/test/index-codebase-errors.test.js +132 -0
- package/test/index-codebase-gap.test.js +239 -0
- package/test/index-codebase-lines.test.js +151 -0
- package/test/index-codebase-watcher.test.js +259 -0
- package/test/index-codebase-zone.test.js +259 -0
- package/test/index-codebase.test.js +371 -69
- package/test/index-memory.test.js +220 -0
- package/test/indexer-detailed.test.js +176 -0
- package/test/integration.test.js +148 -92
- package/test/json-worker.test.js +50 -0
- package/test/lifecycle.test.js +541 -0
- package/test/master.test.js +198 -0
- package/test/perfection.test.js +349 -0
- package/test/project-detector.test.js +65 -0
- package/test/register.test.js +262 -0
- package/test/tokenizer.test.js +55 -93
- package/test/ultra-maximizer.test.js +116 -0
- package/test/utils-branches.test.js +161 -0
- package/test/utils-extra.test.js +116 -0
- package/test/utils.test.js +131 -0
- package/test/verify_fixes.js +76 -0
- package/test/worker-errors.test.js +96 -0
- package/test/worker-init.test.js +102 -0
- package/test/worker_throttling.test.js +93 -0
- package/tools/scripts/benchmark-search.js +95 -0
- package/tools/scripts/cache-stats.js +71 -0
- package/tools/scripts/manual-search.js +34 -0
- package/vitest.config.js +19 -9
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
|
|
2
|
+
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
|
3
|
+
import { CodebaseIndexer } from '../features/index-codebase.js';
|
|
4
|
+
import os from 'os';
|
|
5
|
+
|
|
6
|
+
vi.mock('os');
|
|
7
|
+
// Swap the embedding worker module for a stub. Each stub instance exposes
// spy versions of on/once/postMessage/terminate and, 10 ms after it is
// constructed, delivers `{ type: 'ready' }` to the first handler that was
// registered through `once('message', handler)`.
vi.mock('../lib/embedding-worker.js', () => {
  class MockWorker {
    constructor() {
      this.on = vi.fn();
      this.once = vi.fn();
      this.postMessage = vi.fn();
      this.terminate = vi.fn();

      // Simulate ready immediately
      setTimeout(() => {
        const registration = this.once.mock.calls.find(
          ([eventName]) => eventName === 'message'
        );
        const onReady = registration?.[1];
        if (onReady) {
          onReady({ type: 'ready' });
        }
      }, 10);
    }
  }

  return { default: MockWorker };
});
|
|
25
|
+
|
|
26
|
+
// Mock Worker from worker_threads
|
|
27
|
+
// Stub `worker_threads` so constructing a Worker spawns no real thread.
// The stand-in records on/once/postMessage/terminate calls and, 10 ms after
// construction, sends `{ type: 'ready' }` to the first `once('message', ...)`
// handler that has been registered on it.
vi.mock('worker_threads', () => {
  class Worker {
    constructor() {
      this.on = vi.fn();
      this.once = vi.fn();
      this.postMessage = vi.fn();
      this.terminate = vi.fn();

      setTimeout(() => {
        const registration = this.once.mock.calls.find(
          ([eventName]) => eventName === 'message'
        );
        const onReady = registration?.[1];
        if (onReady) {
          onReady({ type: 'ready' });
        }
      }, 10);
    }
  }

  return { Worker };
});
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
// Checks that CodebaseIndexer.initializeWorkers() ends up with a single
// worker when `os` reports 16 CPUs but only 2 GiB of free memory, even though
// the config asks for 10 worker threads.
describe('CodebaseIndexer RAM Throttling', () => {
  const originalEnv = process.env;

  beforeEach(() => {
    vi.resetModules();

    // Trick the module into thinking we are NOT in a test env so throttling runs:
    // run against a copy of the environment with the test-run markers removed.
    const sandboxEnv = { ...originalEnv };
    for (const marker of ['VITEST', 'NODE_ENV']) {
      delete sandboxEnv[marker];
    }
    process.env = sandboxEnv;
  });

  afterEach(() => {
    process.env = originalEnv;
    vi.clearAllMocks();
  });

  it('should throttle workers when RAM is low', async () => {
    const GIB = 1024 * 1024 * 1024;

    // Mock 16 cores
    os.cpus.mockReturnValue(Array(16).fill({}));
    // Mock 2GB free memory (the original notes budget ~1.5GB per worker,
    // i.e. room for one worker)
    os.freemem.mockReturnValue(2 * GIB);

    // workerThreads is an explicit number here rather than 'auto'.
    const indexer = new CodebaseIndexer(
      {},
      {},
      {
        workerThreads: 10,
        embeddingModel: 'jinaai/jina-embeddings-v2-base-code',
        verbose: true,
        searchDirectory: '/tmp',
      }
    );

    await indexer.initializeWorkers();

    const { length: activeWorkers } = indexer.workers;
    console.log(`Initialized ${activeWorkers} workers`);

    // NOTE(review): the original author's comments said this expectation
    // failed at the time of writing because throttling was only applied to
    // the 'auto' setting. Whether initializeWorkers() now satisfies it cannot
    // be seen from this file; confirm against features/index-codebase.js.
    expect(activeWorkers).toBeLessThan(10);
    expect(activeWorkers).toBe(1); // 2GB / ~1.5GB = 1 worker
  });
});
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import { loadConfig } from '../../lib/config.js';
|
|
2
|
+
import { EmbeddingsCache } from '../../lib/cache.js';
|
|
3
|
+
import { HybridSearch } from '../../features/hybrid-search.js';
|
|
4
|
+
import { pipeline, env } from '@xenova/transformers';
|
|
5
|
+
|
|
6
|
+
// Force same thread config as server
|
|
7
|
+
env.backends.onnx.numThreads = 2;
|
|
8
|
+
env.backends.onnx.wasm.numThreads = 2;
|
|
9
|
+
|
|
10
|
+
/**
 * Parse the benchmark's command-line options.
 *
 * Recognised forms (everything after the first two argv entries):
 *   --query <text>        search query
 *   --runs <n>            number of timed runs
 *   --max-results <n>     result limit passed to the search
 *   <text>                first bare argument is used as the query if none is set yet
 *
 * A flag is only honoured when a non-empty value follows it. Counts that do
 * not parse to a finite number greater than zero fall back to 5, and a
 * missing query falls back to 'database implementation'.
 *
 * @param {string[]} argv - full argument vector, e.g. `process.argv`
 * @returns {{ query: string, runs: number, maxResults: number }}
 */
function parseArgs(argv) {
  const parsed = { query: null, runs: 5, maxResults: 5 };

  // Flags that consume the following token as their value.
  const valueFlags = new Map([
    ['--query', (raw) => { parsed.query = raw; }],
    ['--runs', (raw) => { parsed.runs = parseInt(raw, 10); }],
    ['--max-results', (raw) => { parsed.maxResults = parseInt(raw, 10); }],
  ]);

  const tokens = argv.slice(2);
  let cursor = 0;
  while (cursor < tokens.length) {
    const token = tokens[cursor];
    const following = tokens[cursor + 1];
    const applyFlag = valueFlags.get(token);

    if (applyFlag && following) {
      applyFlag(following);
      cursor += 2; // skip the value that was just consumed
      continue;
    }

    // Anything that is not dash-prefixed is a positional query candidate.
    if (!token.startsWith('-') && !parsed.query) {
      parsed.query = token;
    }
    cursor += 1;
  }

  const positiveOr = (value, fallback) =>
    Number.isFinite(value) && value > 0 ? value : fallback;

  return {
    query: parsed.query || 'database implementation',
    runs: positiveOr(parsed.runs, 5),
    maxResults: positiveOr(parsed.maxResults, 5),
  };
}
|
|
38
|
+
|
|
39
|
+
/**
 * Return the p-th percentile of an ascending-sorted list using the
 * nearest-rank method: the smallest element such that at least p percent of
 * the samples are less than or equal to it.
 *
 * The previous index, floor(p/100 * n), pointed one element too far whenever
 * p * n / 100 was a whole number (for example p95 of 20 samples returned the
 * maximum instead of the 19th value).
 *
 * @param {number[]} sorted - samples sorted in ascending order
 * @param {number} p - percentile in the range 0..100
 * @returns {number} the selected sample, or 0 when `sorted` is empty
 */
function percentile(sorted, p) {
  const count = sorted.length;
  if (count === 0) return 0;

  // Multiply before dividing so whole-number ranks stay exact in floating point.
  const rank = Math.ceil((p * count) / 100);

  // Clamp so p = 0 maps to the first element and p >= 100 to the last.
  const idx = Math.min(count - 1, Math.max(0, rank - 1));
  return sorted[idx];
}
|
|
44
|
+
|
|
45
|
+
/**
 * Benchmark entry point.
 *
 * Loads the config and embeddings cache for the current working directory,
 * builds a HybridSearch around a lazily created feature-extraction pipeline,
 * runs one untimed warm-up search, then times `runs` searches and prints the
 * per-run duration, the average, the p95 and the change in resident set size
 * across the timed runs.
 */
async function main() {
  const { query, runs, maxResults } = parseArgs(process.argv);

  const config = await loadConfig(process.cwd());
  const cache = new EmbeddingsCache(config);
  await cache.load();

  // The pipeline is built on first use and stored on the function itself so
  // every later call reuses it.
  const embedder = async (text) => {
    if (!embedder._pipeline) {
      const sessionSettings = { session_options: { numThreads: 2 } };
      embedder._pipeline = await pipeline(
        'feature-extraction',
        config.embeddingModel,
        sessionSettings
      );
    }
    return embedder._pipeline(text, { pooling: 'mean', normalize: true });
  };

  const searcher = new HybridSearch(embedder, cache, config);

  console.info(`Benchmark query: "${query}"`);
  console.info(`Runs: ${runs}, maxResults: ${maxResults}`);
  console.info(
    `Vector load mode: ${config.vectorStoreLoadMode}, format: ${config.vectorStoreFormat}`
  );

  // Warm-up
  await searcher.search(query, maxResults);

  // Time a single search and return the elapsed wall-clock milliseconds.
  const timeOneSearch = async () => {
    const startedAt = process.hrtime.bigint();
    await searcher.search(query, maxResults);
    const finishedAt = process.hrtime.bigint();
    return Number(finishedAt - startedAt) / 1e6;
  };

  const durations = [];
  let totalMs = 0;
  const memBefore = process.memoryUsage().rss;

  for (let run = 1; run <= runs; run += 1) {
    const elapsedMs = await timeOneSearch();
    durations.push(elapsedMs);
    totalMs += elapsedMs;
    console.info(`Run ${run}: ${elapsedMs.toFixed(2)}ms`);
  }

  const memAfter = process.memoryUsage().rss;

  const ascending = durations.slice().sort((left, right) => left - right);
  const avg = totalMs / durations.length;
  const p95 = percentile(ascending, 95);
  const rssDeltaMb = (memAfter - memBefore) / 1024 / 1024;

  console.info(`Avg: ${avg.toFixed(2)}ms, p95: ${p95.toFixed(2)}ms`);
  console.info(`RSS change: ${rssDeltaMb.toFixed(1)}MB`);
}

// Any failure is printed and the process exits with status 1.
main().catch((failure) => {
  console.error(failure);
  process.exit(1);
});
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import fs from 'fs/promises';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import { loadConfig } from '../../lib/config.js';
|
|
5
|
+
import { BinaryVectorStore } from '../../lib/vector-store-binary.js';
|
|
6
|
+
|
|
7
|
+
/**
 * Print a JSON summary of what is present in the cache directory.
 *
 * The workspace is taken from `--workspace <dir>` when given, otherwise the
 * current working directory. Each probe (meta.json, file-hashes.json,
 * embeddings.json, the binary vector store) is best-effort: a missing or
 * unreadable artefact simply leaves the corresponding fields at their
 * defaults.
 */
async function main() {
  const cliArgs = process.argv.slice(2);
  const flagAt = cliArgs.indexOf('--workspace');
  const workspaceDir = flagAt === -1 ? null : cliArgs[flagAt + 1];

  const config = await loadConfig(workspaceDir || process.cwd());
  const cacheDir = config.cacheDirectory;

  // Resolve every path up front, outside the best-effort probes.
  const [metaPath, hashPath, jsonPath] = [
    'meta.json',
    'file-hashes.json',
    'embeddings.json',
  ].map((fileName) => path.join(cacheDir, fileName));

  const stats = {
    cacheDir,
    vectorStoreFormat: config.vectorStoreFormat,
    hasMeta: false,
    hasJson: false,
    hasBinary: false,
    vectorCount: 0,
    fileHashCount: 0,
  };

  // Run one probe and discard any error it raises.
  const bestEffort = async (probe) => {
    try {
      await probe();
    } catch {
      // ignore
    }
  };

  await bestEffort(async () => {
    const metaRaw = await fs.readFile(metaPath, 'utf-8');
    // Flag is set once the file is readable, before it is parsed.
    stats.hasMeta = true;
    stats.meta = JSON.parse(metaRaw);
  });

  await bestEffort(async () => {
    const hashRaw = await fs.readFile(hashPath, 'utf-8');
    stats.fileHashCount = Object.keys(JSON.parse(hashRaw)).length;
  });

  await bestEffort(async () => {
    const jsonRaw = await fs.readFile(jsonPath, 'utf-8');
    stats.hasJson = true;
    const parsed = JSON.parse(jsonRaw);
    if (Array.isArray(parsed)) {
      stats.jsonVectorCount = parsed.length;
    }
  });

  await bestEffort(async () => {
    const store = await BinaryVectorStore.load(cacheDir);
    stats.hasBinary = true;
    stats.vectorCount = store.length;
    stats.binaryDim = store.dim;
  });

  console.info(JSON.stringify(stats, null, 2));
}

// Report the failure message with a script prefix and exit with status 1.
main().catch((failure) => {
  console.error(`[cache-stats] ${failure.message}`);
  process.exit(1);
});
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
|
|
2
|
+
import { loadConfig } from '../../lib/config.js';
|
|
3
|
+
import { EmbeddingsCache } from '../../lib/cache.js';
|
|
4
|
+
import { HybridSearch } from '../../features/hybrid-search.js';
|
|
5
|
+
import { pipeline, env } from '@xenova/transformers';
|
|
6
|
+
|
|
7
|
+
// Force same thread config as server
|
|
8
|
+
env.backends.onnx.numThreads = 2;
|
|
9
|
+
env.backends.onnx.wasm.numThreads = 2;
|
|
10
|
+
|
|
11
|
+
/**
 * Run one hybrid search against the cache of the current working directory
 * and print the top 5 hits (file, line range, score and the first 200
 * characters of content with newlines flattened).
 *
 * The feature-extraction pipeline is created once, on the first embedding
 * request, and reused afterwards. Previously a new pipeline was constructed
 * on every embedder call, unlike the sibling benchmark script, which caches it.
 *
 * @param {string} query - text to search for
 * @returns {Promise<void>}
 */
async function runSearch(query) {
  const config = await loadConfig(process.cwd());
  const cache = new EmbeddingsCache(config);
  await cache.load();

  // Holds the in-flight or resolved pipeline so it is only constructed once,
  // even if the embedder is invoked again before the first call settles.
  let extractorPromise = null;
  const embedder = async (text) => {
    if (!extractorPromise) {
      extractorPromise = pipeline('feature-extraction', config.embeddingModel, {
        session_options: { numThreads: 2 },
      });
    }
    const extractor = await extractorPromise;
    return extractor(text, { pooling: 'mean', normalize: true });
  };

  const searcher = new HybridSearch(embedder, cache, config);
  console.info(`\n--- Searching for: "${query}" ---`);
  const { results } = await searcher.search(query, 5);

  results.forEach((r, i) => {
    console.info(`[${i + 1}] ${r.file}:${r.startLine}-${r.endLine} (Score: ${r.score.toFixed(4)})`);
    console.info(`    Content: ${r.content.substring(0, 200).replace(/\n/g, ' ')}...`);
  });
}
|
|
32
|
+
|
|
33
|
+
// Script entry: the first CLI argument is the query, with a built-in default.
const query = process.argv[2] || 'database implementation';

// Log any failure and mark the process as failed. Previously the error was
// only printed, so a failed search still exited with status 0.
runSearch(query).catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
package/vitest.config.js
CHANGED
|
@@ -4,26 +4,36 @@ export default defineConfig({
|
|
|
4
4
|
test: {
|
|
5
5
|
// Test files pattern
|
|
6
6
|
include: ['test/**/*.test.js'],
|
|
7
|
-
|
|
7
|
+
|
|
8
8
|
// Global test timeout (embedding models can be slow)
|
|
9
9
|
testTimeout: 180000,
|
|
10
|
-
|
|
10
|
+
|
|
11
11
|
// Hook timeout for setup/teardown
|
|
12
12
|
hookTimeout: 180000,
|
|
13
|
-
|
|
13
|
+
|
|
14
14
|
// Run test files sequentially to avoid resource conflicts
|
|
15
15
|
// Each file loads the embedding model which uses significant memory
|
|
16
16
|
fileParallelism: false,
|
|
17
|
-
|
|
17
|
+
|
|
18
18
|
// Run tests within a file sequentially
|
|
19
19
|
sequence: {
|
|
20
|
-
concurrent: false
|
|
20
|
+
concurrent: false,
|
|
21
21
|
},
|
|
22
|
-
|
|
22
|
+
|
|
23
23
|
// Verbose output
|
|
24
24
|
reporters: ['verbose'],
|
|
25
|
-
|
|
25
|
+
|
|
26
26
|
// Isolate tests to prevent memory leaks between test files
|
|
27
|
-
isolate: true
|
|
28
|
-
|
|
27
|
+
isolate: true,
|
|
28
|
+
|
|
29
|
+
coverage: {
|
|
30
|
+
provider: 'v8',
|
|
31
|
+
all: true,
|
|
32
|
+
include: ['features/**/*.js', 'lib/**/*.js', 'index.js'],
|
|
33
|
+
exclude: ['**/test/**'],
|
|
34
|
+
reporter: ['text', 'html', 'json'],
|
|
35
|
+
reportsDirectory: '.vitest-coverage',
|
|
36
|
+
clean: true,
|
|
37
|
+
},
|
|
38
|
+
},
|
|
29
39
|
});
|