@softerist/heuristic-mcp 2.1.47 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent/workflows/code-review.md +60 -0
- package/.prettierrc +7 -0
- package/ARCHITECTURE.md +105 -170
- package/CONTRIBUTING.md +32 -113
- package/GEMINI.md +73 -0
- package/LICENSE +21 -21
- package/README.md +161 -54
- package/config.json +876 -75
- package/debug-pids.js +27 -0
- package/eslint.config.js +36 -0
- package/features/ann-config.js +37 -26
- package/features/clear-cache.js +28 -19
- package/features/find-similar-code.js +142 -66
- package/features/hybrid-search.js +253 -93
- package/features/index-codebase.js +1455 -394
- package/features/lifecycle.js +813 -180
- package/features/register.js +58 -52
- package/index.js +450 -306
- package/lib/cache-ops.js +22 -0
- package/lib/cache-utils.js +68 -0
- package/lib/cache.js +1392 -587
- package/lib/call-graph.js +165 -50
- package/lib/cli.js +154 -0
- package/lib/config.js +462 -121
- package/lib/embedding-process.js +77 -0
- package/lib/embedding-worker.js +545 -30
- package/lib/ignore-patterns.js +61 -59
- package/lib/json-worker.js +14 -0
- package/lib/json-writer.js +344 -0
- package/lib/logging.js +88 -0
- package/lib/memory-logger.js +13 -0
- package/lib/project-detector.js +13 -17
- package/lib/server-lifecycle.js +38 -0
- package/lib/settings-editor.js +645 -0
- package/lib/tokenizer.js +207 -104
- package/lib/utils.js +273 -198
- package/lib/vector-store-binary.js +592 -0
- package/mcp_config.example.json +13 -0
- package/package.json +13 -2
- package/scripts/clear-cache.js +6 -17
- package/scripts/download-model.js +14 -9
- package/scripts/postinstall.js +5 -5
- package/search-configs.js +36 -0
- package/test/ann-config.test.js +179 -0
- package/test/ann-fallback.test.js +6 -6
- package/test/binary-store.test.js +69 -0
- package/test/cache-branches.test.js +120 -0
- package/test/cache-errors.test.js +264 -0
- package/test/cache-extra.test.js +300 -0
- package/test/cache-helpers.test.js +205 -0
- package/test/cache-hnsw-failure.test.js +40 -0
- package/test/cache-json-worker.test.js +190 -0
- package/test/cache-worker.test.js +102 -0
- package/test/cache.test.js +443 -0
- package/test/call-graph.test.js +103 -4
- package/test/clear-cache.test.js +69 -68
- package/test/code-review-workflow.test.js +50 -0
- package/test/config.test.js +418 -0
- package/test/coverage-gap.test.js +497 -0
- package/test/coverage-maximizer.test.js +236 -0
- package/test/debug-analysis.js +107 -0
- package/test/embedding-model.test.js +173 -103
- package/test/embedding-worker-extra.test.js +272 -0
- package/test/embedding-worker.test.js +158 -0
- package/test/features.test.js +139 -0
- package/test/final-boost.test.js +271 -0
- package/test/final-polish.test.js +183 -0
- package/test/final.test.js +95 -0
- package/test/find-similar-code.test.js +191 -0
- package/test/helpers.js +92 -11
- package/test/helpers.test.js +46 -0
- package/test/hybrid-search-basic.test.js +62 -0
- package/test/hybrid-search-branch.test.js +202 -0
- package/test/hybrid-search-callgraph.test.js +229 -0
- package/test/hybrid-search-extra.test.js +81 -0
- package/test/hybrid-search.test.js +484 -71
- package/test/index-cli.test.js +520 -0
- package/test/index-codebase-batch.test.js +119 -0
- package/test/index-codebase-branches.test.js +585 -0
- package/test/index-codebase-core.test.js +1032 -0
- package/test/index-codebase-edge-cases.test.js +254 -0
- package/test/index-codebase-errors.test.js +132 -0
- package/test/index-codebase-gap.test.js +239 -0
- package/test/index-codebase-lines.test.js +151 -0
- package/test/index-codebase-watcher.test.js +259 -0
- package/test/index-codebase-zone.test.js +259 -0
- package/test/index-codebase.test.js +371 -69
- package/test/index-memory.test.js +220 -0
- package/test/indexer-detailed.test.js +176 -0
- package/test/integration.test.js +148 -92
- package/test/json-worker.test.js +50 -0
- package/test/lifecycle.test.js +541 -0
- package/test/master.test.js +198 -0
- package/test/perfection.test.js +349 -0
- package/test/project-detector.test.js +65 -0
- package/test/register.test.js +262 -0
- package/test/tokenizer.test.js +55 -93
- package/test/ultra-maximizer.test.js +116 -0
- package/test/utils-branches.test.js +161 -0
- package/test/utils-extra.test.js +116 -0
- package/test/utils.test.js +131 -0
- package/test/verify_fixes.js +76 -0
- package/test/worker-errors.test.js +96 -0
- package/test/worker-init.test.js +102 -0
- package/test/worker_throttling.test.js +93 -0
- package/tools/scripts/benchmark-search.js +95 -0
- package/tools/scripts/cache-stats.js +71 -0
- package/tools/scripts/manual-search.js +34 -0
- package/vitest.config.js +19 -9
|
@@ -1,77 +1,86 @@
|
|
|
1
|
-
import { fdir } from
|
|
2
|
-
import fs from
|
|
3
|
-
import chokidar from
|
|
4
|
-
import path from
|
|
5
|
-
import os from
|
|
6
|
-
import { Worker } from
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
1
|
+
import { fdir } from 'fdir';
|
|
2
|
+
import fs from 'fs/promises';
|
|
3
|
+
import chokidar from 'chokidar';
|
|
4
|
+
import path from 'path';
|
|
5
|
+
import os from 'os';
|
|
6
|
+
import { Worker } from 'worker_threads';
|
|
7
|
+
import { spawn } from 'child_process';
|
|
8
|
+
import { setTimeout as delay } from 'timers/promises';
|
|
9
|
+
import { fileURLToPath } from 'url';
|
|
10
|
+
import { smartChunk, hashContent } from '../lib/utils.js';
|
|
11
|
+
import { extractCallData } from '../lib/call-graph.js';
|
|
12
|
+
|
|
13
|
+
import ignore from 'ignore';
|
|
14
|
+
|
|
15
|
+
function toFloat32Array(vector) {
|
|
16
|
+
// Always create a copy to ensure we have a unique buffer
|
|
17
|
+
// and avoid issues with reusable WASM memory views
|
|
18
|
+
return new Float32Array(vector);
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
function isTestEnv() {
|
|
22
|
+
return process.env.VITEST === 'true' || process.env.NODE_ENV === 'test';
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
function normalizePath(value) {
|
|
26
|
+
if (typeof value !== 'string') return '';
|
|
27
|
+
return value.split(path.sep).join('/');
|
|
13
28
|
}
|
|
14
29
|
|
|
15
30
|
function globToRegExp(pattern) {
|
|
16
|
-
let regex =
|
|
17
|
-
for (let i = 0; i < pattern.length; ) {
|
|
31
|
+
let regex = '^';
|
|
32
|
+
for (let i = 0; i < pattern.length; i += 1) {
|
|
18
33
|
const char = pattern[i];
|
|
19
|
-
if (char ===
|
|
20
|
-
if (pattern[i + 1] ===
|
|
21
|
-
if (pattern[i + 2] ===
|
|
22
|
-
regex +=
|
|
23
|
-
i += 3;
|
|
24
|
-
} else {
|
|
25
|
-
regex += ".*";
|
|
34
|
+
if (char === '*') {
|
|
35
|
+
if (pattern[i + 1] === '*') {
|
|
36
|
+
if (pattern[i + 2] === '/') {
|
|
37
|
+
regex += '(?:.*/)?';
|
|
26
38
|
i += 2;
|
|
39
|
+
} else {
|
|
40
|
+
regex += '.*';
|
|
41
|
+
i += 1;
|
|
27
42
|
}
|
|
28
43
|
} else {
|
|
29
|
-
regex +=
|
|
30
|
-
i += 1;
|
|
44
|
+
regex += '[^/]*';
|
|
31
45
|
}
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
if (char
|
|
35
|
-
regex +=
|
|
36
|
-
|
|
37
|
-
|
|
46
|
+
} else if (char === '?') {
|
|
47
|
+
regex += '[^/]';
|
|
48
|
+
} else if ('\\.[]{}()+-^$|'.includes(char)) {
|
|
49
|
+
regex += `\\${char}`;
|
|
50
|
+
} else {
|
|
51
|
+
regex += char;
|
|
38
52
|
}
|
|
39
|
-
regex += escapeRegExp(char);
|
|
40
|
-
i += 1;
|
|
41
53
|
}
|
|
42
|
-
regex +=
|
|
54
|
+
regex += '$';
|
|
43
55
|
return new RegExp(regex);
|
|
44
56
|
}
|
|
45
57
|
|
|
46
|
-
function normalizePath(filePath) {
|
|
47
|
-
return filePath.split(path.sep).join("/");
|
|
48
|
-
}
|
|
49
|
-
|
|
50
58
|
function buildExcludeMatchers(patterns) {
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
.
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
59
|
+
if (!Array.isArray(patterns)) return [];
|
|
60
|
+
return patterns
|
|
61
|
+
.filter((pattern) => typeof pattern === 'string' && pattern.length > 0)
|
|
62
|
+
.map((pattern) => {
|
|
63
|
+
const normalized = pattern.replace(/\\/g, '/');
|
|
64
|
+
const matchBase = !normalized.includes('/');
|
|
65
|
+
return {
|
|
66
|
+
pattern: normalized,
|
|
67
|
+
matchBase,
|
|
68
|
+
regex: globToRegExp(normalized),
|
|
69
|
+
};
|
|
70
|
+
});
|
|
57
71
|
}
|
|
58
72
|
|
|
59
73
|
function matchesExcludePatterns(filePath, matchers) {
|
|
60
|
-
if (matchers.length === 0) return false;
|
|
74
|
+
if (!filePath || matchers.length === 0) return false;
|
|
61
75
|
const normalized = normalizePath(filePath);
|
|
62
|
-
const
|
|
63
|
-
|
|
76
|
+
const base = path.posix.basename(normalized);
|
|
64
77
|
for (const matcher of matchers) {
|
|
65
|
-
const target = matcher.matchBase ?
|
|
66
|
-
if (matcher.regex.test(target))
|
|
67
|
-
return true;
|
|
68
|
-
}
|
|
78
|
+
const target = matcher.matchBase ? base : normalized;
|
|
79
|
+
if (matcher.regex.test(target)) return true;
|
|
69
80
|
}
|
|
70
81
|
return false;
|
|
71
82
|
}
|
|
72
83
|
|
|
73
|
-
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
74
|
-
|
|
75
84
|
export class CodebaseIndexer {
|
|
76
85
|
constructor(embedder, cache, config, server = null) {
|
|
77
86
|
this.embedder = embedder;
|
|
@@ -82,54 +91,175 @@ export class CodebaseIndexer {
|
|
|
82
91
|
this.workers = [];
|
|
83
92
|
this.workerReady = [];
|
|
84
93
|
this.isIndexing = false;
|
|
85
|
-
this.
|
|
94
|
+
this.processingWatchEvents = false;
|
|
95
|
+
this.pendingWatchEvents = new Map();
|
|
96
|
+
const cacheRelative = this.getCacheRelativePath();
|
|
97
|
+
const autoExclude = ['.smart-coding-cache'];
|
|
98
|
+
if (cacheRelative) {
|
|
99
|
+
autoExclude.push(cacheRelative, `${cacheRelative}/**`);
|
|
100
|
+
}
|
|
101
|
+
this.excludeMatchers = buildExcludeMatchers([
|
|
102
|
+
...autoExclude,
|
|
103
|
+
...(this.config.excludePatterns || []),
|
|
104
|
+
]);
|
|
105
|
+
this.gitignore = ignore();
|
|
106
|
+
this.workerFailureCount = 0;
|
|
107
|
+
this.workersDisabledUntil = 0;
|
|
108
|
+
this.workerCircuitOpen = false;
|
|
109
|
+
this._retryTimer = null;
|
|
110
|
+
this._lastProgress = null;
|
|
111
|
+
this.currentIndexMode = null;
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
maybeResetWorkerCircuit() {
|
|
115
|
+
if (
|
|
116
|
+
this.workerCircuitOpen &&
|
|
117
|
+
this.workersDisabledUntil &&
|
|
118
|
+
Date.now() >= this.workersDisabledUntil
|
|
119
|
+
) {
|
|
120
|
+
this.workerCircuitOpen = false;
|
|
121
|
+
this.workersDisabledUntil = 0;
|
|
122
|
+
this.workerFailureCount = 0;
|
|
123
|
+
if (this.config.verbose) {
|
|
124
|
+
console.info('[Indexer] Worker circuit closed; resuming worker use');
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
shouldUseWorkers() {
|
|
130
|
+
this.maybeResetWorkerCircuit();
|
|
131
|
+
if (this.workersDisabledUntil && Date.now() < this.workersDisabledUntil) {
|
|
132
|
+
return false;
|
|
133
|
+
}
|
|
134
|
+
if (isTestEnv()) return false;
|
|
135
|
+
return os.cpus().length > 1 && this.config.workerThreads !== 0 && !this.config.embeddingProcessPerBatch;
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
scheduleRetry() {
|
|
139
|
+
if (this._retryTimer || isTestEnv()) return;
|
|
140
|
+
const delayMs = Math.max(1000, this.workersDisabledUntil - Date.now());
|
|
141
|
+
if (!Number.isFinite(delayMs) || delayMs <= 0) return;
|
|
142
|
+
this._retryTimer = setTimeout(() => {
|
|
143
|
+
this._retryTimer = null;
|
|
144
|
+
if (!this.isIndexing && !this.processingWatchEvents) {
|
|
145
|
+
this.indexAll().catch(() => null);
|
|
146
|
+
}
|
|
147
|
+
}, delayMs);
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
recordWorkerFailure(reason) {
|
|
151
|
+
const threshold = Number.isInteger(this.config.workerFailureThreshold)
|
|
152
|
+
? this.config.workerFailureThreshold
|
|
153
|
+
: 1;
|
|
154
|
+
const cooldownMs = Number.isInteger(this.config.workerFailureCooldownMs)
|
|
155
|
+
? this.config.workerFailureCooldownMs
|
|
156
|
+
: 10 * 60 * 1000;
|
|
157
|
+
|
|
158
|
+
this.workerFailureCount += 1;
|
|
159
|
+
console.warn(`[Indexer] Worker failure: ${reason} (${this.workerFailureCount}/${threshold})`);
|
|
160
|
+
|
|
161
|
+
if (this.workerFailureCount >= threshold) {
|
|
162
|
+
this.workersDisabledUntil = Date.now() + cooldownMs;
|
|
163
|
+
this.workerCircuitOpen = true;
|
|
164
|
+
console.warn(
|
|
165
|
+
`[Indexer] Worker circuit open; pausing worker use for ${Math.round(cooldownMs / 1000)}s`
|
|
166
|
+
);
|
|
167
|
+
this.scheduleRetry();
|
|
168
|
+
}
|
|
86
169
|
}
|
|
87
170
|
|
|
88
171
|
/**
|
|
89
172
|
* Initialize worker thread pool for parallel embedding
|
|
90
173
|
*/
|
|
91
174
|
async initializeWorkers() {
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
175
|
+
// Check if we have any active workers
|
|
176
|
+
const activeWorkers = this.workers.filter(w => w !== null);
|
|
177
|
+
if (activeWorkers.length > 0) return;
|
|
178
|
+
|
|
179
|
+
// If we have workers array but they are all null, reset it
|
|
180
|
+
if (this.workers.length > 0) {
|
|
181
|
+
this.workers = [];
|
|
182
|
+
this.workerReady = [];
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
if (this.initWorkerPromise) return this.initWorkerPromise;
|
|
186
|
+
|
|
187
|
+
this.initWorkerPromise = (async () => {
|
|
188
|
+
try {
|
|
189
|
+
let numWorkers =
|
|
190
|
+
this.config.workerThreads === 'auto'
|
|
191
|
+
? Math.min(2, Math.max(1, os.cpus().length - 1)) // Cap 'auto' at 2 workers
|
|
192
|
+
: typeof this.config.workerThreads === 'number'
|
|
193
|
+
? this.config.workerThreads
|
|
194
|
+
: 1;
|
|
195
|
+
|
|
196
|
+
// Resource-aware scaling: check available RAM (skip in test env to avoid mocking issues)
|
|
197
|
+
// We apply this if we have > 1 worker, regardless of whether it was 'auto' or explicit
|
|
198
|
+
if (numWorkers > 1 && !isTestEnv() && typeof os.freemem === 'function') {
|
|
199
|
+
// Jina model typically requires ~1.5GB - 2GB per worker
|
|
200
|
+
const freeMemGb = os.freemem() / 1024 / 1024 / 1024;
|
|
201
|
+
const isHeavyModel = this.config.embeddingModel.includes('jina');
|
|
202
|
+
const memPerWorker = isHeavyModel ? 2.0 : 0.8;
|
|
203
|
+
|
|
204
|
+
const memCappedWorkers = Math.max(1, Math.floor(freeMemGb / memPerWorker));
|
|
205
|
+
if (memCappedWorkers < numWorkers) {
|
|
206
|
+
if (this.config.verbose) {
|
|
207
|
+
console.info(
|
|
208
|
+
`[Indexer] Throttling workers from ${numWorkers} to ${memCappedWorkers} due to available RAM (${freeMemGb.toFixed(1)}GB)`
|
|
209
|
+
);
|
|
210
|
+
}
|
|
211
|
+
numWorkers = memCappedWorkers;
|
|
212
|
+
}
|
|
213
|
+
}
|
|
95
214
|
|
|
96
|
-
//
|
|
97
|
-
if (numWorkers
|
|
98
|
-
console.
|
|
215
|
+
// Use workers even for single worker to benefit from --expose-gc and separate heap
|
|
216
|
+
if (numWorkers < 1) {
|
|
217
|
+
console.info('[Indexer] No workers configured, using main thread (warning: higher RAM usage)');
|
|
99
218
|
return;
|
|
100
219
|
}
|
|
101
220
|
|
|
102
221
|
if (this.config.verbose) {
|
|
103
|
-
console.
|
|
222
|
+
console.info(
|
|
223
|
+
`[Indexer] Worker config: workerThreads=${this.config.workerThreads}, resolved to ${numWorkers}`
|
|
224
|
+
);
|
|
104
225
|
}
|
|
105
226
|
|
|
106
|
-
|
|
227
|
+
// Force 1 thread per worker to prevent CPU saturation (ONNX is very aggressive)
|
|
228
|
+
const threadsPerWorker = 1;
|
|
107
229
|
|
|
108
|
-
|
|
230
|
+
console.info(`[Indexer] Initializing ${numWorkers} worker threads (${threadsPerWorker} threads per worker)...`);
|
|
109
231
|
|
|
110
232
|
for (let i = 0; i < numWorkers; i++) {
|
|
111
233
|
try {
|
|
112
|
-
const worker = new Worker(
|
|
234
|
+
const worker = new Worker(new URL('../lib/embedding-worker.js', import.meta.url), {
|
|
113
235
|
workerData: {
|
|
236
|
+
workerId: i,
|
|
114
237
|
embeddingModel: this.config.embeddingModel,
|
|
115
|
-
verbose: this.config.verbose
|
|
116
|
-
|
|
238
|
+
verbose: this.config.verbose,
|
|
239
|
+
numThreads: threadsPerWorker,
|
|
240
|
+
},
|
|
117
241
|
});
|
|
118
242
|
|
|
119
243
|
const readyPromise = new Promise((resolve, reject) => {
|
|
120
|
-
const
|
|
244
|
+
const readyTimeoutMs = isTestEnv() ? 1000 : 120000;
|
|
245
|
+
const timeout = setTimeout(
|
|
246
|
+
() => reject(new Error('Worker init timeout')),
|
|
247
|
+
readyTimeoutMs
|
|
248
|
+
);
|
|
121
249
|
|
|
122
|
-
worker.once(
|
|
250
|
+
worker.once('message', (msg) => {
|
|
123
251
|
clearTimeout(timeout);
|
|
124
|
-
if (msg.type ===
|
|
252
|
+
if (msg.type === 'ready') {
|
|
125
253
|
resolve(worker);
|
|
126
|
-
} else if (msg.type ===
|
|
254
|
+
} else if (msg.type === 'error') {
|
|
255
|
+
console.warn(`[Indexer] Worker initialization failed: ${msg.error}`);
|
|
127
256
|
reject(new Error(msg.error));
|
|
128
257
|
}
|
|
129
258
|
});
|
|
130
259
|
|
|
131
|
-
worker.once(
|
|
260
|
+
worker.once('error', (err) => {
|
|
132
261
|
clearTimeout(timeout);
|
|
262
|
+
console.warn(`[Indexer] Worker initialization failed: ${err.message}`);
|
|
133
263
|
reject(err);
|
|
134
264
|
});
|
|
135
265
|
});
|
|
@@ -137,40 +267,143 @@ export class CodebaseIndexer {
|
|
|
137
267
|
this.workers.push(worker);
|
|
138
268
|
this.workerReady.push(readyPromise);
|
|
139
269
|
} catch (err) {
|
|
140
|
-
console.
|
|
270
|
+
console.warn(`[Indexer] Failed to create worker ${i}: ${err.message}`);
|
|
141
271
|
}
|
|
142
272
|
}
|
|
143
273
|
|
|
144
274
|
// Wait for all workers to be ready
|
|
145
275
|
try {
|
|
146
276
|
await Promise.all(this.workerReady);
|
|
147
|
-
console.
|
|
277
|
+
console.info(`[Indexer] ${this.workers.length} workers ready`);
|
|
148
278
|
if (this.config.verbose) {
|
|
149
|
-
console.
|
|
279
|
+
console.info(`[Indexer] Each worker loaded model: ${this.config.embeddingModel}`);
|
|
150
280
|
}
|
|
151
281
|
} catch (err) {
|
|
152
|
-
console.
|
|
282
|
+
console.warn(
|
|
283
|
+
`[Indexer] Worker initialization failed: ${err.message}, falling back to single-threaded`
|
|
284
|
+
);
|
|
153
285
|
await this.terminateWorkers();
|
|
154
286
|
}
|
|
287
|
+
} finally {
|
|
288
|
+
this.initWorkerPromise = null;
|
|
289
|
+
}
|
|
290
|
+
})();
|
|
291
|
+
return this.initWorkerPromise;
|
|
155
292
|
}
|
|
156
293
|
|
|
157
294
|
/**
|
|
158
295
|
* Terminate all worker threads
|
|
159
296
|
*/
|
|
160
297
|
async terminateWorkers() {
|
|
161
|
-
const
|
|
298
|
+
const WORKER_SHUTDOWN_TIMEOUT = isTestEnv() ? 50 : 5000;
|
|
299
|
+
const terminations = this.workers
|
|
300
|
+
.filter(Boolean)
|
|
301
|
+
.map((worker) => {
|
|
162
302
|
try {
|
|
163
|
-
worker.postMessage({ type:
|
|
164
|
-
} catch {}
|
|
165
|
-
|
|
166
|
-
|
|
303
|
+
worker.postMessage({ type: 'shutdown' });
|
|
304
|
+
} catch { /* ignore */ }
|
|
305
|
+
|
|
306
|
+
let exited = false;
|
|
307
|
+
const exitPromise = new Promise((resolve) => {
|
|
308
|
+
worker.once('exit', () => {
|
|
309
|
+
exited = true;
|
|
310
|
+
resolve();
|
|
311
|
+
});
|
|
312
|
+
});
|
|
313
|
+
const timeoutPromise = delay(WORKER_SHUTDOWN_TIMEOUT);
|
|
314
|
+
|
|
315
|
+
return Promise.race([exitPromise, timeoutPromise]).then(() => {
|
|
316
|
+
if (!exited) {
|
|
317
|
+
const termination = worker.terminate?.();
|
|
318
|
+
return Promise.resolve(termination).catch(() => null);
|
|
319
|
+
}
|
|
320
|
+
return null;
|
|
321
|
+
});
|
|
322
|
+
});
|
|
167
323
|
await Promise.all(terminations);
|
|
168
324
|
this.workers = [];
|
|
169
325
|
this.workerReady = [];
|
|
170
326
|
}
|
|
171
327
|
|
|
328
|
+
async loadGitignore() {
|
|
329
|
+
if (!this.config.searchDirectory) {
|
|
330
|
+
this.gitignore = ignore();
|
|
331
|
+
return;
|
|
332
|
+
}
|
|
333
|
+
try {
|
|
334
|
+
const gitignorePath = path.join(this.config.searchDirectory, '.gitignore');
|
|
335
|
+
const content = await fs.readFile(gitignorePath, 'utf8');
|
|
336
|
+
this.gitignore = ignore().add(content);
|
|
337
|
+
if (this.config.verbose) console.info('[Indexer] Loaded .gitignore rules');
|
|
338
|
+
} catch (_e) {
|
|
339
|
+
// No .gitignore or error reading it
|
|
340
|
+
this.gitignore = ignore();
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
getCacheRelativePath() {
|
|
345
|
+
if (!this.config.cacheDirectory || !this.config.searchDirectory) return null;
|
|
346
|
+
const relative = path.relative(this.config.searchDirectory, this.config.cacheDirectory);
|
|
347
|
+
if (!relative || relative.startsWith('..') || path.isAbsolute(relative)) return null;
|
|
348
|
+
return normalizePath(relative);
|
|
349
|
+
}
|
|
350
|
+
|
|
172
351
|
isExcluded(filePath) {
|
|
173
|
-
|
|
352
|
+
if (!filePath || typeof filePath !== 'string') {
|
|
353
|
+
return false;
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
let relative = filePath;
|
|
357
|
+
if (path.isAbsolute(filePath)) {
|
|
358
|
+
if (this.config.searchDirectory) {
|
|
359
|
+
relative = path.relative(this.config.searchDirectory, filePath);
|
|
360
|
+
if (!relative || relative.startsWith('..') || path.isAbsolute(relative)) {
|
|
361
|
+
return false;
|
|
362
|
+
}
|
|
363
|
+
} else {
|
|
364
|
+
const root = path.parse(filePath).root;
|
|
365
|
+
relative = filePath.slice(root.length);
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
relative = normalizePath(relative);
|
|
370
|
+
|
|
371
|
+
if (matchesExcludePatterns(relative, this.excludeMatchers)) return true;
|
|
372
|
+
|
|
373
|
+
if (this.gitignore.ignores(relative)) return true;
|
|
374
|
+
|
|
375
|
+
return false;
|
|
376
|
+
}
|
|
377
|
+
|
|
378
|
+
async replaceDeadWorker(index) {
|
|
379
|
+
if (this.config.verbose) console.info(`[Indexer] Replacing dead worker at index ${index}...`);
|
|
380
|
+
|
|
381
|
+
const newWorker = new Worker(new URL('../lib/embedding-worker.js', import.meta.url), {
|
|
382
|
+
workerData: {
|
|
383
|
+
workerId: index,
|
|
384
|
+
embeddingModel: this.config.embeddingModel,
|
|
385
|
+
verbose: this.config.verbose,
|
|
386
|
+
numThreads: 1,
|
|
387
|
+
},
|
|
388
|
+
});
|
|
389
|
+
|
|
390
|
+
// Wait for ready
|
|
391
|
+
await new Promise((resolve, reject) => {
|
|
392
|
+
const timeout = setTimeout(() => reject(new Error('Timeout')), 30000);
|
|
393
|
+
newWorker.once('message', (msg) => {
|
|
394
|
+
if (msg.type === 'ready') {
|
|
395
|
+
clearTimeout(timeout);
|
|
396
|
+
resolve();
|
|
397
|
+
}
|
|
398
|
+
});
|
|
399
|
+
newWorker.once('error', (err) => {
|
|
400
|
+
clearTimeout(timeout);
|
|
401
|
+
reject(err);
|
|
402
|
+
});
|
|
403
|
+
});
|
|
404
|
+
|
|
405
|
+
this.workers[index] = newWorker;
|
|
406
|
+
if (this.config.verbose) console.info(`[Indexer] Worker ${index} respawned successfully`);
|
|
174
407
|
}
|
|
175
408
|
|
|
176
409
|
/**
|
|
@@ -179,87 +412,319 @@ export class CodebaseIndexer {
|
|
|
179
412
|
sendProgress(progress, total, message) {
|
|
180
413
|
if (this.server) {
|
|
181
414
|
try {
|
|
182
|
-
this.server.sendNotification(
|
|
183
|
-
progressToken:
|
|
415
|
+
this.server.sendNotification('notifications/progress', {
|
|
416
|
+
progressToken: 'indexing',
|
|
184
417
|
progress,
|
|
185
418
|
total,
|
|
186
|
-
message
|
|
419
|
+
message,
|
|
187
420
|
});
|
|
188
|
-
} catch (
|
|
421
|
+
} catch (_err) {
|
|
189
422
|
// Silently ignore if client doesn't support progress notifications
|
|
190
423
|
}
|
|
191
424
|
}
|
|
425
|
+
this.writeProgressFile(progress, total, message).catch(() => null);
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
async writeProgressFile(progress, total, message) {
|
|
429
|
+
if (!this.config.enableCache) return;
|
|
430
|
+
|
|
431
|
+
const payload = {
|
|
432
|
+
progress,
|
|
433
|
+
total,
|
|
434
|
+
message,
|
|
435
|
+
updatedAt: new Date().toISOString(),
|
|
436
|
+
indexMode: this.currentIndexMode || null,
|
|
437
|
+
workerCircuitOpen: !!this.workerCircuitOpen,
|
|
438
|
+
workersDisabledUntil: Number.isFinite(this.workersDisabledUntil)
|
|
439
|
+
? this.workersDisabledUntil
|
|
440
|
+
: null,
|
|
441
|
+
};
|
|
442
|
+
|
|
443
|
+
const prev = this._lastProgress;
|
|
444
|
+
if (
|
|
445
|
+
prev &&
|
|
446
|
+
prev.progress === payload.progress &&
|
|
447
|
+
prev.total === payload.total &&
|
|
448
|
+
prev.message === payload.message
|
|
449
|
+
) {
|
|
450
|
+
return;
|
|
451
|
+
}
|
|
452
|
+
|
|
453
|
+
this._lastProgress = payload;
|
|
454
|
+
try {
|
|
455
|
+
await fs.mkdir(this.config.cacheDirectory, { recursive: true });
|
|
456
|
+
const progressPath = path.join(this.config.cacheDirectory, 'progress.json');
|
|
457
|
+
await fs.writeFile(progressPath, JSON.stringify(payload), 'utf-8');
|
|
458
|
+
} catch {
|
|
459
|
+
// ignore progress write errors
|
|
460
|
+
}
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
async processFilesWithWorkers(allFiles) {
|
|
464
|
+
const activeWorkers = this.workers
|
|
465
|
+
.map((worker, index) => ({ worker, index }))
|
|
466
|
+
.filter((entry) => entry.worker);
|
|
467
|
+
|
|
468
|
+
if (activeWorkers.length === 0) {
|
|
469
|
+
// Fallback: This method shouldn't be called if workers aren't available,
|
|
470
|
+
// but if it is, we return empty and let the caller handle legacy fallback.
|
|
471
|
+
return [];
|
|
472
|
+
}
|
|
473
|
+
|
|
474
|
+
const results = [];
|
|
475
|
+
const chunkSize = Math.ceil(allFiles.length / activeWorkers.length);
|
|
476
|
+
const workerPromises = [];
|
|
477
|
+
const configuredTimeout = Number.isInteger(this.config.workerBatchTimeoutMs)
|
|
478
|
+
? this.config.workerBatchTimeoutMs
|
|
479
|
+
: 300000;
|
|
480
|
+
const WORKER_TIMEOUT = isTestEnv() ? 1000 : configuredTimeout;
|
|
481
|
+
|
|
482
|
+
for (let i = 0; i < activeWorkers.length; i++) {
|
|
483
|
+
const { worker, index: workerIndex } = activeWorkers[i];
|
|
484
|
+
const workerFiles = allFiles.slice(i * chunkSize, (i + 1) * chunkSize);
|
|
485
|
+
if (workerFiles.length === 0) continue;
|
|
486
|
+
|
|
487
|
+
if (this.config.verbose) {
|
|
488
|
+
console.info(`[Indexer] Worker ${workerIndex}: processing ${workerFiles.length} files`);
|
|
489
|
+
}
|
|
490
|
+
|
|
491
|
+
const promise = new Promise((resolve) => {
|
|
492
|
+
const batchId = `file-batch-${i}-${Date.now()}`;
|
|
493
|
+
const batchResults = [];
|
|
494
|
+
|
|
495
|
+
const killWorker = async () => {
|
|
496
|
+
try {
|
|
497
|
+
await worker.terminate?.();
|
|
498
|
+
} catch (_err) {
|
|
499
|
+
// ignore termination errors
|
|
500
|
+
}
|
|
501
|
+
this.workers[workerIndex] = null;
|
|
502
|
+
this.replaceDeadWorker(workerIndex).catch(() => {});
|
|
503
|
+
};
|
|
504
|
+
|
|
505
|
+
const handleTimeout = () => {
|
|
506
|
+
// Terminate first to ensure no more messages arrive
|
|
507
|
+
void killWorker();
|
|
508
|
+
worker.off('message', handler);
|
|
509
|
+
worker.off('error', errorHandler);
|
|
510
|
+
console.warn(`[Indexer] Worker ${workerIndex} timed out (files)`);
|
|
511
|
+
this.recordWorkerFailure(`timeout (batch ${batchId})`);
|
|
512
|
+
resolve([]);
|
|
513
|
+
};
|
|
514
|
+
|
|
515
|
+
let timeout = setTimeout(handleTimeout, WORKER_TIMEOUT);
|
|
516
|
+
|
|
517
|
+
const finalize = (results) => {
|
|
518
|
+
clearTimeout(timeout);
|
|
519
|
+
worker.off('message', handler);
|
|
520
|
+
worker.off('error', errorHandler);
|
|
521
|
+
resolve(results);
|
|
522
|
+
};
|
|
523
|
+
|
|
524
|
+
const handler = (msg) => {
|
|
525
|
+
if (msg.batchId === batchId) {
|
|
526
|
+
if (msg.type === 'results') {
|
|
527
|
+
if (Array.isArray(msg.results)) {
|
|
528
|
+
batchResults.push(...msg.results);
|
|
529
|
+
}
|
|
530
|
+
if (msg.done) {
|
|
531
|
+
finalize(batchResults);
|
|
532
|
+
}
|
|
533
|
+
} else if (msg.type === 'error') {
|
|
534
|
+
finalize([]);
|
|
535
|
+
}
|
|
536
|
+
}
|
|
537
|
+
};
|
|
538
|
+
|
|
539
|
+
const errorHandler = (err) => {
|
|
540
|
+
console.warn(`[Indexer] Worker ${workerIndex} crashed: ${err.message}`);
|
|
541
|
+
this.recordWorkerFailure(`crash (${err.message})`);
|
|
542
|
+
void killWorker();
|
|
543
|
+
finalize([]);
|
|
544
|
+
};
|
|
545
|
+
|
|
546
|
+
worker.once('error', errorHandler);
|
|
547
|
+
worker.on('message', handler);
|
|
548
|
+
|
|
549
|
+
try {
|
|
550
|
+
worker.postMessage({ type: 'processFiles', files: workerFiles, batchId, chunkConfig: this.config });
|
|
551
|
+
} catch (_error) {
|
|
552
|
+
finalize([]);
|
|
553
|
+
}
|
|
554
|
+
});
|
|
555
|
+
|
|
556
|
+
workerPromises.push({ promise, files: workerFiles });
|
|
557
|
+
}
|
|
558
|
+
|
|
559
|
+
const workerResults = await Promise.all(workerPromises.map((p) => p.promise));
|
|
560
|
+
|
|
561
|
+
// Identify failed files for retry
|
|
562
|
+
const failedFiles = [];
|
|
563
|
+
for (let i = 0; i < workerResults.length; i++) {
|
|
564
|
+
if (workerResults[i].length > 0) {
|
|
565
|
+
results.push(...workerResults[i]);
|
|
566
|
+
} else if (workerPromises[i].files.length > 0) {
|
|
567
|
+
failedFiles.push(...workerPromises[i].files);
|
|
568
|
+
}
|
|
569
|
+
}
|
|
570
|
+
|
|
571
|
+
// Pass failed files back to be handled by legacy path
|
|
572
|
+
if (failedFiles.length > 0) {
|
|
573
|
+
if (this.config.verbose) {
|
|
574
|
+
console.warn(`[Indexer] ${failedFiles.length} files failed in workers, falling back to main thread`);
|
|
575
|
+
}
|
|
576
|
+
// Mark these as failed in the results so the caller knows to process them manually
|
|
577
|
+
for (const f of failedFiles) {
|
|
578
|
+
results.push({ file: f.file, status: 'retry' });
|
|
579
|
+
}
|
|
580
|
+
}
|
|
581
|
+
|
|
582
|
+
return results;
|
|
192
583
|
}
|
|
193
584
|
|
|
194
585
|
/**
|
|
195
586
|
* Process chunks using worker thread pool with timeout and error recovery
|
|
196
587
|
*/
|
|
197
588
|
async processChunksWithWorkers(allChunks) {
|
|
198
|
-
|
|
589
|
+
const activeWorkers = this.workers
|
|
590
|
+
.map((worker, index) => ({ worker, index }))
|
|
591
|
+
.filter((entry) => entry.worker);
|
|
592
|
+
|
|
593
|
+
if (activeWorkers.length === 0) {
|
|
199
594
|
// Fallback to single-threaded processing
|
|
200
595
|
return this.processChunksSingleThreaded(allChunks);
|
|
201
596
|
}
|
|
202
597
|
|
|
203
598
|
const results = [];
|
|
204
|
-
const
|
|
599
|
+
const allowSingleThreadFallback = this.config.allowSingleThreadFallback !== false;
|
|
600
|
+
const chunkSize = Math.ceil(allChunks.length / activeWorkers.length);
|
|
205
601
|
const workerPromises = [];
|
|
206
|
-
const
|
|
602
|
+
const configuredTimeout = Number.isInteger(this.config.workerBatchTimeoutMs)
|
|
603
|
+
? this.config.workerBatchTimeoutMs
|
|
604
|
+
: 300000;
|
|
605
|
+
const WORKER_TIMEOUT = isTestEnv() ? 1000 : configuredTimeout; // 1s in tests, configurable in prod
|
|
207
606
|
|
|
208
607
|
if (this.config.verbose) {
|
|
209
|
-
console.
|
|
608
|
+
console.info(
|
|
609
|
+
`[Indexer] Distributing ${allChunks.length} chunks across ${activeWorkers.length} workers (~${chunkSize} chunks each)`
|
|
610
|
+
);
|
|
210
611
|
}
|
|
211
612
|
|
|
212
|
-
for (let i = 0; i <
|
|
613
|
+
for (let i = 0; i < activeWorkers.length; i++) {
|
|
614
|
+
const { worker, index: workerIndex } = activeWorkers[i];
|
|
213
615
|
const workerChunks = allChunks.slice(i * chunkSize, (i + 1) * chunkSize);
|
|
214
616
|
if (workerChunks.length === 0) continue;
|
|
215
617
|
|
|
216
618
|
if (this.config.verbose) {
|
|
217
|
-
console.
|
|
619
|
+
console.info(`[Indexer] Worker ${workerIndex}: processing ${workerChunks.length} chunks`);
|
|
218
620
|
}
|
|
219
621
|
|
|
220
|
-
const promise = new Promise((resolve,
|
|
221
|
-
const worker = this.workers[i];
|
|
622
|
+
const promise = new Promise((resolve, _reject) => {
|
|
222
623
|
const batchId = `batch-${i}-${Date.now()}`;
|
|
624
|
+
const batchResults = [];
|
|
223
625
|
|
|
224
626
|
// Timeout handler
|
|
225
|
-
const
|
|
226
|
-
|
|
227
|
-
|
|
627
|
+
const killWorker = async () => {
|
|
628
|
+
try {
|
|
629
|
+
await worker.terminate?.();
|
|
630
|
+
} catch {
|
|
631
|
+
// ignore terminate errors
|
|
632
|
+
}
|
|
633
|
+
this.workers[workerIndex] = null;
|
|
634
|
+
|
|
635
|
+
// Attempt to replace the dead worker asynchronously
|
|
636
|
+
this.replaceDeadWorker(workerIndex).catch(err => {
|
|
637
|
+
console.warn(`[Indexer] Failed to replace worker ${workerIndex}: ${err.message}`);
|
|
638
|
+
});
|
|
639
|
+
};
|
|
640
|
+
|
|
641
|
+
const handleTimeout = (label) => {
|
|
642
|
+
// Terminate first to ensure no more messages arrive
|
|
643
|
+
void killWorker();
|
|
644
|
+
worker.off('message', handler);
|
|
645
|
+
worker.off('error', errorHandler);
|
|
646
|
+
console.warn(
|
|
647
|
+
`[Indexer] Worker ${workerIndex} timed out, ${label}`
|
|
648
|
+
);
|
|
649
|
+
this.recordWorkerFailure(`timeout (batch ${batchId})`);
|
|
228
650
|
// Return empty and let fallback handle it
|
|
229
651
|
resolve([]);
|
|
230
|
-
}
|
|
652
|
+
};
|
|
653
|
+
|
|
654
|
+
let timeout = setTimeout(
|
|
655
|
+
() => handleTimeout('killing worker and falling back to single-threaded for this batch'),
|
|
656
|
+
WORKER_TIMEOUT
|
|
657
|
+
);
|
|
658
|
+
|
|
659
|
+
const resetTimeout = () => {
|
|
660
|
+
clearTimeout(timeout);
|
|
661
|
+
timeout = setTimeout(
|
|
662
|
+
() => handleTimeout('killing worker and falling back to single-threaded for this batch'),
|
|
663
|
+
WORKER_TIMEOUT
|
|
664
|
+
);
|
|
665
|
+
};
|
|
666
|
+
|
|
667
|
+
let exitHandler;
|
|
668
|
+
|
|
669
|
+
const finalize = (results) => {
|
|
670
|
+
clearTimeout(timeout);
|
|
671
|
+
worker.off('message', handler);
|
|
672
|
+
worker.off('error', errorHandler);
|
|
673
|
+
if (exitHandler) worker.off('exit', exitHandler);
|
|
674
|
+
resolve(results);
|
|
675
|
+
};
|
|
231
676
|
|
|
232
677
|
const handler = (msg) => {
|
|
233
678
|
if (msg.batchId === batchId) {
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
679
|
+
resetTimeout();
|
|
680
|
+
if (msg.type === 'results') {
|
|
681
|
+
if (Array.isArray(msg.results) && msg.results.length > 0) {
|
|
682
|
+
batchResults.push(...msg.results);
|
|
683
|
+
}
|
|
684
|
+
if (msg.done === false) {
|
|
685
|
+
return;
|
|
686
|
+
}
|
|
687
|
+
finalize(batchResults);
|
|
688
|
+
} else if (msg.type === 'error') {
|
|
689
|
+
console.warn(`[Indexer] Worker ${workerIndex} error: ${msg.error}`);
|
|
690
|
+
finalize([]); // Return empty, don't reject - let fallback handle
|
|
241
691
|
}
|
|
242
692
|
}
|
|
243
693
|
};
|
|
244
694
|
|
|
245
695
|
// Handle worker crash
|
|
246
696
|
const errorHandler = (err) => {
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
697
|
+
console.warn(`[Indexer] Worker ${workerIndex} crashed: ${err.message}`);
|
|
698
|
+
this.recordWorkerFailure(`crash (${err.message})`);
|
|
699
|
+
void killWorker();
|
|
700
|
+
finalize([]); // Return empty, don't reject
|
|
251
701
|
};
|
|
252
|
-
worker.once(
|
|
702
|
+
worker.once('error', errorHandler);
|
|
703
|
+
|
|
704
|
+
exitHandler = (code) => {
|
|
705
|
+
if (code !== 0) {
|
|
706
|
+
console.warn(`[Indexer] Worker ${workerIndex} exited unexpectedly with code ${code}`);
|
|
707
|
+
this.recordWorkerFailure(`exit ${code}`);
|
|
708
|
+
void killWorker();
|
|
709
|
+
finalize([]);
|
|
710
|
+
}
|
|
711
|
+
};
|
|
712
|
+
worker.once('exit', exitHandler);
|
|
253
713
|
|
|
254
|
-
worker.on(
|
|
255
|
-
|
|
714
|
+
worker.on('message', handler);
|
|
715
|
+
try {
|
|
716
|
+
worker.postMessage({ type: 'process', chunks: workerChunks, batchId });
|
|
717
|
+
} catch (error) {
|
|
718
|
+
console.warn(`[Indexer] Worker ${i} postMessage failed: ${error.message}`);
|
|
719
|
+
finalize([]);
|
|
720
|
+
}
|
|
256
721
|
});
|
|
257
722
|
|
|
258
723
|
workerPromises.push({ promise, chunks: workerChunks });
|
|
259
724
|
}
|
|
260
725
|
|
|
261
726
|
// Wait for all workers with error recovery
|
|
262
|
-
const workerResults = await Promise.all(workerPromises.map(p => p.promise));
|
|
727
|
+
const workerResults = await Promise.all(workerPromises.map((p) => p.promise));
|
|
263
728
|
|
|
264
729
|
// Collect results and identify failed chunks that need retry
|
|
265
730
|
const failedChunks = [];
|
|
@@ -273,39 +738,128 @@ export class CodebaseIndexer {
|
|
|
273
738
|
}
|
|
274
739
|
|
|
275
740
|
// Retry failed chunks with single-threaded fallback
|
|
276
|
-
if (failedChunks.length > 0) {
|
|
277
|
-
console.
|
|
741
|
+
if (failedChunks.length > 0 && allowSingleThreadFallback) {
|
|
742
|
+
console.warn(
|
|
743
|
+
`[Indexer] Retrying ${failedChunks.length} chunks with single-threaded fallback...`
|
|
744
|
+
);
|
|
278
745
|
const retryResults = await this.processChunksSingleThreaded(failedChunks);
|
|
279
746
|
results.push(...retryResults);
|
|
747
|
+
} else if (failedChunks.length > 0) {
|
|
748
|
+
console.warn(
|
|
749
|
+
`[Indexer] Skipping ${failedChunks.length} chunks (single-threaded fallback disabled)`
|
|
750
|
+
);
|
|
280
751
|
}
|
|
281
752
|
|
|
282
753
|
return results;
|
|
283
754
|
}
|
|
284
755
|
|
|
756
|
+
async processChunksInChildProcess(chunks) {
|
|
757
|
+
const nodePath = process.execPath || 'node';
|
|
758
|
+
const scriptPath = fileURLToPath(new URL('../lib/embedding-process.js', import.meta.url));
|
|
759
|
+
const payload = {
|
|
760
|
+
embeddingModel: this.config.embeddingModel,
|
|
761
|
+
chunks,
|
|
762
|
+
numThreads: 1,
|
|
763
|
+
};
|
|
764
|
+
|
|
765
|
+
return new Promise((resolve) => {
|
|
766
|
+
const child = spawn(nodePath, [scriptPath], {
|
|
767
|
+
stdio: ['pipe', 'pipe', 'pipe'],
|
|
768
|
+
});
|
|
769
|
+
|
|
770
|
+
let stdout = '';
|
|
771
|
+
let stderr = '';
|
|
772
|
+
child.stdout.on('data', (chunk) => {
|
|
773
|
+
stdout += chunk.toString();
|
|
774
|
+
});
|
|
775
|
+
child.stderr.on('data', (chunk) => {
|
|
776
|
+
stderr += chunk.toString();
|
|
777
|
+
});
|
|
778
|
+
|
|
779
|
+
const timeoutMs = Number.isInteger(this.config.workerBatchTimeoutMs)
|
|
780
|
+
? this.config.workerBatchTimeoutMs
|
|
781
|
+
: 120000;
|
|
782
|
+
const timeout = setTimeout(() => {
|
|
783
|
+
try {
|
|
784
|
+
child.kill('SIGKILL');
|
|
785
|
+
} catch {
|
|
786
|
+
// ignore
|
|
787
|
+
}
|
|
788
|
+
this.recordWorkerFailure('child process timeout');
|
|
789
|
+
resolve([]);
|
|
790
|
+
}, timeoutMs);
|
|
791
|
+
|
|
792
|
+
child.on('error', (err) => {
|
|
793
|
+
clearTimeout(timeout);
|
|
794
|
+
this.recordWorkerFailure(`child process error (${err.message})`);
|
|
795
|
+
resolve([]);
|
|
796
|
+
});
|
|
797
|
+
|
|
798
|
+
child.on('close', (code, signal) => {
|
|
799
|
+
clearTimeout(timeout);
|
|
800
|
+
if (code !== 0) {
|
|
801
|
+
this.recordWorkerFailure(
|
|
802
|
+
`child process exited (${code ?? 'null'}${signal ? `, signal=${signal}` : ''})`
|
|
803
|
+
);
|
|
804
|
+
if (stderr) {
|
|
805
|
+
console.warn(`[Indexer] Child process error: ${stderr.trim()}`);
|
|
806
|
+
}
|
|
807
|
+
return resolve([]);
|
|
808
|
+
}
|
|
809
|
+
try {
|
|
810
|
+
const parsed = JSON.parse(stdout);
|
|
811
|
+
resolve(parsed?.results || []);
|
|
812
|
+
} catch (err) {
|
|
813
|
+
this.recordWorkerFailure(`child process parse error (${err.message})`);
|
|
814
|
+
resolve([]);
|
|
815
|
+
}
|
|
816
|
+
});
|
|
817
|
+
|
|
818
|
+
child.stdin.end(JSON.stringify(payload));
|
|
819
|
+
});
|
|
820
|
+
}
|
|
821
|
+
|
|
285
822
|
/**
|
|
286
823
|
* Single-threaded chunk processing (fallback)
|
|
287
824
|
*/
|
|
288
825
|
async processChunksSingleThreaded(chunks) {
|
|
289
826
|
const results = [];
|
|
290
827
|
|
|
828
|
+
// Manual GC and yield loop to prevent CPU lockup
|
|
829
|
+
let processedSinceGc = 0;
|
|
830
|
+
|
|
291
831
|
for (const chunk of chunks) {
|
|
832
|
+
// Throttle speed (balanced) - yield to event loop but don't wait unnecessarily
|
|
833
|
+
await delay(0);
|
|
834
|
+
|
|
292
835
|
try {
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
836
|
+
const output = await this.embedder(chunk.text, {
|
|
837
|
+
pooling: 'mean',
|
|
838
|
+
normalize: true,
|
|
839
|
+
});
|
|
840
|
+
results.push({
|
|
841
|
+
file: chunk.file,
|
|
842
|
+
startLine: chunk.startLine,
|
|
843
|
+
endLine: chunk.endLine,
|
|
844
|
+
content: chunk.text,
|
|
845
|
+
vector: toFloat32Array(output.data),
|
|
846
|
+
success: true,
|
|
847
|
+
});
|
|
848
|
+
|
|
849
|
+
// Periodic GC to prevent memory creep (only if flag is present)
|
|
850
|
+
processedSinceGc++;
|
|
851
|
+
// Removed manual GC call to prevent performance degradation
|
|
852
|
+
if (processedSinceGc >= 50) {
|
|
853
|
+
processedSinceGc = 0;
|
|
854
|
+
}
|
|
855
|
+
|
|
302
856
|
} catch (error) {
|
|
303
857
|
results.push({
|
|
304
858
|
file: chunk.file,
|
|
305
859
|
startLine: chunk.startLine,
|
|
306
860
|
endLine: chunk.endLine,
|
|
307
861
|
error: error.message,
|
|
308
|
-
success: false
|
|
862
|
+
success: false,
|
|
309
863
|
});
|
|
310
864
|
}
|
|
311
865
|
}
|
|
@@ -317,12 +871,12 @@ export class CodebaseIndexer {
|
|
|
317
871
|
const fileName = path.basename(file);
|
|
318
872
|
if (this.isExcluded(file)) {
|
|
319
873
|
if (this.config.verbose) {
|
|
320
|
-
console.
|
|
874
|
+
console.info(`[Indexer] Skipped ${fileName} (excluded by pattern)`);
|
|
321
875
|
}
|
|
322
876
|
return 0;
|
|
323
877
|
}
|
|
324
878
|
if (this.config.verbose) {
|
|
325
|
-
console.
|
|
879
|
+
console.info(`[Indexer] Processing: ${fileName}...`);
|
|
326
880
|
}
|
|
327
881
|
|
|
328
882
|
try {
|
|
@@ -336,62 +890,115 @@ export class CodebaseIndexer {
|
|
|
336
890
|
|
|
337
891
|
if (stats.size > this.config.maxFileSize) {
|
|
338
892
|
if (this.config.verbose) {
|
|
339
|
-
console.
|
|
893
|
+
console.warn(
|
|
894
|
+
`[Indexer] Skipped ${fileName} (too large: ${(stats.size / 1024 / 1024).toFixed(2)}MB)`
|
|
895
|
+
);
|
|
340
896
|
}
|
|
341
897
|
return 0;
|
|
342
898
|
}
|
|
343
899
|
|
|
344
|
-
const content = await fs.readFile(file,
|
|
900
|
+
const content = await fs.readFile(file, 'utf-8');
|
|
345
901
|
const hash = hashContent(content);
|
|
346
902
|
|
|
347
903
|
// Skip if file hasn't changed
|
|
348
|
-
|
|
904
|
+
const cachedHash = typeof this.cache.getFileHash === 'function' ? this.cache.getFileHash(file) : null;
|
|
905
|
+
if (cachedHash === hash) {
|
|
349
906
|
if (this.config.verbose) {
|
|
350
|
-
console.
|
|
907
|
+
console.info(`[Indexer] Skipped ${fileName} (unchanged)`);
|
|
351
908
|
}
|
|
909
|
+
// Still update metadata (size, mtime) even if hash is same
|
|
910
|
+
this.cache.setFileHash(file, hash, stats);
|
|
352
911
|
return 0;
|
|
353
912
|
}
|
|
354
913
|
|
|
355
914
|
if (this.config.verbose) {
|
|
356
|
-
console.
|
|
915
|
+
console.info(`[Indexer] Indexing ${fileName}...`);
|
|
357
916
|
}
|
|
358
917
|
|
|
359
|
-
//
|
|
360
|
-
|
|
918
|
+
// Extract call graph data if enabled
|
|
919
|
+
let callData = null;
|
|
920
|
+
if (this.config.callGraphEnabled) {
|
|
921
|
+
try {
|
|
922
|
+
callData = extractCallData(content, file);
|
|
923
|
+
} catch (err) {
|
|
924
|
+
if (this.config.verbose) {
|
|
925
|
+
console.warn(
|
|
926
|
+
`[Indexer] Call graph extraction failed for ${fileName}: ${err.message}`
|
|
927
|
+
);
|
|
928
|
+
}
|
|
929
|
+
}
|
|
930
|
+
}
|
|
361
931
|
|
|
362
|
-
const
|
|
932
|
+
const rawChunks = smartChunk(content, file, this.config);
|
|
933
|
+
const chunks = Array.isArray(rawChunks) ? rawChunks : [];
|
|
363
934
|
let addedChunks = 0;
|
|
935
|
+
let successChunks = 0;
|
|
364
936
|
let failedChunks = 0;
|
|
937
|
+
const newChunks = [];
|
|
365
938
|
|
|
366
|
-
for
|
|
367
|
-
|
|
368
|
-
|
|
939
|
+
// Use workers for watcher-triggered embedding to keep main thread responsive
|
|
940
|
+
const useWorkers = this.shouldUseWorkers();
|
|
941
|
+
if (useWorkers && this.workers.length === 0) {
|
|
942
|
+
await this.initializeWorkers();
|
|
943
|
+
}
|
|
944
|
+
|
|
945
|
+
const chunksToProcess = chunks.map((c) => ({
|
|
946
|
+
file,
|
|
947
|
+
text: c.text,
|
|
948
|
+
startLine: c.startLine,
|
|
949
|
+
endLine: c.endLine
|
|
950
|
+
}));
|
|
369
951
|
|
|
370
|
-
|
|
952
|
+
let results = [];
|
|
953
|
+
if (useWorkers && this.workers.length > 0) {
|
|
954
|
+
results = await this.processChunksWithWorkers(chunksToProcess);
|
|
955
|
+
} else {
|
|
956
|
+
results = await this.processChunksSingleThreaded(chunksToProcess);
|
|
957
|
+
}
|
|
958
|
+
|
|
959
|
+
for (const result of results) {
|
|
960
|
+
if (result.success) {
|
|
961
|
+
newChunks.push({
|
|
371
962
|
file,
|
|
372
|
-
startLine:
|
|
373
|
-
endLine:
|
|
374
|
-
content:
|
|
375
|
-
vector:
|
|
963
|
+
startLine: result.startLine,
|
|
964
|
+
endLine: result.endLine,
|
|
965
|
+
content: result.content,
|
|
966
|
+
vector: toFloat32Array(result.vector),
|
|
376
967
|
});
|
|
377
968
|
addedChunks++;
|
|
378
|
-
|
|
969
|
+
successChunks++;
|
|
970
|
+
} else {
|
|
971
|
+
console.warn(`[Indexer] Failed to embed chunk in ${fileName}:`, result.error);
|
|
379
972
|
failedChunks++;
|
|
380
|
-
console.error(`[Indexer] Failed to embed chunk in ${fileName}:`, embeddingError.message);
|
|
381
973
|
}
|
|
382
974
|
}
|
|
383
975
|
|
|
384
|
-
|
|
385
|
-
|
|
976
|
+
const totalChunks = chunks.length;
|
|
977
|
+
const allSucceeded = totalChunks === 0 || failedChunks === 0;
|
|
978
|
+
|
|
979
|
+
if (allSucceeded) {
|
|
980
|
+
this.cache.removeFileFromStore(file);
|
|
981
|
+
for (const chunk of newChunks) {
|
|
982
|
+
this.cache.addToStore(chunk);
|
|
983
|
+
}
|
|
984
|
+
this.cache.setFileHash(file, hash, stats);
|
|
985
|
+
if (this.config.callGraphEnabled && callData) {
|
|
986
|
+
this.cache.setFileCallData(file, callData);
|
|
987
|
+
}
|
|
386
988
|
} else if (this.config.verbose) {
|
|
387
|
-
console.
|
|
989
|
+
console.warn(
|
|
990
|
+
`[Indexer] Skipped hash update for ${fileName} (${successChunks}/${totalChunks} chunks embedded)`
|
|
991
|
+
);
|
|
388
992
|
}
|
|
993
|
+
|
|
389
994
|
if (this.config.verbose) {
|
|
390
|
-
console.
|
|
995
|
+
console.info(`[Indexer] Completed ${fileName} (${addedChunks} chunks)`);
|
|
391
996
|
}
|
|
392
997
|
return addedChunks;
|
|
393
998
|
} catch (error) {
|
|
394
|
-
|
|
999
|
+
if (this.config.verbose) {
|
|
1000
|
+
console.warn(`[Indexer] Error indexing ${fileName}:`, error.message);
|
|
1001
|
+
}
|
|
395
1002
|
return 0;
|
|
396
1003
|
}
|
|
397
1004
|
}
|
|
@@ -404,40 +1011,39 @@ export class CodebaseIndexer {
|
|
|
404
1011
|
const startTime = Date.now();
|
|
405
1012
|
|
|
406
1013
|
// Build extension filter from config
|
|
407
|
-
const extensions = new Set(this.config.fileExtensions.map(ext => `.${ext}`));
|
|
1014
|
+
const extensions = new Set(this.config.fileExtensions.map((ext) => `.${ext}`));
|
|
1015
|
+
const allowedFileNames = new Set(this.config.fileNames || []);
|
|
408
1016
|
|
|
409
|
-
//
|
|
410
|
-
|
|
411
|
-
const excludeDirs = new Set();
|
|
412
|
-
for (const pattern of this.config.excludePatterns) {
|
|
413
|
-
// Extract directory names from glob patterns
|
|
414
|
-
const match = pattern.match(/\*\*\/([^/*]+)\/?\*?\*?$/);
|
|
415
|
-
if (match) {
|
|
416
|
-
excludeDirs.add(match[1]);
|
|
417
|
-
}
|
|
418
|
-
// Also handle patterns like "**/dirname/**"
|
|
419
|
-
const match2 = pattern.match(/\*\*\/([^/*]+)\/\*\*$/);
|
|
420
|
-
if (match2) {
|
|
421
|
-
excludeDirs.add(match2[1]);
|
|
422
|
-
}
|
|
423
|
-
}
|
|
424
|
-
|
|
425
|
-
// Always exclude cache directory
|
|
426
|
-
excludeDirs.add(".smart-coding-cache");
|
|
1017
|
+
// Load .gitignore before discovery
|
|
1018
|
+
await this.loadGitignore();
|
|
427
1019
|
|
|
428
|
-
if (this.config.
|
|
429
|
-
|
|
1020
|
+
if (!this.config.searchDirectory) {
|
|
1021
|
+
return [];
|
|
430
1022
|
}
|
|
431
1023
|
|
|
432
1024
|
const api = new fdir()
|
|
433
1025
|
.withFullPaths()
|
|
434
|
-
.exclude((dirName) =>
|
|
435
|
-
|
|
1026
|
+
.exclude((dirName, dirPath) => {
|
|
1027
|
+
// Always exclude specific heavy folders immediately
|
|
1028
|
+
if (dirName === 'node_modules' || dirName === '.git' || dirName === '.smart-coding-cache') return true;
|
|
1029
|
+
|
|
1030
|
+
// Check exclusion rules for directories
|
|
1031
|
+
const fullPath = path.join(dirPath, dirName);
|
|
1032
|
+
return this.isExcluded(fullPath);
|
|
1033
|
+
})
|
|
1034
|
+
.filter((filePath) => {
|
|
1035
|
+
if (this.isExcluded(filePath)) return false;
|
|
1036
|
+
|
|
1037
|
+
// Check extensions/filenames
|
|
1038
|
+
const base = path.basename(filePath);
|
|
1039
|
+
const ext = path.extname(filePath);
|
|
1040
|
+
return (extensions.has(ext) || allowedFileNames.has(base));
|
|
1041
|
+
})
|
|
436
1042
|
.crawl(this.config.searchDirectory);
|
|
437
1043
|
|
|
438
1044
|
const files = await api.withPromise();
|
|
439
1045
|
|
|
440
|
-
console.
|
|
1046
|
+
console.info(`[Indexer] File discovery: ${files.length} files in ${Date.now() - startTime}ms`);
|
|
441
1047
|
return files;
|
|
442
1048
|
}
|
|
443
1049
|
|
|
@@ -450,13 +1056,17 @@ export class CodebaseIndexer {
|
|
|
450
1056
|
const skippedCount = { unchanged: 0, tooLarge: 0, error: 0 };
|
|
451
1057
|
|
|
452
1058
|
// Process in parallel batches for speed
|
|
453
|
-
|
|
1059
|
+
// We fetch stats for 100 files at a time to keep IO efficient
|
|
1060
|
+
const STAT_BATCH_SIZE = Math.min(100, this.config.batchSize || 100);
|
|
1061
|
+
// Limit concurrent file reads to 50MB to prevent OOM
|
|
1062
|
+
const MAX_READ_BATCH_BYTES = 50 * 1024 * 1024;
|
|
454
1063
|
|
|
455
|
-
for (let i = 0; i < files.length; i +=
|
|
456
|
-
const
|
|
1064
|
+
for (let i = 0; i < files.length; i += STAT_BATCH_SIZE) {
|
|
1065
|
+
const batchFiles = files.slice(i, i + STAT_BATCH_SIZE);
|
|
457
1066
|
|
|
458
|
-
|
|
459
|
-
|
|
1067
|
+
// 1. Get stats for all files in this batch parallel
|
|
1068
|
+
const fileStats = await Promise.all(
|
|
1069
|
+
batchFiles.map(async (file) => {
|
|
460
1070
|
try {
|
|
461
1071
|
const stats = await fs.stat(file);
|
|
462
1072
|
|
|
@@ -469,112 +1079,202 @@ export class CodebaseIndexer {
|
|
|
469
1079
|
return null;
|
|
470
1080
|
}
|
|
471
1081
|
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
if (this.cache.getFileHash(file) === hash) {
|
|
476
|
-
skippedCount.unchanged++;
|
|
477
|
-
return null;
|
|
478
|
-
}
|
|
479
|
-
|
|
480
|
-
return { file, content, hash };
|
|
481
|
-
} catch (error) {
|
|
1082
|
+
return { file, size: stats.size, mtimeMs: stats.mtimeMs };
|
|
1083
|
+
} catch (_err) {
|
|
482
1084
|
skippedCount.error++;
|
|
483
1085
|
return null;
|
|
484
1086
|
}
|
|
485
1087
|
})
|
|
486
1088
|
);
|
|
487
1089
|
|
|
488
|
-
|
|
489
|
-
|
|
1090
|
+
// 2. Process valid files in size-constrained sub-batches
|
|
1091
|
+
let currentReadBatch = [];
|
|
1092
|
+
let currentReadBytes = 0;
|
|
1093
|
+
|
|
1094
|
+
const processReadBatch = async (batch) => {
|
|
1095
|
+
const results = await Promise.all(
|
|
1096
|
+
batch.map(async ({ file, size, mtimeMs }) => {
|
|
1097
|
+
// Check if we have cached metadata for this file
|
|
1098
|
+
const cachedHash =
|
|
1099
|
+
typeof this.cache.getFileHash === 'function' ? this.cache.getFileHash(file) : null;
|
|
1100
|
+
const cachedMeta = this.cache.getFileMeta ? this.cache.getFileMeta(file) : null;
|
|
1101
|
+
|
|
1102
|
+
if (cachedHash && cachedMeta &&
|
|
1103
|
+
Number.isFinite(cachedMeta.mtimeMs) && cachedMeta.mtimeMs === mtimeMs &&
|
|
1104
|
+
Number.isFinite(cachedMeta.size) && cachedMeta.size === size) {
|
|
1105
|
+
// Metadata matches exactly, skip reading/hashing
|
|
1106
|
+
skippedCount.unchanged++;
|
|
1107
|
+
return null;
|
|
1108
|
+
}
|
|
1109
|
+
|
|
1110
|
+
// Suspect file: Either new, or metadata changed.
|
|
1111
|
+
// We pass it to indexAll with the cachedHash as 'expectedHash'
|
|
1112
|
+
// so workers can perform the actual hashing and unchanged check.
|
|
1113
|
+
return { file, hash: null, expectedHash: cachedHash, force: false, size, mtimeMs };
|
|
1114
|
+
})
|
|
1115
|
+
);
|
|
1116
|
+
|
|
1117
|
+
for (const result of results) {
|
|
1118
|
+
if (result) filesToProcess.push(result);
|
|
1119
|
+
}
|
|
1120
|
+
};
|
|
1121
|
+
|
|
1122
|
+
for (const item of fileStats) {
|
|
1123
|
+
if (!item) continue;
|
|
1124
|
+
|
|
1125
|
+
if (
|
|
1126
|
+
currentReadBytes + item.size > MAX_READ_BATCH_BYTES &&
|
|
1127
|
+
currentReadBatch.length > 0
|
|
1128
|
+
) {
|
|
1129
|
+
await processReadBatch(currentReadBatch);
|
|
1130
|
+
currentReadBatch = [];
|
|
1131
|
+
currentReadBytes = 0;
|
|
1132
|
+
}
|
|
1133
|
+
|
|
1134
|
+
currentReadBatch.push(item);
|
|
1135
|
+
currentReadBytes += item.size;
|
|
1136
|
+
}
|
|
1137
|
+
|
|
1138
|
+
if (currentReadBatch.length > 0) {
|
|
1139
|
+
await processReadBatch(currentReadBatch);
|
|
1140
|
+
}
|
|
1141
|
+
|
|
1142
|
+
// Pre-warm HybridSearch cache if available
|
|
1143
|
+
if (this.server && this.server.hybridSearch && this.server.hybridSearch.fileModTimes) {
|
|
1144
|
+
for (const stat of fileStats) {
|
|
1145
|
+
if (stat && stat.file && typeof stat.mtimeMs === 'number') {
|
|
1146
|
+
this.server.hybridSearch.fileModTimes.set(stat.file, stat.mtimeMs);
|
|
1147
|
+
}
|
|
1148
|
+
}
|
|
490
1149
|
}
|
|
491
1150
|
}
|
|
492
1151
|
|
|
493
|
-
|
|
1152
|
+
if (this.config.verbose) {
|
|
1153
|
+
console.info(
|
|
1154
|
+
`[Indexer] Pre-filter: ${filesToProcess.length} changed, ${skippedCount.unchanged} unchanged, ${skippedCount.tooLarge} too large, ${skippedCount.error} errors (${Date.now() - startTime}ms)`
|
|
1155
|
+
);
|
|
1156
|
+
}
|
|
1157
|
+
|
|
1158
|
+
|
|
1159
|
+
|
|
494
1160
|
return filesToProcess;
|
|
495
1161
|
}
|
|
496
1162
|
|
|
497
1163
|
async indexAll(force = false) {
|
|
498
|
-
if (this.isIndexing) {
|
|
499
|
-
console.
|
|
500
|
-
return {
|
|
1164
|
+
if (this.isIndexing || this.processingWatchEvents) {
|
|
1165
|
+
console.warn('[Indexer] Indexing already in progress, skipping concurrent request');
|
|
1166
|
+
return {
|
|
1167
|
+
skipped: true,
|
|
1168
|
+
reason: 'Indexing already in progress or pending file updates are being applied',
|
|
1169
|
+
};
|
|
501
1170
|
}
|
|
502
1171
|
|
|
503
1172
|
this.isIndexing = true;
|
|
1173
|
+
let memoryTimer = null;
|
|
1174
|
+
const logMemory = (label) => {
|
|
1175
|
+
if (!this.config.verbose) return;
|
|
1176
|
+
const { rss, heapUsed, heapTotal } = process.memoryUsage();
|
|
1177
|
+
const toMb = (value) => `${(value / 1024 / 1024).toFixed(1)}MB`;
|
|
1178
|
+
console.info(
|
|
1179
|
+
`[Indexer] Memory ${label}: rss=${toMb(rss)} heap=${toMb(heapUsed)}/${toMb(heapTotal)}`,
|
|
1180
|
+
);
|
|
1181
|
+
};
|
|
504
1182
|
|
|
505
1183
|
try {
|
|
1184
|
+
logMemory('start');
|
|
1185
|
+
if (this.config.verbose) {
|
|
1186
|
+
memoryTimer = setInterval(() => logMemory('periodic'), 15000);
|
|
1187
|
+
}
|
|
1188
|
+
|
|
506
1189
|
if (force) {
|
|
507
|
-
console.
|
|
508
|
-
this.cache.
|
|
509
|
-
|
|
510
|
-
|
|
1190
|
+
console.info('[Indexer] Force reindex requested: clearing cache');
|
|
1191
|
+
await this.cache.reset();
|
|
1192
|
+
} else {
|
|
1193
|
+
if (typeof this.cache.ensureLoaded === 'function') {
|
|
1194
|
+
await this.cache.ensureLoaded();
|
|
1195
|
+
}
|
|
511
1196
|
}
|
|
512
1197
|
|
|
513
1198
|
const totalStartTime = Date.now();
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
this.sendProgress(
|
|
522
|
-
|
|
523
|
-
|
|
1199
|
+
const indexStartedAt = new Date(totalStartTime).toISOString();
|
|
1200
|
+
let indexMode = force
|
|
1201
|
+
? 'full'
|
|
1202
|
+
: this.cache.getVectorStore().length === 0
|
|
1203
|
+
? 'initial'
|
|
1204
|
+
: 'incremental';
|
|
1205
|
+
this.currentIndexMode = indexMode;
|
|
1206
|
+
this.sendProgress(0, 100, 'Indexing started');
|
|
1207
|
+
console.info(`[Indexer] Starting optimized indexing in ${this.config.searchDirectory}...`);
|
|
1208
|
+
|
|
1209
|
+
// Step 1: Fast file discovery with fdir
|
|
1210
|
+
const files = await this.discoverFiles();
|
|
1211
|
+
|
|
1212
|
+
if (files.length === 0) {
|
|
1213
|
+
console.info('[Indexer] No files found to index');
|
|
1214
|
+
this.sendProgress(100, 100, 'No files found to index');
|
|
1215
|
+
return {
|
|
1216
|
+
skipped: false,
|
|
1217
|
+
filesProcessed: 0,
|
|
1218
|
+
chunksCreated: 0,
|
|
1219
|
+
message: 'No files found to index',
|
|
1220
|
+
};
|
|
1221
|
+
}
|
|
524
1222
|
|
|
525
|
-
|
|
526
|
-
|
|
1223
|
+
// Send progress: discovery complete
|
|
1224
|
+
this.sendProgress(5, 100, `Discovered ${files.length} files`);
|
|
527
1225
|
|
|
528
|
-
|
|
1226
|
+
const currentFilesSet = new Set(files);
|
|
529
1227
|
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
1228
|
+
// Step 1.5: Prune deleted or excluded files from cache
|
|
1229
|
+
if (!force) {
|
|
1230
|
+
const cachedFiles =
|
|
1231
|
+
typeof this.cache.getFileHashKeys === 'function' ? this.cache.getFileHashKeys() : [];
|
|
1232
|
+
let prunedCount = 0;
|
|
534
1233
|
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
1234
|
+
for (const cachedFile of cachedFiles) {
|
|
1235
|
+
if (!currentFilesSet.has(cachedFile)) {
|
|
1236
|
+
this.cache.removeFileFromStore(cachedFile);
|
|
1237
|
+
this.cache.deleteFileHash(cachedFile);
|
|
1238
|
+
prunedCount++;
|
|
1239
|
+
}
|
|
540
1240
|
}
|
|
541
|
-
}
|
|
542
1241
|
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
1242
|
+
if (prunedCount > 0) {
|
|
1243
|
+
if (this.config.verbose) {
|
|
1244
|
+
console.info(`[Indexer] Pruned ${prunedCount} deleted/excluded files from index`);
|
|
1245
|
+
}
|
|
1246
|
+
// If we pruned files, we should save these changes even if no other files changed
|
|
546
1247
|
}
|
|
547
|
-
// If we pruned files, we should save these changes even if no other files changed
|
|
548
|
-
}
|
|
549
1248
|
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
1249
|
+
const prunedCallGraph = this.cache.pruneCallGraphData(currentFilesSet);
|
|
1250
|
+
if (prunedCallGraph > 0 && this.config.verbose) {
|
|
1251
|
+
console.info(`[Indexer] Pruned ${prunedCallGraph} call-graph entries`);
|
|
1252
|
+
}
|
|
553
1253
|
}
|
|
554
|
-
}
|
|
555
|
-
|
|
556
|
-
// Step 2: Pre-filter unchanged files (early hash check)
|
|
557
|
-
const filesToProcess = await this.preFilterFiles(files);
|
|
558
|
-
const filesToProcessSet = new Set(filesToProcess.map(entry => entry.file));
|
|
559
1254
|
|
|
560
|
-
|
|
561
|
-
|
|
1255
|
+
// Step 2: Pre-filter unchanged files (early hash check)
|
|
1256
|
+
const filesToProcess = await this.preFilterFiles(files);
|
|
1257
|
+
const filesToProcessSet = new Set(filesToProcess.map((entry) => entry.file));
|
|
1258
|
+
const filesToProcessByFile = new Map(filesToProcess.map((entry) => [entry.file, entry]));
|
|
562
1259
|
|
|
563
|
-
//
|
|
1260
|
+
// Re-index files missing call graph data (if enabled)
|
|
564
1261
|
if (this.config.callGraphEnabled && this.cache.getVectorStore().length > 0) {
|
|
565
|
-
|
|
566
|
-
const
|
|
567
|
-
const callDataFiles = new Set(this.cache.fileCallData.keys());
|
|
1262
|
+
const cachedFiles = new Set(this.cache.getVectorStore().map((c) => c.file));
|
|
1263
|
+
const callDataFiles = new Set(this.cache.getFileCallDataKeys());
|
|
568
1264
|
|
|
569
1265
|
const missingCallData = [];
|
|
570
1266
|
for (const file of cachedFiles) {
|
|
571
1267
|
if (!callDataFiles.has(file) && currentFilesSet.has(file)) {
|
|
572
1268
|
missingCallData.push(file);
|
|
1269
|
+
const existing = filesToProcessByFile.get(file);
|
|
1270
|
+
if (existing) existing.force = true;
|
|
573
1271
|
}
|
|
574
1272
|
}
|
|
575
1273
|
|
|
576
1274
|
if (missingCallData.length > 0) {
|
|
577
|
-
console.
|
|
1275
|
+
console.info(
|
|
1276
|
+
`[Indexer] Found ${missingCallData.length} files missing call graph data, re-indexing...`
|
|
1277
|
+
);
|
|
578
1278
|
const BATCH_SIZE = 100;
|
|
579
1279
|
for (let i = 0; i < missingCallData.length; i += BATCH_SIZE) {
|
|
580
1280
|
const batch = missingCallData.slice(i, i + BATCH_SIZE);
|
|
@@ -582,11 +1282,14 @@ export class CodebaseIndexer {
|
|
|
582
1282
|
batch.map(async (file) => {
|
|
583
1283
|
try {
|
|
584
1284
|
const stats = await fs.stat(file);
|
|
1285
|
+
if (!stats || typeof stats.isDirectory !== 'function') {
|
|
1286
|
+
return null;
|
|
1287
|
+
}
|
|
585
1288
|
if (stats.isDirectory()) return null;
|
|
586
1289
|
if (stats.size > this.config.maxFileSize) return null;
|
|
587
|
-
const content = await fs.readFile(file,
|
|
1290
|
+
const content = await fs.readFile(file, 'utf-8');
|
|
588
1291
|
const hash = hashContent(content);
|
|
589
|
-
return { file,
|
|
1292
|
+
return { file, hash, force: true, size: stats.size, mtimeMs: stats.mtimeMs };
|
|
590
1293
|
} catch {
|
|
591
1294
|
return null;
|
|
592
1295
|
}
|
|
@@ -595,198 +1298,535 @@ export class CodebaseIndexer {
|
|
|
595
1298
|
|
|
596
1299
|
for (const result of results) {
|
|
597
1300
|
if (!result) continue;
|
|
598
|
-
if (filesToProcessSet.has(result.file))
|
|
599
|
-
|
|
600
|
-
|
|
1301
|
+
if (!filesToProcessSet.has(result.file)) {
|
|
1302
|
+
filesToProcess.push(result);
|
|
1303
|
+
filesToProcessSet.add(result.file);
|
|
1304
|
+
}
|
|
601
1305
|
}
|
|
602
1306
|
}
|
|
603
1307
|
}
|
|
604
1308
|
}
|
|
605
1309
|
|
|
606
|
-
|
|
1310
|
+
indexMode = force
|
|
1311
|
+
? 'full'
|
|
1312
|
+
: this.cache.getVectorStore().length === 0
|
|
1313
|
+
? 'initial'
|
|
1314
|
+
: filesToProcess.length === files.length
|
|
1315
|
+
? 'full'
|
|
1316
|
+
: 'incremental';
|
|
1317
|
+
this.currentIndexMode = indexMode;
|
|
1318
|
+
|
|
607
1319
|
if (filesToProcess.length === 0) {
|
|
608
|
-
|
|
1320
|
+
console.info('[Indexer] All files unchanged, nothing to index');
|
|
1321
|
+
this.sendProgress(100, 100, 'All files up to date');
|
|
609
1322
|
await this.cache.save();
|
|
610
1323
|
const vectorStore = this.cache.getVectorStore();
|
|
611
1324
|
return {
|
|
612
1325
|
skipped: false,
|
|
613
1326
|
filesProcessed: 0,
|
|
614
1327
|
chunksCreated: 0,
|
|
615
|
-
totalFiles: new Set(vectorStore.map(v => v.file)).size,
|
|
1328
|
+
totalFiles: new Set(vectorStore.map((v) => v.file)).size,
|
|
616
1329
|
totalChunks: vectorStore.length,
|
|
617
|
-
message:
|
|
1330
|
+
message: 'All files up to date',
|
|
618
1331
|
};
|
|
619
1332
|
}
|
|
620
|
-
}
|
|
621
1333
|
|
|
622
|
-
|
|
623
|
-
|
|
1334
|
+
// Send progress: filtering complete
|
|
1335
|
+
console.info(`[Indexer] Processing ${filesToProcess.length} changed files`);
|
|
1336
|
+
this.sendProgress(10, 100, `Processing ${filesToProcess.length} changed files`);
|
|
1337
|
+
|
|
1338
|
+
// Step 3: Determine batch size based on project size
|
|
1339
|
+
// Adaptive batch size: use larger batches for larger projects to reduce overhead
|
|
1340
|
+
let adaptiveBatchSize = 10;
|
|
1341
|
+
if (files.length > 500) adaptiveBatchSize = 50;
|
|
1342
|
+
if (files.length > 1000) adaptiveBatchSize = 100;
|
|
1343
|
+
if (files.length > 5000) adaptiveBatchSize = 500;
|
|
1344
|
+
|
|
1345
|
+
if (this.config.verbose) {
|
|
1346
|
+
console.info(
|
|
1347
|
+
`[Indexer] Processing ${filesToProcess.length} files (batch size: ${adaptiveBatchSize})`
|
|
1348
|
+
);
|
|
1349
|
+
}
|
|
624
1350
|
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
1351
|
+
// Step 4: Initialize worker threads (skip if explicitly disabled)
|
|
1352
|
+
const allowSingleThreadFallback =
|
|
1353
|
+
this.config.allowSingleThreadFallback !== false ||
|
|
1354
|
+
this.config.workerThreads === 0 ||
|
|
1355
|
+
isTestEnv();
|
|
1356
|
+
const useWorkers = this.shouldUseWorkers();
|
|
1357
|
+
|
|
1358
|
+
if (useWorkers) {
|
|
1359
|
+
await this.initializeWorkers();
|
|
1360
|
+
if (this.config.verbose && this.workers.length > 0) {
|
|
1361
|
+
console.info(`[Indexer] Multi-threaded mode: ${this.workers.length} workers active`);
|
|
1362
|
+
}
|
|
1363
|
+
} else if (this.config.verbose) {
|
|
1364
|
+
const until = this.workersDisabledUntil - Date.now();
|
|
1365
|
+
if (this.workersDisabledUntil && until > 0) {
|
|
1366
|
+
console.info(
|
|
1367
|
+
`[Indexer] Workers disabled for ${Math.round(until / 1000)}s; single-threaded fallback ${allowSingleThreadFallback ? 'enabled' : 'disabled'}`
|
|
1368
|
+
);
|
|
1369
|
+
} else {
|
|
1370
|
+
console.info(`[Indexer] Single-threaded mode (single-core system)`);
|
|
1371
|
+
}
|
|
1372
|
+
}
|
|
629
1373
|
|
|
630
|
-
|
|
1374
|
+
const resolvedWorkerThreads = useWorkers ? this.workers.length : 0;
|
|
631
1375
|
|
|
632
|
-
|
|
633
|
-
|
|
1376
|
+
let totalChunks = 0;
|
|
1377
|
+
let processedFiles = 0;
|
|
634
1378
|
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
} else {
|
|
639
|
-
console.error(`[Indexer] Single-threaded mode (single-core system)`);
|
|
640
|
-
}
|
|
1379
|
+
console.info(
|
|
1380
|
+
`[Indexer] Embedding pass started: ${filesToProcess.length} files using ${this.config.embeddingModel}`
|
|
1381
|
+
);
|
|
641
1382
|
|
|
642
|
-
|
|
643
|
-
|
|
1383
|
+
// Step 5: Process files in adaptive batches
|
|
1384
|
+
for (let i = 0; i < filesToProcess.length; i += adaptiveBatchSize) {
|
|
1385
|
+
const batch = filesToProcess.slice(i, i + adaptiveBatchSize);
|
|
644
1386
|
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
1387
|
+
const allChunks = [];
|
|
1388
|
+
const fileStats = new Map();
|
|
1389
|
+
const newChunksByFile = new Map();
|
|
1390
|
+
const callDataByFile = new Map();
|
|
1391
|
+
const filesForWorkers = [];
|
|
648
1392
|
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
1393
|
+
// Memory safeguard
|
|
1394
|
+
const mem = process.memoryUsage();
|
|
1395
|
+
if (mem.rss > 2048 * 1024 * 1024) {
|
|
1396
|
+
if (global.gc) global.gc();
|
|
1397
|
+
}
|
|
652
1398
|
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
1399
|
+
const useWorkersForBatch = useWorkers && this.workers.length > 0 && !this.config.embeddingProcessPerBatch;
|
|
1400
|
+
|
|
1401
|
+
for (const item of batch) {
|
|
1402
|
+
const { file, force, content: presetContent, hash: presetHash, expectedHash: presetExpectedHash, size: presetSize, mtimeMs: presetMtimeMs } = item;
|
|
1403
|
+
let content = presetContent;
|
|
1404
|
+
let liveHash = presetHash;
|
|
1405
|
+
let size = presetSize;
|
|
1406
|
+
let mtimeMs = presetMtimeMs;
|
|
1407
|
+
const expectedHash =
|
|
1408
|
+
presetExpectedHash ||
|
|
1409
|
+
(typeof this.cache.getFileHash === 'function' ? this.cache.getFileHash(file) : null);
|
|
1410
|
+
|
|
1411
|
+
if (useWorkersForBatch && (content === undefined || content === null)) {
|
|
1412
|
+
// Speed optimization: Offload reading and hashing to workers.
|
|
1413
|
+
// Main thread skips I/O entirely for this file.
|
|
1414
|
+
filesForWorkers.push({ file, content: null, force, expectedHash });
|
|
1415
|
+
// Initialize stats placeholder (will be updated with worker results)
|
|
1416
|
+
fileStats.set(file, { hash: null, totalChunks: 0, successChunks: 0, size, mtimeMs });
|
|
1417
|
+
continue;
|
|
1418
|
+
}
|
|
656
1419
|
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
1420
|
+
// Read content if not provided (Legacy Path or workers disabled)
|
|
1421
|
+
if (content === undefined || content === null) {
|
|
1422
|
+
let stats = null;
|
|
1423
|
+
try {
|
|
1424
|
+
stats = await fs.stat(file);
|
|
1425
|
+
} catch (err) {
|
|
1426
|
+
if (this.config.verbose) {
|
|
1427
|
+
console.warn(`[Indexer] Failed to stat ${path.basename(file)}: ${err.message}`);
|
|
1428
|
+
}
|
|
1429
|
+
continue;
|
|
1430
|
+
}
|
|
1431
|
+
if (!stats || typeof stats.isDirectory !== 'function') {
|
|
1432
|
+
if (this.config.verbose) {
|
|
1433
|
+
console.warn(`[Indexer] Invalid stat result for ${path.basename(file)}`);
|
|
1434
|
+
}
|
|
1435
|
+
continue;
|
|
1436
|
+
}
|
|
1437
|
+
if (stats.isDirectory()) continue;
|
|
1438
|
+
if (stats.size > this.config.maxFileSize) {
|
|
1439
|
+
if (this.config.verbose) {
|
|
1440
|
+
console.warn(
|
|
1441
|
+
`[Indexer] Skipped ${path.basename(file)} (too large: ${(stats.size / 1024 / 1024).toFixed(2)}MB)`
|
|
1442
|
+
);
|
|
1443
|
+
}
|
|
1444
|
+
continue;
|
|
1445
|
+
}
|
|
1446
|
+
try {
|
|
1447
|
+
content = await fs.readFile(file, 'utf-8');
|
|
1448
|
+
} catch (err) {
|
|
1449
|
+
if (this.config.verbose) {
|
|
1450
|
+
console.warn(`[Indexer] Failed to read ${path.basename(file)}: ${err.message}`);
|
|
1451
|
+
}
|
|
1452
|
+
continue;
|
|
1453
|
+
}
|
|
1454
|
+
liveHash = hashContent(content);
|
|
1455
|
+
size = stats.size;
|
|
1456
|
+
mtimeMs = stats.mtimeMs;
|
|
1457
|
+
} else {
|
|
1458
|
+
if (typeof content !== 'string') content = String(content);
|
|
1459
|
+
if (!liveHash) liveHash = hashContent(content);
|
|
1460
|
+
if (!Number.isFinite(size)) {
|
|
1461
|
+
// Use character length as approximation to avoid blocking Buffer.byteLength on large strings
|
|
1462
|
+
size = content.length;
|
|
1463
|
+
}
|
|
1464
|
+
if (size > this.config.maxFileSize) {
|
|
1465
|
+
if (this.config.verbose) {
|
|
1466
|
+
console.warn(
|
|
1467
|
+
`[Indexer] Skipped ${path.basename(file)} (too large: ${(size / 1024 / 1024).toFixed(2)}MB)`
|
|
1468
|
+
);
|
|
1469
|
+
}
|
|
1470
|
+
continue;
|
|
665
1471
|
}
|
|
666
1472
|
}
|
|
667
|
-
}
|
|
668
1473
|
|
|
669
|
-
|
|
670
|
-
|
|
1474
|
+
const cachedFileHash =
|
|
1475
|
+
typeof this.cache.getFileHash === 'function' ? this.cache.getFileHash(file) : null;
|
|
1476
|
+
if (!force && liveHash && cachedFileHash === liveHash) {
|
|
1477
|
+
if (this.config.verbose) console.info(`[Indexer] Skipped ${path.basename(file)} (unchanged)`);
|
|
1478
|
+
this.cache.setFileHash(file, liveHash, { size, mtimeMs });
|
|
1479
|
+
continue;
|
|
1480
|
+
}
|
|
671
1481
|
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
if (
|
|
681
|
-
|
|
1482
|
+
if (useWorkersForBatch) {
|
|
1483
|
+
filesForWorkers.push({ file, content, force, expectedHash: liveHash });
|
|
1484
|
+
// Initialize stats placeholder (will be updated with worker results)
|
|
1485
|
+
fileStats.set(file, { hash: liveHash, totalChunks: 0, successChunks: 0, size, mtimeMs });
|
|
1486
|
+
continue;
|
|
1487
|
+
}
|
|
1488
|
+
|
|
1489
|
+
// Legacy / Fallback path: Chunk on main thread
|
|
1490
|
+
if (this.config.callGraphEnabled) {
|
|
1491
|
+
try {
|
|
1492
|
+
const callData = extractCallData(content, file);
|
|
1493
|
+
callDataByFile.set(file, callData);
|
|
1494
|
+
} catch (err) {
|
|
1495
|
+
if (this.config.verbose) {
|
|
1496
|
+
console.warn(
|
|
1497
|
+
`[Indexer] Call graph extraction failed for ${path.basename(file)}: ${err.message}`
|
|
1498
|
+
);
|
|
1499
|
+
}
|
|
1500
|
+
}
|
|
1501
|
+
}
|
|
1502
|
+
|
|
1503
|
+
const rawChunks = smartChunk(content, file, this.config);
|
|
1504
|
+
const chunks = Array.isArray(rawChunks) ? rawChunks : [];
|
|
1505
|
+
fileStats.set(file, { hash: liveHash, totalChunks: chunks.length, successChunks: 0, size, mtimeMs });
|
|
1506
|
+
|
|
1507
|
+
for (const chunk of chunks) {
|
|
1508
|
+
allChunks.push({
|
|
1509
|
+
file,
|
|
1510
|
+
text: chunk.text,
|
|
1511
|
+
startLine: chunk.startLine,
|
|
1512
|
+
endLine: chunk.endLine,
|
|
1513
|
+
});
|
|
682
1514
|
}
|
|
683
1515
|
}
|
|
684
|
-
}
|
|
685
1516
|
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
1517
|
+
// Process files with workers (New Path)
|
|
1518
|
+
if (filesForWorkers.length > 0) {
|
|
1519
|
+
const results = await this.processFilesWithWorkers(filesForWorkers);
|
|
1520
|
+
|
|
1521
|
+
for (const res of results) {
|
|
1522
|
+
const stats = fileStats.get(res.file);
|
|
1523
|
+
if (res.status === 'indexed' && stats) {
|
|
1524
|
+
stats.totalChunks = res.results.length;
|
|
1525
|
+
stats.successChunks = res.results.length;
|
|
1526
|
+
if (res.hash) stats.hash = res.hash; // Update with new hash from worker
|
|
1527
|
+
if (res.callData) callDataByFile.set(res.file, res.callData);
|
|
1528
|
+
|
|
1529
|
+
const chunks = res.results.map(r => ({
|
|
1530
|
+
file: res.file,
|
|
1531
|
+
startLine: r.startLine,
|
|
1532
|
+
endLine: r.endLine,
|
|
1533
|
+
content: r.text,
|
|
1534
|
+
vector: toFloat32Array(r.vectorBuffer),
|
|
1535
|
+
}));
|
|
1536
|
+
newChunksByFile.set(res.file, chunks);
|
|
1537
|
+
} else if (res.status === 'unchanged' && stats) {
|
|
1538
|
+
// Worker found file hash matches old hash
|
|
1539
|
+
stats.totalChunks = 0; // Signal skip commit
|
|
1540
|
+
stats.successChunks = 0;
|
|
1541
|
+
stats.hash = res.hash;
|
|
1542
|
+
this.cache.setFileHash(res.file, res.hash, { size: res.size, mtimeMs: res.mtimeMs });
|
|
1543
|
+
if (res.callData && this.config.callGraphEnabled) {
|
|
1544
|
+
this.cache.setFileCallData(res.file, res.callData);
|
|
1545
|
+
}
|
|
1546
|
+
} else if ((res.status === 'retry' || res.status === 'error') && stats) {
|
|
1547
|
+
// Worker failed, fallback to local chunking + single threaded
|
|
1548
|
+
const original = filesForWorkers.find(f => f.file === res.file);
|
|
1549
|
+
if (original) {
|
|
1550
|
+
if (this.config.verbose) console.info(`[Indexer] Fallback for ${path.basename(res.file)}`);
|
|
1551
|
+
|
|
1552
|
+
if (this.config.callGraphEnabled) {
|
|
1553
|
+
try {
|
|
1554
|
+
callDataByFile.set(res.file, extractCallData(original.content, res.file));
|
|
1555
|
+
} catch (err) {
|
|
1556
|
+
if (this.config.verbose) {
|
|
1557
|
+
console.warn(
|
|
1558
|
+
`[Indexer] Call graph extraction failed for ${path.basename(res.file)}: ${err.message}`
|
|
1559
|
+
);
|
|
1560
|
+
}
|
|
1561
|
+
}
|
|
1562
|
+
}
|
|
1563
|
+
const fallbackChunks = smartChunk(original.content, res.file, this.config);
|
|
1564
|
+
const chunks = Array.isArray(fallbackChunks) ? fallbackChunks : [];
|
|
1565
|
+
stats.totalChunks = chunks.length;
|
|
1566
|
+
for (const chunk of chunks) {
|
|
1567
|
+
allChunks.push({
|
|
1568
|
+
file: res.file,
|
|
1569
|
+
text: chunk.text,
|
|
1570
|
+
startLine: chunk.startLine,
|
|
1571
|
+
endLine: chunk.endLine,
|
|
1572
|
+
});
|
|
1573
|
+
}
|
|
1574
|
+
}
|
|
1575
|
+
}
|
|
1576
|
+
}
|
|
1577
|
+
}
|
|
693
1578
|
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
this.
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
}
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
stats.
|
|
1579
|
+
// Process chunks (Legacy Path & Fallbacks)
|
|
1580
|
+
if (allChunks.length > 0) {
|
|
1581
|
+
const chunksToProcess = allChunks.slice();
|
|
1582
|
+
let results = [];
|
|
1583
|
+
if (this.config.embeddingProcessPerBatch) {
|
|
1584
|
+
results = await this.processChunksInChildProcess(chunksToProcess);
|
|
1585
|
+
} else {
|
|
1586
|
+
// If we are here, either workers are disabled/full or these are retry chunks
|
|
1587
|
+
// Use single threaded fallback if not using child process
|
|
1588
|
+
results = await this.processChunksSingleThreaded(chunksToProcess);
|
|
1589
|
+
}
|
|
1590
|
+
|
|
1591
|
+
for (const result of results) {
|
|
1592
|
+
const stats = fileStats.get(result.file);
|
|
1593
|
+
if (result.success && stats) {
|
|
1594
|
+
const items = newChunksByFile.get(result.file) || [];
|
|
1595
|
+
items.push({
|
|
1596
|
+
file: result.file,
|
|
1597
|
+
startLine: result.startLine,
|
|
1598
|
+
endLine: result.endLine,
|
|
1599
|
+
content: result.content,
|
|
1600
|
+
vector: toFloat32Array(result.vector),
|
|
1601
|
+
});
|
|
1602
|
+
newChunksByFile.set(result.file, items);
|
|
1603
|
+
stats.successChunks++;
|
|
1604
|
+
}
|
|
1605
|
+
}
|
|
1606
|
+
}
|
|
1607
|
+
|
|
1608
|
+
// Commit changes to cache
|
|
1609
|
+
for (const [file, stats] of fileStats) {
|
|
1610
|
+
if (stats.totalChunks > 0 && stats.successChunks === stats.totalChunks) {
|
|
1611
|
+
this.cache.removeFileFromStore(file);
|
|
1612
|
+
const newChunks = newChunksByFile.get(file) || [];
|
|
1613
|
+
for (const chunk of newChunks) {
|
|
1614
|
+
this.cache.addToStore(chunk);
|
|
1615
|
+
totalChunks++;
|
|
1616
|
+
}
|
|
1617
|
+
this.cache.setFileHash(file, stats.hash, { size: stats.size, mtimeMs: stats.mtimeMs });
|
|
1618
|
+
const callData = callDataByFile.get(file);
|
|
1619
|
+
if (callData && this.config.callGraphEnabled) {
|
|
1620
|
+
this.cache.setFileCallData(file, callData);
|
|
1621
|
+
}
|
|
1622
|
+
} else if (stats.totalChunks === 0) {
|
|
1623
|
+
// File had no chunks (empty or comments only), just mark as indexed
|
|
1624
|
+
this.cache.setFileHash(file, stats.hash, { size: stats.size, mtimeMs: stats.mtimeMs });
|
|
1625
|
+
const callData = callDataByFile.get(file);
|
|
1626
|
+
if (callData && this.config.callGraphEnabled) {
|
|
1627
|
+
this.cache.setFileCallData(file, callData);
|
|
1628
|
+
}
|
|
1629
|
+
} else if (this.config.verbose) {
|
|
1630
|
+
console.warn(
|
|
1631
|
+
`[Indexer] Skipped hash update for ${path.basename(file)} (${stats.successChunks}/${stats.totalChunks} chunks embedded)`
|
|
1632
|
+
);
|
|
708
1633
|
}
|
|
709
1634
|
}
|
|
1635
|
+
|
|
1636
|
+
if (global.gc) global.gc();
|
|
1637
|
+
|
|
1638
|
+
processedFiles += batch.length;
|
|
1639
|
+
|
|
1640
|
+
// Progress indicator
|
|
1641
|
+
if (
|
|
1642
|
+
processedFiles % (adaptiveBatchSize * 2) === 0 ||
|
|
1643
|
+
processedFiles === filesToProcess.length
|
|
1644
|
+
) {
|
|
1645
|
+
const elapsed = ((Date.now() - totalStartTime) / 1000).toFixed(1);
|
|
1646
|
+
const rate = (processedFiles / parseFloat(elapsed)).toFixed(1);
|
|
1647
|
+
console.info(
|
|
1648
|
+
`[Indexer] Progress: ${processedFiles}/${filesToProcess.length} files (${rate} files/sec, ${elapsed}s elapsed)`
|
|
1649
|
+
);
|
|
1650
|
+
const progressPercent = Math.floor(10 + (processedFiles / filesToProcess.length) * 85);
|
|
1651
|
+
this.sendProgress(
|
|
1652
|
+
progressPercent,
|
|
1653
|
+
100,
|
|
1654
|
+
`Indexed ${processedFiles}/${filesToProcess.length} files (${rate}/sec)`
|
|
1655
|
+
);
|
|
1656
|
+
}
|
|
1657
|
+
|
|
1658
|
+
// Batch-level memory cleanup to reduce peak usage
|
|
1659
|
+
allChunks.length = 0;
|
|
1660
|
+
filesForWorkers.length = 0;
|
|
1661
|
+
fileStats.clear();
|
|
1662
|
+
newChunksByFile.clear();
|
|
1663
|
+
callDataByFile.clear();
|
|
1664
|
+
await delay(0);
|
|
1665
|
+
}
|
|
1666
|
+
|
|
1667
|
+
// Cleanup workers
|
|
1668
|
+
if (this.workers.length > 0) {
|
|
1669
|
+
await this.terminateWorkers();
|
|
710
1670
|
}
|
|
1671
|
+
if (global.gc) global.gc();
|
|
1672
|
+
|
|
1673
|
+
const totalDurationMs = Date.now() - totalStartTime;
|
|
1674
|
+
const totalTime = (totalDurationMs / 1000).toFixed(1);
|
|
1675
|
+
console.info(
|
|
1676
|
+
`[Indexer] Embedding pass complete: ${totalChunks} chunks from ${filesToProcess.length} files in ${totalTime}s`
|
|
1677
|
+
);
|
|
1678
|
+
|
|
1679
|
+
// Send completion progress
|
|
1680
|
+
this.sendProgress(
|
|
1681
|
+
100,
|
|
1682
|
+
100,
|
|
1683
|
+
`Complete: ${totalChunks} chunks from ${filesToProcess.length} files in ${totalTime}s`
|
|
1684
|
+
);
|
|
1685
|
+
|
|
1686
|
+
this.cache.setLastIndexDuration?.(totalDurationMs);
|
|
1687
|
+
this.cache.setLastIndexStats?.({
|
|
1688
|
+
lastIndexStartedAt: indexStartedAt,
|
|
1689
|
+
lastIndexEndedAt: new Date().toISOString(),
|
|
1690
|
+
lastDiscoveredFiles: files.length,
|
|
1691
|
+
lastFilesProcessed: filesToProcess.length,
|
|
1692
|
+
lastIndexMode: indexMode,
|
|
1693
|
+
lastBatchSize: adaptiveBatchSize,
|
|
1694
|
+
lastWorkerThreads: resolvedWorkerThreads,
|
|
1695
|
+
lastEmbeddingProcessPerBatch: this.config.embeddingProcessPerBatch,
|
|
1696
|
+
});
|
|
1697
|
+
await this.cache.save();
|
|
711
1698
|
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
1699
|
+
if (this.config.clearCacheAfterIndex) {
|
|
1700
|
+
console.info(
|
|
1701
|
+
'[Indexer] clearCacheAfterIndex enabled; in-memory vectors will be reloaded on next query'
|
|
1702
|
+
);
|
|
1703
|
+
await this.cache.dropInMemoryVectors();
|
|
1704
|
+
if (this.config.verbose) {
|
|
1705
|
+
console.info('[Cache] Cleared in-memory vectors after indexing');
|
|
718
1706
|
}
|
|
719
1707
|
}
|
|
720
1708
|
|
|
721
|
-
|
|
1709
|
+
// Rebuild call graph in background
|
|
1710
|
+
if (this.config.callGraphEnabled) {
|
|
1711
|
+
this.cache.rebuildCallGraph();
|
|
1712
|
+
}
|
|
722
1713
|
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
1714
|
+
if (!this.config.clearCacheAfterIndex) {
|
|
1715
|
+
void this.cache.ensureAnnIndex().catch((error) => {
|
|
1716
|
+
if (this.config.verbose) {
|
|
1717
|
+
console.warn(`[ANN] Background ANN build failed: ${error.message}`);
|
|
1718
|
+
}
|
|
1719
|
+
});
|
|
1720
|
+
}
|
|
728
1721
|
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
1722
|
+
const vectorStore = this.cache.getVectorStore();
|
|
1723
|
+
return {
|
|
1724
|
+
skipped: false,
|
|
1725
|
+
filesProcessed: filesToProcess.length,
|
|
1726
|
+
chunksCreated: totalChunks,
|
|
1727
|
+
totalFiles: new Set(vectorStore.map((v) => v.file)).size,
|
|
1728
|
+
totalChunks: vectorStore.length,
|
|
1729
|
+
duration: totalTime,
|
|
1730
|
+
message: `Indexed ${filesToProcess.length} files (${totalChunks} chunks) in ${totalTime}s`,
|
|
1731
|
+
};
|
|
1732
|
+
} finally {
|
|
1733
|
+
if (memoryTimer) {
|
|
1734
|
+
clearInterval(memoryTimer);
|
|
732
1735
|
}
|
|
1736
|
+
logMemory('end');
|
|
1737
|
+
this.isIndexing = false;
|
|
1738
|
+
try {
|
|
1739
|
+
await this.processPendingWatchEvents();
|
|
1740
|
+
} catch (error) {
|
|
1741
|
+
console.warn(`[Indexer] Failed to apply queued file updates: ${error.message}`);
|
|
733
1742
|
}
|
|
1743
|
+
}
|
|
1744
|
+
}
|
|
734
1745
|
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
1746
|
+
enqueueWatchEvent(type, filePath) {
|
|
1747
|
+
// If it's a delete, it always wins
|
|
1748
|
+
if (type === 'unlink') {
|
|
1749
|
+
this.pendingWatchEvents.set(filePath, 'unlink');
|
|
1750
|
+
return;
|
|
738
1751
|
}
|
|
739
1752
|
|
|
740
|
-
|
|
741
|
-
|
|
1753
|
+
// If we're adding/changing, it overwrites a potential unlink (file came back)
|
|
1754
|
+
this.pendingWatchEvents.set(filePath, type);
|
|
1755
|
+
}
|
|
742
1756
|
|
|
743
|
-
|
|
744
|
-
this.
|
|
1757
|
+
async processPendingWatchEvents() {
|
|
1758
|
+
if (this.processingWatchEvents || this.pendingWatchEvents.size === 0) {
|
|
1759
|
+
return;
|
|
1760
|
+
}
|
|
745
1761
|
|
|
746
|
-
|
|
1762
|
+
this.processingWatchEvents = true;
|
|
1763
|
+
try {
|
|
1764
|
+
while (this.pendingWatchEvents.size > 0) {
|
|
1765
|
+
const pending = Array.from(this.pendingWatchEvents.entries());
|
|
1766
|
+
this.pendingWatchEvents.clear();
|
|
747
1767
|
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
1768
|
+
for (const [filePath, type] of pending) {
|
|
1769
|
+
if (this.server && this.server.hybridSearch) {
|
|
1770
|
+
this.server.hybridSearch.clearFileModTime(filePath);
|
|
1771
|
+
}
|
|
752
1772
|
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
1773
|
+
if (type === 'unlink') {
|
|
1774
|
+
await this.cache.removeFileFromStore(filePath);
|
|
1775
|
+
this.cache.deleteFileHash(filePath);
|
|
1776
|
+
} else {
|
|
1777
|
+
await this.indexFile(filePath);
|
|
1778
|
+
}
|
|
1779
|
+
}
|
|
758
1780
|
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
skipped: false,
|
|
762
|
-
filesProcessed: filesToProcess.length,
|
|
763
|
-
chunksCreated: totalChunks,
|
|
764
|
-
totalFiles: new Set(vectorStore.map(v => v.file)).size,
|
|
765
|
-
totalChunks: vectorStore.length,
|
|
766
|
-
duration: totalTime,
|
|
767
|
-
message: `Indexed ${filesToProcess.length} files (${totalChunks} chunks) in ${totalTime}s`
|
|
768
|
-
};
|
|
1781
|
+
await this.cache.save();
|
|
1782
|
+
}
|
|
769
1783
|
} finally {
|
|
770
|
-
this.
|
|
1784
|
+
this.processingWatchEvents = false;
|
|
771
1785
|
}
|
|
772
1786
|
}
|
|
773
1787
|
|
|
774
|
-
setupFileWatcher() {
|
|
1788
|
+
async setupFileWatcher() {
|
|
775
1789
|
if (!this.config.watchFiles) return;
|
|
776
1790
|
|
|
777
|
-
|
|
1791
|
+
// Close existing watcher if active to prevent leaks
|
|
1792
|
+
if (this.watcher) {
|
|
1793
|
+
await this.watcher.close();
|
|
1794
|
+
this.watcher = null;
|
|
1795
|
+
}
|
|
1796
|
+
|
|
1797
|
+
await this.loadGitignore();
|
|
1798
|
+
|
|
1799
|
+
const pattern = [
|
|
1800
|
+
...this.config.fileExtensions.map((ext) => `**/*.${ext}`),
|
|
1801
|
+
...(this.config.fileNames || []).map((name) => `**/${name}`),
|
|
1802
|
+
];
|
|
1803
|
+
|
|
1804
|
+
const ignored = (filePath) => {
|
|
1805
|
+
const fullPath = path.isAbsolute(filePath)
|
|
1806
|
+
? filePath
|
|
1807
|
+
: path.join(this.config.searchDirectory, filePath);
|
|
1808
|
+
return this.isExcluded(fullPath);
|
|
1809
|
+
};
|
|
778
1810
|
|
|
779
1811
|
this.watcher = chokidar.watch(pattern, {
|
|
780
1812
|
cwd: this.config.searchDirectory,
|
|
781
|
-
ignored
|
|
1813
|
+
ignored,
|
|
782
1814
|
persistent: true,
|
|
783
|
-
ignoreInitial: true
|
|
1815
|
+
ignoreInitial: true,
|
|
784
1816
|
});
|
|
785
1817
|
|
|
786
1818
|
this.watcher
|
|
787
|
-
.on(
|
|
1819
|
+
.on('add', async (filePath) => {
|
|
788
1820
|
const fullPath = path.join(this.config.searchDirectory, filePath);
|
|
789
|
-
console.
|
|
1821
|
+
console.info(`[Indexer] New file detected: ${filePath}`);
|
|
1822
|
+
|
|
1823
|
+
if (this.isIndexing || this.processingWatchEvents) {
|
|
1824
|
+
if (this.config.verbose) {
|
|
1825
|
+
console.info(`[Indexer] Queued add event during indexing: ${filePath}`);
|
|
1826
|
+
}
|
|
1827
|
+
this.enqueueWatchEvent('add', fullPath);
|
|
1828
|
+
return;
|
|
1829
|
+
}
|
|
790
1830
|
|
|
791
1831
|
// Invalidate recency cache
|
|
792
1832
|
if (this.server && this.server.hybridSearch) {
|
|
@@ -796,9 +1836,17 @@ export class CodebaseIndexer {
|
|
|
796
1836
|
await this.indexFile(fullPath);
|
|
797
1837
|
await this.cache.save();
|
|
798
1838
|
})
|
|
799
|
-
.on(
|
|
1839
|
+
.on('change', async (filePath) => {
|
|
800
1840
|
const fullPath = path.join(this.config.searchDirectory, filePath);
|
|
801
|
-
console.
|
|
1841
|
+
console.info(`[Indexer] File changed: ${filePath}`);
|
|
1842
|
+
|
|
1843
|
+
if (this.isIndexing || this.processingWatchEvents) {
|
|
1844
|
+
if (this.config.verbose) {
|
|
1845
|
+
console.info(`[Indexer] Queued change event during indexing: ${filePath}`);
|
|
1846
|
+
}
|
|
1847
|
+
this.enqueueWatchEvent('change', fullPath);
|
|
1848
|
+
return;
|
|
1849
|
+
}
|
|
802
1850
|
|
|
803
1851
|
// Invalidate recency cache
|
|
804
1852
|
if (this.server && this.server.hybridSearch) {
|
|
@@ -808,46 +1856,55 @@ export class CodebaseIndexer {
|
|
|
808
1856
|
await this.indexFile(fullPath);
|
|
809
1857
|
await this.cache.save();
|
|
810
1858
|
})
|
|
811
|
-
.on(
|
|
1859
|
+
.on('unlink', async (filePath) => {
|
|
812
1860
|
const fullPath = path.join(this.config.searchDirectory, filePath);
|
|
813
|
-
console.
|
|
1861
|
+
console.info(`[Indexer] File deleted: ${filePath}`);
|
|
1862
|
+
|
|
1863
|
+
if (this.isIndexing || this.processingWatchEvents) {
|
|
1864
|
+
if (this.config.verbose) {
|
|
1865
|
+
console.info(`[Indexer] Queued delete event during indexing: ${filePath}`);
|
|
1866
|
+
}
|
|
1867
|
+
this.enqueueWatchEvent('unlink', fullPath);
|
|
1868
|
+
return;
|
|
1869
|
+
}
|
|
814
1870
|
|
|
815
1871
|
// Invalidate recency cache
|
|
816
1872
|
if (this.server && this.server.hybridSearch) {
|
|
817
1873
|
this.server.hybridSearch.clearFileModTime(fullPath);
|
|
818
1874
|
}
|
|
819
1875
|
|
|
820
|
-
this.cache.removeFileFromStore(fullPath);
|
|
1876
|
+
await this.cache.removeFileFromStore(fullPath);
|
|
821
1877
|
this.cache.deleteFileHash(fullPath);
|
|
822
|
-
this.cache.save();
|
|
1878
|
+
await this.cache.save();
|
|
823
1879
|
});
|
|
824
1880
|
|
|
825
|
-
console.
|
|
1881
|
+
console.info('[Indexer] File watcher enabled for incremental indexing');
|
|
826
1882
|
}
|
|
827
1883
|
}
|
|
828
1884
|
|
|
829
1885
|
// MCP Tool definition for this feature
|
|
830
1886
|
/**
 * Build the MCP tool descriptor for the manual reindex tool.
 *
 * @returns {{name: string, description: string, inputSchema: object, annotations: object}}
 *   Tool name, human-readable description, JSON-schema style input
 *   definition (a single optional boolean `force`, default `false`) and
 *   the tool's behaviour hints.
 */
export function getToolDefinition() {
  // Only one argument is accepted: an optional flag to bypass change detection.
  const forceProperty = {
    type: 'boolean',
    description: "Force reindex even if files haven't changed",
    default: false,
  };

  const inputSchema = {
    type: 'object',
    properties: { force: forceProperty },
  };

  const annotations = {
    title: 'Reindex Codebase',
    readOnlyHint: false,
    destructiveHint: false,
    idempotentHint: true,
    openWorldHint: false,
  };

  return {
    name: 'b_index_codebase',
    description:
      'Manually trigger a full reindex of the codebase. This will scan all files and update the embeddings cache. Useful after large code changes or if the index seems out of date.',
    inputSchema,
    annotations,
  };
}
|
|
853
1910
|
|
|
@@ -859,10 +1916,12 @@ export async function handleToolCall(request, indexer) {
|
|
|
859
1916
|
// Handle case when indexing was skipped due to concurrent request
|
|
860
1917
|
if (result?.skipped) {
|
|
861
1918
|
return {
|
|
862
|
-
content: [
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
1919
|
+
content: [
|
|
1920
|
+
{
|
|
1921
|
+
type: 'text',
|
|
1922
|
+
text: `Indexing skipped: ${result.reason}\n\nPlease wait for the current indexing operation to complete before requesting another reindex.`,
|
|
1923
|
+
},
|
|
1924
|
+
],
|
|
866
1925
|
};
|
|
867
1926
|
}
|
|
868
1927
|
|
|
@@ -870,9 +1929,9 @@ export async function handleToolCall(request, indexer) {
|
|
|
870
1929
|
const vectorStore = indexer.cache.getVectorStore();
|
|
871
1930
|
const stats = {
|
|
872
1931
|
totalChunks: result?.totalChunks ?? vectorStore.length,
|
|
873
|
-
totalFiles: result?.totalFiles ?? new Set(vectorStore.map(v => v.file)).size,
|
|
1932
|
+
totalFiles: result?.totalFiles ?? new Set(vectorStore.map((v) => v.file)).size,
|
|
874
1933
|
filesProcessed: result?.filesProcessed ?? 0,
|
|
875
|
-
chunksCreated: result?.chunksCreated ?? 0
|
|
1934
|
+
chunksCreated: result?.chunksCreated ?? 0,
|
|
876
1935
|
};
|
|
877
1936
|
|
|
878
1937
|
let message = result?.message
|
|
@@ -886,9 +1945,11 @@ export async function handleToolCall(request, indexer) {
|
|
|
886
1945
|
}
|
|
887
1946
|
|
|
888
1947
|
return {
|
|
889
|
-
content: [
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
1948
|
+
content: [
|
|
1949
|
+
{
|
|
1950
|
+
type: 'text',
|
|
1951
|
+
text: message,
|
|
1952
|
+
},
|
|
1953
|
+
],
|
|
893
1954
|
};
|
|
894
1955
|
}
|