ruvector 0.2.28 → 0.2.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +2270 -2270
- package/bin/cli.js +9570 -9479
- package/bin/mcp-server.js +3854 -3854
- package/dist/core/intelligence-engine.d.ts +13 -0
- package/dist/core/intelligence-engine.d.ts.map +1 -1
- package/dist/core/intelligence-engine.js +38 -0
- package/dist/core/onnx/bundled-parallel.mjs +164 -164
- package/dist/core/onnx/embed-worker.mjs +67 -67
- package/dist/core/onnx/loader.js +434 -434
- package/dist/core/onnx/package.json +3 -3
- package/dist/core/onnx/pkg/LICENSE +21 -21
- package/dist/core/onnx/pkg/loader.js +348 -348
- package/dist/core/onnx/pkg/package.json +3 -3
- package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm.d.ts +112 -112
- package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm.js +5 -5
- package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm_bg.js +638 -638
- package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm_bg.wasm.d.ts +29 -29
- package/dist/core/parallel-workers.js +439 -439
- package/dist/workers/benchmark.js +15 -15
- package/package.json +122 -122
- package/src/decompiler/api-prober.js +302 -302
- package/src/decompiler/index.js +463 -463
- package/src/decompiler/metrics.js +86 -86
- package/src/decompiler/model-decompiler.js +423 -423
- package/src/decompiler/module-splitter.js +498 -498
- package/src/decompiler/module-tree.js +142 -142
- package/src/decompiler/name-predictor.js +400 -400
- package/src/decompiler/npm-fetch.js +176 -176
- package/src/decompiler/reconstructor.js +499 -499
- package/src/decompiler/reference-tracker.js +285 -285
- package/src/decompiler/statement-parser.js +285 -285
- package/src/decompiler/style-improver.js +438 -438
- package/src/decompiler/subcategories.js +339 -339
- package/src/decompiler/validator.js +379 -379
- package/src/decompiler/witness.js +140 -140
- package/wasm/package.json +26 -26
- package/wasm/ruvector_decompiler_wasm.d.ts +27 -27
- package/wasm/ruvector_decompiler_wasm.js +220 -220
- package/wasm/ruvector_decompiler_wasm_bg.wasm.d.ts +16 -16
- package/dist/core/onnx/pkg/ruvector.db +0 -0
|
@@ -105,9 +105,9 @@ class ExtendedWorkerPool {
|
|
|
105
105
|
const workerBlob = new Blob([workerCode], { type: 'application/javascript' });
|
|
106
106
|
for (let i = 0; i < this.config.numWorkers; i++) {
|
|
107
107
|
// Create worker from inline code
|
|
108
|
-
const worker = new worker_threads_1.Worker(`
|
|
109
|
-
const { parentPort, workerData } = require('worker_threads');
|
|
110
|
-
${this.getWorkerHandlers()}
|
|
108
|
+
const worker = new worker_threads_1.Worker(`
|
|
109
|
+
const { parentPort, workerData } = require('worker_threads');
|
|
110
|
+
${this.getWorkerHandlers()}
|
|
111
111
|
`, { eval: true, workerData: { workerId: i } });
|
|
112
112
|
worker.on('message', (result) => {
|
|
113
113
|
this.handleWorkerResult(worker, result);
|
|
@@ -122,445 +122,445 @@ class ExtendedWorkerPool {
|
|
|
122
122
|
this.initialized = true;
|
|
123
123
|
}
|
|
124
124
|
getWorkerCode() {
|
|
125
|
-
return `
|
|
126
|
-
const { parentPort, workerData } = require('worker_threads');
|
|
127
|
-
${this.getWorkerHandlers()}
|
|
125
|
+
return `
|
|
126
|
+
const { parentPort, workerData } = require('worker_threads');
|
|
127
|
+
${this.getWorkerHandlers()}
|
|
128
128
|
`;
|
|
129
129
|
}
|
|
130
130
|
getWorkerHandlers() {
|
|
131
|
-
return `
|
|
132
|
-
parentPort.on('message', async (task) => {
|
|
133
|
-
try {
|
|
134
|
-
let result;
|
|
135
|
-
switch (task.type) {
|
|
136
|
-
case 'speculative-embed':
|
|
137
|
-
result = await speculativeEmbed(task.files, task.coEditGraph);
|
|
138
|
-
break;
|
|
139
|
-
case 'ast-analyze':
|
|
140
|
-
result = await astAnalyze(task.files);
|
|
141
|
-
break;
|
|
142
|
-
case 'security-scan':
|
|
143
|
-
result = await securityScan(task.files, task.rules);
|
|
144
|
-
break;
|
|
145
|
-
case 'rag-retrieve':
|
|
146
|
-
result = await ragRetrieve(task.query, task.chunks, task.topK);
|
|
147
|
-
break;
|
|
148
|
-
case 'context-rank':
|
|
149
|
-
result = await contextRank(task.context, task.query);
|
|
150
|
-
break;
|
|
151
|
-
case 'git-blame':
|
|
152
|
-
result = await gitBlame(task.files);
|
|
153
|
-
break;
|
|
154
|
-
case 'git-churn':
|
|
155
|
-
result = await gitChurn(task.files, task.since);
|
|
156
|
-
break;
|
|
157
|
-
case 'complexity-analyze':
|
|
158
|
-
result = await complexityAnalyze(task.files);
|
|
159
|
-
break;
|
|
160
|
-
case 'dependency-graph':
|
|
161
|
-
result = await dependencyGraph(task.entryPoints);
|
|
162
|
-
break;
|
|
163
|
-
case 'deduplicate':
|
|
164
|
-
result = await deduplicate(task.items, task.threshold);
|
|
165
|
-
break;
|
|
166
|
-
default:
|
|
167
|
-
throw new Error('Unknown task type: ' + task.type);
|
|
168
|
-
}
|
|
169
|
-
parentPort.postMessage({ success: true, data: result, taskId: task.taskId });
|
|
170
|
-
} catch (error) {
|
|
171
|
-
parentPort.postMessage({ success: false, error: error.message, taskId: task.taskId });
|
|
172
|
-
}
|
|
173
|
-
});
|
|
174
|
-
|
|
175
|
-
// Worker implementations
|
|
176
|
-
|
|
177
|
-
// Hash-based embedding: deterministic, no external deps, 128-dim
|
|
178
|
-
function hashEmbed(text, dim = 128) {
|
|
179
|
-
const embedding = new Float64Array(dim);
|
|
180
|
-
const tokens = text.split(/\\s+|[{}()\\[\\];,.<>=/+\\-*&|!~^%@#]/);
|
|
181
|
-
|
|
182
|
-
for (let t = 0; t < tokens.length; t++) {
|
|
183
|
-
const token = tokens[t];
|
|
184
|
-
if (!token) continue;
|
|
185
|
-
|
|
186
|
-
// FNV-1a hash
|
|
187
|
-
let h = 0x811c9dc5;
|
|
188
|
-
for (let i = 0; i < token.length; i++) {
|
|
189
|
-
h ^= token.charCodeAt(i);
|
|
190
|
-
h = Math.imul(h, 0x01000193);
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
// Positional weight (tokens near start matter more)
|
|
194
|
-
const posWeight = 1.0 / (1.0 + Math.log1p(t));
|
|
195
|
-
|
|
196
|
-
// Distribute across multiple dimensions using hash rotations
|
|
197
|
-
for (let d = 0; d < 4; d++) {
|
|
198
|
-
const idx = ((h >>> 0) + d * 37) % dim;
|
|
199
|
-
const sign = (h & (1 << d)) ? 1 : -1;
|
|
200
|
-
embedding[idx] += sign * posWeight;
|
|
201
|
-
h = (h >>> 7) | (h << 25); // rotate
|
|
202
|
-
}
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
// L2 normalize
|
|
206
|
-
let norm = 0;
|
|
207
|
-
for (let i = 0; i < dim; i++) norm += embedding[i] * embedding[i];
|
|
208
|
-
norm = Math.sqrt(norm) || 1;
|
|
209
|
-
const result = new Array(dim);
|
|
210
|
-
for (let i = 0; i < dim; i++) result[i] = embedding[i] / norm;
|
|
211
|
-
return result;
|
|
212
|
-
}
|
|
213
|
-
|
|
214
|
-
async function speculativeEmbed(files, coEditGraph) {
|
|
215
|
-
const fs = require('fs');
|
|
216
|
-
return files.map(file => {
|
|
217
|
-
try {
|
|
218
|
-
if (!fs.existsSync(file)) {
|
|
219
|
-
return { file, embedding: hashEmbed(file), confidence: 0.2, timestamp: Date.now() };
|
|
220
|
-
}
|
|
221
|
-
const content = fs.readFileSync(file, 'utf8');
|
|
222
|
-
const embedding = hashEmbed(content);
|
|
223
|
-
|
|
224
|
-
// Confidence based on file size (more content = higher confidence)
|
|
225
|
-
const lines = content.split('\\n').length;
|
|
226
|
-
const confidence = Math.min(0.95, 0.3 + (lines / 500) * 0.65);
|
|
227
|
-
|
|
228
|
-
return { file, embedding, confidence, timestamp: Date.now() };
|
|
229
|
-
} catch {
|
|
230
|
-
return { file, embedding: hashEmbed(file), confidence: 0.1, timestamp: Date.now() };
|
|
231
|
-
}
|
|
232
|
-
});
|
|
233
|
-
}
|
|
234
|
-
|
|
235
|
-
async function astAnalyze(files) {
|
|
236
|
-
const fs = require('fs');
|
|
237
|
-
return files.map(file => {
|
|
238
|
-
try {
|
|
239
|
-
const content = fs.existsSync(file) ? fs.readFileSync(file, 'utf8') : '';
|
|
240
|
-
const lines = content.split('\\n');
|
|
241
|
-
return {
|
|
242
|
-
file,
|
|
243
|
-
language: file.split('.').pop() || 'unknown',
|
|
244
|
-
complexity: Math.min(lines.length / 10, 100),
|
|
245
|
-
functions: extractFunctions(content),
|
|
246
|
-
imports: extractImports(content),
|
|
247
|
-
exports: extractExports(content),
|
|
248
|
-
dependencies: [],
|
|
249
|
-
};
|
|
250
|
-
} catch {
|
|
251
|
-
return { file, language: 'unknown', complexity: 0, functions: [], imports: [], exports: [], dependencies: [] };
|
|
252
|
-
}
|
|
253
|
-
});
|
|
254
|
-
}
|
|
255
|
-
|
|
256
|
-
function extractFunctions(content) {
|
|
257
|
-
const patterns = [
|
|
258
|
-
/function\\s+(\\w+)/g,
|
|
259
|
-
/const\\s+(\\w+)\\s*=\\s*(?:async\\s*)?\\([^)]*\\)\\s*=>/g,
|
|
260
|
-
/(?:async\\s+)?(?:public|private|protected)?\\s*(\\w+)\\s*\\([^)]*\\)\\s*{/g,
|
|
261
|
-
];
|
|
262
|
-
const funcs = new Set();
|
|
263
|
-
for (const pattern of patterns) {
|
|
264
|
-
let match;
|
|
265
|
-
while ((match = pattern.exec(content)) !== null) {
|
|
266
|
-
if (match[1] && !['if', 'for', 'while', 'switch', 'catch'].includes(match[1])) {
|
|
267
|
-
funcs.add(match[1]);
|
|
268
|
-
}
|
|
269
|
-
}
|
|
270
|
-
}
|
|
271
|
-
return Array.from(funcs);
|
|
272
|
-
}
|
|
273
|
-
|
|
274
|
-
function extractImports(content) {
|
|
275
|
-
const imports = [];
|
|
276
|
-
const patterns = [
|
|
277
|
-
/import\\s+.*?from\\s+['"]([^'"]+)['"]/g,
|
|
278
|
-
/require\\s*\\(['"]([^'"]+)['"]\\)/g,
|
|
279
|
-
];
|
|
280
|
-
for (const pattern of patterns) {
|
|
281
|
-
let match;
|
|
282
|
-
while ((match = pattern.exec(content)) !== null) {
|
|
283
|
-
imports.push(match[1]);
|
|
284
|
-
}
|
|
285
|
-
}
|
|
286
|
-
return imports;
|
|
287
|
-
}
|
|
288
|
-
|
|
289
|
-
function extractExports(content) {
|
|
290
|
-
const exports = [];
|
|
291
|
-
const patterns = [
|
|
292
|
-
/export\\s+(?:default\\s+)?(?:class|function|const|let|var)\\s+(\\w+)/g,
|
|
293
|
-
/module\\.exports\\s*=\\s*(\\w+)/g,
|
|
294
|
-
];
|
|
295
|
-
for (const pattern of patterns) {
|
|
296
|
-
let match;
|
|
297
|
-
while ((match = pattern.exec(content)) !== null) {
|
|
298
|
-
exports.push(match[1]);
|
|
299
|
-
}
|
|
300
|
-
}
|
|
301
|
-
return exports;
|
|
302
|
-
}
|
|
303
|
-
|
|
304
|
-
async function securityScan(files, rules) {
|
|
305
|
-
const fs = require('fs');
|
|
306
|
-
const findings = [];
|
|
307
|
-
const securityPatterns = [
|
|
308
|
-
{ pattern: /eval\\s*\\(/g, rule: 'no-eval', severity: 'high', message: 'Avoid eval()' },
|
|
309
|
-
{ pattern: /innerHTML\\s*=/g, rule: 'no-inner-html', severity: 'medium', message: 'Avoid innerHTML, use textContent' },
|
|
310
|
-
{ pattern: /password\\s*=\\s*['"][^'"]+['"]/gi, rule: 'no-hardcoded-secrets', severity: 'critical', message: 'Hardcoded password detected' },
|
|
311
|
-
{ pattern: /api[_-]?key\\s*=\\s*['"][^'"]+['"]/gi, rule: 'no-hardcoded-secrets', severity: 'critical', message: 'Hardcoded API key detected' },
|
|
312
|
-
{ pattern: /exec\\s*\\(/g, rule: 'no-exec', severity: 'high', message: 'Avoid exec(), use execFile or spawn' },
|
|
313
|
-
{ pattern: /\\$\\{.*\\}/g, rule: 'template-injection', severity: 'medium', message: 'Potential template injection' },
|
|
314
|
-
];
|
|
315
|
-
|
|
316
|
-
for (const file of files) {
|
|
317
|
-
try {
|
|
318
|
-
if (!fs.existsSync(file)) continue;
|
|
319
|
-
const content = fs.readFileSync(file, 'utf8');
|
|
320
|
-
const lines = content.split('\\n');
|
|
321
|
-
|
|
322
|
-
for (const { pattern, rule, severity, message } of securityPatterns) {
|
|
323
|
-
let match;
|
|
324
|
-
const regex = new RegExp(pattern.source, pattern.flags);
|
|
325
|
-
while ((match = regex.exec(content)) !== null) {
|
|
326
|
-
const lineNum = content.substring(0, match.index).split('\\n').length;
|
|
327
|
-
findings.push({ file, line: lineNum, severity, rule, message });
|
|
328
|
-
}
|
|
329
|
-
}
|
|
330
|
-
} catch {}
|
|
331
|
-
}
|
|
332
|
-
return findings;
|
|
333
|
-
}
|
|
334
|
-
|
|
335
|
-
function cosineSimilarity(a, b) {
|
|
336
|
-
if (!a || !b || a.length !== b.length || a.length === 0) return 0;
|
|
337
|
-
let dot = 0, normA = 0, normB = 0;
|
|
338
|
-
for (let i = 0; i < a.length; i++) {
|
|
339
|
-
dot += a[i] * b[i];
|
|
340
|
-
normA += a[i] * a[i];
|
|
341
|
-
normB += b[i] * b[i];
|
|
342
|
-
}
|
|
343
|
-
const denom = Math.sqrt(normA) * Math.sqrt(normB);
|
|
344
|
-
return denom === 0 ? 0 : dot / denom;
|
|
345
|
-
}
|
|
346
|
-
|
|
347
|
-
async function ragRetrieve(query, chunks, topK) {
|
|
348
|
-
// If chunks have embeddings, use cosine similarity (semantic retrieval)
|
|
349
|
-
const hasEmbeddings = chunks.some(c => c.embedding && c.embedding.length > 0);
|
|
350
|
-
|
|
351
|
-
if (hasEmbeddings) {
|
|
352
|
-
const queryEmbedding = hashEmbed(query, chunks[0].embedding.length);
|
|
353
|
-
return chunks
|
|
354
|
-
.map(chunk => {
|
|
355
|
-
const semantic = chunk.embedding && chunk.embedding.length > 0
|
|
356
|
-
? cosineSimilarity(queryEmbedding, chunk.embedding)
|
|
357
|
-
: 0;
|
|
358
|
-
// Blend semantic + keyword for robustness
|
|
359
|
-
const queryTerms = query.toLowerCase().split(/\\s+/);
|
|
360
|
-
const content = chunk.content.toLowerCase();
|
|
361
|
-
const kwMatches = queryTerms.filter(t => content.includes(t)).length;
|
|
362
|
-
const keyword = queryTerms.length > 0 ? kwMatches / queryTerms.length : 0;
|
|
363
|
-
const relevance = semantic * 0.7 + keyword * 0.3;
|
|
364
|
-
return { ...chunk, relevance };
|
|
365
|
-
})
|
|
366
|
-
.sort((a, b) => b.relevance - a.relevance)
|
|
367
|
-
.slice(0, topK);
|
|
368
|
-
}
|
|
369
|
-
|
|
370
|
-
// Fallback: TF-IDF-weighted keyword matching
|
|
371
|
-
const queryTerms = query.toLowerCase().split(/\\s+/).filter(Boolean);
|
|
372
|
-
const allContent = chunks.map(c => c.content.toLowerCase());
|
|
373
|
-
// IDF: log(N / df) for each query term
|
|
374
|
-
const idf = {};
|
|
375
|
-
for (const term of queryTerms) {
|
|
376
|
-
const df = allContent.filter(c => c.includes(term)).length || 1;
|
|
377
|
-
idf[term] = Math.log(allContent.length / df);
|
|
378
|
-
}
|
|
379
|
-
return chunks
|
|
380
|
-
.map(chunk => {
|
|
381
|
-
const content = chunk.content.toLowerCase();
|
|
382
|
-
const words = content.split(/\\s+/);
|
|
383
|
-
let score = 0;
|
|
384
|
-
for (const term of queryTerms) {
|
|
385
|
-
const tf = words.filter(w => w === term).length / (words.length || 1);
|
|
386
|
-
score += tf * (idf[term] || 1);
|
|
387
|
-
}
|
|
388
|
-
return { ...chunk, relevance: score };
|
|
389
|
-
})
|
|
390
|
-
.sort((a, b) => b.relevance - a.relevance)
|
|
391
|
-
.slice(0, topK);
|
|
392
|
-
}
|
|
393
|
-
|
|
394
|
-
async function contextRank(context, query) {
|
|
395
|
-
// Use TF-IDF scoring instead of raw keyword matching
|
|
396
|
-
const queryTerms = query.toLowerCase().split(/\\s+/).filter(Boolean);
|
|
397
|
-
const allContent = context.map(c => c.toLowerCase());
|
|
398
|
-
const idf = {};
|
|
399
|
-
for (const term of queryTerms) {
|
|
400
|
-
const df = allContent.filter(c => c.includes(term)).length || 1;
|
|
401
|
-
idf[term] = Math.log(allContent.length / df);
|
|
402
|
-
}
|
|
403
|
-
return context
|
|
404
|
-
.map((ctx, i) => {
|
|
405
|
-
const content = ctx.toLowerCase();
|
|
406
|
-
const words = content.split(/\\s+/);
|
|
407
|
-
let score = 0;
|
|
408
|
-
for (const term of queryTerms) {
|
|
409
|
-
const tf = words.filter(w => w === term).length / (words.length || 1);
|
|
410
|
-
score += tf * (idf[term] || 1);
|
|
411
|
-
}
|
|
412
|
-
return { index: i, content: ctx, relevance: score };
|
|
413
|
-
})
|
|
414
|
-
.sort((a, b) => b.relevance - a.relevance);
|
|
415
|
-
}
|
|
416
|
-
|
|
417
|
-
async function gitBlame(files) {
|
|
418
|
-
const { execSync } = require('child_process');
|
|
419
|
-
const results = [];
|
|
420
|
-
for (const file of files) {
|
|
421
|
-
try {
|
|
422
|
-
const output = execSync(\`git blame --line-porcelain "\${file}" 2>/dev/null\`, { encoding: 'utf8', maxBuffer: 10 * 1024 * 1024 });
|
|
423
|
-
const lines = [];
|
|
424
|
-
let currentLine = {};
|
|
425
|
-
for (const line of output.split('\\n')) {
|
|
426
|
-
if (line.startsWith('author ')) currentLine.author = line.slice(7);
|
|
427
|
-
else if (line.startsWith('author-time ')) currentLine.date = new Date(parseInt(line.slice(12)) * 1000).toISOString();
|
|
428
|
-
else if (line.match(/^[a-f0-9]{40}/)) currentLine.commit = line.slice(0, 40);
|
|
429
|
-
else if (line.startsWith('\\t')) {
|
|
430
|
-
lines.push({ ...currentLine, line: lines.length + 1 });
|
|
431
|
-
currentLine = {};
|
|
432
|
-
}
|
|
433
|
-
}
|
|
434
|
-
results.push({ file, lines });
|
|
435
|
-
} catch {
|
|
436
|
-
results.push({ file, lines: [] });
|
|
437
|
-
}
|
|
438
|
-
}
|
|
439
|
-
return results;
|
|
440
|
-
}
|
|
441
|
-
|
|
442
|
-
async function gitChurn(files, since) {
|
|
443
|
-
const { execSync } = require('child_process');
|
|
444
|
-
const results = [];
|
|
445
|
-
const sinceArg = since ? \`--since="\${since}"\` : '--since="30 days ago"';
|
|
446
|
-
|
|
447
|
-
for (const file of files) {
|
|
448
|
-
try {
|
|
449
|
-
const log = execSync(\`git log \${sinceArg} --format="%H|%an|%aI" --numstat -- "\${file}" 2>/dev/null\`, { encoding: 'utf8' });
|
|
450
|
-
let additions = 0, deletions = 0, commits = 0;
|
|
451
|
-
const authors = new Set();
|
|
452
|
-
let lastModified = '';
|
|
453
|
-
|
|
454
|
-
for (const line of log.split('\\n')) {
|
|
455
|
-
if (line.includes('|')) {
|
|
456
|
-
const [commit, author, date] = line.split('|');
|
|
457
|
-
authors.add(author);
|
|
458
|
-
commits++;
|
|
459
|
-
if (!lastModified) lastModified = date;
|
|
460
|
-
} else if (line.match(/^\\d+\\s+\\d+/)) {
|
|
461
|
-
const [add, del] = line.split('\\t');
|
|
462
|
-
additions += parseInt(add) || 0;
|
|
463
|
-
deletions += parseInt(del) || 0;
|
|
464
|
-
}
|
|
465
|
-
}
|
|
466
|
-
|
|
467
|
-
results.push({ file, additions, deletions, commits, authors: Array.from(authors), lastModified });
|
|
468
|
-
} catch {
|
|
469
|
-
results.push({ file, additions: 0, deletions: 0, commits: 0, authors: [], lastModified: '' });
|
|
470
|
-
}
|
|
471
|
-
}
|
|
472
|
-
return results;
|
|
473
|
-
}
|
|
474
|
-
|
|
475
|
-
async function complexityAnalyze(files) {
|
|
476
|
-
const fs = require('fs');
|
|
477
|
-
return files.map(file => {
|
|
478
|
-
try {
|
|
479
|
-
const content = fs.existsSync(file) ? fs.readFileSync(file, 'utf8') : '';
|
|
480
|
-
const lines = content.split('\\n');
|
|
481
|
-
const nonEmpty = lines.filter(l => l.trim()).length;
|
|
482
|
-
const branches = (content.match(/\\b(if|else|switch|case|for|while|catch|\\?|&&|\\|\\|)\\b/g) || []).length;
|
|
483
|
-
const functions = (content.match(/function|=>|\\bdef\\b|\\bfn\\b/g) || []).length;
|
|
484
|
-
|
|
485
|
-
return {
|
|
486
|
-
file,
|
|
487
|
-
lines: lines.length,
|
|
488
|
-
nonEmptyLines: nonEmpty,
|
|
489
|
-
cyclomaticComplexity: branches + 1,
|
|
490
|
-
functions,
|
|
491
|
-
avgFunctionSize: functions > 0 ? Math.round(nonEmpty / functions) : nonEmpty,
|
|
492
|
-
};
|
|
493
|
-
} catch {
|
|
494
|
-
return { file, lines: 0, nonEmptyLines: 0, cyclomaticComplexity: 1, functions: 0, avgFunctionSize: 0 };
|
|
495
|
-
}
|
|
496
|
-
});
|
|
497
|
-
}
|
|
498
|
-
|
|
499
|
-
async function dependencyGraph(entryPoints) {
|
|
500
|
-
const fs = require('fs');
|
|
501
|
-
const path = require('path');
|
|
502
|
-
const graph = new Map();
|
|
503
|
-
|
|
504
|
-
function analyze(file, visited = new Set()) {
|
|
505
|
-
if (visited.has(file)) return;
|
|
506
|
-
visited.add(file);
|
|
507
|
-
|
|
508
|
-
try {
|
|
509
|
-
if (!fs.existsSync(file)) return;
|
|
510
|
-
const content = fs.readFileSync(file, 'utf8');
|
|
511
|
-
const deps = [];
|
|
512
|
-
|
|
513
|
-
// Extract imports
|
|
514
|
-
const importRegex = /(?:import|require)\\s*\\(?['"]([^'"]+)['"]/g;
|
|
515
|
-
let match;
|
|
516
|
-
while ((match = importRegex.exec(content)) !== null) {
|
|
517
|
-
const dep = match[1];
|
|
518
|
-
if (dep.startsWith('.')) {
|
|
519
|
-
const resolved = path.resolve(path.dirname(file), dep);
|
|
520
|
-
deps.push(resolved);
|
|
521
|
-
analyze(resolved, visited);
|
|
522
|
-
} else {
|
|
523
|
-
deps.push(dep);
|
|
524
|
-
}
|
|
525
|
-
}
|
|
526
|
-
|
|
527
|
-
graph.set(file, deps);
|
|
528
|
-
} catch {}
|
|
529
|
-
}
|
|
530
|
-
|
|
531
|
-
for (const entry of entryPoints) {
|
|
532
|
-
analyze(entry);
|
|
533
|
-
}
|
|
534
|
-
|
|
535
|
-
return Object.fromEntries(graph);
|
|
536
|
-
}
|
|
537
|
-
|
|
538
|
-
async function deduplicate(items, threshold) {
|
|
539
|
-
// Simple Jaccard similarity deduplication
|
|
540
|
-
const unique = [];
|
|
541
|
-
const seen = new Set();
|
|
542
|
-
|
|
543
|
-
for (const item of items) {
|
|
544
|
-
const tokens = new Set(item.toLowerCase().split(/\\s+/));
|
|
545
|
-
let isDup = false;
|
|
546
|
-
|
|
547
|
-
for (const existing of unique) {
|
|
548
|
-
const existingTokens = new Set(existing.toLowerCase().split(/\\s+/));
|
|
549
|
-
const intersection = [...tokens].filter(t => existingTokens.has(t)).length;
|
|
550
|
-
const union = new Set([...tokens, ...existingTokens]).size;
|
|
551
|
-
const similarity = intersection / union;
|
|
552
|
-
|
|
553
|
-
if (similarity >= threshold) {
|
|
554
|
-
isDup = true;
|
|
555
|
-
break;
|
|
556
|
-
}
|
|
557
|
-
}
|
|
558
|
-
|
|
559
|
-
if (!isDup) unique.push(item);
|
|
560
|
-
}
|
|
561
|
-
|
|
562
|
-
return unique;
|
|
563
|
-
}
|
|
131
|
+
return `
|
|
132
|
+
parentPort.on('message', async (task) => {
|
|
133
|
+
try {
|
|
134
|
+
let result;
|
|
135
|
+
switch (task.type) {
|
|
136
|
+
case 'speculative-embed':
|
|
137
|
+
result = await speculativeEmbed(task.files, task.coEditGraph);
|
|
138
|
+
break;
|
|
139
|
+
case 'ast-analyze':
|
|
140
|
+
result = await astAnalyze(task.files);
|
|
141
|
+
break;
|
|
142
|
+
case 'security-scan':
|
|
143
|
+
result = await securityScan(task.files, task.rules);
|
|
144
|
+
break;
|
|
145
|
+
case 'rag-retrieve':
|
|
146
|
+
result = await ragRetrieve(task.query, task.chunks, task.topK);
|
|
147
|
+
break;
|
|
148
|
+
case 'context-rank':
|
|
149
|
+
result = await contextRank(task.context, task.query);
|
|
150
|
+
break;
|
|
151
|
+
case 'git-blame':
|
|
152
|
+
result = await gitBlame(task.files);
|
|
153
|
+
break;
|
|
154
|
+
case 'git-churn':
|
|
155
|
+
result = await gitChurn(task.files, task.since);
|
|
156
|
+
break;
|
|
157
|
+
case 'complexity-analyze':
|
|
158
|
+
result = await complexityAnalyze(task.files);
|
|
159
|
+
break;
|
|
160
|
+
case 'dependency-graph':
|
|
161
|
+
result = await dependencyGraph(task.entryPoints);
|
|
162
|
+
break;
|
|
163
|
+
case 'deduplicate':
|
|
164
|
+
result = await deduplicate(task.items, task.threshold);
|
|
165
|
+
break;
|
|
166
|
+
default:
|
|
167
|
+
throw new Error('Unknown task type: ' + task.type);
|
|
168
|
+
}
|
|
169
|
+
parentPort.postMessage({ success: true, data: result, taskId: task.taskId });
|
|
170
|
+
} catch (error) {
|
|
171
|
+
parentPort.postMessage({ success: false, error: error.message, taskId: task.taskId });
|
|
172
|
+
}
|
|
173
|
+
});
|
|
174
|
+
|
|
175
|
+
// Worker implementations
|
|
176
|
+
|
|
177
|
+
// Hash-based embedding: deterministic, no external deps, 128-dim
|
|
178
|
+
function hashEmbed(text, dim = 128) {
|
|
179
|
+
const embedding = new Float64Array(dim);
|
|
180
|
+
const tokens = text.split(/\\s+|[{}()\\[\\];,.<>=/+\\-*&|!~^%@#]/);
|
|
181
|
+
|
|
182
|
+
for (let t = 0; t < tokens.length; t++) {
|
|
183
|
+
const token = tokens[t];
|
|
184
|
+
if (!token) continue;
|
|
185
|
+
|
|
186
|
+
// FNV-1a hash
|
|
187
|
+
let h = 0x811c9dc5;
|
|
188
|
+
for (let i = 0; i < token.length; i++) {
|
|
189
|
+
h ^= token.charCodeAt(i);
|
|
190
|
+
h = Math.imul(h, 0x01000193);
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
// Positional weight (tokens near start matter more)
|
|
194
|
+
const posWeight = 1.0 / (1.0 + Math.log1p(t));
|
|
195
|
+
|
|
196
|
+
// Distribute across multiple dimensions using hash rotations
|
|
197
|
+
for (let d = 0; d < 4; d++) {
|
|
198
|
+
const idx = ((h >>> 0) + d * 37) % dim;
|
|
199
|
+
const sign = (h & (1 << d)) ? 1 : -1;
|
|
200
|
+
embedding[idx] += sign * posWeight;
|
|
201
|
+
h = (h >>> 7) | (h << 25); // rotate
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
// L2 normalize
|
|
206
|
+
let norm = 0;
|
|
207
|
+
for (let i = 0; i < dim; i++) norm += embedding[i] * embedding[i];
|
|
208
|
+
norm = Math.sqrt(norm) || 1;
|
|
209
|
+
const result = new Array(dim);
|
|
210
|
+
for (let i = 0; i < dim; i++) result[i] = embedding[i] / norm;
|
|
211
|
+
return result;
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
async function speculativeEmbed(files, coEditGraph) {
|
|
215
|
+
const fs = require('fs');
|
|
216
|
+
return files.map(file => {
|
|
217
|
+
try {
|
|
218
|
+
if (!fs.existsSync(file)) {
|
|
219
|
+
return { file, embedding: hashEmbed(file), confidence: 0.2, timestamp: Date.now() };
|
|
220
|
+
}
|
|
221
|
+
const content = fs.readFileSync(file, 'utf8');
|
|
222
|
+
const embedding = hashEmbed(content);
|
|
223
|
+
|
|
224
|
+
// Confidence based on file size (more content = higher confidence)
|
|
225
|
+
const lines = content.split('\\n').length;
|
|
226
|
+
const confidence = Math.min(0.95, 0.3 + (lines / 500) * 0.65);
|
|
227
|
+
|
|
228
|
+
return { file, embedding, confidence, timestamp: Date.now() };
|
|
229
|
+
} catch {
|
|
230
|
+
return { file, embedding: hashEmbed(file), confidence: 0.1, timestamp: Date.now() };
|
|
231
|
+
}
|
|
232
|
+
});
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
async function astAnalyze(files) {
|
|
236
|
+
const fs = require('fs');
|
|
237
|
+
return files.map(file => {
|
|
238
|
+
try {
|
|
239
|
+
const content = fs.existsSync(file) ? fs.readFileSync(file, 'utf8') : '';
|
|
240
|
+
const lines = content.split('\\n');
|
|
241
|
+
return {
|
|
242
|
+
file,
|
|
243
|
+
language: file.split('.').pop() || 'unknown',
|
|
244
|
+
complexity: Math.min(lines.length / 10, 100),
|
|
245
|
+
functions: extractFunctions(content),
|
|
246
|
+
imports: extractImports(content),
|
|
247
|
+
exports: extractExports(content),
|
|
248
|
+
dependencies: [],
|
|
249
|
+
};
|
|
250
|
+
} catch {
|
|
251
|
+
return { file, language: 'unknown', complexity: 0, functions: [], imports: [], exports: [], dependencies: [] };
|
|
252
|
+
}
|
|
253
|
+
});
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
function extractFunctions(content) {
|
|
257
|
+
const patterns = [
|
|
258
|
+
/function\\s+(\\w+)/g,
|
|
259
|
+
/const\\s+(\\w+)\\s*=\\s*(?:async\\s*)?\\([^)]*\\)\\s*=>/g,
|
|
260
|
+
/(?:async\\s+)?(?:public|private|protected)?\\s*(\\w+)\\s*\\([^)]*\\)\\s*{/g,
|
|
261
|
+
];
|
|
262
|
+
const funcs = new Set();
|
|
263
|
+
for (const pattern of patterns) {
|
|
264
|
+
let match;
|
|
265
|
+
while ((match = pattern.exec(content)) !== null) {
|
|
266
|
+
if (match[1] && !['if', 'for', 'while', 'switch', 'catch'].includes(match[1])) {
|
|
267
|
+
funcs.add(match[1]);
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
return Array.from(funcs);
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
function extractImports(content) {
|
|
275
|
+
const imports = [];
|
|
276
|
+
const patterns = [
|
|
277
|
+
/import\\s+.*?from\\s+['"]([^'"]+)['"]/g,
|
|
278
|
+
/require\\s*\\(['"]([^'"]+)['"]\\)/g,
|
|
279
|
+
];
|
|
280
|
+
for (const pattern of patterns) {
|
|
281
|
+
let match;
|
|
282
|
+
while ((match = pattern.exec(content)) !== null) {
|
|
283
|
+
imports.push(match[1]);
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
return imports;
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
function extractExports(content) {
|
|
290
|
+
const exports = [];
|
|
291
|
+
const patterns = [
|
|
292
|
+
/export\\s+(?:default\\s+)?(?:class|function|const|let|var)\\s+(\\w+)/g,
|
|
293
|
+
/module\\.exports\\s*=\\s*(\\w+)/g,
|
|
294
|
+
];
|
|
295
|
+
for (const pattern of patterns) {
|
|
296
|
+
let match;
|
|
297
|
+
while ((match = pattern.exec(content)) !== null) {
|
|
298
|
+
exports.push(match[1]);
|
|
299
|
+
}
|
|
300
|
+
}
|
|
301
|
+
return exports;
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
async function securityScan(files, rules) {
|
|
305
|
+
const fs = require('fs');
|
|
306
|
+
const findings = [];
|
|
307
|
+
const securityPatterns = [
|
|
308
|
+
{ pattern: /eval\\s*\\(/g, rule: 'no-eval', severity: 'high', message: 'Avoid eval()' },
|
|
309
|
+
{ pattern: /innerHTML\\s*=/g, rule: 'no-inner-html', severity: 'medium', message: 'Avoid innerHTML, use textContent' },
|
|
310
|
+
{ pattern: /password\\s*=\\s*['"][^'"]+['"]/gi, rule: 'no-hardcoded-secrets', severity: 'critical', message: 'Hardcoded password detected' },
|
|
311
|
+
{ pattern: /api[_-]?key\\s*=\\s*['"][^'"]+['"]/gi, rule: 'no-hardcoded-secrets', severity: 'critical', message: 'Hardcoded API key detected' },
|
|
312
|
+
{ pattern: /exec\\s*\\(/g, rule: 'no-exec', severity: 'high', message: 'Avoid exec(), use execFile or spawn' },
|
|
313
|
+
{ pattern: /\\$\\{.*\\}/g, rule: 'template-injection', severity: 'medium', message: 'Potential template injection' },
|
|
314
|
+
];
|
|
315
|
+
|
|
316
|
+
for (const file of files) {
|
|
317
|
+
try {
|
|
318
|
+
if (!fs.existsSync(file)) continue;
|
|
319
|
+
const content = fs.readFileSync(file, 'utf8');
|
|
320
|
+
const lines = content.split('\\n');
|
|
321
|
+
|
|
322
|
+
for (const { pattern, rule, severity, message } of securityPatterns) {
|
|
323
|
+
let match;
|
|
324
|
+
const regex = new RegExp(pattern.source, pattern.flags);
|
|
325
|
+
while ((match = regex.exec(content)) !== null) {
|
|
326
|
+
const lineNum = content.substring(0, match.index).split('\\n').length;
|
|
327
|
+
findings.push({ file, line: lineNum, severity, rule, message });
|
|
328
|
+
}
|
|
329
|
+
}
|
|
330
|
+
} catch {}
|
|
331
|
+
}
|
|
332
|
+
return findings;
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
function cosineSimilarity(a, b) {
|
|
336
|
+
if (!a || !b || a.length !== b.length || a.length === 0) return 0;
|
|
337
|
+
let dot = 0, normA = 0, normB = 0;
|
|
338
|
+
for (let i = 0; i < a.length; i++) {
|
|
339
|
+
dot += a[i] * b[i];
|
|
340
|
+
normA += a[i] * a[i];
|
|
341
|
+
normB += b[i] * b[i];
|
|
342
|
+
}
|
|
343
|
+
const denom = Math.sqrt(normA) * Math.sqrt(normB);
|
|
344
|
+
return denom === 0 ? 0 : dot / denom;
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
// Rank chunks against a query and return the topK most relevant ones.
//
// Every returned element is a shallow copy of the input chunk with an added
// numeric 'relevance' field; results are sorted by relevance, highest first.
//   - If any chunk has a non-empty 'embedding', relevance is
//       0.7 * cosineSimilarity(hashEmbed(query, dim), chunk.embedding)
//     + 0.3 * (share of query terms that occur in the chunk text).
//     Chunks without an embedding get a semantic score of 0.
//   - Otherwise relevance is a TF-IDF score over the chunk texts.
//
// query:  string to search for
// chunks: array of { content: string, embedding?: number[] }
// topK:   maximum number of results (handed straight to Array#slice)
//
// NOTE(review): this code appears to be stored inside a template literal
// (worker source string), which is why regex backslashes are doubled and why
// no backticks are used here. Keep it that way when editing.
async function ragRetrieve(query, chunks, topK) {
  // Drop empty terms: an empty term is a substring of every text and would
  // otherwise give every chunk a perfect keyword score for a blank query.
  const queryTerms = query.toLowerCase().split(/\\s+/).filter(Boolean);
  const byRelevanceDesc = (a, b) => b.relevance - a.relevance;
  const hasEmbedding = (c) => Boolean(c.embedding) && c.embedding.length > 0;

  // Size the query embedding from the first chunk that actually has one;
  // chunks[0] may be a plain-text chunk even when later chunks are embedded.
  const reference = chunks.find(hasEmbedding);
  if (reference) {
    const queryEmbedding = hashEmbed(query, reference.embedding.length);
    return chunks
      .map((chunk) => {
        const semantic = hasEmbedding(chunk)
          ? cosineSimilarity(queryEmbedding, chunk.embedding)
          : 0;
        // Blend semantic + keyword for robustness
        const content = chunk.content.toLowerCase();
        const kwMatches = queryTerms.filter((t) => content.includes(t)).length;
        const keyword = queryTerms.length > 0 ? kwMatches / queryTerms.length : 0;
        return { ...chunk, relevance: semantic * 0.7 + keyword * 0.3 };
      })
      .sort(byRelevanceDesc)
      .slice(0, topK);
  }

  // Fallback: TF-IDF-weighted keyword matching
  const texts = chunks.map((c) => c.content.toLowerCase());
  // A Map (not a plain object) so that terms such as '__proto__' cannot
  // collide with inherited properties and poison the score with NaN.
  const idf = new Map();
  for (const term of queryTerms) {
    const df = texts.filter((t) => t.includes(term)).length || 1;
    // Smoothed IDF: log(1 + N / df) is always > 0 and strictly decreasing in
    // df, so a term found in every chunk still counts but never outweighs a
    // rarer term.
    idf.set(term, Math.log(1 + texts.length / df));
  }

  return chunks
    .map((chunk, i) => {
      const words = texts[i].split(/\\s+/);
      let score = 0;
      for (const term of queryTerms) {
        const tf = words.filter((w) => w === term).length / (words.length || 1);
        score += tf * idf.get(term);
      }
      return { ...chunk, relevance: score };
    })
    .sort(byRelevanceDesc)
    .slice(0, topK);
}
|
|
393
|
+
|
|
394
|
+
// Score every context string against the query with TF-IDF and return the
// entries sorted by score, highest first.
//
// context: array of strings
// query:   string; split on whitespace into lower-cased terms
// returns: array of { index, content, relevance } where 'index' is the
//          position of the string in the input array
//
// NOTE(review): this code appears to be stored inside a template literal
// (worker source string), which is why regex backslashes are doubled and why
// no backticks are used here.
async function contextRank(context, query) {
  const queryTerms = query.toLowerCase().split(/\\s+/).filter(Boolean);
  const texts = context.map((c) => c.toLowerCase());

  // A Map (not a plain object) so that terms such as '__proto__' cannot
  // collide with inherited properties and poison the score with NaN.
  const idf = new Map();
  for (const term of queryTerms) {
    // Document frequency is substring based; clamp to 1 to avoid dividing by 0.
    const df = texts.filter((t) => t.includes(term)).length || 1;
    // Smoothed IDF: log(1 + N / df) is always > 0 and strictly decreasing in
    // df, so a term present in every entry still counts but never outweighs a
    // rarer term.
    idf.set(term, Math.log(1 + texts.length / df));
  }

  return context
    .map((ctx, i) => {
      const words = texts[i].split(/\\s+/);
      let score = 0;
      for (const term of queryTerms) {
        // Term frequency counts exact word matches, normalised by word count.
        const tf = words.filter((w) => w === term).length / (words.length || 1);
        score += tf * idf.get(term);
      }
      return { index: i, content: ctx, relevance: score };
    })
    .sort((a, b) => b.relevance - a.relevance);
}
|
|
416
|
+
|
|
417
|
+
// Run 'git blame --line-porcelain' on each file and collect per-line
// authorship.
//
// files:   array of paths, resolved by git relative to the current directory
// returns: array of { file, lines } where each line entry is
//          { commit, author, date, line }; 'date' is the author time as an
//          ISO-8601 string and 'line' is the 1-based position in the output.
//          A file that cannot be blamed (git missing, not tracked, ...) yields
//          { file, lines: [] } instead of failing the whole batch.
//
// NOTE(review): this code appears to be stored inside a template literal
// (worker source string), which is why the newline and tab escapes below are
// doubled and why no backticks are used here.
async function gitBlame(files) {
  const { execFileSync } = require('child_process');
  const results = [];

  for (const file of files) {
    try {
      // Pass the path as an argv entry (no shell) so quotes, '$(...)' or ';'
      // in a file name cannot run commands; '--' stops option parsing for
      // names starting with '-'. stderr is discarded via stdio rather than a
      // shell redirect, which also works on Windows.
      const output = execFileSync('git', ['blame', '--line-porcelain', '--', file], {
        encoding: 'utf8',
        maxBuffer: 10 * 1024 * 1024,
        stdio: ['ignore', 'pipe', 'ignore'],
      });

      const lines = [];
      let currentLine = {};
      for (const line of output.split('\\n')) {
        if (line.startsWith('author ')) {
          currentLine.author = line.slice(7);
        } else if (line.startsWith('author-time ')) {
          // author-time is seconds since the epoch.
          currentLine.date = new Date(parseInt(line.slice(12), 10) * 1000).toISOString();
        } else if (line.match(/^[a-f0-9]{40}/)) {
          // Header line of a porcelain record starts with the commit hash.
          currentLine.commit = line.slice(0, 40);
        } else if (line.startsWith('\\t')) {
          // A tab-prefixed line is the source text and closes the record.
          lines.push({ ...currentLine, line: lines.length + 1 });
          currentLine = {};
        }
      }
      results.push({ file, lines });
    } catch {
      // Best effort: report the file with no blame data.
      results.push({ file, lines: [] });
    }
  }

  return results;
}
|
|
441
|
+
|
|
442
|
+
// Summarise recent git activity for each file.
//
// files:   array of paths, resolved by git relative to the current directory
// since:   anything 'git log --since' accepts; defaults to '30 days ago'
// returns: array of
//          { file, additions, deletions, commits, authors, lastModified }
//          where 'lastModified' is the author date of the first commit git
//          prints (the newest one in default log order) or '' if there is
//          none. A file whose log cannot be read yields an all-zero record.
//
// NOTE(review): this code appears to be stored inside a template literal
// (worker source string), which is why regex/newline/tab escapes are doubled
// and why string concatenation is used instead of template strings.
async function gitChurn(files, since) {
  const { execFileSync } = require('child_process');
  const results = [];
  const sinceArg = '--since=' + (since || '30 days ago');

  for (const file of files) {
    try {
      // Arguments go to git directly (no shell), so neither 'since' nor the
      // file name can inject commands; '--' stops option parsing. maxBuffer
      // matches gitBlame so long histories are not silently reported as empty.
      const log = execFileSync(
        'git',
        ['log', sinceArg, '--format=%H|%an|%aI', '--numstat', '--', file],
        {
          encoding: 'utf8',
          maxBuffer: 10 * 1024 * 1024,
          stdio: ['ignore', 'pipe', 'ignore'],
        }
      );

      let additions = 0;
      let deletions = 0;
      let commits = 0;
      const authors = new Set();
      let lastModified = '';

      for (const line of log.split('\\n')) {
        const fields = line.split('|');
        // A header line is '<hash>|<author>|<date>'. Requiring a hex hash up
        // front keeps numstat lines for paths containing '|' out of this
        // branch; the author is everything between the first and last field
        // so names containing '|' stay intact.
        if (fields.length >= 3 && /^[0-9a-f]{40,64}$/.test(fields[0])) {
          authors.add(fields.slice(1, -1).join('|'));
          commits++;
          if (!lastModified) lastModified = fields[fields.length - 1];
        } else if (line.match(/^\\d+\\s+\\d+/)) {
          // numstat line: '<added>TAB<deleted>TAB<path>'.
          const [add, del] = line.split('\\t');
          additions += parseInt(add, 10) || 0;
          deletions += parseInt(del, 10) || 0;
        }
      }

      results.push({ file, additions, deletions, commits, authors: Array.from(authors), lastModified });
    } catch {
      // Best effort: report the file with no activity.
      results.push({ file, additions: 0, deletions: 0, commits: 0, authors: [], lastModified: '' });
    }
  }

  return results;
}
|
|
474
|
+
|
|
475
|
+
// Compute rough, regex-based size and complexity figures for each file.
//
// files:   array of file paths
// returns: array of
//          { file, lines, nonEmptyLines, cyclomaticComplexity, functions,
//            avgFunctionSize }
//          cyclomaticComplexity is 1 + the number of branch tokens found and
//          avgFunctionSize is nonEmptyLines / functions (rounded), or
//          nonEmptyLines when no function token is found. Missing or
//          unreadable files yield the zero record (complexity 1).
//
// This is a textual heuristic: tokens inside strings and comments are counted
// too.
//
// NOTE(review): this code appears to be stored inside a template literal
// (worker source string), which is why regex/newline escapes are doubled and
// why character classes such as [?] are used instead of backslash escapes.
async function complexityAnalyze(files) {
  const fs = require('fs');

  const emptyReport = (file) => ({
    file,
    lines: 0,
    nonEmptyLines: 0,
    cyclomaticComplexity: 1,
    functions: 0,
    avgFunctionSize: 0,
  });

  // Word boundaries apply to the keywords only. Wrapping '?', '&&' and '||'
  // in word boundaries would require a word character on both sides, so
  // conventionally spaced operators ('a && b') would never be counted.
  // A lone '?' is the ternary; '?.' and '??' are skipped.
  const branchPattern = /\\b(?:if|else|switch|case|for|while|catch)\\b|(?<![?])[?](?![?.])|&&|[|][|]/g;
  // 'function' is matched as a whole word so identifiers like 'functions'
  // are not counted.
  const functionPattern = /\\bfunction\\b|=>|\\bdef\\b|\\bfn\\b/g;

  return files.map((file) => {
    try {
      // A missing file is reported like an unreadable one.
      if (!fs.existsSync(file)) return emptyReport(file);

      const content = fs.readFileSync(file, 'utf8');
      const lines = content.split('\\n');
      const nonEmpty = lines.filter((l) => l.trim()).length;
      const branches = (content.match(branchPattern) || []).length;
      const functions = (content.match(functionPattern) || []).length;

      return {
        file,
        lines: lines.length,
        nonEmptyLines: nonEmpty,
        cyclomaticComplexity: branches + 1,
        functions,
        avgFunctionSize: functions > 0 ? Math.round(nonEmpty / functions) : nonEmpty,
      };
    } catch {
      return emptyReport(file);
    }
  });
}
|
|
498
|
+
|
|
499
|
+
// Build a dependency map by scanning source files for import/require
// specifiers, starting from the given entry points and following relative
// imports recursively.
//
// entryPoints: array of file paths
// returns:     plain object mapping each analysed file to the list of its
//              dependencies. Relative specifiers are listed as resolved
//              paths (see resolveLocal); everything else is listed verbatim.
//              Files that do not exist or cannot be read are left out.
//
// The scan is textual, so specifiers inside strings or comments are picked up
// as well.
//
// NOTE(review): this code appears to be stored inside a template literal
// (worker source string), which is why regex backslashes are doubled and why
// [(] is used instead of an escaped parenthesis.
async function dependencyGraph(entryPoints) {
  const fs = require('fs');
  const path = require('path');
  const graph = new Map();
  // Shared across entry points so a file reachable from several of them is
  // read once; it also terminates import cycles.
  const visited = new Set();
  const EXTENSIONS = ['', '.js', '.mjs', '.cjs', '.jsx', '.ts', '.tsx', '.json'];

  function isFile(candidate) {
    try {
      return fs.statSync(candidate).isFile();
    } catch {
      return false;
    }
  }

  // Resolve a relative specifier to an existing file by trying the usual
  // extensions and then an index file inside a directory. Falls back to the
  // bare resolved path when nothing on disk matches.
  function resolveLocal(fromFile, specifier) {
    const base = path.resolve(path.dirname(fromFile), specifier);
    for (const ext of EXTENSIONS) {
      if (isFile(base + ext)) return base + ext;
    }
    for (const ext of EXTENSIONS.slice(1)) {
      const indexFile = path.join(base, 'index' + ext);
      if (isFile(indexFile)) return indexFile;
    }
    return base;
  }

  function analyze(file) {
    if (visited.has(file)) return;
    visited.add(file);

    try {
      if (!fs.existsSync(file)) return;
      const content = fs.readFileSync(file, 'utf8');
      const deps = [];

      // Extract imports: import 'x', import('x'), require('x') and the
      // "... from 'x'" tail of static import/export statements.
      const importRegex = /(?:\\b(?:import|require)\\s*[(]?|\\bfrom)\\s*['"]([^'"]+)['"]/g;
      let match;
      while ((match = importRegex.exec(content)) !== null) {
        const dep = match[1];
        if (dep.startsWith('.')) {
          const resolved = resolveLocal(file, dep);
          deps.push(resolved);
          analyze(resolved);
        } else {
          deps.push(dep);
        }
      }

      graph.set(file, deps);
    } catch {
      // Best effort: unreadable files (for example directories) are skipped.
    }
  }

  for (const entry of entryPoints) {
    analyze(entry);
  }

  return Object.fromEntries(graph);
}
|
|
537
|
+
|
|
538
|
+
// Remove near-duplicate strings using Jaccard similarity over lower-cased,
// whitespace-separated tokens.
//
// items:     array of strings
// threshold: an item is dropped when its similarity to an already kept item
//            is >= threshold (1 means identical token sets)
// returns:   the kept items, in their original order (first occurrence wins)
//
// NOTE(review): this code appears to be stored inside a template literal
// (worker source string), which is why the regex backslash is doubled and why
// no backticks are used here.
async function deduplicate(items, threshold) {
  // Empty tokens produced by leading/trailing whitespace are dropped so that
  // padding alone does not lower the similarity of otherwise equal items.
  const tokenize = (text) => new Set(text.toLowerCase().split(/\\s+/).filter(Boolean));

  const jaccard = (a, b) => {
    let shared = 0;
    for (const token of a) {
      if (b.has(token)) shared++;
    }
    const union = a.size + b.size - shared;
    // Two token-less strings are considered identical.
    return union === 0 ? 1 : shared / union;
  };

  // Keep each accepted item together with its token set so it is tokenized
  // once instead of once per comparison.
  const kept = [];
  for (const item of items) {
    const tokens = tokenize(item);
    const isDup = kept.some((entry) => jaccard(tokens, entry.tokens) >= threshold);
    if (!isDup) kept.push({ item, tokens });
  }

  return kept.map((entry) => entry.item);
}
|
|
564
564
|
`;
|
|
565
565
|
}
|
|
566
566
|
handleWorkerResult(worker, result) {
|