ruvector 0.2.28 → 0.2.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/LICENSE +21 -21
  2. package/README.md +2270 -2270
  3. package/bin/cli.js +9598 -9479
  4. package/bin/mcp-server.js +1 -1
  5. package/dist/core/intelligence-engine.d.ts +13 -0
  6. package/dist/core/intelligence-engine.d.ts.map +1 -1
  7. package/dist/core/intelligence-engine.js +38 -0
  8. package/dist/core/onnx/bundled-parallel.mjs +164 -164
  9. package/dist/core/onnx/embed-worker.mjs +67 -67
  10. package/dist/core/onnx/loader.js +434 -434
  11. package/dist/core/onnx/package.json +3 -3
  12. package/dist/core/onnx/pkg/LICENSE +21 -21
  13. package/dist/core/onnx/pkg/loader.js +348 -348
  14. package/dist/core/onnx/pkg/package.json +3 -3
  15. package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm.d.ts +112 -112
  16. package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm.js +5 -5
  17. package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm_bg.js +638 -638
  18. package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm_bg.wasm.d.ts +29 -29
  19. package/dist/core/parallel-workers.js +439 -439
  20. package/dist/workers/benchmark.js +15 -15
  21. package/package.json +122 -122
  22. package/src/decompiler/api-prober.js +302 -302
  23. package/src/decompiler/index.js +463 -463
  24. package/src/decompiler/metrics.js +86 -86
  25. package/src/decompiler/model-decompiler.js +423 -423
  26. package/src/decompiler/module-splitter.js +498 -498
  27. package/src/decompiler/module-tree.js +142 -142
  28. package/src/decompiler/name-predictor.js +400 -400
  29. package/src/decompiler/npm-fetch.js +176 -176
  30. package/src/decompiler/reconstructor.js +499 -499
  31. package/src/decompiler/reference-tracker.js +285 -285
  32. package/src/decompiler/statement-parser.js +285 -285
  33. package/src/decompiler/style-improver.js +438 -438
  34. package/src/decompiler/subcategories.js +339 -339
  35. package/src/decompiler/validator.js +379 -379
  36. package/src/decompiler/witness.js +140 -140
  37. package/wasm/package.json +26 -26
  38. package/wasm/ruvector_decompiler_wasm.d.ts +27 -27
  39. package/wasm/ruvector_decompiler_wasm.js +220 -220
  40. package/wasm/ruvector_decompiler_wasm_bg.wasm.d.ts +16 -16
  41. package/dist/core/onnx/pkg/ruvector.db +0 -0
@@ -105,9 +105,9 @@ class ExtendedWorkerPool {
105
105
  const workerBlob = new Blob([workerCode], { type: 'application/javascript' });
106
106
  for (let i = 0; i < this.config.numWorkers; i++) {
107
107
  // Create worker from inline code
108
- const worker = new worker_threads_1.Worker(`
109
- const { parentPort, workerData } = require('worker_threads');
110
- ${this.getWorkerHandlers()}
108
+ const worker = new worker_threads_1.Worker(`
109
+ const { parentPort, workerData } = require('worker_threads');
110
+ ${this.getWorkerHandlers()}
111
111
  `, { eval: true, workerData: { workerId: i } });
112
112
  worker.on('message', (result) => {
113
113
  this.handleWorkerResult(worker, result);
@@ -122,445 +122,445 @@ class ExtendedWorkerPool {
122
122
  this.initialized = true;
123
123
  }
124
124
  getWorkerCode() {
125
- return `
126
- const { parentPort, workerData } = require('worker_threads');
127
- ${this.getWorkerHandlers()}
125
+ return `
126
+ const { parentPort, workerData } = require('worker_threads');
127
+ ${this.getWorkerHandlers()}
128
128
  `;
129
129
  }
130
130
  getWorkerHandlers() {
131
- return `
132
- parentPort.on('message', async (task) => {
133
- try {
134
- let result;
135
- switch (task.type) {
136
- case 'speculative-embed':
137
- result = await speculativeEmbed(task.files, task.coEditGraph);
138
- break;
139
- case 'ast-analyze':
140
- result = await astAnalyze(task.files);
141
- break;
142
- case 'security-scan':
143
- result = await securityScan(task.files, task.rules);
144
- break;
145
- case 'rag-retrieve':
146
- result = await ragRetrieve(task.query, task.chunks, task.topK);
147
- break;
148
- case 'context-rank':
149
- result = await contextRank(task.context, task.query);
150
- break;
151
- case 'git-blame':
152
- result = await gitBlame(task.files);
153
- break;
154
- case 'git-churn':
155
- result = await gitChurn(task.files, task.since);
156
- break;
157
- case 'complexity-analyze':
158
- result = await complexityAnalyze(task.files);
159
- break;
160
- case 'dependency-graph':
161
- result = await dependencyGraph(task.entryPoints);
162
- break;
163
- case 'deduplicate':
164
- result = await deduplicate(task.items, task.threshold);
165
- break;
166
- default:
167
- throw new Error('Unknown task type: ' + task.type);
168
- }
169
- parentPort.postMessage({ success: true, data: result, taskId: task.taskId });
170
- } catch (error) {
171
- parentPort.postMessage({ success: false, error: error.message, taskId: task.taskId });
172
- }
173
- });
174
-
175
- // Worker implementations
176
-
177
- // Hash-based embedding: deterministic, no external deps, 128-dim
178
- function hashEmbed(text, dim = 128) {
179
- const embedding = new Float64Array(dim);
180
- const tokens = text.split(/\\s+|[{}()\\[\\];,.<>=/+\\-*&|!~^%@#]/);
181
-
182
- for (let t = 0; t < tokens.length; t++) {
183
- const token = tokens[t];
184
- if (!token) continue;
185
-
186
- // FNV-1a hash
187
- let h = 0x811c9dc5;
188
- for (let i = 0; i < token.length; i++) {
189
- h ^= token.charCodeAt(i);
190
- h = Math.imul(h, 0x01000193);
191
- }
192
-
193
- // Positional weight (tokens near start matter more)
194
- const posWeight = 1.0 / (1.0 + Math.log1p(t));
195
-
196
- // Distribute across multiple dimensions using hash rotations
197
- for (let d = 0; d < 4; d++) {
198
- const idx = ((h >>> 0) + d * 37) % dim;
199
- const sign = (h & (1 << d)) ? 1 : -1;
200
- embedding[idx] += sign * posWeight;
201
- h = (h >>> 7) | (h << 25); // rotate
202
- }
203
- }
204
-
205
- // L2 normalize
206
- let norm = 0;
207
- for (let i = 0; i < dim; i++) norm += embedding[i] * embedding[i];
208
- norm = Math.sqrt(norm) || 1;
209
- const result = new Array(dim);
210
- for (let i = 0; i < dim; i++) result[i] = embedding[i] / norm;
211
- return result;
212
- }
213
-
214
- async function speculativeEmbed(files, coEditGraph) {
215
- const fs = require('fs');
216
- return files.map(file => {
217
- try {
218
- if (!fs.existsSync(file)) {
219
- return { file, embedding: hashEmbed(file), confidence: 0.2, timestamp: Date.now() };
220
- }
221
- const content = fs.readFileSync(file, 'utf8');
222
- const embedding = hashEmbed(content);
223
-
224
- // Confidence based on file size (more content = higher confidence)
225
- const lines = content.split('\\n').length;
226
- const confidence = Math.min(0.95, 0.3 + (lines / 500) * 0.65);
227
-
228
- return { file, embedding, confidence, timestamp: Date.now() };
229
- } catch {
230
- return { file, embedding: hashEmbed(file), confidence: 0.1, timestamp: Date.now() };
231
- }
232
- });
233
- }
234
-
235
- async function astAnalyze(files) {
236
- const fs = require('fs');
237
- return files.map(file => {
238
- try {
239
- const content = fs.existsSync(file) ? fs.readFileSync(file, 'utf8') : '';
240
- const lines = content.split('\\n');
241
- return {
242
- file,
243
- language: file.split('.').pop() || 'unknown',
244
- complexity: Math.min(lines.length / 10, 100),
245
- functions: extractFunctions(content),
246
- imports: extractImports(content),
247
- exports: extractExports(content),
248
- dependencies: [],
249
- };
250
- } catch {
251
- return { file, language: 'unknown', complexity: 0, functions: [], imports: [], exports: [], dependencies: [] };
252
- }
253
- });
254
- }
255
-
256
- function extractFunctions(content) {
257
- const patterns = [
258
- /function\\s+(\\w+)/g,
259
- /const\\s+(\\w+)\\s*=\\s*(?:async\\s*)?\\([^)]*\\)\\s*=>/g,
260
- /(?:async\\s+)?(?:public|private|protected)?\\s*(\\w+)\\s*\\([^)]*\\)\\s*{/g,
261
- ];
262
- const funcs = new Set();
263
- for (const pattern of patterns) {
264
- let match;
265
- while ((match = pattern.exec(content)) !== null) {
266
- if (match[1] && !['if', 'for', 'while', 'switch', 'catch'].includes(match[1])) {
267
- funcs.add(match[1]);
268
- }
269
- }
270
- }
271
- return Array.from(funcs);
272
- }
273
-
274
- function extractImports(content) {
275
- const imports = [];
276
- const patterns = [
277
- /import\\s+.*?from\\s+['"]([^'"]+)['"]/g,
278
- /require\\s*\\(['"]([^'"]+)['"]\\)/g,
279
- ];
280
- for (const pattern of patterns) {
281
- let match;
282
- while ((match = pattern.exec(content)) !== null) {
283
- imports.push(match[1]);
284
- }
285
- }
286
- return imports;
287
- }
288
-
289
- function extractExports(content) {
290
- const exports = [];
291
- const patterns = [
292
- /export\\s+(?:default\\s+)?(?:class|function|const|let|var)\\s+(\\w+)/g,
293
- /module\\.exports\\s*=\\s*(\\w+)/g,
294
- ];
295
- for (const pattern of patterns) {
296
- let match;
297
- while ((match = pattern.exec(content)) !== null) {
298
- exports.push(match[1]);
299
- }
300
- }
301
- return exports;
302
- }
303
-
304
- async function securityScan(files, rules) {
305
- const fs = require('fs');
306
- const findings = [];
307
- const securityPatterns = [
308
- { pattern: /eval\\s*\\(/g, rule: 'no-eval', severity: 'high', message: 'Avoid eval()' },
309
- { pattern: /innerHTML\\s*=/g, rule: 'no-inner-html', severity: 'medium', message: 'Avoid innerHTML, use textContent' },
310
- { pattern: /password\\s*=\\s*['"][^'"]+['"]/gi, rule: 'no-hardcoded-secrets', severity: 'critical', message: 'Hardcoded password detected' },
311
- { pattern: /api[_-]?key\\s*=\\s*['"][^'"]+['"]/gi, rule: 'no-hardcoded-secrets', severity: 'critical', message: 'Hardcoded API key detected' },
312
- { pattern: /exec\\s*\\(/g, rule: 'no-exec', severity: 'high', message: 'Avoid exec(), use execFile or spawn' },
313
- { pattern: /\\$\\{.*\\}/g, rule: 'template-injection', severity: 'medium', message: 'Potential template injection' },
314
- ];
315
-
316
- for (const file of files) {
317
- try {
318
- if (!fs.existsSync(file)) continue;
319
- const content = fs.readFileSync(file, 'utf8');
320
- const lines = content.split('\\n');
321
-
322
- for (const { pattern, rule, severity, message } of securityPatterns) {
323
- let match;
324
- const regex = new RegExp(pattern.source, pattern.flags);
325
- while ((match = regex.exec(content)) !== null) {
326
- const lineNum = content.substring(0, match.index).split('\\n').length;
327
- findings.push({ file, line: lineNum, severity, rule, message });
328
- }
329
- }
330
- } catch {}
331
- }
332
- return findings;
333
- }
334
-
335
- function cosineSimilarity(a, b) {
336
- if (!a || !b || a.length !== b.length || a.length === 0) return 0;
337
- let dot = 0, normA = 0, normB = 0;
338
- for (let i = 0; i < a.length; i++) {
339
- dot += a[i] * b[i];
340
- normA += a[i] * a[i];
341
- normB += b[i] * b[i];
342
- }
343
- const denom = Math.sqrt(normA) * Math.sqrt(normB);
344
- return denom === 0 ? 0 : dot / denom;
345
- }
346
-
347
- async function ragRetrieve(query, chunks, topK) {
348
- // If chunks have embeddings, use cosine similarity (semantic retrieval)
349
- const hasEmbeddings = chunks.some(c => c.embedding && c.embedding.length > 0);
350
-
351
- if (hasEmbeddings) {
352
- const queryEmbedding = hashEmbed(query, chunks[0].embedding.length);
353
- return chunks
354
- .map(chunk => {
355
- const semantic = chunk.embedding && chunk.embedding.length > 0
356
- ? cosineSimilarity(queryEmbedding, chunk.embedding)
357
- : 0;
358
- // Blend semantic + keyword for robustness
359
- const queryTerms = query.toLowerCase().split(/\\s+/);
360
- const content = chunk.content.toLowerCase();
361
- const kwMatches = queryTerms.filter(t => content.includes(t)).length;
362
- const keyword = queryTerms.length > 0 ? kwMatches / queryTerms.length : 0;
363
- const relevance = semantic * 0.7 + keyword * 0.3;
364
- return { ...chunk, relevance };
365
- })
366
- .sort((a, b) => b.relevance - a.relevance)
367
- .slice(0, topK);
368
- }
369
-
370
- // Fallback: TF-IDF-weighted keyword matching
371
- const queryTerms = query.toLowerCase().split(/\\s+/).filter(Boolean);
372
- const allContent = chunks.map(c => c.content.toLowerCase());
373
- // IDF: log(N / df) for each query term
374
- const idf = {};
375
- for (const term of queryTerms) {
376
- const df = allContent.filter(c => c.includes(term)).length || 1;
377
- idf[term] = Math.log(allContent.length / df);
378
- }
379
- return chunks
380
- .map(chunk => {
381
- const content = chunk.content.toLowerCase();
382
- const words = content.split(/\\s+/);
383
- let score = 0;
384
- for (const term of queryTerms) {
385
- const tf = words.filter(w => w === term).length / (words.length || 1);
386
- score += tf * (idf[term] || 1);
387
- }
388
- return { ...chunk, relevance: score };
389
- })
390
- .sort((a, b) => b.relevance - a.relevance)
391
- .slice(0, topK);
392
- }
393
-
394
- async function contextRank(context, query) {
395
- // Use TF-IDF scoring instead of raw keyword matching
396
- const queryTerms = query.toLowerCase().split(/\\s+/).filter(Boolean);
397
- const allContent = context.map(c => c.toLowerCase());
398
- const idf = {};
399
- for (const term of queryTerms) {
400
- const df = allContent.filter(c => c.includes(term)).length || 1;
401
- idf[term] = Math.log(allContent.length / df);
402
- }
403
- return context
404
- .map((ctx, i) => {
405
- const content = ctx.toLowerCase();
406
- const words = content.split(/\\s+/);
407
- let score = 0;
408
- for (const term of queryTerms) {
409
- const tf = words.filter(w => w === term).length / (words.length || 1);
410
- score += tf * (idf[term] || 1);
411
- }
412
- return { index: i, content: ctx, relevance: score };
413
- })
414
- .sort((a, b) => b.relevance - a.relevance);
415
- }
416
-
417
- async function gitBlame(files) {
418
- const { execSync } = require('child_process');
419
- const results = [];
420
- for (const file of files) {
421
- try {
422
- const output = execSync(\`git blame --line-porcelain "\${file}" 2>/dev/null\`, { encoding: 'utf8', maxBuffer: 10 * 1024 * 1024 });
423
- const lines = [];
424
- let currentLine = {};
425
- for (const line of output.split('\\n')) {
426
- if (line.startsWith('author ')) currentLine.author = line.slice(7);
427
- else if (line.startsWith('author-time ')) currentLine.date = new Date(parseInt(line.slice(12)) * 1000).toISOString();
428
- else if (line.match(/^[a-f0-9]{40}/)) currentLine.commit = line.slice(0, 40);
429
- else if (line.startsWith('\\t')) {
430
- lines.push({ ...currentLine, line: lines.length + 1 });
431
- currentLine = {};
432
- }
433
- }
434
- results.push({ file, lines });
435
- } catch {
436
- results.push({ file, lines: [] });
437
- }
438
- }
439
- return results;
440
- }
441
-
442
- async function gitChurn(files, since) {
443
- const { execSync } = require('child_process');
444
- const results = [];
445
- const sinceArg = since ? \`--since="\${since}"\` : '--since="30 days ago"';
446
-
447
- for (const file of files) {
448
- try {
449
- const log = execSync(\`git log \${sinceArg} --format="%H|%an|%aI" --numstat -- "\${file}" 2>/dev/null\`, { encoding: 'utf8' });
450
- let additions = 0, deletions = 0, commits = 0;
451
- const authors = new Set();
452
- let lastModified = '';
453
-
454
- for (const line of log.split('\\n')) {
455
- if (line.includes('|')) {
456
- const [commit, author, date] = line.split('|');
457
- authors.add(author);
458
- commits++;
459
- if (!lastModified) lastModified = date;
460
- } else if (line.match(/^\\d+\\s+\\d+/)) {
461
- const [add, del] = line.split('\\t');
462
- additions += parseInt(add) || 0;
463
- deletions += parseInt(del) || 0;
464
- }
465
- }
466
-
467
- results.push({ file, additions, deletions, commits, authors: Array.from(authors), lastModified });
468
- } catch {
469
- results.push({ file, additions: 0, deletions: 0, commits: 0, authors: [], lastModified: '' });
470
- }
471
- }
472
- return results;
473
- }
474
-
475
- async function complexityAnalyze(files) {
476
- const fs = require('fs');
477
- return files.map(file => {
478
- try {
479
- const content = fs.existsSync(file) ? fs.readFileSync(file, 'utf8') : '';
480
- const lines = content.split('\\n');
481
- const nonEmpty = lines.filter(l => l.trim()).length;
482
- const branches = (content.match(/\\b(if|else|switch|case|for|while|catch|\\?|&&|\\|\\|)\\b/g) || []).length;
483
- const functions = (content.match(/function|=>|\\bdef\\b|\\bfn\\b/g) || []).length;
484
-
485
- return {
486
- file,
487
- lines: lines.length,
488
- nonEmptyLines: nonEmpty,
489
- cyclomaticComplexity: branches + 1,
490
- functions,
491
- avgFunctionSize: functions > 0 ? Math.round(nonEmpty / functions) : nonEmpty,
492
- };
493
- } catch {
494
- return { file, lines: 0, nonEmptyLines: 0, cyclomaticComplexity: 1, functions: 0, avgFunctionSize: 0 };
495
- }
496
- });
497
- }
498
-
499
- async function dependencyGraph(entryPoints) {
500
- const fs = require('fs');
501
- const path = require('path');
502
- const graph = new Map();
503
-
504
- function analyze(file, visited = new Set()) {
505
- if (visited.has(file)) return;
506
- visited.add(file);
507
-
508
- try {
509
- if (!fs.existsSync(file)) return;
510
- const content = fs.readFileSync(file, 'utf8');
511
- const deps = [];
512
-
513
- // Extract imports
514
- const importRegex = /(?:import|require)\\s*\\(?['"]([^'"]+)['"]/g;
515
- let match;
516
- while ((match = importRegex.exec(content)) !== null) {
517
- const dep = match[1];
518
- if (dep.startsWith('.')) {
519
- const resolved = path.resolve(path.dirname(file), dep);
520
- deps.push(resolved);
521
- analyze(resolved, visited);
522
- } else {
523
- deps.push(dep);
524
- }
525
- }
526
-
527
- graph.set(file, deps);
528
- } catch {}
529
- }
530
-
531
- for (const entry of entryPoints) {
532
- analyze(entry);
533
- }
534
-
535
- return Object.fromEntries(graph);
536
- }
537
-
538
- async function deduplicate(items, threshold) {
539
- // Simple Jaccard similarity deduplication
540
- const unique = [];
541
- const seen = new Set();
542
-
543
- for (const item of items) {
544
- const tokens = new Set(item.toLowerCase().split(/\\s+/));
545
- let isDup = false;
546
-
547
- for (const existing of unique) {
548
- const existingTokens = new Set(existing.toLowerCase().split(/\\s+/));
549
- const intersection = [...tokens].filter(t => existingTokens.has(t)).length;
550
- const union = new Set([...tokens, ...existingTokens]).size;
551
- const similarity = intersection / union;
552
-
553
- if (similarity >= threshold) {
554
- isDup = true;
555
- break;
556
- }
557
- }
558
-
559
- if (!isDup) unique.push(item);
560
- }
561
-
562
- return unique;
563
- }
131
+ return `
132
+ parentPort.on('message', async (task) => {
133
+ try {
134
+ let result;
135
+ switch (task.type) {
136
+ case 'speculative-embed':
137
+ result = await speculativeEmbed(task.files, task.coEditGraph);
138
+ break;
139
+ case 'ast-analyze':
140
+ result = await astAnalyze(task.files);
141
+ break;
142
+ case 'security-scan':
143
+ result = await securityScan(task.files, task.rules);
144
+ break;
145
+ case 'rag-retrieve':
146
+ result = await ragRetrieve(task.query, task.chunks, task.topK);
147
+ break;
148
+ case 'context-rank':
149
+ result = await contextRank(task.context, task.query);
150
+ break;
151
+ case 'git-blame':
152
+ result = await gitBlame(task.files);
153
+ break;
154
+ case 'git-churn':
155
+ result = await gitChurn(task.files, task.since);
156
+ break;
157
+ case 'complexity-analyze':
158
+ result = await complexityAnalyze(task.files);
159
+ break;
160
+ case 'dependency-graph':
161
+ result = await dependencyGraph(task.entryPoints);
162
+ break;
163
+ case 'deduplicate':
164
+ result = await deduplicate(task.items, task.threshold);
165
+ break;
166
+ default:
167
+ throw new Error('Unknown task type: ' + task.type);
168
+ }
169
+ parentPort.postMessage({ success: true, data: result, taskId: task.taskId });
170
+ } catch (error) {
171
+ parentPort.postMessage({ success: false, error: error.message, taskId: task.taskId });
172
+ }
173
+ });
174
+
175
+ // Worker implementations
176
+
177
+ // Hash-based embedding: deterministic, no external deps, 128-dim
178
+ function hashEmbed(text, dim = 128) {
179
+ const embedding = new Float64Array(dim);
180
+ const tokens = text.split(/\\s+|[{}()\\[\\];,.<>=/+\\-*&|!~^%@#]/);
181
+
182
+ for (let t = 0; t < tokens.length; t++) {
183
+ const token = tokens[t];
184
+ if (!token) continue;
185
+
186
+ // FNV-1a hash
187
+ let h = 0x811c9dc5;
188
+ for (let i = 0; i < token.length; i++) {
189
+ h ^= token.charCodeAt(i);
190
+ h = Math.imul(h, 0x01000193);
191
+ }
192
+
193
+ // Positional weight (tokens near start matter more)
194
+ const posWeight = 1.0 / (1.0 + Math.log1p(t));
195
+
196
+ // Distribute across multiple dimensions using hash rotations
197
+ for (let d = 0; d < 4; d++) {
198
+ const idx = ((h >>> 0) + d * 37) % dim;
199
+ const sign = (h & (1 << d)) ? 1 : -1;
200
+ embedding[idx] += sign * posWeight;
201
+ h = (h >>> 7) | (h << 25); // rotate
202
+ }
203
+ }
204
+
205
+ // L2 normalize
206
+ let norm = 0;
207
+ for (let i = 0; i < dim; i++) norm += embedding[i] * embedding[i];
208
+ norm = Math.sqrt(norm) || 1;
209
+ const result = new Array(dim);
210
+ for (let i = 0; i < dim; i++) result[i] = embedding[i] / norm;
211
+ return result;
212
+ }
213
+
214
+ async function speculativeEmbed(files, coEditGraph) {
215
+ const fs = require('fs');
216
+ return files.map(file => {
217
+ try {
218
+ if (!fs.existsSync(file)) {
219
+ return { file, embedding: hashEmbed(file), confidence: 0.2, timestamp: Date.now() };
220
+ }
221
+ const content = fs.readFileSync(file, 'utf8');
222
+ const embedding = hashEmbed(content);
223
+
224
+ // Confidence based on file size (more content = higher confidence)
225
+ const lines = content.split('\\n').length;
226
+ const confidence = Math.min(0.95, 0.3 + (lines / 500) * 0.65);
227
+
228
+ return { file, embedding, confidence, timestamp: Date.now() };
229
+ } catch {
230
+ return { file, embedding: hashEmbed(file), confidence: 0.1, timestamp: Date.now() };
231
+ }
232
+ });
233
+ }
234
+
235
+ async function astAnalyze(files) {
236
+ const fs = require('fs');
237
+ return files.map(file => {
238
+ try {
239
+ const content = fs.existsSync(file) ? fs.readFileSync(file, 'utf8') : '';
240
+ const lines = content.split('\\n');
241
+ return {
242
+ file,
243
+ language: file.split('.').pop() || 'unknown',
244
+ complexity: Math.min(lines.length / 10, 100),
245
+ functions: extractFunctions(content),
246
+ imports: extractImports(content),
247
+ exports: extractExports(content),
248
+ dependencies: [],
249
+ };
250
+ } catch {
251
+ return { file, language: 'unknown', complexity: 0, functions: [], imports: [], exports: [], dependencies: [] };
252
+ }
253
+ });
254
+ }
255
+
256
+ function extractFunctions(content) {
257
+ const patterns = [
258
+ /function\\s+(\\w+)/g,
259
+ /const\\s+(\\w+)\\s*=\\s*(?:async\\s*)?\\([^)]*\\)\\s*=>/g,
260
+ /(?:async\\s+)?(?:public|private|protected)?\\s*(\\w+)\\s*\\([^)]*\\)\\s*{/g,
261
+ ];
262
+ const funcs = new Set();
263
+ for (const pattern of patterns) {
264
+ let match;
265
+ while ((match = pattern.exec(content)) !== null) {
266
+ if (match[1] && !['if', 'for', 'while', 'switch', 'catch'].includes(match[1])) {
267
+ funcs.add(match[1]);
268
+ }
269
+ }
270
+ }
271
+ return Array.from(funcs);
272
+ }
273
+
274
+ function extractImports(content) {
275
+ const imports = [];
276
+ const patterns = [
277
+ /import\\s+.*?from\\s+['"]([^'"]+)['"]/g,
278
+ /require\\s*\\(['"]([^'"]+)['"]\\)/g,
279
+ ];
280
+ for (const pattern of patterns) {
281
+ let match;
282
+ while ((match = pattern.exec(content)) !== null) {
283
+ imports.push(match[1]);
284
+ }
285
+ }
286
+ return imports;
287
+ }
288
+
289
+ function extractExports(content) {
290
+ const exports = [];
291
+ const patterns = [
292
+ /export\\s+(?:default\\s+)?(?:class|function|const|let|var)\\s+(\\w+)/g,
293
+ /module\\.exports\\s*=\\s*(\\w+)/g,
294
+ ];
295
+ for (const pattern of patterns) {
296
+ let match;
297
+ while ((match = pattern.exec(content)) !== null) {
298
+ exports.push(match[1]);
299
+ }
300
+ }
301
+ return exports;
302
+ }
303
+
304
+ async function securityScan(files, rules) {
305
+ const fs = require('fs');
306
+ const findings = [];
307
+ const securityPatterns = [
308
+ { pattern: /eval\\s*\\(/g, rule: 'no-eval', severity: 'high', message: 'Avoid eval()' },
309
+ { pattern: /innerHTML\\s*=/g, rule: 'no-inner-html', severity: 'medium', message: 'Avoid innerHTML, use textContent' },
310
+ { pattern: /password\\s*=\\s*['"][^'"]+['"]/gi, rule: 'no-hardcoded-secrets', severity: 'critical', message: 'Hardcoded password detected' },
311
+ { pattern: /api[_-]?key\\s*=\\s*['"][^'"]+['"]/gi, rule: 'no-hardcoded-secrets', severity: 'critical', message: 'Hardcoded API key detected' },
312
+ { pattern: /exec\\s*\\(/g, rule: 'no-exec', severity: 'high', message: 'Avoid exec(), use execFile or spawn' },
313
+ { pattern: /\\$\\{.*\\}/g, rule: 'template-injection', severity: 'medium', message: 'Potential template injection' },
314
+ ];
315
+
316
+ for (const file of files) {
317
+ try {
318
+ if (!fs.existsSync(file)) continue;
319
+ const content = fs.readFileSync(file, 'utf8');
320
+ const lines = content.split('\\n');
321
+
322
+ for (const { pattern, rule, severity, message } of securityPatterns) {
323
+ let match;
324
+ const regex = new RegExp(pattern.source, pattern.flags);
325
+ while ((match = regex.exec(content)) !== null) {
326
+ const lineNum = content.substring(0, match.index).split('\\n').length;
327
+ findings.push({ file, line: lineNum, severity, rule, message });
328
+ }
329
+ }
330
+ } catch {}
331
+ }
332
+ return findings;
333
+ }
334
+
335
+ function cosineSimilarity(a, b) {
336
+ if (!a || !b || a.length !== b.length || a.length === 0) return 0;
337
+ let dot = 0, normA = 0, normB = 0;
338
+ for (let i = 0; i < a.length; i++) {
339
+ dot += a[i] * b[i];
340
+ normA += a[i] * a[i];
341
+ normB += b[i] * b[i];
342
+ }
343
+ const denom = Math.sqrt(normA) * Math.sqrt(normB);
344
+ return denom === 0 ? 0 : dot / denom;
345
+ }
346
+
347
+ async function ragRetrieve(query, chunks, topK) {
348
+ // If chunks have embeddings, use cosine similarity (semantic retrieval)
349
+ const hasEmbeddings = chunks.some(c => c.embedding && c.embedding.length > 0);
350
+
351
+ if (hasEmbeddings) {
352
+ const queryEmbedding = hashEmbed(query, chunks[0].embedding.length);
353
+ return chunks
354
+ .map(chunk => {
355
+ const semantic = chunk.embedding && chunk.embedding.length > 0
356
+ ? cosineSimilarity(queryEmbedding, chunk.embedding)
357
+ : 0;
358
+ // Blend semantic + keyword for robustness
359
+ const queryTerms = query.toLowerCase().split(/\\s+/);
360
+ const content = chunk.content.toLowerCase();
361
+ const kwMatches = queryTerms.filter(t => content.includes(t)).length;
362
+ const keyword = queryTerms.length > 0 ? kwMatches / queryTerms.length : 0;
363
+ const relevance = semantic * 0.7 + keyword * 0.3;
364
+ return { ...chunk, relevance };
365
+ })
366
+ .sort((a, b) => b.relevance - a.relevance)
367
+ .slice(0, topK);
368
+ }
369
+
370
+ // Fallback: TF-IDF-weighted keyword matching
371
+ const queryTerms = query.toLowerCase().split(/\\s+/).filter(Boolean);
372
+ const allContent = chunks.map(c => c.content.toLowerCase());
373
+ // IDF: log(N / df) for each query term
374
+ const idf = {};
375
+ for (const term of queryTerms) {
376
+ const df = allContent.filter(c => c.includes(term)).length || 1;
377
+ idf[term] = Math.log(allContent.length / df);
378
+ }
379
+ return chunks
380
+ .map(chunk => {
381
+ const content = chunk.content.toLowerCase();
382
+ const words = content.split(/\\s+/);
383
+ let score = 0;
384
+ for (const term of queryTerms) {
385
+ const tf = words.filter(w => w === term).length / (words.length || 1);
386
+ score += tf * (idf[term] || 1);
387
+ }
388
+ return { ...chunk, relevance: score };
389
+ })
390
+ .sort((a, b) => b.relevance - a.relevance)
391
+ .slice(0, topK);
392
+ }
393
+
394
+ async function contextRank(context, query) {
395
+ // Use TF-IDF scoring instead of raw keyword matching
396
+ const queryTerms = query.toLowerCase().split(/\\s+/).filter(Boolean);
397
+ const allContent = context.map(c => c.toLowerCase());
398
+ const idf = {};
399
+ for (const term of queryTerms) {
400
+ const df = allContent.filter(c => c.includes(term)).length || 1;
401
+ idf[term] = Math.log(allContent.length / df);
402
+ }
403
+ return context
404
+ .map((ctx, i) => {
405
+ const content = ctx.toLowerCase();
406
+ const words = content.split(/\\s+/);
407
+ let score = 0;
408
+ for (const term of queryTerms) {
409
+ const tf = words.filter(w => w === term).length / (words.length || 1);
410
+ score += tf * (idf[term] || 1);
411
+ }
412
+ return { index: i, content: ctx, relevance: score };
413
+ })
414
+ .sort((a, b) => b.relevance - a.relevance);
415
+ }
416
+
417
+ async function gitBlame(files) {
418
+ const { execSync } = require('child_process');
419
+ const results = [];
420
+ for (const file of files) {
421
+ try {
422
+ const output = execSync(\`git blame --line-porcelain "\${file}" 2>/dev/null\`, { encoding: 'utf8', maxBuffer: 10 * 1024 * 1024 });
423
+ const lines = [];
424
+ let currentLine = {};
425
+ for (const line of output.split('\\n')) {
426
+ if (line.startsWith('author ')) currentLine.author = line.slice(7);
427
+ else if (line.startsWith('author-time ')) currentLine.date = new Date(parseInt(line.slice(12)) * 1000).toISOString();
428
+ else if (line.match(/^[a-f0-9]{40}/)) currentLine.commit = line.slice(0, 40);
429
+ else if (line.startsWith('\\t')) {
430
+ lines.push({ ...currentLine, line: lines.length + 1 });
431
+ currentLine = {};
432
+ }
433
+ }
434
+ results.push({ file, lines });
435
+ } catch {
436
+ results.push({ file, lines: [] });
437
+ }
438
+ }
439
+ return results;
440
+ }
441
+
442
+ async function gitChurn(files, since) {
443
+ const { execSync } = require('child_process');
444
+ const results = [];
445
+ const sinceArg = since ? \`--since="\${since}"\` : '--since="30 days ago"';
446
+
447
+ for (const file of files) {
448
+ try {
449
+ const log = execSync(\`git log \${sinceArg} --format="%H|%an|%aI" --numstat -- "\${file}" 2>/dev/null\`, { encoding: 'utf8' });
450
+ let additions = 0, deletions = 0, commits = 0;
451
+ const authors = new Set();
452
+ let lastModified = '';
453
+
454
+ for (const line of log.split('\\n')) {
455
+ if (line.includes('|')) {
456
+ const [commit, author, date] = line.split('|');
457
+ authors.add(author);
458
+ commits++;
459
+ if (!lastModified) lastModified = date;
460
+ } else if (line.match(/^\\d+\\s+\\d+/)) {
461
+ const [add, del] = line.split('\\t');
462
+ additions += parseInt(add) || 0;
463
+ deletions += parseInt(del) || 0;
464
+ }
465
+ }
466
+
467
+ results.push({ file, additions, deletions, commits, authors: Array.from(authors), lastModified });
468
+ } catch {
469
+ results.push({ file, additions: 0, deletions: 0, commits: 0, authors: [], lastModified: '' });
470
+ }
471
+ }
472
+ return results;
473
+ }
474
+
475
+ async function complexityAnalyze(files) {
476
+ const fs = require('fs');
477
+ return files.map(file => {
478
+ try {
479
+ const content = fs.existsSync(file) ? fs.readFileSync(file, 'utf8') : '';
480
+ const lines = content.split('\\n');
481
+ const nonEmpty = lines.filter(l => l.trim()).length;
482
+ const branches = (content.match(/\\b(if|else|switch|case|for|while|catch|\\?|&&|\\|\\|)\\b/g) || []).length;
483
+ const functions = (content.match(/function|=>|\\bdef\\b|\\bfn\\b/g) || []).length;
484
+
485
+ return {
486
+ file,
487
+ lines: lines.length,
488
+ nonEmptyLines: nonEmpty,
489
+ cyclomaticComplexity: branches + 1,
490
+ functions,
491
+ avgFunctionSize: functions > 0 ? Math.round(nonEmpty / functions) : nonEmpty,
492
+ };
493
+ } catch {
494
+ return { file, lines: 0, nonEmptyLines: 0, cyclomaticComplexity: 1, functions: 0, avgFunctionSize: 0 };
495
+ }
496
+ });
497
+ }
498
+
499
+ async function dependencyGraph(entryPoints) {
500
+ const fs = require('fs');
501
+ const path = require('path');
502
+ const graph = new Map();
503
+
504
+ function analyze(file, visited = new Set()) {
505
+ if (visited.has(file)) return;
506
+ visited.add(file);
507
+
508
+ try {
509
+ if (!fs.existsSync(file)) return;
510
+ const content = fs.readFileSync(file, 'utf8');
511
+ const deps = [];
512
+
513
+ // Extract imports
514
+ const importRegex = /(?:import|require)\\s*\\(?['"]([^'"]+)['"]/g;
515
+ let match;
516
+ while ((match = importRegex.exec(content)) !== null) {
517
+ const dep = match[1];
518
+ if (dep.startsWith('.')) {
519
+ const resolved = path.resolve(path.dirname(file), dep);
520
+ deps.push(resolved);
521
+ analyze(resolved, visited);
522
+ } else {
523
+ deps.push(dep);
524
+ }
525
+ }
526
+
527
+ graph.set(file, deps);
528
+ } catch {}
529
+ }
530
+
531
+ for (const entry of entryPoints) {
532
+ analyze(entry);
533
+ }
534
+
535
+ return Object.fromEntries(graph);
536
+ }
537
+
538
+ async function deduplicate(items, threshold) {
539
+ // Simple Jaccard similarity deduplication
540
+ const unique = [];
541
+ const seen = new Set();
542
+
543
+ for (const item of items) {
544
+ const tokens = new Set(item.toLowerCase().split(/\\s+/));
545
+ let isDup = false;
546
+
547
+ for (const existing of unique) {
548
+ const existingTokens = new Set(existing.toLowerCase().split(/\\s+/));
549
+ const intersection = [...tokens].filter(t => existingTokens.has(t)).length;
550
+ const union = new Set([...tokens, ...existingTokens]).size;
551
+ const similarity = intersection / union;
552
+
553
+ if (similarity >= threshold) {
554
+ isDup = true;
555
+ break;
556
+ }
557
+ }
558
+
559
+ if (!isDup) unique.push(item);
560
+ }
561
+
562
+ return unique;
563
+ }
564
564
  `;
565
565
  }
566
566
  handleWorkerResult(worker, result) {