@aiready/pattern-detect 0.11.36 → 0.11.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-4BPRGZRG.mjs +1041 -0
- package/dist/chunk-6OEHUI5J.mjs +1045 -0
- package/dist/chunk-CTDBJP25.mjs +1043 -0
- package/dist/chunk-DGAKXYIP.mjs +1041 -0
- package/dist/chunk-P7B6Z4I2.mjs +1043 -0
- package/dist/chunk-QEP76HGK.mjs +1039 -0
- package/dist/cli.js +161 -258
- package/dist/cli.mjs +1 -1
- package/dist/index.d.mts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +161 -258
- package/dist/index.mjs +1 -1
- package/package.json +2 -2
package/dist/cli.js
CHANGED
|
@@ -323,7 +323,7 @@ function filterBySeverity(duplicates, minSeverity) {
|
|
|
323
323
|
});
|
|
324
324
|
}
|
|
325
325
|
|
|
326
|
-
// src/
|
|
326
|
+
// src/core/extractor.ts
|
|
327
327
|
function categorizePattern(code) {
|
|
328
328
|
const lower = code.toLowerCase();
|
|
329
329
|
if (lower.includes("request") && lower.includes("response") || lower.includes("router.") || lower.includes("app.get") || lower.includes("app.post") || lower.includes("express") || lower.includes("ctx.body")) {
|
|
@@ -368,17 +368,19 @@ function extractCodeBlocks(content, minLines) {
|
|
|
368
368
|
currentBlock.push(line);
|
|
369
369
|
}
|
|
370
370
|
if (inFunction && braceDepth === 0 && currentBlock.length >= minLines) {
|
|
371
|
-
const blockContent = currentBlock.join("\n");
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
371
|
+
const blockContent = currentBlock.join("\n").trim();
|
|
372
|
+
if (blockContent) {
|
|
373
|
+
const loc = currentBlock.filter(
|
|
374
|
+
(l) => l.trim() && !l.trim().startsWith("//")
|
|
375
|
+
).length;
|
|
376
|
+
blocks.push({
|
|
377
|
+
content: blockContent,
|
|
378
|
+
startLine: blockStart + 1,
|
|
379
|
+
endLine: i + 1,
|
|
380
|
+
patternType: categorizePattern(blockContent),
|
|
381
|
+
linesOfCode: loc
|
|
382
|
+
});
|
|
383
|
+
}
|
|
382
384
|
currentBlock = [];
|
|
383
385
|
inFunction = false;
|
|
384
386
|
} else if (inFunction && braceDepth === 0) {
|
|
@@ -388,15 +390,51 @@ function extractCodeBlocks(content, minLines) {
|
|
|
388
390
|
}
|
|
389
391
|
return blocks;
|
|
390
392
|
}
|
|
393
|
+
|
|
394
|
+
// src/core/normalizer.ts
|
|
391
395
|
function normalizeCode(code) {
|
|
392
|
-
if (!code)
|
|
393
|
-
return "";
|
|
394
|
-
}
|
|
396
|
+
if (!code) return "";
|
|
395
397
|
return code.replace(/\/\/.*$/gm, "").replace(/\/\*[\s\S]*?\*\//g, "").replace(/"[^"]*"/g, '"STR"').replace(/'[^']*'/g, "'STR'").replace(/`[^`]*`/g, "`STR`").replace(/\b\d+\b/g, "NUM").replace(/\s+/g, " ").trim();
|
|
396
398
|
}
|
|
399
|
+
var stopwords = /* @__PURE__ */ new Set([
|
|
400
|
+
"return",
|
|
401
|
+
"const",
|
|
402
|
+
"let",
|
|
403
|
+
"var",
|
|
404
|
+
"function",
|
|
405
|
+
"class",
|
|
406
|
+
"new",
|
|
407
|
+
"if",
|
|
408
|
+
"else",
|
|
409
|
+
"for",
|
|
410
|
+
"while",
|
|
411
|
+
"async",
|
|
412
|
+
"await",
|
|
413
|
+
"try",
|
|
414
|
+
"catch",
|
|
415
|
+
"switch",
|
|
416
|
+
"case",
|
|
417
|
+
"default",
|
|
418
|
+
"import",
|
|
419
|
+
"export",
|
|
420
|
+
"from",
|
|
421
|
+
"true",
|
|
422
|
+
"false",
|
|
423
|
+
"null",
|
|
424
|
+
"undefined",
|
|
425
|
+
"this"
|
|
426
|
+
]);
|
|
427
|
+
function tokenize(norm) {
|
|
428
|
+
const punctuation = "(){}[];.,";
|
|
429
|
+
const cleaned = norm.split("").map((ch) => punctuation.includes(ch) ? " " : ch).join("");
|
|
430
|
+
return cleaned.split(/\s+/).filter((t) => t && t.length >= 3 && !stopwords.has(t.toLowerCase()));
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
// src/core/similarity.ts
|
|
397
434
|
function jaccardSimilarity(tokens1, tokens2) {
|
|
398
435
|
const set1 = new Set(tokens1);
|
|
399
436
|
const set2 = new Set(tokens2);
|
|
437
|
+
if (set1.size === 0 && set2.size === 0) return 0;
|
|
400
438
|
let intersection = 0;
|
|
401
439
|
for (const token of set1) {
|
|
402
440
|
if (set2.has(token)) intersection++;
|
|
@@ -404,6 +442,53 @@ function jaccardSimilarity(tokens1, tokens2) {
|
|
|
404
442
|
const union = set1.size + set2.size - intersection;
|
|
405
443
|
return union === 0 ? 0 : intersection / union;
|
|
406
444
|
}
|
|
445
|
+
|
|
446
|
+
// src/core/approx-engine.ts
|
|
447
|
+
var ApproxEngine = class {
|
|
448
|
+
constructor(allBlocks, blockTokens) {
|
|
449
|
+
this.invertedIndex = /* @__PURE__ */ new Map();
|
|
450
|
+
this.allBlocks = allBlocks;
|
|
451
|
+
this.blockTokens = blockTokens;
|
|
452
|
+
this.buildIndex();
|
|
453
|
+
}
|
|
454
|
+
buildIndex() {
|
|
455
|
+
for (let i = 0; i < this.blockTokens.length; i++) {
|
|
456
|
+
for (const tok of this.blockTokens[i]) {
|
|
457
|
+
let arr = this.invertedIndex.get(tok);
|
|
458
|
+
if (!arr) {
|
|
459
|
+
arr = [];
|
|
460
|
+
this.invertedIndex.set(tok, arr);
|
|
461
|
+
}
|
|
462
|
+
arr.push(i);
|
|
463
|
+
}
|
|
464
|
+
}
|
|
465
|
+
}
|
|
466
|
+
findCandidates(blockIdx, minSharedTokens, maxCandidates) {
|
|
467
|
+
const block1 = this.allBlocks[blockIdx];
|
|
468
|
+
const block1Tokens = this.blockTokens[blockIdx];
|
|
469
|
+
const counts = /* @__PURE__ */ new Map();
|
|
470
|
+
const rareTokens = block1Tokens.filter((tok) => {
|
|
471
|
+
const freq = this.invertedIndex.get(tok)?.length || 0;
|
|
472
|
+
return freq < this.allBlocks.length * 0.1;
|
|
473
|
+
});
|
|
474
|
+
for (const tok of rareTokens) {
|
|
475
|
+
const ids = this.invertedIndex.get(tok);
|
|
476
|
+
if (!ids) continue;
|
|
477
|
+
for (const j of ids) {
|
|
478
|
+
if (j <= blockIdx) continue;
|
|
479
|
+
if (this.allBlocks[j].file === block1.file) continue;
|
|
480
|
+
counts.set(j, (counts.get(j) || 0) + 1);
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
return Array.from(counts.entries()).filter(([j, shared]) => {
|
|
484
|
+
const block2Size = this.blockTokens[j].length;
|
|
485
|
+
const minSize = Math.min(block1Tokens.length, block2Size);
|
|
486
|
+
return shared >= minSharedTokens && shared / minSize >= 0.3;
|
|
487
|
+
}).sort((a, b) => b[1] - a[1]).slice(0, maxCandidates).map(([j, shared]) => ({ j, shared }));
|
|
488
|
+
}
|
|
489
|
+
};
|
|
490
|
+
|
|
491
|
+
// src/detector.ts
|
|
407
492
|
async function detectDuplicatePatterns(files, options) {
|
|
408
493
|
const {
|
|
409
494
|
minSimilarity,
|
|
@@ -417,274 +502,92 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
417
502
|
const duplicates = [];
|
|
418
503
|
const maxComparisons = approx ? Infinity : 5e5;
|
|
419
504
|
const allBlocks = files.flatMap(
|
|
420
|
-
(file) => extractCodeBlocks(file.content, minLines).filter(
|
|
421
|
-
content
|
|
422
|
-
|
|
423
|
-
|
|
505
|
+
(file) => extractCodeBlocks(file.content, minLines).filter(
|
|
506
|
+
(block) => block && block.content && block.content.trim().length > 0
|
|
507
|
+
).map((block) => ({
|
|
508
|
+
...block,
|
|
424
509
|
file: file.file,
|
|
425
510
|
normalized: normalizeCode(block.content),
|
|
426
|
-
|
|
427
|
-
tokenCost: (0, import_core2.estimateTokens)(block.content),
|
|
428
|
-
linesOfCode: block.linesOfCode
|
|
511
|
+
tokenCost: block.content ? (0, import_core2.estimateTokens)(block.content) : 0
|
|
429
512
|
}))
|
|
430
513
|
);
|
|
431
|
-
|
|
432
|
-
console.log(`Extracted ${allBlocks.length} code blocks for analysis`);
|
|
433
|
-
}
|
|
434
|
-
const pythonFiles = files.filter((f) => f.file.toLowerCase().endsWith(".py"));
|
|
514
|
+
const pythonFiles = files.filter((f) => f.file.endsWith(".py"));
|
|
435
515
|
if (pythonFiles.length > 0) {
|
|
436
516
|
const { extractPythonPatterns: extractPythonPatterns2 } = await Promise.resolve().then(() => (init_python_extractor(), python_extractor_exports));
|
|
437
|
-
const
|
|
517
|
+
const pythonPatterns = await extractPythonPatterns2(
|
|
438
518
|
pythonFiles.map((f) => f.file)
|
|
439
519
|
);
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
if (!options.onProgress) {
|
|
452
|
-
console.log(`Added ${pythonBlocks.length} Python patterns`);
|
|
453
|
-
}
|
|
454
|
-
}
|
|
455
|
-
if (!approx && allBlocks.length > 500) {
|
|
456
|
-
console.log(
|
|
457
|
-
`\u26A0\uFE0F Using --no-approx mode with ${allBlocks.length} blocks may be slow (O(B\xB2) complexity).`
|
|
458
|
-
);
|
|
459
|
-
console.log(
|
|
460
|
-
` Consider using approximate mode (default) for better performance.`
|
|
520
|
+
allBlocks.push(
|
|
521
|
+
...pythonPatterns.map((p) => ({
|
|
522
|
+
content: p.code,
|
|
523
|
+
startLine: p.startLine,
|
|
524
|
+
endLine: p.endLine,
|
|
525
|
+
file: p.file,
|
|
526
|
+
normalized: normalizeCode(p.code),
|
|
527
|
+
patternType: p.type,
|
|
528
|
+
tokenCost: p.code ? (0, import_core2.estimateTokens)(p.code) : 0,
|
|
529
|
+
linesOfCode: p.endLine - p.startLine + 1
|
|
530
|
+
}))
|
|
461
531
|
);
|
|
462
532
|
}
|
|
463
|
-
const stopwords = /* @__PURE__ */ new Set([
|
|
464
|
-
"return",
|
|
465
|
-
"const",
|
|
466
|
-
"let",
|
|
467
|
-
"var",
|
|
468
|
-
"function",
|
|
469
|
-
"class",
|
|
470
|
-
"new",
|
|
471
|
-
"if",
|
|
472
|
-
"else",
|
|
473
|
-
"for",
|
|
474
|
-
"while",
|
|
475
|
-
"async",
|
|
476
|
-
"await",
|
|
477
|
-
"try",
|
|
478
|
-
"catch",
|
|
479
|
-
"switch",
|
|
480
|
-
"case",
|
|
481
|
-
"default",
|
|
482
|
-
"import",
|
|
483
|
-
"export",
|
|
484
|
-
"from",
|
|
485
|
-
"true",
|
|
486
|
-
"false",
|
|
487
|
-
"null",
|
|
488
|
-
"undefined",
|
|
489
|
-
"this"
|
|
490
|
-
]);
|
|
491
|
-
const tokenize = (norm) => {
|
|
492
|
-
const punctuation = "(){}[];.,";
|
|
493
|
-
const cleaned = norm.split("").map((ch) => punctuation.includes(ch) ? " " : ch).join("");
|
|
494
|
-
return cleaned.split(/\s+/).filter((t) => t && t.length >= 3 && !stopwords.has(t.toLowerCase()));
|
|
495
|
-
};
|
|
496
533
|
const blockTokens = allBlocks.map((b) => tokenize(b.normalized));
|
|
497
|
-
const
|
|
498
|
-
if (approx) {
|
|
499
|
-
for (let i = 0; i < blockTokens.length; i++) {
|
|
500
|
-
for (const tok of blockTokens[i]) {
|
|
501
|
-
let arr = invertedIndex.get(tok);
|
|
502
|
-
if (!arr) {
|
|
503
|
-
arr = [];
|
|
504
|
-
invertedIndex.set(tok, arr);
|
|
505
|
-
}
|
|
506
|
-
arr.push(i);
|
|
507
|
-
}
|
|
508
|
-
}
|
|
509
|
-
}
|
|
510
|
-
const totalComparisons = approx ? void 0 : allBlocks.length * (allBlocks.length - 1) / 2;
|
|
511
|
-
if (totalComparisons !== void 0) {
|
|
512
|
-
console.log(
|
|
513
|
-
`Processing ${totalComparisons.toLocaleString()} comparisons in batches...`
|
|
514
|
-
);
|
|
515
|
-
} else {
|
|
516
|
-
console.log(
|
|
517
|
-
`Using approximate candidate selection to reduce comparisons...`
|
|
518
|
-
);
|
|
519
|
-
}
|
|
534
|
+
const engine = approx ? new ApproxEngine(allBlocks, blockTokens) : null;
|
|
520
535
|
let comparisonsProcessed = 0;
|
|
521
|
-
let comparisonsBudgetExhausted = false;
|
|
522
536
|
const startTime = Date.now();
|
|
523
537
|
for (let i = 0; i < allBlocks.length; i++) {
|
|
524
|
-
if (maxComparisons && comparisonsProcessed >= maxComparisons)
|
|
525
|
-
comparisonsBudgetExhausted = true;
|
|
526
|
-
break;
|
|
527
|
-
}
|
|
538
|
+
if (maxComparisons && comparisonsProcessed >= maxComparisons) break;
|
|
528
539
|
if (i % batchSize === 0 && i > 0) {
|
|
529
540
|
if (options.onProgress) {
|
|
530
|
-
options.onProgress(i, allBlocks.length,
|
|
541
|
+
options.onProgress(i, allBlocks.length, "Analyzing patterns");
|
|
531
542
|
} else {
|
|
532
|
-
const elapsed = (
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
const remaining = totalComparisons - comparisonsProcessed;
|
|
537
|
-
const rate = comparisonsProcessed / parseFloat(elapsed);
|
|
538
|
-
const eta = remaining > 0 ? (remaining / rate).toFixed(0) : 0;
|
|
539
|
-
console.log(
|
|
540
|
-
` ${progress}% (${comparisonsProcessed.toLocaleString()}/${totalComparisons.toLocaleString()} comparisons, ${elapsed}s elapsed, ~${eta}s remaining, ${duplicatesFound} duplicates)`
|
|
541
|
-
);
|
|
542
|
-
} else {
|
|
543
|
-
console.log(
|
|
544
|
-
` Processed ${i.toLocaleString()}/${allBlocks.length} blocks (${elapsed}s elapsed, ${duplicatesFound} duplicates)`
|
|
545
|
-
);
|
|
546
|
-
}
|
|
543
|
+
const elapsed = (Date.now() - startTime) / 1e3;
|
|
544
|
+
console.log(
|
|
545
|
+
` Processed ${i}/${allBlocks.length} blocks (${elapsed.toFixed(1)}s, ${duplicates.length} duplicates)`
|
|
546
|
+
);
|
|
547
547
|
}
|
|
548
|
-
await new Promise((
|
|
548
|
+
await new Promise((r) => setImmediate((resolve) => r(resolve)));
|
|
549
549
|
}
|
|
550
550
|
const block1 = allBlocks[i];
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
const
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
551
|
+
const candidates = engine ? engine.findCandidates(i, minSharedTokens, maxCandidatesPerBlock) : allBlocks.slice(i + 1).map((_, idx) => ({ j: i + 1 + idx, shared: 0 }));
|
|
552
|
+
for (const { j } of candidates) {
|
|
553
|
+
if (!approx && comparisonsProcessed >= maxComparisons) break;
|
|
554
|
+
comparisonsProcessed++;
|
|
555
|
+
const block2 = allBlocks[j];
|
|
556
|
+
if (block1.file === block2.file) continue;
|
|
557
|
+
const sim = jaccardSimilarity(blockTokens[i], blockTokens[j]);
|
|
558
|
+
if (sim >= minSimilarity) {
|
|
559
|
+
const severity = calculateSeverity(
|
|
560
|
+
block1.file,
|
|
561
|
+
block2.file,
|
|
562
|
+
block1.content,
|
|
563
|
+
sim,
|
|
564
|
+
block1.linesOfCode
|
|
565
|
+
);
|
|
566
|
+
const dup = {
|
|
567
|
+
file1: block1.file,
|
|
568
|
+
file2: block2.file,
|
|
569
|
+
line1: block1.startLine,
|
|
570
|
+
line2: block2.startLine,
|
|
571
|
+
endLine1: block1.endLine,
|
|
572
|
+
endLine2: block2.endLine,
|
|
573
|
+
similarity: sim,
|
|
574
|
+
snippet: block1.content.substring(0, 200),
|
|
575
|
+
patternType: block1.patternType,
|
|
576
|
+
tokenCost: block1.tokenCost,
|
|
577
|
+
linesOfCode: block1.linesOfCode,
|
|
578
|
+
severity: severity.severity,
|
|
579
|
+
reason: severity.reason,
|
|
580
|
+
suggestion: severity.suggestion
|
|
581
|
+
};
|
|
582
|
+
duplicates.push(dup);
|
|
583
|
+
if (streamResults)
|
|
583
584
|
console.log(
|
|
584
|
-
`
|
|
585
|
-
);
|
|
586
|
-
break;
|
|
587
|
-
}
|
|
588
|
-
comparisonsProcessed++;
|
|
589
|
-
const block2 = allBlocks[j];
|
|
590
|
-
const similarity = jaccardSimilarity(blockTokens[i], blockTokens[j]);
|
|
591
|
-
if (similarity >= minSimilarity) {
|
|
592
|
-
const { severity, reason, suggestion, matchedRule } = calculateSeverity(
|
|
593
|
-
block1.file,
|
|
594
|
-
block2.file,
|
|
595
|
-
block1.content,
|
|
596
|
-
similarity,
|
|
597
|
-
block1.linesOfCode
|
|
585
|
+
`[DUPLICATE] ${dup.file1}:${dup.line1} <-> ${dup.file2}:${dup.line2} (${Math.round(sim * 100)}%)`
|
|
598
586
|
);
|
|
599
|
-
const duplicate = {
|
|
600
|
-
file1: block1.file,
|
|
601
|
-
file2: block2.file,
|
|
602
|
-
line1: block1.startLine,
|
|
603
|
-
line2: block2.startLine,
|
|
604
|
-
endLine1: block1.endLine,
|
|
605
|
-
endLine2: block2.endLine,
|
|
606
|
-
similarity,
|
|
607
|
-
snippet: block1.content.split("\n").slice(0, 5).join("\n") + "\n...",
|
|
608
|
-
patternType: block1.patternType,
|
|
609
|
-
tokenCost: block1.tokenCost + block2.tokenCost,
|
|
610
|
-
linesOfCode: block1.linesOfCode,
|
|
611
|
-
severity,
|
|
612
|
-
reason,
|
|
613
|
-
suggestion,
|
|
614
|
-
matchedRule
|
|
615
|
-
};
|
|
616
|
-
duplicates.push(duplicate);
|
|
617
|
-
if (streamResults) {
|
|
618
|
-
console.log(
|
|
619
|
-
`
|
|
620
|
-
\u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`
|
|
621
|
-
);
|
|
622
|
-
console.log(
|
|
623
|
-
` ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`
|
|
624
|
-
);
|
|
625
|
-
console.log(
|
|
626
|
-
` Token cost: ${duplicate.tokenCost.toLocaleString()}`
|
|
627
|
-
);
|
|
628
|
-
}
|
|
629
|
-
}
|
|
630
|
-
}
|
|
631
|
-
} else {
|
|
632
|
-
for (let j = i + 1; j < allBlocks.length; j++) {
|
|
633
|
-
if (maxComparisons && comparisonsProcessed >= maxComparisons) break;
|
|
634
|
-
comparisonsProcessed++;
|
|
635
|
-
const block2 = allBlocks[j];
|
|
636
|
-
if (block1.file === block2.file) continue;
|
|
637
|
-
const similarity = jaccardSimilarity(blockTokens[i], blockTokens[j]);
|
|
638
|
-
if (similarity >= minSimilarity) {
|
|
639
|
-
const { severity, reason, suggestion, matchedRule } = calculateSeverity(
|
|
640
|
-
block1.file,
|
|
641
|
-
block2.file,
|
|
642
|
-
block1.content,
|
|
643
|
-
similarity,
|
|
644
|
-
block1.linesOfCode
|
|
645
|
-
);
|
|
646
|
-
const duplicate = {
|
|
647
|
-
file1: block1.file,
|
|
648
|
-
file2: block2.file,
|
|
649
|
-
line1: block1.startLine,
|
|
650
|
-
line2: block2.startLine,
|
|
651
|
-
endLine1: block1.endLine,
|
|
652
|
-
endLine2: block2.endLine,
|
|
653
|
-
similarity,
|
|
654
|
-
snippet: block1.content.split("\n").slice(0, 5).join("\n") + "\n...",
|
|
655
|
-
patternType: block1.patternType,
|
|
656
|
-
tokenCost: block1.tokenCost + block2.tokenCost,
|
|
657
|
-
linesOfCode: block1.linesOfCode,
|
|
658
|
-
severity,
|
|
659
|
-
reason,
|
|
660
|
-
suggestion,
|
|
661
|
-
matchedRule
|
|
662
|
-
};
|
|
663
|
-
duplicates.push(duplicate);
|
|
664
|
-
if (streamResults) {
|
|
665
|
-
console.log(
|
|
666
|
-
`
|
|
667
|
-
\u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`
|
|
668
|
-
);
|
|
669
|
-
console.log(
|
|
670
|
-
` ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`
|
|
671
|
-
);
|
|
672
|
-
console.log(
|
|
673
|
-
` Token cost: ${duplicate.tokenCost.toLocaleString()}`
|
|
674
|
-
);
|
|
675
|
-
}
|
|
676
|
-
}
|
|
677
587
|
}
|
|
678
588
|
}
|
|
679
589
|
}
|
|
680
|
-
|
|
681
|
-
console.log(
|
|
682
|
-
`\u26A0\uFE0F Comparison budget exhausted (${maxComparisons.toLocaleString()} comparisons). Use --max-comparisons to increase.`
|
|
683
|
-
);
|
|
684
|
-
}
|
|
685
|
-
return duplicates.sort(
|
|
686
|
-
(a, b) => b.similarity - a.similarity || b.tokenCost - a.tokenCost
|
|
687
|
-
);
|
|
590
|
+
return duplicates;
|
|
688
591
|
}
|
|
689
592
|
|
|
690
593
|
// src/grouping.ts
|
package/dist/cli.mjs
CHANGED
package/dist/index.d.mts
CHANGED
|
@@ -25,6 +25,7 @@ declare function filterBySeverity<T extends {
|
|
|
25
25
|
severity: Severity;
|
|
26
26
|
}>(duplicates: T[], minSeverity: Severity): T[];
|
|
27
27
|
|
|
28
|
+
type PatternType = 'function' | 'class-method' | 'api-handler' | 'validator' | 'utility' | 'component' | 'unknown';
|
|
28
29
|
interface DuplicatePattern {
|
|
29
30
|
file1: string;
|
|
30
31
|
file2: string;
|
|
@@ -42,7 +43,6 @@ interface DuplicatePattern {
|
|
|
42
43
|
suggestion?: string;
|
|
43
44
|
matchedRule?: string;
|
|
44
45
|
}
|
|
45
|
-
type PatternType = 'function' | 'class-method' | 'api-handler' | 'validator' | 'utility' | 'component' | 'unknown';
|
|
46
46
|
interface FileContent {
|
|
47
47
|
file: string;
|
|
48
48
|
content: string;
|
|
@@ -50,7 +50,6 @@ interface FileContent {
|
|
|
50
50
|
interface DetectionOptions {
|
|
51
51
|
minSimilarity: number;
|
|
52
52
|
minLines: number;
|
|
53
|
-
maxBlocks?: number;
|
|
54
53
|
batchSize?: number;
|
|
55
54
|
approx?: boolean;
|
|
56
55
|
minSharedTokens?: number;
|
|
@@ -59,6 +58,7 @@ interface DetectionOptions {
|
|
|
59
58
|
streamResults?: boolean;
|
|
60
59
|
onProgress?: (processed: number, total: number, message: string) => void;
|
|
61
60
|
}
|
|
61
|
+
|
|
62
62
|
/**
|
|
63
63
|
* Detect duplicate patterns across files with enhanced analysis
|
|
64
64
|
*/
|
package/dist/index.d.ts
CHANGED
|
@@ -25,6 +25,7 @@ declare function filterBySeverity<T extends {
|
|
|
25
25
|
severity: Severity;
|
|
26
26
|
}>(duplicates: T[], minSeverity: Severity): T[];
|
|
27
27
|
|
|
28
|
+
type PatternType = 'function' | 'class-method' | 'api-handler' | 'validator' | 'utility' | 'component' | 'unknown';
|
|
28
29
|
interface DuplicatePattern {
|
|
29
30
|
file1: string;
|
|
30
31
|
file2: string;
|
|
@@ -42,7 +43,6 @@ interface DuplicatePattern {
|
|
|
42
43
|
suggestion?: string;
|
|
43
44
|
matchedRule?: string;
|
|
44
45
|
}
|
|
45
|
-
type PatternType = 'function' | 'class-method' | 'api-handler' | 'validator' | 'utility' | 'component' | 'unknown';
|
|
46
46
|
interface FileContent {
|
|
47
47
|
file: string;
|
|
48
48
|
content: string;
|
|
@@ -50,7 +50,6 @@ interface FileContent {
|
|
|
50
50
|
interface DetectionOptions {
|
|
51
51
|
minSimilarity: number;
|
|
52
52
|
minLines: number;
|
|
53
|
-
maxBlocks?: number;
|
|
54
53
|
batchSize?: number;
|
|
55
54
|
approx?: boolean;
|
|
56
55
|
minSharedTokens?: number;
|
|
@@ -59,6 +58,7 @@ interface DetectionOptions {
|
|
|
59
58
|
streamResults?: boolean;
|
|
60
59
|
onProgress?: (processed: number, total: number, message: string) => void;
|
|
61
60
|
}
|
|
61
|
+
|
|
62
62
|
/**
|
|
63
63
|
* Detect duplicate patterns across files with enhanced analysis
|
|
64
64
|
*/
|