dslop 1.3.1 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,29 @@
1
1
  # Changelog
2
2
 
3
+ ## v1.4.1
4
+
5
+ [compare changes](https://github.com/turf-sports/dslop/compare/v1.4.0...v1.4.1)
6
+
7
+ ### 📖 Documentation
8
+
9
+ - Add technical details and limitations ([3598291](https://github.com/turf-sports/dslop/commit/3598291))
10
+
11
+ ### ❤️ Contributors
12
+
13
+ - Siddharth Sharma <sharmasiddharthcs@gmail.com>
14
+
15
+ ## v1.4.0
16
+
17
+ [compare changes](https://github.com/turf-sports/dslop/compare/v1.3.1...v1.4.0)
18
+
19
+ ### 🚀 Enhancements
20
+
21
+ - Improve duplicate detection accuracy for small blocks ([b009d47](https://github.com/turf-sports/dslop/commit/b009d47))
22
+
23
+ ### ❤️ Contributors
24
+
25
+ - Siddharth Sharma <sharmasiddharthcs@gmail.com>
26
+
3
27
  ## v1.3.1
4
28
 
5
29
  [compare changes](https://github.com/turf-sports/dslop/compare/v1.3.0...v1.3.1)
package/README.md CHANGED
@@ -34,6 +34,32 @@ dslop --all --cross-package # cross-package dupes (monorepos)
34
34
  | `--cross-package` | only show dupes across packages |
35
35
  | `--json` | json output |
36
36
 
37
+ ## How it works
38
+
39
+ **Block extraction:** Sliding window over source files. Extracts overlapping blocks at sizes 4, 6, 9, 13... lines. For blocks <10 lines, step=1 (every line). Larger blocks use step=blockSize/2.
40
+
41
+ **Normalization:** Before hashing, code is normalized:
42
+ - String literals → `"<STRING>"`
43
+ - Numbers → `<NUMBER>`
44
+ - Whitespace collapsed
45
+ - Comments preserved (intentional - comments often indicate copy-paste)
46
+
47
+ **Matching:** Normalized blocks are hashed. Exact hash matches = exact duplicates. For similar (non-exact) matches, uses character-level similarity on a sample of blocks per hash bucket.
48
+
49
+ **Declaration detection** (`--all` mode): Regex-based extraction of types, interfaces, functions, classes. Compares by name similarity (Levenshtein + word overlap) and content similarity.
50
+
51
+ **Changed-line filtering** (default mode): Parses `git diff` output to get exact line ranges. Only reports duplicates where your changed lines match code elsewhere.
52
+
53
+ ## Limitations
54
+
55
+ - **Text-based, not AST:** Doesn't understand code structure. A reformatted function won't match the original. Two semantically identical functions with different variable names won't match.
56
+ - **TypeScript/JavaScript focused:** Default extensions are ts/tsx/js/jsx. Works on any text but tuned for JS-like syntax.
57
+ - **No cross-language:** Won't detect a Python function duplicated in TypeScript.
58
+ - **Comments affect matching:** Intentional tradeoff. Copy-pasted code often includes comments.
59
+ - **Declaration detection is regex:** Can miss edge cases like multi-line generics or decorators.
60
+ - **Minimum 4 lines:** Shorter duplicates are ignored to reduce noise. Use `-m 2` to lower the minimum and catch shorter duplicates.
61
+ - **Memory:** Loads all blocks in memory, so very large codebases (>1M lines) may be slow and memory-hungry.
62
+
37
63
  ## License
38
64
 
39
65
  MIT
package/dist/index.js CHANGED
@@ -359,6 +359,249 @@ var COLORS = {
359
359
  var MAX_FILE_SIZE2 = 1024 * 1024;
360
360
  var MAX_BLOCKS_FOR_SIMILARITY = 1e4;
361
361
 
362
+ // src/normalizer.ts
363
/**
 * Canonicalises a code snippet so that copies differing only in literal
 * values or spacing produce the same text (and therefore the same hash).
 *
 * Steps, in order:
 *   1. double-quoted, single-quoted and template strings -> placeholders
 *   2. hex colour literals (`#fff`, `#123456`)            -> COLOR_PLACEHOLDER
 *   3. numeric literals                                   -> NUMBER_PLACEHOLDER
 *   4. runs of spaces/tabs collapsed, trailing whitespace trimmed,
 *      blank lines removed
 *
 * Comments are deliberately left untouched.
 *
 * @param {string} code - Raw source text.
 * @returns {string} The normalised text.
 */
function normalizeCode(code) {
  let normalized = code;
  normalized = normalized.replace(/"(?:[^"\\]|\\.)*"/g, `"${STRING_PLACEHOLDER}"`);
  normalized = normalized.replace(/'(?:[^'\\]|\\.)*'/g, `'${STRING_PLACEHOLDER}'`);
  normalized = normalized.replace(/`(?:[^`\\]|\\.)*`/g, `\`${TEMPLATE_PLACEHOLDER}\``);
  // Colours must be handled before numbers: otherwise a digit-only colour
  // such as `#123456` is first rewritten to `#<number placeholder>` and the
  // colour pattern can no longer recognise it, while `#ffffff` still becomes
  // a colour placeholder - two equivalent snippets would normalise differently.
  normalized = normalized.replace(/#[0-9a-fA-F]{3,8}\b/g, COLOR_PLACEHOLDER);
  normalized = normalized.replace(/\b\d+\.?\d*\b/g, NUMBER_PLACEHOLDER);
  normalized = normalized.replace(/[ \t]+/g, " ");
  normalized = normalized
    .split("\n")
    .map((line) => line.trimEnd())
    .join("\n");
  // `\s*` also spans newlines, so any run of blank lines collapses to one break.
  normalized = normalized.replace(/\n\s*\n/g, "\n");
  return normalized;
}
378
+
379
+ // src/declarations.ts
380
/**
 * Small non-cryptographic string hash (multiply-by-31 rolling hash kept in
 * signed 32-bit range), rendered in base 36.
 *
 * @param {string} str - Text to hash.
 * @returns {string} Base-36 digest; may start with "-" for negative values.
 */
function simpleHash(str) {
  let acc = 0;
  let pos = 0;
  while (pos < str.length) {
    // (acc << 5) - acc === acc * 31; `| 0` wraps to a signed 32-bit integer.
    acc = ((acc << 5) - acc + str.charCodeAt(pos)) | 0;
    pos += 1;
  }
  return acc.toString(36);
}
389
// Ordered table of regex probes used to recognise declaration headers.
// Each entry carries:
//   type      - the declaration kind reported for a hit
//   regex     - multiline (`m`) pattern anchored with `^`; capture group 1 is
//               the optional `export ` prefix
//   nameGroup - index of the capture group holding the declared identifier
// Order matters to consumers that stop at the first hit: the three
// "function" probes come before the generic "const" probe.
var DECLARATION_PATTERNS = [
  // `type Name<T> =` - name must start with an uppercase letter.
  { type: "type", regex: /^(export\s+)?type\s+([A-Z]\w*)\s*(?:<[^>]*>)?\s*=/m, nameGroup: 2 },
  // `interface Name<T> extends ... {`
  { type: "interface", regex: /^(export\s+)?interface\s+([A-Z]\w*)\s*(?:<[^>]*>)?\s*(?:extends\s+[^{]+)?\{/m, nameGroup: 2 },
  // `[abstract] class Name<T> [extends X] [implements ...] {`
  { type: "class", regex: /^(export\s+)?(?:abstract\s+)?class\s+([A-Z]\w*)\s*(?:<[^>]*>)?\s*(?:extends\s+\w+)?\s*(?:implements\s+[^{]+)?\{/m, nameGroup: 2 },
  // `[const] enum Name {`
  { type: "enum", regex: /^(export\s+)?(?:const\s+)?enum\s+([A-Z]\w*)\s*\{/m, nameGroup: 2 },
  // `[async] function name<T>(`
  { type: "function", regex: /^(export\s+)?(?:async\s+)?function\s+([a-zA-Z]\w*)\s*(?:<[^>]*>)?\s*\(/m, nameGroup: 2 },
  // `const name[: Type] = [async] (` - a const whose value opens with a parameter list.
  { type: "function", regex: /^(export\s+)?const\s+([a-zA-Z]\w*)\s*(?::\s*[^=]+)?\s*=\s*(?:async\s*)?\(/m, nameGroup: 2 },
  // `const name = <T>(` - generic arrow function.
  { type: "function", regex: /^(export\s+)?const\s+([a-zA-Z]\w*)\s*=\s*<[^>]+>\s*\(/m, nameGroup: 2 },
  // `const UPPER_CASE[: Type] =` - only ALL-CAPS identifiers count as constants.
  { type: "const", regex: /^(export\s+)?const\s+([A-Z][A-Z_0-9]*)\s*(?::\s*[^=]+)?\s*=/m, nameGroup: 2 }
];
399
/**
 * Scans forward from `startIndex` and returns the index of the `}` that
 * brings the brace nesting depth back to zero.
 *
 * Braces inside single-quoted, double-quoted and template strings are
 * ignored. Backslash escapes are consumed as a pair, so a string ending in
 * an escaped backslash (`"\\"`) closes correctly; checking only the previous
 * character would treat the closing quote as escaped and never leave the
 * string.
 *
 * Comments and `${...}` interpolations are not understood: text inside a
 * template literal is skipped wholesale.
 *
 * @param {string} content - Text to scan.
 * @param {number} startIndex - Index to start at (normally the opening `{`).
 * @returns {number} Index of the matching `}`, or -1 if none is found.
 */
function findMatchingBrace(content, startIndex) {
  let depth = 0;
  // Active string delimiter (", ' or `), or "" when outside any string.
  let quote = "";
  for (let i = startIndex; i < content.length; i++) {
    const char = content[i];
    if (quote) {
      if (char === "\\") {
        // Skip the escaped character, whatever it is.
        i++;
      } else if (char === quote) {
        quote = "";
      }
      continue;
    }
    if (char === '"' || char === "'" || char === "`") {
      quote = char;
      continue;
    }
    if (char === "{") {
      depth++;
    } else if (char === "}") {
      depth--;
      if (depth === 0) {
        return i;
      }
    }
  }
  return -1;
}
438
/**
 * Finds where a statement that begins at or before `startIndex` ends: the
 * first `;` or newline seen while not nested inside `()`, `{}` or `[]` and
 * not inside a quoted string.
 *
 * A terminator is skipped when the next non-blank text starts with `|` or
 * `&`, so multi-line union / intersection types are kept together.
 *
 * Backslash escapes inside strings are consumed as a pair, so a string
 * ending in an escaped backslash (`'\\'`) closes correctly.
 *
 * @param {string} content - Text to scan.
 * @param {number} startIndex - Index at which scanning starts.
 * @returns {number} Index of the terminating character, or
 *   `content.length - 1` when no terminator is found.
 */
function findStatementEnd(content, startIndex) {
  let depth = 0;
  // Active string delimiter (" or '), or "" when outside any string.
  let quote = "";
  for (let i = startIndex; i < content.length; i++) {
    const char = content[i];
    if (quote) {
      if (char === "\\") {
        // Skip the escaped character, whatever it is.
        i++;
      } else if (char === quote) {
        quote = "";
      }
      continue;
    }
    if (char === '"' || char === "'") {
      quote = char;
      continue;
    }
    if (char === "(" || char === "{" || char === "[") {
      depth++;
    }
    if (char === ")" || char === "}" || char === "]") {
      depth--;
    }
    if (depth === 0 && (char === ";" || char === "\n")) {
      const remaining = content.slice(i + 1).trimStart();
      if (!remaining.startsWith("|") && !remaining.startsWith("&")) {
        return i;
      }
    }
  }
  return content.length - 1;
}
470
// Cheap per-line pre-filter: only lines that (after leading whitespace) start
// with a declaration keyword are handed to the heavier DECLARATION_PATTERNS.
var QUICK_CHECK = /^(export\s+)?(type|interface|class|enum|const|function|async\s+function)\s+/;

/**
 * Extracts type / interface / class / enum / function / constant
 * declarations from a source file using the regex table
 * DECLARATION_PATTERNS.
 *
 * For every line passing QUICK_CHECK, the patterns are tried in order
 * against the text from that line's start to the end of the file; the first
 * pattern matching at offset 0 wins. The declaration's extent is then found
 * by brace matching or statement-end scanning, depending on its kind.
 *
 * Only a `const name = (...)`-style function header is probed for a `=>`
 * arrow. Probing plain constants or `function` declarations would pick up
 * an arrow from a *following* statement (e.g. `const MAX = 5;` followed by
 * an arrow function) and make the declaration swallow it.
 *
 * @param {string} content - Full text of the file.
 * @param {string} filePath - Path recorded on each declaration.
 * @returns {Array<object>} Declarations with `type`, `name`, `content`,
 *   `normalized`, `hash`, `filePath`, 1-based `startLine` / `endLine`, and
 *   `exported`.
 */
function extractDeclarations(content, filePath) {
  const declarations = [];
  const lines = content.split("\n");
  // Offset of the current line's first character within `content`.
  let lineStart = 0;
  for (let lineIndex = 0; lineIndex < lines.length; lineIndex++) {
    const line = lines[lineIndex];
    const trimmed = line.trimStart();
    if (!QUICK_CHECK.test(trimmed)) {
      lineStart += line.length + 1;
      continue;
    }
    const remainingContent = content.slice(lineStart);
    for (const pattern of DECLARATION_PATTERNS) {
      const match = remainingContent.match(pattern.regex);
      // The patterns are multiline, so they may hit a later line; only a hit
      // that starts exactly at this line counts.
      if (match && match.index === 0) {
        const name = match[pattern.nameGroup];
        if (!name) {
          continue;
        }
        const exported = !!match[1];
        const headerEnd = match[0].length;
        let endIndex;
        if (pattern.type === "interface" || pattern.type === "class" || pattern.type === "enum") {
          const braceStart = remainingContent.indexOf("{");
          if (braceStart === -1) {
            continue;
          }
          endIndex = findMatchingBrace(remainingContent, braceStart);
          if (endIndex === -1) {
            endIndex = remainingContent.indexOf("\n", braceStart + 1);
          }
        } else if (pattern.type === "type") {
          endIndex = findStatementEnd(remainingContent, headerEnd);
        } else {
          // "function" and "const" kinds.
          const usesFunctionKeyword = /^(?:export\s+)?(?:async\s+)?function\s/.test(match[0]);
          const mayBeArrow = pattern.type === "function" && !usesFunctionKeyword;
          const hasArrow = mayBeArrow && remainingContent.slice(headerEnd, headerEnd + 100).includes("=>");
          if (hasArrow) {
            const arrowIndex = remainingContent.indexOf("=>", headerEnd);
            const afterArrow = remainingContent.slice(arrowIndex + 2).trimStart();
            if (afterArrow.startsWith("{")) {
              // Position of the `{` that opens the arrow function body.
              const braceStart = arrowIndex + 2 + (remainingContent.slice(arrowIndex + 2).length - afterArrow.length);
              endIndex = findMatchingBrace(remainingContent, braceStart);
            } else {
              endIndex = findStatementEnd(remainingContent, arrowIndex + 2);
            }
          } else {
            const braceIndex = remainingContent.indexOf("{", headerEnd);
            const newlineIndex = remainingContent.indexOf("\n", headerEnd);
            // A `{` on the header line opens the body / object literal.
            if (braceIndex !== -1 && (newlineIndex === -1 || braceIndex < newlineIndex)) {
              endIndex = findMatchingBrace(remainingContent, braceIndex);
            } else {
              endIndex = findStatementEnd(remainingContent, headerEnd);
            }
          }
        }
        // Fallbacks when no end was found: next blank line, then a 500-char cap.
        if (endIndex === -1) {
          endIndex = remainingContent.indexOf("\n\n");
        }
        if (endIndex === -1) {
          endIndex = Math.min(remainingContent.length - 1, 500);
        }
        const declarationContent = remainingContent.slice(0, endIndex + 1).trim();
        const endLineIndex = lineIndex + declarationContent.split("\n").length - 1;
        const normalized = normalizeCode(declarationContent);
        declarations.push({
          type: pattern.type,
          name,
          content: declarationContent,
          normalized,
          hash: simpleHash(normalized),
          filePath,
          startLine: lineIndex + 1,
          endLine: endLineIndex + 1,
          exported
        });
        break;
      }
    }
    lineStart += line.length + 1;
  }
  return declarations;
}
552
/**
 * Scores how alike two identifiers are, from 0 (unrelated) to 1 (identical).
 *
 * Tiers, first hit wins:
 *   - identical                          -> 1
 *   - equal ignoring case                -> 0.95
 *   - one contains the other (any case)  -> 0.8
 *   - word-set Jaccard overlap > 0.5     -> overlap * 0.9
 *   - otherwise                          -> normalised edit similarity * 0.7
 *
 * @param {string} a - First identifier.
 * @param {string} b - Second identifier.
 * @returns {number} Similarity score.
 */
function calculateNameSimilarity(a, b) {
  if (a === b) {
    return 1;
  }
  const lowerA = a.toLowerCase();
  const lowerB = b.toLowerCase();
  if (lowerA === lowerB) {
    return 0.95;
  }
  if (lowerA.includes(lowerB) || lowerB.includes(lowerA)) {
    return 0.8;
  }

  // Compare the identifiers as sets of lower-cased words.
  const wordsA = new Set(splitCamelCase(a).map((word) => word.toLowerCase()));
  const wordsB = new Set(splitCamelCase(b).map((word) => word.toLowerCase()));
  const shared = [...wordsA].filter((word) => wordsB.has(word)).length;
  const combined = wordsA.size + wordsB.size - shared;
  const jaccard = combined > 0 ? shared / combined : 0;
  if (jaccard > 0.5) {
    return jaccard * 0.9;
  }

  // Last resort: character-level edit distance, scaled down.
  const edits = levenshteinDistance(lowerA, lowerB);
  const longest = Math.max(lowerA.length, lowerB.length);
  return Math.max(0, 1 - edits / longest) * 0.7;
}
578
/**
 * Breaks an identifier into its words, splitting on camelCase humps,
 * acronym boundaries (`HTTPServer` -> `HTTP`, `Server`), underscores and
 * hyphens. Original casing of each word is kept.
 *
 * @param {string} str - Identifier to split.
 * @returns {string[]} Non-empty words in order of appearance.
 */
function splitCamelCase(str) {
  const spaced = str
    .replace(/([a-z])([A-Z])/g, "$1 $2")
    .replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2")
    .replace(/[_-]/g, " ");
  const words = [];
  for (const piece of spaced.split(/\s+/)) {
    if (piece.length > 0) {
      words.push(piece);
    }
  }
  return words;
}
581
/**
 * Levenshtein edit distance between two strings (insertions, deletions and
 * substitutions each cost 1), compared by UTF-16 code unit.
 *
 * Only the previous and current DP rows are kept.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Minimum number of single-character edits.
 */
function levenshteinDistance(a, b) {
  if (a.length === 0) {
    return b.length;
  }
  if (b.length === 0) {
    return a.length;
  }
  // Row 0: turning the empty prefix of `b` into each prefix of `a`.
  let previous = Array.from({ length: a.length + 1 }, (_, col) => col);
  for (let row = 1; row <= b.length; row++) {
    const current = [row];
    for (let col = 1; col <= a.length; col++) {
      if (b[row - 1] === a[col - 1]) {
        current[col] = previous[col - 1];
      } else {
        const substitute = previous[col - 1] + 1;
        const insert = current[col - 1] + 1;
        const remove = previous[col] + 1;
        current[col] = Math.min(substitute, insert, remove);
      }
    }
    previous = current;
  }
  return previous[a.length];
}
604
+
362
605
  // src/detector.ts
363
606
  function calculateSimilarityFast(a, b) {
364
607
  if (a === b)
@@ -429,7 +672,7 @@ class UnionFind {
429
672
  }
430
673
  }
431
674
  }
432
- function findDuplicates(blocks, minSimilarity, _basePath) {
675
+ function findDuplicates(blocks, minSimilarity, basePath) {
433
676
  const hashGroups = new Map;
434
677
  for (const block of blocks) {
435
678
  const existing = hashGroups.get(block.hash) ?? [];
@@ -582,7 +825,7 @@ function findDuplicates(blocks, minSimilarity, _basePath) {
582
825
  const dedupedGroups = deduplicateGroups(allDuplicates);
583
826
  return dedupedGroups.map((group) => ({
584
827
  ...group,
585
- suggestion: generateRefactoringSuggestion(group, _basePath)
828
+ suggestion: generateRefactoringSuggestion(group, basePath)
586
829
  }));
587
830
  }
588
831
  function filterOverlappingBlocks(blocks) {
@@ -776,10 +1019,81 @@ function deduplicateGroups(groups) {
776
1019
  }
777
1020
  return result;
778
1021
  }
1022
/**
 * Groups declarations of the same kind that look like duplicates of one
 * another, judged by name similarity and normalised-content similarity.
 *
 * Within each kind, every not-yet-grouped declaration acts as an anchor and
 * greedily collects later declarations that are similar enough to it.
 * Declarations in the same file starting fewer than 5 lines apart are never
 * paired. Groups are returned sorted by `matches.length * similarity`,
 * highest first.
 *
 * @param {Array<object>} declarations - Declarations carrying `type`,
 *   `name`, `normalized`, `content`, `filePath`, `startLine`, `endLine`
 *   and `exported`.
 * @param {number} minSimilarity - Threshold for the combined score.
 * @returns {Array<object>} Duplicate groups with `id`, `type`, `similarity`,
 *   `nameSimilarity`, `contentSimilarity`, `matches` and `suggestion`.
 */
function findDeclarationDuplicates(declarations, minSimilarity) {
  // Projection of a declaration onto the fields reported for a match.
  const asMatch = ({ name, filePath, startLine, endLine, content, exported }) => ({
    name,
    filePath,
    startLine,
    endLine,
    content,
    exported
  });

  // Bucket by declaration kind; only same-kind declarations are compared.
  const buckets = new Map();
  for (const declaration of declarations) {
    const bucket = buckets.get(declaration.type);
    if (bucket) {
      bucket.push(declaration);
    } else {
      buckets.set(declaration.type, [declaration]);
    }
  }

  const groups = [];
  for (const [type, bucket] of buckets) {
    // Indices already absorbed into some group.
    const claimed = new Set();
    bucket.forEach((anchor, anchorIndex) => {
      if (claimed.has(anchorIndex)) {
        return;
      }
      const matches = [asMatch(anchor)];
      let topNameSim = 0;
      let topContentSim = 0;

      for (let k = anchorIndex + 1; k < bucket.length; k++) {
        if (claimed.has(k)) {
          continue;
        }
        const candidate = bucket[k];
        const isNeighbour = anchor.filePath === candidate.filePath && Math.abs(anchor.startLine - candidate.startLine) < 5;
        if (isNeighbour) {
          continue;
        }
        const nameSim = calculateNameSimilarity(anchor.name, candidate.name);
        const contentSim = calculateSimilarityFast(anchor.normalized, candidate.normalized);
        // A very strong name or content match alone can carry the score.
        const combined = Math.max(
          nameSim * 0.4 + contentSim * 0.6,
          nameSim >= 0.9 ? nameSim : 0,
          contentSim >= 0.9 ? contentSim : 0
        );
        const accepted = combined >= minSimilarity || (nameSim >= 0.8 && contentSim >= 0.5);
        if (!accepted) {
          continue;
        }
        claimed.add(k);
        matches.push(asMatch(candidate));
        topNameSim = Math.max(topNameSim, nameSim);
        topContentSim = Math.max(topContentSim, contentSim);
      }

      if (matches.length < 2) {
        return;
      }
      claimed.add(anchorIndex);
      const exported = matches.find((m) => m.exported);
      let suggestion;
      if (exported) {
        suggestion = `Import \`${exported.name}\` from \`${exported.filePath.replace(/.*\/(src|lib)\//, "")}\``;
      } else {
        suggestion = `Consider extracting \`${anchor.name}\` to a shared location`;
      }
      groups.push({
        // Ids are assigned in discovery order, before sorting.
        id: groups.length,
        type,
        similarity: Math.max(topNameSim, topContentSim),
        nameSimilarity: topNameSim,
        contentSimilarity: topContentSim,
        matches,
        suggestion
      });
    });
  }

  groups.sort((a, b) => b.matches.length * b.similarity - a.matches.length * a.similarity);
  return groups;
}
779
1093
 
780
1094
  // src/formatter.ts
781
1095
  import path from "node:path";
782
- var { reset, bold, dim, red, green, yellow, blue, cyan, magenta, gray } = COLORS;
1096
+ var { reset, bold, dim, red, green, yellow, cyan, magenta, gray } = COLORS;
783
1097
  function truncatePath(filePath, basePath) {
784
1098
  const absoluteBase = path.resolve(basePath);
785
1099
  let relativePath = filePath;
@@ -822,13 +1136,14 @@ function formatGroup(group, index, basePath) {
822
1136
  if (hasMore) {
823
1137
  lines.push(` ${dim}└─${reset} ${gray}... and ${group.matches.length - MAX_MATCHES_IN_SUMMARY} more${reset}`);
824
1138
  }
825
- if (group.matches.length > 0) {
1139
+ const firstMatch = group.matches[0];
1140
+ if (firstMatch) {
826
1141
  lines.push("");
827
1142
  lines.push(` ${dim}Code preview:${reset}`);
828
- const previewLines = group.matches[0].content.split(`
1143
+ const previewLines = firstMatch.content.split(`
829
1144
  `).slice(0, CODE_PREVIEW_CONTEXT_LINES).map((line) => ` ${gray}│${reset} ${dim}${line.slice(0, 80)}${line.length > 80 ? "..." : ""}${reset}`);
830
1145
  lines.push(...previewLines);
831
- if (group.matches[0].content.split(`
1146
+ if (firstMatch.content.split(`
832
1147
  `).length > CODE_PREVIEW_CONTEXT_LINES) {
833
1148
  lines.push(` ${gray}│${reset} ${dim}...${reset}`);
834
1149
  }
@@ -856,8 +1171,11 @@ function formatOutput(groups, basePath) {
856
1171
  lines.push("");
857
1172
  const groupsToShow = groups.slice(0, MAX_GROUPS_DETAILED);
858
1173
  for (let i = 0;i < groupsToShow.length; i++) {
859
- lines.push(formatGroup(groupsToShow[i], i, basePath));
860
- lines.push("");
1174
+ const group = groupsToShow[i];
1175
+ if (group) {
1176
+ lines.push(formatGroup(group, i, basePath));
1177
+ lines.push("");
1178
+ }
861
1179
  }
862
1180
  if (groups.length > MAX_GROUPS_DETAILED) {
863
1181
  lines.push(`${dim}... and ${groups.length - MAX_GROUPS_DETAILED} more groups${reset}`);
@@ -896,6 +1214,76 @@ function formatStats(groups) {
896
1214
  return lines.join(`
897
1215
  `);
898
1216
  }
1217
// Singular display labels for each declaration kind, looked up by a
// declaration group's `type` value when rendering report headings.
var TYPE_LABELS = {
  type: "Type",
  interface: "Interface",
  function: "Function",
  class: "Class",
  const: "Constant",
  enum: "Enum"
};
1225
/**
 * Renders one duplicate-declaration group as coloured terminal text:
 * a header line with similarity badge, up to five occurrences with their
 * location and export status, and the refactoring suggestion.
 *
 * @param {object} group - Group with `type`, `similarity`,
 *   `nameSimilarity`, `matches` and `suggestion`.
 * @param {number} index - Zero-based position; shown one-based.
 * @param {string} basePath - Base directory passed to `truncatePath`.
 * @returns {string} Multi-line text block.
 */
function formatDeclarationGroup(group, index, basePath) {
  const shownLimit = 5;
  const { matches, similarity, nameSimilarity } = group;
  const label = TYPE_LABELS[group.type] || group.type;

  let badge;
  if (similarity >= 0.95) {
    badge = `${red}${bold}EXACT${reset}`;
  } else {
    badge = `${yellow}${Math.round(similarity * 100)}%${reset}`;
  }

  const out = [`${bold}${label} ${index + 1}${reset} │ ${badge} │ ${matches.length} occurrences`];
  // Name similarity is only interesting when the names are related but not identical.
  if (nameSimilarity > 0 && nameSimilarity < 1) {
    out.push(` ${dim}Name similarity: ${Math.round(nameSimilarity * 100)}%${reset}`);
  }
  out.push("");

  matches.slice(0, shownLimit).forEach((match) => {
    const location = truncatePath(match.filePath, basePath);
    const visibility = match.exported ? `${green}exported${reset}` : `${gray}local${reset}`;
    out.push(
      ` ${dim}├─${reset} ${cyan}${match.name}${reset} [${visibility}]`,
      ` ${location}:${yellow}${match.startLine}${reset}-${yellow}${match.endLine}${reset}`
    );
  });
  if (matches.length > shownLimit) {
    out.push(` ${dim}└─${reset} ${gray}... and ${matches.length - shownLimit} more${reset}`);
  }

  out.push("", ` ${magenta}→${reset} ${group.suggestion}`);
  return out.join("\n");
}
1248
/**
 * Renders the "DUPLICATE DECLARATIONS" report section: groups are listed
 * per declaration kind (at most 10 per kind), followed by a summary.
 *
 * Headings are pluralised properly ("Classes", not "Classs").
 *
 * @param {Array<object>} groups - Duplicate declaration groups.
 * @param {string} basePath - Base directory used to shorten file paths.
 * @returns {string} Report text, or "" when there are no groups.
 */
function formatDeclarations(groups, basePath) {
  if (groups.length === 0) {
    return "";
  }
  const maxPerType = 10;
  // English plural: sibilant endings take "es" (Class -> Classes).
  const pluralize = (label) => (/(?:s|x|z|ch|sh)$/i.test(label) ? `${label}es` : `${label}s`);

  const lines = [];
  lines.push("");
  lines.push(SECTION_SEPARATOR2);
  lines.push(`${bold}DUPLICATE DECLARATIONS${reset}`);
  lines.push(SECTION_SEPARATOR2);
  lines.push("");

  const byType = new Map();
  for (const group of groups) {
    const existing = byType.get(group.type) || [];
    existing.push(group);
    byType.set(group.type, existing);
  }

  // Numbering runs across all kinds rather than restarting per kind.
  let globalIndex = 0;
  for (const [type, typeGroups] of byType) {
    const typeLabel = TYPE_LABELS[type] || type;
    const pluralLabel = pluralize(typeLabel);
    lines.push(`${bold}${pluralLabel} (${typeGroups.length})${reset}`);
    lines.push("");
    for (const group of typeGroups.slice(0, maxPerType)) {
      lines.push(formatDeclarationGroup(group, globalIndex++, basePath));
      lines.push("");
    }
    if (typeGroups.length > maxPerType) {
      lines.push(`${dim}... and ${typeGroups.length - maxPerType} more ${pluralLabel.toLowerCase()}${reset}`);
      lines.push("");
    }
  }

  lines.push(SECTION_SEPARATOR2);
  const totalDups = groups.reduce((sum, g) => sum + g.matches.length, 0);
  lines.push(`${bold}Declaration Summary${reset}`);
  lines.push(` Duplicate groups: ${bold}${groups.length}${reset}`);
  lines.push(` Total occurrences: ${bold}${totalDups}${reset}`);
  lines.push(SECTION_SEPARATOR2);
  return lines.join("\n");
}
899
1287
 
900
1288
  // node_modules/@isaacs/balanced-match/dist/esm/index.js
901
1289
  var balanced = (a, b, str) => {
@@ -6446,26 +6834,7 @@ glob.glob = glob;
6446
6834
  // src/scanner.ts
6447
6835
  import path3 from "node:path";
6448
6836
  import { readFile, stat } from "node:fs/promises";
6449
-
6450
- // src/normalizer.ts
6451
- function normalizeCode(code) {
6452
- let normalized = code;
6453
- normalized = normalized.replace(/"(?:[^"\\]|\\.)*"/g, `"${STRING_PLACEHOLDER}"`);
6454
- normalized = normalized.replace(/'(?:[^'\\]|\\.)*'/g, `'${STRING_PLACEHOLDER}'`);
6455
- normalized = normalized.replace(/`(?:[^`\\]|\\.)*`/g, `\`${TEMPLATE_PLACEHOLDER}\``);
6456
- normalized = normalized.replace(/\b\d+\.?\d*\b/g, NUMBER_PLACEHOLDER);
6457
- normalized = normalized.replace(/#[0-9a-fA-F]{3,8}\b/g, COLOR_PLACEHOLDER);
6458
- normalized = normalized.replace(/[ \t]+/g, " ");
6459
- normalized = normalized.split(`
6460
- `).map((line) => line.trimEnd()).join(`
6461
- `);
6462
- normalized = normalized.replace(/\n\s*\n/g, `
6463
- `);
6464
- return normalized;
6465
- }
6466
-
6467
- // src/scanner.ts
6468
- function simpleHash(str) {
6837
+ function simpleHash2(str) {
6469
6838
  let hash = 0;
6470
6839
  for (let i = 0;i < str.length; i++) {
6471
6840
  const char = str.charCodeAt(i);
@@ -6493,7 +6862,7 @@ function extractBlocks(content, filePath, minLines, shouldNormalize) {
6493
6862
  blockSizes.push(size);
6494
6863
  }
6495
6864
  for (const blockSize of blockSizes) {
6496
- const step = Math.max(1, Math.floor(blockSize / SLIDING_WINDOW_STEP_DIVISOR));
6865
+ const step = blockSize < 10 ? 1 : Math.max(1, Math.floor(blockSize / SLIDING_WINDOW_STEP_DIVISOR));
6497
6866
  for (let i = 0;i <= lines.length - blockSize; i += step) {
6498
6867
  const blockLines = lines.slice(i, i + blockSize);
6499
6868
  const blockContent = blockLines.join(`
@@ -6503,7 +6872,7 @@ function extractBlocks(content, filePath, minLines, shouldNormalize) {
6503
6872
  continue;
6504
6873
  }
6505
6874
  const normalized = shouldNormalize ? normalizeCode(blockContent) : blockContent;
6506
- const hash = simpleHash(normalized);
6875
+ const hash = simpleHash2(normalized);
6507
6876
  blocks.push({
6508
6877
  content: blockContent,
6509
6878
  normalized,
@@ -6524,10 +6893,11 @@ function shouldIgnore(filePath, ignorePatterns) {
6524
6893
  return normalizedPath.includes(`/${normalizedPattern}/`) || normalizedPath.includes(`\\${normalizedPattern}\\`) || normalizedPath.endsWith(`/${normalizedPattern}`) || normalizedPath.endsWith(`\\${normalizedPattern}`);
6525
6894
  });
6526
6895
  }
6527
- async function scanDirectory(targetPath, options) {
6896
+ async function scanDirectory(targetPath, options, enableDeclarations = true) {
6528
6897
  const { extensions, ignorePatterns, minLines, normalize: normalize2 } = options;
6529
6898
  const absolutePath = path3.resolve(targetPath);
6530
6899
  const blocks = [];
6900
+ const declarations = [];
6531
6901
  let fileCount = 0;
6532
6902
  let totalLines = 0;
6533
6903
  const pattern = extensions.length === 1 ? `**/*.${extensions[0]}` : `**/*.{${extensions.join(",")}}`;
@@ -6537,6 +6907,7 @@ async function scanDirectory(targetPath, options) {
6537
6907
  nodir: true,
6538
6908
  ignore: ignorePatterns.map((p) => `**/${p}/**`)
6539
6909
  });
6910
+ const isTypeScript = enableDeclarations && extensions.some((ext2) => ext2 === "ts" || ext2 === "tsx");
6540
6911
  for (const filePath of files) {
6541
6912
  if (shouldIgnore(filePath, ignorePatterns)) {
6542
6913
  continue;
@@ -6553,45 +6924,102 @@ async function scanDirectory(targetPath, options) {
6553
6924
  fileCount++;
6554
6925
  const fileBlocks = extractBlocks(content, filePath, minLines, normalize2);
6555
6926
  blocks.push(...fileBlocks);
6927
+ if (isTypeScript && (filePath.endsWith(".ts") || filePath.endsWith(".tsx"))) {
6928
+ const fileDeclarations = extractDeclarations(content, filePath);
6929
+ declarations.push(...fileDeclarations);
6930
+ }
6556
6931
  } catch {
6557
6932
  console.warn(`Warning: Could not read ${filePath}`);
6558
6933
  }
6559
6934
  }
6560
- return { blocks, fileCount, totalLines };
6935
+ return { blocks, declarations, fileCount, totalLines };
6561
6936
  }
6562
6937
 
6563
6938
  // index.ts
6564
- var VERSION = process.env.npm_package_version || "1.3.1";
6565
- function getChangedFiles(targetPath) {
6566
- const absolutePath = path4.resolve(targetPath);
6567
- const files = new Set;
6568
- const addFiles = (output) => {
6569
- for (const file of output.split(`
6939
+ var VERSION = process.env.npm_package_version || "1.4.1";
6940
/**
 * Parses unified `git diff` text into the line ranges that were added or
 * modified in the new version of each file.
 *
 * Hunk bodies are consumed using the line counts from the `@@ -a,b +c,d @@`
 * header, so every line inside a hunk is classified by its first character
 * only. This matters for content that happens to look like a file header:
 * an added line `++i;` appears as `+++i;` and a removed line `--x;` as
 * `---x;`; classifying by a "+++"/"---" prefix would drop the former and
 * mis-count the latter, shifting all following line numbers.
 *
 * @param {string} diff - Output of `git diff`.
 * @param {string} cwd - Directory against which diff paths are resolved.
 * @returns {Map<string, Array<{start: number, end: number}>>} Absolute file
 *   path -> 1-based inclusive ranges of added lines; consecutive lines are
 *   merged into one range.
 */
function parseDiffOutput(diff, cwd) {
  const changes = new Map();
  const hunkHeader = /^@@ -\d+(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/;
  let currentFile = null;
  let newLineNum = 0;
  // Lines still expected in the current hunk for the old / new side.
  let oldRemaining = 0;
  let newRemaining = 0;

  for (const line of diff.split("\n")) {
    if (oldRemaining > 0 || newRemaining > 0) {
      // Inside a hunk body.
      if (line.startsWith("\\")) {
        // "\ No newline at end of file" marker - not a content line.
        continue;
      }
      if (line.startsWith("+")) {
        if (currentFile) {
          const ranges = changes.get(currentFile);
          const lastRange = ranges[ranges.length - 1];
          if (lastRange && lastRange.end === newLineNum - 1) {
            lastRange.end = newLineNum;
          } else {
            ranges.push({ start: newLineNum, end: newLineNum });
          }
        }
        newLineNum++;
        newRemaining--;
      } else if (line.startsWith("-")) {
        oldRemaining--;
      } else {
        // Context line (including an empty line whose leading space was lost).
        newLineNum++;
        oldRemaining--;
        newRemaining--;
      }
      continue;
    }

    if (line.startsWith("diff --")) {
      // New file section: forget the previous file so that a section whose
      // "+++" line is not understood cannot be attributed to it.
      currentFile = null;
    } else if (line.startsWith("+++ b/")) {
      // git appends a TAB after names that contain spaces; drop it.
      currentFile = path4.resolve(cwd, line.slice(6).replace(/\t$/, ""));
      if (!changes.has(currentFile)) {
        changes.set(currentFile, []);
      }
    } else if (line.startsWith("@@")) {
      const header = line.match(hunkHeader);
      if (header) {
        newLineNum = Number.parseInt(header[2], 10);
        // An omitted count means exactly one line.
        oldRemaining = header[1] === undefined ? 1 : Number.parseInt(header[1], 10);
        newRemaining = header[3] === undefined ? 1 : Number.parseInt(header[3], 10);
      }
    }
  }
  return changes;
}
6973
/**
 * Collects the changed line ranges under `targetPath` from git: staged
 * changes, unstaged changes, untracked files (treated as changed in full)
 * and, when not on the base branch, everything committed since the merge
 * base with it.
 *
 * All `git diff` calls use `--relative`. Without it git prints paths
 * relative to the repository root, which would be resolved against
 * `targetPath` and point at the wrong files whenever `targetPath` is a
 * subdirectory of the repository. With it, paths are relative to the
 * working directory - the same convention `git ls-files --others` already
 * follows - and changes outside the directory are left out.
 *
 * @param {string} targetPath - Directory to inspect (git is run there).
 * @returns {Map<string, Array<{start: number, end: number}>>} Absolute file
 *   path -> changed line ranges; an empty Map when git fails (for example
 *   outside a repository).
 */
function getChangedLines(targetPath) {
  const absolutePath = path4.resolve(targetPath);
  const gitOptions = { cwd: absolutePath, encoding: "utf-8" };
  const allChanges = new Map();

  const mergeChanges = (newChanges) => {
    for (const [file, ranges] of newChanges) {
      if (!allChanges.has(file)) {
        allChanges.set(file, []);
      }
      allChanges.get(file).push(...ranges);
    }
  };
  // Untracked files have no diff; mark the whole file as changed unless the
  // file already has recorded ranges.
  const addFullFile = (filePath) => {
    const resolved = path4.resolve(absolutePath, filePath);
    if (!allChanges.has(resolved)) {
      allChanges.set(resolved, [{ start: 1, end: 999999 }]);
    }
  };

  try {
    const stagedDiff = execSync("git diff --relative --cached", gitOptions);
    mergeChanges(parseDiffOutput(stagedDiff, absolutePath));
    const unstagedDiff = execSync("git diff --relative", gitOptions);
    mergeChanges(parseDiffOutput(unstagedDiff, absolutePath));
    const untracked = execSync("git ls-files --others --exclude-standard", gitOptions);
    for (const file of untracked.split("\n")) {
      if (file.trim()) {
        addFullFile(file.trim());
      }
    }
    try {
      const baseBranch = execSync("git rev-parse --abbrev-ref origin/HEAD 2>/dev/null || echo origin/main", gitOptions).trim().replace("origin/", "");
      const currentBranch = execSync("git rev-parse --abbrev-ref HEAD", gitOptions).trim();
      if (currentBranch !== baseBranch) {
        // NOTE(review): `baseBranch` comes from git output and is interpolated
        // into a shell command; ref names may contain shell metacharacters.
        // Consider execFileSync with an argument array.
        const mergeBase = execSync(`git merge-base ${baseBranch} HEAD 2>/dev/null || echo ""`, gitOptions).trim();
        if (mergeBase) {
          const branchDiff = execSync(`git diff --relative ${mergeBase}...HEAD`, gitOptions);
          mergeChanges(parseDiffOutput(branchDiff, absolutePath));
        }
      }
    } catch {
      // Best effort: without a resolvable base branch only the working-tree
      // changes gathered above are reported.
    }
    return allChanges;
  } catch {
    // git unavailable or not a repository: report no changes.
    return new Map();
  }
}
7017
/**
 * Tells whether the inclusive line span `blockStart..blockEnd` of a file
 * overlaps any changed range recorded for that file.
 *
 * @param {string} filePath - Key to look up in `changes`.
 * @param {number} blockStart - First line of the span.
 * @param {number} blockEnd - Last line of the span.
 * @param {Map<string, Array<{start: number, end: number}>>} changes -
 *   Changed ranges per file.
 * @returns {boolean} True when at least one range intersects the span.
 */
function isInChangedLines(filePath, blockStart, blockEnd, changes) {
  const fileRanges = changes.get(filePath);
  if (!fileRanges) {
    return false;
  }
  for (const range of fileRanges) {
    // Two inclusive intervals overlap when each starts before the other ends.
    if (range.start <= blockEnd && range.end >= blockStart) {
      return true;
    }
  }
  return false;
}
6595
7023
  function showHelp() {
6596
7024
  console.log(`
6597
7025
  dslop - Detect Similar/Duplicate Lines Of Programming
@@ -6675,8 +7103,8 @@ async function main() {
6675
7103
  minLines,
6676
7104
  normalize: normalize2
6677
7105
  };
6678
- const changedFiles = !scanAll ? getChangedFiles(targetPath) : null;
6679
- if (!scanAll && changedFiles?.size === 0) {
7106
+ const changedLines = !scanAll ? getChangedLines(targetPath) : null;
7107
+ if (!scanAll && changedLines?.size === 0) {
6680
7108
  console.log(`
6681
7109
  No changes found. Use --all to scan entire codebase.`);
6682
7110
  process.exit(0);
@@ -6684,7 +7112,7 @@ No changes found. Use --all to scan entire codebase.`);
6684
7112
  console.log(`
6685
7113
  Scanning ${targetPath}...`);
6686
7114
  if (!scanAll) {
6687
- console.log(` Mode: checking ${changedFiles.size} changed files`);
7115
+ console.log(` Mode: checking changed lines in ${changedLines.size} files`);
6688
7116
  } else {
6689
7117
  console.log(` Mode: full codebase scan`);
6690
7118
  }
@@ -6697,20 +7125,42 @@ Scanning ${targetPath}...`);
6697
7125
  console.log();
6698
7126
  try {
6699
7127
  const startTime = performance.now();
6700
- const { blocks, fileCount, totalLines } = await scanDirectory(targetPath, scanOptions);
7128
+ const { blocks, declarations, fileCount, totalLines } = await scanDirectory(targetPath, scanOptions, scanAll);
6701
7129
  const scanTime = performance.now() - startTime;
6702
7130
  console.log(`Scanned ${fileCount} files (${totalLines.toLocaleString()} lines) in ${Math.round(scanTime)}ms`);
6703
- console.log(`Extracted ${blocks.length.toLocaleString()} code blocks
7131
+ if (declarations.length > 0) {
7132
+ console.log(`Extracted ${blocks.length.toLocaleString()} code blocks, ${declarations.length.toLocaleString()} declarations
7133
+ `);
7134
+ } else {
7135
+ console.log(`Extracted ${blocks.length.toLocaleString()} code blocks
6704
7136
  `);
6705
- if (blocks.length === 0) {
6706
- console.log("No code blocks found to analyze.");
7137
+ }
7138
+ if (blocks.length === 0 && declarations.length === 0) {
7139
+ console.log("No code found to analyze.");
6707
7140
  process.exit(0);
6708
7141
  }
6709
7142
  const detectStart = performance.now();
6710
7143
  let duplicates = findDuplicates(blocks, similarity, targetPath);
7144
+ let declDuplicates = findDeclarationDuplicates(declarations, similarity);
6711
7145
  const detectTime = performance.now() - detectStart;
6712
- if (!scanAll && changedFiles) {
6713
- duplicates = duplicates.filter((group) => group.matches.some((m) => changedFiles.has(m.filePath)));
7146
+ if (!scanAll && changedLines) {
7147
+ const changedFilePaths = new Set(changedLines.keys());
7148
+ duplicates = duplicates.filter((group) => {
7149
+ const inChanged = group.matches.filter((m) => isInChangedLines(m.filePath, m.startLine, m.endLine, changedLines));
7150
+ const notInChanged = group.matches.filter((m) => !isInChangedLines(m.filePath, m.startLine, m.endLine, changedLines));
7151
+ if (inChanged.length === 0 || notInChanged.length === 0)
7152
+ return false;
7153
+ const inOtherFiles = notInChanged.some((m) => !changedFilePaths.has(m.filePath));
7154
+ const inSameFileOutsideChanges = notInChanged.some((m) => changedFilePaths.has(m.filePath));
7155
+ if (inOtherFiles && group.matches.length > 10)
7156
+ return false;
7157
+ return inOtherFiles || inSameFileOutsideChanges;
7158
+ });
7159
+ declDuplicates = declDuplicates.filter((group) => {
7160
+ const inChanged = group.matches.filter((m) => isInChangedLines(m.filePath, m.startLine, m.endLine, changedLines));
7161
+ const notInChanged = group.matches.filter((m) => !isInChangedLines(m.filePath, m.startLine, m.endLine, changedLines));
7162
+ return inChanged.length > 0 && notInChanged.length > 0;
7163
+ });
6714
7164
  }
6715
7165
  if (crossPackage) {
6716
7166
  duplicates = duplicates.filter((group) => {
@@ -6720,10 +7170,23 @@ Scanning ${targetPath}...`);
6720
7170
  }));
6721
7171
  return packages.size > 1;
6722
7172
  });
7173
+ declDuplicates = declDuplicates.filter((group) => {
7174
+ const packages = new Set(group.matches.map((m) => {
7175
+ const match2 = m.filePath.match(/(?:apps|packages|libs)\/([^\/]+)/);
7176
+ return match2 ? match2[1] : m.filePath.split("/")[0];
7177
+ }));
7178
+ return packages.size > 1;
7179
+ });
6723
7180
  }
6724
- console.log(`Found ${duplicates.length} duplicate groups in ${Math.round(detectTime)}ms
7181
+ const totalGroups = duplicates.length + declDuplicates.length;
7182
+ console.log(`Found ${totalGroups} duplicate groups in ${Math.round(detectTime)}ms`);
7183
+ if (declDuplicates.length > 0) {
7184
+ console.log(` (${duplicates.length} code blocks, ${declDuplicates.length} declarations)
6725
7185
  `);
6726
- if (duplicates.length === 0) {
7186
+ } else {
7187
+ console.log();
7188
+ }
7189
+ if (totalGroups === 0) {
6727
7190
  if (!scanAll) {
6728
7191
  console.log("No duplicates in your changes. You're good!");
6729
7192
  } else if (crossPackage) {
@@ -6734,10 +7197,15 @@ Scanning ${targetPath}...`);
6734
7197
  process.exit(0);
6735
7198
  }
6736
7199
  if (jsonOutput) {
6737
- console.log(JSON.stringify(duplicates, null, 2));
7200
+ console.log(JSON.stringify({ duplicates, declarations: declDuplicates }, null, 2));
6738
7201
  } else {
6739
- console.log(formatOutput(duplicates, targetPath));
6740
- console.log(formatStats(duplicates));
7202
+ if (duplicates.length > 0) {
7203
+ console.log(formatOutput(duplicates, targetPath));
7204
+ console.log(formatStats(duplicates));
7205
+ }
7206
+ if (declDuplicates.length > 0) {
7207
+ console.log(formatDeclarations(declDuplicates, targetPath));
7208
+ }
6741
7209
  }
6742
7210
  } catch (error) {
6743
7211
  console.error("Error:", error instanceof Error ? error.message : error);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dslop",
3
- "version": "1.3.1",
3
+ "version": "1.4.1",
4
4
  "description": "Detect Similar/Duplicate Lines Of Programming - Find code duplication in your codebase",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",