npm - @goshenkata/dryscan-core - Versions diffs - 1.2.5 → 1.2.6 - Mend

@goshenkata/dryscan-core 1.2.5 → 1.2.6

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (10)

package/dist/index.d.ts +1 -1
package/dist/index.js +246 -184
package/dist/index.js.map +1 -1
package/package.json +1 -1
package/src/DryScan.ts +5 -4
package/src/config/dryconfig.ts +1 -1
package/src/extractors/java.ts +22 -7
package/src/services/DuplicateService.ts +133 -184
package/src/services/DuplicationCache.ts +107 -1
package/src/services/UpdateService.ts +5 -2

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@goshenkata/dryscan-core",
-  "version": "1.2.5",
+  "version": "1.2.6",
   "description": "Core library for DryScan - semantic code duplication analyzer",
   "type": "module",
   "main": "./dist/index.js",

package/src/DryScan.ts CHANGED Viewed

@@ -89,14 +89,15 @@ export class DryScan {
    * 6. Recompute embeddings for affected units
    * 7. Update file tracking metadata
    */
-  async updateIndex(): Promise<void> {
+  async updateIndex(): Promise<string[]> {
     console.log(`[DryScan] Updating index at ${this.repoPath}...`);
     console.log("[DryScan] Checking for file changes...");
     const start = Date.now();
     await this.ensureDatabase();
-    await this.services.updater.updateIndex();
+    const dirtyPaths = await this.services.updater.updateIndex();
     const duration = Date.now() - start;
     console.log(`[DryScan] Index update complete. Took ${duration}ms.`);
+    return dirtyPaths;
   }
@@ -129,13 +130,13 @@ export class DryScan {
     console.log("[DryScan] Updating index...");
     const updateStart = Date.now();
-    await this.updateIndex();
+    const dirtyPaths = await this.updateIndex();
     const updateDuration = Date.now() - updateStart;
     console.log(`[DryScan] Index update  took ${updateDuration}ms.`);
     console.log("[DryScan] Detecting duplicates...");
     const dupStart = Date.now();
-    const result = await this.services.duplicate.findDuplicates(config);
+    const result = await this.services.duplicate.findDuplicates(config, dirtyPaths);
     const dupDuration = Date.now() - dupStart;
     console.log(`[DryScan] Duplicate detection took ${dupDuration}ms.`);

package/src/config/dryconfig.ts CHANGED Viewed

@@ -11,7 +11,7 @@ export const DEFAULT_CONFIG: DryConfig = {
   excludedPairs: [],
   minLines: 3,
   minBlockLines: 5,
-  threshold: 0.88,
+  threshold: 0.8,
   embeddingSource: "http://localhost:11434",
   contextLength: 2048,
 };

package/src/extractors/java.ts CHANGED Viewed

@@ -72,7 +72,8 @@ export class JavaExtractor implements LanguageExtractor {
         const fnUnit = this.buildFunctionUnit(node, source, fileRelPath, currentClass);
         const fnLength = fnUnit.endLine - fnUnit.startLine;
         const bodyNode = this.getFunctionBody(node);
-        const skipFunction = this.shouldSkip(IndexUnitType.FUNCTION, fnUnit.name, fnLength);
+        const fnArity = this.getNodeArity(node);
+        const skipFunction = this.shouldSkip(IndexUnitType.FUNCTION, fnUnit.name, fnLength, fnArity);
         if (skipFunction) {
           return;
@@ -158,7 +159,7 @@ export class JavaExtractor implements LanguageExtractor {
     return crypto.createHash(BLOCK_HASH_ALGO).update(normalized).digest("hex");
   }
-  private shouldSkip(unitType: IndexUnitType, name: string, lineCount: number): boolean {
+  private shouldSkip(unitType: IndexUnitType, name: string, lineCount: number, arity?: number): boolean {
     if (!this.config) {
       throw new Error("Config not loaded before skip evaluation");
     }
@@ -167,17 +168,30 @@ export class JavaExtractor implements LanguageExtractor {
       ? Math.max(indexConfig.blockMinLines, config.minBlockLines ?? 0)
       : config.minLines;
     const belowMin = minLines > 0 && lineCount < minLines;
-    const trivial = unitType === IndexUnitType.FUNCTION && this.isTrivialFunction(name);
+    const trivial = unitType === IndexUnitType.FUNCTION && this.isTrivialFunction(name, arity ?? 0);
     return belowMin || trivial;
   }
-  private isTrivialFunction(fullName: string): boolean {
+  /**
+   * A function is trivial if it follows a simple accessor pattern:
+   * - getters/isers: name matches get[A-Z] or is[A-Z] with exactly 0 parameters
+   * - setters: name matches set[A-Z] with at most 1 parameter
+   * Methods like getUserById(Long id) have arity > 0 and are NOT trivial.
+   */
+  private isTrivialFunction(fullName: string, arity: number): boolean {
     const simpleName = fullName.split(".").pop() || fullName;
-    const isGetter = /^(get|is)[A-Z]/.test(simpleName);
-    const isSetter = /^set[A-Z]/.test(simpleName);
+    const isGetter = /^(get|is)[A-Z]/.test(simpleName) && arity === 0;
+    const isSetter = /^set[A-Z]/.test(simpleName) && arity <= 1;
     return isGetter || isSetter;
   }
+  /** Counts the formal parameters of a method or constructor node. */
+  private getNodeArity(node: Parser.SyntaxNode): number {
+    const params = node.childForFieldName?.("parameters");
+    if (!params) return 0;
+    return params.namedChildren.filter(c => c.type === "formal_parameter" || c.type === "spread_parameter").length;
+  }
   private isDtoClass(node: Parser.SyntaxNode, source: string, className: string): boolean {
     const classBody = node.children.find((child) => child.type === "class_body");
     if (!classBody) return false;
@@ -200,7 +214,8 @@ export class JavaExtractor implements LanguageExtractor {
       if (child.type === "method_declaration" || child.type === "constructor_declaration") {
         const simpleName = this.getSimpleFunctionName(child, source);
         const fullName = `${className}.${simpleName}`;
-        if (!this.isTrivialFunction(fullName)) {
+        const arity = this.getNodeArity(child);
+        if (!this.isTrivialFunction(fullName, arity)) {
           return false;
         }
         continue;

package/src/services/DuplicateService.ts CHANGED Viewed

@@ -1,6 +1,5 @@
 import debug from "debug";
 import shortUuid from "short-uuid";
-import { cosineSimilarity } from "@langchain/core/utils/math";
 import { DryScanServiceDeps } from "./types";
 import { DuplicateAnalysisResult, DuplicateGroup, DuplicationScore, IndexUnit, IndexUnitType } from "../types";
 import { indexConfig } from "../config/indexConfig";
@@ -15,137 +14,93 @@ export class DuplicateService {
   constructor(private readonly deps: DryScanServiceDeps) {}
-  //todo vetter optimisation
-  async findDuplicates(config: DryConfig): Promise<DuplicateAnalysisResult> {
+  /**
+   * @param dirtyPaths - File paths changed since last run. When provided, only
+   *   dirty×all similarities are recomputed; clean×clean values are reused from
+   *   the existing matrix.  Pass undefined (or omit) for a full rebuild.
+   */
+  async findDuplicates(config: DryConfig, dirtyPaths?: string[]): Promise<DuplicateAnalysisResult> {
     this.config = config;
     const t0 = performance.now();
     const allUnits = await this.deps.db.getAllUnits();
     log("Starting duplicate analysis on %d units", allUnits.length);
     if (allUnits.length < 2) {
-      log("Not enough units to compare, returning empty result");
-      const score = this.computeDuplicationScore([], allUnits);
-      return { duplicates: [], score };
+      return { duplicates: [], score: this.computeDuplicationScore([], allUnits) };
     }
     const thresholds = this.resolveThresholds(config.threshold);
-    log("Resolved thresholds: function=%d, block=%d, class=%d", thresholds.function, thresholds.block, thresholds.class);
-    const duplicates = this.computeDuplicates(allUnits, thresholds);
-    const filteredDuplicates = duplicates.filter((group) => !this.isGroupExcluded(group));
-    log("Found %d duplicate groups (%d excluded)", filteredDuplicates.length, duplicates.length - filteredDuplicates.length);
+    const duplicates = this.computeDuplicates(allUnits, thresholds, dirtyPaths);
+    const filtered = duplicates.filter((g) => !this.isGroupExcluded(g));
+    log("Found %d duplicate groups (%d excluded)", filtered.length, duplicates.length - filtered.length);
-    // Update cache asynchronously; no need to block the main flow.
-    this.cache.update(filteredDuplicates).catch((err) => log("Cache update failed: %O", err));
+    this.cache.update(filtered).catch((err) => log("Cache update failed: %O", err));
-    const score = this.computeDuplicationScore(filteredDuplicates, allUnits);
+    const score = this.computeDuplicationScore(filtered, allUnits);
     log("findDuplicates completed in %dms", (performance.now() - t0).toFixed(2));
-    return { duplicates: filteredDuplicates, score };
+    return { duplicates: filtered, score };
   }
   private resolveThresholds(functionThreshold?: number): { function: number; block: number; class: number } {
-    const defaults = indexConfig.thresholds;
-    const clamp = (value: number) => Math.min(1, Math.max(0, value));
-    const base = functionThreshold ?? defaults.function;
-    const blockOffset = defaults.block - defaults.function;
-    const classOffset = defaults.class - defaults.function;
-    const functionThresholdValue = clamp(base);
+    const d = indexConfig.thresholds;
+    const clamp = (v: number) => Math.min(1, Math.max(0, v));
+    const fn = clamp(functionThreshold ?? d.function);
     return {
-      function: functionThresholdValue,
-      block: clamp(functionThresholdValue + blockOffset),
-      class: clamp(functionThresholdValue + classOffset),
+      function: fn,
+      block: clamp(fn + d.block - d.function),
+      class: clamp(fn + d.class - d.function),
     };
   }
   private computeDuplicates(
     units: IndexUnit[],
-    thresholds: { function: number; block: number; class: number }
+    thresholds: { function: number; block: number; class: number },
+    dirtyPaths?: string[]
   ): DuplicateGroup[] {
-    const duplicates: DuplicateGroup[] = [];
-    const byType = new Map<IndexUnitType, IndexUnit[]>();
-    for (const unit of units) {
-      const list = byType.get(unit.unitType) ?? [];
-      list.push(unit);
-      byType.set(unit.unitType, list);
-    }
+    this.cache.clearRunCaches();
+    this.cache.buildEmbSimCache(units, dirtyPaths);
+    const duplicates: DuplicateGroup[] = [];
     const t0 = performance.now();
-    for (const [type, typedUnits] of byType.entries()) {
+    for (const [type, typedUnits] of this.groupByType(units)) {
       const threshold = this.getThreshold(type, thresholds);
-      log("Comparing %d units of type '%s' with threshold %d", typedUnits.length, type, threshold);
-      const typeStart = performance.now();
+      log("Comparing %d %s units (threshold=%.3f)", typedUnits.length, type, threshold);
       for (let i = 0; i < typedUnits.length; i++) {
         for (let j = i + 1; j < typedUnits.length; j++) {
-          const left = typedUnits[i];
-          const right = typedUnits[j];
-          if (this.shouldSkipComparison(left, right)) {
-            log("Skipping nested block comparison: '%s' and '%s'", left.name, right.name);
-            continue;
-          }
+          const left = typedUnits[i], right = typedUnits[j];
+          if (this.shouldSkipComparison(left, right)) continue;
+          // Always check the cache first — this allows pairs whose embeddings
+          // have since been cleared to still be reported using a prior score.
           const cached = this.cache.get(left.id, right.id, left.filePath, right.filePath);
-          let similarity: number | null = null;
-          if (cached !== null) {
-            log("Cache hit for '%s' <-> '%s': similarity=%d", left.name, right.name, cached);
-            similarity = cached;
-          } else {
-            if (!left.embedding || !right.embedding) {
-              log("Skipping '%s' <-> '%s': missing embedding", left.name, right.name);
-              continue;
-            }
-            similarity = this.computeWeightedSimilarity(left, right);
-            log("Computed similarity for '%s' <-> '%s': %d", left.name, right.name, similarity);
-          }
-          if (similarity === null) continue;
-          if (similarity >= threshold) {
-            const exclusionString = this.deps.pairing.pairKeyForUnits(left, right);
-            if (!exclusionString) continue;
-            log("Duplicate found: '%s' <-> '%s' (similarity=%d)", left.name, right.name, similarity);
-            duplicates.push({
-              id: `${left.id}::${right.id}`,
-              similarity,
-              shortId: shortUuid.generate(),
-              exclusionString,
-              left: {
-                id: left.id,
-                name: left.name,
-                filePath: left.filePath,
-                startLine: left.startLine,
-                endLine: left.endLine,
-                code: left.code,
-                unitType: left.unitType,
-              },
-              right: {
-                id: right.id,
-                name: right.name,
-                filePath: right.filePath,
-                startLine: right.startLine,
-                endLine: right.endLine,
-                code: right.code,
-                unitType: right.unitType,
-              },
-            });
-          }
+          const hasEmbeddings = left.embedding?.length && right.embedding?.length;
+          const similarity = cached ?? (hasEmbeddings ? this.computeWeightedSimilarity(left, right, threshold) : 0);
+          if (similarity < threshold) continue;
+          const exclusionString = this.deps.pairing.pairKeyForUnits(left, right);
+          if (!exclusionString) continue;
+          duplicates.push({
+            id: `${left.id}::${right.id}`,
+            similarity,
+            shortId: shortUuid.generate(),
+            exclusionString,
+            left: this.toMember(left),
+            right: this.toMember(right),
+          });
         }
       }
-      log("Type '%s' comparisons completed in %dms", type, (performance.now() - typeStart).toFixed(2));
     }
-    log("computeDuplicates completed in %dms, found %d raw duplicates", (performance.now() - t0).toFixed(2), duplicates.length);
+    log("computeDuplicates: %d duplicates in %dms", duplicates.length, (performance.now() - t0).toFixed(2));
     return duplicates.sort((a, b) => b.similarity - a.similarity);
   }
   private isGroupExcluded(group: DuplicateGroup): boolean {
     const config = this.config;
-    if (!config || !config.excludedPairs || config.excludedPairs.length === 0) return false;
+    if (!config?.excludedPairs?.length) return false;
     const key = this.deps.pairing.pairKeyForUnits(group.left, group.right);
     if (!key) return false;
     const actual = this.deps.pairing.parsePairKey(key);
@@ -162,142 +117,136 @@ export class DuplicateService {
     return thresholds.function;
   }
-  private computeWeightedSimilarity(left: IndexUnit, right: IndexUnit): number {
-    const selfSimilarity = this.similarityWithFallback(left, right);
+  private computeWeightedSimilarity(left: IndexUnit, right: IndexUnit, threshold: number): number {
+    const selfSim = this.similarity(left, right);
+    //CLASS
     if (left.unitType === IndexUnitType.CLASS) {
-      return selfSimilarity * indexConfig.weights.class.self;
+      return selfSim * indexConfig.weights.class.self;
     }
+    // FUNCTION
     if (left.unitType === IndexUnitType.FUNCTION) {
-      const weights = indexConfig.weights.function;
-      const hasParentClass = !!this.findParentOfType(left, IndexUnitType.CLASS) && !!this.findParentOfType(right, IndexUnitType.CLASS);
-      const parentClassSimilarity = hasParentClass ? this.parentSimilarity(left, right, IndexUnitType.CLASS) : 0;
-      // Re-normalize weights when parent context is missing, so standalone units aren't penalized.
-      const totalWeight = weights.self + (hasParentClass ? weights.parentClass : 0);
-      return ((weights.self * selfSimilarity) + (hasParentClass ? (weights.parentClass * parentClassSimilarity) : 0)) / totalWeight;
+      const w = indexConfig.weights.function;
+      const hasPC = this.bothHaveParent(left, right, IndexUnitType.CLASS);
+      const total = w.self + (hasPC ? w.parentClass : 0);
+      // Early exit: even with perfect parent similarity, can't reach threshold.
+      if ((w.self * selfSim + (hasPC ? w.parentClass : 0)) / total < threshold) return 0;
+      return (w.self * selfSim + (hasPC ? w.parentClass * this.parentSimilarity(left, right, IndexUnitType.CLASS) : 0)) / total;
     }
-    const weights = indexConfig.weights.block;
-    const hasParentFunction = !!this.findParentOfType(left, IndexUnitType.FUNCTION) && !!this.findParentOfType(right, IndexUnitType.FUNCTION);
-    const hasParentClass = !!this.findParentOfType(left, IndexUnitType.CLASS) && !!this.findParentOfType(right, IndexUnitType.CLASS);
-    const parentFuncSim = hasParentFunction ? this.parentSimilarity(left, right, IndexUnitType.FUNCTION) : 0;
-    const parentClassSim = hasParentClass ? this.parentSimilarity(left, right, IndexUnitType.CLASS) : 0;
+    // BLOCK
+    const w = indexConfig.weights.block;
+    const hasPF = this.bothHaveParent(left, right, IndexUnitType.FUNCTION);
+    const hasPC = this.bothHaveParent(left, right, IndexUnitType.CLASS);
+    const total = w.self + (hasPF ? w.parentFunction : 0) + (hasPC ? w.parentClass : 0);
+    if ((w.self * selfSim + (hasPF ? w.parentFunction : 0) + (hasPC ? w.parentClass : 0)) / total < threshold) return 0;
+    return (
+      w.self * selfSim +
+      (hasPF ? w.parentFunction * this.parentSimilarity(left, right, IndexUnitType.FUNCTION) : 0) +
+      (hasPC ? w.parentClass * this.parentSimilarity(left, right, IndexUnitType.CLASS) : 0)
+    ) / total;
+  }
-    // Re-normalize weights when some parent context is missing.
-    const totalWeight =
-      weights.self +
-      (hasParentFunction ? weights.parentFunction : 0) +
-      (hasParentClass ? weights.parentClass : 0);
+  /** Groups all units by type for the comparison loop. Units without embeddings are included
+   * so that cache hits can still be returned for pairs whose embeddings were cleared. */
+  private groupByType(units: IndexUnit[]): Map<IndexUnitType, IndexUnit[]> {
+    const byType = new Map<IndexUnitType, IndexUnit[]>();
+    for (const unit of units) {
+      const list = byType.get(unit.unitType) ?? [];
+      list.push(unit);
+      byType.set(unit.unitType, list);
+    }
+    return byType;
+  }
-    return (
-      (weights.self * selfSimilarity) +
-      (hasParentFunction ? (weights.parentFunction * parentFuncSim) : 0) +
-      (hasParentClass ? (weights.parentClass * parentClassSim) : 0)
-    ) / totalWeight;
+  private toMember(unit: IndexUnit): DuplicateGroup["left"] {
+    return {
+      id: unit.id,
+      name: unit.name,
+      filePath: unit.filePath,
+      startLine: unit.startLine,
+      endLine: unit.endLine,
+      code: unit.code,
+      unitType: unit.unitType,
+    };
   }
-  private parentSimilarity(left: IndexUnit, right: IndexUnit, targetType: IndexUnitType): number {
-    const leftParent = this.findParentOfType(left, targetType);
-    const rightParent = this.findParentOfType(right, targetType);
-    if (!leftParent || !rightParent) return 0;
-    return this.similarityWithFallback(leftParent, rightParent);
+  private bothHaveParent(left: IndexUnit, right: IndexUnit, type: IndexUnitType): boolean {
+    return !!this.findParent(left, type) && !!this.findParent(right, type);
   }
-  private similarityWithFallback(left: IndexUnit, right: IndexUnit): number {
-    const leftHasEmbedding = this.hasVector(left);
-    const rightHasEmbedding = this.hasVector(right);
+  private parentSimilarity(left: IndexUnit, right: IndexUnit, type: IndexUnitType): number {
+    const lp = this.findParent(left, type), rp = this.findParent(right, type);
+    if (!lp || !rp) return 0;
-    if (leftHasEmbedding && rightHasEmbedding) {
-      return cosineSimilarity([left.embedding as number[]], [right.embedding as number[]])[0][0];
-    }
+    const key = lp.id < rp.id ? `${lp.id}::${rp.id}` : `${rp.id}::${lp.id}`;
+    const cached = this.cache.getParentSim(key);
+    if (cached !== undefined) return cached;
-    return this.childSimilarity(left, right);
+    const sim = this.similarity(lp, rp);
+    this.cache.setParentSim(key, sim);
+    return sim;
+  }
+  /** Resolves similarity via the pre-computed embedding matrix, falling back to best child match. */
+  private similarity(left: IndexUnit, right: IndexUnit): number {
+    return this.cache.getEmbSim(left.id, right.id) ?? this.childSimilarity(left, right);
   }
   private childSimilarity(left: IndexUnit, right: IndexUnit): number {
-    const leftChildren = left.children ?? [];
-    const rightChildren = right.children ?? [];
-    if (leftChildren.length === 0 || rightChildren.length === 0) return 0;
+    const lc = left.children ?? [], rc = right.children ?? [];
+    if (!lc.length || !rc.length) return 0;
     let best = 0;
-    for (const lChild of leftChildren) {
-      for (const rChild of rightChildren) {
-        if (lChild.unitType !== rChild.unitType) continue;
-        const sim = this.similarityWithFallback(lChild, rChild);
+    for (const l of lc) {
+      for (const r of rc) {
+        if (l.unitType !== r.unitType) continue;
+        const sim = this.similarity(l, r);
         if (sim > best) best = sim;
       }
     }
     return best;
   }
-  private hasVector(unit: IndexUnit): boolean {
-    return Array.isArray(unit.embedding) && unit.embedding.length > 0;
-  }
   private shouldSkipComparison(left: IndexUnit, right: IndexUnit): boolean {
-    if (left.unitType !== IndexUnitType.BLOCK || right.unitType !== IndexUnitType.BLOCK) {
-      return false;
-    }
-    if (left.filePath !== right.filePath) {
-      return false;
-    }
-    const leftContainsRight = left.startLine <= right.startLine && left.endLine >= right.endLine;
-    const rightContainsLeft = right.startLine <= left.startLine && right.endLine >= left.endLine;
-    return leftContainsRight || rightContainsLeft;
+    if (left.unitType !== IndexUnitType.BLOCK || right.unitType !== IndexUnitType.BLOCK) return false;
+    if (left.filePath !== right.filePath) return false;
+    return (left.startLine <= right.startLine && left.endLine >= right.endLine)
+        || (right.startLine <= left.startLine && right.endLine >= left.endLine);
   }
-  private findParentOfType(unit: IndexUnit, targetType: IndexUnitType): IndexUnit | null {
-    let current: IndexUnit | undefined | null = unit.parent;
-    while (current) {
-      if (current.unitType === targetType) return current;
-      current = current.parent;
+  private findParent(unit: IndexUnit, type: IndexUnitType): IndexUnit | null {
+    let p = unit.parent;
+    while (p) {
+      if (p.unitType === type) return p;
+      p = p.parent;
     }
     return null;
   }
   private computeDuplicationScore(duplicates: DuplicateGroup[], allUnits: IndexUnit[]): DuplicationScore {
-    const totalLines = this.calculateTotalLines(allUnits);
-    if (totalLines === 0 || duplicates.length === 0) {
-      return {
-        score: 0,
-        grade: "Excellent",
-        totalLines,
-        duplicateLines: 0,
-        duplicateGroups: 0,
-      };
+    const totalLines = allUnits.reduce((sum, u) => sum + u.endLine - u.startLine + 1, 0);
+    if (!totalLines || !duplicates.length) {
+      return { score: 0, grade: "Excellent", totalLines, duplicateLines: 0, duplicateGroups: 0 };
     }
-    const weightedDuplicateLines = duplicates.reduce((sum, group) => {
-      const leftLines = group.left.endLine - group.left.startLine + 1;
-      const rightLines = group.right.endLine - group.right.startLine + 1;
-      const avgLines = (leftLines + rightLines) / 2;
-      return sum + group.similarity * avgLines;
+    const duplicateLines = duplicates.reduce((sum, g) => {
+      const avg = ((g.left.endLine - g.left.startLine + 1) + (g.right.endLine - g.right.startLine + 1)) / 2;
+      return sum + g.similarity * avg;
     }, 0);
-    const score = (weightedDuplicateLines / totalLines) * 100;
-    const grade = this.getScoreGrade(score);
+    const score = (duplicateLines / totalLines) * 100;
     return {
       score,
-      grade,
+      grade: this.getScoreGrade(score),
       totalLines,
-      duplicateLines: Math.round(weightedDuplicateLines),
+      duplicateLines: Math.round(duplicateLines),
       duplicateGroups: duplicates.length,
     };
   }
-  private calculateTotalLines(units: IndexUnit[]): number {
-    return units.reduce((sum, unit) => {
-      const lines = unit.endLine - unit.startLine + 1;
-      return sum + lines;
-    }, 0);
-  }
   private getScoreGrade(score: number): DuplicationScore["grade"] {
     if (score < 5) return "Excellent";
     if (score < 15) return "Good";