@goshenkata/dryscan-core 1.2.5 → 1.2.6

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@goshenkata/dryscan-core",
3
- "version": "1.2.5",
3
+ "version": "1.2.6",
4
4
  "description": "Core library for DryScan - semantic code duplication analyzer",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
package/src/DryScan.ts CHANGED
@@ -89,14 +89,15 @@ export class DryScan {
89
89
  * 6. Recompute embeddings for affected units
90
90
  * 7. Update file tracking metadata
91
91
  */
92
- async updateIndex(): Promise<void> {
92
+ async updateIndex(): Promise<string[]> {
93
93
  console.log(`[DryScan] Updating index at ${this.repoPath}...`);
94
94
  console.log("[DryScan] Checking for file changes...");
95
95
  const start = Date.now();
96
96
  await this.ensureDatabase();
97
- await this.services.updater.updateIndex();
97
+ const dirtyPaths = await this.services.updater.updateIndex();
98
98
  const duration = Date.now() - start;
99
99
  console.log(`[DryScan] Index update complete. Took ${duration}ms.`);
100
+ return dirtyPaths;
100
101
  }
101
102
 
102
103
 
@@ -129,13 +130,13 @@ export class DryScan {
129
130
 
130
131
  console.log("[DryScan] Updating index...");
131
132
  const updateStart = Date.now();
132
- await this.updateIndex();
133
+ const dirtyPaths = await this.updateIndex();
133
134
  const updateDuration = Date.now() - updateStart;
134
135
  console.log(`[DryScan] Index update took ${updateDuration}ms.`);
135
136
 
136
137
  console.log("[DryScan] Detecting duplicates...");
137
138
  const dupStart = Date.now();
138
- const result = await this.services.duplicate.findDuplicates(config);
139
+ const result = await this.services.duplicate.findDuplicates(config, dirtyPaths);
139
140
  const dupDuration = Date.now() - dupStart;
140
141
  console.log(`[DryScan] Duplicate detection took ${dupDuration}ms.`);
141
142
 
@@ -11,7 +11,7 @@ export const DEFAULT_CONFIG: DryConfig = {
11
11
  excludedPairs: [],
12
12
  minLines: 3,
13
13
  minBlockLines: 5,
14
- threshold: 0.88,
14
+ threshold: 0.8,
15
15
  embeddingSource: "http://localhost:11434",
16
16
  contextLength: 2048,
17
17
  };
@@ -72,7 +72,8 @@ export class JavaExtractor implements LanguageExtractor {
72
72
  const fnUnit = this.buildFunctionUnit(node, source, fileRelPath, currentClass);
73
73
  const fnLength = fnUnit.endLine - fnUnit.startLine;
74
74
  const bodyNode = this.getFunctionBody(node);
75
- const skipFunction = this.shouldSkip(IndexUnitType.FUNCTION, fnUnit.name, fnLength);
75
+ const fnArity = this.getNodeArity(node);
76
+ const skipFunction = this.shouldSkip(IndexUnitType.FUNCTION, fnUnit.name, fnLength, fnArity);
76
77
 
77
78
  if (skipFunction) {
78
79
  return;
@@ -158,7 +159,7 @@ export class JavaExtractor implements LanguageExtractor {
158
159
  return crypto.createHash(BLOCK_HASH_ALGO).update(normalized).digest("hex");
159
160
  }
160
161
 
161
- private shouldSkip(unitType: IndexUnitType, name: string, lineCount: number): boolean {
162
+ private shouldSkip(unitType: IndexUnitType, name: string, lineCount: number, arity?: number): boolean {
162
163
  if (!this.config) {
163
164
  throw new Error("Config not loaded before skip evaluation");
164
165
  }
@@ -167,17 +168,30 @@ export class JavaExtractor implements LanguageExtractor {
167
168
  ? Math.max(indexConfig.blockMinLines, config.minBlockLines ?? 0)
168
169
  : config.minLines;
169
170
  const belowMin = minLines > 0 && lineCount < minLines;
170
- const trivial = unitType === IndexUnitType.FUNCTION && this.isTrivialFunction(name);
171
+ const trivial = unitType === IndexUnitType.FUNCTION && this.isTrivialFunction(name, arity ?? 0);
171
172
  return belowMin || trivial;
172
173
  }
173
174
 
174
- private isTrivialFunction(fullName: string): boolean {
175
+ /**
176
+ * A function is trivial if it follows a simple accessor pattern:
177
+ * - getters/isers: name matches get[A-Z] or is[A-Z] with exactly 0 parameters
178
+ * - setters: name matches set[A-Z] with at most 1 parameter
179
+ * Methods like getUserById(Long id) have arity > 0 and are NOT trivial.
180
+ */
181
+ private isTrivialFunction(fullName: string, arity: number): boolean {
175
182
  const simpleName = fullName.split(".").pop() || fullName;
176
- const isGetter = /^(get|is)[A-Z]/.test(simpleName);
177
- const isSetter = /^set[A-Z]/.test(simpleName);
183
+ const isGetter = /^(get|is)[A-Z]/.test(simpleName) && arity === 0;
184
+ const isSetter = /^set[A-Z]/.test(simpleName) && arity <= 1;
178
185
  return isGetter || isSetter;
179
186
  }
180
187
 
188
+ /** Counts the formal parameters of a method or constructor node. */
189
+ private getNodeArity(node: Parser.SyntaxNode): number {
190
+ const params = node.childForFieldName?.("parameters");
191
+ if (!params) return 0;
192
+ return params.namedChildren.filter(c => c.type === "formal_parameter" || c.type === "spread_parameter").length;
193
+ }
194
+
181
195
  private isDtoClass(node: Parser.SyntaxNode, source: string, className: string): boolean {
182
196
  const classBody = node.children.find((child) => child.type === "class_body");
183
197
  if (!classBody) return false;
@@ -200,7 +214,8 @@ export class JavaExtractor implements LanguageExtractor {
200
214
  if (child.type === "method_declaration" || child.type === "constructor_declaration") {
201
215
  const simpleName = this.getSimpleFunctionName(child, source);
202
216
  const fullName = `${className}.${simpleName}`;
203
- if (!this.isTrivialFunction(fullName)) {
217
+ const arity = this.getNodeArity(child);
218
+ if (!this.isTrivialFunction(fullName, arity)) {
204
219
  return false;
205
220
  }
206
221
  continue;
@@ -1,6 +1,5 @@
1
1
  import debug from "debug";
2
2
  import shortUuid from "short-uuid";
3
- import { cosineSimilarity } from "@langchain/core/utils/math";
4
3
  import { DryScanServiceDeps } from "./types";
5
4
  import { DuplicateAnalysisResult, DuplicateGroup, DuplicationScore, IndexUnit, IndexUnitType } from "../types";
6
5
  import { indexConfig } from "../config/indexConfig";
@@ -15,137 +14,93 @@ export class DuplicateService {
15
14
 
16
15
  constructor(private readonly deps: DryScanServiceDeps) {}
17
16
 
18
- //todo vetter optimisation
19
- async findDuplicates(config: DryConfig): Promise<DuplicateAnalysisResult> {
17
+ /**
18
+ * @param dirtyPaths - File paths changed since last run. When provided, only
19
+ * dirty×all similarities are recomputed; clean×clean values are reused from
20
+ * the existing matrix. Pass undefined (or omit) for a full rebuild.
21
+ */
22
+ async findDuplicates(config: DryConfig, dirtyPaths?: string[]): Promise<DuplicateAnalysisResult> {
20
23
  this.config = config;
21
24
  const t0 = performance.now();
22
25
  const allUnits = await this.deps.db.getAllUnits();
23
26
  log("Starting duplicate analysis on %d units", allUnits.length);
27
+
24
28
  if (allUnits.length < 2) {
25
- log("Not enough units to compare, returning empty result");
26
- const score = this.computeDuplicationScore([], allUnits);
27
- return { duplicates: [], score };
29
+ return { duplicates: [], score: this.computeDuplicationScore([], allUnits) };
28
30
  }
29
31
 
30
32
  const thresholds = this.resolveThresholds(config.threshold);
31
- log("Resolved thresholds: function=%d, block=%d, class=%d", thresholds.function, thresholds.block, thresholds.class);
32
- const duplicates = this.computeDuplicates(allUnits, thresholds);
33
- const filteredDuplicates = duplicates.filter((group) => !this.isGroupExcluded(group));
34
- log("Found %d duplicate groups (%d excluded)", filteredDuplicates.length, duplicates.length - filteredDuplicates.length);
33
+ const duplicates = this.computeDuplicates(allUnits, thresholds, dirtyPaths);
34
+ const filtered = duplicates.filter((g) => !this.isGroupExcluded(g));
35
+ log("Found %d duplicate groups (%d excluded)", filtered.length, duplicates.length - filtered.length);
35
36
 
36
- // Update cache asynchronously; no need to block the main flow.
37
- this.cache.update(filteredDuplicates).catch((err) => log("Cache update failed: %O", err));
37
+ this.cache.update(filtered).catch((err) => log("Cache update failed: %O", err));
38
38
 
39
- const score = this.computeDuplicationScore(filteredDuplicates, allUnits);
39
+ const score = this.computeDuplicationScore(filtered, allUnits);
40
40
  log("findDuplicates completed in %dms", (performance.now() - t0).toFixed(2));
41
- return { duplicates: filteredDuplicates, score };
41
+ return { duplicates: filtered, score };
42
42
  }
43
43
 
44
44
  private resolveThresholds(functionThreshold?: number): { function: number; block: number; class: number } {
45
- const defaults = indexConfig.thresholds;
46
- const clamp = (value: number) => Math.min(1, Math.max(0, value));
47
-
48
- const base = functionThreshold ?? defaults.function;
49
- const blockOffset = defaults.block - defaults.function;
50
- const classOffset = defaults.class - defaults.function;
51
-
52
- const functionThresholdValue = clamp(base);
45
+ const d = indexConfig.thresholds;
46
+ const clamp = (v: number) => Math.min(1, Math.max(0, v));
47
+ const fn = clamp(functionThreshold ?? d.function);
53
48
  return {
54
- function: functionThresholdValue,
55
- block: clamp(functionThresholdValue + blockOffset),
56
- class: clamp(functionThresholdValue + classOffset),
49
+ function: fn,
50
+ block: clamp(fn + d.block - d.function),
51
+ class: clamp(fn + d.class - d.function),
57
52
  };
58
53
  }
59
54
 
60
55
  private computeDuplicates(
61
56
  units: IndexUnit[],
62
- thresholds: { function: number; block: number; class: number }
57
+ thresholds: { function: number; block: number; class: number },
58
+ dirtyPaths?: string[]
63
59
  ): DuplicateGroup[] {
64
- const duplicates: DuplicateGroup[] = [];
65
- const byType = new Map<IndexUnitType, IndexUnit[]>();
66
-
67
- for (const unit of units) {
68
- const list = byType.get(unit.unitType) ?? [];
69
- list.push(unit);
70
- byType.set(unit.unitType, list);
71
- }
60
+ this.cache.clearRunCaches();
61
+ this.cache.buildEmbSimCache(units, dirtyPaths);
72
62
 
63
+ const duplicates: DuplicateGroup[] = [];
73
64
  const t0 = performance.now();
74
65
 
75
- for (const [type, typedUnits] of byType.entries()) {
66
+ for (const [type, typedUnits] of this.groupByType(units)) {
76
67
  const threshold = this.getThreshold(type, thresholds);
77
- log("Comparing %d units of type '%s' with threshold %d", typedUnits.length, type, threshold);
78
- const typeStart = performance.now();
68
+ log("Comparing %d %s units (threshold=%.3f)", typedUnits.length, type, threshold);
79
69
 
80
70
  for (let i = 0; i < typedUnits.length; i++) {
81
71
  for (let j = i + 1; j < typedUnits.length; j++) {
82
- const left = typedUnits[i];
83
- const right = typedUnits[j];
84
-
85
- if (this.shouldSkipComparison(left, right)) {
86
- log("Skipping nested block comparison: '%s' and '%s'", left.name, right.name);
87
- continue;
88
- }
72
+ const left = typedUnits[i], right = typedUnits[j];
73
+ if (this.shouldSkipComparison(left, right)) continue;
89
74
 
75
+ // Always check the cache first — this allows pairs whose embeddings
76
+ // have since been cleared to still be reported using a prior score.
90
77
  const cached = this.cache.get(left.id, right.id, left.filePath, right.filePath);
91
- let similarity: number | null = null;
92
-
93
- if (cached !== null) {
94
- log("Cache hit for '%s' <-> '%s': similarity=%d", left.name, right.name, cached);
95
- similarity = cached;
96
- } else {
97
- if (!left.embedding || !right.embedding) {
98
- log("Skipping '%s' <-> '%s': missing embedding", left.name, right.name);
99
- continue;
100
- }
101
- similarity = this.computeWeightedSimilarity(left, right);
102
- log("Computed similarity for '%s' <-> '%s': %d", left.name, right.name, similarity);
103
- }
104
-
105
- if (similarity === null) continue;
106
-
107
- if (similarity >= threshold) {
108
- const exclusionString = this.deps.pairing.pairKeyForUnits(left, right);
109
- if (!exclusionString) continue;
110
-
111
- log("Duplicate found: '%s' <-> '%s' (similarity=%d)", left.name, right.name, similarity);
112
- duplicates.push({
113
- id: `${left.id}::${right.id}`,
114
- similarity,
115
- shortId: shortUuid.generate(),
116
- exclusionString,
117
- left: {
118
- id: left.id,
119
- name: left.name,
120
- filePath: left.filePath,
121
- startLine: left.startLine,
122
- endLine: left.endLine,
123
- code: left.code,
124
- unitType: left.unitType,
125
- },
126
- right: {
127
- id: right.id,
128
- name: right.name,
129
- filePath: right.filePath,
130
- startLine: right.startLine,
131
- endLine: right.endLine,
132
- code: right.code,
133
- unitType: right.unitType,
134
- },
135
- });
136
- }
78
+ const hasEmbeddings = left.embedding?.length && right.embedding?.length;
79
+ const similarity = cached ?? (hasEmbeddings ? this.computeWeightedSimilarity(left, right, threshold) : 0);
80
+ if (similarity < threshold) continue;
81
+
82
+ const exclusionString = this.deps.pairing.pairKeyForUnits(left, right);
83
+ if (!exclusionString) continue;
84
+
85
+ duplicates.push({
86
+ id: `${left.id}::${right.id}`,
87
+ similarity,
88
+ shortId: shortUuid.generate(),
89
+ exclusionString,
90
+ left: this.toMember(left),
91
+ right: this.toMember(right),
92
+ });
137
93
  }
138
94
  }
139
- log("Type '%s' comparisons completed in %dms", type, (performance.now() - typeStart).toFixed(2));
140
95
  }
141
96
 
142
- log("computeDuplicates completed in %dms, found %d raw duplicates", (performance.now() - t0).toFixed(2), duplicates.length);
97
+ log("computeDuplicates: %d duplicates in %dms", duplicates.length, (performance.now() - t0).toFixed(2));
143
98
  return duplicates.sort((a, b) => b.similarity - a.similarity);
144
99
  }
145
100
 
146
101
  private isGroupExcluded(group: DuplicateGroup): boolean {
147
102
  const config = this.config;
148
- if (!config || !config.excludedPairs || config.excludedPairs.length === 0) return false;
103
+ if (!config?.excludedPairs?.length) return false;
149
104
  const key = this.deps.pairing.pairKeyForUnits(group.left, group.right);
150
105
  if (!key) return false;
151
106
  const actual = this.deps.pairing.parsePairKey(key);
@@ -162,142 +117,136 @@ export class DuplicateService {
162
117
  return thresholds.function;
163
118
  }
164
119
 
165
- private computeWeightedSimilarity(left: IndexUnit, right: IndexUnit): number {
166
- const selfSimilarity = this.similarityWithFallback(left, right);
120
+ private computeWeightedSimilarity(left: IndexUnit, right: IndexUnit, threshold: number): number {
121
+ const selfSim = this.similarity(left, right);
167
122
 
123
+ //CLASS
168
124
  if (left.unitType === IndexUnitType.CLASS) {
169
- return selfSimilarity * indexConfig.weights.class.self;
125
+ return selfSim * indexConfig.weights.class.self;
170
126
  }
171
127
 
128
+ // FUNCTION
172
129
  if (left.unitType === IndexUnitType.FUNCTION) {
173
- const weights = indexConfig.weights.function;
174
- const hasParentClass = !!this.findParentOfType(left, IndexUnitType.CLASS) && !!this.findParentOfType(right, IndexUnitType.CLASS);
175
- const parentClassSimilarity = hasParentClass ? this.parentSimilarity(left, right, IndexUnitType.CLASS) : 0;
176
-
177
- // Re-normalize weights when parent context is missing, so standalone units aren't penalized.
178
- const totalWeight = weights.self + (hasParentClass ? weights.parentClass : 0);
179
- return ((weights.self * selfSimilarity) + (hasParentClass ? (weights.parentClass * parentClassSimilarity) : 0)) / totalWeight;
130
+ const w = indexConfig.weights.function;
131
+ const hasPC = this.bothHaveParent(left, right, IndexUnitType.CLASS);
132
+ const total = w.self + (hasPC ? w.parentClass : 0);
133
+ // Early exit: even with perfect parent similarity, can't reach threshold.
134
+ if ((w.self * selfSim + (hasPC ? w.parentClass : 0)) / total < threshold) return 0;
135
+ return (w.self * selfSim + (hasPC ? w.parentClass * this.parentSimilarity(left, right, IndexUnitType.CLASS) : 0)) / total;
180
136
  }
181
137
 
182
- const weights = indexConfig.weights.block;
183
- const hasParentFunction = !!this.findParentOfType(left, IndexUnitType.FUNCTION) && !!this.findParentOfType(right, IndexUnitType.FUNCTION);
184
- const hasParentClass = !!this.findParentOfType(left, IndexUnitType.CLASS) && !!this.findParentOfType(right, IndexUnitType.CLASS);
185
- const parentFuncSim = hasParentFunction ? this.parentSimilarity(left, right, IndexUnitType.FUNCTION) : 0;
186
- const parentClassSim = hasParentClass ? this.parentSimilarity(left, right, IndexUnitType.CLASS) : 0;
138
+ // BLOCK
139
+ const w = indexConfig.weights.block;
140
+ const hasPF = this.bothHaveParent(left, right, IndexUnitType.FUNCTION);
141
+ const hasPC = this.bothHaveParent(left, right, IndexUnitType.CLASS);
142
+ const total = w.self + (hasPF ? w.parentFunction : 0) + (hasPC ? w.parentClass : 0);
143
+ if ((w.self * selfSim + (hasPF ? w.parentFunction : 0) + (hasPC ? w.parentClass : 0)) / total < threshold) return 0;
144
+ return (
145
+ w.self * selfSim +
146
+ (hasPF ? w.parentFunction * this.parentSimilarity(left, right, IndexUnitType.FUNCTION) : 0) +
147
+ (hasPC ? w.parentClass * this.parentSimilarity(left, right, IndexUnitType.CLASS) : 0)
148
+ ) / total;
149
+ }
187
150
 
188
- // Re-normalize weights when some parent context is missing.
189
- const totalWeight =
190
- weights.self +
191
- (hasParentFunction ? weights.parentFunction : 0) +
192
- (hasParentClass ? weights.parentClass : 0);
151
+ /** Groups all units by type for the comparison loop. Units without embeddings are included
152
+ * so that cache hits can still be returned for pairs whose embeddings were cleared. */
153
+ private groupByType(units: IndexUnit[]): Map<IndexUnitType, IndexUnit[]> {
154
+ const byType = new Map<IndexUnitType, IndexUnit[]>();
155
+ for (const unit of units) {
156
+ const list = byType.get(unit.unitType) ?? [];
157
+ list.push(unit);
158
+ byType.set(unit.unitType, list);
159
+ }
160
+ return byType;
161
+ }
193
162
 
194
- return (
195
- (weights.self * selfSimilarity) +
196
- (hasParentFunction ? (weights.parentFunction * parentFuncSim) : 0) +
197
- (hasParentClass ? (weights.parentClass * parentClassSim) : 0)
198
- ) / totalWeight;
163
+ private toMember(unit: IndexUnit): DuplicateGroup["left"] {
164
+ return {
165
+ id: unit.id,
166
+ name: unit.name,
167
+ filePath: unit.filePath,
168
+ startLine: unit.startLine,
169
+ endLine: unit.endLine,
170
+ code: unit.code,
171
+ unitType: unit.unitType,
172
+ };
199
173
  }
200
174
 
201
- private parentSimilarity(left: IndexUnit, right: IndexUnit, targetType: IndexUnitType): number {
202
- const leftParent = this.findParentOfType(left, targetType);
203
- const rightParent = this.findParentOfType(right, targetType);
204
- if (!leftParent || !rightParent) return 0;
205
- return this.similarityWithFallback(leftParent, rightParent);
175
+ private bothHaveParent(left: IndexUnit, right: IndexUnit, type: IndexUnitType): boolean {
176
+ return !!this.findParent(left, type) && !!this.findParent(right, type);
206
177
  }
207
178
 
208
- private similarityWithFallback(left: IndexUnit, right: IndexUnit): number {
209
- const leftHasEmbedding = this.hasVector(left);
210
- const rightHasEmbedding = this.hasVector(right);
179
+ private parentSimilarity(left: IndexUnit, right: IndexUnit, type: IndexUnitType): number {
180
+ const lp = this.findParent(left, type), rp = this.findParent(right, type);
181
+ if (!lp || !rp) return 0;
211
182
 
212
- if (leftHasEmbedding && rightHasEmbedding) {
213
- return cosineSimilarity([left.embedding as number[]], [right.embedding as number[]])[0][0];
214
- }
183
+ const key = lp.id < rp.id ? `${lp.id}::${rp.id}` : `${rp.id}::${lp.id}`;
184
+ const cached = this.cache.getParentSim(key);
185
+ if (cached !== undefined) return cached;
215
186
 
216
- return this.childSimilarity(left, right);
187
+ const sim = this.similarity(lp, rp);
188
+ this.cache.setParentSim(key, sim);
189
+ return sim;
190
+ }
191
+
192
+ /** Resolves similarity via the pre-computed embedding matrix, falling back to best child match. */
193
+ private similarity(left: IndexUnit, right: IndexUnit): number {
194
+ return this.cache.getEmbSim(left.id, right.id) ?? this.childSimilarity(left, right);
217
195
  }
218
196
 
219
197
  private childSimilarity(left: IndexUnit, right: IndexUnit): number {
220
- const leftChildren = left.children ?? [];
221
- const rightChildren = right.children ?? [];
222
- if (leftChildren.length === 0 || rightChildren.length === 0) return 0;
198
+ const lc = left.children ?? [], rc = right.children ?? [];
199
+ if (!lc.length || !rc.length) return 0;
223
200
 
224
201
  let best = 0;
225
- for (const lChild of leftChildren) {
226
- for (const rChild of rightChildren) {
227
- if (lChild.unitType !== rChild.unitType) continue;
228
- const sim = this.similarityWithFallback(lChild, rChild);
202
+ for (const l of lc) {
203
+ for (const r of rc) {
204
+ if (l.unitType !== r.unitType) continue;
205
+ const sim = this.similarity(l, r);
229
206
  if (sim > best) best = sim;
230
207
  }
231
208
  }
232
209
  return best;
233
210
  }
234
211
 
235
- private hasVector(unit: IndexUnit): boolean {
236
- return Array.isArray(unit.embedding) && unit.embedding.length > 0;
237
- }
238
-
239
212
  private shouldSkipComparison(left: IndexUnit, right: IndexUnit): boolean {
240
- if (left.unitType !== IndexUnitType.BLOCK || right.unitType !== IndexUnitType.BLOCK) {
241
- return false;
242
- }
243
-
244
- if (left.filePath !== right.filePath) {
245
- return false;
246
- }
247
-
248
- const leftContainsRight = left.startLine <= right.startLine && left.endLine >= right.endLine;
249
- const rightContainsLeft = right.startLine <= left.startLine && right.endLine >= left.endLine;
250
- return leftContainsRight || rightContainsLeft;
213
+ if (left.unitType !== IndexUnitType.BLOCK || right.unitType !== IndexUnitType.BLOCK) return false;
214
+ if (left.filePath !== right.filePath) return false;
215
+ return (left.startLine <= right.startLine && left.endLine >= right.endLine)
216
+ || (right.startLine <= left.startLine && right.endLine >= left.endLine);
251
217
  }
252
218
 
253
- private findParentOfType(unit: IndexUnit, targetType: IndexUnitType): IndexUnit | null {
254
- let current: IndexUnit | undefined | null = unit.parent;
255
- while (current) {
256
- if (current.unitType === targetType) return current;
257
- current = current.parent;
219
+ private findParent(unit: IndexUnit, type: IndexUnitType): IndexUnit | null {
220
+ let p = unit.parent;
221
+ while (p) {
222
+ if (p.unitType === type) return p;
223
+ p = p.parent;
258
224
  }
259
225
  return null;
260
226
  }
261
227
 
262
228
  private computeDuplicationScore(duplicates: DuplicateGroup[], allUnits: IndexUnit[]): DuplicationScore {
263
- const totalLines = this.calculateTotalLines(allUnits);
264
-
265
- if (totalLines === 0 || duplicates.length === 0) {
266
- return {
267
- score: 0,
268
- grade: "Excellent",
269
- totalLines,
270
- duplicateLines: 0,
271
- duplicateGroups: 0,
272
- };
229
+ const totalLines = allUnits.reduce((sum, u) => sum + u.endLine - u.startLine + 1, 0);
230
+
231
+ if (!totalLines || !duplicates.length) {
232
+ return { score: 0, grade: "Excellent", totalLines, duplicateLines: 0, duplicateGroups: 0 };
273
233
  }
274
234
 
275
- const weightedDuplicateLines = duplicates.reduce((sum, group) => {
276
- const leftLines = group.left.endLine - group.left.startLine + 1;
277
- const rightLines = group.right.endLine - group.right.startLine + 1;
278
- const avgLines = (leftLines + rightLines) / 2;
279
- return sum + group.similarity * avgLines;
235
+ const duplicateLines = duplicates.reduce((sum, g) => {
236
+ const avg = ((g.left.endLine - g.left.startLine + 1) + (g.right.endLine - g.right.startLine + 1)) / 2;
237
+ return sum + g.similarity * avg;
280
238
  }, 0);
281
239
 
282
- const score = (weightedDuplicateLines / totalLines) * 100;
283
- const grade = this.getScoreGrade(score);
284
-
240
+ const score = (duplicateLines / totalLines) * 100;
285
241
  return {
286
242
  score,
287
- grade,
243
+ grade: this.getScoreGrade(score),
288
244
  totalLines,
289
- duplicateLines: Math.round(weightedDuplicateLines),
245
+ duplicateLines: Math.round(duplicateLines),
290
246
  duplicateGroups: duplicates.length,
291
247
  };
292
248
  }
293
249
 
294
- private calculateTotalLines(units: IndexUnit[]): number {
295
- return units.reduce((sum, unit) => {
296
- const lines = unit.endLine - unit.startLine + 1;
297
- return sum + lines;
298
- }, 0);
299
- }
300
-
301
250
  private getScoreGrade(score: number): DuplicationScore["grade"] {
302
251
  if (score < 5) return "Excellent";
303
252
  if (score < 15) return "Good";