@goshenkata/dryscan-core 1.2.7 → 1.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,376 +0,0 @@
1
- import crypto from "node:crypto";
2
- import Parser from "tree-sitter";
3
- import Java from "tree-sitter-java";
4
- import { LanguageExtractor } from "./LanguageExtractor";
5
- import { IndexUnit, IndexUnitType } from "../types";
6
- import { indexConfig } from "../config/indexConfig";
7
- import { DryConfig } from "../types";
8
- import { configStore } from "../config/configStore";
9
- import { BLOCK_HASH_ALGO } from "../const";
10
-
11
/**
 * Extracts indexable units (classes, methods/constructors and nested blocks)
 * from Java source text using a tree-sitter parser.
 *
 * Line numbers stored on units are taken verbatim from tree-sitter's
 * `startPosition.row` / `endPosition.row`.
 */
export class JavaExtractor implements LanguageExtractor {
  readonly id = "java";
  readonly exts = [".java"];

  private parser: Parser;
  private readonly repoPath: string;
  /** Loaded at the start of every `extractFromText` call; required by `shouldSkip`. */
  private config?: DryConfig;

  /**
   * @param repoPath - Repository path used as the key when loading config from `configStore`.
   */
  constructor(repoPath: string) {
    this.repoPath = repoPath;
    this.parser = new Parser();
    this.parser.setLanguage(Java);
  }

  /** Returns true when `filePath` ends (case-insensitively) with one of `exts`. */
  supports(filePath: string): boolean {
    const lower = filePath.toLowerCase();
    return this.exts.some((ext) => lower.endsWith(ext));
  }

  /**
   * Parses `source` and returns the class, function and block units found in it.
   *
   * - Classes recognised as DTOs (see `isDtoClass`) are dropped together with everything inside them.
   * - Classes/functions rejected by `shouldSkip` are not emitted; members of a skipped class are
   *   still visited, but without a parent class.
   * - Units sharing an id are collapsed to a single entry before returning.
   *
   * @param fileRelPath - Path recorded on every emitted unit.
   * @param source - Java source text; blank input yields an empty list.
   */
  async extractFromText(fileRelPath: string, source: string): Promise<IndexUnit[]> {
    if (!source.trim()) return [];

    this.config = await configStore.get(this.repoPath);

    const tree = this.parser.parse(source);
    const units: IndexUnit[] = [];

    const visit = (node: Parser.SyntaxNode, currentClass?: IndexUnit): void => {
      if (this.isClassNode(node)) {
        const className = this.getClassName(node, source) || "<anonymous>";
        if (this.isDtoClass(node, source, className)) {
          return;
        }
        const startLine = node.startPosition.row;
        const endLine = node.endPosition.row;
        const classLength = endLine - startLine;
        const skipClass = this.shouldSkip(IndexUnitType.CLASS, className, classLength);
        const classId = this.buildId(IndexUnitType.CLASS, className, startLine, endLine);
        // Class code keeps member signatures but replaces method bodies with " { }".
        const code = this.stripComments(this.stripClassBody(node, source));
        const classUnit: IndexUnit = {
          id: classId,
          name: className,
          filePath: fileRelPath,
          startLine,
          endLine,
          code,
          unitType: IndexUnitType.CLASS,
          children: [],
        };
        if (!skipClass) {
          units.push(classUnit);
        }

        for (let i = 0; i < node.namedChildCount; i++) {
          const child = node.namedChild(i);
          if (child) visit(child, skipClass ? undefined : classUnit);
        }
        return;
      }

      if (this.isFunctionNode(node)) {
        const fnUnit = this.buildFunctionUnit(node, source, fileRelPath, currentClass);
        const fnLength = fnUnit.endLine - fnUnit.startLine;
        const bodyNode = this.getFunctionBody(node);
        const fnArity = this.getNodeArity(node);
        const skipFunction = this.shouldSkip(IndexUnitType.FUNCTION, fnUnit.name, fnLength, fnArity);

        if (skipFunction) {
          return;
        }

        units.push(fnUnit);

        if (bodyNode) {
          const blocks = this.extractBlocks(bodyNode, source, fileRelPath, fnUnit);
          units.push(...blocks);
        }
      }

      // Keep descending: function bodies may contain further declarations.
      for (let i = 0; i < node.namedChildCount; i++) {
        const child = node.namedChild(i);
        if (child) visit(child, currentClass);
      }
    };

    visit(tree.rootNode);

    return this.removeDuplicates(units);
  }

  /**
   * Label for a unit: the file path for classes, `name(arity:N)` for functions,
   * a hash of the normalized code for blocks, and the plain name otherwise.
   */
  unitLabel(unit: IndexUnit): string | null {
    if (unit.unitType === IndexUnitType.CLASS) return unit.filePath;
    if (unit.unitType === IndexUnitType.FUNCTION) return this.canonicalFunctionSignature(unit);
    if (unit.unitType === IndexUnitType.BLOCK) return this.normalizedBlockHash(unit);
    return unit.name;
  }

  private isClassNode(node: Parser.SyntaxNode): boolean {
    return node.type === "class_declaration";
  }

  private getClassName(node: Parser.SyntaxNode, source: string): string | null {
    const nameNode = node.childForFieldName?.("name");
    return nameNode ? source.slice(nameNode.startIndex, nameNode.endIndex) : null;
  }

  private isFunctionNode(node: Parser.SyntaxNode): boolean {
    return node.type === "method_declaration" || node.type === "constructor_declaration";
  }

  /** Returns the function name, qualified as `Class.name` when a parent class is given. */
  private getFunctionName(node: Parser.SyntaxNode, source: string, parentClass?: IndexUnit): string | null {
    const nameText = this.getSimpleFunctionName(node, source);
    return parentClass ? `${parentClass.name}.${nameText}` : nameText;
  }

  private getFunctionBody(node: Parser.SyntaxNode): Parser.SyntaxNode | null {
    return node.childForFieldName?.("body") ?? null;
  }

  private isBlockNode(node: Parser.SyntaxNode): boolean {
    return node.type === "block";
  }

  /** Collects the body nodes of methods/constructors declared directly in the class body. */
  private getMethodBodiesForClass(node: Parser.SyntaxNode): Parser.SyntaxNode[] {
    const bodies: Parser.SyntaxNode[] = [];
    const classBody = node.children.find((child) => child.type === "class_body");
    if (!classBody) return bodies;

    for (let i = 0; i < classBody.namedChildCount; i++) {
      const child = classBody.namedChild(i);
      if (!child) continue;
      if (child.type === "method_declaration" || child.type === "constructor_declaration") {
        const body = child.childForFieldName?.("body");
        if (body) bodies.push(body);
      }
    }
    return bodies;
  }

  private canonicalFunctionSignature(unit: IndexUnit): string {
    const arity = this.extractArity(unit.code);
    return `${unit.name}(arity:${arity})`;
  }

  private normalizedBlockHash(unit: IndexUnit): string {
    const normalized = this.normalizeCode(unit.code);
    return crypto.createHash(BLOCK_HASH_ALGO).update(normalized).digest("hex");
  }

  /**
   * Decides whether a unit should be left out of the index.
   *
   * A unit is skipped when it is shorter than the applicable minimum line count, or when it
   * is a function matching the trivial accessor pattern (see `isTrivialFunction`).
   *
   * @throws Error when called before the config has been loaded.
   */
  private shouldSkip(unitType: IndexUnitType, name: string, lineCount: number, arity?: number): boolean {
    if (!this.config) {
      throw new Error("Config not loaded before skip evaluation");
    }
    const config = this.config;
    const minLines = unitType === IndexUnitType.BLOCK
      ? Math.max(indexConfig.blockMinLines, config.minBlockLines ?? 0)
      : config.minLines;
    const belowMin = minLines > 0 && lineCount < minLines;
    const trivial = unitType === IndexUnitType.FUNCTION && this.isTrivialFunction(name, arity ?? 0);
    return belowMin || trivial;
  }

  /**
   * A function is trivial if it follows a simple accessor pattern:
   * - getters/isers: name matches get[A-Z] or is[A-Z] with exactly 0 parameters
   * - setters: name matches set[A-Z] with at most 1 parameter
   * Methods like getUserById(Long id) have arity > 0 and are NOT trivial.
   */
  private isTrivialFunction(fullName: string, arity: number): boolean {
    const simpleName = fullName.split(".").pop() || fullName;
    const isGetter = /^(get|is)[A-Z]/.test(simpleName) && arity === 0;
    const isSetter = /^set[A-Z]/.test(simpleName) && arity <= 1;
    return isGetter || isSetter;
  }

  /** Counts the formal parameters of a method or constructor node. */
  private getNodeArity(node: Parser.SyntaxNode): number {
    const params = node.childForFieldName?.("parameters");
    if (!params) return 0;
    return params.namedChildren.filter((c) => c.type === "formal_parameter" || c.type === "spread_parameter").length;
  }

  /**
   * A class counts as a DTO when its body declares at least one field and contains nothing
   * besides fields, annotation nodes and trivial accessor methods.
   */
  private isDtoClass(node: Parser.SyntaxNode, source: string, className: string): boolean {
    const classBody = node.children.find((child) => child.type === "class_body");
    if (!classBody) return false;

    let hasField = false;

    for (let i = 0; i < classBody.namedChildCount; i++) {
      const child = classBody.namedChild(i);
      if (!child) continue;

      if (child.type === "field_declaration") {
        hasField = true;
        continue;
      }

      if (child.type.includes("annotation")) {
        continue;
      }

      if (child.type === "method_declaration" || child.type === "constructor_declaration") {
        const simpleName = this.getSimpleFunctionName(child, source);
        const fullName = `${className}.${simpleName}`;
        const arity = this.getNodeArity(child);
        if (!this.isTrivialFunction(fullName, arity)) {
          return false;
        }
        continue;
      }

      // Any other member kind disqualifies the class.
      return false;
    }

    return hasField;
  }

  private getSimpleFunctionName(node: Parser.SyntaxNode, source: string): string {
    const nameNode = node.childForFieldName?.("name");
    return nameNode ? source.slice(nameNode.startIndex, nameNode.endIndex) : "<anonymous>";
  }

  /** Builds the FUNCTION unit for `node` and registers it as a child of `parentClass`, if any. */
  private buildFunctionUnit(
    node: Parser.SyntaxNode,
    source: string,
    file: string,
    parentClass?: IndexUnit
  ): IndexUnit {
    const name = this.getFunctionName(node, source, parentClass) || "<anonymous>";
    const startLine = node.startPosition.row;
    const endLine = node.endPosition.row;
    const id = this.buildId(IndexUnitType.FUNCTION, name, startLine, endLine);
    const unit: IndexUnit = {
      id,
      name,
      filePath: file,
      startLine,
      endLine,
      children: [],
      code: this.stripComments(source.slice(node.startIndex, node.endIndex)),
      unitType: IndexUnitType.FUNCTION,
      parentId: parentClass?.id,
      parent: parentClass,
    };
    if (parentClass) {
      parentClass.children = parentClass.children || [];
      parentClass.children.push(unit);
    }
    return unit;
  }

  /**
   * Walks `bodyNode` and emits a BLOCK unit for every `block` node that passes `shouldSkip`.
   * A rejected block is not descended into. Blocks whose code exceeds the configured context
   * length are split into chunks; every resulting unit is attached to `parentFunction`.
   */
  private extractBlocks(
    bodyNode: Parser.SyntaxNode,
    source: string,
    file: string,
    parentFunction: IndexUnit
  ): IndexUnit[] {
    const blocks: IndexUnit[] = [];

    const visit = (n: Parser.SyntaxNode): void => {
      if (this.isBlockNode(n)) {
        const startLine = n.startPosition.row;
        const endLine = n.endPosition.row;
        const lineCount = endLine - startLine;
        if (this.shouldSkip(IndexUnitType.BLOCK, parentFunction.name, lineCount)) {
          return;
        }
        if (lineCount >= indexConfig.blockMinLines) {
          const id = this.buildId(IndexUnitType.BLOCK, parentFunction.name, startLine, endLine);
          const blockUnit: IndexUnit = {
            id,
            name: parentFunction.name,
            filePath: file,
            startLine,
            endLine,
            code: this.stripComments(source.slice(n.startIndex, n.endIndex)),
            unitType: IndexUnitType.BLOCK,
            parentId: parentFunction.id,
            parent: parentFunction,
          };
          const contextLength = this.config?.contextLength ?? 2048;
          const splitBlocks = this.textSplitBlockIfOverContextLimit(blockUnit, contextLength);
          parentFunction.children = parentFunction.children || [];
          parentFunction.children.push(...splitBlocks);
          blocks.push(...splitBlocks);
        }
      }

      for (let i = 0; i < n.namedChildCount; i++) {
        const child = n.namedChild(i);
        if (child) visit(child);
      }
    };

    visit(bodyNode);
    return blocks;
  }

  /** Returns the class source with every direct method/constructor body replaced by " { }". */
  private stripClassBody(node: Parser.SyntaxNode, source: string): string {
    const classStart = node.startIndex;
    let code = source.slice(classStart, node.endIndex);

    const methodBodies: Array<{ start: number; end: number }> = [];
    const candidates = this.getMethodBodiesForClass(node);

    for (const body of candidates) {
      methodBodies.push({ start: body.startIndex - classStart, end: body.endIndex - classStart });
    }

    // Replace from the end backwards so earlier offsets stay valid.
    methodBodies.sort((a, b) => b.start - a.start);
    for (const body of methodBodies) {
      code = code.slice(0, body.start) + " { }" + code.slice(body.end);
    }

    return code;
  }

  private buildId(type: IndexUnitType, name: string, startLine: number, endLine: number): string {
    return `${type}:${name}:${startLine}-${endLine}`;
  }

  /**
   * Counts the parameters declared in a method/constructor's source text.
   *
   * Annotation argument lists ahead of the signature are skipped, and commas nested inside
   * generics, brackets, parentheses or string literals are not treated as separators.
   * Returns 0 when no parameter list is found before the first `{`.
   */
  private extractArity(code: string): number {
    const params = this.findParameterList(code);
    return params === null ? 0 : this.countTopLevelItems(params);
  }

  /** Returns the text between the parentheses of the first non-annotation `(...)` group, or null. */
  private findParameterList(code: string): string | null {
    let i = 0;
    while (i < code.length) {
      const ch = code.charAt(i);
      if (ch === "{") return null; // reached a body without seeing a parameter list
      if (ch === '"' || ch === "'") {
        i = this.skipLiteral(code, i);
        continue;
      }
      if (ch === "@") {
        // Skip the annotation name and, if present, its balanced argument list.
        i++;
        while (i < code.length && /[\w$.]/.test(code.charAt(i))) i++;
        let k = i;
        while (k < code.length && /\s/.test(code.charAt(k))) k++;
        if (code.charAt(k) === "(") {
          const close = this.findClosingParen(code, k);
          if (close === -1) return null;
          i = close + 1;
        }
        continue;
      }
      if (ch === "(") {
        const close = this.findClosingParen(code, i);
        return close === -1 ? null : code.slice(i + 1, close);
      }
      i++;
    }
    return null;
  }

  /** Index of the `)` matching the `(` at `open`, ignoring parentheses inside literals; -1 if none. */
  private findClosingParen(code: string, open: number): number {
    let depth = 0;
    let i = open;
    while (i < code.length) {
      const ch = code.charAt(i);
      if (ch === '"' || ch === "'") {
        i = this.skipLiteral(code, i);
        continue;
      }
      if (ch === "(") {
        depth++;
      } else if (ch === ")") {
        depth--;
        if (depth === 0) return i;
      }
      i++;
    }
    return -1;
  }

  /** Counts the non-blank, comma-separated items of `list` that sit outside any nesting. */
  private countTopLevelItems(list: string): number {
    let count = 0;
    let nest = 0; // depth of (), [] and {}
    let angle = 0; // depth of <>, tracked only outside other brackets
    let hasContent = false;
    let i = 0;
    while (i < list.length) {
      const ch = list.charAt(i);
      if (ch === '"' || ch === "'") {
        i = this.skipLiteral(list, i);
        hasContent = true;
        continue;
      }
      if (ch === "(" || ch === "[" || ch === "{") nest++;
      else if (ch === ")" || ch === "]" || ch === "}") nest = Math.max(0, nest - 1);
      else if (nest === 0 && ch === "<") angle++;
      else if (nest === 0 && ch === ">") angle = Math.max(0, angle - 1);

      if (ch === "," && nest === 0 && angle === 0) {
        if (hasContent) count++;
        hasContent = false;
      } else if (!/\s/.test(ch)) {
        hasContent = true;
      }
      i++;
    }
    return hasContent ? count + 1 : count;
  }

  /**
   * Given the index of an opening quote, returns the index just past the literal.
   * Handles `"..."`, `'...'` and `"""..."""`, honouring backslash escapes. An unterminated
   * single-line literal ends at the line break.
   */
  private skipLiteral(code: string, start: number): number {
    const quote = code.charAt(start);
    if (quote === '"' && code.startsWith('"""', start)) {
      let j = start + 3;
      while (j < code.length && !code.startsWith('"""', j)) {
        j += code.charAt(j) === "\\" ? 2 : 1;
      }
      return Math.min(j + 3, code.length);
    }
    let j = start + 1;
    while (j < code.length) {
      const ch = code.charAt(j);
      if (ch === quote || ch === "\n" || ch === "\r") break;
      j += ch === "\\" ? 2 : 1;
    }
    return Math.min(j + 1, code.length);
  }

  /**
   * Removes `//` and block comments from `code`, leaving string/char literals untouched.
   *
   * @param keepBlockNewlines - When true, line breaks inside a removed block comment are kept
   *   so the remaining code stays on its original lines.
   */
  private removeComments(code: string, keepBlockNewlines: boolean): string {
    let out = "";
    let i = 0;
    while (i < code.length) {
      const ch = code.charAt(i);
      const next = code.charAt(i + 1);
      if (ch === '"' || ch === "'") {
        const end = this.skipLiteral(code, i);
        out += code.slice(i, end);
        i = end;
      } else if (ch === "/" && next === "*") {
        const close = code.indexOf("*/", i + 2);
        if (close === -1) {
          // Unterminated comment: keep the text as-is.
          out += ch;
          i++;
        } else {
          if (keepBlockNewlines) out += code.slice(i, close + 2).replace(/[^\n\r]/g, "");
          i = close + 2;
        }
      } else if (ch === "/" && next === "/") {
        while (i < code.length && code.charAt(i) !== "\n" && code.charAt(i) !== "\r") i++;
      } else {
        out += ch;
        i++;
      }
    }
    return out;
  }

  /** Comment-free, whitespace-free form of `code`, used as the input for block hashing. */
  private normalizeCode(code: string): string {
    return this.removeComments(code, false).replace(/\s+/g, "");
  }

  /** Strips comments while keeping the line breaks of removed block comments. */
  private stripComments(code: string): string {
    return this.removeComments(code, true);
  }

  /** Collapses units sharing an id; the last occurrence wins, at the first occurrence's position. */
  private removeDuplicates(units: IndexUnit[]): IndexUnit[] {
    return Array.from(new Map(units.map((u) => [u.id, u])).values());
  }

  /**
   * Splits a block unit's code into chunks if it exceeds the context length limit.
   * Each chunk copies the unit's fields, with `:chunkN` appended to the id.
   * A limit below 1 (or non-finite) disables splitting, since the slicing loop could not advance.
   */
  private textSplitBlockIfOverContextLimit(unit: IndexUnit, contextLength: number): IndexUnit[] {
    if (!Number.isFinite(contextLength) || contextLength < 1) return [unit];
    if (unit.code.length <= contextLength) return [unit];

    const step = Math.floor(contextLength);
    const chunks: IndexUnit[] = [];
    let chunkIndex = 0;
    for (let i = 0; i < unit.code.length; i += step) {
      chunks.push({
        ...unit,
        id: `${unit.id}:chunk${chunkIndex}`,
        code: unit.code.slice(i, i + step),
      });
      chunkIndex++;
    }
    return chunks;
  }
}
376
-
package/src/index.ts DELETED
@@ -1,9 +0,0 @@
1
// Package entry point: re-exports the small set of symbols offered to consumers.
export { DryScan } from './DryScan';
export { configStore } from './config/configStore';
export { DuplicateGroup, DuplicationScore, DuplicateReport, DryConfig } from './types';
@@ -1,257 +0,0 @@
1
- import debug from "debug";
2
- import shortUuid from "short-uuid";
3
- import { DryScanServiceDeps } from "./types";
4
- import { DuplicateAnalysisResult, DuplicateGroup, DuplicationScore, IndexUnit, IndexUnitType } from "../types";
5
- import { indexConfig } from "../config/indexConfig";
6
- import { DryConfig } from "../types";
7
- import { DuplicationCache } from "./DuplicationCache";
8
-
9
- const log = debug("DryScan:DuplicateService");
10
-
11
/**
 * Finds duplicate pairs among indexed units by comparing units of the same type,
 * and derives an overall duplication score from the pairs found.
 */
export class DuplicateService {
  /** Config of the current `findDuplicates` run; read by `isGroupExcluded`. */
  private config?: DryConfig;
  private readonly cache = DuplicationCache.getInstance();

  constructor(private readonly deps: DryScanServiceDeps) {}

  /**
   * @param dirtyPaths - File paths changed since last run. When provided, only
   * dirty×all similarities are recomputed; clean×clean values are reused from
   * the existing matrix. Pass undefined (or omit) for a full rebuild.
   * @returns The non-excluded duplicate groups (highest similarity first) and the score.
   */
  async findDuplicates(config: DryConfig, dirtyPaths?: string[]): Promise<DuplicateAnalysisResult> {
    this.config = config;
    const t0 = performance.now();
    const allUnits = await this.deps.db.getAllUnits();
    log("Starting duplicate analysis on %d units", allUnits.length);

    if (allUnits.length < 2) {
      return { duplicates: [], score: this.computeDuplicationScore([], allUnits) };
    }

    const thresholds = this.resolveThresholds(config.threshold);
    const duplicates = await this.computeDuplicates(allUnits, thresholds, dirtyPaths);
    const filtered = duplicates.filter((g) => !this.isGroupExcluded(g));
    log("Found %d duplicate groups (%d excluded)", filtered.length, duplicates.length - filtered.length);

    // Fire-and-forget: a failed cache write is logged and must not fail the analysis.
    void this.cache.update(filtered).catch((err) => log("Cache update failed: %O", err));

    const score = this.computeDuplicationScore(filtered, allUnits);
    log("findDuplicates completed in %dms", (performance.now() - t0).toFixed(2));
    return { duplicates: filtered, score };
  }

  /**
   * Derives per-type thresholds from the function threshold. Block and class thresholds keep
   * the same offset from the function threshold as in `indexConfig.thresholds`; every value
   * is clamped to [0, 1]. A missing or non-finite input falls back to the configured default,
   * because a NaN threshold would make every comparison pass.
   */
  private resolveThresholds(functionThreshold?: number): { function: number; block: number; class: number } {
    const d = indexConfig.thresholds;
    const clamp = (v: number) => Math.min(1, Math.max(0, v));
    const requested = functionThreshold !== undefined && Number.isFinite(functionThreshold)
      ? functionThreshold
      : d.function;
    const fn = clamp(requested);
    return {
      function: fn,
      block: clamp(fn + d.block - d.function),
      class: clamp(fn + d.class - d.function),
    };
  }

  /**
   * Compares every pair of same-type units and returns the pairs whose similarity reaches
   * the threshold for that type, sorted by descending similarity.
   */
  private async computeDuplicates(
    units: IndexUnit[],
    thresholds: { function: number; block: number; class: number },
    dirtyPaths?: string[]
  ): Promise<DuplicateGroup[]> {
    this.cache.clearRunCaches();
    await this.cache.buildEmbSimCache(units, dirtyPaths);

    const duplicates: DuplicateGroup[] = [];
    const t0 = performance.now();

    for (const [type, typedUnits] of this.groupByType(units)) {
      const threshold = this.getThreshold(type, thresholds);
      // Pre-format the number: "%.3f" is not a supported format specifier.
      log("Comparing %d %s units (threshold=%s)", typedUnits.length, type, threshold.toFixed(3));

      for (let i = 0; i < typedUnits.length; i++) {
        for (let j = i + 1; j < typedUnits.length; j++) {
          const left = typedUnits[i], right = typedUnits[j];
          if (this.shouldSkipComparison(left, right)) continue;

          // Always check the cache first — this allows pairs whose embeddings
          // have since been cleared to still be reported using a prior score.
          const cached = this.cache.get(left.id, right.id, left.filePath, right.filePath);
          const hasEmbeddings = left.embedding?.length && right.embedding?.length;
          const similarity = cached ?? (hasEmbeddings ? this.computeWeightedSimilarity(left, right, threshold) : 0);
          // Written as a negated >= so a NaN similarity is rejected too.
          if (!(similarity >= threshold)) continue;

          const exclusionString = this.deps.pairing.pairKeyForUnits(left, right);
          if (!exclusionString) continue;

          duplicates.push({
            id: `${left.id}::${right.id}`,
            similarity,
            shortId: shortUuid.generate(),
            exclusionString,
            left: this.toMember(left),
            right: this.toMember(right),
          });
        }
      }
    }

    log("computeDuplicates: %d duplicates in %dms", duplicates.length, (performance.now() - t0).toFixed(2));
    return duplicates.sort((a, b) => b.similarity - a.similarity);
  }

  /** True when the group's pair key matches any entry of `config.excludedPairs`. */
  private isGroupExcluded(group: DuplicateGroup): boolean {
    const config = this.config;
    if (!config?.excludedPairs?.length) return false;
    const key = this.deps.pairing.pairKeyForUnits(group.left, group.right);
    if (!key) return false;
    const actual = this.deps.pairing.parsePairKey(key);
    if (!actual) return false;
    return config.excludedPairs.some((entry) => {
      const parsed = this.deps.pairing.parsePairKey(entry);
      return parsed ? this.deps.pairing.pairKeyMatches(actual, parsed) : false;
    });
  }

  /** Picks the threshold for a unit type; anything other than CLASS/BLOCK uses the function threshold. */
  private getThreshold(type: IndexUnitType, thresholds: { function: number; block: number; class: number }): number {
    if (type === IndexUnitType.CLASS) return thresholds.class;
    if (type === IndexUnitType.BLOCK) return thresholds.block;
    return thresholds.function;
  }

  /**
   * Combines the units' own similarity with the similarity of their enclosing function and/or
   * class, using the weights in `indexConfig.weights`. A parent weight is only applied when
   * both units have such a parent. For functions and blocks, returns 0 early when even a
   * perfect parent similarity could not reach `threshold`.
   */
  private computeWeightedSimilarity(left: IndexUnit, right: IndexUnit, threshold: number): number {
    const selfSim = this.similarity(left, right);

    // CLASS
    if (left.unitType === IndexUnitType.CLASS) {
      return selfSim * indexConfig.weights.class.self;
    }

    // FUNCTION
    if (left.unitType === IndexUnitType.FUNCTION) {
      const w = indexConfig.weights.function;
      const hasPC = this.bothHaveParent(left, right, IndexUnitType.CLASS);
      const total = w.self + (hasPC ? w.parentClass : 0);
      // Early exit: even with perfect parent similarity, can't reach threshold.
      if ((w.self * selfSim + (hasPC ? w.parentClass : 0)) / total < threshold) return 0;
      return (w.self * selfSim + (hasPC ? w.parentClass * this.parentSimilarity(left, right, IndexUnitType.CLASS) : 0)) / total;
    }

    // BLOCK
    const w = indexConfig.weights.block;
    const hasPF = this.bothHaveParent(left, right, IndexUnitType.FUNCTION);
    const hasPC = this.bothHaveParent(left, right, IndexUnitType.CLASS);
    const total = w.self + (hasPF ? w.parentFunction : 0) + (hasPC ? w.parentClass : 0);
    if ((w.self * selfSim + (hasPF ? w.parentFunction : 0) + (hasPC ? w.parentClass : 0)) / total < threshold) return 0;
    return (
      w.self * selfSim +
      (hasPF ? w.parentFunction * this.parentSimilarity(left, right, IndexUnitType.FUNCTION) : 0) +
      (hasPC ? w.parentClass * this.parentSimilarity(left, right, IndexUnitType.CLASS) : 0)
    ) / total;
  }

  /** Groups all units by type for the comparison loop. Units without embeddings are included
   *  so that cache hits can still be returned for pairs whose embeddings were cleared. */
  private groupByType(units: IndexUnit[]): Map<IndexUnitType, IndexUnit[]> {
    const byType = new Map<IndexUnitType, IndexUnit[]>();
    for (const unit of units) {
      const list = byType.get(unit.unitType) ?? [];
      list.push(unit);
      byType.set(unit.unitType, list);
    }
    return byType;
  }

  /** Copies the fields of a unit that are reported on a duplicate group member. */
  private toMember(unit: IndexUnit): DuplicateGroup["left"] {
    return {
      id: unit.id,
      name: unit.name,
      filePath: unit.filePath,
      startLine: unit.startLine,
      endLine: unit.endLine,
      code: unit.code,
      unitType: unit.unitType,
    };
  }

  private bothHaveParent(left: IndexUnit, right: IndexUnit, type: IndexUnitType): boolean {
    return !!this.findParent(left, type) && !!this.findParent(right, type);
  }

  /**
   * Similarity between the nearest ancestors of the given type, memoised in the cache under
   * an order-independent key. Returns 0 when either unit has no such ancestor.
   */
  private parentSimilarity(left: IndexUnit, right: IndexUnit, type: IndexUnitType): number {
    const lp = this.findParent(left, type), rp = this.findParent(right, type);
    if (!lp || !rp) return 0;

    const key = lp.id < rp.id ? `${lp.id}::${rp.id}` : `${rp.id}::${lp.id}`;
    const cached = this.cache.getParentSim(key);
    if (cached !== undefined) return cached;

    const sim = this.similarity(lp, rp);
    this.cache.setParentSim(key, sim);
    return sim;
  }

  /** Resolves similarity via the pre-computed embedding matrix, falling back to best child match. */
  private similarity(left: IndexUnit, right: IndexUnit): number {
    return this.cache.getEmbSim(left.id, right.id) ?? this.childSimilarity(left, right);
  }

  /** Highest similarity over all same-type child pairs; 0 when either side has no children. */
  private childSimilarity(left: IndexUnit, right: IndexUnit): number {
    const lc = left.children ?? [], rc = right.children ?? [];
    if (!lc.length || !rc.length) return 0;

    let best = 0;
    for (const l of lc) {
      for (const r of rc) {
        if (l.unitType !== r.unitType) continue;
        const sim = this.similarity(l, r);
        if (sim > best) best = sim;
      }
    }
    return best;
  }

  /** Skips block pairs from the same file where one block's line range contains the other's. */
  private shouldSkipComparison(left: IndexUnit, right: IndexUnit): boolean {
    if (left.unitType !== IndexUnitType.BLOCK || right.unitType !== IndexUnitType.BLOCK) return false;
    if (left.filePath !== right.filePath) return false;
    return (left.startLine <= right.startLine && left.endLine >= right.endLine)
      || (right.startLine <= left.startLine && right.endLine >= left.endLine);
  }

  /** Walks up the `parent` chain and returns the first ancestor of the given type, or null. */
  private findParent(unit: IndexUnit, type: IndexUnitType): IndexUnit | null {
    let p = unit.parent;
    while (p) {
      if (p.unitType === type) return p;
      p = p.parent;
    }
    return null;
  }

  /**
   * Score = similarity-weighted duplicated lines as a percentage of the total line count.
   * Each group contributes its similarity times the average length of its two members;
   * the total is the sum of the line spans of all units.
   */
  private computeDuplicationScore(duplicates: DuplicateGroup[], allUnits: IndexUnit[]): DuplicationScore {
    const totalLines = allUnits.reduce((sum, u) => sum + u.endLine - u.startLine + 1, 0);

    if (!totalLines || !duplicates.length) {
      return { score: 0, grade: "Excellent", totalLines, duplicateLines: 0, duplicateGroups: 0 };
    }

    const duplicateLines = duplicates.reduce((sum, g) => {
      const avg = ((g.left.endLine - g.left.startLine + 1) + (g.right.endLine - g.right.startLine + 1)) / 2;
      return sum + g.similarity * avg;
    }, 0);

    const score = (duplicateLines / totalLines) * 100;
    return {
      score,
      grade: this.getScoreGrade(score),
      totalLines,
      duplicateLines: Math.round(duplicateLines),
      duplicateGroups: duplicates.length,
    };
  }

  /** Maps a score to a grade using the cut-offs 5, 15, 30 and 50. */
  private getScoreGrade(score: number): DuplicationScore["grade"] {
    if (score < 5) return "Excellent";
    if (score < 15) return "Good";
    if (score < 30) return "Fair";
    if (score < 50) return "Poor";
    return "Critical";
  }
}