@bfra.me/workspace-analyzer 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +402 -0
- package/lib/chunk-4LSFAAZW.js +1 -0
- package/lib/chunk-JDF7DQ4V.js +27 -0
- package/lib/chunk-WOJ4C7N7.js +7122 -0
- package/lib/cli.d.ts +1 -0
- package/lib/cli.js +318 -0
- package/lib/index.d.ts +3701 -0
- package/lib/index.js +1262 -0
- package/lib/types/index.d.ts +146 -0
- package/lib/types/index.js +28 -0
- package/package.json +89 -0
- package/src/analyzers/analyzer.ts +201 -0
- package/src/analyzers/architectural-analyzer.ts +304 -0
- package/src/analyzers/build-config-analyzer.ts +334 -0
- package/src/analyzers/circular-import-analyzer.ts +463 -0
- package/src/analyzers/config-consistency-analyzer.ts +335 -0
- package/src/analyzers/dead-code-analyzer.ts +565 -0
- package/src/analyzers/duplicate-code-analyzer.ts +626 -0
- package/src/analyzers/duplicate-dependency-analyzer.ts +381 -0
- package/src/analyzers/eslint-config-analyzer.ts +281 -0
- package/src/analyzers/exports-field-analyzer.ts +324 -0
- package/src/analyzers/index.ts +388 -0
- package/src/analyzers/large-dependency-analyzer.ts +535 -0
- package/src/analyzers/package-json-analyzer.ts +349 -0
- package/src/analyzers/peer-dependency-analyzer.ts +275 -0
- package/src/analyzers/tree-shaking-analyzer.ts +623 -0
- package/src/analyzers/tsconfig-analyzer.ts +382 -0
- package/src/analyzers/unused-dependency-analyzer.ts +356 -0
- package/src/analyzers/version-alignment-analyzer.ts +308 -0
- package/src/api/analyze-workspace.ts +245 -0
- package/src/api/index.ts +11 -0
- package/src/cache/cache-manager.ts +495 -0
- package/src/cache/cache-schema.ts +247 -0
- package/src/cache/change-detector.ts +169 -0
- package/src/cache/file-hasher.ts +65 -0
- package/src/cache/index.ts +47 -0
- package/src/cli/commands/analyze.ts +240 -0
- package/src/cli/commands/index.ts +5 -0
- package/src/cli/index.ts +61 -0
- package/src/cli/types.ts +65 -0
- package/src/cli/ui.ts +213 -0
- package/src/cli.ts +9 -0
- package/src/config/defaults.ts +183 -0
- package/src/config/index.ts +81 -0
- package/src/config/loader.ts +270 -0
- package/src/config/merger.ts +229 -0
- package/src/config/schema.ts +263 -0
- package/src/core/incremental-analyzer.ts +462 -0
- package/src/core/index.ts +34 -0
- package/src/core/orchestrator.ts +416 -0
- package/src/graph/dependency-graph.ts +408 -0
- package/src/graph/index.ts +19 -0
- package/src/index.ts +417 -0
- package/src/parser/config-parser.ts +491 -0
- package/src/parser/import-extractor.ts +340 -0
- package/src/parser/index.ts +54 -0
- package/src/parser/typescript-parser.ts +95 -0
- package/src/performance/bundle-estimator.ts +444 -0
- package/src/performance/index.ts +27 -0
- package/src/reporters/console-reporter.ts +355 -0
- package/src/reporters/index.ts +49 -0
- package/src/reporters/json-reporter.ts +273 -0
- package/src/reporters/markdown-reporter.ts +349 -0
- package/src/reporters/reporter.ts +399 -0
- package/src/rules/builtin-rules.ts +709 -0
- package/src/rules/index.ts +52 -0
- package/src/rules/rule-engine.ts +409 -0
- package/src/scanner/index.ts +18 -0
- package/src/scanner/workspace-scanner.ts +403 -0
- package/src/types/index.ts +176 -0
- package/src/types/result.ts +19 -0
- package/src/utils/index.ts +7 -0
- package/src/utils/pattern-matcher.ts +48 -0
|
@@ -0,0 +1,626 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DuplicateCodeAnalyzer - Detects duplicate code patterns using AST fingerprinting.
|
|
3
|
+
*
|
|
4
|
+
* Identifies code duplication within and across packages in the workspace:
|
|
5
|
+
* - Exact function duplicates with different names
|
|
6
|
+
* - Similar code blocks that could be refactored
|
|
7
|
+
* - Duplicated utility functions across packages
|
|
8
|
+
*
|
|
9
|
+
* Uses structural AST fingerprinting rather than text comparison to detect
|
|
10
|
+
* semantically equivalent code regardless of variable naming or formatting.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import type {SourceFile} from 'ts-morph'
|
|
14
|
+
|
|
15
|
+
import type {WorkspacePackage} from '../scanner/workspace-scanner'
|
|
16
|
+
import type {Issue, IssueLocation, Severity} from '../types/index'
|
|
17
|
+
import type {Result} from '../types/result'
|
|
18
|
+
import type {AnalysisContext, Analyzer, AnalyzerError, AnalyzerMetadata} from './analyzer'
|
|
19
|
+
|
|
20
|
+
import {createProject} from '@bfra.me/doc-sync/parsers'
|
|
21
|
+
import {ok} from '@bfra.me/es/result'
|
|
22
|
+
import {SyntaxKind} from 'ts-morph'
|
|
23
|
+
|
|
24
|
+
import {createIssue, filterIssues} from './analyzer'
|
|
25
|
+
|
|
26
|
+
/**
 * Settings accepted by `createDuplicateCodeAnalyzer`.
 *
 * Every field is optional; anything left out is taken from the analyzer's
 * built-in defaults when the analyzer is created.
 */
export interface DuplicateCodeAnalyzerOptions {
  /** Bodies with fewer statements than this are not fingerprinted at all. */
  readonly minStatements?: number
  /**
   * Lowest similarity score (0-1) at which two non-identical fragments are
   * reported. A value of 1 or more disables the similar-code pass.
   */
  readonly similarityThreshold?: number
  /** Check for duplicates across packages (not just within a package). */
  readonly crossPackageAnalysis?: boolean
  /** Fingerprint function declarations and arrow functions assigned to variables. */
  readonly reportFunctions?: boolean
  /** Fingerprint class methods. */
  readonly reportMethods?: boolean
  /**
   * Report similar code blocks.
   * NOTE(review): no code visible in this file reads this flag - confirm it is consumed elsewhere.
   */
  readonly reportCodeBlocks?: boolean
  /** Severity attached to issues for exact (same-hash) duplicates. */
  readonly exactDuplicateSeverity?: Severity
  /** Severity attached to issues for similar-but-not-identical code. */
  readonly similarCodeSeverity?: Severity
  /** File patterns whose matches are skipped before fingerprinting. */
  readonly excludePatterns?: readonly string[]
}
|
|
49
|
+
|
|
50
|
+
/** Baseline settings; caller-supplied options are spread on top of these. */
const DEFAULT_OPTIONS: Required<DuplicateCodeAnalyzerOptions> = {
  // Fingerprinting thresholds
  minStatements: 5,
  similarityThreshold: 0.85,

  // Scope of the analysis
  crossPackageAnalysis: true,
  reportFunctions: true,
  reportMethods: true,
  reportCodeBlocks: true,

  // Reporting severities
  exactDuplicateSeverity: 'warning',
  similarCodeSeverity: 'info',

  // Test and mock files are excluded from analysis by default
  excludePatterns: [
    '**/*.test.ts',
    '**/*.spec.ts',
    '**/__tests__/**',
    '**/__mocks__/**',
  ],
}
|
|
61
|
+
|
|
62
|
+
/** Static descriptor exposed as the `metadata` of every duplicate-code analyzer instance. */
export const duplicateCodeAnalyzerMetadata: AnalyzerMetadata = {
  id: 'duplicate-code',
  name: 'Duplicate Code Analyzer',
  description: 'Detects duplicate code patterns using AST fingerprinting',
  // Issues produced by this analyzer are all filed under the 'performance' category.
  categories: ['performance'],
  defaultSeverity: 'warning',
}
|
|
69
|
+
|
|
70
|
+
/**
 * Builds a duplicate-code analyzer.
 *
 * @param options - Overrides layered on top of `DEFAULT_OPTIONS`.
 * @returns An analyzer whose `analyze` fingerprints each package in the
 * context one after another, detects duplicates over the combined set, and
 * resolves to an `ok` result holding the issues that survive `filterIssues`.
 */
export function createDuplicateCodeAnalyzer(options: DuplicateCodeAnalyzerOptions = {}): Analyzer {
  const settings = {...DEFAULT_OPTIONS, ...options}

  async function analyze(
    context: AnalysisContext,
  ): Promise<Result<readonly Issue[], AnalyzerError>> {
    // Gather fingerprints package by package so progress messages stay in order.
    const collected: CodeFingerprint[] = []
    for (const workspacePackage of context.packages) {
      context.reportProgress?.(`Fingerprinting code in ${workspacePackage.name}...`)

      const packageFingerprints = await collectPackageFingerprints(
        workspacePackage,
        context.workspacePath,
        settings,
      )
      collected.push(...packageFingerprints)
    }

    context.reportProgress?.('Detecting duplicates...')

    // One issue per detected pattern, in detection order.
    const issues: Issue[] = findDuplicates(collected, settings).map(pattern =>
      createDuplicateIssue(pattern, settings),
    )

    return ok(filterIssues(issues, context.config))
  }

  return {metadata: duplicateCodeAnalyzerMetadata, analyze}
}
|
|
109
|
+
|
|
110
|
+
/**
 * Structural summary of one code fragment, used to compare fragments with
 * each other.
 */
export interface CodeFingerprint {
  /** Hash computed from `structure`; fragments sharing a hash are treated as exact duplicates. */
  readonly hash: string
  /** Kind of fragment this fingerprint was taken from. */
  readonly type: 'function' | 'method' | 'block'
  /** Function, variable or `Class.method` name; absent when the fragment has no name. */
  readonly name?: string
  /** Name of the package the fragment belongs to. */
  readonly packageName: string
  /** Path of the file containing the fragment. */
  readonly filePath: string
  /** Where the fragment sits inside that file. */
  readonly location: IssueLocation
  /** Count of statements directly inside the fragment's body block. */
  readonly statementCount: number
  /** Possibly truncated source text of the fragment, for display only. */
  readonly codePreview: string
  /** Ordered structural tokens the hash and similarity score are derived from. */
  readonly structure: readonly string[]
}
|
|
133
|
+
|
|
134
|
+
/**
 * A group of code fragments judged to be duplicates of one another.
 */
export interface DuplicatePattern {
  /** Shared fingerprint hash for exact matches; a synthetic `similar-…` id for similar pairs. */
  readonly hash: string
  /** Every fragment that belongs to this pattern. */
  readonly occurrences: readonly CodeFingerprint[]
  /** `true` when the fragments share a hash, `false` when they are only similar. */
  readonly isExactMatch: boolean
  /** Similarity score between 0 and 1; exact matches carry 1. */
  readonly similarity: number
}
|
|
147
|
+
|
|
148
|
+
/**
 * Fingerprints every non-excluded source file of one package.
 *
 * @param pkg - Package whose `sourceFiles` are analysed.
 * @param _workspacePath - Currently unused.
 * @param options - Fully resolved analyzer options.
 * @returns All fingerprints found in the package; empty when every file is
 * excluded or none yields a fingerprint.
 */
async function collectPackageFingerprints(
  pkg: WorkspacePackage,
  _workspacePath: string,
  options: Required<DuplicateCodeAnalyzerOptions>,
): Promise<CodeFingerprint[]> {
  const candidates = filterSourceFiles(pkg.sourceFiles, options.excludePatterns)
  if (candidates.length === 0) {
    return []
  }

  // A single project is shared by all files of the package.
  const project = createProject()
  const collected: CodeFingerprint[] = []

  for (const candidate of candidates) {
    try {
      const parsed = project.addSourceFileAtPath(candidate)
      collected.push(...fingerprintFile(parsed, pkg, options))
    } catch {
      // Best effort: a file that cannot be loaded or fingerprinted is skipped.
    }
  }

  return collected
}
|
|
174
|
+
|
|
175
|
+
/**
 * Fingerprints the function-like constructs of a single source file.
 *
 * Three kinds of candidates are considered, in this order:
 * 1. function declarations returned by `sourceFile.getFunctions()`,
 * 2. methods of the classes returned by `sourceFile.getClasses()`,
 * 3. arrow functions used as the initializer of a declaration returned by
 *    `sourceFile.getVariableDeclarations()`.
 *
 * A candidate is only fingerprinted when its body is a block (expression-bodied
 * arrows and body-less declarations are skipped) containing at least
 * `options.minStatements` statements.
 *
 * @param sourceFile - Parsed file to inspect.
 * @param pkg - Package the file belongs to; only its `name` is recorded.
 * @param options - Resolved options (`reportFunctions`, `reportMethods`, `minStatements`).
 * @returns Fingerprints in discovery order; empty when nothing qualifies.
 */
function fingerprintFile(
  sourceFile: SourceFile,
  pkg: WorkspacePackage,
  options: Required<DuplicateCodeAnalyzerOptions>,
): CodeFingerprint[] {
  const fingerprints: CodeFingerprint[] = []
  const filePath = sourceFile.getFilePath()

  // Shared tail of all three passes: validate the body block and record a fingerprint.
  // `node` is the construct reported to the user (declaration, method or variable
  // declaration); `block` is its body when - and only when - that body is a block.
  const record = (
    type: CodeFingerprint['type'],
    name: string | undefined,
    node: {getStart: () => number; getEnd: () => number; getText: () => string},
    block:
      | {
          getStatements: () => readonly unknown[]
          forEachDescendant: (cb: (n: {getKind: () => number}) => void) => void
        }
      | undefined,
  ): void => {
    if (block === undefined) return

    const statementCount = block.getStatements().length
    if (statementCount < options.minStatements) return

    const structure = extractStructure(block)
    const {line, column} = sourceFile.getLineAndColumnAtPos(node.getStart())

    fingerprints.push({
      hash: computeHash(structure),
      type,
      name,
      packageName: pkg.name,
      filePath,
      location: {
        filePath,
        line,
        column,
        endLine: sourceFile.getLineAndColumnAtPos(node.getEnd()).line,
      },
      statementCount,
      codePreview: truncateCode(node.getText(), 100),
      structure,
    })
  }

  // `asKind` yields undefined when the node is not of the requested kind, so it
  // doubles as the "is this a block body?" check.
  if (options.reportFunctions) {
    for (const func of sourceFile.getFunctions()) {
      record('function', func.getName(), func, func.getBody()?.asKind(SyntaxKind.Block))
    }
  }

  if (options.reportMethods) {
    for (const classDecl of sourceFile.getClasses()) {
      // A class may have no name; avoid labelling its methods "undefined.method".
      const className = classDecl.getName() ?? '<anonymous>'
      for (const method of classDecl.getMethods()) {
        record(
          'method',
          `${className}.${method.getName()}`,
          method,
          method.getBody()?.asKind(SyntaxKind.Block),
        )
      }
    }
  }

  // Arrow functions assigned to variables are reported as functions named after the variable.
  if (options.reportFunctions) {
    for (const varDecl of sourceFile.getVariableDeclarations()) {
      const arrowFunc = varDecl.getInitializer()?.asKind(SyntaxKind.ArrowFunction)
      record(
        'function',
        varDecl.getName(),
        varDecl,
        arrowFunc?.getBody()?.asKind(SyntaxKind.Block),
      )
    }
  }

  return fingerprints
}
|
|
307
|
+
|
|
308
|
+
/**
 * Token emitted for each syntax kind that contributes to a fingerprint.
 * Kinds that are not listed (identifiers, literals, ...) contribute nothing.
 */
const STRUCTURE_TOKENS = new Map<number, string>([
  [SyntaxKind.IfStatement, 'IF'],
  [SyntaxKind.ForStatement, 'FOR'],
  [SyntaxKind.ForInStatement, 'FOR'],
  [SyntaxKind.ForOfStatement, 'FOR'],
  [SyntaxKind.WhileStatement, 'WHILE'],
  [SyntaxKind.DoStatement, 'DO'],
  [SyntaxKind.SwitchStatement, 'SWITCH'],
  [SyntaxKind.TryStatement, 'TRY'],
  [SyntaxKind.ReturnStatement, 'RETURN'],
  [SyntaxKind.ThrowStatement, 'THROW'],
  [SyntaxKind.AwaitExpression, 'AWAIT'],
  [SyntaxKind.CallExpression, 'CALL'],
  [SyntaxKind.PropertyAccessExpression, 'PROP'],
  [SyntaxKind.ElementAccessExpression, 'ELEM'],
  [SyntaxKind.BinaryExpression, 'BIN'],
  [SyntaxKind.ConditionalExpression, 'COND'],
  [SyntaxKind.ArrayLiteralExpression, 'ARR'],
  [SyntaxKind.ObjectLiteralExpression, 'OBJ'],
  [SyntaxKind.NewExpression, 'NEW'],
  [SyntaxKind.VariableStatement, 'VAR'],
  [SyntaxKind.ExpressionStatement, 'EXPR'],
])

/**
 * Reduces a code block to the ordered list of structural tokens found among
 * its descendants.
 *
 * Only the syntax kind of each descendant is looked at, so names, literal
 * values and formatting have no influence on the result.
 *
 * @param node - Anything exposing `forEachDescendant`.
 * @returns Tokens in the order `forEachDescendant` visits the matching nodes.
 */
function extractStructure(node: {
  forEachDescendant: (cb: (n: {getKind: () => number}) => void) => void
}): string[] {
  const tokens: string[] = []

  node.forEachDescendant(descendant => {
    const token = STRUCTURE_TOKENS.get(descendant.getKind())
    // Statement body on purpose: the callback must not return a value.
    if (token !== undefined) {
      tokens.push(token)
    }
  })

  return tokens
}
|
|
388
|
+
|
|
389
|
+
/**
 * Hashes a structural token list into a short base-36 string.
 *
 * The tokens are joined with ':' and folded with a 31-multiplier rolling hash
 * that is truncated to a signed 32-bit integer after every character, so the
 * result may start with '-'.
 *
 * @param structure - Tokens to hash.
 * @returns Base-36 text of the 32-bit hash; `'0'` for an empty list.
 */
function computeHash(structure: readonly string[]): string {
  const joined = structure.join(':')

  let acc = 0
  for (let index = 0; index < joined.length; index += 1) {
    // (acc << 5) - acc is acc * 31; `| 0` wraps the sum back into 32 bits.
    acc = ((acc << 5) - acc + joined.charCodeAt(index)) | 0
  }

  return acc.toString(36)
}
|
|
403
|
+
|
|
404
|
+
/**
 * Calculates similarity between two structural fingerprints.
 *
 * Uses the Jaccard coefficient over the sets of distinct tokens, so neither
 * token order nor repetition affects the score.
 *
 * @returns A value in [0, 1]: 1 when both inputs are empty, 0 when exactly
 * one of them is empty.
 */
function calculateSimilarity(a: readonly string[], b: readonly string[]): number {
  if (a.length === 0 && b.length === 0) return 1
  if (a.length === 0 || b.length === 0) return 0

  const setA = new Set(a)
  const setB = new Set(b)

  let intersection = 0
  for (const item of setA) {
    if (setB.has(item)) intersection++
  }

  const union = setA.size + setB.size - intersection
  return union === 0 ? 0 : intersection / union
}

/** Splits occurrences into one list per package, keeping first-seen order. */
function groupOccurrencesByPackage(occurrences: readonly CodeFingerprint[]): CodeFingerprint[][] {
  const byPackage = new Map<string, CodeFingerprint[]>()
  for (const occ of occurrences) {
    const bucket = byPackage.get(occ.packageName)
    if (bucket === undefined) {
      byPackage.set(occ.packageName, [occ])
    } else {
      bucket.push(occ)
    }
  }
  return [...byPackage.values()]
}

/**
 * Finds duplicate patterns in collected fingerprints.
 *
 * Pass 1 groups fingerprints by hash and reports every group of two or more
 * as an exact duplicate. Pass 2 (only when `similarityThreshold < 1`) compares
 * the remaining fingerprints pairwise and reports the first partner whose
 * similarity reaches the threshold without being 1.
 *
 * When `crossPackageAnalysis` is `false` both passes stay inside a package:
 * exact groups are split per package and similar pairs from different
 * packages are ignored.
 *
 * @param fingerprints - All fingerprints gathered across the workspace.
 * @param options - Resolved options (`crossPackageAnalysis`, `similarityThreshold`).
 * @returns Exact patterns first (in hash first-seen order), then similar pairs.
 */
function findDuplicates(
  fingerprints: readonly CodeFingerprint[],
  options: Required<DuplicateCodeAnalyzerOptions>,
): DuplicatePattern[] {
  const duplicates: DuplicatePattern[] = []
  // Fragments already assigned to a pattern, identified by file and start line.
  const processed = new Set<string>()
  const keyOf = (fp: CodeFingerprint): string => `${fp.filePath}:${fp.location.line}`

  // Group by hash for exact matches
  const byHash = new Map<string, CodeFingerprint[]>()
  for (const fp of fingerprints) {
    const existing = byHash.get(fp.hash)
    if (existing === undefined) {
      byHash.set(fp.hash, [fp])
    } else {
      existing.push(fp)
    }
  }

  // Report exact duplicates
  for (const [hash, occurrences] of byHash) {
    if (occurrences.length < 2) continue

    // With cross-package analysis off, each package is judged on its own so a
    // pattern never mixes fragments from different packages.
    const groups = options.crossPackageAnalysis
      ? [occurrences]
      : groupOccurrencesByPackage(occurrences).map(group => filterSamePackageOccurrences(group))

    for (const group of groups) {
      if (group.length < 2) continue

      duplicates.push({
        hash,
        occurrences: group,
        isExactMatch: true,
        similarity: 1,
      })

      for (const occ of group) {
        processed.add(keyOf(occ))
      }
    }
  }

  // Find similar code (if similarity threshold < 1)
  if (options.similarityThreshold < 1) {
    for (let i = 0; i < fingerprints.length; i++) {
      const fpA = fingerprints[i]
      if (fpA === undefined) continue

      const keyA = keyOf(fpA)
      if (processed.has(keyA)) continue

      for (let j = i + 1; j < fingerprints.length; j++) {
        const fpB = fingerprints[j]
        if (fpB === undefined) continue

        const keyB = keyOf(fpB)
        if (processed.has(keyB)) continue

        // Skip if same file and close locations
        if (fpA.filePath === fpB.filePath) {
          const lineDiff = Math.abs((fpA.location.line ?? 0) - (fpB.location.line ?? 0))
          if (lineDiff < 20) continue
        }

        // Cross-package analysis disabled: only pairs inside one package qualify.
        if (!options.crossPackageAnalysis && fpA.packageName !== fpB.packageName) {
          continue
        }

        const similarity = calculateSimilarity(fpA.structure, fpB.structure)
        if (similarity >= options.similarityThreshold && similarity < 1) {
          duplicates.push({
            hash: `similar-${i}-${j}`,
            occurrences: [fpA, fpB],
            isExactMatch: false,
            similarity,
          })

          processed.add(keyA)
          processed.add(keyB)
          break // Move to next fpA
        }
      }
    }
  }

  return duplicates
}

/**
 * Keeps a group of same-hash occurrences only when it spans at least two
 * different files; otherwise returns an empty list.
 *
 * @param occurrences - Occurrences sharing one hash; `findDuplicates` passes
 * the occurrences of a single package.
 * @returns The input array itself, or `[]` when all occurrences share one file.
 */
function filterSamePackageOccurrences(occurrences: CodeFingerprint[]): CodeFingerprint[] {
  const distinctFiles = new Set(occurrences.map(occ => occ.filePath))
  return distinctFiles.size >= 2 ? occurrences : []
}
|
|
535
|
+
|
|
536
|
+
/**
 * Converts a detected duplicate pattern into a reportable issue.
 *
 * The first occurrence becomes the issue's primary location and the remaining
 * ones are attached as related locations. Exact and similar patterns differ in
 * issue id, title wording and severity.
 *
 * @param duplicate - Pattern to report.
 * @param options - Resolved options supplying the two severities.
 * @returns The issue; a generic placeholder issue when the pattern has no occurrences.
 */
function createDuplicateIssue(
  duplicate: DuplicatePattern,
  options: Required<DuplicateCodeAnalyzerOptions>,
): Issue {
  const primary = duplicate.occurrences[0]
  if (primary === undefined) {
    // Defensive fallback for a pattern without occurrences.
    return createIssue({
      id: 'duplicate-code',
      title: 'Duplicate code detected',
      description: 'Duplicate code pattern detected',
      severity: options.exactDuplicateSeverity,
      category: 'performance',
      location: {filePath: 'unknown'},
    })
  }

  const exact = duplicate.isExactMatch
  const occurrenceCount = duplicate.occurrences.length

  let typeLabel: string
  switch (primary.type) {
    case 'function':
      typeLabel = 'function'
      break
    case 'method':
      typeLabel = 'method'
      break
    default:
      typeLabel = 'code block'
  }

  // Human-readable "package:file:line" list covering every occurrence.
  const locationParts: string[] = []
  for (const occ of duplicate.occurrences) {
    locationParts.push(`${occ.packageName}:${getFileName(occ.filePath)}:${occ.location.line}`)
  }
  const locationStrings = locationParts.join(', ')

  // Everything after the primary occurrence is reported as a related location.
  const relatedLocations = duplicate.occurrences.slice(1).map(occ => ({
    filePath: occ.filePath,
    line: occ.location.line,
    column: occ.location.column,
  }))

  let title: string
  if (exact) {
    title = `Duplicate ${typeLabel}: ${primary.name ?? 'anonymous'}`
  } else {
    title = `Similar ${typeLabel}s (${Math.round(duplicate.similarity * 100)}% match)`
  }

  return createIssue({
    id: exact ? 'exact-duplicate' : 'similar-code',
    title,
    description:
      `${exact ? 'Identical' : 'Similar'} ${typeLabel} found in ${occurrenceCount} locations: ${locationStrings}. ` +
      `This increases bundle size and maintenance burden.`,
    severity: exact ? options.exactDuplicateSeverity : options.similarCodeSeverity,
    category: 'performance',
    location: primary.location,
    relatedLocations,
    suggestion:
      `Consider extracting this ${typeLabel} into a shared utility module that can be imported by all usages. ` +
      `This reduces bundle size through deduplication and centralizes maintenance.`,
    metadata: {
      duplicateHash: duplicate.hash,
      similarity: duplicate.similarity,
      isExactMatch: exact,
      occurrenceCount,
      statementCount: primary.statementCount,
      codePreview: primary.codePreview,
    },
  })
}
|
|
593
|
+
|
|
594
|
+
/**
 * Shortens source text for display.
 *
 * @param code - Text to shorten.
 * @param maxLength - Longest length returned unchanged.
 * @returns `code` itself when it fits, otherwise its first `maxLength`
 * characters followed by `...`.
 */
function truncateCode(code: string, maxLength: number): string {
  return code.length > maxLength ? `${code.slice(0, maxLength)}...` : code
}
|
|
598
|
+
|
|
599
|
+
/**
 * Returns the portion of a path after its last `/`.
 *
 * A path without any `/` is returned unchanged; a path ending in `/` yields
 * an empty string.
 */
function getFileName(filePath: string): string {
  const lastSlash = filePath.lastIndexOf('/')
  // lastIndexOf gives -1 when there is no slash, making the slice start at 0.
  return filePath.slice(lastSlash + 1)
}
|
|
602
|
+
|
|
603
|
+
/**
 * Removes the files matched by any exclude pattern.
 *
 * Patterns containing `**` are converted to a regular expression and tested
 * against the whole path. Every other pattern is stripped of `*` and treated
 * as a plain substring of the file name (the part after the last `/`).
 *
 * @param sourceFiles - Candidate file paths.
 * @param excludePatterns - Patterns describing files to drop.
 * @returns The paths that match no pattern, in their original order.
 */
function filterSourceFiles(
  sourceFiles: readonly string[],
  excludePatterns: readonly string[],
): string[] {
  // Each `**` pattern is compiled on first use and reused for all later files,
  // instead of being rebuilt for every file/pattern pair.
  const compiled = new Map<string, RegExp>()

  const isExcluded = (filePath: string): boolean => {
    const fileName = filePath.split('/').pop() ?? ''

    return excludePatterns.some(pattern => {
      if (!pattern.includes('**')) {
        return fileName.includes(pattern.replaceAll('*', ''))
      }

      let regex = compiled.get(pattern)
      if (regex === undefined) {
        regex = patternToRegex(pattern)
        compiled.set(pattern, regex)
      }
      // The regex carries no `g`/`y` flag, so repeated `test` calls are stateless.
      return regex.test(filePath)
    })
  }

  return sourceFiles.filter(filePath => !isExcluded(filePath))
}
|
|
619
|
+
|
|
620
|
+
/**
 * Translates a glob-like pattern into an unanchored regular expression.
 *
 * Translation rules, applied in a single pass so that one rule can never
 * rewrite the output of another:
 * - a leading `**​/` matches the start of the path or any prefix ending in `/`
 * - `**​/` elsewhere matches zero or more directories
 * - `**` matches any characters, `/` included
 * - `*` matches any characters except `/`
 * - regex metacharacters are escaped and matched literally
 *
 * The result is not anchored, so a pattern may match anywhere inside a path.
 *
 * @param pattern - Glob-like pattern.
 * @returns Regular expression equivalent of the pattern.
 */
function patternToRegex(pattern: string): RegExp {
  const source = pattern.replace(
    /\*\*\/|\*\*|\*|[.+?^${}()|[\]\\]/g,
    (token: string, offset: number): string => {
      switch (token) {
        case '**/':
          return offset === 0 ? '(?:^|.*/)' : '(?:.*/)?'
        case '**':
          return '.*'
        case '*':
          return '[^/]*'
        default:
          // Any other match is a regex metacharacter: escape it.
          return `\\${token}`
      }
    },
  )
  return new RegExp(source)
}
|