clone-alert 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +125 -16
- package/dist/baseline.d.ts +20 -0
- package/dist/baseline.js +105 -0
- package/dist/cli.d.ts +8 -2
- package/dist/cli.js +369 -68
- package/dist/core.d.ts +2 -0
- package/dist/core.js +19 -12
- package/dist/files.d.ts +2 -0
- package/dist/files.js +236 -0
- package/dist/index.d.ts +21 -0
- package/dist/index.js +49 -0
- package/dist/stats.d.ts +28 -0
- package/dist/stats.js +37 -0
- package/package.json +3 -3
- package/scripts/compare-pmd-cpd.mjs +0 -565
package/dist/cli.js
CHANGED
|
@@ -34,12 +34,16 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
34
34
|
};
|
|
35
35
|
})();
|
|
36
36
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
37
|
-
exports.collectFiles =
|
|
37
|
+
exports.collectFiles = void 0;
|
|
38
38
|
exports.main = main;
|
|
39
39
|
exports.parseArgs = parseArgs;
|
|
40
40
|
const fs = __importStar(require("node:fs"));
|
|
41
41
|
const path = __importStar(require("node:path"));
|
|
42
|
+
const baseline_1 = require("./baseline");
|
|
43
|
+
const files_1 = require("./files");
|
|
44
|
+
Object.defineProperty(exports, "collectFiles", { enumerable: true, get: function () { return files_1.collectFiles; } });
|
|
42
45
|
const index_1 = require("./index");
|
|
46
|
+
const stats_1 = require("./stats");
|
|
43
47
|
const DEFAULT_EXTENSIONS = [
|
|
44
48
|
'.ts',
|
|
45
49
|
'.tsx',
|
|
@@ -60,11 +64,25 @@ PMD CPD-like copy-paste detector for TS/JS and common frontend templates.
|
|
|
60
64
|
|
|
61
65
|
Options:
|
|
62
66
|
--files <path[,path...]> Files or directories to scan. Can be repeated.
|
|
67
|
+
--file-list <path> Read newline-separated paths to scan from a file.
|
|
63
68
|
--minimum-tokens <n> Minimum duplicated token span. Default: 50.
|
|
64
69
|
--minimum-tile-size <n> Alias for --minimum-tokens.
|
|
65
|
-
--format <
|
|
70
|
+
--format <fmt> Report format: text (default), xml, json, sarif,
|
|
71
|
+
csv, csv_with_linecount_per_file, markdown, ai.
|
|
72
|
+
sarif targets GitHub Code Scanning; xml/json/
|
|
73
|
+
markdown embed the duplicated code; ai is a
|
|
74
|
+
compact, token-frugal listing for LLM pipelines;
|
|
75
|
+
shields prints a shields.io endpoint JSON for a
|
|
76
|
+
duplication badge.
|
|
66
77
|
--extensions <ext[,ext...]> Extensions to include. Default: ts,tsx,js,jsx,vue,svelte,html.
|
|
67
78
|
--exclude <glob[,glob...]> Exclude files or directories. Can be repeated.
|
|
79
|
+
--non-recursive Scan only the top level of each directory.
|
|
80
|
+
--gitignore Skip files ignored by .gitignore (nested files
|
|
81
|
+
honored, within the git repo). Default.
|
|
82
|
+
--no-gitignore Scan files even if .gitignore would ignore them.
|
|
83
|
+
--skip-duplicate-files Skip files with the same name and byte length.
|
|
84
|
+
--skip-lexical-errors Skip files that fail to tokenize instead of
|
|
85
|
+
aborting the whole run.
|
|
68
86
|
--ignore-identifiers Normalize identifiers.
|
|
69
87
|
--no-ignore-identifiers Compare exact identifiers. Default.
|
|
70
88
|
--ignore-literals Normalize literals.
|
|
@@ -87,13 +105,23 @@ Options:
|
|
|
87
105
|
alone (handy for a code-only threshold pass).
|
|
88
106
|
--angular-inline-templates Also scan Angular @Component inline templates.
|
|
89
107
|
--skip-angular-inline-templates Do not scan inline Angular templates. Default.
|
|
90
|
-
--fail-on-violation Exit with code 4 when duplications are found.
|
|
108
|
+
--fail-on-violation Exit with code 4 when duplications are found. Default.
|
|
109
|
+
--no-fail-on-violation Always exit 0 even when duplications are found.
|
|
110
|
+
--baseline <path> Ignore duplications recorded in this baseline
|
|
111
|
+
file; report and fail only on new ones. Match is
|
|
112
|
+
by content fingerprint, so accepted clones stay
|
|
113
|
+
suppressed even after the code moves.
|
|
114
|
+
--update-baseline Write/regenerate the baseline file at --baseline
|
|
115
|
+
with all current duplications, then exit 0. Run
|
|
116
|
+
this once to adopt the existing debt.
|
|
91
117
|
-h, --help Show this help.
|
|
92
118
|
-V, --version Show version.
|
|
93
119
|
|
|
94
120
|
Examples:
|
|
95
121
|
clone-alert --minimum-tokens 50 --files src
|
|
96
122
|
clone-alert --minimum-tokens 30 --format xml src test
|
|
123
|
+
clone-alert src --baseline .clone-alert-baseline.json --update-baseline
|
|
124
|
+
clone-alert src --baseline .clone-alert-baseline.json --fail-on-violation
|
|
97
125
|
`;
|
|
98
126
|
function main(argv) {
|
|
99
127
|
let options;
|
|
@@ -110,9 +138,13 @@ function main(argv) {
|
|
|
110
138
|
console.error("Try 'clone-alert --help' for more information.");
|
|
111
139
|
return 2;
|
|
112
140
|
}
|
|
141
|
+
if (options.updateBaseline && !options.baselinePath) {
|
|
142
|
+
console.error('clone-alert: --update-baseline requires --baseline <path>');
|
|
143
|
+
return 2;
|
|
144
|
+
}
|
|
113
145
|
let files;
|
|
114
146
|
try {
|
|
115
|
-
files = collectFiles(options.paths, options.extensions, options.excludePatterns);
|
|
147
|
+
files = (0, files_1.collectFiles)(options.paths, options.extensions, options.excludePatterns, options.respectGitignore, options.nonRecursive);
|
|
116
148
|
}
|
|
117
149
|
catch (error) {
|
|
118
150
|
console.error(`clone-alert: ${error.message}`);
|
|
@@ -123,17 +155,76 @@ function main(argv) {
|
|
|
123
155
|
return 2;
|
|
124
156
|
}
|
|
125
157
|
const cpd = new index_1.Cpd(options);
|
|
158
|
+
// PMD's --skip-duplicate-files keys on basename + byte length, not content.
|
|
159
|
+
const dupKeys = options.skipDuplicateFiles ? new Set() : null;
|
|
126
160
|
for (const file of files) {
|
|
127
|
-
|
|
161
|
+
if (dupKeys) {
|
|
162
|
+
const key = `${path.basename(file)}_${fs.statSync(file).size}`;
|
|
163
|
+
if (dupKeys.has(key))
|
|
164
|
+
continue;
|
|
165
|
+
dupKeys.add(key);
|
|
166
|
+
}
|
|
167
|
+
try {
|
|
168
|
+
cpd.addPath(file);
|
|
169
|
+
}
|
|
170
|
+
catch (error) {
|
|
171
|
+
if (options.skipLexicalErrors) {
|
|
172
|
+
console.error(`clone-alert: skipping ${file}: ${error.message}`);
|
|
173
|
+
continue;
|
|
174
|
+
}
|
|
175
|
+
console.error(`clone-alert: ${error.message}`);
|
|
176
|
+
console.error('clone-alert: pass --skip-lexical-errors to skip files that fail to tokenize.');
|
|
177
|
+
return 2;
|
|
178
|
+
}
|
|
128
179
|
}
|
|
129
180
|
const matches = cpd.run();
|
|
181
|
+
if (options.baselinePath) {
|
|
182
|
+
try {
|
|
183
|
+
return runWithBaseline(options, cpd, matches);
|
|
184
|
+
}
|
|
185
|
+
catch (error) {
|
|
186
|
+
console.error(`clone-alert: ${error.message}`);
|
|
187
|
+
return 2;
|
|
188
|
+
}
|
|
189
|
+
}
|
|
130
190
|
process.stdout.write(formatReport(options.format, cpd, matches));
|
|
131
191
|
return options.failOnViolation && matches.length > 0 ? 4 : 0;
|
|
132
192
|
}
|
|
193
|
+
/**
 * Baseline mode, entered after detection has already produced `matches`.
 *
 * With `options.updateBaseline` set, every current match is converted to a
 * clone record and written to `options.baselinePath`; the run then exits 0.
 * Otherwise the baseline is read and matches whose content fingerprint is
 * already recorded are dropped; only the remaining ones are reported.
 * The work here is per match, never per token.
 *
 * @param options parsed CLI options (baselinePath, updateBaseline, format, failOnViolation)
 * @param cpd detector instance, passed through to fingerprinting and reporting
 * @param matches all duplications found in this run
 * @returns process exit code: 0, or 4 when failOnViolation is set and new duplications remain
 */
function runWithBaseline(options, cpd, matches) {
    const { baselinePath, updateBaseline } = options;
    if (updateBaseline) {
        const records = matches.map((match) => toCloneRecord(match, cpd));
        (0, baseline_1.writeBaseline)(baselinePath, records);
        console.error(`clone-alert: wrote baseline with ${matches.length} duplication(s) to ${baselinePath}`);
        return 0;
    }
    const known = (0, baseline_1.readBaseline)(baselinePath);
    const fresh = [];
    for (const match of matches) {
        if (!known.has((0, baseline_1.fingerprint)(cpd, match))) {
            fresh.push(match);
        }
    }
    const suppressed = matches.length - fresh.length;
    if (suppressed > 0) {
        // Diagnostics go to stderr so stdout stays a clean report.
        console.error(`clone-alert: ${suppressed} known duplication(s) suppressed by baseline`);
    }
    process.stdout.write(formatReport(options.format, cpd, fresh));
    if (!options.failOnViolation) {
        return 0;
    }
    return fresh.length > 0 ? 4 : 0;
}
|
|
212
|
+
/**
 * Builds the baseline entry for one match: its content fingerprint, its token
 * count, and the sorted, de-duplicated list of involved files as cwd-relative
 * posix paths (so the baseline is portable across machines/CI). Line and
 * column are deliberately not recorded: the fingerprint already pins the
 * content, and leaving positions out keeps the baseline stable when code moves.
 *
 * @param match a duplication; reads `marks[].token.file` and `tokenCount`
 * @param cpd detector instance handed to the fingerprint function
 * @returns `{ fingerprint, tokens, files }`
 */
function toCloneRecord(match, cpd) {
    const uniqueFiles = new Set();
    for (const mark of match.marks) {
        const relative = path.relative(process.cwd(), mark.token.file);
        uniqueFiles.add((0, files_1.toPosix)(relative));
    }
    const files = [...uniqueFiles].sort();
    const fingerprint = (0, baseline_1.fingerprint)(cpd, match);
    return { fingerprint, tokens: match.tokenCount, files };
}
|
|
133
220
|
function parseArgs(argv) {
|
|
134
221
|
const paths = [];
|
|
135
222
|
const extensions = new Set(DEFAULT_EXTENSIONS);
|
|
136
223
|
const excludePatterns = [];
|
|
224
|
+
let respectGitignore = true;
|
|
225
|
+
let nonRecursive = false;
|
|
226
|
+
let skipDuplicateFiles = false;
|
|
227
|
+
let skipLexicalErrors = false;
|
|
137
228
|
let minTileSize = 50;
|
|
138
229
|
let ignoreIdentifiers = false;
|
|
139
230
|
let ignoreLiterals = false;
|
|
@@ -142,7 +233,9 @@ function parseArgs(argv) {
|
|
|
142
233
|
let vueTemplates = true;
|
|
143
234
|
let angularInlineTemplates = false;
|
|
144
235
|
let format = 'text';
|
|
145
|
-
let failOnViolation =
|
|
236
|
+
let failOnViolation = true;
|
|
237
|
+
let baselinePath;
|
|
238
|
+
let updateBaseline = false;
|
|
146
239
|
for (let i = 0; i < argv.length; i++) {
|
|
147
240
|
const arg = argv[i];
|
|
148
241
|
if (arg === '-h' || arg === '--help') {
|
|
@@ -161,6 +254,14 @@ function parseArgs(argv) {
|
|
|
161
254
|
paths.push(...splitList(arg.slice('--files='.length)));
|
|
162
255
|
continue;
|
|
163
256
|
}
|
|
257
|
+
if (arg === '--file-list') {
|
|
258
|
+
paths.push(...readFileList(requireValue(argv, ++i, arg)));
|
|
259
|
+
continue;
|
|
260
|
+
}
|
|
261
|
+
if (arg.startsWith('--file-list=')) {
|
|
262
|
+
paths.push(...readFileList(arg.slice('--file-list='.length)));
|
|
263
|
+
continue;
|
|
264
|
+
}
|
|
164
265
|
if (arg === '--minimum-tokens' || arg === '--minimum-tile-size') {
|
|
165
266
|
minTileSize = parsePositiveInteger(requireValue(argv, ++i, arg), arg);
|
|
166
267
|
continue;
|
|
@@ -197,6 +298,26 @@ function parseArgs(argv) {
|
|
|
197
298
|
excludePatterns.push(...splitList(arg.slice('--exclude='.length)));
|
|
198
299
|
continue;
|
|
199
300
|
}
|
|
301
|
+
if (arg === '--gitignore') {
|
|
302
|
+
respectGitignore = true;
|
|
303
|
+
continue;
|
|
304
|
+
}
|
|
305
|
+
if (arg === '--no-gitignore') {
|
|
306
|
+
respectGitignore = false;
|
|
307
|
+
continue;
|
|
308
|
+
}
|
|
309
|
+
if (arg === '--non-recursive') {
|
|
310
|
+
nonRecursive = true;
|
|
311
|
+
continue;
|
|
312
|
+
}
|
|
313
|
+
if (arg === '--skip-duplicate-files') {
|
|
314
|
+
skipDuplicateFiles = true;
|
|
315
|
+
continue;
|
|
316
|
+
}
|
|
317
|
+
if (arg === '--skip-lexical-errors') {
|
|
318
|
+
skipLexicalErrors = true;
|
|
319
|
+
continue;
|
|
320
|
+
}
|
|
200
321
|
if (arg === '--ignore-identifiers') {
|
|
201
322
|
ignoreIdentifiers = true;
|
|
202
323
|
continue;
|
|
@@ -249,6 +370,22 @@ function parseArgs(argv) {
|
|
|
249
370
|
failOnViolation = true;
|
|
250
371
|
continue;
|
|
251
372
|
}
|
|
373
|
+
if (arg === '--no-fail-on-violation') {
|
|
374
|
+
failOnViolation = false;
|
|
375
|
+
continue;
|
|
376
|
+
}
|
|
377
|
+
if (arg === '--baseline') {
|
|
378
|
+
baselinePath = requireValue(argv, ++i, arg);
|
|
379
|
+
continue;
|
|
380
|
+
}
|
|
381
|
+
if (arg.startsWith('--baseline=')) {
|
|
382
|
+
baselinePath = arg.slice('--baseline='.length);
|
|
383
|
+
continue;
|
|
384
|
+
}
|
|
385
|
+
if (arg === '--update-baseline') {
|
|
386
|
+
updateBaseline = true;
|
|
387
|
+
continue;
|
|
388
|
+
}
|
|
252
389
|
if (arg.startsWith('-')) {
|
|
253
390
|
throw new Error(`unknown option: ${arg}`);
|
|
254
391
|
}
|
|
@@ -258,6 +395,10 @@ function parseArgs(argv) {
|
|
|
258
395
|
paths,
|
|
259
396
|
extensions,
|
|
260
397
|
excludePatterns,
|
|
398
|
+
respectGitignore,
|
|
399
|
+
nonRecursive,
|
|
400
|
+
skipDuplicateFiles,
|
|
401
|
+
skipLexicalErrors,
|
|
261
402
|
minTileSize,
|
|
262
403
|
ignoreIdentifiers,
|
|
263
404
|
ignoreLiterals,
|
|
@@ -267,6 +408,8 @@ function parseArgs(argv) {
|
|
|
267
408
|
angularInlineTemplates,
|
|
268
409
|
format,
|
|
269
410
|
failOnViolation,
|
|
411
|
+
baselinePath,
|
|
412
|
+
updateBaseline,
|
|
270
413
|
};
|
|
271
414
|
}
|
|
272
415
|
function requireValue(argv, index, option) {
|
|
@@ -289,11 +432,35 @@ function parsePositiveInteger(value, option) {
|
|
|
289
432
|
}
|
|
290
433
|
return parsed;
|
|
291
434
|
}
|
|
435
|
+
// Every value accepted by --format, in the order shown in the error message.
const REPORT_FORMATS = [
    'text',
    'xml',
    'json',
    'sarif',
    'csv',
    'csv_with_linecount_per_file',
    'markdown',
    'ai',
    'shields',
];
/**
 * Validates a --format argument.
 *
 * @param value the raw option value
 * @returns the same value when it is one of REPORT_FORMATS
 * @throws Error listing the accepted formats otherwise
 */
function parseFormat(value) {
    const supported = REPORT_FORMATS.includes(value);
    if (!supported) {
        throw new Error(`--format must be one of: ${REPORT_FORMATS.join(', ')}`);
    }
    return value;
}
|
|
452
|
+
/**
 * Reads a --file-list file: one path per line. Surrounding whitespace
 * (including a trailing `\r` from CRLF files) is trimmed and blank lines are
 * dropped.
 *
 * @param listPath path of the list file
 * @returns the non-empty, trimmed lines in file order
 * @throws Error `--file-list not readable: <path>`; the underlying fs error
 *   (ENOENT, EACCES, EISDIR, ...) is preserved as `cause` for diagnosis
 */
function readFileList(listPath) {
    let contents;
    try {
        contents = fs.readFileSync(listPath, 'utf-8');
    }
    catch (error) {
        // Keep the user-facing message stable but do not discard the root cause.
        throw new Error(`--file-list not readable: ${listPath}`, { cause: error });
    }
    return contents
        .split('\n')
        .map((line) => line.trim())
        .filter(Boolean);
}
|
|
298
465
|
function replaceExtensions(target, value) {
|
|
299
466
|
target.clear();
|
|
@@ -301,41 +468,6 @@ function replaceExtensions(target, value) {
|
|
|
301
468
|
target.add(ext.startsWith('.') ? ext.toLowerCase() : `.${ext.toLowerCase()}`);
|
|
302
469
|
}
|
|
303
470
|
}
|
|
304
|
-
function collectFiles(paths, extensions, excludePatterns = []) {
|
|
305
|
-
const files = [];
|
|
306
|
-
const seen = new Set();
|
|
307
|
-
const excludeMatchers = excludePatterns.map((pattern) => globToRegExp(toPosix(pattern)));
|
|
308
|
-
const visit = (entry) => {
|
|
309
|
-
const full = path.resolve(entry);
|
|
310
|
-
if (!fs.existsSync(full)) {
|
|
311
|
-
throw new Error(`path does not exist: ${entry}`);
|
|
312
|
-
}
|
|
313
|
-
const stat = fs.statSync(full);
|
|
314
|
-
if (stat.isDirectory()) {
|
|
315
|
-
if (isExcluded(`${full}${path.sep}`, excludeMatchers))
|
|
316
|
-
return;
|
|
317
|
-
for (const child of fs.readdirSync(full).sort()) {
|
|
318
|
-
if (child === 'node_modules' || child === '.git' || child === 'dist')
|
|
319
|
-
continue;
|
|
320
|
-
visit(path.join(full, child));
|
|
321
|
-
}
|
|
322
|
-
return;
|
|
323
|
-
}
|
|
324
|
-
if (!stat.isFile())
|
|
325
|
-
return;
|
|
326
|
-
if (isExcluded(full, excludeMatchers))
|
|
327
|
-
return;
|
|
328
|
-
if (!extensions.has(path.extname(full).toLowerCase()))
|
|
329
|
-
return;
|
|
330
|
-
if (seen.has(full))
|
|
331
|
-
return;
|
|
332
|
-
seen.add(full);
|
|
333
|
-
files.push(full);
|
|
334
|
-
};
|
|
335
|
-
for (const entry of paths)
|
|
336
|
-
visit(entry);
|
|
337
|
-
return files;
|
|
338
|
-
}
|
|
339
471
|
function formatReport(format, cpd, matches) {
|
|
340
472
|
if (format === 'json') {
|
|
341
473
|
return `${JSON.stringify({ duplicates: matches.map((match) => matchToJson(match, cpd)) }, null, 2)}\n`;
|
|
@@ -343,13 +475,132 @@ function formatReport(format, cpd, matches) {
|
|
|
343
475
|
if (format === 'xml') {
|
|
344
476
|
return formatXml(matches, cpd);
|
|
345
477
|
}
|
|
346
|
-
|
|
478
|
+
if (format === 'sarif') {
|
|
479
|
+
return formatSarif(matches, cpd);
|
|
480
|
+
}
|
|
481
|
+
if (format === 'csv') {
|
|
482
|
+
return formatCsv(matches, cpd);
|
|
483
|
+
}
|
|
484
|
+
if (format === 'csv_with_linecount_per_file') {
|
|
485
|
+
return formatCsvWithLinecountPerFile(matches, cpd);
|
|
486
|
+
}
|
|
487
|
+
if (format === 'markdown') {
|
|
488
|
+
return formatMarkdown(matches, cpd);
|
|
489
|
+
}
|
|
490
|
+
if (format === 'ai') {
|
|
491
|
+
return formatAi(matches, cpd);
|
|
492
|
+
}
|
|
493
|
+
if (format === 'shields') {
|
|
494
|
+
return formatShields(matches, cpd);
|
|
495
|
+
}
|
|
496
|
+
const text = cpd.report(matches);
|
|
497
|
+
if (matches.length === 0) {
|
|
498
|
+
return text;
|
|
499
|
+
}
|
|
500
|
+
// Footer with the aggregate duplication stats, like jscpd's summary line.
|
|
501
|
+
return `${text}${(0, stats_1.formatStatsLine)((0, stats_1.computeStats)(matches, cpd))}\n`;
|
|
502
|
+
}
|
|
503
|
+
/**
 * CSV report in the layout of PMD's CSVRenderer: a `lines,tokens,occurrences`
 * header, then one row per duplication made of `<lines>,<tokens>,<occurrences>`
 * followed by `<startLine>,"<file>"` for every occurrence.
 *
 * @param matches duplications to render
 * @param cpd detector instance used to resolve mark locations
 * @returns the CSV text, newline-terminated
 */
function formatCsv(matches, cpd) {
    const dataRows = matches.map((match) => {
        const { lines } = matchToJson(match, cpd);
        const occurrences = match.marks.flatMap((mark) => {
            const { startLine, path: file } = cpd.locationForMark(mark, match.tokenCount);
            return [String(startLine), `"${file}"`];
        });
        return [String(lines), String(match.tokenCount), String(match.markCount), ...occurrences].join(',');
    });
    return `${['lines,tokens,occurrences', ...dataRows].join('\n')}\n`;
}
|
|
519
|
+
/**
 * Headerless CSV in the layout of PMD's CSVWithLinecountPerFileRenderer: one
 * row per duplication made of `<occurrences>,<tokens>` followed by
 * `<startLine>,<lineCount>,"<file>"` for every occurrence.
 *
 * @param matches duplications to render
 * @param cpd detector instance used to resolve mark locations
 * @returns the CSV text, newline-terminated (a lone newline when there are no matches)
 */
function formatCsvWithLinecountPerFile(matches, cpd) {
    const rows = matches.map((match) => {
        const perOccurrence = match.marks.flatMap((mark) => {
            const { startLine, endLine, path: file } = cpd.locationForMark(mark, match.tokenCount);
            // Inclusive line span of this occurrence.
            const lineCount = endLine - startLine + 1;
            return [String(startLine), String(lineCount), `"${file}"`];
        });
        return [String(match.markCount), String(match.tokenCount), ...perOccurrence].join(',');
    });
    return `${rows.join('\n')}\n`;
}
|
|
534
|
+
/**
 * SARIF 2.1.0 report for GitHub Code Scanning
 * (`github/codeql-action/upload-sarif`).
 *
 * Each duplication becomes one result anchored at its first occurrence; the
 * remaining occurrences are emitted as relatedLocations and also named in the
 * message. URIs are cwd-relative posix paths so they map onto the checked-out
 * tree. The content fingerprint is stored in partialFingerprints so an alert
 * can be tracked across commits even when the clone moves.
 *
 * @param matches duplications to render
 * @param cpd detector instance used for locations and fingerprints
 * @returns pretty-printed SARIF JSON, newline-terminated
 */
function formatSarif(matches, cpd) {
    const cwd = process.cwd();
    const relativeUri = (absolutePath) => (0, files_1.toPosix)(path.relative(cwd, absolutePath));
    const toSarifLocation = (location) => {
        const region = {
            startLine: location.startLine,
            startColumn: location.startColumn,
            endLine: location.endLine,
            endColumn: location.endColumn,
        };
        return {
            physicalLocation: {
                artifactLocation: { uri: relativeUri(location.path) },
                region,
            },
        };
    };
    const results = matches.map((match) => {
        const locations = match.marks.map((mark) => cpd.locationForMark(mark, match.tokenCount));
        const primary = locations[0];
        const others = locations.slice(1);
        const elsewhere = others
            .map((location) => `${relativeUri(location.path)}:${location.startLine}`)
            .join(', ');
        const alsoAt = elsewhere ? `; also at ${elsewhere}` : '';
        return {
            ruleId: 'duplication',
            ruleIndex: 0,
            level: 'warning',
            message: {
                text: `Found a ${match.tokenCount} token (${match.markCount} occurrences) duplication${alsoAt}.`,
            },
            locations: [toSarifLocation(primary)],
            relatedLocations: others.map((location, index) => ({ id: index, ...toSarifLocation(location) })),
            partialFingerprints: { 'cloneAlert/contentV1': (0, baseline_1.fingerprint)(cpd, match) },
        };
    });
    // The single rule every result refers to through ruleIndex 0.
    const duplicationRule = {
        id: 'duplication',
        name: 'Duplication',
        shortDescription: { text: 'Duplicated code' },
        fullDescription: { text: 'A span of duplicated tokens detected by clone-alert.' },
        helpUri: 'https://github.com/BaryshevRS/clone-alert#readme',
        defaultConfiguration: { level: 'warning' },
    };
    const driver = {
        name: 'clone-alert',
        informationUri: 'https://github.com/BaryshevRS/clone-alert',
        version: readVersion(),
        rules: [duplicationRule],
    };
    const log = {
        $schema: 'https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json',
        version: '2.1.0',
        runs: [{ tool: { driver }, results }],
    };
    return `${JSON.stringify(log, null, 2)}\n`;
}
|
|
348
597
|
/**
 * Converts a match into the plain object used by the JSON report.
 *
 * @param match a duplication; reads `marks` and `tokenCount`
 * @param cpd detector instance providing `locationForMark` and `codeFragment`
 * @returns `{ lines, tokens, fragment, files }` where `lines` is the longest
 *   inclusive line span among the occurrences (0 when there are none),
 *   `fragment` is the duplicated source (like jscpd's `fragment` field) and
 *   `files` holds one location per occurrence
 */
function matchToJson(match, cpd) {
    const files = match.marks.map((mark) => cpd.locationForMark(mark, match.tokenCount));
    const longestSpan = files.reduce((longest, { startLine, endLine }) => Math.max(longest, endLine - startLine + 1), 0);
    return {
        lines: longestSpan,
        tokens: match.tokenCount,
        fragment: cpd.codeFragment(match),
        files,
    };
}
|
|
@@ -362,41 +613,91 @@ function formatXml(matches, cpd) {
|
|
|
362
613
|
const location = cpd.locationForMark(mark, match.tokenCount);
|
|
363
614
|
lines.push(` <file path="${escapeXml(location.path)}" line="${location.startLine}" endline="${location.endLine}" column="${location.startColumn}" endcolumn="${location.endColumn}" />`);
|
|
364
615
|
}
|
|
616
|
+
// Like PMD's XMLRenderer: one <codefragment> per duplication with the source
|
|
617
|
+
// slice of the first occurrence, after the <file> elements.
|
|
618
|
+
lines.push(` <codefragment><![CDATA[${escapeCdata(cpd.codeFragment(match))}]]></codefragment>`);
|
|
365
619
|
lines.push(' </duplication>');
|
|
366
620
|
}
|
|
367
621
|
lines.push('</pmd-cpd>');
|
|
368
622
|
return `${lines.join('\n')}\n`;
|
|
369
623
|
}
|
|
370
|
-
function
|
|
371
|
-
|
|
372
|
-
return matchers.some((matcher) => matcher.test(normalized));
|
|
624
|
+
/**
 * Escapes a string for use inside a double-quoted XML attribute value or
 * element text by replacing `&`, `"`, `<` and `>` with their predefined
 * entities.
 *
 * A single pass over the input is used so that the ampersands introduced by
 * the replacements themselves are never escaped a second time.
 *
 * @param value raw text (e.g. a file path)
 * @returns the text with XML-significant characters replaced by entities
 */
function escapeXml(value) {
    const entities = { '&': '&amp;', '"': '&quot;', '<': '&lt;', '>': '&gt;' };
    return value.replace(/[&"<>]/g, (character) => entities[character]);
}
|
|
374
|
-
|
|
375
|
-
|
|
627
|
+
/**
 * Makes text safe to embed in a CDATA section. A CDATA section cannot contain
 * its own `]]>` terminator, so each occurrence is split across two adjacent
 * sections: the first ends after `]]`, the next starts with `>`. The embedded
 * source therefore survives verbatim once parsed.
 *
 * @param value raw text to embed
 * @returns the text with every `]]>` rewritten as `]]]]><![CDATA[>`
 */
function escapeCdata(value) {
    const terminator = ']]>';
    const splitAcrossSections = ']]]]><![CDATA[>';
    return value.split(terminator).join(splitAcrossSections);
}
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
632
|
+
/**
 * jscpd-style markdown report: a title, a one-line summary, then for every
 * duplication a heading, one bullet per occurrence location and a fenced code
 * block with the duplicated source.
 *
 * The fence is at least three backticks and always longer than the longest
 * backtick run inside the fragment, so duplicated code that itself contains
 * a run of three or more backticks (markdown inside template literals, docs
 * generators, ...) cannot close the block early. Fragments without such runs
 * render exactly as before.
 *
 * @param matches duplications to render
 * @param cpd detector instance providing `locationForMark` and `codeFragment`
 * @returns the markdown text
 */
function formatMarkdown(matches, cpd) {
    // Smallest fence (minimum three backticks) that the fragment cannot terminate.
    const fenceFor = (fragment) => {
        const runs = (typeof fragment === 'string' && fragment.match(/`+/g)) || [];
        const longest = runs.reduce((max, run) => Math.max(max, run.length), 0);
        return '`'.repeat(Math.max(3, longest + 1));
    };
    const out = ['# Copy/paste detection report', ''];
    if (matches.length === 0) {
        out.push('No duplicates found.', '');
        return `${out.join('\n')}\n`;
    }
    out.push(`> Found ${matches.length} ${matches.length === 1 ? 'clone' : 'clones'}.`, '');
    for (const match of matches) {
        const locations = match.marks.map((mark) => cpd.locationForMark(mark, match.tokenCount));
        out.push(`## Clone (${match.tokenCount} tokens, ${match.markCount} occurrences)`, '');
        for (const location of locations) {
            out.push(` - \`${(0, files_1.toPosix)(location.path)}\` [${location.startLine}:${location.startColumn} - ${location.endLine}:${location.endColumn}]`);
        }
        const fragment = cpd.codeFragment(match);
        const fence = fenceFor(fragment);
        out.push('', fence, fragment, fence, '');
    }
    return `${out.join('\n')}\n`;
}
|
|
395
|
-
|
|
396
|
-
|
|
651
|
+
/**
 * Compact, token-frugal listing for LLM/agent pipelines, modelled on jscpd's
 * `ai` reporter. Each duplication is one line of `<path>:<start>-<end>`
 * occurrences joined by ` ~ `, with the directory prefix shared by all paths
 * stripped. A `---` separator and the stats summary line follow. No code, no
 * colors.
 *
 * @param matches duplications to render
 * @param cpd detector instance used for locations and stats
 * @returns the listing, or an empty string when there are no matches
 */
function formatAi(matches, cpd) {
    if (matches.length === 0) {
        return '';
    }
    const locationsByMatch = matches.map((match) => match.marks.map((mark) => cpd.locationForMark(mark, match.tokenCount)));
    const everyPath = [];
    for (const locations of locationsByMatch) {
        for (const location of locations) {
            everyPath.push((0, files_1.toPosix)(location.path));
        }
    }
    const prefix = commonDirPrefix(everyPath);
    const describe = (location) => {
        const shortPath = (0, files_1.toPosix)(location.path).slice(prefix.length);
        return `${shortPath}:${location.startLine}-${location.endLine}`;
    };
    const lines = locationsByMatch.map((locations) => locations.map(describe).join(' ~ '));
    const summary = (0, stats_1.formatStatsLine)((0, stats_1.computeStats)(matches, cpd));
    lines.push('---', summary);
    return `${lines.join('\n')}\n`;
}
|
|
398
|
-
|
|
399
|
-
|
|
667
|
+
/**
 * Builds a shields.io endpoint payload
 * (https://shields.io/badges/endpoint-badge): host the JSON anywhere and
 * point `img.shields.io/endpoint?url=...` at it to render a duplication badge.
 * This is a cosmetic badge, not a gate. The color comes from a fixed scale:
 * zero clones is bright green, up to 3% is green, up to 10% is yellow, and
 * anything above is red.
 *
 * @param matches duplications found in this run
 * @param cpd detector instance passed to the stats computation
 * @returns pretty-printed JSON `{ schemaVersion, label, message, color }`, newline-terminated
 */
function formatShields(matches, cpd) {
    const stats = (0, stats_1.computeStats)(matches, cpd);
    let message;
    let color;
    if (stats.clones === 0) {
        message = '0 clones';
        color = 'brightgreen';
    }
    else {
        message = `${stats.percentage.toFixed(1)}%`;
        if (stats.percentage <= 3) {
            color = 'green';
        }
        else if (stats.percentage <= 10) {
            color = 'yellow';
        }
        else {
            color = 'red';
        }
    }
    const payload = { schemaVersion: 1, label: 'clone-alert', message, color };
    return `${JSON.stringify(payload, null, 2)}\n`;
}
|
|
684
|
+
/**
 * Longest directory prefix shared by all given posix paths. The result always
 * ends at a `/` (or is empty), so callers strip whole directories and never a
 * partial file or directory name.
 *
 * @param paths posix-style paths
 * @returns the shared prefix including its trailing `/`, or `''` when the
 *   list is empty or the paths share no directory
 */
function commonDirPrefix(paths) {
    if (paths.length === 0) {
        return '';
    }
    const [first, ...rest] = paths;
    // Number of leading characters of `first` that every path seen so far shares.
    let sharedLength = first.length;
    for (const candidate of rest) {
        const limit = Math.min(sharedLength, candidate.length);
        let same = 0;
        while (same < limit && candidate[same] === first[same]) {
            same++;
        }
        sharedLength = same;
        if (sharedLength === 0) {
            return '';
        }
    }
    const shared = first.slice(0, sharedLength);
    // Cut back to the last directory separator; with none, the result is ''.
    return shared.slice(0, shared.lastIndexOf('/') + 1);
}
|
|
401
702
|
function readVersion() {
|
|
402
703
|
const pkg = JSON.parse(fs.readFileSync(path.resolve(__dirname, '..', 'package.json'), 'utf-8'));
|
package/dist/core.d.ts
CHANGED
|
@@ -93,6 +93,8 @@ export declare class CpdCore {
|
|
|
93
93
|
get idColumn(): Int32Array;
|
|
94
94
|
/** Materialize a TokenEntry by absolute index. Returns undefined when out of range. */
|
|
95
95
|
entryAt(index: number): TokenEntry | undefined;
|
|
96
|
+
/** Interned image of the token at an absolute index. Caller guarantees range. */
|
|
97
|
+
imageAt(index: number): string;
|
|
96
98
|
analyze(): Match[];
|
|
97
99
|
private hash;
|
|
98
100
|
}
|
package/dist/core.js
CHANGED
|
@@ -174,6 +174,10 @@ class CpdCore {
|
|
|
174
174
|
const id = this.ids[index];
|
|
175
175
|
return new TokenEntry(this.idImages[id], id, index, this.fileNames[this.fileIds[index]], this.beginLines[index], this.beginColumns[index], this.endLines[index], this.endColumns[index]);
|
|
176
176
|
}
|
|
177
|
+
/** Interned image of the token at an absolute index. Caller guarantees range. */
|
|
178
|
+
imageAt(index) {
|
|
179
|
+
return this.idImages[this.ids[index]];
|
|
180
|
+
}
|
|
177
181
|
analyze() {
|
|
178
182
|
if (this.size < this.minTileSize)
|
|
179
183
|
return [];
|
|
@@ -415,28 +419,31 @@ class MatchCollector {
|
|
|
415
419
|
}
|
|
416
420
|
return result;
|
|
417
421
|
}
|
|
422
|
+
// Inlined matchEnded(mark1-1, mark2-1). Within a bucket mark2 > mark1, so when
|
|
423
|
+
// mark1 > 0 both predecessors are valid indices in [0, tokenCount) — no bounds
|
|
424
|
+
// check needed. !matchEnded reduces to "ids equal and not EOF".
|
|
418
425
|
hasPreviousDupe(mark1, mark2) {
|
|
419
426
|
if (mark1 === 0)
|
|
420
427
|
return false;
|
|
421
|
-
|
|
428
|
+
const id1 = this.ids[mark1 - 1];
|
|
429
|
+
const id2 = this.ids[mark2 - 1];
|
|
430
|
+
return id1 === id2 && id1 !== 0;
|
|
422
431
|
}
|
|
432
|
+
/**
 * Counts how many consecutive tokens match when scanning forward from mark1
 * and mark2 in lockstep. The scan stops at the first position where the two
 * ids differ or where the id at mark1's side is 0 (EOF).
 *
 * @param mark1 absolute token index where the first window starts
 * @param mark2 absolute token index where the second window starts
 * @returns the number of matching tokens (0 when the first pair already differs)
 */
// Inlined matchEnded in the hot scan. Bounds checks are unnecessary: every file
// ends with an EOF sentinel (id 0) that marks never sit on, so the larger index
// (mark2) reads a 0 and breaks before running off the end. (An out-of-range
// typed-array read yields undefined and also breaks, so the tail is safe.)
// id2 === 0 needs no separate test: if id1 === id2 === 0 the id1 === 0 test fires.
countDuplicateTokens(mark1, mark2) {
    // Local alias so the loop does not re-read the property on every iteration.
    const ids = this.ids;
    let index = 0;
    for (;;) {
        const id1 = ids[mark1 + index];
        const id2 = ids[mark2 + index];
        // Diverged, or both sides reached EOF (equal ids that are 0).
        if (id1 !== id2 || id1 === 0)
            break;
        index++;
    }
    return index;
}
|
|
432
|
-
// True once the windows diverge: one of the indices is out of range, the ids
|
|
433
|
-
// differ, or it is EOF (id === 0). Equivalent to matchEnded(token1, token2) on
|
|
434
|
-
// TokenEntry.
|
|
435
|
-
matchEnded(a, b) {
|
|
436
|
-
if (a < 0 || b < 0 || a >= this.tokenCount || b >= this.tokenCount)
|
|
437
|
-
return true;
|
|
438
|
-
const id1 = this.ids[a];
|
|
439
|
-
const id2 = this.ids[b];
|
|
440
|
-
return id1 !== id2 || id1 === 0 || id2 === 0;
|
|
441
|
-
}
|
|
442
449
|
}
|
package/dist/files.d.ts
ADDED