clone-alert 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -34,12 +34,16 @@ var __importStar = (this && this.__importStar) || (function () {
34
34
  };
35
35
  })();
36
36
  Object.defineProperty(exports, "__esModule", { value: true });
37
- exports.collectFiles = collectFiles;
37
+ exports.collectFiles = void 0;
38
38
  exports.main = main;
39
39
  exports.parseArgs = parseArgs;
40
40
  const fs = __importStar(require("node:fs"));
41
41
  const path = __importStar(require("node:path"));
42
+ const baseline_1 = require("./baseline");
43
+ const files_1 = require("./files");
44
+ Object.defineProperty(exports, "collectFiles", { enumerable: true, get: function () { return files_1.collectFiles; } });
42
45
  const index_1 = require("./index");
46
+ const stats_1 = require("./stats");
43
47
  const DEFAULT_EXTENSIONS = [
44
48
  '.ts',
45
49
  '.tsx',
@@ -60,11 +64,25 @@ PMD CPD-like copy-paste detector for TS/JS and common frontend templates.
60
64
 
61
65
  Options:
62
66
  --files <path[,path...]> Files or directories to scan. Can be repeated.
67
+ --file-list <path> Read newline-separated paths to scan from a file.
63
68
  --minimum-tokens <n> Minimum duplicated token span. Default: 50.
64
69
  --minimum-tile-size <n> Alias for --minimum-tokens.
65
- --format <text|xml|json> Report format. Default: text.
70
+ --format <fmt> Report format: text (default), xml, json, sarif,
71
+ csv, csv_with_linecount_per_file, markdown, ai.
72
+ sarif targets GitHub Code Scanning; xml/json/
73
+ markdown embed the duplicated code; ai is a
74
+ compact, token-frugal listing for LLM pipelines;
75
+ shields prints a shields.io endpoint JSON for a
76
+ duplication badge.
66
77
  --extensions <ext[,ext...]> Extensions to include. Default: ts,tsx,js,jsx,vue,svelte,html.
67
78
  --exclude <glob[,glob...]> Exclude files or directories. Can be repeated.
79
+ --non-recursive Scan only the top level of each directory.
80
+ --gitignore Skip files ignored by .gitignore (nested files
81
+ honored, within the git repo). Default.
82
+ --no-gitignore Scan files even if .gitignore would ignore them.
83
+ --skip-duplicate-files Skip files with the same name and byte length.
84
+ --skip-lexical-errors Skip files that fail to tokenize instead of
85
+ aborting the whole run.
68
86
  --ignore-identifiers Normalize identifiers.
69
87
  --no-ignore-identifiers Compare exact identifiers. Default.
70
88
  --ignore-literals Normalize literals.
@@ -87,13 +105,23 @@ Options:
87
105
  alone (handy for a code-only threshold pass).
88
106
  --angular-inline-templates Also scan Angular @Component inline templates.
89
107
  --skip-angular-inline-templates Do not scan inline Angular templates. Default.
90
- --fail-on-violation Exit with code 4 when duplications are found.
108
+ --fail-on-violation Exit with code 4 when duplications are found. Default.
109
+ --no-fail-on-violation Always exit 0 even when duplications are found.
110
+ --baseline <path> Ignore duplications recorded in this baseline
111
+ file; report and fail only on new ones. Match is
112
+ by content fingerprint, so accepted clones stay
113
+ suppressed even after the code moves.
114
+ --update-baseline Write/regenerate the baseline file at --baseline
115
+ with all current duplications, then exit 0. Run
116
+ this once to adopt the existing debt.
91
117
  -h, --help Show this help.
92
118
  -V, --version Show version.
93
119
 
94
120
  Examples:
95
121
  clone-alert --minimum-tokens 50 --files src
96
122
  clone-alert --minimum-tokens 30 --format xml src test
123
+ clone-alert src --baseline .clone-alert-baseline.json --update-baseline
124
+ clone-alert src --baseline .clone-alert-baseline.json --fail-on-violation
97
125
  `;
98
126
  function main(argv) {
99
127
  let options;
@@ -110,9 +138,13 @@ function main(argv) {
110
138
  console.error("Try 'clone-alert --help' for more information.");
111
139
  return 2;
112
140
  }
141
+ if (options.updateBaseline && !options.baselinePath) {
142
+ console.error('clone-alert: --update-baseline requires --baseline <path>');
143
+ return 2;
144
+ }
113
145
  let files;
114
146
  try {
115
- files = collectFiles(options.paths, options.extensions, options.excludePatterns);
147
+ files = (0, files_1.collectFiles)(options.paths, options.extensions, options.excludePatterns, options.respectGitignore, options.nonRecursive);
116
148
  }
117
149
  catch (error) {
118
150
  console.error(`clone-alert: ${error.message}`);
@@ -123,17 +155,76 @@ function main(argv) {
123
155
  return 2;
124
156
  }
125
157
  const cpd = new index_1.Cpd(options);
158
+ // PMD's --skip-duplicate-files keys on basename + byte length, not content.
159
+ const dupKeys = options.skipDuplicateFiles ? new Set() : null;
126
160
  for (const file of files) {
127
- cpd.addPath(file);
161
+ if (dupKeys) {
162
+ const key = `${path.basename(file)}_${fs.statSync(file).size}`;
163
+ if (dupKeys.has(key))
164
+ continue;
165
+ dupKeys.add(key);
166
+ }
167
+ try {
168
+ cpd.addPath(file);
169
+ }
170
+ catch (error) {
171
+ if (options.skipLexicalErrors) {
172
+ console.error(`clone-alert: skipping ${file}: ${error.message}`);
173
+ continue;
174
+ }
175
+ console.error(`clone-alert: ${error.message}`);
176
+ console.error('clone-alert: pass --skip-lexical-errors to skip files that fail to tokenize.');
177
+ return 2;
178
+ }
128
179
  }
129
180
  const matches = cpd.run();
181
+ if (options.baselinePath) {
182
+ try {
183
+ return runWithBaseline(options, cpd, matches);
184
+ }
185
+ catch (error) {
186
+ console.error(`clone-alert: ${error.message}`);
187
+ return 2;
188
+ }
189
+ }
130
190
  process.stdout.write(formatReport(options.format, cpd, matches));
131
191
  return options.failOnViolation && matches.length > 0 ? 4 : 0;
132
192
  }
193
// Baseline workflow, applied to an already-computed match set.
//
// Update mode (--update-baseline): every current match is written to the
// baseline file, a note goes to stderr, and the run exits 0.
// Read mode: matches whose content fingerprint is recorded in the baseline are
// dropped; the remainder is reported on stdout. The exit code is 4 only when
// failOnViolation is set and at least one unsuppressed match remains.
//
// Detection has already happened by the time this runs, so the work here is
// proportional to the number of matches, not to the number of tokens.
function runWithBaseline(options, cpd, matches) {
    const { baselinePath } = options;
    if (options.updateBaseline) {
        const records = matches.map((match) => toCloneRecord(match, cpd));
        (0, baseline_1.writeBaseline)(baselinePath, records);
        console.error(`clone-alert: wrote baseline with ${matches.length} duplication(s) to ${baselinePath}`);
        return 0;
    }
    const known = (0, baseline_1.readBaseline)(baselinePath);
    const fresh = [];
    for (const match of matches) {
        if (!known.has((0, baseline_1.fingerprint)(cpd, match))) {
            fresh.push(match);
        }
    }
    const suppressed = matches.length - fresh.length;
    if (suppressed > 0) {
        console.error(`clone-alert: ${suppressed} known duplication(s) suppressed by baseline`);
    }
    process.stdout.write(formatReport(options.format, cpd, fresh));
    if (options.failOnViolation && fresh.length > 0) {
        return 4;
    }
    return 0;
}
212
// Builds one baseline entry for a match: its content fingerprint, its token
// count, and the de-duplicated, sorted list of involved files expressed
// relative to the current working directory in posix form (keeps the baseline
// portable across machines/CI). Line and column are deliberately not recorded:
// the fingerprint already identifies the content, and leaving positions out
// keeps the baseline file stable when code moves.
function toCloneRecord(match, cpd) {
    const cwd = process.cwd();
    const uniqueFiles = new Set();
    for (const mark of match.marks) {
        uniqueFiles.add((0, files_1.toPosix)(path.relative(cwd, mark.token.file)));
    }
    const files = [...uniqueFiles].sort();
    return {
        fingerprint: (0, baseline_1.fingerprint)(cpd, match),
        tokens: match.tokenCount,
        files,
    };
}
133
220
  function parseArgs(argv) {
134
221
  const paths = [];
135
222
  const extensions = new Set(DEFAULT_EXTENSIONS);
136
223
  const excludePatterns = [];
224
+ let respectGitignore = true;
225
+ let nonRecursive = false;
226
+ let skipDuplicateFiles = false;
227
+ let skipLexicalErrors = false;
137
228
  let minTileSize = 50;
138
229
  let ignoreIdentifiers = false;
139
230
  let ignoreLiterals = false;
@@ -142,7 +233,9 @@ function parseArgs(argv) {
142
233
  let vueTemplates = true;
143
234
  let angularInlineTemplates = false;
144
235
  let format = 'text';
145
- let failOnViolation = false;
236
+ let failOnViolation = true;
237
+ let baselinePath;
238
+ let updateBaseline = false;
146
239
  for (let i = 0; i < argv.length; i++) {
147
240
  const arg = argv[i];
148
241
  if (arg === '-h' || arg === '--help') {
@@ -161,6 +254,14 @@ function parseArgs(argv) {
161
254
  paths.push(...splitList(arg.slice('--files='.length)));
162
255
  continue;
163
256
  }
257
+ if (arg === '--file-list') {
258
+ paths.push(...readFileList(requireValue(argv, ++i, arg)));
259
+ continue;
260
+ }
261
+ if (arg.startsWith('--file-list=')) {
262
+ paths.push(...readFileList(arg.slice('--file-list='.length)));
263
+ continue;
264
+ }
164
265
  if (arg === '--minimum-tokens' || arg === '--minimum-tile-size') {
165
266
  minTileSize = parsePositiveInteger(requireValue(argv, ++i, arg), arg);
166
267
  continue;
@@ -197,6 +298,26 @@ function parseArgs(argv) {
197
298
  excludePatterns.push(...splitList(arg.slice('--exclude='.length)));
198
299
  continue;
199
300
  }
301
+ if (arg === '--gitignore') {
302
+ respectGitignore = true;
303
+ continue;
304
+ }
305
+ if (arg === '--no-gitignore') {
306
+ respectGitignore = false;
307
+ continue;
308
+ }
309
+ if (arg === '--non-recursive') {
310
+ nonRecursive = true;
311
+ continue;
312
+ }
313
+ if (arg === '--skip-duplicate-files') {
314
+ skipDuplicateFiles = true;
315
+ continue;
316
+ }
317
+ if (arg === '--skip-lexical-errors') {
318
+ skipLexicalErrors = true;
319
+ continue;
320
+ }
200
321
  if (arg === '--ignore-identifiers') {
201
322
  ignoreIdentifiers = true;
202
323
  continue;
@@ -249,6 +370,22 @@ function parseArgs(argv) {
249
370
  failOnViolation = true;
250
371
  continue;
251
372
  }
373
+ if (arg === '--no-fail-on-violation') {
374
+ failOnViolation = false;
375
+ continue;
376
+ }
377
+ if (arg === '--baseline') {
378
+ baselinePath = requireValue(argv, ++i, arg);
379
+ continue;
380
+ }
381
+ if (arg.startsWith('--baseline=')) {
382
+ baselinePath = arg.slice('--baseline='.length);
383
+ continue;
384
+ }
385
+ if (arg === '--update-baseline') {
386
+ updateBaseline = true;
387
+ continue;
388
+ }
252
389
  if (arg.startsWith('-')) {
253
390
  throw new Error(`unknown option: ${arg}`);
254
391
  }
@@ -258,6 +395,10 @@ function parseArgs(argv) {
258
395
  paths,
259
396
  extensions,
260
397
  excludePatterns,
398
+ respectGitignore,
399
+ nonRecursive,
400
+ skipDuplicateFiles,
401
+ skipLexicalErrors,
261
402
  minTileSize,
262
403
  ignoreIdentifiers,
263
404
  ignoreLiterals,
@@ -267,6 +408,8 @@ function parseArgs(argv) {
267
408
  angularInlineTemplates,
268
409
  format,
269
410
  failOnViolation,
411
+ baselinePath,
412
+ updateBaseline,
270
413
  };
271
414
  }
272
415
  function requireValue(argv, index, option) {
@@ -289,11 +432,35 @@ function parsePositiveInteger(value, option) {
289
432
  }
290
433
  return parsed;
291
434
  }
435
// Every value accepted by --format, in the order shown in the error message.
const REPORT_FORMATS = [
    'text',
    'xml',
    'json',
    'sarif',
    'csv',
    'csv_with_linecount_per_file',
    'markdown',
    'ai',
    'shields',
];
// Validates a --format value. Returns the value unchanged when it is one of
// REPORT_FORMATS; otherwise throws an Error listing the accepted formats.
function parseFormat(value) {
    const isKnown = REPORT_FORMATS.indexOf(value) !== -1;
    if (!isKnown) {
        throw new Error(`--format must be one of: ${REPORT_FORMATS.join(', ')}`);
    }
    return value;
}
452
// Reads the file given to --file-list and returns the paths it names: one per
// line, surrounding whitespace trimmed, blank lines dropped. Throws an Error
// naming the list path when the file cannot be read.
function readFileList(listPath) {
    let contents;
    try {
        contents = fs.readFileSync(listPath, 'utf-8');
    }
    catch {
        throw new Error(`--file-list not readable: ${listPath}`);
    }
    const entries = [];
    for (const rawLine of contents.split('\n')) {
        const entry = rawLine.trim();
        if (entry) {
            entries.push(entry);
        }
    }
    return entries;
}
298
465
  function replaceExtensions(target, value) {
299
466
  target.clear();
@@ -301,41 +468,6 @@ function replaceExtensions(target, value) {
301
468
  target.add(ext.startsWith('.') ? ext.toLowerCase() : `.${ext.toLowerCase()}`);
302
469
  }
303
470
  }
304
- function collectFiles(paths, extensions, excludePatterns = []) {
305
- const files = [];
306
- const seen = new Set();
307
- const excludeMatchers = excludePatterns.map((pattern) => globToRegExp(toPosix(pattern)));
308
- const visit = (entry) => {
309
- const full = path.resolve(entry);
310
- if (!fs.existsSync(full)) {
311
- throw new Error(`path does not exist: ${entry}`);
312
- }
313
- const stat = fs.statSync(full);
314
- if (stat.isDirectory()) {
315
- if (isExcluded(`${full}${path.sep}`, excludeMatchers))
316
- return;
317
- for (const child of fs.readdirSync(full).sort()) {
318
- if (child === 'node_modules' || child === '.git' || child === 'dist')
319
- continue;
320
- visit(path.join(full, child));
321
- }
322
- return;
323
- }
324
- if (!stat.isFile())
325
- return;
326
- if (isExcluded(full, excludeMatchers))
327
- return;
328
- if (!extensions.has(path.extname(full).toLowerCase()))
329
- return;
330
- if (seen.has(full))
331
- return;
332
- seen.add(full);
333
- files.push(full);
334
- };
335
- for (const entry of paths)
336
- visit(entry);
337
- return files;
338
- }
339
471
  function formatReport(format, cpd, matches) {
340
472
  if (format === 'json') {
341
473
  return `${JSON.stringify({ duplicates: matches.map((match) => matchToJson(match, cpd)) }, null, 2)}\n`;
@@ -343,13 +475,132 @@ function formatReport(format, cpd, matches) {
343
475
  if (format === 'xml') {
344
476
  return formatXml(matches, cpd);
345
477
  }
346
- return cpd.report(matches);
478
+ if (format === 'sarif') {
479
+ return formatSarif(matches, cpd);
480
+ }
481
+ if (format === 'csv') {
482
+ return formatCsv(matches, cpd);
483
+ }
484
+ if (format === 'csv_with_linecount_per_file') {
485
+ return formatCsvWithLinecountPerFile(matches, cpd);
486
+ }
487
+ if (format === 'markdown') {
488
+ return formatMarkdown(matches, cpd);
489
+ }
490
+ if (format === 'ai') {
491
+ return formatAi(matches, cpd);
492
+ }
493
+ if (format === 'shields') {
494
+ return formatShields(matches, cpd);
495
+ }
496
+ const text = cpd.report(matches);
497
+ if (matches.length === 0) {
498
+ return text;
499
+ }
500
+ // Footer with the aggregate duplication stats, like jscpd's summary line.
501
+ return `${text}${(0, stats_1.formatStatsLine)((0, stats_1.computeStats)(matches, cpd))}\n`;
502
+ }
503
+ // Mirrors PMD's CSVRenderer: a `lines,tokens,occurrences` header, then per
504
+ // duplication `<lines>,<tokens>,<occurrences>` followed by `<startLine>,"<file>"`
505
+ // for each occurrence.
506
+ function formatCsv(matches, cpd) {
507
+ const rows = ['lines,tokens,occurrences'];
508
+ for (const match of matches) {
509
+ const duplicate = matchToJson(match, cpd);
510
+ const cells = [String(duplicate.lines), String(match.tokenCount), String(match.markCount)];
511
+ for (const mark of match.marks) {
512
+ const location = cpd.locationForMark(mark, match.tokenCount);
513
+ cells.push(String(location.startLine), `"${location.path}"`);
514
+ }
515
+ rows.push(cells.join(','));
516
+ }
517
+ return `${rows.join('\n')}\n`;
518
+ }
519
+ // Mirrors PMD's CSVWithLinecountPerFileRenderer: no header; per duplication
520
+ // `<occurrences>,<tokens>` then `<startLine>,<lineCount>,"<file>"` per occurrence.
521
+ function formatCsvWithLinecountPerFile(matches, cpd) {
522
+ const rows = [];
523
+ for (const match of matches) {
524
+ const cells = [String(match.markCount), String(match.tokenCount)];
525
+ for (const mark of match.marks) {
526
+ const location = cpd.locationForMark(mark, match.tokenCount);
527
+ const lineCount = location.endLine - location.startLine + 1;
528
+ cells.push(String(location.startLine), String(lineCount), `"${location.path}"`);
529
+ }
530
+ rows.push(cells.join(','));
531
+ }
532
+ return `${rows.join('\n')}\n`;
533
+ }
534
// SARIF 2.1.0 report for GitHub Code Scanning (`github/codeql-action/upload-sarif`).
// Each duplication becomes one result anchored at its first occurrence; the
// remaining occurrences are listed as relatedLocations and summarised in the
// message text. URIs are posix paths relative to the current working directory
// so GitHub can map them onto the checked-out tree. The content fingerprint is
// emitted under partialFingerprints so an alert can be tracked across commits
// even when the clone moves.
function formatSarif(matches, cpd) {
    const cwd = process.cwd();
    const relativeUri = (filePath) => (0, files_1.toPosix)(path.relative(cwd, filePath));
    const toSarifLocation = (location) => ({
        physicalLocation: {
            artifactLocation: { uri: relativeUri(location.path) },
            region: {
                startLine: location.startLine,
                startColumn: location.startColumn,
                endLine: location.endLine,
                endColumn: location.endColumn,
            },
        },
    });
    const results = [];
    for (const match of matches) {
        const occurrences = match.marks.map((mark) => cpd.locationForMark(mark, match.tokenCount));
        const primary = occurrences[0];
        const others = occurrences.slice(1);
        const elsewhere = others
            .map((location) => `${relativeUri(location.path)}:${location.startLine}`)
            .join(', ');
        const alsoAt = elsewhere ? `; also at ${elsewhere}` : '';
        results.push({
            ruleId: 'duplication',
            ruleIndex: 0,
            level: 'warning',
            message: {
                text: `Found a ${match.tokenCount} token (${match.markCount} occurrences) duplication${alsoAt}.`,
            },
            locations: [toSarifLocation(primary)],
            relatedLocations: others.map((location, index) => ({ id: index, ...toSarifLocation(location) })),
            partialFingerprints: { 'cloneAlert/contentV1': (0, baseline_1.fingerprint)(cpd, match) },
        });
    }
    // The single rule every result refers to via ruleIndex 0.
    const duplicationRule = {
        id: 'duplication',
        name: 'Duplication',
        shortDescription: { text: 'Duplicated code' },
        fullDescription: { text: 'A span of duplicated tokens detected by clone-alert.' },
        helpUri: 'https://github.com/BaryshevRS/clone-alert#readme',
        defaultConfiguration: { level: 'warning' },
    };
    const driver = {
        name: 'clone-alert',
        informationUri: 'https://github.com/BaryshevRS/clone-alert',
        version: readVersion(),
        rules: [duplicationRule],
    };
    const log = {
        $schema: 'https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json',
        version: '2.1.0',
        runs: [{ tool: { driver }, results }],
    };
    return `${JSON.stringify(log, null, 2)}\n`;
}
348
597
  function matchToJson(match, cpd) {
349
598
  const files = match.marks.map((mark) => cpd.locationForMark(mark, match.tokenCount));
350
599
  return {
351
600
  lines: Math.max(0, ...files.map((file) => file.endLine - file.startLine + 1)),
352
601
  tokens: match.tokenCount,
602
+ // The duplicated source itself, like jscpd's `fragment` field.
603
+ fragment: cpd.codeFragment(match),
353
604
  files,
354
605
  };
355
606
  }
@@ -362,41 +613,91 @@ function formatXml(matches, cpd) {
362
613
  const location = cpd.locationForMark(mark, match.tokenCount);
363
614
  lines.push(` <file path="${escapeXml(location.path)}" line="${location.startLine}" endline="${location.endLine}" column="${location.startColumn}" endcolumn="${location.endColumn}" />`);
364
615
  }
616
+ // Like PMD's XMLRenderer: one <codefragment> per duplication with the source
617
+ // slice of the first occurrence, after the <file> elements.
618
+ lines.push(` <codefragment><![CDATA[${escapeCdata(cpd.codeFragment(match))}]]></codefragment>`);
365
619
  lines.push(' </duplication>');
366
620
  }
367
621
  lines.push('</pmd-cpd>');
368
622
  return `${lines.join('\n')}\n`;
369
623
  }
370
- function isExcluded(filePath, matchers) {
371
- const normalized = toPosix(filePath);
372
- return matchers.some((matcher) => matcher.test(normalized));
624
// Escapes `&`, `"`, `<` and `>` for use inside XML text or a double-quoted
// attribute value. `&` is handled first so the entities produced for the other
// characters are not escaped a second time.
function escapeXml(value) {
    const replacements = [
        ['&', '&amp;'],
        ['"', '&quot;'],
        ['<', '&lt;'],
        ['>', '&gt;'],
    ];
    let escaped = value;
    for (const [raw, entity] of replacements) {
        escaped = escaped.split(raw).join(entity);
    }
    return escaped;
}
374
- function toPosix(value) {
375
- return value.split(path.sep).join('/');
627
// A CDATA section ends at the first `]]>`, so the terminator cannot appear
// inside one. Every occurrence is split across two adjacent sections (`]]` closes
// the first, `>` opens the next), which keeps the embedded source verbatim once
// parsed.
function escapeCdata(value) {
    const terminator = ']]>';
    const splitAcrossSections = ']]]]><![CDATA[>';
    return value.split(terminator).join(splitAcrossSections);
}
377
- function globToRegExp(pattern) {
378
- let source = '';
379
- for (let index = 0; index < pattern.length; index++) {
380
- const char = pattern[index];
381
- if (char === '*') {
382
- if (pattern[index + 1] === '*') {
383
- source += '.*';
384
- index++;
385
- }
386
- else {
387
- source += '[^/]*';
388
- }
389
- continue;
632
+ // jscpd-style markdown: a title, a one-line summary, then per duplication two
633
+ // occurrence locations and a fenced code block with the duplicated source.
634
+ function formatMarkdown(matches, cpd) {
635
+ const out = ['# Copy/paste detection report', ''];
636
+ if (matches.length === 0) {
637
+ out.push('No duplicates found.', '');
638
+ return `${out.join('\n')}\n`;
639
+ }
640
+ out.push(`> Found ${matches.length} ${matches.length === 1 ? 'clone' : 'clones'}.`, '');
641
+ for (const match of matches) {
642
+ const locations = match.marks.map((mark) => cpd.locationForMark(mark, match.tokenCount));
643
+ out.push(`## Clone (${match.tokenCount} tokens, ${match.markCount} occurrences)`, '');
644
+ for (const location of locations) {
645
+ out.push(` - \`${(0, files_1.toPosix)(location.path)}\` [${location.startLine}:${location.startColumn} - ${location.endLine}:${location.endColumn}]`);
390
646
  }
391
- source += escapeRegExp(char);
647
+ out.push('', '```', cpd.codeFragment(match), '```', '');
392
648
  }
393
- return new RegExp(`^${source}$`);
649
+ return `${out.join('\n')}\n`;
394
650
  }
395
- function escapeRegExp(char) {
396
- return /[\\^$+?.()|[\]{}]/.test(char) ? `\\${char}` : char;
651
// Compact, token-frugal listing for LLM/agent pipelines, modelled on jscpd's
// `ai` reporter. Each duplication is one line of `path:startLine-endLine`
// entries joined by ` ~ `, with the directory prefix shared by all paths
// stripped. A `---` line and the stats summary close the listing. No code is
// included, and an empty match set produces an empty string.
function formatAi(matches, cpd) {
    if (matches.length === 0) {
        return '';
    }
    const groups = [];
    const everyPath = [];
    for (const match of matches) {
        const group = [];
        for (const mark of match.marks) {
            const location = cpd.locationForMark(mark, match.tokenCount);
            group.push(location);
            everyPath.push((0, files_1.toPosix)(location.path));
        }
        groups.push(group);
    }
    const prefix = commonDirPrefix(everyPath);
    const lines = [];
    for (const group of groups) {
        const entries = group.map((location) => {
            const shortPath = (0, files_1.toPosix)(location.path).slice(prefix.length);
            return `${shortPath}:${location.startLine}-${location.endLine}`;
        });
        lines.push(entries.join(' ~ '));
    }
    lines.push('---', (0, stats_1.formatStatsLine)((0, stats_1.computeStats)(matches, cpd)));
    return `${lines.join('\n')}\n`;
}
398
- function escapeXml(value) {
399
- return value.replace(/&/g, '&amp;').replace(/"/g, '&quot;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
667
// Produces a shields.io endpoint payload (https://shields.io/badges/endpoint-badge):
// host the JSON anywhere and point `img.shields.io/endpoint?url=...` at it to
// render a duplication badge. This is a badge, not a gate. The message is
// `0 clones` when nothing was found, otherwise the duplication percentage with
// one decimal. The color follows a fixed scale: brightgreen for zero clones,
// green up to 3%, yellow up to 10%, red above that.
function formatShields(matches, cpd) {
    const stats = (0, stats_1.computeStats)(matches, cpd);
    let message;
    let color;
    if (stats.clones === 0) {
        message = '0 clones';
        color = 'brightgreen';
    }
    else {
        message = `${stats.percentage.toFixed(1)}%`;
        if (stats.percentage <= 3) {
            color = 'green';
        }
        else if (stats.percentage <= 10) {
            color = 'yellow';
        }
        else {
            color = 'red';
        }
    }
    const payload = { schemaVersion: 1, label: 'clone-alert', message, color };
    return `${JSON.stringify(payload, null, 2)}\n`;
}
684
// Returns the longest directory prefix (ending in `/`) shared by every posix
// path, or '' when there is none. The result is cut back to the last `/` so
// only whole directories are stripped, never part of a file or directory name.
function commonDirPrefix(paths) {
    if (paths.length === 0) {
        return '';
    }
    const first = paths[0];
    // Number of leading characters of `first` that every path seen so far shares.
    let shared = first.length;
    for (const candidate of paths) {
        const limit = Math.min(shared, candidate.length);
        let matched = 0;
        while (matched < limit && candidate[matched] === first[matched]) {
            matched++;
        }
        shared = matched;
        if (shared === 0) {
            return '';
        }
    }
    const common = first.slice(0, shared);
    const lastSlash = common.lastIndexOf('/');
    if (lastSlash < 0) {
        return '';
    }
    return common.slice(0, lastSlash + 1);
}
401
702
  function readVersion() {
402
703
  const pkg = JSON.parse(fs.readFileSync(path.resolve(__dirname, '..', 'package.json'), 'utf-8'));
package/dist/core.d.ts CHANGED
@@ -93,6 +93,8 @@ export declare class CpdCore {
93
93
  get idColumn(): Int32Array;
94
94
  /** Materialize a TokenEntry by absolute index. Returns undefined when out of range. */
95
95
  entryAt(index: number): TokenEntry | undefined;
96
+ /** Interned image of the token at an absolute index. Caller guarantees range. */
97
+ imageAt(index: number): string;
96
98
  analyze(): Match[];
97
99
  private hash;
98
100
  }
package/dist/core.js CHANGED
@@ -174,6 +174,10 @@ class CpdCore {
174
174
  const id = this.ids[index];
175
175
  return new TokenEntry(this.idImages[id], id, index, this.fileNames[this.fileIds[index]], this.beginLines[index], this.beginColumns[index], this.endLines[index], this.endColumns[index]);
176
176
  }
177
+ /** Interned image of the token at an absolute index. Caller guarantees range. */
178
+ imageAt(index) {
179
+ return this.idImages[this.ids[index]];
180
+ }
177
181
  analyze() {
178
182
  if (this.size < this.minTileSize)
179
183
  return [];
@@ -415,28 +419,31 @@ class MatchCollector {
415
419
  }
416
420
  return result;
417
421
  }
422
+ // Inlined matchEnded(mark1-1, mark2-1). Within a bucket mark2 > mark1, so when
423
+ // mark1 > 0 both predecessors are valid indices in [0, tokenCount) — no bounds
424
+ // check needed. !matchEnded reduces to "ids equal and not EOF".
418
425
  hasPreviousDupe(mark1, mark2) {
419
426
  if (mark1 === 0)
420
427
  return false;
421
- return !this.matchEnded(mark1 - 1, mark2 - 1);
428
+ const id1 = this.ids[mark1 - 1];
429
+ const id2 = this.ids[mark2 - 1];
430
+ return id1 === id2 && id1 !== 0;
422
431
  }
432
+ // Inlined matchEnded in the hot scan. Bounds checks are unnecessary: every file
433
+ // ends with an EOF sentinel (id 0) that marks never sit on, so the larger index
434
+ // (mark2) reads a 0 and breaks before running off the end. (An out-of-range
435
+ // typed-array read yields undefined and also breaks, so the tail is safe.)
436
+ // id2 === 0 needs no separate test: if id1 === id2 === 0 the id1 === 0 test fires.
423
437
  countDuplicateTokens(mark1, mark2) {
438
+ const ids = this.ids;
424
439
  let index = 0;
425
440
  for (;;) {
426
- if (this.matchEnded(mark1 + index, mark2 + index))
441
+ const id1 = ids[mark1 + index];
442
+ const id2 = ids[mark2 + index];
443
+ if (id1 !== id2 || id1 === 0)
427
444
  break;
428
445
  index++;
429
446
  }
430
447
  return index;
431
448
  }
432
- // True once the windows diverge: one of the indices is out of range, the ids
433
- // differ, or it is EOF (id === 0). Equivalent to matchEnded(token1, token2) on
434
- // TokenEntry.
435
- matchEnded(a, b) {
436
- if (a < 0 || b < 0 || a >= this.tokenCount || b >= this.tokenCount)
437
- return true;
438
- const id1 = this.ids[a];
439
- const id2 = this.ids[b];
440
- return id1 !== id2 || id1 === 0 || id2 === 0;
441
- }
442
449
  }
@@ -0,0 +1,2 @@
1
+ export declare function collectFiles(paths: string[], extensions: Set<string>, excludePatterns?: string[], respectGitignore?: boolean, nonRecursive?: boolean): string[];
2
+ export declare function toPosix(value: string): string;