clone-alert 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/files.js ADDED
@@ -0,0 +1,236 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.collectFiles = collectFiles;
37
+ exports.toPosix = toPosix;
38
+ const fs = __importStar(require("node:fs"));
39
+ const path = __importStar(require("node:path"));
40
// File discovery for the CLI: recursive walk with --exclude globs and .gitignore
// pruning. Both the exclude matchers and the .gitignore rules prune *during* the
// walk — an ignored directory is never read, never post-filtered afterwards.
/**
 * Collect the files to analyse below the given paths.
 *
 * @param {Iterable<string>} paths - Files or directories passed explicitly by the caller.
 * @param {Set<string>} extensions - Lower-case extensions (with leading dot) to keep.
 * @param {string[]} [excludePatterns=[]] - Glob patterns; matching files/directories are skipped.
 * @param {boolean} [respectGitignore=true] - Apply .gitignore rules below the passed paths.
 * @param {boolean} [nonRecursive=false] - Only scan the direct children of passed directories.
 * @returns {string[]} Absolute file paths, each listed once, in walk order.
 * @throws {Error} When an explicitly passed path does not exist.
 */
function collectFiles(paths, extensions, excludePatterns = [], respectGitignore = true, nonRecursive = false) {
    const files = [];
    const seen = new Set();
    // Real paths of the directories on the current descent. A symlink that points
    // back at one of them would otherwise recurse until the OS path limit is hit.
    const activeDirs = new Set();
    const excludeMatchers = excludePatterns.map((pattern) => globToRegExp(toPosix(pattern)));
    // stat that reports "not there" (dangling symlink, removed mid-walk, unreadable)
    // as null instead of throwing.
    const statOrNull = (target) => {
        try {
            return fs.statSync(target);
        }
        catch {
            return null;
        }
    };
    const realPathOf = (target) => {
        try {
            return fs.realpathSync(target);
        }
        catch {
            return target;
        }
    };
    const visit = (entry, layers, isTopLevel) => {
        const full = path.resolve(entry);
        const stat = statOrNull(full);
        if (stat === null) {
            // Only a path the caller asked for is an error; a directory entry that
            // cannot be stat'ed (e.g. a dangling symlink) must not abort the scan.
            if (isTopLevel) {
                throw new Error(`path does not exist: ${entry}`);
            }
            return;
        }
        // Explicitly passed paths are always scanned; .gitignore only prunes below them.
        if (!isTopLevel && respectGitignore && isGitIgnored(layers, full, stat.isDirectory())) {
            return;
        }
        if (stat.isDirectory()) {
            // --non-recursive: scan a directory's direct children, never descend into subdirs.
            if (!isTopLevel && nonRecursive) {
                return;
            }
            if (isExcluded(`${full}${path.sep}`, excludeMatchers)) {
                return;
            }
            const real = realPathOf(full);
            if (activeDirs.has(real)) {
                return; // symlink cycle: this directory is already being walked
            }
            activeDirs.add(real);
            try {
                // The directory's own .gitignore governs its children, not itself.
                const childLayers = respectGitignore ? withGitignore(layers, full) : layers;
                for (const child of fs.readdirSync(full).sort()) {
                    if (child === 'node_modules' || child === '.git' || child === 'dist') {
                        continue;
                    }
                    visit(path.join(full, child), childLayers, false);
                }
            }
            finally {
                activeDirs.delete(real);
            }
            return;
        }
        if (!stat.isFile()) {
            return;
        }
        if (isExcluded(full, excludeMatchers)) {
            return;
        }
        if (!extensions.has(path.extname(full).toLowerCase())) {
            return;
        }
        if (seen.has(full)) {
            return;
        }
        seen.add(full);
        files.push(full);
    };
    // Seed each root with the .gitignore files of its repo ancestors so a repo-root
    // file applies even when only a subdirectory is scanned.
    for (const entry of paths) {
        const full = path.resolve(entry);
        const seed = respectGitignore && fs.existsSync(full) ? seedGitignoreLayers(full) : [];
        visit(entry, seed, true);
    }
    return files;
}
92
/**
 * Rewrite a path so that every platform separator becomes a forward slash.
 *
 * @param {string} value - Path using `path.sep` as separator.
 * @returns {string} The same path with `/` separators.
 */
function toPosix(value) {
    const segments = value.split(path.sep);
    return segments.join('/');
}
95
/**
 * Tell whether a path matches any of the exclude matchers.
 *
 * @param {string} filePath - Path to test; it is converted to forward slashes first.
 * @param {RegExp[]} matchers - Compiled exclude patterns.
 * @returns {boolean} True as soon as one matcher accepts the path.
 */
function isExcluded(filePath, matchers) {
    const candidate = toPosix(filePath);
    for (const matcher of matchers) {
        if (matcher.test(candidate)) {
            return true;
        }
    }
    return false;
}
99
/**
 * Compile an --exclude glob into a regular expression matching a whole path.
 * `**` spans any characters including `/`; a single `*` stays inside one path
 * segment; every other character is matched literally.
 *
 * @param {string} pattern - Glob pattern with forward-slash separators.
 * @returns {RegExp} Expression anchored at both ends.
 */
function globToRegExp(pattern) {
    const pieces = [];
    let position = 0;
    while (position < pattern.length) {
        const current = pattern[position];
        if (current !== '*') {
            pieces.push(escapeRegExp(current));
            position += 1;
            continue;
        }
        const doubled = pattern[position + 1] === '*';
        pieces.push(doubled ? '.*' : '[^/]*');
        position += doubled ? 2 : 1;
    }
    return new RegExp(`^${pieces.join('')}$`);
}
117
/**
 * Backslash-escape a character when it has a special meaning in a regular
 * expression; `*` is not covered here because the glob compilers handle it
 * before calling this helper.
 *
 * @param {string} char - The character to make literal.
 * @returns {string} The character, prefixed with a backslash when needed.
 */
function escapeRegExp(char) {
    const needsEscape = /[\\^$+?.()|[\]{}]/.test(char);
    if (!needsEscape) {
        return char;
    }
    return `\\${char}`;
}
120
/**
 * Read and parse the `.gitignore` that sits directly inside a directory.
 *
 * @param {string} dir - Directory to look in.
 * @returns {{ base: string, rules: Array }|null} The rules together with the
 *   directory they are relative to, or null when there is no file or it yields
 *   no rules.
 */
function loadGitignore(dir) {
    const ignoreFile = path.join(dir, '.gitignore');
    if (fs.existsSync(ignoreFile)) {
        const parsed = parseGitignore(fs.readFileSync(ignoreFile, 'utf-8'));
        if (parsed.length) {
            return { base: dir, rules: parsed };
        }
    }
    return null;
}
127
/**
 * Extend a list of gitignore layers with the one found in `dir`, if any.
 *
 * @param {Array} layers - Layers collected so far; never mutated.
 * @param {string} dir - Directory whose `.gitignore` should be appended.
 * @returns {Array} A new array with the extra layer, or `layers` itself when
 *   the directory contributes nothing.
 */
function withGitignore(layers, dir) {
    const extra = loadGitignore(dir);
    if (!extra) {
        return layers;
    }
    return [...layers, extra];
}
131
/**
 * Gather the `.gitignore` layers of the directories above `startPath`, up to
 * and including the first ancestor that contains a `.git` entry. When no such
 * ancestor exists nothing is returned: ignore files are only honoured inside a
 * repository.
 *
 * @param {string} startPath - Existing file or directory the scan starts from.
 * @returns {Array} Layers ordered from the repository root downwards, so rules
 *   from deeper directories are evaluated later.
 */
function seedGitignoreLayers(startPath) {
    const hasGitEntry = (candidate) => fs.existsSync(path.join(candidate, '.git'));
    const startDir = fs.statSync(startPath).isDirectory() ? startPath : path.dirname(startPath);
    const climbed = [];
    let current = startDir;
    let atRepoRoot = hasGitEntry(current);
    while (!atRepoRoot) {
        const parent = path.dirname(current);
        if (parent === current) {
            // Reached the filesystem root without meeting a repository.
            return [];
        }
        climbed.push(parent);
        current = parent;
        atRepoRoot = hasGitEntry(current);
    }
    // `climbed` is deepest-first; read it backwards to get shallow-first layers.
    const layers = [];
    for (let i = climbed.length - 1; i >= 0; i--) {
        const layer = loadGitignore(climbed[i]);
        if (layer) {
            layers.push(layer);
        }
    }
    return layers;
}
158
/**
 * Decide whether a path is ignored by the given gitignore layers.
 *
 * Layers are evaluated in order and, inside a layer, rules in file order; the
 * last matching rule wins, so a negated rule can re-include a path.
 *
 * @param {Array<{ base: string, rules: Array<{ negated: boolean, dirOnly: boolean, regex: RegExp }> }>} layers
 *   Layers ordered shallow-first.
 * @param {string} fullPath - Absolute path being tested.
 * @param {boolean} isDir - Whether `fullPath` is a directory (directory-only rules need it).
 * @returns {boolean} True when the final matching rule ignores the path.
 */
function isGitIgnored(layers, fullPath, isDir) {
    let ignored = false;
    for (const layer of layers) {
        const rel = toPosix(path.relative(layer.base, fullPath));
        // A layer only governs paths strictly below its base. `path.relative`
        // yields '' for the base itself, '..' or '../…' for anything above or
        // beside it, and an absolute path when there is no relative route
        // (different drive on Windows).
        const outsideBase = rel === '' || rel === '..' || rel.startsWith('../') || path.isAbsolute(rel);
        if (outsideBase) {
            continue;
        }
        for (const rule of layer.rules) {
            if (rule.dirOnly && !isDir) {
                continue;
            }
            if (rule.regex.test(rel)) {
                ignored = !rule.negated;
            }
        }
    }
    return ignored;
}
174
/**
 * Parse the text of a `.gitignore` file into match rules.
 *
 * Blank lines and `#` comments are skipped. A leading `!` negates the rule, a
 * trailing `/` restricts it to directories, and a slash anywhere else anchors
 * the pattern to the directory holding the file.
 *
 * @param {string} content - Raw file content.
 * @returns {Array<{ negated: boolean, dirOnly: boolean, regex: RegExp }>} Rules in file order.
 */
function parseGitignore(content) {
    const rules = [];
    // Git skips a UTF-8 byte-order mark at the start of the file; without this
    // the mark would be glued to the first line and that rule would never match
    // (or a first-line comment would be read as a pattern).
    const text = content.charCodeAt(0) === 0xfeff ? content.slice(1) : content;
    for (const raw of text.split('\n')) {
        // Strip CR and trailing unescaped whitespace; skip blanks and comments.
        let line = raw.replace(/\r$/, '').replace(/(?<!\\)\s+$/, '');
        if (line === '' || line.startsWith('#')) {
            continue;
        }
        let negated = false;
        if (line.startsWith('!')) {
            negated = true;
            line = line.slice(1);
        }
        // `\#` and `\!` are the escaped forms of a literal leading `#` / `!`.
        if (line.startsWith('\\#') || line.startsWith('\\!')) {
            line = line.slice(1);
        }
        let dirOnly = false;
        if (line.endsWith('/')) {
            dirOnly = true;
            line = line.slice(0, -1);
        }
        if (line === '') {
            continue;
        }
        // A slash anywhere (other than a trailing one, already stripped) anchors the
        // pattern to the .gitignore's directory; otherwise it matches at any depth.
        const anchored = line.includes('/');
        if (line.startsWith('/')) {
            line = line.slice(1);
        }
        rules.push({ negated, dirOnly, regex: gitignoreToRegExp(line, anchored) });
    }
    return rules;
}
204
/**
 * Compile one gitignore pattern (negation, trailing slash and leading slash
 * already removed) into a regular expression tested against a forward-slash
 * path relative to the ignore file's directory.
 *
 * Supported syntax: `*` and `?` (never crossing `/`), `**` spanning segments,
 * a leading-or-after-slash `**` + `/` for "zero or more directories", bracket
 * expressions such as `[cod]` / `[!a-z]`, and backslash escapes that make the
 * next character literal.
 *
 * @param {string} pattern - The pattern body.
 * @param {boolean} anchored - True when the pattern is relative to the ignore
 *   file's directory; false when it may match at any depth.
 * @returns {RegExp} Expression that also accepts everything beneath a match.
 */
function gitignoreToRegExp(pattern, anchored) {
    // Make one character literal outside a bracket expression.
    const literal = (ch) => (/[\\^$*+?.()|[\]{}]/.test(ch) ? `\\${ch}` : ch);
    // Make one character safe inside a bracket expression; an unescaped `-`
    // keeps its range meaning, a backslash-escaped one becomes literal.
    const classChar = (ch, escaped) => {
        const special = ch === '\\' || ch === ']' || ch === '[' || ch === '^' || (escaped && ch === '-');
        return special ? `\\${ch}` : ch;
    };
    // Translate the bracket expression opening at `start`; null when it is
    // unterminated or not a valid class, in which case `[` is matched literally.
    const readClass = (start) => {
        let cursor = start + 1;
        let negate = false;
        if (pattern[cursor] === '!' || pattern[cursor] === '^') {
            negate = true;
            cursor++;
        }
        let body = '';
        for (let first = true; cursor < pattern.length; cursor++, first = false) {
            const ch = pattern[cursor];
            // A `]` right after the opening is a member, not the terminator.
            if (ch === ']' && !first) {
                const cls = `[${negate ? '^' : ''}${body}]`;
                try {
                    new RegExp(cls);
                }
                catch {
                    return null; // e.g. a reversed range such as [z-a]
                }
                // The lookahead keeps the class from ever matching a separator.
                return { source: `(?!/)${cls}`, end: cursor };
            }
            if (ch === '\\' && cursor + 1 < pattern.length) {
                cursor++;
                body += classChar(pattern[cursor], true);
            }
            else {
                body += classChar(ch, false);
            }
        }
        return null;
    };
    let source = '';
    for (let index = 0; index < pattern.length; index++) {
        const char = pattern[index];
        if (char === '\\' && index + 1 < pattern.length) {
            source += literal(pattern[index + 1]);
            index++;
            continue;
        }
        if (char === '*') {
            if (pattern[index + 1] === '*') {
                const atStart = index === 0 || pattern[index - 1] === '/';
                const slashAfter = pattern[index + 2] === '/';
                if (atStart && slashAfter) {
                    source += '(?:.*/)?'; // `**/` — zero or more leading dirs
                    index += 2;
                }
                else {
                    source += '.*'; // `**` spanning segments
                    index += 1;
                }
            }
            else {
                source += '[^/]*';
            }
            continue;
        }
        if (char === '?') {
            source += '[^/]';
            continue;
        }
        if (char === '[') {
            const cls = readClass(index);
            if (cls !== null) {
                source += cls.source;
                index = cls.end;
                continue;
            }
        }
        source += literal(char);
    }
    // Non-anchored patterns match at any directory boundary; the trailing group lets
    // a matched directory also cover everything beneath it.
    const prefix = anchored ? '^' : '(?:^|/)';
    return new RegExp(`${prefix}${source}(?:/.*)?$`);
}
package/dist/index.d.ts CHANGED
@@ -17,11 +17,32 @@ export interface MatchLocation {
17
17
  export declare class Cpd {
18
18
  private core;
19
19
  private opts;
20
+ /** Original source per file, retained so reporters can emit the duplicated code. */
21
+ private sources;
20
22
  constructor(opts?: CpdOptions);
21
23
  addPath(filePath: string): void;
22
24
  addSource(filePath: string, source: string): void;
23
25
  run(): Match[];
26
+ /**
27
+ * The token images of a match's span (any occurrence — all share the same
28
+ * content). Lets the baseline layer fingerprint a match without reaching into
29
+ * the engine's storage.
30
+ */
31
+ spanImages(match: Match): string[];
24
32
  locationForMark(mark: Mark, tokenCount: number): MatchLocation;
33
+ /**
34
+ * The duplicated source for a match: the full lines [startLine, endLine] of its
35
+ * first occurrence, like PMD's `getSourceCodeSlice`. Empty string if the file's
36
+ * source was not retained (e.g. a baseline-only match). Used by the xml/json/
37
+ * markdown reporters to embed the code fragment.
38
+ */
39
+ codeFragment(match: Match): string;
40
+ /**
41
+ * Total physical line count across all added sources — the denominator for
42
+ * the duplication percentage (see `stats.ts`). A trailing newline is not
43
+ * counted as an extra line.
44
+ */
45
+ totalLines(): number;
25
46
  /** Plain text report for eyeballing / diff tests. */
26
47
  report(matches?: Match[]): string;
27
48
  }
package/dist/index.js CHANGED
@@ -59,6 +59,8 @@ const HTML_EXT = new Set(['.html', '.htm']);
59
59
  class Cpd {
60
60
  core;
61
61
  opts;
62
+ /** Original source per file, retained so reporters can emit the duplicated code. */
63
+ sources = new Map();
62
64
  constructor(opts = {}) {
63
65
  this.opts = {
64
66
  minTileSize: opts.minTileSize ?? 50,
@@ -75,6 +77,7 @@ class Cpd {
75
77
  this.addSource(filePath, fs.readFileSync(filePath, 'utf-8'));
76
78
  }
77
79
  addSource(filePath, source) {
80
+ this.sources.set(filePath, source);
78
81
  const ext = path.extname(filePath).toLowerCase();
79
82
  const tok = {
80
83
  ignoreIdentifiers: this.opts.ignoreIdentifiers,
@@ -120,6 +123,19 @@ class Cpd {
120
123
  run() {
121
124
  return this.core.analyze();
122
125
  }
126
+ /**
127
+ * The token images of a match's span (any occurrence — all share the same
128
+ * content). Lets the baseline layer fingerprint a match without reaching into
129
+ * the engine's storage.
130
+ */
131
+ spanImages(match) {
132
+ const start = match.marks[0].token.index;
133
+ const images = new Array(match.tokenCount);
134
+ for (let k = 0; k < match.tokenCount; k++) {
135
+ images[k] = this.core.imageAt(start + k);
136
+ }
137
+ return images;
138
+ }
123
139
  locationForMark(mark, tokenCount) {
124
140
  const start = mark.token;
125
141
  const end = this.core.entryAt(start.index + tokenCount - 1) ?? start;
@@ -131,6 +147,39 @@ class Cpd {
131
147
  endColumn: end.endColumn,
132
148
  };
133
149
  }
150
+ /**
151
+ * The duplicated source for a match: the full lines [startLine, endLine] of its
152
+ * first occurrence, like PMD's `getSourceCodeSlice`. Empty string if the file's
153
+ * source was not retained (e.g. a baseline-only match). Used by the xml/json/
154
+ * markdown reporters to embed the code fragment.
155
+ */
156
+ codeFragment(match) {
157
+ const location = this.locationForMark(match.marks[0], match.tokenCount);
158
+ const source = this.sources.get(location.path);
159
+ if (source === undefined) {
160
+ return '';
161
+ }
162
+ return source
163
+ .split('\n')
164
+ .slice(location.startLine - 1, location.endLine)
165
+ .join('\n');
166
+ }
167
+ /**
168
+ * Total physical line count across all added sources — the denominator for
169
+ * the duplication percentage (see `stats.ts`). A trailing newline is not
170
+ * counted as an extra line.
171
+ */
172
+ totalLines() {
173
+ let total = 0;
174
+ for (const source of this.sources.values()) {
175
+ if (source.length === 0) {
176
+ continue;
177
+ }
178
+ const lines = source.split('\n').length;
179
+ total += source.endsWith('\n') ? lines - 1 : lines;
180
+ }
181
+ return total;
182
+ }
134
183
  /** Plain text report for eyeballing / diff tests. */
135
184
  report(matches = this.run()) {
136
185
  const lines = [];
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Aggregate duplication statistics over a finished analysis — the `N clones`,
3
+ * duplicated-line count and overall percentage that jscpd reports. This is a
4
+ * reporting concern layered on top of {@link Cpd}; the match engine (`core.ts`)
5
+ * knows nothing about it.
6
+ *
7
+ * @packageDocumentation
8
+ */
9
+ import type { Match } from './core';
10
+ import type { Cpd } from './index';
11
+ export interface DuplicationStats {
12
+ /** Number of detected duplications (matches). */
13
+ clones: number;
14
+ /** Physical lines across all analyzed files (the percentage denominator). */
15
+ totalLines: number;
16
+ /** Distinct lines covered by at least one clone (overlaps counted once). */
17
+ duplicatedLines: number;
18
+ /** `duplicatedLines / totalLines * 100`, or 0 when nothing was analyzed. */
19
+ percentage: number;
20
+ }
21
+ /**
22
+ * Compute duplication stats. Duplicated lines are the union of every match's
23
+ * occurrence ranges per file, so overlapping clones in the same file are not
24
+ * double-counted. Line-based (not byte-exact to jscpd, which is not our reference implementation).
25
+ */
26
+ export declare function computeStats(matches: Match[], cpd: Cpd): DuplicationStats;
27
+ /** A one-line summary, e.g. `12 clones · 4.23% duplicated lines`. */
28
+ export declare function formatStatsLine(stats: DuplicationStats): string;
package/dist/stats.js ADDED
@@ -0,0 +1,37 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.computeStats = computeStats;
4
+ exports.formatStatsLine = formatStatsLine;
5
/**
 * Aggregate duplication statistics for a finished analysis.
 *
 * For every occurrence of every match the covered line numbers are collected
 * per file in a set, so lines shared by overlapping clones count once. The
 * result is line-based and not meant to be byte-exact with jscpd, which is
 * not the reference implementation here.
 *
 * @param matches - Matches to summarise.
 * @param cpd - Object providing `locationForMark(mark, tokenCount)` and `totalLines()`.
 * @returns `{ clones, totalLines, duplicatedLines, percentage }`; the
 *   percentage is 0 when there are no lines at all.
 */
function computeStats(matches, cpd) {
    const linesByPath = new Map();
    for (const { marks, tokenCount } of matches) {
        for (const mark of marks) {
            const spot = cpd.locationForMark(mark, tokenCount);
            if (linesByPath.get(spot.path) === undefined) {
                linesByPath.set(spot.path, new Set());
            }
            const bucket = linesByPath.get(spot.path);
            let line = spot.startLine;
            while (line <= spot.endLine) {
                bucket.add(line);
                line++;
            }
        }
    }
    const duplicatedLines = [...linesByPath.values()].reduce((sum, bucket) => sum + bucket.size, 0);
    const totalLines = cpd.totalLines();
    const percentage = totalLines > 0 ? (duplicatedLines / totalLines) * 100 : 0;
    return { clones: matches.length, totalLines, duplicatedLines, percentage };
}
33
/**
 * Render the statistics as a single summary line, for example
 * `12 clones · 4.23% duplicated lines`.
 *
 * @param stats - Object with at least `clones` and `percentage`.
 * @returns The summary; "clone" is singular only for exactly one clone and the
 *   percentage is shown with two decimals.
 */
function formatStatsLine(stats) {
    const { clones, percentage } = stats;
    const label = clones === 1 ? 'clone' : 'clones';
    const share = percentage.toFixed(2);
    return `${clones} ${label} · ${share}% duplicated lines`;
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clone-alert",
3
- "version": "0.3.0",
3
+ "version": "0.4.0",
4
4
  "description": "PMD CPD-compatible copy-paste detector that finds duplicate code in TypeScript, JavaScript, JSX/TSX, Vue, Svelte and Angular.",
5
5
  "license": "MIT",
6
6
  "type": "commonjs",
@@ -19,7 +19,6 @@
19
19
  },
20
20
  "files": [
21
21
  "dist/**/*",
22
- "scripts/compare-pmd-cpd.mjs",
23
22
  "README.md",
24
23
  "LICENSE"
25
24
  ],
@@ -61,6 +60,7 @@
61
60
  "check": "biome check .",
62
61
  "check:fix": "biome check --write .",
63
62
  "check-types": "tsc -p tsconfig.json --noEmit && tsc -p tsconfig.test.json --noEmit",
63
+ "badge": "npm run build && node dist/cli.js src --minimum-tokens 70 --format shields --no-fail-on-violation > clone-alert-badge.json",
64
64
  "compare:pmd": "npm run build && node scripts/compare-pmd-cpd.mjs",
65
65
  "lint": "npm run check:fix && npm run lint:knip && npm run check-types && npm run lint:cpd",
66
66
  "lint:cpd": "npm run build && node dist/cli.js --minimum-tokens 70 --files src --extensions ts --format text --fail-on-violation",
@@ -96,7 +96,7 @@
96
96
  "devDependencies": {
97
97
  "@angular/compiler": "^22.0.2",
98
98
  "@biomejs/biome": "^2.5.0",
99
- "@types/node": "^24.0.0",
99
+ "@types/node": "^24.13.2",
100
100
  "@vue/compiler-sfc": "^3.5.38",
101
101
  "knip": "^6.5.0",
102
102
  "svelte": "^5.56.3",