clone-alert 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/core.js ADDED
@@ -0,0 +1,442 @@
1
+ "use strict";
2
+ /**
3
+ * Language-agnostic CPD core. Operates on a flat stream of tokens supplied by
4
+ * the tokenizers (see tokenizers.ts) as `RawToken[]`.
5
+ *
6
+ * Token storage is struct-of-arrays over typed arrays (`Int32Array`): instead of
7
+ * ~N `TokenEntry` objects we keep parallel numeric columns. Full `TokenEntry`
8
+ * objects are materialized lazily, only for the marks that land in a match.
9
+ *
10
+ * @packageDocumentation
11
+ */
12
+ Object.defineProperty(exports, "__esModule", { value: true });
13
+ exports.CpdCore = exports.Match = exports.Mark = exports.TokenEntry = exports.TS_LIT = exports.TS_ID = exports.S = void 0;
14
+ /**
15
+ * Normalization sentinel prefix, taken from the Unicode private-use area so it
16
+ * is guaranteed never to collide with real source token images. Framework token
17
+ * namespaces (Angular/Vue/Svelte) live in their own extension modules
18
+ * (src/angular.ts, etc.) and are built on top of this shared sentinel.
19
+ */
20
+ exports.S = '\uE000';
21
+ /** Normalized identifier (TS). */
22
+ exports.TS_ID = `${exports.S}ID`;
23
+ /** Normalized literal (TS). */
24
+ exports.TS_LIT = `${exports.S}LIT`;
25
/**
 * Expanded view of a single token: its image, the interned id of that image,
 * its absolute position in the token stream, the file it came from, and its
 * begin/end source coordinates.
 *
 * When the end coordinates are omitted they default to the begin coordinates.
 */
class TokenEntry {
    image;
    identifier;
    index;
    file;
    beginLine;
    beginColumn;
    endLine;
    endColumn;
    constructor(image, identifier, index, file, beginLine, beginColumn, endLine = beginLine, endColumn = beginColumn) {
        // Property order mirrors the field declarations above.
        Object.assign(this, {
            image,
            identifier,
            index,
            file,
            beginLine,
            beginColumn,
            endLine,
            endColumn,
        });
    }
}
46
+ exports.TokenEntry = TokenEntry;
47
/**
 * One occurrence of a duplicated span. The occurrence is identified by the
 * token at which the span starts.
 */
class Mark {
    /** Token at which this occurrence begins. */
    token;
    constructor(token) {
        this.token = token;
    }
}
54
+ exports.Mark = Mark;
55
/**
 * A group of marks whose duplicated spans are identical and `tokenCount`
 * tokens long.
 */
class Match {
    tokenCount;
    /** Marks keyed by the index of their starting token, so each start appears once. */
    markMap = new Map();
    /**
     * Memoized result of the `marks` getter. The getter is read very often
     * while matches are being reported, so the sorted array is built once and
     * dropped only when `addMark` actually inserts a new mark.
     */
    marksSorted = null;
    constructor(tokenCount, first, second) {
        this.tokenCount = tokenCount;
        for (const mark of [first, second]) {
            this.markMap.set(mark.token.index, mark);
        }
    }
    /** Add an occurrence starting at `entry`; a start index already present is left untouched. */
    addMark(entry) {
        if (this.markMap.has(entry.index)) {
            return;
        }
        this.markMap.set(entry.index, new Mark(entry));
        this.marksSorted = null;
    }
    /** Number of distinct occurrences in this match. */
    get markCount() {
        return this.markMap.size;
    }
    /** Occurrences ordered by ascending start-token index (the cached array itself is returned). */
    get marks() {
        if (this.marksSorted !== null) {
            return this.marksSorted;
        }
        const ordered = [...this.markMap.values()];
        ordered.sort((left, right) => left.token.index - right.token.index);
        this.marksSorted = ordered;
        return ordered;
    }
}
87
+ exports.Match = Match;
88
/**
 * Duplicate-detection engine. Token streams are added file by file with
 * {@link CpdCore.addFile}; {@link CpdCore.analyze} then hashes every window of
 * `minTileSize` tokens, groups equal hashes and hands each group to the match
 * collector.
 */
class CpdCore {
    minTileSize;
    // Parallel token columns; slot i of every column describes token i.
    // All columns share one capacity and are enlarged together.
    ids = new Int32Array(0); // interned image id; 0 marks an EOF/barrier token
    fileIds = new Int32Array(0);
    beginLines = new Int32Array(0);
    beginColumns = new Int32Array(0);
    endLines = new Int32Array(0);
    endColumns = new Int32Array(0);
    size = 0;
    capacity = 0;
    // Intern tables. Id 0 is reserved for EOF and maps to the empty image.
    imageToId = new Map();
    idImages = [''];
    fileToId = new Map();
    fileNames = [];
    constructor(minTileSize = 50) {
        this.minTileSize = minTileSize;
    }
    /** Return the id of `image`, assigning the next free id on first sight. */
    intern(image) {
        const known = this.imageToId.get(image);
        if (known !== undefined) {
            return known;
        }
        const fresh = this.idImages.length;
        this.imageToId.set(image, fresh);
        this.idImages.push(image);
        return fresh;
    }
    /** Return the id of `file`, assigning the next free id on first sight. */
    fileId(file) {
        const known = this.fileToId.get(file);
        if (known !== undefined) {
            return known;
        }
        const fresh = this.fileNames.length;
        this.fileToId.set(file, fresh);
        this.fileNames.push(file);
        return fresh;
    }
    /** Make room for `extra` more tokens, doubling the capacity (starting at 1024) as needed. */
    ensureCapacity(extra) {
        const required = this.size + extra;
        if (required <= this.capacity) {
            return;
        }
        let grown = this.capacity === 0 ? 1024 : this.capacity;
        while (grown < required) {
            grown *= 2;
        }
        this.ids = growInt32(this.ids, grown);
        this.fileIds = growInt32(this.fileIds, grown);
        this.beginLines = growInt32(this.beginLines, grown);
        this.beginColumns = growInt32(this.beginColumns, grown);
        this.endLines = growInt32(this.endLines, grown);
        this.endColumns = growInt32(this.endColumns, grown);
        this.capacity = grown;
    }
    /** Append one token row; the caller must have reserved capacity beforehand. */
    pushToken(id, fileId, bl, bc, el, ec) {
        const slot = this.size;
        this.size = slot + 1;
        this.ids[slot] = id;
        this.fileIds[slot] = fileId;
        this.beginLines[slot] = bl;
        this.beginColumns[slot] = bc;
        this.endLines[slot] = el;
        this.endColumns[slot] = ec;
    }
    /**
     * Append the token stream of one file. Tokens flagged `barrier` are stored
     * with id 0, and one more id-0 token (EOF) is always written after the
     * last token of the file.
     */
    addFile(file, raw) {
        const fileId = this.fileId(file);
        this.ensureCapacity(raw.length + 1);
        for (const token of raw) {
            if (token.barrier) {
                this.pushToken(0, fileId, token.line, token.column, token.line, token.column);
            }
            else {
                // Missing end coordinates fall back to the begin coordinates.
                this.pushToken(this.intern(token.image), fileId, token.line, token.column, token.endLine ?? token.line, token.endColumn ?? token.column);
            }
        }
        this.pushToken(0, fileId, 0, 0, 0, 0); // EOF
    }
    /** Number of stored tokens, barriers and EOF markers included. */
    get tokenCount() {
        return this.size;
    }
    /** The id column itself (not a copy), for the collector's inner loops. */
    get idColumn() {
        return this.ids;
    }
    /** Build a TokenEntry for the token at `index`; `undefined` if the index is out of range. */
    entryAt(index) {
        if (index < 0 || index >= this.size) {
            return undefined;
        }
        const id = this.ids[index];
        const image = this.idImages[id];
        const file = this.fileNames[this.fileIds[index]];
        return new TokenEntry(image, id, index, file, this.beginLines[index], this.beginColumns[index], this.endLines[index], this.endColumns[index]);
    }
    /**
     * Run the detection over everything added so far and return the matches,
     * longest first, then by descending occurrence count, then by the
     * position of the first occurrence.
     */
    analyze() {
        if (this.size < this.minTileSize) {
            return [];
        }
        const hashed = this.hash();
        const total = hashed.markCount;
        if (total === 0) {
            return [];
        }
        const { markIndices, markHashes } = hashed;
        // Bring equal hashes together with a stable radix sort instead of a
        // comparator sort. hash() emits marks by descending token index and
        // the sort starts from the reversed positions, so inside a run of
        // equal hashes the token indices come out ascending, which is what
        // MatchCollector.collect expects.
        const order = radixSortByHash(markHashes, total);
        const collector = new MatchCollector(this, this.minTileSize);
        let runStart = 0;
        while (runStart < total) {
            const runHash = markHashes[order[runStart]];
            let runEnd = runStart + 1;
            while (runEnd < total && markHashes[order[runEnd]] === runHash) {
                runEnd++;
            }
            const runLength = runEnd - runStart;
            if (runLength > 1) {
                // A hash seen only once cannot be a duplicate; longer runs are collected.
                const group = new Int32Array(runLength);
                for (let offset = 0; offset < runLength; offset++) {
                    group[offset] = markIndices[order[runStart + offset]];
                }
                collector.collect(group);
            }
            runStart = runEnd;
        }
        const matches = collector.getMatches();
        // Ordering is for reproducible output only; it does not affect detection.
        matches.sort((a, b) => {
            const lengthDelta = b.tokenCount - a.tokenCount;
            if (lengthDelta !== 0) {
                return lengthDelta;
            }
            const occurrenceDelta = b.markCount - a.markCount;
            if (occurrenceDelta !== 0) {
                return occurrenceDelta;
            }
            return a.marks[0].token.index - b.marks[0].token.index;
        });
        return matches;
    }
    /**
     * Karp-Rabin rolling hash over windows of `minTileSize` tokens, walking the
     * stream from the last token to the first. Every operation is kept in
     * 32-bit integer arithmetic (`Math.imul`, `| 0`) so overflow wraps instead
     * of drifting into float64.
     *
     * Returns two parallel columns, token index and window hash, in order of
     * descending token index, plus the number of filled slots.
     */
    hash() {
        const ids = this.ids;
        const n = this.size;
        const tile = this.minTileSize;
        const MOD = 37;
        // MOD ** tile in wrapped 32-bit arithmetic: the weight of the token leaving the window.
        let lastMod = 1;
        for (let step = 0; step < tile; step++) {
            lastMod = Math.imul(lastMod, MOD);
        }
        let lastHash = 0;
        const markIndices = new Int32Array(n);
        const markHashes = new Int32Array(n);
        let m = 0;
        for (let i = n - 1; i >= 0; i--) {
            const current = ids[i];
            if (current === 0) {
                // EOF/barrier: restart the hash and consume the tile-1 tokens in
                // front of it without marking them, since their windows would
                // span the boundary. This loop moves the OUTER index i. If
                // another barrier is met, i is left just after it so the
                // outer loop lands on that barrier next.
                lastHash = 0;
                const stop = Math.max(0, i - tile + 1);
                while (i > stop) {
                    const id = ids[i - 1];
                    lastHash = (Math.imul(MOD, lastHash) + id) | 0;
                    if (id === 0) {
                        break;
                    }
                    i--;
                }
                continue;
            }
            const aheadIndex = i + tile;
            const last = aheadIndex < n ? ids[aheadIndex] : 0;
            lastHash = (Math.imul(MOD, lastHash) + current - Math.imul(lastMod, last)) | 0;
            markIndices[m] = i;
            markHashes[m] = lastHash;
            m++;
        }
        return { markIndices, markHashes, markCount: m };
    }
}
265
+ exports.CpdCore = CpdCore;
266
/**
 * Allocate a zero-filled Int32Array of `capacity` elements and copy `src` into
 * its beginning. `src` itself is not modified.
 */
function growInt32(src, capacity) {
    const widened = new Int32Array(capacity);
    widened.set(src, 0);
    return widened;
}
271
/**
 * Sort the positions 0..count-1 by `markHashes[position]`, ascending as signed
 * 32-bit values, using a stable least-significant-digit radix sort with four
 * byte-wide passes.
 *
 * The sort starts from the positions in descending order (count-1 .. 0), and
 * stability keeps that relative order among equal hashes. Because the caller's
 * columns run by descending token index, equal-hash runs therefore end up in
 * ascending token index.
 *
 * Returns a Uint32Array holding the sorted positions.
 */
function radixSortByHash(markHashes, count) {
    // Flipping the sign bit maps signed order onto unsigned order, so plain
    // byte-wise bucketing yields the signed ordering.
    const biased = new Uint32Array(count);
    for (let pos = 0; pos < count; pos++) {
        biased[pos] = (markHashes[pos] ^ 0x80000000) >>> 0;
    }
    let current = new Uint32Array(count);
    for (let slot = 0, pos = count - 1; slot < count; slot++, pos--) {
        current[slot] = pos;
    }
    let scratch = new Uint32Array(count);
    // offsets[d + 1] first counts digit d, then the prefix sum turns
    // offsets[d] into the first output slot for digit d.
    const offsets = new Int32Array(257);
    for (const shift of [0, 8, 16, 24]) {
        offsets.fill(0);
        for (let slot = 0; slot < count; slot++) {
            const digit = (biased[current[slot]] >>> shift) & 0xff;
            offsets[digit + 1] += 1;
        }
        for (let digit = 1; digit <= 256; digit++) {
            offsets[digit] += offsets[digit - 1];
        }
        for (let slot = 0; slot < count; slot++) {
            const pos = current[slot];
            const digit = (biased[pos] >>> shift) & 0xff;
            scratch[offsets[digit]] = pos;
            offsets[digit] += 1;
        }
        [current, scratch] = [scratch, current];
    }
    return current;
}
304
/**
 * Turns groups of equal-hash marks into Match objects. This is a port of
 * MatchCollector.java with the algorithm left as is; marks are plain absolute
 * token indices, and ids are read straight from the core's id column.
 */
class MatchCollector {
    ma;
    minTileSize;
    /** Match lists keyed by the lowest mark index known to belong to them. */
    matchTree = new Map();
    /** For each mark index, the set of mark indices it has been matched with. */
    tokenMatchSets = new Map();
    ids;
    tokenCount;
    /**
     * @param ma - the core; must provide `idColumn`, `tokenCount` and `entryAt`.
     * @param minTileSize - shortest span, in tokens, that is reported.
     */
    constructor(ma, minTileSize) {
        this.ma = ma;
        this.minTileSize = minTileSize;
        this.ids = ma.idColumn;
        this.tokenCount = ma.tokenCount;
    }
    /**
     * Examine every pair in one equal-hash group and report the pairs that are
     * real duplicates. `marks` holds token indices in ascending order.
     */
    collect(marks) {
        let skipped = 0;
        // After handling mark i, jump past the marks that sat closer than
        // minTileSize to it.
        for (let i = 0; i < marks.length - 1; i += skipped + 1) {
            skipped = 0;
            const mark1 = marks[i];
            for (let j = i + 1; j < marks.length; j++) {
                const mark2 = marks[j];
                const diff = mark1 - mark2;
                if (-diff < this.minTileSize) {
                    // The two starts are less than one tile apart.
                    skipped++;
                    continue;
                }
                if (this.hasPreviousDupe(mark1, mark2)) {
                    // The duplication already starts one token earlier.
                    continue;
                }
                const dupes = this.countDuplicateTokens(mark1, mark2);
                if (dupes < this.minTileSize) {
                    continue;
                }
                if (diff + dupes >= 1) {
                    continue; // self-overlap: the first span runs past the start of the second
                }
                this.reportMatch(mark1, mark2, dupes);
            }
        }
    }
    /**
     * Record that `dupes` tokens starting at `mark1` and at `mark2` are equal:
     * either drop the pair as already covered, merge it into an existing
     * match, or create a new match.
     */
    reportMatch(mark1, mark2, dupes) {
        const known = this.tokenMatchSets.get(mark1);
        if (known?.has(mark2)) {
            return;
        }
        // The match list is filed under the smallest index mark1 is linked to.
        let lowestKey = mark1;
        if (known) {
            for (const key of known) {
                if (key < lowestKey)
                    lowestKey = key;
            }
        }
        let matches = this.matchTree.get(lowestKey);
        if (!matches) {
            matches = [];
            this.matchTree.set(lowestKey, matches);
        }
        for (let i = 0; i < matches.length; i++) {
            const m = matches[i];
            for (const otherMark of m.marks) {
                const otherEnd = otherMark.token.index;
                if (otherEnd === mark1)
                    continue;
                if (otherEnd < mark2 && otherEnd + m.tokenCount >= mark2 + dupes) {
                    return; // nested inside an existing match
                }
                else if (mark2 < otherEnd && mark2 + dupes >= otherEnd + m.tokenCount) {
                    matches.splice(i, 1); // the new span covers this one: drop it
                    i--;
                    break;
                }
                else if (dupes === m.tokenCount) {
                    // Same length: mark2 becomes one more occurrence of m.
                    for (const other of m.marks) {
                        this.registerTokenMatch(other.token.index, mark2);
                    }
                    m.addMark(this.entry(mark2));
                    return;
                }
            }
        }
        matches.push(new Match(dupes, new Mark(this.entry(mark1)), new Mark(this.entry(mark2))));
        this.registerTokenMatch(mark1, mark2);
    }
    /**
     * Materialize the TokenEntry for a mark.
     *
     * @throws Error if the core reports the index as out of range; marks come
     * from the token stream, so this is not expected to happen.
     */
    entry(index) {
        const entry = this.ma.entryAt(index);
        if (!entry)
            throw new Error(`token index out of range: ${index}`);
        return entry;
    }
    /** Link the two mark indices to each other in `tokenMatchSets`. */
    registerTokenMatch(mark1, mark2) {
        let s1 = this.tokenMatchSets.get(mark1);
        if (!s1) {
            s1 = new Set();
            this.tokenMatchSets.set(mark1, s1);
        }
        let s2 = this.tokenMatchSets.get(mark2);
        if (!s2) {
            s2 = new Set();
            this.tokenMatchSets.set(mark2, s2);
        }
        s1.add(mark2);
        s2.add(mark1);
    }
    /** All collected matches as one flat array, in `matchTree` insertion order. */
    getMatches() {
        const result = [];
        for (const matches of this.matchTree.values()) {
            // Append one by one: spreading a list into push() passes every
            // element as a call argument and can throw RangeError on very
            // large lists.
            for (const match of matches) {
                result.push(match);
            }
        }
        return result;
    }
    /** True when the tokens right before both marks also match, i.e. the pair is not a span start. */
    hasPreviousDupe(mark1, mark2) {
        if (mark1 === 0)
            return false;
        return !this.matchEnded(mark1 - 1, mark2 - 1);
    }
    /** Number of consecutive equal tokens starting at the two marks. */
    countDuplicateTokens(mark1, mark2) {
        let index = 0;
        while (!this.matchEnded(mark1 + index, mark2 + index)) {
            index++;
        }
        return index;
    }
    /**
     * True when a common span cannot include positions `a` and `b`: either
     * index is out of range, the ids differ, or the token is EOF/barrier (id 0).
     */
    matchEnded(a, b) {
        if (a < 0 || b < 0 || a >= this.tokenCount || b >= this.tokenCount)
            return true;
        const id1 = this.ids[a];
        const id2 = this.ids[b];
        return id1 !== id2 || id1 === 0 || id2 === 0;
    }
}
@@ -0,0 +1,27 @@
1
+ import { type Mark, type Match } from './core';
2
+ import { type TokenizeOptions } from './tokenizers';
3
+ export { Mark, Match, TokenEntry } from './core';
4
/** Options for the {@link Cpd} constructor: the tokenizer options plus detector settings. */
export interface CpdOptions extends TokenizeOptions {
    /**
     * Shortest duplicated span, counted in tokens, that is reported. The
     * facade uses 50 when this is omitted.
     */
    minTileSize?: number;
    /**
     * For `.ts` files, also extract and tokenize the inline templates of
     * `@Component` declarations (default: false).
     */
    angularInlineTemplates?: boolean;
}
9
/** Source range covered by one occurrence of a match, as produced by `Cpd.locationForMark`. */
export interface MatchLocation {
    /** File of the occurrence's first token. */
    path: string;
    /** Begin line of the first token. */
    startLine: number;
    /** Begin column of the first token. */
    startColumn: number;
    /** End line of the last token of the span. */
    endLine: number;
    /** End column of the last token of the span. */
    endColumn: number;
}
16
/**
 * High-level copy-paste detector. Feed it sources with {@link Cpd.addPath} or
 * {@link Cpd.addSource}, then call {@link Cpd.run} to obtain the matches.
 */
export declare class Cpd {
    private core;
    private opts;
    /** @param opts - detector and tokenizer options; every field is optional. */
    constructor(opts?: CpdOptions);
    /** Read `filePath` from disk as UTF-8 and add its content as a source. */
    addPath(filePath: string): void;
    /**
     * Tokenize `source` with the tokenizer selected by the extension of
     * `filePath` and add the tokens. Unrecognized extensions are ignored.
     */
    addSource(filePath: string, source: string): void;
    /** Analyze everything added so far and return the matches. */
    run(): Match[];
    /**
     * Source range of the occurrence `mark`, given the match length in tokens.
     */
    locationForMark(mark: Mark, tokenCount: number): MatchLocation;
    /** Plain-text report for eyeballing / diff tests; runs the analysis when `matches` is omitted. */
    report(matches?: Match[]): string;
}
package/dist/index.js ADDED
@@ -0,0 +1,153 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.Cpd = exports.TokenEntry = exports.Match = exports.Mark = void 0;
37
+ /**
38
+ * Public entry point. The {@link Cpd} facade dispatches each source by extension
39
+ * to the right tokenizer, feeds the tokens to {@link CpdCore}, and materializes
40
+ * match locations for reporting.
41
+ *
42
+ * @packageDocumentation
43
+ */
44
+ const fs = __importStar(require("node:fs"));
45
+ const path = __importStar(require("node:path"));
46
+ const angular_1 = require("./angular");
47
+ const core_1 = require("./core");
48
+ const svelte_1 = require("./svelte");
49
+ const tokenizers_1 = require("./tokenizers");
50
+ const vue_1 = require("./vue");
51
+ var core_2 = require("./core");
52
+ Object.defineProperty(exports, "Mark", { enumerable: true, get: function () { return core_2.Mark; } });
53
+ Object.defineProperty(exports, "Match", { enumerable: true, get: function () { return core_2.Match; } });
54
+ Object.defineProperty(exports, "TokenEntry", { enumerable: true, get: function () { return core_2.TokenEntry; } });
55
/** Extensions handled by the TypeScript branch, which can also pull in Angular inline templates. */
const TS_EXT = new Set(['.ts', '.mts', '.cts']);
/** Extensions sent to the same script tokenizer without template extraction. */
const JSX_EXT = new Set(['.tsx', '.jsx', '.js', '.mjs', '.cjs']);
/** Extensions tokenized as (Angular) HTML templates. */
const HTML_EXT = new Set(['.html', '.htm']);
58
/**
 * High-level copy-paste detector. Sources are added with {@link Cpd.addPath} or
 * {@link Cpd.addSource}; {@link Cpd.run} then returns the matches found by the
 * core engine.
 */
class Cpd {
    core;
    opts;
    /**
     * @param opts - detector and tokenizer options. Omitted fields fall back to
     * the defaults spelled out below.
     */
    constructor(opts = {}) {
        this.opts = {
            minTileSize: opts.minTileSize ?? 50,
            ignoreIdentifiers: opts.ignoreIdentifiers ?? false,
            ignoreLiterals: opts.ignoreLiterals ?? false,
            pmdTypescriptCompatibility: opts.pmdTypescriptCompatibility ?? true,
            svelteTemplates: opts.svelteTemplates ?? true,
            vueTemplates: opts.vueTemplates ?? true,
            angularInlineTemplates: opts.angularInlineTemplates ?? false,
        };
        this.core = new core_1.CpdCore(this.opts.minTileSize);
    }
    /** Read `filePath` synchronously as UTF-8 and add its content as a source. */
    addPath(filePath) {
        this.addSource(filePath, fs.readFileSync(filePath, 'utf-8'));
    }
    /**
     * Tokenize `source` with the tokenizer chosen from the lower-cased
     * extension of `filePath` and add the tokens to the core. A source with an
     * unrecognized extension is silently ignored.
     */
    addSource(filePath, source) {
        const ext = path.extname(filePath).toLowerCase();
        // Tokenizer-facing subset of the options.
        const tok = {
            ignoreIdentifiers: this.opts.ignoreIdentifiers,
            ignoreLiterals: this.opts.ignoreLiterals,
            pmdTypescriptCompatibility: this.opts.pmdTypescriptCompatibility,
            svelteTemplates: this.opts.svelteTemplates,
            vueTemplates: this.opts.vueTemplates,
        };
        if (TS_EXT.has(ext)) {
            const script = (0, tokenizers_1.tokenizeTypeScript)(filePath, source, tok, (0, tokenizers_1.scriptKindFor)(ext));
            const all = [...script];
            if (this.opts.angularInlineTemplates && source.includes('@Component')) {
                for (const tpl of (0, angular_1.extractAngularInlineTemplates)(filePath, source)) {
                    const tplTokens = (0, angular_1.tokenizeAngularHtml)(filePath, tpl.code, { line: tpl.line, col: tpl.col }, tok);
                    if (tplTokens.length) {
                        // A barrier keeps windows from spanning the script/template boundary.
                        all.push({ image: '', line: tpl.line, column: tpl.col, barrier: true });
                        // Append one by one: spreading the tokens into push()
                        // passes each as a call argument and can throw
                        // RangeError for a very large template.
                        for (const token of tplTokens) {
                            all.push(token);
                        }
                    }
                }
            }
            this.core.addFile(filePath, all);
            return;
        }
        if (JSX_EXT.has(ext)) {
            this.core.addFile(filePath, (0, tokenizers_1.tokenizeTypeScript)(filePath, source, tok, (0, tokenizers_1.scriptKindFor)(ext)));
            return;
        }
        if (ext === '.vue') {
            this.core.addFile(filePath, (0, vue_1.tokenizeVue)(filePath, source, tok));
            return;
        }
        if (ext === '.svelte') {
            this.core.addFile(filePath, (0, svelte_1.tokenizeSvelte)(filePath, source, tok));
            return;
        }
        if (HTML_EXT.has(ext)) {
            // External Angular template (plain HTML parses too).
            this.core.addFile(filePath, (0, angular_1.tokenizeAngularHtml)(filePath, source, { line: 1, col: 1 }, tok));
            return;
        }
        // Unknown extension: ignore.
    }
    /** Analyze everything added so far and return the matches. */
    run() {
        return this.core.analyze();
    }
    /**
     * Source range of one occurrence of a match.
     *
     * @param mark - the occurrence; its token is the first token of the span.
     * @param tokenCount - length of the match in tokens.
     * @returns the path and begin position of the first token plus the end
     * position of the last token; if the last token cannot be resolved, the
     * first token's end position is used.
     */
    locationForMark(mark, tokenCount) {
        const start = mark.token;
        const end = this.core.entryAt(start.index + tokenCount - 1) ?? start;
        return {
            path: start.file,
            startLine: start.beginLine,
            startColumn: start.beginColumn,
            endLine: end.endLine,
            endColumn: end.endColumn,
        };
    }
    /**
     * Plain text report for eyeballing / diff tests. Each match produces a
     * header line, one `file:line:column` line per occurrence and a blank
     * line. When `matches` is omitted the analysis is run first.
     */
    report(matches = this.run()) {
        const lines = [];
        for (const m of matches) {
            const marks = m.marks;
            lines.push(`Found a ${m.tokenCount} token (${m.markCount} occurrences) duplication:`);
            for (const mk of marks) {
                const t = mk.token;
                lines.push(`  ${t.file}:${t.beginLine}:${t.beginColumn}`);
            }
            lines.push('');
        }
        return lines.join('\n');
    }
}
149
+ exports.Cpd = Cpd;
150
+ // Example:
151
+ // const cpd = new Cpd({ minTileSize: 50 });
152
+ // for (const f of files) cpd.addPath(f);
153
+ // console.log(cpd.report());
@@ -0,0 +1,4 @@
1
+ import { type RawToken } from './core';
2
+ import { type TokenizeOptions } from './tokenizers';
3
/**
 * Tokenize a `.svelte` component (`<script>` blocks + markup).
 *
 * @param filePath - path of the component.
 * @param source - full text of the component.
 * @param options - tokenizer options; may be omitted.
 * @returns the raw tokens of the component.
 */
export declare function tokenizeSvelte(
    filePath: string,
    source: string,
    options?: TokenizeOptions,
): RawToken[];