npm - clone-alert - Versions diffs - 0.3.0 → 0.4.0 - Mend

clone-alert 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/README.md +125 -16
package/dist/baseline.d.ts +20 -0
package/dist/baseline.js +105 -0
package/dist/cli.d.ts +8 -2
package/dist/cli.js +369 -68
package/dist/core.d.ts +2 -0
package/dist/core.js +19 -12
package/dist/files.d.ts +2 -0
package/dist/files.js +236 -0
package/dist/index.d.ts +21 -0
package/dist/index.js +49 -0
package/dist/stats.d.ts +28 -0
package/dist/stats.js +37 -0
package/package.json +3 -3
package/scripts/compare-pmd-cpd.mjs +0 -565

package/dist/files.js ADDED Viewed

@@ -0,0 +1,236 @@
+"use strict";
+// Module-interop helper; an already-defined `this.__createBinding` is reused if present.
+// Exposes property `k` of `m` on `o` under the name `k2` (`k2` defaults to `k`).
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    // Replace the descriptor with a live getter when there is none, when it is an accessor
+    // on a module not flagged `__esModule`, or when it is a writable/configurable data property.
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    // Fallback when `Object.create` is unavailable: copy the current value once.
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+// Module-interop helper; an already-defined `this.__setModuleDefault` is reused if present.
+// Stores `v` as the `default` property of `o` (enumerable defineProperty when
+// `Object.create` exists, plain assignment otherwise).
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+// Module-interop helper applied to `require()` results (see the `fs`/`path` requires
+// below); an already-defined `this.__importStar` is reused if present.
+var __importStar = (this && this.__importStar) || (function () {
+    // Lists own property names. On first call it replaces itself with
+    // `Object.getOwnPropertyNames`, or with a `for...in` + hasOwnProperty fallback.
+    var ownKeys = function(o) {
+        ownKeys = Object.getOwnPropertyNames || function (o) {
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        // Modules flagged `__esModule` are returned untouched.
+        if (mod && mod.__esModule) return mod;
+        var result = {};
+        // Re-expose every own key except "default", then set `default` to the module itself.
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
+        __setModuleDefault(result, mod);
+        return result;
+    };
+})();
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.collectFiles = collectFiles;
+exports.toPosix = toPosix;
+const fs = __importStar(require("node:fs"));
+const path = __importStar(require("node:path"));
+// File discovery for the CLI: recursive walk with --exclude globs and .gitignore
+// pruning. Kept out of cli.ts so the arg parser/reporters stay readable. Both the
+// exclude matchers and the .gitignore rules prune *during* the walk — an ignored
+// directory is never read, never a post-filter over a fully materialized list.
+/**
+ * Collects the absolute paths of the files to analyze.
+ *
+ * @param paths            Files and/or directories to scan. Explicit paths are always
+ *                         visited, even when a .gitignore rule would match them.
+ * @param extensions       Collection with a `has()` method; a file is kept only when
+ *                         its lower-cased `path.extname` is a member.
+ * @param excludePatterns  Globs tested against the absolute, forward-slash path
+ *                         (directories are tested with a trailing separator).
+ * @param respectGitignore When true, .gitignore rules prune entries below the roots.
+ * @param nonRecursive     When true, only the direct children of a root directory
+ *                         are scanned.
+ * @returns Absolute file paths in walk order (children sorted by name), deduplicated.
+ * @throws Error `path does not exist: …` when an explicitly passed path cannot be read.
+ */
+function collectFiles(paths, extensions, excludePatterns = [], respectGitignore = true, nonRecursive = false) {
+    const files = [];
+    const seen = new Set();
+    const excludeMatchers = excludePatterns.map((pattern) => globToRegExp(toPosix(pattern)));
+    // Real paths of the directories on the current walk stack. A directory that
+    // resolves to one of them is a symlink cycle and must not be entered again.
+    const activeDirs = new Set();
+    const visit = (entry, layers, isTopLevel) => {
+        const full = path.resolve(entry);
+        let stat;
+        try {
+            stat = fs.statSync(full);
+        }
+        catch (error) {
+            // A dangling or self-referencing symlink, or an entry removed while the
+            // walk is running, must not abort the whole scan. Explicit roots (and any
+            // other failure) still raise the original error.
+            const code = error && error.code;
+            if (!isTopLevel && (code === 'ENOENT' || code === 'ELOOP'))
+                return;
+            throw new Error(`path does not exist: ${entry}`, { cause: error });
+        }
+        // Explicitly passed paths are always scanned; .gitignore only prunes below them.
+        if (!isTopLevel && respectGitignore && isGitIgnored(layers, full, stat.isDirectory()))
+            return;
+        if (stat.isDirectory()) {
+            // --non-recursive: scan a directory's direct children, never descend into subdirs.
+            if (!isTopLevel && nonRecursive)
+                return;
+            if (isExcluded(`${full}${path.sep}`, excludeMatchers))
+                return;
+            // statSync follows symlinks, so a link pointing at an ancestor would recurse
+            // until the OS rejects the path; stop as soon as the cycle closes.
+            const real = fs.realpathSync(full);
+            if (activeDirs.has(real))
+                return;
+            // The directory's own .gitignore governs its children, not itself.
+            const childLayers = respectGitignore ? withGitignore(layers, full) : layers;
+            activeDirs.add(real);
+            try {
+                for (const child of fs.readdirSync(full).sort()) {
+                    if (child === 'node_modules' || child === '.git' || child === 'dist')
+                        continue;
+                    visit(path.join(full, child), childLayers, false);
+                }
+            }
+            finally {
+                activeDirs.delete(real);
+            }
+            return;
+        }
+        if (!stat.isFile())
+            return;
+        if (isExcluded(full, excludeMatchers))
+            return;
+        if (!extensions.has(path.extname(full).toLowerCase()))
+            return;
+        if (seen.has(full))
+            return;
+        seen.add(full);
+        files.push(full);
+    };
+    // Seed each root with the .gitignore files of its repo ancestors so a repo-root
+    // file applies even when only a subdirectory is scanned.
+    for (const entry of paths) {
+        const full = path.resolve(entry);
+        const seed = respectGitignore && fs.existsSync(full) ? seedGitignoreLayers(full) : [];
+        visit(entry, seed, true);
+    }
+    return files;
+}
+/**
+ * Rewrites a path so that every platform separator (`path.sep`) becomes `/`.
+ * Paths that already use forward slashes come back unchanged.
+ */
+function toPosix(value) {
+    return value.replaceAll(path.sep, '/');
+}
+/** True when the forward-slash form of `filePath` matches at least one exclude matcher. */
+function isExcluded(filePath, matchers) {
+    const candidate = toPosix(filePath);
+    for (const matcher of matchers) {
+        if (matcher.test(candidate)) {
+            return true;
+        }
+    }
+    return false;
+}
+/**
+ * Compiles an --exclude glob into a regular expression that must match the whole
+ * forward-slash path.
+ *
+ *   `**`  any run of characters, `/` included
+ *   `*`   any run of characters within one path segment
+ *   `?`   exactly one character other than `/` (same meaning as in the .gitignore
+ *         matcher of this file; a literal `?` in a path still matches)
+ *
+ * Every other character is matched literally.
+ */
+function globToRegExp(pattern) {
+    let source = '';
+    for (let index = 0; index < pattern.length; index++) {
+        const char = pattern[index];
+        if (char === '*') {
+            if (pattern[index + 1] === '*') {
+                source += '.*';
+                index++; // consume the second `*`
+            }
+            else {
+                source += '[^/]*';
+            }
+            continue;
+        }
+        if (char === '?') {
+            source += '[^/]';
+            continue;
+        }
+        source += escapeRegExp(char);
+    }
+    return new RegExp(`^${source}$`);
+}
+/**
+ * Prefixes `char` with a backslash when it contains a regex metacharacter.
+ * `*` is deliberately not covered: callers translate it before getting here.
+ */
+function escapeRegExp(char) {
+    const needsEscape = /[\\^$+?.()|[\]{}]/.test(char);
+    if (needsEscape) {
+        return '\\' + char;
+    }
+    return char;
+}
+/**
+ * Loads the rules of `<dir>/.gitignore`.
+ * Returns `{ base, rules }` with `base` set to `dir`, or `null` when the file is
+ * missing or yields no rules.
+ */
+function loadGitignore(dir) {
+    const ignoreFile = path.join(dir, '.gitignore');
+    if (fs.existsSync(ignoreFile)) {
+        const rules = parseGitignore(fs.readFileSync(ignoreFile, 'utf-8'));
+        if (rules.length) {
+            return { base: dir, rules };
+        }
+    }
+    return null;
+}
+/**
+ * Returns `layers` extended with the .gitignore layer of `dir`; when `dir` has no
+ * usable .gitignore the very same `layers` array is returned.
+ */
+function withGitignore(layers, dir) {
+    const own = loadGitignore(dir);
+    if (!own) {
+        return layers;
+    }
+    return [...layers, own];
+}
+// Climb from startPath towards the filesystem root until a directory containing
+// `.git` is found, remembering every directory visited above the start. Outside a
+// repository .gitignore files carry no meaning, so the result is empty in that case.
+function seedGitignoreLayers(startPath) {
+    let current = fs.statSync(startPath).isDirectory() ? startPath : path.dirname(startPath);
+    // Ancestors of the start directory, kept shallow-first so that rules from deeper
+    // files are evaluated later and therefore win.
+    const shallowFirst = [];
+    while (!fs.existsSync(path.join(current, '.git'))) {
+        const parent = path.dirname(current);
+        if (parent === current) {
+            // Reached the filesystem root without seeing a repository.
+            return [];
+        }
+        shallowFirst.unshift(parent);
+        current = parent;
+    }
+    return shallowFirst
+        .map((ancestor) => loadGitignore(ancestor))
+        .filter(Boolean);
+}
+/**
+ * Decides whether `fullPath` is ignored by the given .gitignore layers.
+ * Layers are evaluated in order and, within a layer, rules in file order; the last
+ * rule that matches decides, so a negated rule can re-include a path.
+ */
+function isGitIgnored(layers, fullPath, isDir) {
+    let verdict = false;
+    for (const { base, rules } of layers) {
+        const relative = toPosix(path.relative(base, fullPath));
+        // A layer only speaks for paths strictly below its own directory.
+        const outsideLayer = relative === '' || relative.startsWith('../');
+        if (outsideLayer) {
+            continue;
+        }
+        for (const { dirOnly, regex, negated } of rules) {
+            // Directory-only rules never apply to files.
+            if ((isDir || !dirOnly) && regex.test(relative)) {
+                verdict = !negated;
+            }
+        }
+    }
+    return verdict;
+}
+/**
+ * Turns the text of a .gitignore file into an ordered list of
+ * `{ negated, dirOnly, regex }` rules. Blank lines and comments produce no rule.
+ */
+function parseGitignore(content) {
+    const rules = [];
+    content.split('\n').forEach((rawLine) => {
+        // Drop a CR and any trailing whitespace that is not backslash-escaped.
+        const trimmed = rawLine.replace(/\r$/, '').replace(/(?<!\\)\s+$/, '');
+        if (trimmed === '' || trimmed.startsWith('#')) {
+            return;
+        }
+        const negated = trimmed.startsWith('!');
+        let pattern = negated ? trimmed.slice(1) : trimmed;
+        // `\#` and `\!` stand for a literal leading `#` / `!`.
+        if (pattern.startsWith('\\#') || pattern.startsWith('\\!')) {
+            pattern = pattern.slice(1);
+        }
+        const dirOnly = pattern.endsWith('/');
+        if (dirOnly) {
+            pattern = pattern.slice(0, -1);
+        }
+        if (pattern === '') {
+            return;
+        }
+        // Any remaining slash ties the pattern to the .gitignore's own directory;
+        // without one it may match at any depth.
+        const anchored = pattern.includes('/');
+        if (pattern.startsWith('/')) {
+            pattern = pattern.slice(1);
+        }
+        rules.push({ negated, dirOnly, regex: gitignoreToRegExp(pattern, anchored) });
+    });
+    return rules;
+}
+/**
+ * Compiles one .gitignore pattern (leading `!`, leading `/` and trailing `/` already
+ * removed by the caller) into a regular expression tested against a forward-slash
+ * path relative to the .gitignore's directory.
+ *
+ *   `**` + `/` at a segment start   zero or more leading directories
+ *   `**` elsewhere                  any run of characters, `/` included
+ *   `*`                             any run of characters within one segment
+ *   `?`                             one character other than `/`
+ *   `\x`                            the character `x` taken literally
+ *
+ * Bracket expressions such as `[abc]` are not interpreted; `[` and `]` match
+ * literally.
+ *
+ * @param pattern  The cleaned pattern text.
+ * @param anchored True to match only from the start of the relative path; false to
+ *                 match at any directory boundary.
+ */
+function gitignoreToRegExp(pattern, anchored) {
+    let source = '';
+    for (let index = 0; index < pattern.length; index++) {
+        const char = pattern[index];
+        // gitignore uses the backslash as an escape: the next character loses any
+        // special meaning. A lone trailing backslash falls through and stays literal.
+        if (char === '\\' && index + 1 < pattern.length) {
+            const literal = pattern[index + 1];
+            // escapeRegExp leaves `*` alone, so escape it here explicitly.
+            source += literal === '*' ? '\\*' : escapeRegExp(literal);
+            index++;
+            continue;
+        }
+        if (char === '*') {
+            if (pattern[index + 1] === '*') {
+                const atStart = index === 0 || pattern[index - 1] === '/';
+                const slashAfter = pattern[index + 2] === '/';
+                if (atStart && slashAfter) {
+                    source += '(?:.*/)?'; // `**/` — zero or more leading dirs
+                    index += 2;
+                }
+                else {
+                    source += '.*'; // `**` spanning segments
+                    index += 1;
+                }
+            }
+            else {
+                source += '[^/]*';
+            }
+            continue;
+        }
+        if (char === '?') {
+            source += '[^/]';
+            continue;
+        }
+        source += escapeRegExp(char);
+    }
+    // Non-anchored patterns match at any directory boundary; the trailing group lets
+    // a matched directory also cover everything beneath it.
+    const prefix = anchored ? '^' : '(?:^|/)';
+    return new RegExp(`${prefix}${source}(?:/.*)?$`);
+}

package/dist/index.d.ts CHANGED Viewed

@@ -17,11 +17,32 @@ export interface MatchLocation {
 export declare class Cpd {
     private core;
     private opts;
+    /** Original source per file, retained so reporters can emit the duplicated code. */
+    private sources;
     constructor(opts?: CpdOptions);
+    /** Reads `filePath` as UTF-8 and hands the text to `addSource`. */
     addPath(filePath: string): void;
+    /** Registers `source` under `filePath`; the text is also kept in `sources`. */
     addSource(filePath: string, source: string): void;
+    /** Runs the analysis and returns the matches it produced. */
     run(): Match[];
+    /**
+     * The token images of a match's span (any occurrence — all share the same
+     * content). Lets the baseline layer fingerprint a match without reaching into
+     * the engine's storage.
+     */
+    spanImages(match: Match): string[];
+    /** Start/end position of the `tokenCount` tokens that begin at `mark`'s token. */
     locationForMark(mark: Mark, tokenCount: number): MatchLocation;
+    /**
+     * The duplicated source for a match: the full lines [startLine, endLine] of its
+     * first occurrence, like PMD's `getSourceCodeSlice`. Empty string if the file's
+     * source was not retained (e.g. a baseline-only match). Used by the xml/json/
+     * markdown reporters to embed the code fragment.
+     */
+    codeFragment(match: Match): string;
+    /**
+     * Total physical line count across all added sources — the denominator for
+     * the duplication percentage (see `stats.ts`). A trailing newline is not
+     * counted as an extra line.
+     */
+    totalLines(): number;
     /** Plain text report for eyeballing / diff tests. */
     report(matches?: Match[]): string;
 }

package/dist/index.js CHANGED Viewed

@@ -59,6 +59,8 @@ const HTML_EXT = new Set(['.html', '.htm']);
 class Cpd {
     core;
     opts;
+    /** Original source per file, retained so reporters can emit the duplicated code. */
+    sources = new Map();
     constructor(opts = {}) {
         this.opts = {
             minTileSize: opts.minTileSize ?? 50,
@@ -75,6 +77,7 @@ class Cpd {
         this.addSource(filePath, fs.readFileSync(filePath, 'utf-8'));
     }
     addSource(filePath, source) {
+        this.sources.set(filePath, source);
         const ext = path.extname(filePath).toLowerCase();
         const tok = {
             ignoreIdentifiers: this.opts.ignoreIdentifiers,
@@ -120,6 +123,19 @@ class Cpd {
     run() {
         return this.core.analyze();
     }
+    /**
+     * Returns the token images covered by a match, read from its first mark (every
+     * occurrence has the same content). Gives the baseline layer something to
+     * fingerprint without touching the engine's storage directly.
+     */
+    spanImages(match) {
+        const first = match.marks[0].token.index;
+        const { tokenCount } = match;
+        const images = new Array(tokenCount);
+        let offset = 0;
+        while (offset < tokenCount) {
+            images[offset] = this.core.imageAt(first + offset);
+            offset += 1;
+        }
+        return images;
+    }
     locationForMark(mark, tokenCount) {
         const start = mark.token;
         const end = this.core.entryAt(start.index + tokenCount - 1) ?? start;
@@ -131,6 +147,39 @@ class Cpd {
             endColumn: end.endColumn,
         };
     }
+    /**
+     * Returns the duplicated code of a match as whole lines, startLine through
+     * endLine of the first occurrence (comparable to PMD's `getSourceCodeSlice`).
+     * Yields an empty string when no source was retained for that file, e.g. for a
+     * baseline-only match. The xml/json/markdown reporters embed this fragment.
+     */
+    codeFragment(match) {
+        const { path: filePath, startLine, endLine } = this.locationForMark(match.marks[0], match.tokenCount);
+        const text = this.sources.get(filePath);
+        if (text === undefined) {
+            return '';
+        }
+        const allLines = text.split('\n');
+        // startLine is 1-based and endLine is inclusive.
+        const fragment = allLines.slice(startLine - 1, endLine);
+        return fragment.join('\n');
+    }
+    /**
+     * Sums the physical line counts of every added source; this is the denominator
+     * of the duplication percentage (see `stats.ts`). Empty sources contribute
+     * nothing and a final newline does not open an extra line.
+     */
+    totalLines() {
+        let count = 0;
+        this.sources.forEach((text) => {
+            if (text.length === 0) {
+                return;
+            }
+            const segments = text.split('\n').length;
+            if (text.endsWith('\n')) {
+                count += segments - 1;
+            }
+            else {
+                count += segments;
+            }
+        });
+        return count;
+    }
     /** Plain text report for eyeballing / diff tests. */
     report(matches = this.run()) {
         const lines = [];

package/dist/stats.d.ts ADDED Viewed

@@ -0,0 +1,28 @@
+/**
+ * Aggregate duplication statistics over a finished analysis — the `N clones`,
+ * duplicated-line count and overall percentage that jscpd reports. This is a
+ * reporting concern layered on top of {@link Cpd}; the match engine (`core.ts`)
+ * knows nothing about it.
+ *
+ * @packageDocumentation
+ */
+import type { Match } from './core';
+import type { Cpd } from './index';
+/** Aggregate figures produced by `computeStats`. */
+export interface DuplicationStats {
+    /** Number of detected duplications (matches). */
+    clones: number;
+    /** Physical lines across all analyzed files (the percentage denominator). */
+    totalLines: number;
+    /** Distinct lines covered by at least one clone (overlaps counted once). */
+    duplicatedLines: number;
+    /** `duplicatedLines / totalLines * 100` (not rounded), or 0 when nothing was analyzed. */
+    percentage: number;
+}
+/**
+ * Compute duplication stats. Duplicated lines are the union of every match's
+ * occurrence ranges per file, so overlapping clones in the same file are not
+ * double-counted. The figures are line-based and are not meant to be byte-exact
+ * with jscpd, which is not the reference implementation for this tool.
+ */
+export declare function computeStats(matches: Match[], cpd: Cpd): DuplicationStats;
+/**
+ * A one-line summary, e.g. `12 clones · 4.23% duplicated lines`. The noun is
+ * singular for exactly one clone and the percentage is shown with two decimals.
+ */
+export declare function formatStatsLine(stats: DuplicationStats): string;

package/dist/stats.js ADDED Viewed

@@ -0,0 +1,37 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.computeStats = computeStats;
+exports.formatStatsLine = formatStatsLine;
+/**
+ * Derives the duplication statistics of a finished analysis. For each file the
+ * line ranges of all match occurrences are merged into one set, so lines shared by
+ * overlapping clones count once. The figures are line-based and are not meant to be
+ * byte-exact with jscpd, which is not the reference implementation for this tool.
+ */
+function computeStats(matches, cpd) {
+    const linesPerFile = new Map();
+    for (const { marks, tokenCount } of matches) {
+        for (const mark of marks) {
+            const { path: file, startLine, endLine } = cpd.locationForMark(mark, tokenCount);
+            if (!linesPerFile.has(file)) {
+                linesPerFile.set(file, new Set());
+            }
+            const lineSet = linesPerFile.get(file);
+            for (let lineNo = startLine; lineNo <= endLine; lineNo += 1) {
+                lineSet.add(lineNo);
+            }
+        }
+    }
+    const duplicatedLines = [...linesPerFile.values()].reduce((sum, lineSet) => sum + lineSet.size, 0);
+    const totalLines = cpd.totalLines();
+    // Guard the division: with nothing analyzed the percentage is defined as 0.
+    const percentage = totalLines > 0 ? (duplicatedLines / totalLines) * 100 : 0;
+    return { clones: matches.length, totalLines, duplicatedLines, percentage };
+}
+/**
+ * Renders the stats as a single line such as `12 clones · 4.23% duplicated lines`;
+ * the noun is singular for exactly one clone.
+ */
+function formatStatsLine(stats) {
+    const { clones, percentage } = stats;
+    const label = clones === 1 ? 'clone' : 'clones';
+    return [`${clones} ${label}`, `${percentage.toFixed(2)}% duplicated lines`].join(' · ');
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clone-alert",
-  "version": "0.3.0",
+  "version": "0.4.0",
   "description": "PMD CPD-compatible copy-paste detector that finds duplicate code in TypeScript, JavaScript, JSX/TSX, Vue, Svelte and Angular.",
   "license": "MIT",
   "type": "commonjs",
@@ -19,7 +19,6 @@
   },
   "files": [
     "dist/**/*",
-    "scripts/compare-pmd-cpd.mjs",
     "README.md",
     "LICENSE"
   ],
@@ -61,6 +60,7 @@
     "check": "biome check .",
     "check:fix": "biome check --write .",
     "check-types": "tsc -p tsconfig.json --noEmit && tsc -p tsconfig.test.json --noEmit",
+    "badge": "npm run build && node dist/cli.js src --minimum-tokens 70 --format shields --no-fail-on-violation > clone-alert-badge.json",
     "compare:pmd": "npm run build && node scripts/compare-pmd-cpd.mjs",
     "lint": "npm run check:fix && npm run lint:knip && npm run check-types && npm run lint:cpd",
     "lint:cpd": "npm run build && node dist/cli.js --minimum-tokens 70 --files src --extensions ts --format text --fail-on-violation",
@@ -96,7 +96,7 @@
   "devDependencies": {
     "@angular/compiler": "^22.0.2",
     "@biomejs/biome": "^2.5.0",
-    "@types/node": "^24.0.0",
+    "@types/node": "^24.13.2",
     "@vue/compiler-sfc": "^3.5.38",
     "knip": "^6.5.0",
     "svelte": "^5.56.3",