npm - @vibecodeqa/cli - Versions diffs - 0.38.1 → 0.39.0 - Mend

@vibecodeqa/cli 0.38.1 → 0.39.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/runners/architecture.d.ts +1 -1
package/dist/runners/architecture.js +74 -2
package/dist/runners/duplication.d.ts +23 -2
package/dist/runners/duplication.js +237 -100
package/package.json +3 -1

package/dist/runners/architecture.d.ts CHANGED Viewed

@@ -23,5 +23,5 @@ export interface ArchGraph {
     godModules: string[];
     orphans: string[];
 }
-export declare function runArchitecture(cwd: string, workspace?: WorkspaceInfo): CheckResult;
+export declare function runArchitecture(cwd: string, workspace?: WorkspaceInfo): Promise<CheckResult>;
 export { generateArchSVG, generateDSM, generateLayerDiagram, generatePackageDiagram, generateSequenceDiagram } from "./diagrams.js";

package/dist/runners/architecture.js CHANGED Viewed

@@ -9,11 +9,13 @@
  *   6. Layer violations (optional: detect cross-layer imports)
  *   7. SVG architecture diagram
  */
+import { existsSync } from "node:fs";
 import { basename, dirname, extname } from "node:path";
+import { cruise } from "dependency-cruiser";
 import { getProductionFiles } from "../fs-utils.js";
 import { gradeFromScore } from "../types.js";
 import { generateContainerDiagram } from "./diagrams.js";
-export function runArchitecture(cwd, workspace) {
+export async function runArchitecture(cwd, workspace) {
     const start = Date.now();
     const issues = [];
     const files = getProductionFiles(cwd);
@@ -27,7 +29,12 @@ export function runArchitecture(cwd, workspace) {
             duration: Date.now() - start,
         };
     }
-    const graph = buildGraph(files);
+    // Prefer dependency-cruiser (real module resolution incl. tsconfig paths) and
+    // fall back to the built-in resolver when it can't cover the project (e.g. SFC-
+    // heavy or monorepos where cross-package imports use bare specifiers).
+    const cruised = await buildGraphViaCruise(cwd, files);
+    const graph = cruised ?? buildGraph(files);
+    const tool = cruised ? "dependency-cruiser" : "built-in";
     // ── Circular dependencies ──
     const cycles = findCycles(graph.nodes);
     for (const cycle of cycles.slice(0, 5)) {
@@ -142,12 +149,77 @@ export function runArchitecture(cwd, workspace) {
             graph: graphData,
             containerSvg: generateContainerDiagram(cwd),
             assessment,
+            tool,
         },
         issues,
         duration: Date.now() - start,
     };
 }
 // ── Graph building ──
+/** Build the import graph with dependency-cruiser (real module resolution,
+ *  tsconfig path aliases, transitive TS deps). Returns null — so the caller
+ *  falls back to the built-in resolver — if it errors or covers too little of
+ *  the project (e.g. .vue/.svelte SFCs it can't resolve, or monorepo bare imports).
+ *
+ *  @param cwd   Directory the cruise runs from; the process working directory is
+ *               switched to it for the duration of the call and restored afterwards.
+ *  @param files Production files; only each entry's `path` and `ext` are read here.
+ *  @returns `{ nodes }`, a Map keyed by file path whose values hold `imports`,
+ *           `importedBy`, `dir` and `exports` (always 0 on this path), or null.
+ *
+ *  NOTE(review): process.chdir() is process-wide, so anything else running while
+ *  `cruise` is awaited sees `cwd` as its working directory — confirm runners are
+ *  not executed concurrently.
+ *  NOTE(review): dependency-cruiser's API docs describe passing a parsed tsconfig
+ *  through the 4th (transpile options) argument of `cruise`; confirm that setting
+ *  `options.tsConfig.fileName` alone makes path aliases resolve here. */
+async function buildGraphViaCruise(cwd, files) {
+    // .vue/.svelte single-file components need our SFC-aware resolver; dependency-cruiser
+    // doesn't resolve them without a bundler plugin, so hand those projects to the built-in.
+    if (files.some((f) => f.ext === ".vue" || f.ext === ".svelte"))
+        return null;
+    const fileSet = new Set(files.map((f) => f.path)); // only these paths may become nodes or edge targets
+    const roots = [...new Set(files.map((f) => f.path.split("/")[0]).filter(Boolean))]; // unique first path segments = cruise entry points
+    if (roots.length === 0)
+        return null;
+    const prevCwd = process.cwd(); // remembered so the finally block can restore it
+    try {
+        process.chdir(cwd); // dependency-cruiser resolves relative to process.cwd()
+        const options = {
+            doNotFollow: { path: "node_modules" },
+            exclude: { path: "(\\.test\\.|\\.spec\\.|/node_modules/|/dist/|/\\.vibe-check/)" },
+            tsPreCompilationDeps: true,
+        };
+        if (existsSync("tsconfig.json")) // relative path: checked inside cwd because of the chdir above
+            options.tsConfig = { fileName: "tsconfig.json" };
+        const result = await cruise(roots, options);
+        const output = result.output;
+        if (typeof output === "string") // a string output has no module list to read, so give up
+            return null;
+        const modules = output.modules ?? [];
+        const nodes = new Map();
+        for (const m of modules) { // pass 1: one node per cruised module that is a known production file
+            if (fileSet.has(m.source)) {
+                nodes.set(m.source, { path: m.source, imports: [], importedBy: [], dir: dirname(m.source), exports: 0 });
+            }
+        }
+        for (const m of modules) { // pass 2: outgoing edges
+            const node = nodes.get(m.source);
+            if (!node)
+                continue;
+            const seen = new Set(); // de-duplicates repeated imports of the same target
+            for (const d of m.dependencies ?? []) {
+                if (d.coreModule || !d.resolved || d.resolved === m.source) // skip core modules, unresolved deps and self-imports
+                    continue;
+                if (!fileSet.has(d.resolved) || seen.has(d.resolved)) // skip targets outside the file set and duplicates
+                    continue;
+                seen.add(d.resolved);
+                node.imports.push(d.resolved);
+            }
+        }
+        for (const [path, node] of nodes) { // pass 3: derive reverse edges from the forward ones
+            for (const imp of node.imports)
+                nodes.get(imp)?.importedBy.push(path);
+        }
+        // Too little coverage → the built-in resolver is a better bet for this project.
+        if (nodes.size < files.length * 0.5)
+            return null;
+        return { nodes };
+    }
+    catch { // any failure (chdir, cruise, unexpected output shape) means "use the fallback"
+        return null;
+    }
+    finally {
+        process.chdir(prevCwd);
+    }
+}
 function buildGraph(files) {
     const filePaths = new Set(files.map((f) => f.path));
     const nodes = new Map();

package/dist/runners/duplication.d.ts CHANGED Viewed

@@ -1,3 +1,24 @@
-/** Code duplication — delegates to jscpd when available, falls back to built-in line-hash. */
+/** Code duplication.
+ *
+ *  - If the project depends on jscpd, shell out to it (opt-in, richest output).
+ *  - Otherwise run jscpd's detection engine (@jscpd/core — battle-tested Rabin-Karp
+ *    matching + maximal-clone extension) over our own lightweight tokenizer. We
+ *    supply the tokens (zero heavy language-grammar dependency) and let the mature
+ *    engine do the matching/merging/statistics.
+ *
+ *  Catches Type-1/2 clones (exact, modulo formatting), like jscpd's default. */
 import type { CheckResult } from "../types.js";
-export declare function runDuplication(cwd: string): CheckResult;
+export declare function runDuplication(cwd: string): Promise<CheckResult>;
+interface Token {
+    text: string; // exact source text of the token (string/template literals keep their quotes)
+    line: number; // 1-based line on which the token starts
+    endLine: number; // line on which the token ends; differs from `line` only for multi-line literals
+    start: number; // offset of the token's first character in the lexed content
+    end: number; // end offset (exclusive) as reported by the lexer
+}
+/** Lex source into tokens, skipping whitespace and comments. */
+export declare function tokenize(content: string): Token[];
+/** Blank out import / re-export statements (preserving line numbers) before
+ *  tokenizing. Identical import headers aren't refactorable duplication. */
+export declare function stripImports(content: string): string;
+export {};

package/dist/runners/duplication.js CHANGED Viewed

@@ -1,12 +1,27 @@
-/** Code duplication — delegates to jscpd when available, falls back to built-in line-hash. */
+/** Code duplication.
+ *
+ *  - If the project depends on jscpd, shell out to it (opt-in, richest output).
+ *  - Otherwise run jscpd's detection engine (@jscpd/core — battle-tested Rabin-Karp
+ *    matching + maximal-clone extension) over our own lightweight tokenizer. We
+ *    supply the tokens (zero heavy language-grammar dependency) and let the mature
+ *    engine do the matching/merging/statistics.
+ *
+ *  Catches Type-1/2 clones (exact, modulo formatting), like jscpd's default. */
+import { createHash } from "node:crypto";
 import { readFileSync, rmdirSync, unlinkSync } from "node:fs";
 import { join } from "node:path";
+import { Detector, getDefaultOptions, MemoryStore, Statistic, } from "@jscpd/core";
 import { getProductionFiles, readDeps } from "../fs-utils.js";
 import { gradeFromScore } from "../types.js";
 import { run } from "./exec.js";
-const MIN_LINES = 6; // minimum duplicate block size
-const MIN_TOKENS = 50; // minimum token count for a duplicate
-export function runDuplication(cwd) {
+// jscpd-parity thresholds: ≥ MIN_TOKENS consecutive matching tokens AND ≥ MIN_LINES lines.
+const MIN_TOKENS = 50;
+const MIN_LINES = 6;
+const MAX_ISSUES = 20;
+// One synthetic "format" for every file so clones are detected across all sources
+// (jscpd only compares within a format; we want cross-file/cross-extension matches).
+const FORMAT = "src";
+export async function runDuplication(cwd) {
     const start = Date.now();
     // Try jscpd if it's an explicit project dependency (opt-in, not auto-npx)
     const deps = readDeps(cwd);
@@ -15,108 +30,244 @@ export function runDuplication(cwd) {
         jscpdResult.duration = Date.now() - start;
         return jscpdResult;
     }
-    // Fallback: built-in line-hash detection
-    const issues = [];
-    const sourceFiles = getProductionFiles(cwd);
-    if (sourceFiles.length < 2) {
+    const files = getProductionFiles(cwd);
+    if (files.length < 2) {
         return {
             name: "duplication",
             score: 100,
             grade: "A",
-            details: { filesScanned: sourceFiles.length, duplicates: 0, tool: "built-in" },
+            details: { filesScanned: files.length, duplicates: 0, tool: "built-in" },
             issues: [],
             duration: Date.now() - start,
         };
     }
-    // Simple line-based duplicate detection
-    // Build a map of normalized line hashes → locations
-    const lineMap = new Map();
-    let totalSourceLines = 0;
-    for (const sf of sourceFiles) {
-        const lines = sf.content.split("\n");
-        totalSourceLines += lines.length;
-        for (let i = 0; i <= lines.length - MIN_LINES; i++) {
-            const block = lines
-                .slice(i, i + MIN_LINES)
-                .map((l) => l.trim())
-                .filter((l) => l.length > 0 &&
-                !l.startsWith("//") &&
-                !l.startsWith("*") &&
-                !l.startsWith("import ") &&
-                !l.startsWith("export {") &&
-                l !== "{" &&
-                l !== "}" &&
-                l !== "");
-            if (block.length < MIN_LINES - 2)
-                continue; // too many empty/trivial lines
-            const key = block.join("\n");
-            if (key.length < MIN_TOKENS)
-                continue;
-            const locs = lineMap.get(key) || [];
-            locs.push({ file: sf.path, line: i + 1 });
-            lineMap.set(key, locs);
-        }
-    }
-    // Find blocks that appear in 2+ locations
-    const duplicates = [];
-    const seen = new Set();
-    for (const [key, locs] of lineMap) {
-        if (locs.length < 2)
-            continue;
-        // Deduplicate: same file, adjacent lines are the same block
-        const unique = locs.filter((l, i) => i === 0 || l.file !== locs[i - 1].file || l.line > locs[i - 1].line + MIN_LINES);
-        if (unique.length < 2)
-            continue;
-        // Only report each pair once
-        for (let i = 0; i < unique.length - 1; i++) {
-            const a = unique[i];
-            const b = unique[i + 1];
-            const pairKey = `${a.file}:${a.line}-${b.file}:${b.line}`;
-            if (seen.has(pairKey))
-                continue;
-            seen.add(pairKey);
-            duplicates.push({ fileA: a.file, lineA: a.line, fileB: b.file, lineB: b.line, lines: MIN_LINES, content: key });
-        }
-    }
-    for (const d of duplicates.slice(0, 20)) {
-        // Show first 3 lines of the duplicated content, truncate at word boundary
-        const lines = d.content.split("\n").slice(0, 3);
-        const preview = lines.join(" \u2502 "); // use │ separator
-        const maxLen = 120;
-        const truncated = preview.length > maxLen ? `${preview.slice(0, preview.lastIndexOf(" ", maxLen) || maxLen)}...` : preview;
-        // First line of content is the best search string
-        const searchSnippet = d.content.split("\n")[0];
-        issues.push({
-            severity: "warning",
-            message: `Duplicate (${d.lines} lines): ${truncated}`,
-            file: `${d.fileA}:${d.lineA} ↔ ${d.fileB}:${d.lineB}`,
-            rule: "duplicate-code",
-            snippet: searchSnippet,
-        });
-    }
-    const dupPct = totalSourceLines > 0 ? Math.round((duplicates.length * MIN_LINES * 100) / totalSourceLines) : 0;
-    // Score based on duplication percentage (not absolute count)
-    const score = Math.max(0, Math.min(100, 100 - dupPct * 5));
+    const { clones, percentage, totalLines } = await detectWithCore(files);
+    const score = scoreFromPct(percentage);
+    // Rank biggest clones first; stable tie-break for deterministic output.
+    clones.sort((a, b) => b.lines - a.lines || a.fileA.localeCompare(b.fileA) || a.lineA - b.lineA);
+    const issues = clones.slice(0, MAX_ISSUES).map((c) => ({
+        severity: "warning",
+        message: `Duplicate (${c.lines} lines): ${c.snippet.slice(0, 100)}`,
+        file: `${c.fileA}:${c.lineA} ↔ ${c.fileB}:${c.lineB}`,
+        rule: "duplicate-code",
+        snippet: c.snippet,
+    }));
     return {
         name: "duplication",
         score,
         grade: gradeFromScore(score),
         details: {
-            filesScanned: sourceFiles.length,
-            totalSourceLines,
-            duplicateBlocks: duplicates.length,
-            duplicationPct: `${dupPct}%`,
+            filesScanned: files.length,
+            totalSourceLines: totalLines,
+            duplicateBlocks: clones.length,
+            duplicationPct: `${percentage}%`,
             tool: "built-in",
         },
         issues,
         duration: Date.now() - start,
     };
 }
+/** Detect clones by driving the @jscpd/core engine with our own tokenizer.
+ *  A single Detector (and therefore a single store) sees the files one after
+ *  another, so each file is matched against everything fed before it.
+ *
+ *  @param files Production files; `path` and `content` are read.
+ *  @returns the clone list, the overall duplication percentage rounded to one
+ *           decimal, and the total line count reported by the engine's statistics. */
+async function detectWithCore(files) {
+    const engineOptions = {
+        ...getDefaultOptions(),
+        minTokens: MIN_TOKENS,
+        minLines: MIN_LINES,
+        maxLines: 100_000,
+    };
+    const detector = new Detector(new VcqaTokenizer(), new MemoryStore(), undefined, engineOptions);
+    const statistic = new Statistic();
+    // Wire every statistics listener onto the detector so totals accumulate as we go.
+    for (const [eventName, listener] of Object.entries(statistic.subscribe())) {
+        if (listener)
+            detector.on(eventName, listener);
+    }
+    const clones = [];
+    for (const file of files) {
+        const matches = await detector.detect(file.path, file.content, FORMAT);
+        // duplicationA is the file just fed in, duplicationB the earlier match.
+        for (const { duplicationA: current, duplicationB: earlier } of matches) {
+            const firstLine = current.start.line;
+            const sourceLine = file.content.split("\n")[firstLine - 1] ?? "";
+            clones.push({
+                fileA: current.sourceId,
+                lineA: firstLine,
+                fileB: earlier.sourceId,
+                lineB: earlier.start.line,
+                lines: Math.max(1, current.end.line - firstLine + 1),
+                snippet: sourceLine.trim(),
+            });
+        }
+    }
+    const { total } = statistic.getStatistic();
+    return {
+        clones,
+        percentage: Math.round((total.percentage ?? 0) * 10) / 10,
+        totalLines: total.lines ?? 0,
+    };
+}
+/** True when `ch` is an identifier character: ASCII letter, digit, `_` or `$`. */
+const isWord = (ch) => {
+    if (ch === "_" || ch === "$")
+        return true;
+    if (ch >= "0" && ch <= "9")
+        return true;
+    if (ch >= "A" && ch <= "Z")
+        return true;
+    return ch >= "a" && ch <= "z";
+};
+/** Consume a string/template literal, returning its full text. One token, so the
+ *  detector never matches *inside* a string; text retained so distinct strings differ.
+ *
+ *  @param content Source being lexed.
+ *  @param cur     Cursor `{ i, line }` with `i` on the opening quote. It is advanced
+ *                 in place to just past the closing quote, and `line` is bumped for
+ *                 every newline inside the literal.
+ *  @returns the literal including its quotes; for an unterminated literal,
+ *           everything up to the end of `content`.
+ *
+ *  The cursor is never left beyond `content.length`, so the `end` offset callers
+ *  record stays inside the source even for an unterminated literal or a trailing
+ *  backslash. */
+function scanString(content, cur) {
+    const quote = content[cur.i];
+    const start = cur.i;
+    const n = content.length;
+    cur.i++;
+    while (cur.i < n && content[cur.i] !== quote) {
+        if (content[cur.i] === "\\") {
+            // An escaped newline (line continuation) still moves to the next line.
+            if (content[cur.i + 1] === "\n")
+                cur.line++;
+            cur.i += 2; // skip the backslash and the escaped character
+            continue;
+        }
+        if (content[cur.i] === "\n")
+            cur.line++;
+        cur.i++;
+    }
+    // Step over the closing quote, clamped so an unterminated literal ends at EOF.
+    cur.i = Math.min(cur.i + 1, n);
+    return content.slice(start, cur.i);
+}
+/** Skip a block comment. `cur.i` must sit on the opening slash-star; on return it
+ *  points just past the closing star-slash, and `cur.line` has been advanced by
+ *  the number of newlines inside the comment. */
+function scanBlockComment(content, cur) {
+    const limit = content.length;
+    let pos = cur.i + 2; // first character after the opener
+    for (; pos < limit; pos++) {
+        const ch = content[pos];
+        if (ch === "*" && content[pos + 1] === "/")
+            break;
+        if (ch === "\n")
+            cur.line++;
+    }
+    cur.i = pos + 2; // step over the closer
+}
+/** Split source text into tokens, dropping whitespace and comments.
+ *  Each token records its text, the line it starts and ends on, and its
+ *  start/end offsets. Three token shapes are produced: a whole string/template
+ *  literal, a run of identifier characters, or a single punctuation character. */
+export function tokenize(content) {
+    const out = [];
+    const len = content.length;
+    const cur = { i: 0, line: 1 };
+    while (cur.i < len) {
+        const ch = content[cur.i];
+        const next = content[cur.i + 1];
+        if (ch === "\n") {
+            cur.line += 1;
+            cur.i += 1;
+        }
+        else if (ch === " " || ch === "\t" || ch === "\r") {
+            cur.i += 1;
+        }
+        else if (ch === "/" && next === "/") {
+            // Line comment: jump to the newline (left for the next iteration to count).
+            const eol = content.indexOf("\n", cur.i);
+            cur.i = eol === -1 ? len : eol;
+        }
+        else if (ch === "/" && next === "*") {
+            scanBlockComment(content, cur);
+        }
+        else if (ch === '"' || ch === "'" || ch === "`") {
+            const line = cur.line;
+            const start = cur.i;
+            const text = scanString(content, cur);
+            out.push({ text, line, endLine: cur.line, start, end: cur.i });
+        }
+        else if (isWord(ch)) {
+            const start = cur.i;
+            let stop = start + 1;
+            while (stop < len && isWord(content[stop]))
+                stop += 1;
+            out.push({ text: content.slice(start, stop), line: cur.line, endLine: cur.line, start, end: stop });
+            cur.i = stop;
+        }
+        else {
+            // Anything else is a one-character punctuation token.
+            out.push({ text: ch, line: cur.line, endLine: cur.line, start: cur.i, end: cur.i + 1 });
+            cur.i += 1;
+        }
+    }
+    return out;
+}
+/** Blank out import / re-export statements (preserving line numbers) before
+ *  tokenizing. Identical import headers aren't refactorable duplication.
+ *
+ *  Only static import declarations are stripped. Dynamic `import(...)` calls and
+ *  `import.meta` expressions are ordinary code and are left untouched (treating
+ *  them as declarations would also blank the code that follows them).
+ *  A declaration spanning several lines is blanked up to the line that ends with
+ *  `from "..."` or a semicolon; a start line that already ends with a semicolon
+ *  is treated as complete.
+ *
+ *  @param content Source text.
+ *  @returns the same text with import / re-export lines replaced by empty lines. */
+export function stripImports(content) {
+    const endsWithFrom = /from\s*['"][^'"]*['"]\s*;?\s*$/;
+    const endsWithSemicolon = /;\s*$/;
+    const sideEffectImport = /^import\s*['"][^'"]*['"]\s*;?\s*$/;
+    // `import` keyword not followed by `(` (dynamic import) or `.` (import.meta).
+    const staticImportStart = /^import\b(?!\s*[.(])/;
+    const reExport = /^export\s*(\*|\{[^}]*\})\s*from\s*['"]/;
+    const lines = content.split("\n");
+    let cont = false; // inside a multi-line import
+    for (let i = 0; i < lines.length; i++) {
+        const t = lines[i].trim();
+        if (cont) {
+            lines[i] = "";
+            if (endsWithFrom.test(t) || endsWithSemicolon.test(t))
+                cont = false;
+            continue;
+        }
+        if (staticImportStart.test(t)) {
+            lines[i] = "";
+            const done = endsWithFrom.test(t) || sideEffectImport.test(t) || endsWithSemicolon.test(t);
+            cont = !done;
+            continue;
+        }
+        if (reExport.test(t))
+            lines[i] = "";
+    }
+    return lines.join("\n");
+}
+/** Convert one of our lexer tokens into the token shape handed to @jscpd/core.
+ *  Every token is typed "code"; columns are always reported as 0, while the
+ *  absolute offsets are carried in `range` and in each location's `position`. */
+function toIToken(t, format) {
+    const { text, line, endLine, start, end } = t;
+    const startLoc = { line, column: 0, position: start };
+    const endLoc = { line: endLine, column: 0, position: end };
+    return {
+        type: "code",
+        value: text,
+        length: text.length,
+        format,
+        range: [start, end],
+        loc: { start: startLoc, end: endLoc },
+    };
+}
+/** Bridges our tokens into the @jscpd/core engine: hashes rolling MIN_TOKENS-token
+ *  windows into map frames the Rabin-Karp detector matches against.
+ *
+ *  Each token is length-prefixed before hashing, so two windows get the same
+ *  frame id only when they contain the same token sequence. Plain concatenation
+ *  would make `ab c` and `a bc` hash alike. */
+class VcqaTokenizer {
+    /** @param id      Source identifier stored on every frame as `sourceId`.
+     *  @param data    Raw file content; import statements are blanked before lexing.
+     *  @param format  Format label copied onto every token and the returned map.
+     *  @param options Engine options; `minTokens` is the window size (defaults to MIN_TOKENS).
+     *  @returns a single-element array holding the tokens map for this file. */
+    generateMaps(id, data, format, options) {
+        const w = options.minTokens ?? MIN_TOKENS;
+        const toks = tokenize(stripImports(data)).map((t) => toIToken(t, format));
+        // Encode each token once; windows are then cheap slices of this array.
+        const keys = toks.map((t) => `${t.value.length}:${t.value}`);
+        const frames = [];
+        for (let i = 0; i + w <= toks.length; i++) {
+            const id2 = createHash("md5").update(keys.slice(i, i + w).join("")).digest("hex");
+            frames.push({ id: id2, sourceId: id, start: toks[i], end: toks[i + w - 1] });
+        }
+        return [new VcqaTokensMap(id, format, data, toks.length, frames)];
+    }
+}
+/** Frame container handed to the detector: exposes the source id, format, line
+ *  and token counts, and yields the precomputed frames one at a time via next(). */
+class VcqaTokensMap {
+    id;
+    format;
+    data;
+    tokensCount;
+    frames;
+    p = 0; // index of the next frame to hand out
+    constructor(id, format, data, tokensCount, frames) {
+        this.id = id;
+        this.format = format;
+        this.data = data;
+        this.tokensCount = tokensCount;
+        this.frames = frames;
+    }
+    getFormat() {
+        return this.format;
+    }
+    getId() {
+        return this.id;
+    }
+    /** Number of newline-separated lines in the raw content (at least 1). */
+    getLinesCount() {
+        const pieces = this.data.split("\n");
+        return pieces.length;
+    }
+    getTokensCount() {
+        return this.tokensCount;
+    }
+    /** Iterator step: the next frame, or `{ value: false, done: true }` once exhausted. */
+    next() {
+        if (this.p < this.frames.length) {
+            const frame = this.frames[this.p];
+            this.p += 1;
+            return { value: frame, done: false };
+        }
+        return { value: false, done: true };
+    }
+}
+/** Map a duplication percentage to a health score (shared by the built-in and
+ *  jscpd paths).
+ *  Bands: ≤5% → 100, ≤10% → 90, ≤20% → 75, ≤30% → 60, ≤40% → 45, ≤50% → 30.
+ *  Above 50% the score loses one point per additional percent, floored at 10. */
+function scoreFromPct(dupPct) {
+    // [inclusive upper bound in percent, score]
+    const bands = [
+        [5, 100],
+        [10, 90],
+        [20, 75],
+        [30, 60],
+        [40, 45],
+        [50, 30],
+    ];
+    const band = bands.find(([ceiling]) => dupPct <= ceiling);
+    if (band)
+        return band[1];
+    return Math.max(10, Math.round(30 - (dupPct - 50)));
+}
 function tryJscpd(cwd) {
     // jscpd writes JSON to a file, not stdout. Use a temp output dir.
     const tmpDir = join(cwd, ".vibe-check", "jscpd-tmp");
     const ignores = "node_modules/**,dist/**,build/**,.vibe-check/**,coverage/**,.next/**,.nuxt/**,**/*.json,**/*.lock,**/*.yaml,**/*.md";
-    run(`npx jscpd . --min-lines 6 --min-tokens 50 --reporters json --output "${tmpDir}" --ignore "${ignores}" --silent 2>/dev/null || true`, cwd, 30_000);
+    run(`npx jscpd . --min-lines ${MIN_LINES} --min-tokens ${MIN_TOKENS} --reporters json --output "${tmpDir}" --ignore "${ignores}" --silent 2>/dev/null || true`, cwd, 30_000);
     const reportPath = join(tmpDir, "jscpd-report.json");
     let rawData;
     try {
@@ -139,33 +290,19 @@ function tryJscpd(cwd) {
             return null;
         const issues = [];
         const clones = data.duplicates || [];
-        for (const d of clones.slice(0, 20)) {
+        for (const d of clones.slice(0, MAX_ISSUES)) {
             const fileA = d.firstFile?.name || "?";
             const fileB = d.secondFile?.name || "?";
             const lines = d.lines || 0;
             issues.push({
                 severity: "warning",
                 message: `Duplicate (${lines} lines)`,
-                file: `${fileA}:${d.firstFile?.start} \u2194 ${fileB}:${d.secondFile?.start}`,
+                file: `${fileA}:${d.firstFile?.start} ↔ ${fileB}:${d.secondFile?.start}`,
                 rule: "duplicate-code",
             });
         }
         const dupPct = Math.round((data.statistics.total?.percentage || 0) * 100) / 100;
-        // jscpd token-level detection is more aggressive than line-hash
-        // Industry benchmarks: <10% good, 10-20% acceptable, 20-40% needs work, >40% poor
-        const score = dupPct <= 5
-            ? 100
-            : dupPct <= 10
-                ? 90
-                : dupPct <= 20
-                    ? 75
-                    : dupPct <= 30
-                        ? 60
-                        : dupPct <= 40
-                            ? 45
-                            : dupPct <= 50
-                                ? 30
-                                : Math.max(10, Math.round(30 - (dupPct - 50)));
+        const score = scoreFromPct(dupPct);
         return {
             name: "duplication",
             score,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "@vibecodeqa/cli",
-	"version": "0.38.1",
+	"version": "0.39.0",
 	"description": "Code health scanner for the AI coding era. 25 checks, zero config, full report.",
 	"type": "module",
 	"bin": {
@@ -53,6 +53,8 @@
 		"vitest": "^4.1.6"
 	},
 	"dependencies": {
+		"@jscpd/core": "^4.2.4",
+		"dependency-cruiser": "^17.4.3",
 		"ink": "^5.2.1",
 		"react": "^18.3.1"
 	}