npm - ucn - Versions diffs - 4.0.1 → 4.0.2 - Mend

ucn 4.0.1 → 4.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/core/analysis.js CHANGED Viewed

@@ -12,7 +12,7 @@ const path = require('path');
 const { execFileSync } = require('child_process');
 const { parse } = require('./parser');
 const { detectLanguage, langTraits } = require('../languages');
-const { NON_CALLABLE_TYPES, addTestExclusions } = require('./shared');
+const { NON_CALLABLE_TYPES, addTestExclusions, countTextBlindspots } = require('./shared');
 const { computeReachability, symbolKey } = require('./entrypoints');
 const { getLanguageModule } = require('../languages');
@@ -600,20 +600,20 @@ function detectCompleteness(index) {
             const content = index._readFile(filePath);
             if (langTraits(fileEntry.language)?.hasDynamicImports) {
-                // Dynamic imports: import(), require(variable), __import__
-                dynamicImports += (content.match(/import\s*\([^'"]/g) || []).length;
-                dynamicImports += (content.match(/require\s*\([^'"]/g) || []).length;
-                dynamicImports += (content.match(/__import__\s*\(/g) || []).length;
-                // eval, Function constructor
-                evalUsage += (content.match(/(^|[^a-zA-Z_])eval\s*\(/gm) || []).length;
-                evalUsage += (content.match(/new\s+Function\s*\(/g) || []).length;
+                // Dynamic imports: use the parser's structural count — the SAME
+                // source `doctor` uses — instead of a text regex. The old
+                // /import\s*\(/ matched Python grouped imports `from x import
+                // (...)`, flashing a false "N dynamic imports" incompleteness
+                // warning on essentially every Python project (field-report #2,
+                // reviewer-confirmed: doctor and about now agree on one count).
+                dynamicImports += fileEntry.dynamicImports || 0;
             }
-            // Reflection: getattr, hasattr, Reflect
-            reflectionUsage += (content.match(/\bgetattr\s*\(/g) || []).length;
-            reflectionUsage += (content.match(/\bhasattr\s*\(/g) || []).length;
-            reflectionUsage += (content.match(/\bReflect\./g) || []).length;
+            // eval/exec and reflection: the SAME shared counter doctor uses, so
+            // the about footer and the trust report never diverge (field-report #2).
+            const bs = countTextBlindspots(content, fileEntry.language);
+            evalUsage += bs.eval;
+            reflectionUsage += bs.reflection;
         } catch (e) {
             // Skip unreadable files
         }

package/core/output/analysis.js CHANGED Viewed

@@ -732,6 +732,21 @@ function formatAbout(about, options = {}) {
             for (const c of testTop) renderAboutCaller(c);
         }
         if (aboutCallerReach.note) lines.push(aboutCallerReach.note);
+        // Field-report #5: when every CONFIRMED caller is a test and the
+        // production call sites are method-style (landed in UNVERIFIED as
+        // method-ambiguous — e.g. a module function sharing a name with a
+        // method), the bare "0 prod" count reads like dead code. Flag it so the
+        // empty prod count isn't misread; the real calls are listed below.
+        if (prodTop.length === 0 && testTop.length > 0) {
+            const uv = about.callers.unverified;
+            const methodStyle = uv && uv.top
+                ? uv.top.some(u => u.reason === 'method-ambiguous' || u.reason === 'possible-dispatch')
+                : false;
+            if (uv && uv.total > 0 && methodStyle) {
+                lines.push(`  Note: 0 production callers CONFIRMED — the ${uv.total} call site(s) under UNVERIFIED below include method-style calls that may bind to this or a same-name method, so this is not dead code.`);
+            }
+        }
     }
     // Callers — UNVERIFIED tier (always visible; the contract forbids hiding)

package/core/output/doctor.js CHANGED Viewed

@@ -13,6 +13,7 @@ function formatDoctor(result) {
     const lines = [];
     lines.push(`UCN Trust Report — ${result.root}`);
     lines.push('═'.repeat(60));
+    if (result.version) lines.push(`Version: ucn ${result.version}`);
     lines.push(`Index: ${result.files.scanned} file${result.files.scanned === 1 ? '' : 's'}, ${result.symbols} symbol${result.symbols === 1 ? '' : 's'}`);
     if (result.filter) lines.push(`Filter: ${result.filter}`);
@@ -60,13 +61,22 @@ function formatDoctor(result) {
         ['Reflection',      bs.reflection],
         ['Parse failures',  bs.parseFailures],
     ];
+    const unitFor = { 'Dynamic imports': 'import', 'Eval/exec calls': 'use', 'Reflection': 'use', 'Parse failures': 'failure' };
     let anyBlindSpot = false;
     for (const [label, info] of bsLines) {
         if (info && info.count > 0) {
             anyBlindSpot = true;
-            const sample = info.files.slice(0, 3).map(f => `    - ${f}`).join('\n');
-            const more = info.files.length > 3 ? `\n    ... and ${info.files.length - 3} more` : '';
-            lines.push(`  ${label}: ${info.count} in ${info.files.length} file${info.files.length === 1 ? '' : 's'}`);
+            // fileCount is the TRUE (uncapped) number of files; info.files is a
+            // capped display sample. Show "N use(s) in M file(s)" and, when the
+            // sample is truncated, "... and K more file(s)" against the true M —
+            // never present the display cap as the population (field-report #2).
+            const fileCount = info.fileCount != null ? info.fileCount : info.files.length;
+            const unit = unitFor[label] || 'use';
+            lines.push(`  ${label}: ${info.count} ${unit}${info.count === 1 ? '' : 's'} in ${fileCount} file${fileCount === 1 ? '' : 's'}`);
+            const shownFiles = info.files.slice(0, 3);
+            const sample = shownFiles.map(f => `    - ${f}`).join('\n');
+            const moreFiles = fileCount - shownFiles.length;
+            const more = moreFiles > 0 ? `\n    ... and ${moreFiles} more file${moreFiles === 1 ? '' : 's'}` : '';
             if (sample) lines.push(sample + more);
         }
     }

package/core/reporting.js CHANGED Viewed

@@ -538,28 +538,23 @@ function doctor(index, options = {}) {
     const fileCounts = { total: 0, scanned: 0 };
     const langs = {};
     let totalSymbols = 0;  // counted post-filter for accuracy when --in is set
+    // Each category tracks: count = total OCCURRENCES (uses), fileCount = TRUE
+    // number of files affected (uncapped), files = a capped sample for display.
+    // Keeping count and fileCount distinct is what lets the formatter say
+    // "481 uses in 121 files" instead of mislabeling a file count as uses or
+    // presenting the 10-file display cap as the population (field-report #2).
+    const BLINDSPOT_FILE_CAP = 10;
     const blindSpots = {
-        dynamicImports: { count: 0, files: [] },
-        evalCalls:      { count: 0, files: [] },
-        reflection:     { count: 0, files: [] },
-        parseFailures:  { count: 0, files: [] },
+        dynamicImports: { count: 0, fileCount: 0, files: [] },
+        evalCalls:      { count: 0, fileCount: 0, files: [] },
+        reflection:     { count: 0, fileCount: 0, files: [] },
+        parseFailures:  { count: 0, fileCount: 0, files: [] },
     };
-    // Reflection signals per language. These run textually over the source — fast,
-    // and acceptable since UCN already records dynamic-import counts at parse time.
-    const REFLECTION_PATTERNS = {
-        python:     /\b(getattr|hasattr|setattr|__import__|importlib\.import_module)\s*\(/,
-        javascript: /\bnew Function\s*\(|\bReflect\.\w+\s*\(/,
-        typescript: /\bnew Function\s*\(|\bReflect\.\w+\s*\(/,
-        go:         /"reflect"|reflect\.\w+\s*\(/,
-        java:       /\.getDeclaredMethod\b|\.getMethod\b|\.getDeclaredField\b|Class\.forName\b/,
-        rust:       /\bAny::downcast/,
-    };
-    const EVAL_PATTERNS = {
-        python:     /\b(eval|exec)\s*\(/,
-        javascript: /\beval\s*\(/,
-        typescript: /\beval\s*\(/,
-    };
+    // Reflection/eval signals come from the shared text-blind-spot counter
+    // (core/shared.js) — the SAME routine detectCompleteness uses for the about
+    // footer, so the two never drift (field-report #2). Occurrence counts.
+    const { hasTextBlindspots, countTextBlindspots } = require('./shared');
     for (const [filePath, fe] of index.files) {
         fileCounts.total++;
@@ -574,29 +569,22 @@ function doctor(index, options = {}) {
         langs[lang].lines += fe.lines || 0;
         totalSymbols += (fe.symbols || []).length;
-        if (fe.dynamicImports && fe.dynamicImports > 0) {
-            blindSpots.dynamicImports.count += fe.dynamicImports;
-            if (blindSpots.dynamicImports.files.length < 10) blindSpots.dynamicImports.files.push(rel);
-        }
-        if (fe.parseError) {
-            blindSpots.parseFailures.count++;
-            if (blindSpots.parseFailures.files.length < 10) blindSpots.parseFailures.files.push(rel);
-        }
+        const recordBlind = (cat, occurrences) => {
+            if (occurrences <= 0) return;
+            cat.count += occurrences;
+            cat.fileCount++;
+            if (cat.files.length < BLINDSPOT_FILE_CAP) cat.files.push(rel);
+        };
+        if (fe.dynamicImports && fe.dynamicImports > 0) recordBlind(blindSpots.dynamicImports, fe.dynamicImports);
+        if (fe.parseError) recordBlind(blindSpots.parseFailures, 1);
-        // Read file once for eval/reflection signals
-        const evalRe = EVAL_PATTERNS[lang];
-        const reflRe = REFLECTION_PATTERNS[lang];
-        if (evalRe || reflRe) {
+        // Read file once for eval/reflection signals (shared counter).
+        if (hasTextBlindspots(lang)) {
             try {
-                const content = fs.readFileSync(filePath, 'utf-8');
-                if (evalRe && evalRe.test(content)) {
-                    blindSpots.evalCalls.count++;
-                    if (blindSpots.evalCalls.files.length < 10) blindSpots.evalCalls.files.push(rel);
-                }
-                if (reflRe && reflRe.test(content)) {
-                    blindSpots.reflection.count++;
-                    if (blindSpots.reflection.files.length < 10) blindSpots.reflection.files.push(rel);
-                }
+                const bs = countTextBlindspots(fs.readFileSync(filePath, 'utf-8'), lang);
+                recordBlind(blindSpots.evalCalls, bs.eval);
+                recordBlind(blindSpots.reflection, bs.reflection);
             } catch (e) { /* ignore read errors */ }
         }
     }
@@ -620,57 +608,87 @@ function doctor(index, options = {}) {
     // Compute trust verdict.
     //
-    // 1. If a deep sample produced no edges (empty project, --in matches nothing),
-    //    don't pretend that's "0% confident" — return UNKNOWN.
-    // 2. Coverage gives the headline %, but blind spots (eval/reflection/dynamic
-    //    imports) downgrade the verdict by one tier each — a project that resolves
-    //    99% of edges but is full of `getattr` is not actually "HIGH" trust.
-    // 3. Parse failures always cap at MEDIUM regardless of coverage.
+    // Field-report #1: the old logic dropped the tier by one PER blind-spot
+    // category present, so any non-trivial Python/TS project (all of which have
+    // some getattr/eval/dynamic import) was forced to LOW even when --deep
+    // measured ~99% of edges at confidence ≥ 0.5 — a self-contradicting verdict
+    // ("99.1% ... LOW") that trains agents to distrust a healthy index. The fix:
+    //   - When --deep coverage exists it drives the tier. Coverage measures the
+    //     CONFIDENCE of edges UCN FOUND, NOT completeness — a reflection-hidden
+    //     edge is absent from the sample, never a low-confidence edge dragging
+    //     the % down — so sparse blind spots are a CAVEAT, while PERVASIVE ones
+    //     (a large share of files) can hide edges the sample can't see and cap
+    //     the verdict at MEDIUM (density, not mere presence; see below).
+    //   - Parse failures are a separate exception: a file UCN couldn't parse is
+    //     not in the sample at all, a genuine uncounted hole → cap at MEDIUM.
+    //   - Without --deep there is no measurement, so blind spots are the only
+    //     signal — but bounded to ONE tier total (not one per category), so a
+    //     handful of getattr doesn't read as untrustworthy.
     let trust = 'UNKNOWN';
     let trustReason = '';
-    const reasons = [];
+    const tier = ['HIGH', 'MEDIUM', 'LOW'];
+    const blindSignals = [];
+    if (blindSpots.parseFailures.count > 0) blindSignals.push(`${blindSpots.parseFailures.count} parse failure(s)`);
+    if (blindSpots.evalCalls.count > 0) blindSignals.push(`${blindSpots.evalCalls.count} eval/exec use(s) in ${blindSpots.evalCalls.fileCount} file(s)`);
+    if (blindSpots.reflection.count > 0) blindSignals.push(`${blindSpots.reflection.count} reflection use(s) in ${blindSpots.reflection.fileCount} file(s)`);
+    if (blindSpots.dynamicImports.count > 0) blindSignals.push(`${blindSpots.dynamicImports.count} dynamic import(s) in ${blindSpots.dynamicImports.fileCount} file(s)`);
     if (coverage && coverage.total > 0) {
         const safe = coverage.high + coverage.medium;
         const safePct = safe / coverage.total;
-        let baseLevel;
-        if (safePct >= 0.85) baseLevel = 'HIGH';
-        else if (safePct >= 0.6) baseLevel = 'MEDIUM';
-        else baseLevel = 'LOW';
-        reasons.push(`${(safePct * 100).toFixed(1)}% of edges have confidence ≥ 0.5`);
-        // Blind-spot downgrades — each kind drops one tier.
-        const tier = ['HIGH', 'MEDIUM', 'LOW'];
-        let idx = tier.indexOf(baseLevel);
-        const blindSignals = [];
-        if (blindSpots.parseFailures.count > 0) { idx = Math.max(idx, 1); blindSignals.push(`${blindSpots.parseFailures.count} parse failure(s)`); }
-        if (blindSpots.evalCalls.count > 0) { idx = Math.min(2, idx + 1); blindSignals.push(`${blindSpots.evalCalls.count} eval call(s)`); }
-        if (blindSpots.reflection.count > 0) { idx = Math.min(2, idx + 1); blindSignals.push(`${blindSpots.reflection.count} reflection use(s)`); }
-        if (blindSpots.dynamicImports.count > 0) { idx = Math.min(2, idx + 1); blindSignals.push(`${blindSpots.dynamicImports.count} dynamic import(s)`); }
+        let idx = safePct >= 0.85 ? 0 : safePct >= 0.6 ? 1 : 2;
+        // Parse failures: unparsed files aren't in the sample at all.
+        if (blindSpots.parseFailures.count > 0) idx = Math.max(idx, 1);
+        // Coverage measures the CONFIDENCE of edges UCN found, NOT completeness:
+        // a call hidden behind reflection/dynamic dispatch is simply absent from
+        // findCallers' result, never a low-confidence edge that drags the % down.
+        // So when blind spots are PERVASIVE — affecting a large share of files —
+        // they can hide a real fraction of the call graph that the sample can't
+        // see, and the verdict is capped at MEDIUM. Density, not mere presence:
+        // a handful of getattr stays a caveat (the old code dropped a tier per
+        // category, forcing every project to LOW); reflection across half the
+        // files does cap. Gated on a file-count floor — file share is meaningless
+        // for a handful of files, so small projects ride on coverage alone.
+        const scanned = fileCounts.scanned || 1;
+        const share = (fc) => fc / scanned;
+        const pervasiveBlindSpot = scanned >= 10 && (
+            share(blindSpots.reflection.fileCount) >= 0.5 ||
+            share(blindSpots.dynamicImports.fileCount) >= 0.4 ||
+            share(blindSpots.evalCalls.fileCount) >= 0.15
+        );
+        const baseIdx = idx;
+        if (pervasiveBlindSpot) idx = Math.max(idx, 1);
+        const capped = idx > baseIdx;
         trust = tier[idx];
-        if (blindSignals.length) reasons.push(`blind spots: ${blindSignals.join(', ')}`);
+        const reasons = [`${(safePct * 100).toFixed(1)}% of found edges have confidence ≥ 0.5`];
+        if (blindSignals.length) {
+            reasons.push(capped
+                ? `capped at MEDIUM — pervasive blind spots may hide edges the sample can't see: ${blindSignals.join(', ')}`
+                : `blind spots (caveat — coverage measures found edges, not completeness): ${blindSignals.join(', ')}`);
+        }
         trustReason = reasons.join('; ');
     } else if (coverage) {
         // Sampled but zero edges — can't say anything about confidence.
         trust = 'UNKNOWN';
         trustReason = 'no edges sampled (empty scope or filter matched nothing)';
     } else if (fileCounts.scanned > 0) {
-        // Cheap path (no --deep): use blind-spot signals.
-        const tier = ['HIGH', 'MEDIUM', 'LOW'];
+        // Cheap path (no --deep): no measurement, so blind spots are the only
+        // signal — bounded to one tier total. Run --deep for a measured verdict.
         let idx = 0;
-        const blindSignals = [];
-        if (blindSpots.parseFailures.count > 0) { idx = Math.max(idx, 1); blindSignals.push(`${blindSpots.parseFailures.count} parse failure(s)`); }
-        if (blindSpots.evalCalls.count > 0) { idx = Math.min(2, idx + 1); blindSignals.push(`${blindSpots.evalCalls.count} eval call(s)`); }
-        if (blindSpots.reflection.count > 0) { idx = Math.min(2, idx + 1); blindSignals.push(`${blindSpots.reflection.count} reflection use(s)`); }
-        if (blindSpots.dynamicImports.count > 0) { idx = Math.min(2, idx + 1); blindSignals.push(`${blindSpots.dynamicImports.count} dynamic import(s)`); }
+        if (blindSpots.parseFailures.count > 0) idx = Math.max(idx, 1);
+        if (blindSpots.evalCalls.count + blindSpots.reflection.count + blindSpots.dynamicImports.count > 0) {
+            idx = Math.min(2, idx + 1);
+        }
         trust = tier[idx];
         trustReason = blindSignals.length
-            ? `coverage not deep-checked; blind spots: ${blindSignals.join(', ')}`
-            : 'no parse failures; coverage not deep-checked';
+            ? `coverage not deep-checked (run --deep); blind spots: ${blindSignals.join(', ')}`
+            : 'no parse failures; coverage not deep-checked (run --deep)';
     }
     return {
         root: index.root,
+        version: require('../package.json').version,  // running ucn version — surfaces MCP/CLI drift (field-report #3)
         files: fileCounts,
         symbols: totalSymbols,
         languages: langs,

package/core/shared.js CHANGED Viewed

@@ -159,6 +159,49 @@ function isOverrideMarked(def) {
     return false;
 }
+// Per-language text patterns for the "blind spots" UCN's AST can't follow:
+// eval/exec-style code execution and reflection (dynamic attribute access /
+// dynamic dispatch). ONE source of truth so doctor's trust scan and
+// detectCompleteness's about-footer warning count identically (field-report #2:
+// they used to diverge — doctor 497 reflection vs footer 194, eval 3 vs 2 —
+// because each kept its own regex set). Dynamic imports are NOT here: those are
+// structural (fileEntry.dynamicImports), the AST-accurate count both paths share.
+// `new Function(...)` is categorized as eval (code execution), not reflection.
+const BLINDSPOT_TEXT_PATTERNS = {
+    reflection: {
+        python:     /\b(getattr|hasattr|setattr|__import__|importlib\.import_module)\s*\(/g,
+        javascript: /\bReflect\.\w+\s*\(/g,
+        typescript: /\bReflect\.\w+\s*\(/g,
+        go:         /\breflect\.\w+\s*\(/g,
+        java:       /\.getDeclaredMethod\b|\.getMethod\b|\.getDeclaredField\b|Class\.forName\b/g,
+        rust:       /\bAny::downcast/g,
+    },
+    eval: {
+        python:     /\b(eval|exec)\s*\(/g,
+        javascript: /\beval\s*\(|\bnew\s+Function\s*\(/g,
+        typescript: /\beval\s*\(|\bnew\s+Function\s*\(/g,
+    },
+};
+/** True when a language has any text-blind-spot pattern (so callers can skip the file read otherwise). */
+function hasTextBlindspots(language) {
+    return !!(BLINDSPOT_TEXT_PATTERNS.reflection[language] || BLINDSPOT_TEXT_PATTERNS.eval[language]);
+}
+/**
+ * Count text-detected blind spots (eval/exec, reflection) in one file's source.
+ * Returns { eval, reflection } OCCURRENCE counts (global match). Shared by doctor
+ * and detectCompleteness so both report the same numbers (field-report #2).
+ */
+function countTextBlindspots(content, language) {
+    const reRe = BLINDSPOT_TEXT_PATTERNS.reflection[language];
+    const evRe = BLINDSPOT_TEXT_PATTERNS.eval[language];
+    return {
+        eval: evRe ? (content.match(evRe) || []).length : 0,
+        reflection: reRe ? (content.match(reRe) || []).length : 0,
+    };
+}
 module.exports = {
     pickBestDefinition,
     addTestExclusions,
@@ -169,4 +212,6 @@ module.exports = {
     looksLikeHandle,
     isTestPath,
     isOverrideMarked,
+    hasTextBlindspots,
+    countTextBlindspots,
 };

package/mcp/server.js CHANGED Viewed

@@ -791,7 +791,9 @@ server.registerTool(
 async function main() {
     const transport = new StdioServerTransport();
     await server.connect(transport);
-    console.error('UCN MCP server running on stdio');
+    // Print the running version so MCP-vs-CLI drift is visible (field-report #3:
+    // a stale `npx -y ucn` cache can silently run an older engine than the CLI).
+    console.error(`UCN MCP server v${require('../package.json').version} running on stdio`);
 }
 main().catch(e => {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "ucn",
-  "version": "4.0.1",
+  "version": "4.0.2",
   "mcpName": "io.github.mleoca/ucn",
   "description": "Code intelligence toolkit for AI agents — extract functions, trace call chains, find callers, detect dead code without reading entire files. Works as MCP server, CLI, or agent skill. Supports JS/TS, Python, Go, Rust, Java.",
   "main": "index.js",