npm - ucn - Versions diffs - 3.8.23 → 3.8.26 - Mend

ucn 3.8.23 → 3.8.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

package/.claude/skills/ucn/SKILL.md +127 -12
package/README.md +152 -156
package/cli/index.js +363 -37
package/core/analysis.js +936 -32
package/core/bridge.js +1095 -0
package/core/brief.js +408 -0
package/core/cache.js +105 -5
package/core/callers.js +72 -18
package/core/check.js +200 -0
package/core/discovery.js +57 -34
package/core/entrypoints.js +638 -4
package/core/execute.js +304 -5
package/core/git-enrich.js +130 -0
package/core/graph.js +24 -2
package/core/output/analysis.js +157 -25
package/core/output/brief.js +100 -0
package/core/output/check.js +79 -0
package/core/output/doctor.js +85 -0
package/core/output/endpoints.js +239 -0
package/core/output/extraction.js +2 -0
package/core/output/find.js +126 -39
package/core/output/graph.js +48 -15
package/core/output/refactoring.js +103 -5
package/core/output/reporting.js +63 -23
package/core/output/search.js +110 -17
package/core/output/shared.js +56 -2
package/core/output.js +4 -0
package/core/parser.js +8 -2
package/core/project.js +39 -3
package/core/registry.js +30 -14
package/core/reporting.js +465 -2
package/core/search.js +130 -52
package/core/shared.js +101 -5
package/core/tracing.js +16 -6
package/core/verify.js +982 -95
package/languages/go.js +91 -6
package/languages/html.js +10 -0
package/languages/java.js +151 -35
package/languages/javascript.js +290 -33
package/languages/python.js +78 -11
package/languages/rust.js +267 -12
package/languages/utils.js +315 -3
package/mcp/server.js +91 -16
package/package.json +9 -1

package/core/reporting.js CHANGED Viewed

@@ -15,7 +15,7 @@ const { isTestFile } = require('./discovery');
  * Get project statistics: file counts, symbol counts, LOC, language breakdown.
  *
  * @param {object} index - ProjectIndex instance
- * @param {object} options - { functions }
+ * @param {object} options - { functions, hot, top }
  * @returns {object}
  */
 function getStats(index, options = {}) {
@@ -85,6 +85,264 @@ function getStats(index, options = {}) {
         stats.functions = functions;
     }
+    // Hot list: top N functions by inbound call-site count.
+    // "callCount" = number of distinct call-site lines (deduped per file by
+    // name and line) attributed to this name across the project. Multiple
+    // definitions of the same name are merged into a single row; when there
+    // is more than one definition the row carries a `locations` list. The
+    // count is name-keyed (not per-definition) — same trade-off as `usages`
+    // and matches the rest of the codebase's call-graph approximation.
+    if (options.hot) {
+        // MEDIUM-7: caller (execute.js) validates and passes either a
+        // positive integer, 0 (show nothing), or undefined (default 10).
+        const top = options.top === 0
+            ? 0
+            : ((options.top != null && Number(options.top) > 0) ? Number(options.top) : 10);
+        const FUNCTION_TYPES = new Set([
+            'function', 'method', 'static', 'constructor',
+            'public', 'abstract', 'classmethod'
+        ]);
+        // Ensure the calls cache is fully populated before counting.
+        // First-time stats --hot may need to parse files to extract calls;
+        // subsequent runs use the persisted calls cache.
+        if (typeof index.buildCalleeIndex === 'function' && !index.calleeIndex) {
+            index.buildCalleeIndex();
+        }
+        // BUG-H2: aggregate calls by *resolution kind* so a method call like
+        // `dict.get()` doesn't get attributed to a standalone `function get()`.
+        //
+        // Buckets per name:
+        //   bareNameCounts[name]     — calls with !isMethod (e.g. `get()`)
+        //   methodByReceiverType[t][name] — calls with isMethod and inferred receiverType
+        //   methodByName[name]       — all isMethod calls (fallback denominator)
+        //   importedReceiverCounts[name] — method calls whose receiver is an imported
+        //                                  module alias in the calling file (e.g.
+        //                                  `mod.foo()` where `mod` is a require alias).
+        //                                  These resolve like top-level function calls.
+        //
+        // self/this/cls/super calls are tallied in selfMethodByName (and in
+        // methodByName), not bareNameCounts; the attribution step below shares
+        // them across the classes that own a method of that name.
+        // We dedupe per file by (name, line) so multi-record call sites count once.
+        const SELF_RECEIVERS = new Set(['self', 'this', 'cls', 'super']);
+        const bareNameCounts = new Map();           // name -> count
+        const methodByReceiverType = new Map();      // receiverType -> Map(name -> count)
+        const methodByName = new Map();              // name -> count of all method calls
+        const selfMethodByName = new Map();          // name -> count of self/this.name() calls
+        const importedReceiverCounts = new Map();    // name -> count of `mod.name()` calls
+                                                     //          where mod is an import alias
+        // Pre-compute import-alias sets per file. Used to distinguish `mod.foo()`
+        // (resolves to top-level foo) from `obj.foo()` on a local variable.
+        const fileImportAliases = new Map();         // filePath -> Set<string> of alias names
+        for (const [filePath, fileEntry] of index.files) {
+            const aliases = new Set();
+            // importNames are the named imports/exports brought into this file.
+            // importAliases (when present) carry namespace import aliases (e.g.
+            // `import * as mod from "..."` → 'mod').
+            for (const n of (fileEntry.importNames || [])) aliases.add(n);
+            if (Array.isArray(fileEntry.importAliases)) {
+                for (const a of fileEntry.importAliases) {
+                    if (a && a.local) aliases.add(a.local);
+                }
+            }
+            fileImportAliases.set(filePath, aliases);
+        }
+        for (const [filePath, entry] of index.callsCache) {
+            if (!entry || !Array.isArray(entry.calls)) continue;
+            const seenInFile = new Set();
+            const aliasesForFile = fileImportAliases.get(filePath) || new Set();
+            for (const c of entry.calls) {
+                if (!c || !c.name) continue;
+                const key = `${c.name}::${c.line || 0}`;
+                if (seenInFile.has(key)) continue;
+                seenInFile.add(key);
+                const isSelfMethod = c.isMethod && SELF_RECEIVERS.has(c.receiver);
+                if (!c.isMethod) {
+                    // Bare-name call: foo() or pkg.Foo() (Go package call has receiver
+                    // but isMethod:false — keep counting under bareName since they
+                    // resolve like top-level functions in their package).
+                    bareNameCounts.set(c.name, (bareNameCounts.get(c.name) || 0) + 1);
+                } else if (isSelfMethod) {
+                    // self/this.foo() — attributed to the enclosing class's foo
+                    selfMethodByName.set(c.name, (selfMethodByName.get(c.name) || 0) + 1);
+                    methodByName.set(c.name, (methodByName.get(c.name) || 0) + 1);
+                } else {
+                    methodByName.set(c.name, (methodByName.get(c.name) || 0) + 1);
+                    // Module-alias receiver? `mod.foo()` where `mod` was imported here.
+                    // Treat the call as resolving to a top-level `foo` (the standalone
+                    // function exported from `mod`).
+                    if (c.receiver && aliasesForFile.has(c.receiver)) {
+                        importedReceiverCounts.set(c.name,
+                            (importedReceiverCounts.get(c.name) || 0) + 1);
+                    }
+                    if (c.receiverType) {
+                        let inner = methodByReceiverType.get(c.receiverType);
+                        if (!inner) {
+                            inner = new Map();
+                            methodByReceiverType.set(c.receiverType, inner);
+                        }
+                        inner.set(c.name, (inner.get(c.name) || 0) + 1);
+                    }
+                }
+                // Also account for resolvedName aliases (e.g. `import {foo as bar}; bar()`
+                // resolves to `foo`). Treat the resolved form the same way as the original.
+                if (c.resolvedName && c.resolvedName !== c.name) {
+                    const rkey = `${c.resolvedName}::${c.line || 0}`;
+                    if (!seenInFile.has(rkey)) {
+                        seenInFile.add(rkey);
+                        if (!c.isMethod) {
+                            bareNameCounts.set(c.resolvedName,
+                                (bareNameCounts.get(c.resolvedName) || 0) + 1);
+                        }
+                    }
+                }
+            }
+        }
+        // For each name, count how many distinct classes/types own a method with
+        // that name (used to split method-call counts when receiverType is unknown).
+        const classOwnersByName = new Map();         // name -> Set<className>
+        for (const [name, symbols] of index.symbols) {
+            for (const sym of symbols) {
+                if (!FUNCTION_TYPES.has(sym.type)) continue;
+                const owner = sym.className || (sym.receiver && sym.receiver.replace(/^\*/, ''));
+                if (owner) {
+                    let s = classOwnersByName.get(name);
+                    if (!s) { s = new Set(); classOwnersByName.set(name, s); }
+                    s.add(owner);
+                }
+            }
+        }
+        // MEDIUM-6: aggregate by name. Multiple definitions of the same name
+        // in different files (e.g. `tmp` in test/helpers/index.js AND
+        // test/accuracy.test.js) previously each got the GLOBAL call count,
+        // duplicating the row and inflating the leaderboard. We now emit
+        // one row per name with a `locations` list, so the user sees both
+        // definitions but the count appears exactly once.
+        //
+        // BUG-H2: with the buckets above, attribute counts per (name, ownerClass):
+        //   - standalone function:   bareNameCounts[name] + importedReceiverCounts[name]
+        //   - class method (Foo.bar): methodByReceiverType[Foo][bar]
+        //                              + selfMethodByName[bar] scaled by this row's
+        //                                share of the classes owning that name
+        //   - method calls with no receiverType (and no import-alias receiver)
+        //     are not attributed to any row.
+        const hotList = [];
+        let usedHeuristicSplit = false;  // whether any row's count was approximated
+        for (const [name, symbols] of index.symbols) {
+            // Filter to function-shaped definitions, dedup by file:line.
+            const seenLoc = new Set();
+            const locations = [];
+            let representative = null;
+            const ownerClasses = new Set();      // classes/receivers that own this name
+            for (const sym of symbols) {
+                if (!FUNCTION_TYPES.has(sym.type)) continue;
+                const relativePath = sym.relativePath ||
+                    (sym.file ? path.relative(index.root, sym.file) : '');
+                const locKey = `${relativePath}:${sym.startLine}`;
+                if (seenLoc.has(locKey)) continue;
+                seenLoc.add(locKey);
+                locations.push({
+                    file: relativePath,
+                    startLine: sym.startLine,
+                    endLine: sym.endLine,
+                    ...(sym.className && { className: sym.className }),
+                });
+                const owner = sym.className || (sym.receiver && sym.receiver.replace(/^\*/, ''));
+                if (owner) ownerClasses.add(owner);
+                if (!representative) representative = sym;
+            }
+            if (locations.length === 0) continue;
+            // Decide if this row represents a standalone function or a method.
+            // Mixed-type defs (e.g. "tmp" defined as both a function and a class method
+            // somewhere) are rare; for them we use the representative's flavor and
+            // accept that the count may be approximate.
+            const isMethodRow = ownerClasses.size > 0 &&
+                (!representative || !!representative.className || !!representative.receiver);
+            let count = 0;
+            let approximate = false;
+            if (!isMethodRow) {
+                // Standalone function (or top-level package call): use bare-name calls
+                // plus method-style calls where the receiver was an imported module
+                // alias (e.g. `lib.foo()` where `lib` is a require/import alias).
+                // We deliberately do NOT include arbitrary `obj.foo()` calls — those
+                // would inflate the count with unrelated method calls (the H2 bug).
+                count = (bareNameCounts.get(name) || 0) +
+                        (importedReceiverCounts.get(name) || 0);
+            } else {
+                // Method definition. Count only calls we can resolve to this owner:
+                //   - typed hits (receiverType matches one of this row's owner classes)
+                //   - self-method calls (self/this/cls/super receivers), scaled by this
+                //     row's share of the classes that own a method with this name
+                // Calls like `dict.get()` (no receiverType) are NOT attributed — they
+                // would inflate the count with builtin/unrelated method calls.
+                const selfShare = selfMethodByName.get(name) || 0;
+                const totalOwners = (classOwnersByName.get(name) || new Set()).size || 1;
+                let typedHits = 0;
+                for (const cls of ownerClasses) {
+                    const inner = methodByReceiverType.get(cls);
+                    if (inner) typedHits += (inner.get(name) || 0);
+                }
+                // Self-method calls: split evenly across owner classes (each class's own
+                // self.method() resolves to itself). When this row covers all owners
+                // (locations cover the only class that has this method), give the full
+                // self-share to this row.
+                const selfShareForRow = selfShare * (ownerClasses.size / totalOwners);
+                count = typedHits + Math.round(selfShareForRow);
+                // If we used the self-method heuristic across multiple classes, mark approximate.
+                if (selfShare > 0 && totalOwners > 1) approximate = true;
+            }
+            if (count === 0) continue; // skip dead symbols
+            if (approximate) usedHeuristicSplit = true;
+            // Sort locations by (file, startLine) for stable display.
+            locations.sort((a, b) =>
+                a.file.localeCompare(b.file) ||
+                (a.startLine || 0) - (b.startLine || 0)
+            );
+            const primary = locations[0];
+            hotList.push({
+                // Use the representative symbol's className for display name
+                // (so "Foo.bar" is preserved when applicable). When defs
+                // disagree on className, just show the bare name.
+                name: representative && representative.className
+                    ? `${representative.className}.${name}`
+                    : name,
+                // Primary location remains for backward-compat with consumers
+                // that read `file`/`startLine`/`endLine` directly.
+                file: primary.file,
+                startLine: primary.startLine,
+                endLine: primary.endLine,
+                callCount: count,
+                ...(approximate && { approximate: true }),
+                ...(locations.length > 1 && { locations }),
+            });
+        }
+        // Stable order: callCount desc, then (relativePath, startLine) asc.
+        hotList.sort((a, b) =>
+            (b.callCount - a.callCount) ||
+            a.file.localeCompare(b.file) ||
+            (a.startLine || 0) - (b.startLine || 0)
+        );
+        stats.hot = {
+            top,
+            total: hotList.length,
+            items: hotList.slice(0, top),
+            ...(usedHeuristicSplit && {
+                note: 'Method-call counts approximated when receiver type was unknown — values within those rows may include unresolved calls split across owner classes.'
+            }),
+        };
+    }
     return stats;
 }
@@ -255,4 +513,209 @@ function getToc(index, options = {}) {
     };
 }
-module.exports = { getStats, getToc };
+/**
+ * Project trust report. Tells the caller how much UCN itself trusts the index
+ * for this project: resolution coverage, blind spots (dynamic imports, eval,
+ * reflection), parse failures, and a quick verdict.
+ *
+ * Cheap-by-default: counts + blind-spot scan are O(files). The expensive
+ * confidence-coverage computation is deferred unless options.deep is set
+ * (then samples a slice of symbols).
+ *
+ * @param {object} index - ProjectIndex
+ * @param {object} options - { deep, sampleSize, in, file }
+ */
+function doctor(index, options = {}) {
+    const { detectLanguage, langTraits } = require('../languages');
+    const path = require('path');
+    const inFilter = options.in || options.file || null;
+    const matchInFilter = (rel) => {
+        if (!inFilter) return true;
+        return rel.includes(inFilter);
+    };
+    const fileCounts = { total: 0, scanned: 0 };
+    const langs = {};
+    let totalSymbols = 0;  // counted post-filter for accuracy when --in is set
+    const blindSpots = {
+        dynamicImports: { count: 0, files: [] },
+        evalCalls:      { count: 0, files: [] },
+        reflection:     { count: 0, files: [] },
+        parseFailures:  { count: 0, files: [] },
+    };
+    // Reflection signals per language. These run textually over the source — fast,
+    // and acceptable since UCN already records dynamic-import counts at parse time.
+    const REFLECTION_PATTERNS = {
+        python:     /\b(getattr|hasattr|setattr|__import__|importlib\.import_module)\s*\(/,
+        javascript: /\bnew Function\s*\(|\bReflect\.\w+\s*\(/,
+        typescript: /\bnew Function\s*\(|\bReflect\.\w+\s*\(/,
+        go:         /"reflect"|reflect\.\w+\s*\(/,
+        java:       /\.getDeclaredMethod\b|\.getMethod\b|\.getDeclaredField\b|Class\.forName\b/,
+        rust:       /\bAny::downcast/,
+    };
+    const EVAL_PATTERNS = {
+        python:     /\b(eval|exec)\s*\(/,
+        javascript: /\beval\s*\(/,
+        typescript: /\beval\s*\(/,
+    };
+    for (const [filePath, fe] of index.files) {
+        fileCounts.total++;
+        const rel = fe.relativePath || filePath;
+        if (!matchInFilter(rel)) continue;
+        fileCounts.scanned++;
+        const lang = fe.language || 'unknown';
+        if (!langs[lang]) langs[lang] = { files: 0, symbols: 0, lines: 0 };
+        langs[lang].files++;
+        langs[lang].symbols += (fe.symbols || []).length;
+        langs[lang].lines += fe.lines || 0;
+        totalSymbols += (fe.symbols || []).length;
+        if (fe.dynamicImports && fe.dynamicImports > 0) {
+            blindSpots.dynamicImports.count += fe.dynamicImports;
+            if (blindSpots.dynamicImports.files.length < 10) blindSpots.dynamicImports.files.push(rel);
+        }
+        if (fe.parseError) {
+            blindSpots.parseFailures.count++;
+            if (blindSpots.parseFailures.files.length < 10) blindSpots.parseFailures.files.push(rel);
+        }
+        // Read file once for eval/reflection signals
+        const evalRe = EVAL_PATTERNS[lang];
+        const reflRe = REFLECTION_PATTERNS[lang];
+        if (evalRe || reflRe) {
+            try {
+                const content = fs.readFileSync(filePath, 'utf-8');
+                if (evalRe && evalRe.test(content)) {
+                    blindSpots.evalCalls.count++;
+                    if (blindSpots.evalCalls.files.length < 10) blindSpots.evalCalls.files.push(rel);
+                }
+                if (reflRe && reflRe.test(content)) {
+                    blindSpots.reflection.count++;
+                    if (blindSpots.reflection.files.length < 10) blindSpots.reflection.files.push(rel);
+                }
+            } catch (e) { /* ignore read errors */ }
+        }
+    }
+    // Resolution coverage — sampled by default to keep doctor fast.
+    let coverage = null;
+    if (options.deep || options.sampleSize) {
+        coverage = computeCoverageSample(index, {
+            sampleSize: options.sampleSize || 200,
+            inFilter,
+            matchInFilter,
+        });
+    }
+    // Cache info
+    let cache = { fresh: null };
+    try {
+        cache.fresh = !index.isCacheStale();
+        cache.buildMs = index.buildTime || null;
+    } catch (e) { /* ignore */ }
+    // Compute trust verdict.
+    //
+    // 1. If a deep sample produced no edges (empty project, --in matches nothing),
+    //    don't pretend that's "0% confident" — return UNKNOWN.
+    // 2. Coverage gives the headline %, but blind spots (eval/reflection/dynamic
+    //    imports) downgrade the verdict by one tier each — a project that resolves
+    //    99% of edges but is full of `getattr` is not actually "HIGH" trust.
+    // 3. Parse failures always cap at MEDIUM regardless of coverage.
+    let trust = 'UNKNOWN';
+    let trustReason = '';
+    const reasons = [];
+    if (coverage && coverage.total > 0) {
+        const safe = coverage.high + coverage.medium;
+        const safePct = safe / coverage.total;
+        let baseLevel;
+        if (safePct >= 0.85) baseLevel = 'HIGH';
+        else if (safePct >= 0.6) baseLevel = 'MEDIUM';
+        else baseLevel = 'LOW';
+        reasons.push(`${(safePct * 100).toFixed(1)}% of edges have confidence ≥ 0.5`);
+        // Blind-spot downgrades — each kind drops one tier.
+        const tier = ['HIGH', 'MEDIUM', 'LOW'];
+        let idx = tier.indexOf(baseLevel);
+        const blindSignals = [];
+        if (blindSpots.parseFailures.count > 0) { idx = Math.max(idx, 1); blindSignals.push(`${blindSpots.parseFailures.count} parse failure(s)`); }
+        if (blindSpots.evalCalls.count > 0) { idx = Math.min(2, idx + 1); blindSignals.push(`${blindSpots.evalCalls.count} eval call(s)`); }
+        if (blindSpots.reflection.count > 0) { idx = Math.min(2, idx + 1); blindSignals.push(`${blindSpots.reflection.count} reflection use(s)`); }
+        if (blindSpots.dynamicImports.count > 0) { idx = Math.min(2, idx + 1); blindSignals.push(`${blindSpots.dynamicImports.count} dynamic import(s)`); }
+        trust = tier[idx];
+        if (blindSignals.length) reasons.push(`blind spots: ${blindSignals.join(', ')}`);
+        trustReason = reasons.join('; ');
+    } else if (coverage) {
+        // Sampled but zero edges — can't say anything about confidence.
+        trust = 'UNKNOWN';
+        trustReason = 'no edges sampled (empty scope or filter matched nothing)';
+    } else if (fileCounts.scanned > 0) {
+        // Cheap path (no --deep): use blind-spot signals.
+        const tier = ['HIGH', 'MEDIUM', 'LOW'];
+        let idx = 0;
+        const blindSignals = [];
+        if (blindSpots.parseFailures.count > 0) { idx = Math.max(idx, 1); blindSignals.push(`${blindSpots.parseFailures.count} parse failure(s)`); }
+        if (blindSpots.evalCalls.count > 0) { idx = Math.min(2, idx + 1); blindSignals.push(`${blindSpots.evalCalls.count} eval call(s)`); }
+        if (blindSpots.reflection.count > 0) { idx = Math.min(2, idx + 1); blindSignals.push(`${blindSpots.reflection.count} reflection use(s)`); }
+        if (blindSpots.dynamicImports.count > 0) { idx = Math.min(2, idx + 1); blindSignals.push(`${blindSpots.dynamicImports.count} dynamic import(s)`); }
+        trust = tier[idx];
+        trustReason = blindSignals.length
+            ? `coverage not deep-checked; blind spots: ${blindSignals.join(', ')}`
+            : 'no parse failures; coverage not deep-checked';
+    }
+    return {
+        root: index.root,
+        files: fileCounts,
+        symbols: totalSymbols,
+        languages: langs,
+        blindSpots,
+        coverage,
+        cache,
+        trust,
+        trustReason,
+        ...(inFilter && { filter: inFilter }),
+    };
+}
+/**
+ * Sample-based coverage: pick up to N symbols, run findCallers, bucket confidence.
+ * Doesn't pretend to be exhaustive — meant for a fast trust signal, not an audit.
+ */
+function computeCoverageSample(index, { sampleSize, inFilter, matchInFilter }) {
+    const buckets = { high: 0, medium: 0, low: 0, total: 0, sampled: 0 };
+    const symbolNames = [];
+    for (const [name, arr] of index.symbols) {
+        for (const sym of arr) {
+            if (!sym || !sym.relativePath) continue;
+            if (!matchInFilter(sym.relativePath)) continue;
+            if (sym.type === 'method' || sym.type === 'function' || sym.type === 'constructor') {
+                symbolNames.push(name);
+                if (symbolNames.length >= sampleSize * 2) break; // cap collection cost
+            }
+        }
+        if (symbolNames.length >= sampleSize * 2) break;
+    }
+    // Take a slice (not random — deterministic for tests)
+    const slice = symbolNames.slice(0, sampleSize);
+    buckets.sampled = slice.length;
+    for (const name of slice) {
+        const callers = index.findCallers(name, { includeMethods: true, includeUncertain: true });
+        for (const c of callers) {
+            const conf = (c.confidence != null) ? c.confidence : 1;
+            buckets.total++;
+            if (conf > 0.8) buckets.high++;
+            else if (conf >= 0.5) buckets.medium++;
+            else buckets.low++;
+        }
+    }
+    return buckets;
+}
+module.exports = { getStats, getToc, doctor };