npm - gitnexus - Versions diffs - 1.6.8-rc.37 → 1.6.8-rc.39 - Mend

gitnexus 1.6.8-rc.37 → 1.6.8-rc.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)

package/dist/core/embeddings/hf-env.d.ts +0 -66
package/dist/core/ingestion/cfg/control-flow-context.d.ts +9 -0
package/dist/core/ingestion/cfg/control-flow-context.js +11 -0
package/dist/core/ingestion/cfg/emit.d.ts +10 -4
package/dist/core/ingestion/cfg/emit.js +10 -4
package/dist/core/ingestion/cfg/reaching-defs-graph.d.ts +18 -0
package/dist/core/ingestion/cfg/reaching-defs-graph.js +312 -0
package/dist/core/ingestion/cfg/reaching-defs.d.ts +70 -25
package/dist/core/ingestion/cfg/reaching-defs.js +519 -150
package/dist/core/ingestion/cfg/visitors/csharp-harvest.d.ts +15 -1
package/dist/core/ingestion/cfg/visitors/csharp-harvest.js +29 -1
package/dist/core/ingestion/cfg/visitors/csharp.d.ts +6 -0
package/dist/core/ingestion/cfg/visitors/csharp.js +161 -1
package/dist/core/ingestion/cfg/visitors/dart-harvest.d.ts +8 -0
package/dist/core/ingestion/cfg/visitors/dart-harvest.js +26 -0
package/dist/core/ingestion/cfg/visitors/dart.d.ts +7 -4
package/dist/core/ingestion/cfg/visitors/dart.js +148 -1
package/dist/core/ingestion/cfg/visitors/java-harvest.d.ts +8 -0
package/dist/core/ingestion/cfg/visitors/java-harvest.js +19 -0
package/dist/core/ingestion/cfg/visitors/java.d.ts +6 -5
package/dist/core/ingestion/cfg/visitors/java.js +106 -10
package/dist/core/ingestion/cfg/visitors/kotlin-harvest.d.ts +9 -0
package/dist/core/ingestion/cfg/visitors/kotlin-harvest.js +20 -0
package/dist/core/ingestion/cfg/visitors/kotlin.d.ts +8 -6
package/dist/core/ingestion/cfg/visitors/kotlin.js +58 -9
package/dist/core/ingestion/cfg/visitors/php-harvest.d.ts +8 -0
package/dist/core/ingestion/cfg/visitors/php-harvest.js +20 -0
package/dist/core/ingestion/cfg/visitors/php.d.ts +8 -6
package/dist/core/ingestion/cfg/visitors/php.js +110 -1
package/dist/core/ingestion/cfg/visitors/swift-harvest.d.ts +8 -0
package/dist/core/ingestion/cfg/visitors/swift-harvest.js +18 -0
package/dist/core/ingestion/cfg/visitors/swift.d.ts +6 -0
package/dist/core/ingestion/cfg/visitors/swift.js +66 -0
package/dist/core/ingestion/pipeline-phases/parse-impl.d.ts +0 -21
package/dist/core/ingestion/pipeline.d.ts +0 -10
package/dist/core/ingestion/workers/worker-pool.d.ts +0 -40
package/dist/core/logger.d.ts +0 -5
package/dist/core/run-analyze.js +8 -0
package/dist/core/tree-sitter/safe-parse.d.ts +0 -5
package/dist/mcp/local/local-backend.d.ts +0 -24
package/dist/storage/repo-manager.d.ts +13 -0
package/package.json +1 -1

package/dist/core/embeddings/hf-env.d.ts CHANGED

@@ -13,66 +13,6 @@ export declare const CB_RESET_TIMEOUT_MS = 60000;
 export declare const HF_MAX_TIMEOUT_MS: number;
 /** Upper bound clamped on the env-override attempt count. */
 export declare const HF_MAX_ATTEMPTS_CAP = 10;
-/**
- * @internal Exported only for unit tests and the two embedder entry points
- * (`core/embeddings/embedder.ts` + `mcp/core/embedder.ts`). Not part of the
- * public package API.
- *
- * Minimal subset of `@huggingface/transformers`' `env` object that gitnexus
- * mutates. Defining a local structural type keeps this helper free of a
- * transitive dependency on transformers' generated `.d.ts` while still
- * giving full type-checking on the two fields we actually touch.
- */
-export interface HfEnvSubset {
-    cacheDir: string;
-    remoteHost: string;
-}
-/**
- * @internal Exported only for unit tests and the two embedder entry points
- * (`core/embeddings/embedder.ts` + `mcp/core/embedder.ts`). Not part of the
- * public package API.
- *
- * Apply user-controlled HuggingFace environment overrides to the
- * `@huggingface/transformers` `env` object. Centralises the two env-var
- * bridges so every gitnexus embedder entry point (the analyze pipeline
- * and the MCP server) behaves identically.
- *
- * - **`HF_HOME`** → `env.cacheDir` (default: `~/.cache/huggingface`).
- *   transformers.js otherwise defaults to `./node_modules/.cache` inside
- *   its own install dir, which is unwritable when gitnexus is installed
- *   globally (e.g. `/usr/lib/node_modules/`).
- *
- * - **`HF_ENDPOINT`** → `env.remoteHost` (#1205). transformers.js does
- *   not read `HF_ENDPOINT` on its own — it reads `env.remoteHost` —
- *   even though `HF_ENDPOINT` is the standard env var the upstream
- *   `huggingface_hub` Python client and the official HF mirror docs
- *   tell users to set. Bridging the two unblocks `--embeddings` for
- *   users behind networks where `huggingface.co` is unreachable
- *   (corporate proxies, the GFW, air-gapped mirrors). The trailing
- *   slash is normalised because transformers.js builds URLs by string
- *   concatenation and a missing slash silently falls through to its
- *   default `huggingface.co/...` host.
- *
- * Mutation rather than return-and-apply because callers already hold a
- * reference to the live `env` object imported from
- * `@huggingface/transformers` — passing the same reference in keeps the
- * call site a single line at each entry point.
- */
-export declare function applyHfEnvOverrides(env: HfEnvSubset): void;
-/**
- * @internal Exported for unit tests and the two embedder entry points.
- *
- * Returns true when an error message indicates a network-level fetch failure
- * during HuggingFace model download (e.g. `TypeError: fetch failed`,
- * `ECONNREFUSED`, `ENOTFOUND`, `ETIMEDOUT`, `ECONNRESET`).
- *
- * These errors are not device-specific and cannot be fixed by falling back to
- * a different ONNX device — the caller should rethrow immediately with
- * guidance about `HF_ENDPOINT`.
- */
-export declare function isNetworkFetchError(message: string): boolean;
-/** @internal Used by `withHfDownloadRetry` to mark a circuit-open rejection. */
-export declare const CIRCUIT_OPEN_TAG = "hf-circuit-open";
 /**
  * Module-level singleton shared by both embedder entry points
  * (`core/embeddings/embedder.ts` + `mcp/core/embedder.ts`). Per-process
@@ -82,18 +22,12 @@ export declare const CIRCUIT_OPEN_TAG = "hf-circuit-open";
  * recovery-time stampedes).
  */
 export declare const hfDownloadCircuit: CircuitBreaker;
-/** @internal Returns true for errors that should abort without retry (circuit-open). */
-export declare function isHfCircuitOpenError(message: string): boolean;
 /**
  * Returns true for any HuggingFace download failure that warrants showing the
  * `HF_ENDPOINT` remediation hint: either a raw network error or a
  * circuit-open rejection (which itself was caused by repeated network errors).
  */
 export declare function isHfDownloadFailure(message: string): boolean;
-/** @internal Wraps `fn` in a hard time-limit. The timeout error contains
- *  `ETIMEDOUT` so that `isNetworkFetchError` classifies it correctly.
- */
-export declare function withDownloadTimeout<T>(fn: () => Promise<T>, timeoutMs: number): Promise<T>;
 export interface HfRetryOptions {
     /** Maximum total attempts including the initial one (default: `HF_MAX_ATTEMPTS`). */
     maxAttempts?: number;

package/dist/core/ingestion/cfg/control-flow-context.d.ts CHANGED

@@ -62,6 +62,15 @@ export declare class ControlFlowContext {
     resolveBreak(label?: string): JumpResolution | undefined;
     /** Resolve a `continue`: like {@link resolveBreak} but only loop frames match. */
     resolveContinue(label?: string): JumpResolution | undefined;
+    /**
+     * Resolve a Java `yield e` (switch-EXPRESSION arm exit): the nearest enclosing
+     * SWITCH frame's exit, threading the finalizers stacked above it. Unlike a
+     * `break`, a `yield` ALWAYS targets the switch — never an intervening loop — so
+     * it cannot match a loop frame (a `yield` inside a loop inside a switch arm
+     * still exits the whole switch). Returns `undefined` when there is no enclosing
+     * switch (malformed input); the caller falls back to its conservative routing.
+     */
+    resolveYield(): JumpResolution | undefined;
     /** Every active finalizer, innermost first — what a `return` must cross. */
     finalizersForReturn(): readonly FinalizerFrame[];
     /**

package/dist/core/ingestion/cfg/control-flow-context.js CHANGED

@@ -39,6 +39,17 @@ export class ControlFlowContext {
     resolveContinue(label) {
         return this.resolve((f) => f.kind === 'loop' && (label === undefined || f.labels.includes(label)), (f) => f.continueTo);
     }
+    /**
+     * Resolve a Java `yield e` (switch-EXPRESSION arm exit): the nearest enclosing
+     * SWITCH frame's exit, threading the finalizers stacked above it. Unlike a
+     * `break`, a `yield` ALWAYS targets the switch — never an intervening loop — so
+     * it cannot match a loop frame (a `yield` inside a loop inside a switch arm
+     * still exits the whole switch). Returns `undefined` when there is no enclosing
+     * switch (malformed input); the caller falls back to its conservative routing.
+     */
+    resolveYield() {
+        return this.resolve((f) => f.kind === 'switch');
+    }
     /** Every active finalizer, innermost first — what a `return` must cross. */
     finalizersForReturn() {
         const fins = [];

package/dist/core/ingestion/cfg/emit.d.ts CHANGED

@@ -89,10 +89,16 @@ export declare const DEFAULT_PDG_MAX_REACHING_DEF_FACTS_PER_FUNCTION: number;
  * never fires). Truncation degrades to a sound empty REACHING_DEF for that one
  * function (status `truncated`), never wrong facts.
  *
- * This ceiling is the SOUND backstop, not a perf fix: WTO / loop-aware iteration
- * ordering was benchmarked and rejected (0% faster — the cost is dense-set
- * propagation, not visitation order; see the no-go note in reaching-defs.ts at
- * the RPO-order site). SSA-sparse reaching-defs is the deferred real fix.
+ * As of #2201 this ceiling is an adversarial-only backstop that effectively
+ * never fires on real code: the production solver auto-selects the SSA-sparse
+ * path for the looping functions that would breach it, and the SSA path has no
+ * fixpoint iteration (it answers reaching queries from the def-use graph in one
+ * pass) so it computes the full facts where the dense worklist would have
+ * truncated. The budget is still consulted on the dense fallback path (small /
+ * loop-free functions, and throw-edge / unreachable-block functions the SSA path
+ * does not model). WTO / loop-aware iteration ordering was benchmarked and
+ * rejected (0% faster — the cost was dense-set propagation, not visitation
+ * order); SSA-sparse was the real fix. See reaching-defs.ts.
  */
 export declare const DEFAULT_PDG_MAX_REACHING_DEF_BLOCK_REVISITS = 64;
 export interface CfgEmitResult {

package/dist/core/ingestion/cfg/emit.js CHANGED

@@ -72,10 +72,16 @@ export const DEFAULT_PDG_MAX_REACHING_DEF_FACTS_PER_FUNCTION = REACHING_DEF_FACT
  * never fires). Truncation degrades to a sound empty REACHING_DEF for that one
  * function (status `truncated`), never wrong facts.
  *
- * This ceiling is the SOUND backstop, not a perf fix: WTO / loop-aware iteration
- * ordering was benchmarked and rejected (0% faster — the cost is dense-set
- * propagation, not visitation order; see the no-go note in reaching-defs.ts at
- * the RPO-order site). SSA-sparse reaching-defs is the deferred real fix.
+ * As of #2201 this ceiling is an adversarial-only backstop that effectively
+ * never fires on real code: the production solver auto-selects the SSA-sparse
+ * path for the looping functions that would breach it, and the SSA path has no
+ * fixpoint iteration (it answers reaching queries from the def-use graph in one
+ * pass) so it computes the full facts where the dense worklist would have
+ * truncated. The budget is still consulted on the dense fallback path (small /
+ * loop-free functions, and throw-edge / unreachable-block functions the SSA path
+ * does not model). WTO / loop-aware iteration ordering was benchmarked and
+ * rejected (0% faster — the cost was dense-set propagation, not visitation
+ * order); SSA-sparse was the real fix. See reaching-defs.ts.
  */
 export const DEFAULT_PDG_MAX_REACHING_DEF_BLOCK_REVISITS = 64;
 /**

package/dist/core/ingestion/cfg/reaching-defs-graph.d.ts ADDED

@@ -0,0 +1,18 @@
+/**
+ * Pure graph sub-stages for the reaching-definitions solvers (#2201 review R4).
+ *
+ * Extracted from reaching-defs.ts to keep that module focused on the
+ * orchestrator, the dense oracle, the statement sweep, and the dispatcher.
+ * Everything here is a pure function of plain arrays — no CFG, no harvest, no
+ * solver state — so this module has NO dependency on reaching-defs.ts (a strict
+ * one-way import) and each stage is independently testable. The SSA pipeline
+ * (dominators → dominance frontiers → Tarjan SCC → reach-set condensation)
+ * implements Cooper-Harvey-Kennedy + Cytron + Tarjan; reverse-post-order, the
+ * loop-reachability check, and the def-set/lattice primitives are shared with
+ * the dense GEN/KILL solver and the dispatcher.
+ *
+ * These are held byte-identical to their former inline form by the differential
+ * equivalence fuzz (test/unit/cfg/reaching-defs-equivalence.test.ts) — any diff
+ * after extraction is an extraction bug, never the oracle.
+ */
+export {};

package/dist/core/ingestion/cfg/reaching-defs-graph.js ADDED

@@ -0,0 +1,312 @@
+/**
+ * Pure graph sub-stages for the reaching-definitions solvers (#2201 review R4).
+ *
+ * Extracted from reaching-defs.ts to keep that module focused on the
+ * orchestrator, the dense oracle, the statement sweep, and the dispatcher.
+ * Everything here is a pure function of plain arrays — no CFG, no harvest, no
+ * solver state — so this module has NO dependency on reaching-defs.ts (a strict
+ * one-way import) and each stage is independently testable. The SSA pipeline
+ * (dominators → dominance frontiers → Tarjan SCC → reach-set condensation)
+ * implements Cooper-Harvey-Kennedy + Cytron + Tarjan; reverse-post-order, the
+ * loop-reachability check, and the def-set/lattice primitives are shared with
+ * the dense GEN/KILL solver and the dispatcher.
+ *
+ * These are held byte-identical to their former inline form by the differential
+ * equivalence fuzz (test/unit/cfg/reaching-defs-equivalence.test.ts) — any diff
+ * after extraction is an extraction bug, never the oracle.
+ */
+/**
+ * RPO over blocks reachable from `entry`; unreachable blocks appended by index.
+ * Returns the order AND the reachability bitmap the DFS already computed, so a
+ * caller needing "is every block reachable?" reuses this pass instead of a
+ * separate BFS (#2201 review R8 — the SSA path's reachability gate).
+ *
+ * @internal
+ */
+export function reversePostOrder(entry, succs, n) {
+    const visited = new Array(n).fill(false);
+    const post = [];
+    // Iterative DFS with an explicit phase stack (children pushed in reverse so
+    // they pop in sorted order — determinism).
+    const stack = [{ node: entry, childIdx: 0 }];
+    visited[entry] = true;
+    while (stack.length) {
+        const top = stack[stack.length - 1];
+        const children = succs[top.node];
+        if (top.childIdx < children.length) {
+            const next = children[top.childIdx];
+            top.childIdx += 1;
+            if (!visited[next]) {
+                visited[next] = true;
+                stack.push({ node: next, childIdx: 0 });
+            }
+        }
+        else {
+            post.push(top.node);
+            stack.pop();
+        }
+    }
+    const order = post.reverse();
+    for (let b = 0; b < n; b++)
+        if (!visited[b])
+            order.push(b);
+    return { order, visited };
+}
+/**
+ * Immediate dominators (Cooper-Harvey-Kennedy; correct on irreducible CFGs).
+ * `rpo` is the reverse-post-order rooted at the synthetic start `S`, `dPredsX`
+ * the dominator-graph predecessors (incl. S→entry). Returns idom[b] for every
+ * node in [0, nx); idom[S] === S.
+ *
+ * @internal
+ */
+export function buildDominators(rpo, dPredsX, S, nx) {
+    const rpoIdx = new Array(nx);
+    rpo.forEach((b, i) => (rpoIdx[b] = i));
+    const idom = new Array(nx).fill(-1);
+    idom[S] = S;
+    const intersect = (a, b) => {
+        while (a !== b) {
+            while (rpoIdx[a] > rpoIdx[b])
+                a = idom[a];
+            while (rpoIdx[b] > rpoIdx[a])
+                b = idom[b];
+        }
+        return a;
+    };
+    for (let changed = true; changed;) {
+        changed = false;
+        for (const b of rpo) {
+            if (b === S)
+                continue;
+            let nd = -1;
+            for (const p of dPredsX[b])
+                if (idom[p] !== -1)
+                    nd = nd === -1 ? p : intersect(nd, p);
+            if (nd !== -1 && idom[b] !== nd) {
+                idom[b] = nd;
+                changed = true;
+            }
+        }
+    }
+    return idom;
+}
+/**
+ * Dominance frontiers (Cytron). df[b] is the set of nodes where b's dominance
+ * ends — the φ-placement targets for any binding defined in b.
+ *
+ * @internal
+ */
+export function buildDominanceFrontiers(dPredsX, idom, nx) {
+    const df = Array.from({ length: nx }, () => new Set());
+    for (let b = 0; b < nx; b++) {
+        const dp = dPredsX[b];
+        if (dp.length < 2)
+            continue;
+        for (const p of dp) {
+            let runner = p;
+            while (runner !== idom[b] && runner !== -1) {
+                df[runner].add(b);
+                runner = idom[runner];
+            }
+        }
+    }
+    return df;
+}
+/**
+ * Tarjan strongly-connected components over the value-graph operand edges
+ * (`nodeOps[node]` = operand node ids). Iterative (explicit work stack — the
+ * graph can be deep). SCCs are emitted in REVERSE topological order, so an
+ * SCC's operand SCCs are numbered before it — the property
+ * {@link condenseReachingSets} relies on for its single forward pass.
+ *
+ * @internal
+ */
+export function tarjanScc(nodeOps) {
+    const N = nodeOps.length;
+    const sccOf = new Array(N).fill(-1);
+    const sccMembers = [];
+    const index = new Array(N).fill(-1);
+    const low = new Array(N).fill(0);
+    const onStk = new Array(N).fill(false);
+    const tarjanStk = [];
+    let counter = 0;
+    for (let start = 0; start < N; start++) {
+        if (index[start] !== -1)
+            continue;
+        const work = [{ node: start, oi: 0 }];
+        index[start] = low[start] = counter++;
+        tarjanStk.push(start);
+        onStk[start] = true;
+        while (work.length) {
+            const top = work[work.length - 1];
+            const ops = nodeOps[top.node];
+            if (top.oi < ops.length) {
+                const w = ops[top.oi++];
+                if (index[w] === -1) {
+                    index[w] = low[w] = counter++;
+                    tarjanStk.push(w);
+                    onStk[w] = true;
+                    work.push({ node: w, oi: 0 });
+                }
+                else if (onStk[w] && index[w] < low[top.node]) {
+                    low[top.node] = index[w];
+                }
+            }
+            else {
+                if (low[top.node] === index[top.node]) {
+                    const members = [];
+                    let w;
+                    do {
+                        w = tarjanStk.pop();
+                        onStk[w] = false;
+                        sccOf[w] = sccMembers.length;
+                        members.push(w);
+                    } while (w !== top.node);
+                    sccMembers.push(members);
+                }
+                work.pop();
+                if (work.length) {
+                    const par = work[work.length - 1].node;
+                    if (low[top.node] < low[par])
+                        low[par] = low[top.node];
+                }
+            }
+        }
+    }
+    return { sccOf, sccMembers };
+}
+/**
+ * Reaching def-key set per SCC via condensation (cycle-safe union). Tarjan emits
+ * SCCs in reverse topological order, so a single forward pass over SCCs resolves
+ * every union: an SCC's reaching set is its members' own leaf keys plus the
+ * already-computed reaching sets of its cross-SCC operands.
+ *
+ * Alias fast path (#2201 review R2): an SCC with NO own leaf keys whose cross-SCC
+ * operands all resolve to a SINGLE source SCC has exactly that source's reaching
+ * set — share it BY REFERENCE instead of copying element-by-element (the O(defs²)
+ * cost at wide-fan-in φ merges). Safe: the returned sets are read-only after this
+ * pass, and contents are identical (set iteration order is irrelevant — the
+ * sweep sorts each use's keys before emission, KTD6).
+ *
+ * @internal
+ */
+export function condenseReachingSets(sccMembers, sccOf, nodeKeys, nodeOps) {
+    const reachByScc = new Array(sccMembers.length);
+    for (let s = 0; s < sccMembers.length; s++) {
+        const members = sccMembers[s];
+        let aliasTarget = -1; // the unique cross-SCC source SCC, or -1 if none/many
+        let hasOwnKeys = false;
+        let multiSource = false;
+        for (const node of members) {
+            if (nodeKeys[node]) {
+                hasOwnKeys = true;
+                break;
+            }
+            for (const w of nodeOps[node]) {
+                const ws = sccOf[w];
+                if (ws === s)
+                    continue; // intra-SCC operand: same set being built, adds nothing
+                if (aliasTarget === -1)
+                    aliasTarget = ws;
+                else if (aliasTarget !== ws) {
+                    multiSource = true;
+                    break;
+                }
+            }
+            if (multiSource)
+                break;
+        }
+        if (!hasOwnKeys && !multiSource && aliasTarget !== -1) {
+            reachByScc[s] = reachByScc[aliasTarget]; // zero-copy share
+            continue;
+        }
+        // General case: union own leaf keys + every distinct cross-SCC operand set.
+        const set = new Set();
+        for (const node of members) {
+            const keys = nodeKeys[node];
+            if (keys)
+                for (const k of keys)
+                    set.add(k);
+            for (const w of nodeOps[node]) {
+                const ws = sccOf[w];
+                if (ws !== s)
+                    for (const k of reachByScc[ws])
+                        set.add(k);
+            }
+        }
+        reachByScc[s] = set;
+    }
+    return reachByScc;
+}
+/**
+ * True iff a cycle is reachable from `entry` (the CFG has a loop). Iterative DFS
+ * with a gray/black coloring; a gray successor is a back-edge. O(V+E). Used by
+ * the production dispatcher to decide SSA-vs-dense.
+ *
+ * @internal
+ */
+export function hasReachableLoop(entry, succs, n) {
+    const color = new Uint8Array(n); // 0 white, 1 gray, 2 black
+    const stack = [{ node: entry, i: 0 }];
+    color[entry] = 1;
+    while (stack.length) {
+        const top = stack[stack.length - 1];
+        const ss = succs[top.node];
+        if (top.i < ss.length) {
+            const next = ss[top.i++];
+            if (color[next] === 1)
+                return true;
+            if (color[next] === 0) {
+                color[next] = 1;
+                stack.push({ node: next, i: 0 });
+            }
+        }
+        else {
+            color[top.node] = 2;
+            stack.pop();
+        }
+    }
+    return false;
+}
+/**
+ * Order-stable union of two def-sets (shares `a` when `b` adds nothing).
+ *
+ * @internal
+ */
+export function unionSets(a, b) {
+    let target = a;
+    let copied = false;
+    for (const key of b) {
+        if (!target.has(key)) {
+            if (!copied) {
+                target = new Set(a);
+                copied = true;
+            }
+            target.add(key);
+        }
+    }
+    return target;
+}
+/**
+ * Per-binding lattice equality with a reference fast path (sets only ever grow).
+ *
+ * @internal
+ */
+export function latticeEquals(a, b) {
+    if (a === b)
+        return true;
+    if (a.size !== b.size)
+        return false;
+    for (const [k, bSet] of b) {
+        const aSet = a.get(k);
+        if (aSet === bSet)
+            continue;
+        if (!aSet || aSet.size !== bSet.size)
+            return false;
+        for (const v of bSet)
+            if (!aSet.has(v))
+                return false;
+    }
+    return true;
+}

package/dist/core/ingestion/cfg/reaching-defs.d.ts CHANGED

@@ -1,8 +1,27 @@
 /**
- * Reaching definitions (#2082 M2 U3) — classic GEN/KILL monotone fixpoint over
- * one function's CFG, plus the canonical intra-block statement sweep that
- * recovers statement-granular def→use facts from M1's coalesced blocks
- * WITHOUT re-splitting the CFG.
+ * Reaching definitions (#2082 M2 U3, SSA-sparse rewrite #2201) — per-function
+ * intraprocedural may-reaching-definitions, plus the canonical intra-block
+ * statement sweep that recovers statement-granular def→use facts from M1's
+ * coalesced blocks WITHOUT re-splitting the CFG.
+ *
+ * ARCHITECTURE (#2201): the analysis is split into solver-INDEPENDENT stages
+ * (shared by every path, so the byte-identical surface is maximal) and a
+ * swappable IN-set computation:
+ *   - {@link harvestStatementFacts} — per-block GEN/allDefs + def/use telemetry.
+ *   - {@link buildAdjacency} — throw-aware predecessor/successor adjacency.
+ *   - the IN-set computer — answers block-entry reaching-set queries. Two
+ *     implementations: {@link computeInSetsSparse} (SSA — CHK dominators →
+ *     Cytron dominance frontiers + φ-placement → stack renaming over a
+ *     synthetic entry, walked SCC-condensed) and {@link computeInSetsDense}
+ *     (the original GEN/KILL worklist). Production runs {@link
+ *     computeInSetsAuto}, which picks the SSA solver for looping functions large
+ *     enough to amortize construction (where it is asymptotically faster and
+ *     never hits the dense ceiling) and the dense worklist everywhere else; the
+ *     dense path also serves the throw-edge / unreachable-block cases the SSA
+ *     path does not model. The two are held byte-identical by the equivalence
+ *     fuzz — only set CONTENTS must match (the sweep sorts each use's keys
+ *     before the maxFacts cutoff, so iteration order is irrelevant).
+ *   - {@link sweepFacts} — statement sweep + sort + maxFacts truncation.
  *
  * PURE AND DETERMINISTIC (load-bearing contract):
  *  - Pure function of its inputs — no graph, no logger (warnings are the
@@ -14,17 +33,10 @@
  *    insertion-ordered Maps/Sets throughout, and the output fact array is
  *    explicitly sorted. Snapshot tests and content-derived edge ids rely on it.
  *
- * COMPLEXITY DISCIPLINE (the four-times-repeated repo bug shape is per-item
- * re-derivation inside the loop): def-sets are SHARED BY REFERENCE, never
- * deep-copied — a MUST def's kill is total per binding, so a transfer either
- * aliases the incoming set or replaces it; a MAY def (conditional context —
- * see StatementFacts.mayDefs) unions WITHOUT killing via a copy-on-extend.
- * Single-predecessor blocks alias the predecessor's OUT map outright;
- * multi-pred merges union only bindings whose incoming sets differ by
- * reference. Iteration is reverse post-order, seeded with every block
- * (unreachable blocks keep ⊥ IN — correct, their defs reach nothing).
- * Convergence: sets grow monotonically within the finite def-site universe ⇒
- * ≤ loop-depth+1 passes in practice.
+ * COMPLEXITY DISCIPLINE: def-sets are SHARED BY REFERENCE, never deep-copied —
+ * a MUST def's kill is total per binding, so a transfer either aliases the
+ * incoming set or replaces it; a MAY def (conditional context — see
+ * StatementFacts.mayDefs) unions WITHOUT killing via a copy-on-extend.
  *
  * `limits.maxFacts` bounds materialization: facts are O(defs×uses) BY SPEC in
  * merge-heavy code (N branch-arm defs × N later uses = N² facts), and a
@@ -62,18 +74,42 @@ export interface ReachingDefsLimits {
      */
     readonly maxFacts?: number;
     /**
-     * Maximum total block dequeues in the dataflow fixpoint. Iterative
-     * reaching-defs on a reducible CFG converges in O(loop-nesting-depth) passes,
-     * so a worklist visits each block a small multiple of times for real code; a
-     * pathologically deep loop nest (machine-generated / obfuscated) drives the
-     * pass count — and thus the visit total — to O(blocks²) and the solver to
-     * seconds + GB of heap (`maxFacts` does not help: fact count stays linear).
-     * When the visit total exceeds this budget the fixpoint has NOT converged, so
-     * any facts would be unsound — the solver bails to a sound empty
-     * `status: 'truncated'` (like the `overflow` guard). `undefined`/0 ⇒ unlimited
-     * (the default for direct callers; the emit path sets a per-function budget).
+     * Adversarial-only safety bound on the DENSE worklist's iteration.
+     *
+     * The dense GEN/KILL solver reads this as a ceiling on total block dequeues:
+     * iterative reaching-defs on a reducible CFG converges in O(loop-nesting-depth)
+     * passes, but a pathologically deep loop nest drives the visit total — and thus
+     * the solver — to O(blocks²), seconds + GB of heap (`maxFacts` does not help:
+     * fact count stays linear). Exceeding the budget means the fixpoint has NOT
+     * converged, so any facts would be unsound — the dense solver bails to a sound
+     * empty `status: 'truncated'` (like the `overflow` guard).
+     *
+     * The SSA solver (#2201) has NO fixpoint iteration — it answers reaching
+     * queries from the def-use graph in one pass — so it always converges and this
+     * budget never trips it. The production dispatcher ({@link computeInSetsAuto})
+     * routes the deep nests that would breach the dense ceiling to the SSA solver,
+     * which computes their full facts: the ceiling that fired on the dense worklist
+     * effectively never fires on real code (#2201 acceptance). The budget is still
+     * honored on the dense fallback path (small / loop-free functions, and the
+     * throw-edge / unreachable-block cases the SSA path does not model).
+     *
+     * `undefined`/0 ⇒ unlimited (the default for direct callers; the emit path sets
+     * a per-function budget).
      */
     readonly maxBlockVisits?: number;
+    /**
+     * Memory bound on the SSA-sparse solver's value-graph construction (#2201
+     * review R1). `maxFacts` bounds fact MATERIALIZATION (sweepFacts) but nothing
+     * bounds the φ/value-graph the sparse path builds first; a high-binding-density
+     * deep loop routed to SSA (≥ SSA_MIN_BLOCKS blocks + a reachable loop) builds an
+     * O(blocks×bindings) graph the dense path would have truncated at the
+     * `maxBlockVisits` ceiling (~1.5 GB measured on a 3000-block × 300-binding
+     * function). When the projected node count would exceed this, the sparse solver
+     * falls back to the dense oracle (byte-identical, and bounded — dense honors
+     * `maxBlockVisits`). Honored ONLY by the sparse path; the dense solver ignores
+     * it. `undefined`/0 ⇒ {@link DEFAULT_MAX_SSA_VALUE_GRAPH_NODES}.
+     */
+    readonly maxSsaValueGraphNodes?: number;
 }
 export interface FunctionDefUse {
     /**
@@ -99,5 +135,14 @@ export interface FunctionDefUse {
 /**
  * Compute reaching definitions for one function. See the module doc for the
  * purity/determinism/sharing contract.
+ *
+ * This is the production entry point. As of #2201 it auto-dispatches via
+ * {@link computeInSetsAuto} — the SSA-sparse solver ({@link computeInSetsSparse})
+ * for looping functions large enough to amortize construction, the dense
+ * GEN/KILL worklist ({@link computeInSetsDense}) everywhere else (and for the
+ * throw-edge / unreachable-block functions the SSA path does not model). The two
+ * solvers are held byte-identical by the equivalence fuzz (status, bindings,
+ * sorted facts, def/use telemetry), so the dispatch is a pure performance
+ * heuristic; the dense solver doubles as that differential oracle.
  */
 export declare function computeReachingDefs(cfg: FunctionCfg, limits?: ReachingDefsLimits): FunctionDefUse;