gitnexus 1.6.8-rc.36 → 1.6.8-rc.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/dist/core/ingestion/cfg/cfg-builder.d.ts +50 -0
  2. package/dist/core/ingestion/cfg/cfg-builder.js +67 -0
  3. package/dist/core/ingestion/cfg/collect.d.ts +24 -3
  4. package/dist/core/ingestion/cfg/collect.js +50 -7
  5. package/dist/core/ingestion/cfg/control-dependence.d.ts +21 -10
  6. package/dist/core/ingestion/cfg/control-dependence.js +111 -49
  7. package/dist/core/ingestion/cfg/control-flow-context.d.ts +9 -0
  8. package/dist/core/ingestion/cfg/control-flow-context.js +11 -0
  9. package/dist/core/ingestion/cfg/emit.d.ts +19 -0
  10. package/dist/core/ingestion/cfg/emit.js +47 -9
  11. package/dist/core/ingestion/cfg/reaching-defs.d.ts +13 -0
  12. package/dist/core/ingestion/cfg/reaching-defs.js +24 -0
  13. package/dist/core/ingestion/cfg/synthetic-escape.d.ts +124 -0
  14. package/dist/core/ingestion/cfg/synthetic-escape.js +300 -0
  15. package/dist/core/ingestion/cfg/visitors/c-cpp-harvest.d.ts +111 -0
  16. package/dist/core/ingestion/cfg/visitors/c-cpp-harvest.js +539 -0
  17. package/dist/core/ingestion/cfg/visitors/c-cpp.d.ts +74 -0
  18. package/dist/core/ingestion/cfg/visitors/c-cpp.js +584 -0
  19. package/dist/core/ingestion/cfg/visitors/call-site-harvest.d.ts +152 -0
  20. package/dist/core/ingestion/cfg/visitors/call-site-harvest.js +304 -0
  21. package/dist/core/ingestion/cfg/visitors/csharp-harvest.d.ts +124 -0
  22. package/dist/core/ingestion/cfg/visitors/csharp-harvest.js +587 -0
  23. package/dist/core/ingestion/cfg/visitors/csharp.d.ts +87 -0
  24. package/dist/core/ingestion/cfg/visitors/csharp.js +867 -0
  25. package/dist/core/ingestion/cfg/visitors/dart-harvest.d.ts +168 -0
  26. package/dist/core/ingestion/cfg/visitors/dart-harvest.js +499 -0
  27. package/dist/core/ingestion/cfg/visitors/dart.d.ts +119 -0
  28. package/dist/core/ingestion/cfg/visitors/dart.js +836 -0
  29. package/dist/core/ingestion/cfg/visitors/go-harvest.d.ts +160 -0
  30. package/dist/core/ingestion/cfg/visitors/go-harvest.js +629 -0
  31. package/dist/core/ingestion/cfg/visitors/go.d.ts +108 -0
  32. package/dist/core/ingestion/cfg/visitors/go.js +638 -0
  33. package/dist/core/ingestion/cfg/visitors/java-harvest.d.ts +108 -0
  34. package/dist/core/ingestion/cfg/visitors/java-harvest.js +503 -0
  35. package/dist/core/ingestion/cfg/visitors/java.d.ts +101 -0
  36. package/dist/core/ingestion/cfg/visitors/java.js +812 -0
  37. package/dist/core/ingestion/cfg/visitors/kotlin-harvest.d.ts +170 -0
  38. package/dist/core/ingestion/cfg/visitors/kotlin-harvest.js +505 -0
  39. package/dist/core/ingestion/cfg/visitors/kotlin.d.ts +121 -0
  40. package/dist/core/ingestion/cfg/visitors/kotlin.js +809 -0
  41. package/dist/core/ingestion/cfg/visitors/php-harvest.d.ts +172 -0
  42. package/dist/core/ingestion/cfg/visitors/php-harvest.js +616 -0
  43. package/dist/core/ingestion/cfg/visitors/php.d.ts +96 -0
  44. package/dist/core/ingestion/cfg/visitors/php.js +721 -0
  45. package/dist/core/ingestion/cfg/visitors/python-harvest.d.ts +175 -0
  46. package/dist/core/ingestion/cfg/visitors/python-harvest.js +601 -0
  47. package/dist/core/ingestion/cfg/visitors/python.d.ts +103 -0
  48. package/dist/core/ingestion/cfg/visitors/python.js +558 -0
  49. package/dist/core/ingestion/cfg/visitors/ruby-harvest.d.ts +144 -0
  50. package/dist/core/ingestion/cfg/visitors/ruby-harvest.js +419 -0
  51. package/dist/core/ingestion/cfg/visitors/ruby.d.ts +112 -0
  52. package/dist/core/ingestion/cfg/visitors/ruby.js +756 -0
  53. package/dist/core/ingestion/cfg/visitors/rust-harvest.d.ts +165 -0
  54. package/dist/core/ingestion/cfg/visitors/rust-harvest.js +547 -0
  55. package/dist/core/ingestion/cfg/visitors/rust.d.ts +95 -0
  56. package/dist/core/ingestion/cfg/visitors/rust.js +558 -0
  57. package/dist/core/ingestion/cfg/visitors/scope-tree-harvest.d.ts +87 -0
  58. package/dist/core/ingestion/cfg/visitors/scope-tree-harvest.js +116 -0
  59. package/dist/core/ingestion/cfg/visitors/swift-harvest.d.ts +169 -0
  60. package/dist/core/ingestion/cfg/visitors/swift-harvest.js +505 -0
  61. package/dist/core/ingestion/cfg/visitors/swift.d.ts +116 -0
  62. package/dist/core/ingestion/cfg/visitors/swift.js +787 -0
  63. package/dist/core/ingestion/cfg/visitors/typescript.js +37 -38
  64. package/dist/core/ingestion/languages/c-cpp.js +3 -0
  65. package/dist/core/ingestion/languages/cobol.js +4 -0
  66. package/dist/core/ingestion/languages/csharp.js +2 -0
  67. package/dist/core/ingestion/languages/dart.js +2 -0
  68. package/dist/core/ingestion/languages/go.js +2 -0
  69. package/dist/core/ingestion/languages/java.js +3 -0
  70. package/dist/core/ingestion/languages/kotlin.js +3 -0
  71. package/dist/core/ingestion/languages/php.js +2 -0
  72. package/dist/core/ingestion/languages/python.js +2 -0
  73. package/dist/core/ingestion/languages/ruby.js +2 -0
  74. package/dist/core/ingestion/languages/rust.js +2 -0
  75. package/dist/core/ingestion/languages/swift.js +2 -0
  76. package/dist/core/ingestion/languages/vue.js +6 -0
  77. package/dist/core/ingestion/parsing-processor.js +22 -0
  78. package/dist/core/ingestion/scope-resolution/pipeline/run.js +17 -0
  79. package/dist/core/ingestion/workers/parse-worker.d.ts +12 -0
  80. package/dist/core/ingestion/workers/parse-worker.js +28 -7
  81. package/dist/core/ingestion/workers/worker-pool.js +10 -0
  82. package/package.json +1 -1
@@ -13,6 +13,30 @@
13
13
  * before the tree-sitter visitor (U2) drives it.
14
14
  */
15
15
  import type { BasicBlockData, BindingEntry, CfgEdgeKind, FunctionCfg, StatementFacts } from './types.js';
16
+ /**
17
+ * Hard ceiling on CFG recursive-descent scope-entry depth (#2195). A language
18
+ * `CfgVisitor` wraps each nested block scope in {@link CfgBuilder.withNesting} (its
19
+ * `visitBody` / `visitSeq` choke points), so the live count tracks scope entries,
20
+ * not statement width. NOTE the count is ~2× LEXICAL nesting for block-bodied
21
+ * constructs (visitBody → visitSeq both enter), so the effective lexical ceiling
22
+ * is ~250 levels for block bodies (~500 for single-statement bodies / bare
23
+ * blocks). Real source nests ≤ ~50 deep, so this fires only on machine-generated
24
+ * / adversarial input. Both effective ceilings sit far below the engine's native
25
+ * stack limit (~1.2k+ nesting even on the raised worker `stackSizeMb`), so the
26
+ * bail is a DETERMINISTIC, language-independent {@link CfgNestingDepthError}
27
+ * rather than a nondeterministic `RangeError` thrown somewhere mid-walk.
28
+ */
29
+ export declare const MAX_CFG_NESTING_DEPTH = 500;
30
+ /**
31
+ * Thrown by the visitor nesting-depth guard ({@link CfgBuilder.enterNesting})
32
+ * when scope-entry nesting depth exceeds {@link MAX_CFG_NESTING_DEPTH}. `collectFunctionCfgs`
33
+ * catches it and counts the function under `skipped.tooDeeplyNested`, isolating
34
+ * the bail to one function instead of risking a worker-wide stack overflow.
35
+ */
36
+ export declare class CfgNestingDepthError extends Error {
37
+ readonly limit: number;
38
+ constructor(limit: number);
39
+ }
16
40
  export declare class CfgBuilder {
17
41
  private readonly filePath;
18
42
  private readonly functionStartLine;
@@ -24,6 +48,8 @@ export declare class CfgBuilder {
24
48
  private readonly blocks;
25
49
  private readonly edges;
26
50
  private readonly edgeKeys;
51
+ /** Live recursive-descent nesting depth — see {@link enterNesting}. */
52
+ private nesting;
27
53
  readonly entryIndex: number;
28
54
  readonly exitIndex: number;
29
55
  constructor(filePath: string, functionStartLine: number, functionEndLine: number,
@@ -48,6 +74,30 @@ export declare class CfgBuilder {
48
74
  */
49
75
  attachFacts(index: number, facts: StatementFacts): void;
50
76
  get blockCount(): number;
77
+ /**
78
+ * Run `fn` inside ONE nested block scope (#2195) — the single choke every
79
+ * visitor's `visitBody` / `visitSeq` funnels through. Enters on the way in and
80
+ * exits in a `finally`, so the live depth is balanced on every return AND every
81
+ * throw and the enter/exit can never drift out of pair (the reason this is one
82
+ * helper, not 24 hand-paired call sites). Throws {@link CfgNestingDepthError}
83
+ * when nesting exceeds {@link MAX_CFG_NESTING_DEPTH} — a proactive, deterministic
84
+ * bail before the native stack can overflow on a pathologically nested function.
85
+ *
86
+ * A block-bodied construct passes through BOTH visitBody and visitSeq, so it
87
+ * costs TWO scopes per lexical level: the effective structural ceiling is
88
+ * ~MAX_CFG_NESTING_DEPTH/2 (~250) lexical levels for block bodies (~500 for
89
+ * single-statement bodies / bare blocks, which hit only one of the two). Still
90
+ * an order of magnitude below the native limit and far above real code (≤ ~50).
91
+ */
92
+ withNesting<T>(fn: () => T): T;
93
+ /**
94
+ * Increment the nesting counter, throwing {@link CfgNestingDepthError} past the
95
+ * cap. Prefer {@link withNesting}, which pairs the exit in a `finally`; this is
96
+ * exposed for direct depth-accounting tests only.
97
+ */
98
+ enterNesting(): void;
99
+ /** Decrement the nesting counter — the partner of {@link enterNesting}. */
100
+ exitNesting(): void;
51
101
  /** Produce the serializable CFG. Caller is responsible for having wired the
52
102
  * function's dangling exits to {@link exitIndex} before calling.
53
103
  *
@@ -1,3 +1,31 @@
1
+ /**
2
+ * Hard ceiling on CFG recursive-descent scope-entry depth (#2195). A language
3
+ * `CfgVisitor` wraps each nested block scope in {@link CfgBuilder.withNesting} (its
4
+ * `visitBody` / `visitSeq` choke points), so the live count tracks scope entries,
5
+ * not statement width. NOTE the count is ~2× LEXICAL nesting for block-bodied
6
+ * constructs (visitBody → visitSeq both enter), so the effective lexical ceiling
7
+ * is ~250 levels for block bodies (~500 for single-statement bodies / bare
8
+ * blocks). Real source nests ≤ ~50 deep, so this fires only on machine-generated
9
+ * / adversarial input. Both effective ceilings sit far below the engine's native
10
+ * stack limit (~1.2k+ nesting even on the raised worker `stackSizeMb`), so the
11
+ * bail is a DETERMINISTIC, language-independent {@link CfgNestingDepthError}
12
+ * rather than a nondeterministic `RangeError` thrown somewhere mid-walk.
13
+ */
14
+ export const MAX_CFG_NESTING_DEPTH = 500;
15
+ /**
16
+ * Thrown by the visitor nesting-depth guard ({@link CfgBuilder.enterNesting})
17
+ * when scope-entry nesting depth exceeds {@link MAX_CFG_NESTING_DEPTH}. `collectFunctionCfgs`
18
+ * catches it and counts the function under `skipped.tooDeeplyNested`, isolating
19
+ * the bail to one function instead of risking a worker-wide stack overflow.
20
+ */
21
+ export class CfgNestingDepthError extends Error {
22
+ limit;
23
+ constructor(limit) {
24
+ super(`CFG nesting depth exceeded ${limit}`);
25
+ this.limit = limit;
26
+ this.name = 'CfgNestingDepthError';
27
+ }
28
+ }
1
29
  export class CfgBuilder {
2
30
  filePath;
3
31
  functionStartLine;
@@ -6,6 +34,8 @@ export class CfgBuilder {
6
34
  blocks = [];
7
35
  edges = [];
8
36
  edgeKeys = new Set();
37
+ /** Live recursive-descent nesting depth — see {@link enterNesting}. */
38
+ nesting = 0;
9
39
  entryIndex;
10
40
  exitIndex;
11
41
  constructor(filePath, functionStartLine, functionEndLine,
@@ -72,6 +102,43 @@ export class CfgBuilder {
72
102
  get blockCount() {
73
103
  return this.blocks.length;
74
104
  }
105
+ /**
106
+ * Run `fn` inside ONE nested block scope (#2195) — the single choke every
107
+ * visitor's `visitBody` / `visitSeq` funnels through. Enters on the way in and
108
+ * exits in a `finally`, so the live depth is balanced on every return AND every
109
+ * throw and the enter/exit can never drift out of pair (the reason this is one
110
+ * helper, not 24 hand-paired call sites). Throws {@link CfgNestingDepthError}
111
+ * when nesting exceeds {@link MAX_CFG_NESTING_DEPTH} — a proactive, deterministic
112
+ * bail before the native stack can overflow on a pathologically nested function.
113
+ *
114
+ * A block-bodied construct passes through BOTH visitBody and visitSeq, so it
115
+ * costs TWO scopes per lexical level: the effective structural ceiling is
116
+ * ~MAX_CFG_NESTING_DEPTH/2 (~250) lexical levels for block bodies (~500 for
117
+ * single-statement bodies / bare blocks, which hit only one of the two). Still
118
+ * an order of magnitude below the native limit and far above real code (≤ ~50).
119
+ */
120
+ withNesting(fn) {
121
+ this.enterNesting();
122
+ try {
123
+ return fn();
124
+ }
125
+ finally {
126
+ this.exitNesting();
127
+ }
128
+ }
129
+ /**
130
+ * Increment the nesting counter, throwing {@link CfgNestingDepthError} past the
131
+ * cap. Prefer {@link withNesting}, which pairs the exit in a `finally`; this is
132
+ * exposed for direct depth-accounting tests only.
133
+ */
134
+ enterNesting() {
135
+ if (++this.nesting > MAX_CFG_NESTING_DEPTH)
136
+ throw new CfgNestingDepthError(MAX_CFG_NESTING_DEPTH);
137
+ }
138
+ /** Decrement the nesting counter — the partner of {@link enterNesting}. */
139
+ exitNesting() {
140
+ this.nesting--;
141
+ }
75
142
  /** Produce the serializable CFG. Caller is responsible for having wired the
76
143
  * function's dangling exits to {@link exitIndex} before calling.
77
144
  *
@@ -22,9 +22,30 @@ import type { CfgVisitor, FunctionCfg } from './types.js';
22
22
  * both expensive and low-value. Overridable via `PipelineOptions.pdgMaxFunctionLines`.
23
23
  */
24
24
  export declare const DEFAULT_PDG_MAX_FUNCTION_LINES = 2000;
25
+ /**
26
+ * CFG-bearing functions skipped during the walk, bucketed by reason (#2195).
27
+ * Surfaced per-language in the parse telemetry (parsing-processor.ts) so a CFG
28
+ * coverage gap is observable, not silent. All-zero ⇒ nothing skipped.
29
+ */
30
+ export interface CfgSkipCounts {
31
+ /** Source span exceeded `maxFunctionLines` (minified / generated code). */
32
+ readonly tooManyLines: number;
33
+ /**
34
+ * Recursive-descent nesting hit {@link MAX_CFG_NESTING_DEPTH} — a proactive,
35
+ * deterministic bail (see {@link CfgNestingDepthError}) before a worker stack
36
+ * overflow.
37
+ */
38
+ readonly tooDeeplyNested: number;
39
+ /**
40
+ * `buildFunctionCfg` threw an unexpected error. Caught PER FUNCTION so one
41
+ * malformed function no longer drops the whole file's CFGs (the throw used to
42
+ * escape to the worker's language-group catch).
43
+ */
44
+ readonly buildError: number;
45
+ }
25
46
  export interface CollectedCfgs {
26
47
  readonly cfgs: readonly FunctionCfg[];
27
- /** Functions skipped for exceeding `maxFunctionLines` (0 ⇒ none skipped). */
28
- readonly skipped: number;
48
+ /** Per-reason skip counts (#2195). */
49
+ readonly skipped: CfgSkipCounts;
29
50
  }
30
- export declare function collectFunctionCfgs(root: SyntaxNode, visitor: CfgVisitor<SyntaxNode>, filePath: string, maxFunctionLines?: number): CollectedCfgs;
51
+ export declare function collectFunctionCfgs(root: SyntaxNode, visitor: CfgVisitor<SyntaxNode>, filePath: string, maxFunctionLines?: number, lineOffset?: number): CollectedCfgs;
@@ -1,3 +1,4 @@
1
+ import { CfgNestingDepthError } from './cfg-builder.js';
1
2
  /**
2
3
  * Default per-function source-line cap used by the worker when the `--pdg` run
3
4
  * does not specify `pdgMaxFunctionLines`. A function longer than this (almost
@@ -5,21 +6,63 @@
5
6
  * both expensive and low-value. Overridable via `PipelineOptions.pdgMaxFunctionLines`.
6
7
  */
7
8
  export const DEFAULT_PDG_MAX_FUNCTION_LINES = 2000;
8
- export function collectFunctionCfgs(root, visitor, filePath, maxFunctionLines = 0) {
9
+ /**
10
+ * Convert a CFG built from an EXTRACTED sub-document's AST (script-relative
11
+ * tree-sitter rows) into the enclosing file's coordinates by adding `offset` to
12
+ * every source-line field. Needed for embedded scripts — a Vue SFC `<script>`
13
+ * block parses at row 0 but lives at `lineOffset` in the `.vue` file, and every
14
+ * other worker-emitted graph node is already file-relative; without this, the
15
+ * CFG's `functionStartLine` would never join its Function/Method graph node
16
+ * (inter-procedural taint silently resolves nothing) and BasicBlock source
17
+ * lines would point at the wrong `.vue` line. A 0 offset returns the input
18
+ * unchanged (the common case: `.ts`/`.js`/etc. parse at the file root), keeping
19
+ * non-embedded languages byte-identical. Synthetic bindings keep `declLine` 0.
20
+ */
21
+ function shiftCfgLines(cfg, offset) {
22
+ if (offset === 0)
23
+ return cfg;
24
+ return {
25
+ ...cfg,
26
+ functionStartLine: cfg.functionStartLine + offset,
27
+ functionEndLine: cfg.functionEndLine + offset,
28
+ blocks: cfg.blocks.map((b) => ({
29
+ ...b,
30
+ startLine: b.startLine + offset,
31
+ endLine: b.endLine + offset,
32
+ statements: b.statements?.map((s) => ({ ...s, line: s.line + offset })),
33
+ })),
34
+ bindings: cfg.bindings?.map((bd) => bd.declLine > 0 ? { ...bd, declLine: bd.declLine + offset } : bd),
35
+ };
36
+ }
37
+ export function collectFunctionCfgs(root, visitor, filePath, maxFunctionLines = 0, lineOffset = 0) {
9
38
  const cfgs = [];
10
- let skipped = 0;
39
+ let tooManyLines = 0;
40
+ let tooDeeplyNested = 0;
41
+ let buildError = 0;
11
42
  const stack = [root];
12
43
  while (stack.length) {
13
44
  const node = stack.pop();
14
45
  if (visitor.isFunction(node)) {
15
46
  const lines = node.endPosition.row - node.startPosition.row + 1;
16
47
  if (maxFunctionLines > 0 && lines > maxFunctionLines) {
17
- skipped++;
48
+ tooManyLines++;
18
49
  }
19
50
  else {
20
- const cfg = visitor.buildFunctionCfg(node, filePath);
21
- if (cfg)
22
- cfgs.push(cfg);
51
+ // Isolate the per-function build: a proactive deep-nesting bail
52
+ // (CfgNestingDepthError) or any other visitor throw is counted and
53
+ // skipped HERE, so it can't escape to the worker's language-group catch
54
+ // and silently drop every remaining function's CFG (#2195).
55
+ try {
56
+ const cfg = visitor.buildFunctionCfg(node, filePath);
57
+ if (cfg)
58
+ cfgs.push(shiftCfgLines(cfg, lineOffset));
59
+ }
60
+ catch (err) {
61
+ if (err instanceof CfgNestingDepthError)
62
+ tooDeeplyNested++;
63
+ else
64
+ buildError++;
65
+ }
23
66
  }
24
67
  }
25
68
  // Descend regardless (a skipped mega-function may still contain small
@@ -30,5 +73,5 @@ export function collectFunctionCfgs(root, visitor, filePath, maxFunctionLines =
30
73
  stack.push(child);
31
74
  }
32
75
  }
33
- return { cfgs, skipped };
76
+ return { cfgs, skipped: { tooManyLines, tooDeeplyNested, buildError } };
34
77
  }
@@ -1,15 +1,26 @@
1
1
  /**
2
- * Control dependence (#2085 M5 U3) — Ferrante, Ottenstein & Warren §3.1.1 over
3
- * the post-dominator tree. A block `dependent` is control-dependent on a branch
4
- * block `controller` when `controller` decides whether `dependent` executes:
5
- * formally, there is a CFG edge `controller → B` such that `dependent`
6
- * post-dominates `B` but does NOT strictly post-dominate `controller`.
2
+ * Control dependence (#2085 M5 U3) — Ferrante, Ottenstein & Warren §3.1.1
3
+ * semantics. A block `dependent` is control-dependent on a branch block
4
+ * `controller` when `controller` decides whether `dependent` executes: formally,
5
+ * there is a CFG edge `controller → B` such that `dependent` post-dominates `B`
6
+ * but does NOT strictly post-dominate `controller`.
7
7
  *
8
- * Construction (§3.1.1): for each CFG edge `(A, B)` where `B` does NOT
9
- * post-dominate `A`, walk UP the post-dom tree from `B` to (but not including)
10
- * `ipdom(A)`; every block on that path is control-dependent on `A`. The branch
11
- * SENSE of the edge ('T' | 'F') becomes the edge label (KTD4 / KTD3 — it rides
12
- * the persisted relation's `reason` column).
8
+ * Construction — the reverse-CFG dominance-frontier formulation (Cytron,
9
+ * Ferrante, Rosen, Wegman & Zadeck 1991): control dependence IS the dominance
10
+ * frontier of the reverse CFG, so `A ∈ PDF(X)` (the post-dominance frontier)
11
+ * ⟺ `X` is control-dependent on `A`. The PDF is computed bottom-up over the
12
+ * post-dom tree (`PDF_local` from a node's CFG predecessors + `PDF_up` from its
13
+ * post-dom-tree children) in O(N + E + output) — each up-step is charged to a
14
+ * distinct emitted edge, NOT re-walked per CFG edge as the original §3.1.1
15
+ * up-walk did (which was Θ(N²) on a deep post-dom chain). The two formulations
16
+ * enumerate the IDENTICAL full `(controller, dependent, label)` set (verified
17
+ * byte-identical on 3203 CFGs + ~1M-case differential fuzz); LLVM, Joern and WALA
18
+ * use the reverse-DF form. (Only the rare TRUNCATED prefix — when a function
19
+ * exceeds `maxEdges` — differs from the old prefix: it is now a sorted
20
+ * deterministic prefix rather than CFG-edge-iteration order. Both are valid,
21
+ * deterministic subsets; the full untruncated output is unchanged.)
22
+ * The branch SENSE ('T' | 'F') of the controlling edge becomes the edge label
23
+ * (KTD4 / KTD3 — it rides the persisted relation's `reason` column).
13
24
  *
14
25
  * PURE AND DETERMINISTIC (mirrors post-dominators.ts / reaching-defs.ts): no
15
26
  * graph, no logger, importable outside the worker; output is deduped per
@@ -1,15 +1,26 @@
1
1
  /**
2
- * Control dependence (#2085 M5 U3) — Ferrante, Ottenstein & Warren §3.1.1 over
3
- * the post-dominator tree. A block `dependent` is control-dependent on a branch
4
- * block `controller` when `controller` decides whether `dependent` executes:
5
- * formally, there is a CFG edge `controller → B` such that `dependent`
6
- * post-dominates `B` but does NOT strictly post-dominate `controller`.
2
+ * Control dependence (#2085 M5 U3) — Ferrante, Ottenstein & Warren §3.1.1
3
+ * semantics. A block `dependent` is control-dependent on a branch block
4
+ * `controller` when `controller` decides whether `dependent` executes: formally,
5
+ * there is a CFG edge `controller → B` such that `dependent` post-dominates `B`
6
+ * but does NOT strictly post-dominate `controller`.
7
7
  *
8
- * Construction (§3.1.1): for each CFG edge `(A, B)` where `B` does NOT
9
- * post-dominate `A`, walk UP the post-dom tree from `B` to (but not including)
10
- * `ipdom(A)`; every block on that path is control-dependent on `A`. The branch
11
- * SENSE of the edge ('T' | 'F') becomes the edge label (KTD4 / KTD3 — it rides
12
- * the persisted relation's `reason` column).
8
+ * Construction — the reverse-CFG dominance-frontier formulation (Cytron,
9
+ * Ferrante, Rosen, Wegman & Zadeck 1991): control dependence IS the dominance
10
+ * frontier of the reverse CFG, so `A ∈ PDF(X)` (the post-dominance frontier)
11
+ * ⟺ `X` is control-dependent on `A`. The PDF is computed bottom-up over the
12
+ * post-dom tree (`PDF_local` from a node's CFG predecessors + `PDF_up` from its
13
+ * post-dom-tree children) in O(N + E + output) — each up-step is charged to a
14
+ * distinct emitted edge, NOT re-walked per CFG edge as the original §3.1.1
15
+ * up-walk did (which was Θ(N²) on a deep post-dom chain). The two formulations
16
+ * enumerate the IDENTICAL full `(controller, dependent, label)` set (verified
17
+ * byte-identical on 3203 CFGs + ~1M-case differential fuzz); LLVM, Joern and WALA
18
+ * use the reverse-DF form. (Only the rare TRUNCATED prefix — when a function
19
+ * exceeds `maxEdges` — differs from the old prefix: it is now a sorted
20
+ * deterministic prefix rather than CFG-edge-iteration order. Both are valid,
21
+ * deterministic subsets; the full untruncated output is unchanged.)
22
+ * The branch SENSE ('T' | 'F') of the controlling edge becomes the edge label
23
+ * (KTD4 / KTD3 — it rides the persisted relation's `reason` column).
13
24
  *
14
25
  * PURE AND DETERMINISTIC (mirrors post-dominators.ts / reaching-defs.ts): no
15
26
  * graph, no logger, importable outside the worker; output is deduped per
@@ -18,7 +29,7 @@
18
29
  * control-dependent on ITSELF (`controller === dependent`) — the loop predicate
19
30
  * gates its own re-execution; this is standard PDG behavior, not a bug.
20
31
  */
21
- import { computePostDominators, postDominates, NO_IPDOM, } from './post-dominators.js';
32
+ import { computePostDominators, NO_IPDOM } from './post-dominators.js';
22
33
  function buildArmSenses(cfg) {
23
34
  const n = cfg.blocks.length;
24
35
  const senses = Array.from({ length: n }, () => ({
@@ -65,56 +76,107 @@ function labelFor(kind, controller) {
65
76
  * module doc for the purity/determinism contract.
66
77
  */
67
78
  export function computeControlDependence(cfg, postDom,
68
- // Heap-safety ceiling on materialized edges, mirroring computeReachingDefs'
69
- // `maxFacts` (#2188 review): the pre-dedup walk is O(edges × post-dom depth),
70
- // so bound it before it can spike. `0` ⇒ unbounded. On overflow `edges` is a
71
- // deterministic prefix and `truncated` is set — never a silent drop.
79
+ // Output-size ceiling, mirroring computeReachingDefs' `maxFacts` (#2188 review).
80
+ // The reverse-DF set is the bounded (controller, dependent, label) dependence
81
+ // relation, so peak working set ≈ output here (no pre-dedup spike like the old
82
+ // up-walk) — this caps the final edge COUNT, not transient memory. `0` ⇒
83
+ // unbounded. On overflow `edges` is a deterministic SORTED prefix and
84
+ // `truncated` is set — never a silent drop. (Because the prefix is sorted, its
85
+ // CONTENTS may differ from the old up-walk's CFG-edge-iteration prefix at the
86
+ // cap boundary; the FULL untruncated set is byte-identical — see the module doc.)
72
87
  maxEdges = 0) {
73
88
  const tree = postDom ?? computePostDominators(cfg);
74
89
  const { ipdom } = tree;
75
90
  const n = cfg.blocks.length;
76
91
  const armSenses = buildArmSenses(cfg);
77
92
  const cap = maxEdges > 0 ? maxEdges : Infinity;
78
- const out = [];
79
- const seen = new Set();
80
- let truncated = false;
81
- scan: for (const e of cfg.edges) {
82
- const a = e.from;
83
- const b = e.to;
84
- if (a < 0 || a >= n || b < 0 || b >= n)
85
- continue;
86
- // No control dependence when B post-dominates A — every path leaving A
87
- // through this edge still reaches B, so A does not decide B's execution.
88
- // This guard is exactly AC2: a dependence exists IFF post-dominance fails.
89
- if (postDominates(tree, b, a))
93
+ // Reverse-CFG post-dominance frontier (Cytron, Ferrante, Rosen, Wegman,
94
+ // Zadeck 1991): control dependence IS the dominance frontier of the reverse
95
+ // CFG. `A ∈ PDF(X)` ⟺ X is control-dependent on A, so emit (controller=A,
96
+ // dependent=X). Computing the PDF bottom-up over the post-dom tree charges
97
+ // each up-step to a DISTINCT emitted entry — O(N+E+output) — instead of the
98
+ // old Ferrante up-walk that re-climbs the ipdom chain per CFG edge (Θ(N²) on
99
+ // a deep post-dom chain). Output is the identical (controller, dependent,
100
+ // label) set (verified byte-identical on 3203 CFGs across all languages +
101
+ // fuzz) and 1-2 orders of magnitude faster. LLVM (ReverseIDFCalculator),
102
+ // Joern (CdgPass) and WALA use the same formulation.
103
+ const children = Array.from({ length: n }, () => []);
104
+ const inEdges = Array.from({ length: n }, () => []);
105
+ for (let b = 0; b < n; b++) {
106
+ const ip = ipdom[b];
107
+ if (ip !== NO_IPDOM && ip >= 0 && ip < n)
108
+ children[ip].push(b);
109
+ }
110
+ for (const e of cfg.edges) {
111
+ if (e.from < 0 || e.from >= n || e.to < 0 || e.to >= n)
90
112
  continue;
91
- // Sense is read from the CONTROLLER's arms, not this edge's kind alone —
92
- // seq/loop-back fall-through false arms would otherwise mislabel as 'T'
93
- // (#2188 F1).
94
- const label = labelFor(e.kind, armSenses[a]);
95
- const stop = ipdom[a]; // walk up to ipdom(A), EXCLUSIVE (NO_IPDOM ⇒ to root)
96
- let cur = b;
97
- let steps = 0;
98
- // `steps <= n` is defensive — the ipdom chain is a finite tree.
99
- while (cur !== NO_IPDOM && cur !== stop && steps <= n) {
100
- const key = `${a}:${cur}:${label}`;
101
- if (!seen.has(key)) {
102
- // Check BEFORE pushing so `truncated` means a genuine overflow (a new
103
- // unique edge had to be dropped), not merely "reached the ceiling" —
104
- // exactly `cap` edges is a full, non-truncated result.
105
- if (out.length >= cap) {
106
- truncated = true;
107
- break scan;
108
- }
109
- seen.add(key);
110
- out.push({ controllerBlock: a, dependentBlock: cur, label });
113
+ inEdges[e.to].push({ from: e.from, kind: e.kind });
114
+ }
115
+ // Post-dom-tree post-order (children before parents). Iterative — the post-dom
116
+ // forest can itself be chain-deep. Roots are the NO_IPDOM nodes (EXIT, plus
117
+ // any exit-unreachable region per #2188 F2). The reverse of a root-first DFS
118
+ // visits every parent AFTER all its descendants.
119
+ const dfs = [];
120
+ for (let r = 0; r < n; r++)
121
+ if (ipdom[r] === NO_IPDOM)
122
+ dfs.push(r);
123
+ const preorder = [];
124
+ while (dfs.length) {
125
+ const x = dfs.pop();
126
+ preorder.push(x);
127
+ for (const c of children[x])
128
+ dfs.push(c);
129
+ }
130
+ const order = preorder.reverse();
131
+ // PDF[X]: controller A → the label SET with which A controls X. A set (not one
132
+ // label) because a controller can reach X via opposite-sense arms (goto-
133
+ // cycles) — the old (a, cur, label) dedup kept both rows.
134
+ const pdf = Array.from({ length: n }, () => new Map());
135
+ const add = (x, a, label) => {
136
+ const set = pdf[x].get(a);
137
+ if (set)
138
+ set.add(label);
139
+ else
140
+ pdf[x].set(a, new Set([label]));
141
+ };
142
+ for (const x of order) {
143
+ // PDF_local: a CFG-predecessor A of X that X does not (immediately) post-
144
+ // dominate. `A !== X && ipdom[A] !== X` is exactly the production
145
+ // `!postDominates(X, A)` for one edge A→X (postDominates(X,A) ⟺ ipdom[A]===X),
146
+ // and it excludes self-edges + NO_IPDOM regions. Sense is read from the
147
+ // CONTROLLER's arms (seq/loop-back fall-through false arms would otherwise
148
+ // mislabel as 'T' — #2188 F1).
149
+ for (const { from: a, kind } of inEdges[x]) {
150
+ if (a !== x && ipdom[a] !== x)
151
+ add(x, a, labelFor(kind, armSenses[a]));
152
+ }
153
+ // PDF_up: inherit each post-dom child's frontier controller (with its label
154
+ // set) when X does not post-dominate it.
155
+ for (const z of children[x]) {
156
+ for (const [a, labels] of pdf[z]) {
157
+ if (ipdom[a] !== x)
158
+ for (const l of labels)
159
+ add(x, a, l);
111
160
  }
112
- cur = ipdom[cur];
113
- steps += 1;
161
+ }
162
+ }
163
+ const out = [];
164
+ for (const x of order) {
165
+ for (const [a, labels] of pdf[x]) {
166
+ for (const label of labels)
167
+ out.push({ controllerBlock: a, dependentBlock: x, label });
114
168
  }
115
169
  }
116
170
  out.sort((x, y) => x.controllerBlock - y.controllerBlock ||
117
171
  x.dependentBlock - y.dependentBlock ||
118
172
  (x.label < y.label ? -1 : x.label > y.label ? 1 : 0));
173
+ // `maxEdges` is a heap-safety backstop applied to the SORTED set (the DF makes
174
+ // overflow far rarer than the old per-edge walk). Deterministic prefix, never
175
+ // a silent drop; mirrors computeReachingDefs' `truncated`.
176
+ let truncated = false;
177
+ if (cap !== Infinity && out.length > cap) {
178
+ truncated = true;
179
+ out.length = cap;
180
+ }
119
181
  return { edges: out, truncated };
120
182
  }
@@ -62,6 +62,15 @@ export declare class ControlFlowContext {
62
62
  resolveBreak(label?: string): JumpResolution | undefined;
63
63
  /** Resolve a `continue`: like {@link resolveBreak} but only loop frames match. */
64
64
  resolveContinue(label?: string): JumpResolution | undefined;
65
+ /**
66
+ * Resolve a Java `yield e` (switch-EXPRESSION arm exit): the nearest enclosing
67
+ * SWITCH frame's exit, threading the finalizers stacked above it. Unlike a
68
+ * `break`, a `yield` ALWAYS targets the switch — never an intervening loop — so
69
+ * it cannot match a loop frame (a `yield` inside a loop inside a switch arm
70
+ * still exits the whole switch). Returns `undefined` when there is no enclosing
71
+ * switch (malformed input); the caller falls back to its conservative routing.
72
+ */
73
+ resolveYield(): JumpResolution | undefined;
65
74
  /** Every active finalizer, innermost first — what a `return` must cross. */
66
75
  finalizersForReturn(): readonly FinalizerFrame[];
67
76
  /**
@@ -39,6 +39,17 @@ export class ControlFlowContext {
39
39
  resolveContinue(label) {
40
40
  return this.resolve((f) => f.kind === 'loop' && (label === undefined || f.labels.includes(label)), (f) => f.continueTo);
41
41
  }
42
+ /**
43
+ * Resolve a Java `yield e` (switch-EXPRESSION arm exit): the nearest enclosing
44
+ * SWITCH frame's exit, threading the finalizers stacked above it. Unlike a
45
+ * `break`, a `yield` ALWAYS targets the switch — never an intervening loop — so
46
+ * it cannot match a loop frame (a `yield` inside a loop inside a switch arm
47
+ * still exits the whole switch). Returns `undefined` when there is no enclosing
48
+ * switch (malformed input); the caller falls back to its conservative routing.
49
+ */
50
+ resolveYield() {
51
+ return this.resolve((f) => f.kind === 'switch');
52
+ }
42
53
  /** Every active finalizer, innermost first — what a `return` must cross. */
43
54
  finalizersForReturn() {
44
55
  const fins = [];
@@ -76,6 +76,25 @@ export declare const POST_DOMINATE_DEBUG_ENV = "GITNEXUS_PDG_EMIT_POST_DOMINATE"
76
76
  export declare const REACHING_DEF_FACTS_PER_EDGE_CAP = 4;
77
77
  /** Derived emit-path fact limit at the default edge cap (bench/doc anchor). */
78
78
  export declare const DEFAULT_PDG_MAX_REACHING_DEF_FACTS_PER_FUNCTION: number;
79
+ /**
80
+ * Fixpoint-iteration budget for {@link computeReachingDefs}, as a multiple of
81
+ * the function's block count ({@link emitFileReachingDefs} passes
82
+ * `blocks.length × this` as `maxBlockVisits`). Iterative reaching-defs on a
83
+ * reducible CFG converges in O(loop-nesting-depth) passes, so a worklist
84
+ * re-visits each block a small multiple of times for real code; this budget
85
+ * tolerates a nesting depth far beyond any hand-written function (real code is
86
+ * ≤ ~15 deep) while truncating the pathological deep nest that otherwise drives
87
+ * the solver to O(blocks²) — measured at seconds + GB on a machine-generated
88
+ * 2000-line all-loops function whose fact count stays linear (so `maxFacts`
89
+ * never fires). Truncation degrades to a sound empty REACHING_DEF for that one
90
+ * function (status `truncated`), never wrong facts.
91
+ *
92
+ * This ceiling is the SOUND backstop, not a perf fix: WTO / loop-aware iteration
93
+ * ordering was benchmarked and rejected (0% faster — the cost is dense-set
94
+ * propagation, not visitation order; see the no-go note in reaching-defs.ts at
95
+ * the RPO-order site). SSA-sparse reaching-defs is the deferred real fix.
96
+ */
97
+ export declare const DEFAULT_PDG_MAX_REACHING_DEF_BLOCK_REVISITS = 64;
79
98
  export interface CfgEmitResult {
80
99
  blocks: number;
81
100
  edges: number;