gitnexus 1.6.8-rc.36 → 1.6.8-rc.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/ingestion/cfg/cfg-builder.d.ts +50 -0
- package/dist/core/ingestion/cfg/cfg-builder.js +67 -0
- package/dist/core/ingestion/cfg/collect.d.ts +24 -3
- package/dist/core/ingestion/cfg/collect.js +50 -7
- package/dist/core/ingestion/cfg/control-dependence.d.ts +21 -10
- package/dist/core/ingestion/cfg/control-dependence.js +111 -49
- package/dist/core/ingestion/cfg/control-flow-context.d.ts +9 -0
- package/dist/core/ingestion/cfg/control-flow-context.js +11 -0
- package/dist/core/ingestion/cfg/emit.d.ts +19 -0
- package/dist/core/ingestion/cfg/emit.js +47 -9
- package/dist/core/ingestion/cfg/reaching-defs.d.ts +13 -0
- package/dist/core/ingestion/cfg/reaching-defs.js +24 -0
- package/dist/core/ingestion/cfg/synthetic-escape.d.ts +124 -0
- package/dist/core/ingestion/cfg/synthetic-escape.js +300 -0
- package/dist/core/ingestion/cfg/visitors/c-cpp-harvest.d.ts +111 -0
- package/dist/core/ingestion/cfg/visitors/c-cpp-harvest.js +539 -0
- package/dist/core/ingestion/cfg/visitors/c-cpp.d.ts +74 -0
- package/dist/core/ingestion/cfg/visitors/c-cpp.js +584 -0
- package/dist/core/ingestion/cfg/visitors/call-site-harvest.d.ts +152 -0
- package/dist/core/ingestion/cfg/visitors/call-site-harvest.js +304 -0
- package/dist/core/ingestion/cfg/visitors/csharp-harvest.d.ts +124 -0
- package/dist/core/ingestion/cfg/visitors/csharp-harvest.js +587 -0
- package/dist/core/ingestion/cfg/visitors/csharp.d.ts +87 -0
- package/dist/core/ingestion/cfg/visitors/csharp.js +867 -0
- package/dist/core/ingestion/cfg/visitors/dart-harvest.d.ts +168 -0
- package/dist/core/ingestion/cfg/visitors/dart-harvest.js +499 -0
- package/dist/core/ingestion/cfg/visitors/dart.d.ts +119 -0
- package/dist/core/ingestion/cfg/visitors/dart.js +836 -0
- package/dist/core/ingestion/cfg/visitors/go-harvest.d.ts +160 -0
- package/dist/core/ingestion/cfg/visitors/go-harvest.js +629 -0
- package/dist/core/ingestion/cfg/visitors/go.d.ts +108 -0
- package/dist/core/ingestion/cfg/visitors/go.js +638 -0
- package/dist/core/ingestion/cfg/visitors/java-harvest.d.ts +108 -0
- package/dist/core/ingestion/cfg/visitors/java-harvest.js +503 -0
- package/dist/core/ingestion/cfg/visitors/java.d.ts +101 -0
- package/dist/core/ingestion/cfg/visitors/java.js +812 -0
- package/dist/core/ingestion/cfg/visitors/kotlin-harvest.d.ts +170 -0
- package/dist/core/ingestion/cfg/visitors/kotlin-harvest.js +505 -0
- package/dist/core/ingestion/cfg/visitors/kotlin.d.ts +121 -0
- package/dist/core/ingestion/cfg/visitors/kotlin.js +809 -0
- package/dist/core/ingestion/cfg/visitors/php-harvest.d.ts +172 -0
- package/dist/core/ingestion/cfg/visitors/php-harvest.js +616 -0
- package/dist/core/ingestion/cfg/visitors/php.d.ts +96 -0
- package/dist/core/ingestion/cfg/visitors/php.js +721 -0
- package/dist/core/ingestion/cfg/visitors/python-harvest.d.ts +175 -0
- package/dist/core/ingestion/cfg/visitors/python-harvest.js +601 -0
- package/dist/core/ingestion/cfg/visitors/python.d.ts +103 -0
- package/dist/core/ingestion/cfg/visitors/python.js +558 -0
- package/dist/core/ingestion/cfg/visitors/ruby-harvest.d.ts +144 -0
- package/dist/core/ingestion/cfg/visitors/ruby-harvest.js +419 -0
- package/dist/core/ingestion/cfg/visitors/ruby.d.ts +112 -0
- package/dist/core/ingestion/cfg/visitors/ruby.js +756 -0
- package/dist/core/ingestion/cfg/visitors/rust-harvest.d.ts +165 -0
- package/dist/core/ingestion/cfg/visitors/rust-harvest.js +547 -0
- package/dist/core/ingestion/cfg/visitors/rust.d.ts +95 -0
- package/dist/core/ingestion/cfg/visitors/rust.js +558 -0
- package/dist/core/ingestion/cfg/visitors/scope-tree-harvest.d.ts +87 -0
- package/dist/core/ingestion/cfg/visitors/scope-tree-harvest.js +116 -0
- package/dist/core/ingestion/cfg/visitors/swift-harvest.d.ts +169 -0
- package/dist/core/ingestion/cfg/visitors/swift-harvest.js +505 -0
- package/dist/core/ingestion/cfg/visitors/swift.d.ts +116 -0
- package/dist/core/ingestion/cfg/visitors/swift.js +787 -0
- package/dist/core/ingestion/cfg/visitors/typescript.js +37 -38
- package/dist/core/ingestion/languages/c-cpp.js +3 -0
- package/dist/core/ingestion/languages/cobol.js +4 -0
- package/dist/core/ingestion/languages/csharp.js +2 -0
- package/dist/core/ingestion/languages/dart.js +2 -0
- package/dist/core/ingestion/languages/go.js +2 -0
- package/dist/core/ingestion/languages/java.js +3 -0
- package/dist/core/ingestion/languages/kotlin.js +3 -0
- package/dist/core/ingestion/languages/php.js +2 -0
- package/dist/core/ingestion/languages/python.js +2 -0
- package/dist/core/ingestion/languages/ruby.js +2 -0
- package/dist/core/ingestion/languages/rust.js +2 -0
- package/dist/core/ingestion/languages/swift.js +2 -0
- package/dist/core/ingestion/languages/vue.js +6 -0
- package/dist/core/ingestion/parsing-processor.js +22 -0
- package/dist/core/ingestion/scope-resolution/pipeline/run.js +17 -0
- package/dist/core/ingestion/workers/parse-worker.d.ts +12 -0
- package/dist/core/ingestion/workers/parse-worker.js +28 -7
- package/dist/core/ingestion/workers/worker-pool.js +10 -0
- package/package.json +1 -1
|
@@ -13,6 +13,30 @@
|
|
|
13
13
|
* before the tree-sitter visitor (U2) drives it.
|
|
14
14
|
*/
|
|
15
15
|
import type { BasicBlockData, BindingEntry, CfgEdgeKind, FunctionCfg, StatementFacts } from './types.js';
|
|
16
|
+
/**
|
|
17
|
+
* Hard ceiling on CFG recursive-descent scope-entry depth (#2195). A language
|
|
18
|
+
* `CfgVisitor` wraps each nested block scope in {@link CfgBuilder.withNesting} (its
|
|
19
|
+
* `visitBody` / `visitSeq` choke points), so the live count tracks scope entries,
|
|
20
|
+
* not statement width. NOTE the count is ~2× LEXICAL nesting for block-bodied
|
|
21
|
+
* constructs (visitBody → visitSeq both enter), so the effective lexical ceiling
|
|
22
|
+
* is ~250 levels for block bodies (~500 for single-statement bodies / bare
|
|
23
|
+
* blocks). Real source nests ≤ ~50 deep, so this fires only on machine-generated
|
|
24
|
+
* / adversarial input. Both effective ceilings sit far below the engine's native
|
|
25
|
+
* stack limit (~1.2k+ nesting even on the raised worker `stackSizeMb`), so the
|
|
26
|
+
* bail is a DETERMINISTIC, language-independent {@link CfgNestingDepthError}
|
|
27
|
+
* rather than a nondeterministic `RangeError` thrown somewhere mid-walk.
|
|
28
|
+
*/
|
|
29
|
+
export declare const MAX_CFG_NESTING_DEPTH = 500;
|
|
30
|
+
/**
|
|
31
|
+
* Thrown by the visitor nesting-depth guard ({@link CfgBuilder.enterNesting})
|
|
32
|
+
* when scope-entry depth exceeds {@link MAX_CFG_NESTING_DEPTH}. `collectFunctionCfgs`
|
|
33
|
+
* catches it and counts the function under `skipped.tooDeeplyNested`, isolating
|
|
34
|
+
* the bail to one function instead of risking a worker-wide stack overflow.
|
|
35
|
+
*/
|
|
36
|
+
export declare class CfgNestingDepthError extends Error {
|
|
37
|
+
readonly limit: number;
|
|
38
|
+
constructor(limit: number);
|
|
39
|
+
}
|
|
16
40
|
export declare class CfgBuilder {
|
|
17
41
|
private readonly filePath;
|
|
18
42
|
private readonly functionStartLine;
|
|
@@ -24,6 +48,8 @@ export declare class CfgBuilder {
|
|
|
24
48
|
private readonly blocks;
|
|
25
49
|
private readonly edges;
|
|
26
50
|
private readonly edgeKeys;
|
|
51
|
+
/** Live recursive-descent nesting depth — see {@link enterNesting}. */
|
|
52
|
+
private nesting;
|
|
27
53
|
readonly entryIndex: number;
|
|
28
54
|
readonly exitIndex: number;
|
|
29
55
|
constructor(filePath: string, functionStartLine: number, functionEndLine: number,
|
|
@@ -48,6 +74,30 @@ export declare class CfgBuilder {
|
|
|
48
74
|
*/
|
|
49
75
|
attachFacts(index: number, facts: StatementFacts): void;
|
|
50
76
|
get blockCount(): number;
|
|
77
|
+
/**
|
|
78
|
+
* Run `fn` inside ONE nested block scope (#2195) — the single choke every
|
|
79
|
+
* visitor's `visitBody` / `visitSeq` funnels through. Enters on the way in and
|
|
80
|
+
* exits in a `finally`, so the live depth is balanced on every return AND every
|
|
81
|
+
* throw and the enter/exit can never drift out of pair (the reason this is one
|
|
82
|
+
* helper, not 24 hand-paired call sites). Throws {@link CfgNestingDepthError}
|
|
83
|
+
* when nesting exceeds {@link MAX_CFG_NESTING_DEPTH} — a proactive, deterministic
|
|
84
|
+
* bail before the native stack can overflow on a pathologically nested function.
|
|
85
|
+
*
|
|
86
|
+
* A block-bodied construct passes through BOTH visitBody and visitSeq, so it
|
|
87
|
+
* costs TWO scopes per lexical level: the effective structural ceiling is
|
|
88
|
+
* ~MAX_CFG_NESTING_DEPTH/2 (~250) lexical levels for block bodies (~500 for
|
|
89
|
+
* single-statement bodies / bare blocks, which hit only one of the two). Still
|
|
90
|
+
* well below the native limit and far above real code (≤ ~50).
|
|
91
|
+
*/
|
|
92
|
+
withNesting<T>(fn: () => T): T;
|
|
93
|
+
/**
|
|
94
|
+
* Increment the nesting counter, throwing {@link CfgNestingDepthError} past the
|
|
95
|
+
* cap. Prefer {@link withNesting}, which pairs the exit in a `finally`; this is
|
|
96
|
+
* exposed for direct depth-accounting tests only.
|
|
97
|
+
*/
|
|
98
|
+
enterNesting(): void;
|
|
99
|
+
/** Decrement the nesting counter — the partner of {@link enterNesting}. */
|
|
100
|
+
exitNesting(): void;
|
|
51
101
|
/** Produce the serializable CFG. Caller is responsible for having wired the
|
|
52
102
|
* function's dangling exits to {@link exitIndex} before calling.
|
|
53
103
|
*
|
|
@@ -1,3 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hard ceiling on CFG recursive-descent scope-entry depth (#2195). A language
|
|
3
|
+
* `CfgVisitor` wraps each nested block scope in {@link CfgBuilder.withNesting} (its
|
|
4
|
+
* `visitBody` / `visitSeq` choke points), so the live count tracks scope entries,
|
|
5
|
+
* not statement width. NOTE the count is ~2× LEXICAL nesting for block-bodied
|
|
6
|
+
* constructs (visitBody → visitSeq both enter), so the effective lexical ceiling
|
|
7
|
+
* is ~250 levels for block bodies (~500 for single-statement bodies / bare
|
|
8
|
+
* blocks). Real source nests ≤ ~50 deep, so this fires only on machine-generated
|
|
9
|
+
* / adversarial input. Both effective ceilings sit far below the engine's native
|
|
10
|
+
* stack limit (~1.2k+ nesting even on the raised worker `stackSizeMb`), so the
|
|
11
|
+
* bail is a DETERMINISTIC, language-independent {@link CfgNestingDepthError}
|
|
12
|
+
* rather than a nondeterministic `RangeError` thrown somewhere mid-walk.
|
|
13
|
+
*/
|
|
14
|
+
export const MAX_CFG_NESTING_DEPTH = 500;
|
|
15
|
+
/**
|
|
16
|
+
* Thrown by the visitor nesting-depth guard ({@link CfgBuilder.enterNesting})
|
|
17
|
+
* when scope-entry depth exceeds {@link MAX_CFG_NESTING_DEPTH}. `collectFunctionCfgs`
|
|
18
|
+
* catches it and counts the function under `skipped.tooDeeplyNested`, isolating
|
|
19
|
+
* the bail to one function instead of risking a worker-wide stack overflow.
|
|
20
|
+
*/
|
|
21
|
+
export class CfgNestingDepthError extends Error {
|
|
22
|
+
limit;
|
|
23
|
+
constructor(limit) {
|
|
24
|
+
super(`CFG nesting depth exceeded ${limit}`);
|
|
25
|
+
this.limit = limit;
|
|
26
|
+
this.name = 'CfgNestingDepthError';
|
|
27
|
+
}
|
|
28
|
+
}
|
|
1
29
|
export class CfgBuilder {
|
|
2
30
|
filePath;
|
|
3
31
|
functionStartLine;
|
|
@@ -6,6 +34,8 @@ export class CfgBuilder {
|
|
|
6
34
|
blocks = [];
|
|
7
35
|
edges = [];
|
|
8
36
|
edgeKeys = new Set();
|
|
37
|
+
/** Live recursive-descent nesting depth — see {@link enterNesting}. */
|
|
38
|
+
nesting = 0;
|
|
9
39
|
entryIndex;
|
|
10
40
|
exitIndex;
|
|
11
41
|
constructor(filePath, functionStartLine, functionEndLine,
|
|
@@ -72,6 +102,43 @@ export class CfgBuilder {
|
|
|
72
102
|
get blockCount() {
|
|
73
103
|
return this.blocks.length;
|
|
74
104
|
}
|
|
105
|
+
/**
|
|
106
|
+
* Run `fn` inside ONE nested block scope (#2195) — the single choke every
|
|
107
|
+
* visitor's `visitBody` / `visitSeq` funnels through. Enters on the way in and
|
|
108
|
+
* exits in a `finally`, so the live depth is balanced on every return AND every
|
|
109
|
+
* throw and the enter/exit can never drift out of pair (the reason this is one
|
|
110
|
+
* helper, not 24 hand-paired call sites). Throws {@link CfgNestingDepthError}
|
|
111
|
+
* when nesting exceeds {@link MAX_CFG_NESTING_DEPTH} — a proactive, deterministic
|
|
112
|
+
* bail before the native stack can overflow on a pathologically nested function.
|
|
113
|
+
*
|
|
114
|
+
* A block-bodied construct passes through BOTH visitBody and visitSeq, so it
|
|
115
|
+
* costs TWO scopes per lexical level: the effective structural ceiling is
|
|
116
|
+
* ~MAX_CFG_NESTING_DEPTH/2 (~250) lexical levels for block bodies (~500 for
|
|
117
|
+
* single-statement bodies / bare blocks, which hit only one of the two). Still
|
|
118
|
+
* well below the native limit and far above real code (≤ ~50).
|
|
119
|
+
*/
|
|
120
|
+
withNesting(fn) {
|
|
121
|
+
this.enterNesting();
|
|
122
|
+
try {
|
|
123
|
+
return fn();
|
|
124
|
+
}
|
|
125
|
+
finally {
|
|
126
|
+
this.exitNesting();
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
/**
|
|
130
|
+
* Increment the nesting counter, throwing {@link CfgNestingDepthError} past the
|
|
131
|
+
* cap. Prefer {@link withNesting}, which pairs the exit in a `finally`; this is
|
|
132
|
+
* exposed for direct depth-accounting tests only.
|
|
133
|
+
*/
|
|
134
|
+
enterNesting() {
|
|
135
|
+
if (++this.nesting > MAX_CFG_NESTING_DEPTH)
|
|
136
|
+
throw new CfgNestingDepthError(MAX_CFG_NESTING_DEPTH);
|
|
137
|
+
}
|
|
138
|
+
/** Decrement the nesting counter — the partner of {@link enterNesting}. */
|
|
139
|
+
exitNesting() {
|
|
140
|
+
this.nesting--;
|
|
141
|
+
}
|
|
75
142
|
/** Produce the serializable CFG. Caller is responsible for having wired the
|
|
76
143
|
* function's dangling exits to {@link exitIndex} before calling.
|
|
77
144
|
*
|
|
@@ -22,9 +22,30 @@ import type { CfgVisitor, FunctionCfg } from './types.js';
|
|
|
22
22
|
* both expensive and low-value. Overridable via `PipelineOptions.pdgMaxFunctionLines`.
|
|
23
23
|
*/
|
|
24
24
|
export declare const DEFAULT_PDG_MAX_FUNCTION_LINES = 2000;
|
|
25
|
+
/**
|
|
26
|
+
* CFG-bearing functions skipped during the walk, bucketed by reason (#2195).
|
|
27
|
+
* Surfaced per-language in the parse telemetry (parsing-processor.ts) so a CFG
|
|
28
|
+
* coverage gap is observable, not silent. All-zero ⇒ nothing skipped.
|
|
29
|
+
*/
|
|
30
|
+
export interface CfgSkipCounts {
|
|
31
|
+
/** Source span exceeded `maxFunctionLines` (minified / generated code). */
|
|
32
|
+
readonly tooManyLines: number;
|
|
33
|
+
/**
|
|
34
|
+
* Recursive-descent nesting hit {@link MAX_CFG_NESTING_DEPTH} — a proactive,
|
|
35
|
+
* deterministic bail (see {@link CfgNestingDepthError}) before a worker stack
|
|
36
|
+
* overflow.
|
|
37
|
+
*/
|
|
38
|
+
readonly tooDeeplyNested: number;
|
|
39
|
+
/**
|
|
40
|
+
* `buildFunctionCfg` threw an unexpected error. Caught PER FUNCTION so one
|
|
41
|
+
* malformed function no longer drops the whole file's CFGs (the throw used to
|
|
42
|
+
* escape to the worker's language-group catch).
|
|
43
|
+
*/
|
|
44
|
+
readonly buildError: number;
|
|
45
|
+
}
|
|
25
46
|
export interface CollectedCfgs {
|
|
26
47
|
readonly cfgs: readonly FunctionCfg[];
|
|
27
|
-
/**
|
|
28
|
-
readonly skipped:
|
|
48
|
+
/** Per-reason skip counts (#2195). */
|
|
49
|
+
readonly skipped: CfgSkipCounts;
|
|
29
50
|
}
|
|
30
|
-
export declare function collectFunctionCfgs(root: SyntaxNode, visitor: CfgVisitor<SyntaxNode>, filePath: string, maxFunctionLines?: number): CollectedCfgs;
|
|
51
|
+
export declare function collectFunctionCfgs(root: SyntaxNode, visitor: CfgVisitor<SyntaxNode>, filePath: string, maxFunctionLines?: number, lineOffset?: number): CollectedCfgs;
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { CfgNestingDepthError } from './cfg-builder.js';
|
|
1
2
|
/**
|
|
2
3
|
* Default per-function source-line cap used by the worker when the `--pdg` run
|
|
3
4
|
* does not specify `pdgMaxFunctionLines`. A function longer than this (almost
|
|
@@ -5,21 +6,63 @@
|
|
|
5
6
|
* both expensive and low-value. Overridable via `PipelineOptions.pdgMaxFunctionLines`.
|
|
6
7
|
*/
|
|
7
8
|
export const DEFAULT_PDG_MAX_FUNCTION_LINES = 2000;
|
|
8
|
-
|
|
9
|
+
/**
|
|
10
|
+
* Convert a CFG built from an EXTRACTED sub-document's AST (script-relative
|
|
11
|
+
* tree-sitter rows) into the enclosing file's coordinates by adding `offset` to
|
|
12
|
+
* every source-line field. Needed for embedded scripts — a Vue SFC `<script>`
|
|
13
|
+
* block parses at row 0 but lives at `lineOffset` in the `.vue` file, and every
|
|
14
|
+
* other worker-emitted graph node is already file-relative; without this, the
|
|
15
|
+
* CFG's `functionStartLine` would never join its Function/Method graph node
|
|
16
|
+
* (inter-procedural taint silently resolves nothing) and BasicBlock source
|
|
17
|
+
* lines would point at the wrong `.vue` line. A 0 offset returns the input
|
|
18
|
+
* unchanged (the common case: `.ts`/`.js`/etc. parse at the file root), keeping
|
|
19
|
+
* non-embedded languages byte-identical. Synthetic bindings keep `declLine` 0.
|
|
20
|
+
*/
|
|
21
|
+
function shiftCfgLines(cfg, offset) {
|
|
22
|
+
if (offset === 0)
|
|
23
|
+
return cfg;
|
|
24
|
+
return {
|
|
25
|
+
...cfg,
|
|
26
|
+
functionStartLine: cfg.functionStartLine + offset,
|
|
27
|
+
functionEndLine: cfg.functionEndLine + offset,
|
|
28
|
+
blocks: cfg.blocks.map((b) => ({
|
|
29
|
+
...b,
|
|
30
|
+
startLine: b.startLine + offset,
|
|
31
|
+
endLine: b.endLine + offset,
|
|
32
|
+
statements: b.statements?.map((s) => ({ ...s, line: s.line + offset })),
|
|
33
|
+
})),
|
|
34
|
+
bindings: cfg.bindings?.map((bd) => bd.declLine > 0 ? { ...bd, declLine: bd.declLine + offset } : bd),
|
|
35
|
+
};
|
|
36
|
+
}
|
|
37
|
+
export function collectFunctionCfgs(root, visitor, filePath, maxFunctionLines = 0, lineOffset = 0) {
|
|
9
38
|
const cfgs = [];
|
|
10
|
-
let
|
|
39
|
+
let tooManyLines = 0;
|
|
40
|
+
let tooDeeplyNested = 0;
|
|
41
|
+
let buildError = 0;
|
|
11
42
|
const stack = [root];
|
|
12
43
|
while (stack.length) {
|
|
13
44
|
const node = stack.pop();
|
|
14
45
|
if (visitor.isFunction(node)) {
|
|
15
46
|
const lines = node.endPosition.row - node.startPosition.row + 1;
|
|
16
47
|
if (maxFunctionLines > 0 && lines > maxFunctionLines) {
|
|
17
|
-
|
|
48
|
+
tooManyLines++;
|
|
18
49
|
}
|
|
19
50
|
else {
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
51
|
+
// Isolate the per-function build: a proactive deep-nesting bail
|
|
52
|
+
// (CfgNestingDepthError) or any other visitor throw is counted and
|
|
53
|
+
// skipped HERE, so it can't escape to the worker's language-group catch
|
|
54
|
+
// and silently drop every remaining function's CFG (#2195).
|
|
55
|
+
try {
|
|
56
|
+
const cfg = visitor.buildFunctionCfg(node, filePath);
|
|
57
|
+
if (cfg)
|
|
58
|
+
cfgs.push(shiftCfgLines(cfg, lineOffset));
|
|
59
|
+
}
|
|
60
|
+
catch (err) {
|
|
61
|
+
if (err instanceof CfgNestingDepthError)
|
|
62
|
+
tooDeeplyNested++;
|
|
63
|
+
else
|
|
64
|
+
buildError++;
|
|
65
|
+
}
|
|
23
66
|
}
|
|
24
67
|
}
|
|
25
68
|
// Descend regardless (a skipped mega-function may still contain small
|
|
@@ -30,5 +73,5 @@ export function collectFunctionCfgs(root, visitor, filePath, maxFunctionLines =
|
|
|
30
73
|
stack.push(child);
|
|
31
74
|
}
|
|
32
75
|
}
|
|
33
|
-
return { cfgs, skipped };
|
|
76
|
+
return { cfgs, skipped: { tooManyLines, tooDeeplyNested, buildError } };
|
|
34
77
|
}
|
|
@@ -1,15 +1,26 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Control dependence (#2085 M5 U3) — Ferrante, Ottenstein & Warren §3.1.1
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
2
|
+
* Control dependence (#2085 M5 U3) — Ferrante, Ottenstein & Warren §3.1.1
|
|
3
|
+
* semantics. A block `dependent` is control-dependent on a branch block
|
|
4
|
+
* `controller` when `controller` decides whether `dependent` executes: formally,
|
|
5
|
+
* there is a CFG edge `controller → B` such that `dependent` post-dominates `B`
|
|
6
|
+
* but does NOT strictly post-dominate `controller`.
|
|
7
7
|
*
|
|
8
|
-
* Construction
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
8
|
+
* Construction — the reverse-CFG dominance-frontier formulation (Cytron,
|
|
9
|
+
* Ferrante, Rosen, Wegman & Zadeck 1991): control dependence IS the dominance
|
|
10
|
+
* frontier of the reverse CFG, so `A ∈ PDF(X)` (the post-dominance frontier)
|
|
11
|
+
* ⟺ `X` is control-dependent on `A`. The PDF is computed bottom-up over the
|
|
12
|
+
* post-dom tree (`PDF_local` from a node's CFG predecessors + `PDF_up` from its
|
|
13
|
+
* post-dom-tree children) in O(N + E + output) — each up-step is charged to a
|
|
14
|
+
* distinct emitted edge, NOT re-walked per CFG edge as the original §3.1.1
|
|
15
|
+
* up-walk did (which was Θ(N²) on a deep post-dom chain). The two formulations
|
|
16
|
+
* enumerate the IDENTICAL full `(controller, dependent, label)` set (verified
|
|
17
|
+
* byte-identical on 3203 CFGs + ~1M-case differential fuzz); LLVM, Joern and WALA
|
|
18
|
+
* use the reverse-DF form. (Only the rare TRUNCATED prefix — when a function
|
|
19
|
+
* exceeds `maxEdges` — differs from the old prefix: it is now a sorted
|
|
20
|
+
* deterministic prefix rather than CFG-edge-iteration order. Both are valid,
|
|
21
|
+
* deterministic subsets; the full untruncated output is unchanged.)
|
|
22
|
+
* The branch SENSE ('T' | 'F') of the controlling edge becomes the edge label
|
|
23
|
+
* (KTD4 / KTD3 — it rides the persisted relation's `reason` column).
|
|
13
24
|
*
|
|
14
25
|
* PURE AND DETERMINISTIC (mirrors post-dominators.ts / reaching-defs.ts): no
|
|
15
26
|
* graph, no logger, importable outside the worker; output is deduped per
|
|
@@ -1,15 +1,26 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Control dependence (#2085 M5 U3) — Ferrante, Ottenstein & Warren §3.1.1
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
2
|
+
* Control dependence (#2085 M5 U3) — Ferrante, Ottenstein & Warren §3.1.1
|
|
3
|
+
* semantics. A block `dependent` is control-dependent on a branch block
|
|
4
|
+
* `controller` when `controller` decides whether `dependent` executes: formally,
|
|
5
|
+
* there is a CFG edge `controller → B` such that `dependent` post-dominates `B`
|
|
6
|
+
* but does NOT strictly post-dominate `controller`.
|
|
7
7
|
*
|
|
8
|
-
* Construction
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
8
|
+
* Construction — the reverse-CFG dominance-frontier formulation (Cytron,
|
|
9
|
+
* Ferrante, Rosen, Wegman & Zadeck 1991): control dependence IS the dominance
|
|
10
|
+
* frontier of the reverse CFG, so `A ∈ PDF(X)` (the post-dominance frontier)
|
|
11
|
+
* ⟺ `X` is control-dependent on `A`. The PDF is computed bottom-up over the
|
|
12
|
+
* post-dom tree (`PDF_local` from a node's CFG predecessors + `PDF_up` from its
|
|
13
|
+
* post-dom-tree children) in O(N + E + output) — each up-step is charged to a
|
|
14
|
+
* distinct emitted edge, NOT re-walked per CFG edge as the original §3.1.1
|
|
15
|
+
* up-walk did (which was Θ(N²) on a deep post-dom chain). The two formulations
|
|
16
|
+
* enumerate the IDENTICAL full `(controller, dependent, label)` set (verified
|
|
17
|
+
* byte-identical on 3203 CFGs + ~1M-case differential fuzz); LLVM, Joern and WALA
|
|
18
|
+
* use the reverse-DF form. (Only the rare TRUNCATED prefix — when a function
|
|
19
|
+
* exceeds `maxEdges` — differs from the old prefix: it is now a sorted
|
|
20
|
+
* deterministic prefix rather than CFG-edge-iteration order. Both are valid,
|
|
21
|
+
* deterministic subsets; the full untruncated output is unchanged.)
|
|
22
|
+
* The branch SENSE ('T' | 'F') of the controlling edge becomes the edge label
|
|
23
|
+
* (KTD4 / KTD3 — it rides the persisted relation's `reason` column).
|
|
13
24
|
*
|
|
14
25
|
* PURE AND DETERMINISTIC (mirrors post-dominators.ts / reaching-defs.ts): no
|
|
15
26
|
* graph, no logger, importable outside the worker; output is deduped per
|
|
@@ -18,7 +29,7 @@
|
|
|
18
29
|
* control-dependent on ITSELF (`controller === dependent`) — the loop predicate
|
|
19
30
|
* gates its own re-execution; this is standard PDG behavior, not a bug.
|
|
20
31
|
*/
|
|
21
|
-
import { computePostDominators,
|
|
32
|
+
import { computePostDominators, NO_IPDOM } from './post-dominators.js';
|
|
22
33
|
function buildArmSenses(cfg) {
|
|
23
34
|
const n = cfg.blocks.length;
|
|
24
35
|
const senses = Array.from({ length: n }, () => ({
|
|
@@ -65,56 +76,107 @@ function labelFor(kind, controller) {
|
|
|
65
76
|
* module doc for the purity/determinism contract.
|
|
66
77
|
*/
|
|
67
78
|
export function computeControlDependence(cfg, postDom,
|
|
68
|
-
//
|
|
69
|
-
//
|
|
70
|
-
// so
|
|
71
|
-
//
|
|
79
|
+
// Output-size ceiling, mirroring computeReachingDefs' `maxFacts` (#2188 review).
|
|
80
|
+
// The reverse-DF set is the bounded (controller, dependent, label) dependence
|
|
81
|
+
// relation, so peak working set ≈ output here (no pre-dedup spike like the old
|
|
82
|
+
// up-walk) — this caps the final edge COUNT, not transient memory. `0` ⇒
|
|
83
|
+
// unbounded. On overflow `edges` is a deterministic SORTED prefix and
|
|
84
|
+
// `truncated` is set — never a silent drop. (The sorted prefix's
|
|
85
|
+
// CONTENTS may differ from the old up-walk's CFG-edge-iteration prefix at the
|
|
86
|
+
// cap boundary; the FULL untruncated set is byte-identical — see the module doc.)
|
|
72
87
|
maxEdges = 0) {
|
|
73
88
|
const tree = postDom ?? computePostDominators(cfg);
|
|
74
89
|
const { ipdom } = tree;
|
|
75
90
|
const n = cfg.blocks.length;
|
|
76
91
|
const armSenses = buildArmSenses(cfg);
|
|
77
92
|
const cap = maxEdges > 0 ? maxEdges : Infinity;
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
93
|
+
// Reverse-CFG post-dominance frontier (Cytron, Ferrante, Rosen, Wegman,
|
|
94
|
+
// Zadeck 1991): control dependence IS the dominance frontier of the reverse
|
|
95
|
+
// CFG. `A ∈ PDF(X)` ⟺ X is control-dependent on A, so emit (controller=A,
|
|
96
|
+
// dependent=X). Computing the PDF bottom-up over the post-dom tree charges
|
|
97
|
+
// each up-step to a DISTINCT emitted entry — O(N+E+output) — instead of the
|
|
98
|
+
// old Ferrante up-walk that re-climbs the ipdom chain per CFG edge (Θ(N²) on
|
|
99
|
+
// a deep post-dom chain). Output is the identical (controller, dependent,
|
|
100
|
+
// label) set (verified byte-identical on 3203 CFGs across all languages +
|
|
101
|
+
// fuzz) and 1-2 orders of magnitude faster. LLVM (ReverseIDFCalculator),
|
|
102
|
+
// Joern (CdgPass) and WALA use the same formulation.
|
|
103
|
+
const children = Array.from({ length: n }, () => []);
|
|
104
|
+
const inEdges = Array.from({ length: n }, () => []);
|
|
105
|
+
for (let b = 0; b < n; b++) {
|
|
106
|
+
const ip = ipdom[b];
|
|
107
|
+
if (ip !== NO_IPDOM && ip >= 0 && ip < n)
|
|
108
|
+
children[ip].push(b);
|
|
109
|
+
}
|
|
110
|
+
for (const e of cfg.edges) {
|
|
111
|
+
if (e.from < 0 || e.from >= n || e.to < 0 || e.to >= n)
|
|
90
112
|
continue;
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
113
|
+
inEdges[e.to].push({ from: e.from, kind: e.kind });
|
|
114
|
+
}
|
|
115
|
+
// Post-dom-tree post-order (children before parents). Iterative — the post-dom
|
|
116
|
+
// forest can itself be chain-deep. Roots are the NO_IPDOM nodes (EXIT, plus
|
|
117
|
+
// any exit-unreachable region per #2188 F2). The reverse of a root-first DFS
|
|
118
|
+
// visits every parent AFTER all its descendants.
|
|
119
|
+
const dfs = [];
|
|
120
|
+
for (let r = 0; r < n; r++)
|
|
121
|
+
if (ipdom[r] === NO_IPDOM)
|
|
122
|
+
dfs.push(r);
|
|
123
|
+
const preorder = [];
|
|
124
|
+
while (dfs.length) {
|
|
125
|
+
const x = dfs.pop();
|
|
126
|
+
preorder.push(x);
|
|
127
|
+
for (const c of children[x])
|
|
128
|
+
dfs.push(c);
|
|
129
|
+
}
|
|
130
|
+
const order = preorder.reverse();
|
|
131
|
+
// PDF[X]: controller A → the label SET with which A controls X. A set (not one
|
|
132
|
+
// label) because a controller can reach X via opposite-sense arms (goto-
|
|
133
|
+
// cycles) — the old (a, cur, label) dedup kept both rows.
|
|
134
|
+
const pdf = Array.from({ length: n }, () => new Map());
|
|
135
|
+
const add = (x, a, label) => {
|
|
136
|
+
const set = pdf[x].get(a);
|
|
137
|
+
if (set)
|
|
138
|
+
set.add(label);
|
|
139
|
+
else
|
|
140
|
+
pdf[x].set(a, new Set([label]));
|
|
141
|
+
};
|
|
142
|
+
for (const x of order) {
|
|
143
|
+
// PDF_local: a CFG-predecessor A of X that X does not (immediately) post-
|
|
144
|
+
// dominate. `A !== X && ipdom[A] !== X` is exactly the production
|
|
145
|
+
// `!postDominates(X, A)` for one edge A→X (postDominates(X,A) ⟺ ipdom[A]===X),
|
|
146
|
+
// and it excludes self-edges + NO_IPDOM regions. Sense is read from the
|
|
147
|
+
// CONTROLLER's arms (seq/loop-back fall-through false arms would otherwise
|
|
148
|
+
// mislabel as 'T' — #2188 F1).
|
|
149
|
+
for (const { from: a, kind } of inEdges[x]) {
|
|
150
|
+
if (a !== x && ipdom[a] !== x)
|
|
151
|
+
add(x, a, labelFor(kind, armSenses[a]));
|
|
152
|
+
}
|
|
153
|
+
// PDF_up: inherit each post-dom child's frontier controller (with its label
|
|
154
|
+
// set) when X does not post-dominate it.
|
|
155
|
+
for (const z of children[x]) {
|
|
156
|
+
for (const [a, labels] of pdf[z]) {
|
|
157
|
+
if (ipdom[a] !== x)
|
|
158
|
+
for (const l of labels)
|
|
159
|
+
add(x, a, l);
|
|
111
160
|
}
|
|
112
|
-
|
|
113
|
-
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
const out = [];
|
|
164
|
+
for (const x of order) {
|
|
165
|
+
for (const [a, labels] of pdf[x]) {
|
|
166
|
+
for (const label of labels)
|
|
167
|
+
out.push({ controllerBlock: a, dependentBlock: x, label });
|
|
114
168
|
}
|
|
115
169
|
}
|
|
116
170
|
out.sort((x, y) => x.controllerBlock - y.controllerBlock ||
|
|
117
171
|
x.dependentBlock - y.dependentBlock ||
|
|
118
172
|
(x.label < y.label ? -1 : x.label > y.label ? 1 : 0));
|
|
173
|
+
// `maxEdges` is a heap-safety backstop applied to the SORTED set (the DF makes
|
|
174
|
+
// overflow far rarer than the old per-edge walk). Deterministic prefix, never
|
|
175
|
+
// a silent drop; mirrors computeReachingDefs' `truncated`.
|
|
176
|
+
let truncated = false;
|
|
177
|
+
if (cap !== Infinity && out.length > cap) {
|
|
178
|
+
truncated = true;
|
|
179
|
+
out.length = cap;
|
|
180
|
+
}
|
|
119
181
|
return { edges: out, truncated };
|
|
120
182
|
}
|
|
@@ -62,6 +62,15 @@ export declare class ControlFlowContext {
|
|
|
62
62
|
resolveBreak(label?: string): JumpResolution | undefined;
|
|
63
63
|
/** Resolve a `continue`: like {@link resolveBreak} but only loop frames match. */
|
|
64
64
|
resolveContinue(label?: string): JumpResolution | undefined;
|
|
65
|
+
/**
|
|
66
|
+
* Resolve a Java `yield e` (switch-EXPRESSION arm exit): the nearest enclosing
|
|
67
|
+
* SWITCH frame's exit, threading the finalizers stacked above it. Unlike a
|
|
68
|
+
* `break`, a `yield` ALWAYS targets the switch — never an intervening loop — so
|
|
69
|
+
* it cannot match a loop frame (a `yield` inside a loop inside a switch arm
|
|
70
|
+
* still exits the whole switch). Returns `undefined` when there is no enclosing
|
|
71
|
+
* switch (malformed input); the caller falls back to its conservative routing.
|
|
72
|
+
*/
|
|
73
|
+
resolveYield(): JumpResolution | undefined;
|
|
65
74
|
/** Every active finalizer, innermost first — what a `return` must cross. */
|
|
66
75
|
finalizersForReturn(): readonly FinalizerFrame[];
|
|
67
76
|
/**
|
|
@@ -39,6 +39,17 @@ export class ControlFlowContext {
|
|
|
39
39
|
resolveContinue(label) {
|
|
40
40
|
return this.resolve((f) => f.kind === 'loop' && (label === undefined || f.labels.includes(label)), (f) => f.continueTo);
|
|
41
41
|
}
|
|
42
|
+
/**
|
|
43
|
+
* Resolve a Java `yield e` (switch-EXPRESSION arm exit): the nearest enclosing
|
|
44
|
+
* SWITCH frame's exit, threading the finalizers stacked above it. Unlike a
|
|
45
|
+
* `break`, a `yield` ALWAYS targets the switch — never an intervening loop — so
|
|
46
|
+
* it cannot match a loop frame (a `yield` inside a loop inside a switch arm
|
|
47
|
+
* still exits the whole switch). Returns `undefined` when there is no enclosing
|
|
48
|
+
* switch (malformed input); the caller falls back to its conservative routing.
|
|
49
|
+
*/
|
|
50
|
+
resolveYield() {
|
|
51
|
+
return this.resolve((f) => f.kind === 'switch');
|
|
52
|
+
}
|
|
42
53
|
/** Every active finalizer, innermost first — what a `return` must cross. */
|
|
43
54
|
finalizersForReturn() {
|
|
44
55
|
const fins = [];
|
|
@@ -76,6 +76,25 @@ export declare const POST_DOMINATE_DEBUG_ENV = "GITNEXUS_PDG_EMIT_POST_DOMINATE"
|
|
|
76
76
|
export declare const REACHING_DEF_FACTS_PER_EDGE_CAP = 4;
|
|
77
77
|
/** Derived emit-path fact limit at the default edge cap (bench/doc anchor). */
|
|
78
78
|
export declare const DEFAULT_PDG_MAX_REACHING_DEF_FACTS_PER_FUNCTION: number;
|
|
79
|
+
/**
|
|
80
|
+
* Fixpoint-iteration budget for {@link computeReachingDefs}, as a multiple of
|
|
81
|
+
* the function's block count ({@link emitFileReachingDefs} passes
|
|
82
|
+
* `blocks.length × this` as `maxBlockVisits`). Iterative reaching-defs on a
|
|
83
|
+
* reducible CFG converges in O(loop-nesting-depth) passes, so a worklist
|
|
84
|
+
* re-visits each block a small multiple of times for real code; this budget
|
|
85
|
+
* tolerates a nesting depth far beyond any hand-written function (real code is
|
|
86
|
+
* ≤ ~15 deep) while truncating the pathological deep nest that otherwise drives
|
|
87
|
+
* the solver to O(blocks²) — measured at seconds + GB on a machine-generated
|
|
88
|
+
* 2000-line all-loops function whose fact count stays linear (so `maxFacts`
|
|
89
|
+
* never fires). Truncation degrades to a sound empty REACHING_DEF for that one
|
|
90
|
+
* function (status `truncated`), never wrong facts.
|
|
91
|
+
*
|
|
92
|
+
* This ceiling is the SOUND backstop, not a perf fix: WTO / loop-aware iteration
|
|
93
|
+
* ordering was benchmarked and rejected (0% faster — the cost is dense-set
|
|
94
|
+
* propagation, not visitation order; see the no-go note in reaching-defs.ts at
|
|
95
|
+
* the RPO-order site). SSA-sparse reaching-defs is the deferred real fix.
|
|
96
|
+
*/
|
|
97
|
+
export declare const DEFAULT_PDG_MAX_REACHING_DEF_BLOCK_REVISITS = 64;
|
|
79
98
|
export interface CfgEmitResult {
|
|
80
99
|
blocks: number;
|
|
81
100
|
edges: number;
|