al-sem 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +361 -0
- package/package.json +64 -0
- package/scripts/d40-diff.ts +44 -0
- package/scripts/fetch-native-parser.ts +179 -0
- package/scripts/precision-sample.ts +99 -0
- package/scripts/precision-study.ts +42 -0
- package/scripts/precision-tabulate.ts +52 -0
- package/src/cli/baseline.ts +31 -0
- package/src/cli/diff.ts +199 -0
- package/src/cli/events-chains.ts +56 -0
- package/src/cli/events-fanout.ts +87 -0
- package/src/cli/exit-code.ts +30 -0
- package/src/cli/fingerprint-indexes.ts +130 -0
- package/src/cli/fingerprint-query.ts +543 -0
- package/src/cli/fingerprint-witness.ts +493 -0
- package/src/cli/fingerprint.ts +292 -0
- package/src/cli/format-compact-json.ts +45 -0
- package/src/cli/format-events.ts +77 -0
- package/src/cli/format-fingerprint.ts +295 -0
- package/src/cli/format-html.ts +503 -0
- package/src/cli/format-json.ts +13 -0
- package/src/cli/format-policy.ts +95 -0
- package/src/cli/format-sarif.ts +186 -0
- package/src/cli/format-terminal.ts +153 -0
- package/src/cli/index.ts +566 -0
- package/src/cli/policy.ts +204 -0
- package/src/config/roots-config.ts +302 -0
- package/src/deps/cache-versions.ts +74 -0
- package/src/deps/canonical-json.ts +27 -0
- package/src/deps/dependency-artifact.ts +144 -0
- package/src/deps/dependency-cache.ts +262 -0
- package/src/deps/dependency-dag.ts +128 -0
- package/src/deps/dependency-package-discovery.ts +85 -0
- package/src/deps/dependency-pipeline.ts +483 -0
- package/src/deps/dependency-projection.ts +211 -0
- package/src/deps/dependency-resolver.ts +154 -0
- package/src/deps/workspace-dependencies.ts +114 -0
- package/src/detectors/capability-query.ts +145 -0
- package/src/detectors/confidence.ts +52 -0
- package/src/detectors/d1-db-op-in-loop.ts +457 -0
- package/src/detectors/d10-self-modifying-loop.ts +114 -0
- package/src/detectors/d11-modify-without-get.ts +129 -0
- package/src/detectors/d12-dead-integration-event.ts +81 -0
- package/src/detectors/d13-cross-app-internal-call.ts +105 -0
- package/src/detectors/d14-dead-routine.ts +151 -0
- package/src/detectors/d16-obsolete-routine-call.ts +94 -0
- package/src/detectors/d17-min-version-drift.ts +157 -0
- package/src/detectors/d18-constant-filter-in-loop.ts +151 -0
- package/src/detectors/d19-unused-parameter.ts +116 -0
- package/src/detectors/d2-event-fanout-in-loop.ts +240 -0
- package/src/detectors/d20-unreachable-after-exit.ts +92 -0
- package/src/detectors/d21-read-without-load.ts +128 -0
- package/src/detectors/d22-flowfield-without-calcfields.ts +168 -0
- package/src/detectors/d29-subscriber-modify-on-event-record.ts +163 -0
- package/src/detectors/d3-load-state.ts +72 -0
- package/src/detectors/d3-missing-setloadfields.ts +234 -0
- package/src/detectors/d32-constant-boolean-parameter.ts +185 -0
- package/src/detectors/d33-unfiltered-bulk-write.ts +173 -0
- package/src/detectors/d34-commit-in-loop.ts +206 -0
- package/src/detectors/d35-commit-in-event-subscriber.ts +138 -0
- package/src/detectors/d36-late-setloadfields.ts +162 -0
- package/src/detectors/d37-validate-without-persist.ts +271 -0
- package/src/detectors/d38-subscriber-to-obsolete-event.ts +140 -0
- package/src/detectors/d39-record-left-dirty-across-chain.ts +165 -0
- package/src/detectors/d4-repeated-lookup-in-loop.ts +128 -0
- package/src/detectors/d40-transitive-load-missing.ts +217 -0
- package/src/detectors/d41-transitive-filter-loss.ts +200 -0
- package/src/detectors/d42-cross-call-wrong-setloadfields.ts +243 -0
- package/src/detectors/d43-event-ishandled-skip.ts +257 -0
- package/src/detectors/d44-event-multi-subscriber-overlap.ts +223 -0
- package/src/detectors/d45-event-transitive-table-exposure.ts +159 -0
- package/src/detectors/d5-set-based-opportunity.ts +162 -0
- package/src/detectors/d7-recursive-event-expansion.ts +151 -0
- package/src/detectors/d8-commit-in-transaction.ts +132 -0
- package/src/detectors/d9-transaction-span-summary.ts +107 -0
- package/src/detectors/detector-context.ts +121 -0
- package/src/detectors/finding-grouping.ts +61 -0
- package/src/detectors/path-merge.ts +174 -0
- package/src/detectors/registry.ts +176 -0
- package/src/detectors/table-display.ts +42 -0
- package/src/diff/diff-abi.ts +195 -0
- package/src/diff/diff-capabilities.ts +179 -0
- package/src/diff/diff-engine.ts +146 -0
- package/src/diff/diff-events.ts +323 -0
- package/src/diff/diff-identity.ts +73 -0
- package/src/diff/diff-indexes.ts +199 -0
- package/src/diff/diff-permissions.ts +260 -0
- package/src/diff/diff-policy.ts +101 -0
- package/src/diff/diff-preflight.ts +66 -0
- package/src/diff/diff-renames.ts +104 -0
- package/src/diff/diff-schema.ts +232 -0
- package/src/diff/format-diff.ts +148 -0
- package/src/engine/attribute-parser.ts +50 -0
- package/src/engine/capability-cone.ts +531 -0
- package/src/engine/combined-graph.ts +357 -0
- package/src/engine/control-flow-walker.ts +1317 -0
- package/src/engine/dispatch-sites.ts +199 -0
- package/src/engine/effect-lattice.ts +81 -0
- package/src/engine/entry-points.ts +57 -0
- package/src/engine/event-flow.ts +524 -0
- package/src/engine/event-relay.ts +92 -0
- package/src/engine/op-classification.ts +92 -0
- package/src/engine/path-walker.ts +189 -0
- package/src/engine/reverse-call-graph.ts +23 -0
- package/src/engine/root-classifier-overlay.ts +194 -0
- package/src/engine/root-classifier.ts +135 -0
- package/src/engine/scc.ts +110 -0
- package/src/engine/source-anchor.ts +25 -0
- package/src/engine/summary-context.ts +104 -0
- package/src/engine/summary-engine.ts +296 -0
- package/src/engine/summary-runner.ts +560 -0
- package/src/engine/transaction-spans.ts +112 -0
- package/src/engine/uncertainty-util.ts +54 -0
- package/src/hash.ts +31 -0
- package/src/index/attribute-from-node.ts +141 -0
- package/src/index/callee-from-node.ts +181 -0
- package/src/index/capability/background.ts +90 -0
- package/src/index/capability/commit.ts +44 -0
- package/src/index/capability/dispatch.ts +164 -0
- package/src/index/capability/events.ts +65 -0
- package/src/index/capability/extractor.ts +124 -0
- package/src/index/capability/file-blob.ts +137 -0
- package/src/index/capability/http.ts +159 -0
- package/src/index/capability/hyperlink.ts +60 -0
- package/src/index/capability/isolated-storage.ts +179 -0
- package/src/index/capability/table.ts +113 -0
- package/src/index/capability/telemetry.ts +84 -0
- package/src/index/capability/ui.ts +55 -0
- package/src/index/capability/value-source.ts +202 -0
- package/src/index/expression-from-node.ts +117 -0
- package/src/index/indexer.ts +102 -0
- package/src/index/intraprocedural-body.ts +1467 -0
- package/src/index/intraprocedural-ops.ts +253 -0
- package/src/index/intraprocedural-refs.ts +188 -0
- package/src/index/object-indexer.ts +279 -0
- package/src/index/routine-indexer.ts +282 -0
- package/src/index/routine-signature.ts +46 -0
- package/src/index/variable-indexer.ts +134 -0
- package/src/index/variable-initializer-extractor.ts +155 -0
- package/src/index/variable-type-normalizer.ts +83 -0
- package/src/index.ts +267 -0
- package/src/mcp/server.ts +72 -0
- package/src/mcp/session.ts +49 -0
- package/src/mcp/tools/explain-path.ts +75 -0
- package/src/mcp/tools/get-analysis-health.ts +62 -0
- package/src/mcp/tools/get-finding.ts +47 -0
- package/src/mcp/tools/get-routine-summary.ts +126 -0
- package/src/mcp/tools/list-findings.ts +85 -0
- package/src/mcp/tools/list-hotspots.ts +78 -0
- package/src/mcp/tools/list-rollups.ts +103 -0
- package/src/mcp/tools/validators.ts +25 -0
- package/src/model/attributes.ts +120 -0
- package/src/model/callee.ts +45 -0
- package/src/model/capability.ts +187 -0
- package/src/model/coverage.ts +85 -0
- package/src/model/entities.ts +628 -0
- package/src/model/expression.ts +98 -0
- package/src/model/finding.ts +110 -0
- package/src/model/graph-edge.ts +93 -0
- package/src/model/graph.ts +62 -0
- package/src/model/identity.ts +81 -0
- package/src/model/ids.ts +90 -0
- package/src/model/index.ts +13 -0
- package/src/model/model.ts +51 -0
- package/src/model/permission.ts +76 -0
- package/src/model/root-classification.ts +116 -0
- package/src/model/stable-identity.ts +102 -0
- package/src/model/summary.ts +96 -0
- package/src/parser/ast.ts +82 -0
- package/src/parser/native/ffi.ts +145 -0
- package/src/parser/native/parse-index-pool.ts +148 -0
- package/src/parser/native/parse-index-worker.ts +94 -0
- package/src/parser/native/wrapper.ts +353 -0
- package/src/parser/parser-init.ts +43 -0
- package/src/perf/profiler.ts +66 -0
- package/src/policy/policy-default.yaml +83 -0
- package/src/policy/policy-engine.ts +339 -0
- package/src/policy/policy-loader.ts +257 -0
- package/src/policy/policy-schema.json +379 -0
- package/src/policy/policy-types.ts +81 -0
- package/src/policy/predicate-compiler.ts +151 -0
- package/src/policy/predicate-evaluator.ts +267 -0
- package/src/policy/predicate-fields.ts +439 -0
- package/src/projection/actionable-anchor.ts +48 -0
- package/src/projection/finding-filters.ts +44 -0
- package/src/projection/finding-fingerprint.ts +54 -0
- package/src/projection/finding-groups.ts +41 -0
- package/src/projection/finding-summary.ts +110 -0
- package/src/projection/rollup-findings.ts +105 -0
- package/src/providers/discover.ts +88 -0
- package/src/providers/external.ts +46 -0
- package/src/providers/types.ts +36 -0
- package/src/providers/workspace.ts +117 -0
- package/src/resolve/call-resolver.ts +117 -0
- package/src/resolve/coverage.ts +61 -0
- package/src/resolve/event-graph.ts +166 -0
- package/src/resolve/implicit-edges.ts +53 -0
- package/src/resolve/record-types.ts +36 -0
- package/src/resolve/resolver.ts +23 -0
- package/src/resolve/semantic-graph.ts +29 -0
- package/src/resolve/symbol-table.ts +69 -0
- package/src/snapshot/app-snapshot.ts +74 -0
- package/src/snapshot/compose.ts +100 -0
- package/src/snapshot/derive/callsite-evidence.ts +76 -0
- package/src/snapshot/derive/capability-facts.ts +70 -0
- package/src/snapshot/derive/contracts.ts +131 -0
- package/src/snapshot/derive/coverage.ts +35 -0
- package/src/snapshot/derive/event-declarations.ts +140 -0
- package/src/snapshot/derive/identity-table.ts +58 -0
- package/src/snapshot/derive/inputs.ts +91 -0
- package/src/snapshot/derive/operation-evidence.ts +70 -0
- package/src/snapshot/derive/permissions.ts +186 -0
- package/src/snapshot/derive/root-classifications.ts +56 -0
- package/src/snapshot/derive/schema.ts +130 -0
- package/src/snapshot/derive/typed-edges.ts +60 -0
- package/src/snapshot/derive/workspace-fingerprint.ts +19 -0
- package/src/snapshot/deserialize.ts +40 -0
- package/src/snapshot/serialize-cbor-gz.ts +12 -0
- package/src/snapshot/serialize-cbor.ts +19 -0
- package/src/snapshot/serialize-json.ts +22 -0
- package/src/snapshot/shard.ts +134 -0
- package/src/snapshot/types.ts +181 -0
- package/src/symbols/app-manifest.ts +96 -0
- package/src/symbols/app-package-zip.ts +50 -0
- package/src/symbols/embedded-source-reader.ts +41 -0
- package/src/symbols/package-hash.ts +81 -0
- package/src/symbols/symbol-reader.ts +101 -0
- package/src/symbols/symbol-reference-parser.ts +378 -0
- package/src/symbols/symbol-reference-reader.ts +27 -0
- package/tsconfig.json +18 -0
|
@@ -0,0 +1,1467 @@
|
|
|
1
|
+
// src/index/intraprocedural-body.ts
|
|
2
|
+
// Single-DFS body walker: loops + record ops + commits + call sites + field accesses.
|
|
3
|
+
//
|
|
4
|
+
// Previously the routine-indexer drove TWO separate full-body traversals — one in
|
|
5
|
+
// `extractOpsAndLoops` (loops + record-ops + commits) and one in `extractRefs` (call
|
|
6
|
+
// sites + field accesses). Each traversal walked `namedChildren` recursively through
|
|
7
|
+
// FFI; combined, they did 2× the work. On Microsoft Base Application that was a
|
|
8
|
+
// significant chunk of the per-file cost.
|
|
9
|
+
//
|
|
10
|
+
// This module fuses both passes. The walker maintains an ambient `loopStack` (so
|
|
11
|
+
// call sites' `loopStack` is populated directly — no post-pass back-fill), and it
|
|
12
|
+
// handles `call_expression` nodes structurally: only the argument list is recursed
|
|
13
|
+
// into, so the function-callee subtree (`member_expression` or `identifier`) is
|
|
14
|
+
// never re-examined as a stray field access. That structural skip removes the
|
|
15
|
+
// previous `node.parent`/`.id`-based "is this a callee?" check that called FFI on
|
|
16
|
+
// every member-expression descendant.
|
|
17
|
+
|
|
18
|
+
import type {
|
|
19
|
+
CallArgumentBinding,
|
|
20
|
+
CallSite,
|
|
21
|
+
ConditionReference,
|
|
22
|
+
ControlFlowNode,
|
|
23
|
+
FieldAccess,
|
|
24
|
+
LoopNode,
|
|
25
|
+
LoopType,
|
|
26
|
+
OperationSite,
|
|
27
|
+
ParameterSymbol,
|
|
28
|
+
RecordOpType,
|
|
29
|
+
RecordOperation,
|
|
30
|
+
RecordVariable,
|
|
31
|
+
UnreachableStatement,
|
|
32
|
+
VarAssignment,
|
|
33
|
+
} from "../model/entities.ts";
|
|
34
|
+
import type { ExpressionInfo } from "../model/expression.ts";
|
|
35
|
+
import {
|
|
36
|
+
type LoopId,
|
|
37
|
+
type RoutineId,
|
|
38
|
+
encodeCallsiteId,
|
|
39
|
+
encodeLoopId,
|
|
40
|
+
encodeOperationId,
|
|
41
|
+
} from "../model/ids.ts";
|
|
42
|
+
import { nodeToSourceRange } from "../parser/ast.ts";
|
|
43
|
+
import type { Node as SyntaxNode } from "../parser/native/wrapper.ts";
|
|
44
|
+
import { calleeFromNode } from "./callee-from-node.ts";
|
|
45
|
+
import { expressionInfoFromNode } from "./expression-from-node.ts";
|
|
46
|
+
|
|
47
|
+
/**
 * Lookup table from the grammar node type of a loop statement to the
 * `LoopType` tag used by the index model.
 */
const LOOP_TYPE_MAP: Record<string, LoopType> = {
  "repeat_statement": "repeat",
  "for_statement": "for",
  "foreach_statement": "foreach",
  "while_statement": "while",
};
|
|
53
|
+
|
|
54
|
+
/**
 * Case-insensitive lookup for record operations: the key is the lowercase
 * method name, the value is the properly-cased `RecordOpType`.
 *
 * Every key is exactly the lowercase form of its value, so the table is
 * derived from the list of canonical names instead of being spelled out twice.
 */
const RECORD_OP_MAP: Record<string, RecordOpType> = (() => {
  const canonicalNames: RecordOpType[] = [
    "FindSet",
    "FindFirst",
    "FindLast",
    "Find",
    "Get",
    "CalcFields",
    "CalcSums",
    "TestField",
    "Modify",
    "ModifyAll",
    "Insert",
    "Delete",
    "DeleteAll",
    "SetLoadFields",
    "AddLoadFields",
    "SetRange",
    "SetFilter",
    "SetCurrentKey",
    "Reset",
    "Copy",
    "TransferFields",
    "Validate",
    "Init",
    "Next",
    "Count",
    "CountApprox",
    "IsEmpty",
    "LockTable",
  ];
  const table: Record<string, RecordOpType> = {};
  for (const canonical of canonicalNames) {
    table[canonical.toLowerCase()] = canonical;
  }
  return table;
})();
|
|
85
|
+
|
|
86
|
+
/**
 * Properly-cased record operations whose field arguments are all captured.
 *
 * The lookup operations (`Get`, `Find`, `FindFirst`, `FindLast`, `FindSet`) are
 * members so that D4 (repeated identical lookup in a loop) can read the first
 * argument — the key expression — and spot duplicate calls with the same
 * literal key.
 */
const FIELD_ARGS_OPS = new Set<string>([
  // Filtering, load-field and key selection.
  "SetRange",
  "SetFilter",
  "SetLoadFields",
  "AddLoadFields",
  "SetCurrentKey",
  "Validate",
  // Lookups (first argument is the key expression used by D4).
  "Get",
  "Find",
  "FindFirst",
  "FindLast",
  "FindSet",
  // Field calculations and checks.
  "CalcFields",
  "CalcSums",
  "TestField",
]);
|
|
109
|
+
|
|
110
|
+
/** Everything the body walker reports for a single routine body. */
export interface ExtractBodyResult {
  /** Loops found in the body. */
  loops: LoopNode[];
  /** Operation sites found in the body. */
  operationSites: OperationSite[];
  /** Record operations found in the body. */
  recordOperations: RecordOperation[];
  /** Call sites found in the body. */
  callSites: CallSite[];
  /** Field accesses found in the body. */
  fieldAccesses: FieldAccess[];
  /** Statements reported as unreachable. */
  unreachableStatements: UnreachableStatement[];
  /** Set when at least one branching node (if / case / case_branch / try) was visited. */
  hasBranching: boolean;
  /**
   * Compact control-flow tree whose root is the routine's code_block. It is
   * filled in together with the flat feature lists, shares their op/callsite
   * ids, and is read by the path-aware walker (P6.T2).
   */
  statementTree: ControlFlowNode;
  /**
   * Names of identifiers used as values in the body: lowercased, deduplicated
   * and sorted. Not included: the member part of a member expression
   * (field/method names), the value part of a qualified enum value (enum
   * members), and a bare type name in enum-type position. D19 (unused
   * parameter) relies on this, as can any detector that only needs to know
   * whether a name is referenced at all.
   */
  identifierReferences: string[];
  /**
   * One entry per `<lhs> := <rhs>` statement whose left-hand side is an
   * identifier or a member_expression, ordered by source position. The
   * right-hand literal is recorded when the RHS node is a boolean, integer or
   * string_literal. Consumed by Phase 3 D43.
   */
  varAssignments: VarAssignment[];
  /**
   * Identifiers that occur in a control predicate (if-test, while-test,
   * repeat-until, case-of subject), ordered by `referenceAnchor`. Consumed by
   * the Phase 3.1 D43 dispatch-site guard detection.
   */
  conditionReferences: ConditionReference[];
}
|
|
147
|
+
|
|
148
|
+
/**
 * Describe how each argument of a call site is bound, using caller-side
 * information only. Whether the matching callee parameter is `var`, and any
 * upgrade of `bindingResolution`, is left to the call-resolver once it has
 * matched the call site against a callee signature.
 *
 * @param argumentNodes Argument nodes of the call, in positional order.
 * @param enclosingRoutineParameters Parameters of the routine containing the call.
 * @param enclosingRoutineRecordVariables Record variables of the routine containing the call.
 * @param sourceUnitId Source unit recorded in every argument anchor.
 * @param routineId Enclosing routine recorded in every argument anchor.
 * @returns One binding per argument; `parameterIndex` is the argument position.
 */
function extractArgumentBindings(
  argumentNodes: SyntaxNode[],
  enclosingRoutineParameters: ParameterSymbol[],
  enclosingRoutineRecordVariables: RecordVariable[],
  sourceUnitId: string,
  routineId: RoutineId,
): CallArgumentBinding[] {
  // Lookups are keyed by lowercased name; a later duplicate replaces an earlier one.
  const recordVariablesByName = new Map<string, RecordVariable>();
  for (const recordVariable of enclosingRoutineRecordVariables) {
    recordVariablesByName.set(recordVariable.name.toLowerCase(), recordVariable);
  }
  const parametersByName = new Map<string, ParameterSymbol>();
  for (const parameter of enclosingRoutineParameters) {
    parametersByName.set(parameter.name.toLowerCase(), parameter);
  }

  const bindings: CallArgumentBinding[] = [];
  for (const [parameterIndex, argumentNode] of argumentNodes.entries()) {
    const argumentAnchor: CallArgumentBinding["argumentAnchor"] = {
      sourceUnitId,
      range: nodeToSourceRange(argumentNode),
      enclosingRoutineId: routineId,
      syntaxKind: argumentNode.type,
    };

    // A symbol binding is only possible for a bare `identifier` node. Literals,
    // calls, member expressions, quoted identifiers and so on are value
    // expressions: they flow by value and carry no record-flow semantics.
    if (argumentNode.type !== "identifier") {
      bindings.push({
        parameterIndex,
        sourceKind: "expression",
        calleeParameterIsVar: false,
        argumentAnchor,
        bindingResolution: "non-record-arg",
      });
      continue;
    }

    const loweredName = argumentNode.text.trim().toLowerCase();
    const recordVariable = recordVariablesByName.get(loweredName);
    const parameter = parametersByName.get(loweredName);

    // Precedence: parameter, then record variable, then the implicit Rec/xRec.
    let sourceKind: CallArgumentBinding["sourceKind"];
    if (parameter !== undefined) {
      sourceKind = "parameter";
    } else if (recordVariable !== undefined) {
      sourceKind = "local";
    } else if (loweredName === "rec" || loweredName === "xrec") {
      sourceKind = "implicit-rec";
    } else {
      sourceKind = "unknown";
    }

    bindings.push({
      parameterIndex,
      sourceKind,
      // Only named-variable kinds carry a name; an identifier that resolves to
      // no local / parameter / implicit Rec is "unknown" and stays nameless.
      sourceVariableName: sourceKind === "unknown" ? undefined : loweredName,
      sourceRecordVariableId: recordVariable?.id,
      sourceParameterIndex: parameter?.index,
      callerSourceParameterIsVar: parameter?.isVar,
      // `undefined` means "not resolved yet" (the Phase 2 call-resolver upgrades
      // it); "unknown" is reserved for a Phase 2+ "tried and failed" outcome.
      sourceTableId: recordVariable?.tableId,
      sourceTempState: recordVariable?.tempState,
      argumentAnchor,
      calleeParameterIsVar: false, // upgraded by the call-resolver later
      bindingResolution: recordVariable !== undefined ? "unresolved-callee" : "non-record-arg",
    });
  }
  return bindings;
}
|
|
217
|
+
|
|
218
|
+
/**
 * Name written on the left-hand side of an assignment.
 *
 * - `identifier`: the identifier's own text.
 * - `member_expression`: the text of the trailing member (`Rec.Field := ...`
 *   yields `Field`). The `member` field is preferred; the last named child is
 *   the fallback.
 *
 * The text is returned as written in the source; lowercasing is left to the
 * caller.
 *
 * @returns The name, or `undefined` for any other node shape.
 */
function lhsIdentifierOf(target: SyntaxNode): string | undefined {
  switch (target.type) {
    case "identifier":
      return target.text;
    case "member_expression": {
      const trailing =
        target.childForFieldName("member") ?? target.namedChild(target.namedChildCount - 1);
      return trailing?.text;
    }
    default:
      return undefined;
  }
}
|
|
233
|
+
|
|
234
|
+
/**
 * Lowercased literal value of an assignment's right-hand side.
 *
 * Recognized node types:
 * - `boolean`: the text, lowercased (`"true"` / `"false"`).
 * - `integer`: the digits exactly as written.
 * - `string_literal`: the content of an AL single-quoted string. The
 *   surrounding quotes are removed, every doubled quote (`''`, AL's escape for
 *   an apostrophe inside a string) is decoded to a single `'`, and the result
 *   is lowercased. Text that is not wrapped in quotes is only lowercased.
 *
 * @param value The right-hand-side node of an assignment.
 * @returns The literal text, or `undefined` when the node is not one of the
 *   recognized literal types.
 */
function literalTextOf(value: SyntaxNode): string | undefined {
  switch (value.type) {
    case "boolean":
      return value.text.toLowerCase();
    case "integer":
      return value.text;
    case "string_literal": {
      const raw = value.text;
      const isQuoted = raw.length >= 2 && raw.startsWith("'") && raw.endsWith("'");
      if (!isQuoted) return raw.toLowerCase();
      // Remove exactly one quote from each end, then decode the `''` escape so
      // consumers see the real string content rather than its source spelling.
      return raw.slice(1, -1).replace(/''/g, "'").toLowerCase();
    }
    default:
      return undefined;
  }
}
|
|
258
|
+
|
|
259
|
+
/**
 * Gather the `<lhs> := <rhs>` statements of a routine body whose left-hand
 * side is an `identifier` or a `member_expression`.
 *
 * @param bodyNode Root of the subtree to search.
 * @param routineId Enclosing routine recorded in each source anchor.
 * @param sourceUnitId Source unit recorded in each source anchor.
 * @returns The assignments ordered by start line, then start column, so the
 *   output is deterministic.
 */
function collectVarAssignments(
  bodyNode: SyntaxNode,
  routineId: RoutineId,
  sourceUnitId: string,
): VarAssignment[] {
  // Turns one `assignment_statement` into a record, or `undefined` when either
  // side is missing or the left-hand side has an unsupported shape.
  const describeAssignment = (statement: SyntaxNode): VarAssignment | undefined => {
    // Same accessor pattern as variable-initializer-extractor.ts: named fields
    // first, positional named children as the fallback, so a minor rename of
    // a grammar field does not break the indexer.
    const target =
      statement.childForFieldName("left") ??
      statement.childForFieldName("target") ??
      statement.namedChild(0);
    const value =
      statement.childForFieldName("right") ??
      statement.childForFieldName("value") ??
      statement.namedChild(statement.namedChildCount - 1);
    if (!target || !value) return undefined;

    const writtenName = lhsIdentifierOf(target);
    if (writtenName === undefined) return undefined;

    return {
      lhsName: writtenName.toLowerCase(),
      rhsLiteralValue: literalTextOf(value),
      sourceAnchor: {
        sourceUnitId,
        range: nodeToSourceRange(statement),
        enclosingRoutineId: routineId,
        syntaxKind: statement.type,
      },
    };
  };

  const assignments: VarAssignment[] = [];
  const pending: SyntaxNode[] = [bodyNode];
  while (pending.length > 0) {
    const current = pending.pop();
    if (!current) continue;

    if (current.type === "assignment_statement") {
      const assignment = describeAssignment(current);
      if (assignment !== undefined) assignments.push(assignment);
    }

    // Descend into every named child, including those of an assignment itself.
    const childCount = current.namedChildCount;
    for (let index = 0; index < childCount; index++) {
      const child = current.namedChild(index);
      if (child) pending.push(child);
    }
  }

  assignments.sort((first, second) => {
    const firstRange = first.sourceAnchor.range;
    const secondRange = second.sourceAnchor.range;
    const lineDelta = firstRange.startLine - secondRange.startLine;
    if (lineDelta) return lineDelta;
    return firstRange.startColumn - secondRange.startColumn;
  });
  return assignments;
}
|
|
312
|
+
|
|
313
|
+
/**
 * Gather the identifiers that appear in the control predicates of a routine
 * body: the test of an `if` or `while`, the `until` expression of a `repeat`,
 * and the subject of a `case`.
 *
 * Compound expressions are descended into, so `A and not IsHandled` yields
 * both identifiers. A member expression contributes only its trailing member
 * name; its receiver is not descended into.
 *
 * Grammar fields read per statement type (confirmed by probe):
 *   if_statement      -> "condition"
 *   while_statement   -> "condition"
 *   repeat_statement  -> "condition"   (the until-expression)
 *   case_statement    -> "expression"  (NOT "subject")
 *
 * @param bodyNode Root of the subtree to search.
 * @param routineId Enclosing routine recorded in each anchor.
 * @param sourceUnitId Source unit recorded in each anchor.
 * @returns The references ordered by the start line, then start column, of
 *   their `referenceAnchor`, so the output is deterministic.
 */
function collectConditionReferences(
  bodyNode: SyntaxNode,
  routineId: RoutineId,
  sourceUnitId: string,
): ConditionReference[] {
  type PredicateSlot = { field: string; kind: ConditionReference["conditionKind"] };

  // Statement node type -> where its predicate lives and how it is labelled.
  const predicateSlots = new Map<string, PredicateSlot>([
    ["if_statement", { field: "condition", kind: "if" }],
    ["while_statement", { field: "condition", kind: "while" }],
    ["repeat_statement", { field: "condition", kind: "repeat-until" }],
    ["case_statement", { field: "expression", kind: "case" }],
  ]);

  const references: ConditionReference[] = [];

  const anchorFor = (node: SyntaxNode): ConditionReference["referenceAnchor"] => ({
    sourceUnitId,
    range: nodeToSourceRange(node),
    enclosingRoutineId: routineId,
    syntaxKind: node.type,
  });

  const gatherOperands = (
    expression: SyntaxNode | null,
    kind: ConditionReference["conditionKind"],
    statementAnchor: ConditionReference["statementAnchor"],
  ): void => {
    if (!expression) return;

    switch (expression.type) {
      case "identifier":
        references.push({
          identifier: expression.text.toLowerCase(),
          conditionKind: kind,
          statementAnchor,
          referenceAnchor: anchorFor(expression),
        });
        return;

      case "member_expression": {
        // Only the trailing member counts; nothing below this node is visited.
        const trailing =
          expression.childForFieldName("member") ??
          expression.namedChild(expression.namedChildCount - 1);
        if (trailing?.type === "identifier") {
          references.push({
            identifier: trailing.text.toLowerCase(),
            conditionKind: kind,
            statementAnchor,
            referenceAnchor: anchorFor(trailing),
          });
        }
        return;
      }

      default: {
        const operandCount = expression.namedChildCount;
        for (let index = 0; index < operandCount; index++) {
          gatherOperands(expression.namedChild(index), kind, statementAnchor);
        }
      }
    }
  };

  const pending: SyntaxNode[] = [bodyNode];
  while (pending.length > 0) {
    const current = pending.pop();
    if (!current) continue;

    const slot = predicateSlots.get(current.type);
    if (slot !== undefined) {
      const predicate = current.childForFieldName(slot.field);
      if (predicate) {
        const statementAnchor = anchorFor(current);
        gatherOperands(predicate, slot.kind, statementAnchor);
      }
    }

    // Every named child is visited regardless, so nested statements are found.
    const childCount = current.namedChildCount;
    for (let index = 0; index < childCount; index++) {
      const child = current.namedChild(index);
      if (child) pending.push(child);
    }
  }

  references.sort((first, second) => {
    const firstRange = first.referenceAnchor.range;
    const secondRange = second.referenceAnchor.range;
    const lineDelta = firstRange.startLine - secondRange.startLine;
    if (lineDelta) return lineDelta;
    return firstRange.startColumn - secondRange.startColumn;
  });
  return references;
}
|
|
419
|
+
|
|
420
|
+
/**
|
|
421
|
+
* Walk a routine body once, emitting every L1 feature the indexer needs.
|
|
422
|
+
*
|
|
423
|
+
* Operation-id ordering matches the previous two-pass implementation: record-ops
|
|
424
|
+
* and commits get ids 0..N-1 (in body DFS order), call sites get ids N..N+M-1
|
|
425
|
+
* (also in body DFS order). The two streams are collected during the walk and
|
|
426
|
+
* assigned final operation ids after — that's why call-site `operationId` is
|
|
427
|
+
* filled in post-walk.
|
|
428
|
+
*/
|
|
429
|
+
export function extractBodyFeatures(
|
|
430
|
+
bodyNode: SyntaxNode,
|
|
431
|
+
routineId: RoutineId,
|
|
432
|
+
sourceUnitId: string,
|
|
433
|
+
recordVarNames: Set<string>,
|
|
434
|
+
enclosingParameters: ParameterSymbol[],
|
|
435
|
+
enclosingRecordVariables: RecordVariable[],
|
|
436
|
+
): ExtractBodyResult {
|
|
437
|
+
const loops: LoopNode[] = [];
|
|
438
|
+
const operationSites: OperationSite[] = [];
|
|
439
|
+
const recordOperations: RecordOperation[] = [];
|
|
440
|
+
const callSites: CallSite[] = [];
|
|
441
|
+
const fieldAccesses: FieldAccess[] = [];
|
|
442
|
+
const unreachableStatements: UnreachableStatement[] = [];
|
|
443
|
+
const identifierRefSet = new Set<string>();
|
|
444
|
+
|
|
445
|
+
/**
 * Add the lowercased name of every value-position identifier below `root` to
 * `identifierRefSet`. Per the surrounding design notes this is meant for the
 * callee subtree inside `handleCallExpression`, which the main visitor skips
 * for performance, so that e.g. the receiver in `Other.SetRange(...)` is
 * still recorded.
 *
 * An identifier is NOT recorded when it is:
 * - the `member` field of a `member_expression`,
 * - the `value` field of a `qualified_enum_value`, or
 * - the `enum_type` field of a `qualified_enum_value` and that field is a
 *   bare `identifier`.
 *
 * NOTE(review): an identifier is only recorded when it has a parent inside
 * the walked subtree, so an `identifier` passed as `root` itself is never
 * added. Confirm the caller covers a bare-identifier callee such as
 * `Helper(x)` in some other way.
 */
function collectIdentifiersFrom(root: SyntaxNode): void {
  // True when `identifier`, a direct child of `owner`, is in a name-only position.
  const isNameOnlyPosition = (identifier: SyntaxNode, owner: SyntaxNode): boolean => {
    const ownerType = owner.type;
    if (ownerType === "member_expression") {
      const memberField = owner.childForFieldName("member");
      return memberField !== null && memberField.startIndex === identifier.startIndex;
    }
    if (ownerType === "qualified_enum_value") {
      const valueField = owner.childForFieldName("value");
      const enumTypeField = owner.childForFieldName("enum_type");
      if (valueField !== null && valueField.startIndex === identifier.startIndex) {
        return true;
      }
      return (
        enumTypeField !== null &&
        enumTypeField.startIndex === identifier.startIndex &&
        enumTypeField.type === "identifier"
      );
    }
    return false;
  };

  // Each work item pairs a node with the parent it was reached from.
  const pending: Array<[SyntaxNode, SyntaxNode | null]> = [[root, null]];
  while (pending.length > 0) {
    const entry = pending.pop();
    if (entry === undefined) continue;
    const [current, owner] = entry;

    if (current.type === "identifier" && owner !== null && !isNameOnlyPosition(current, owner)) {
      identifierRefSet.add(current.text.toLowerCase());
    }

    for (const child of current.namedChildren) {
      if (child !== null) pending.push([child, current]);
    }
  }
}
|
|
489
|
+
|
|
490
|
+
const loopStack: LoopId[] = [];
|
|
491
|
+
let opIndex = 0;
|
|
492
|
+
let csIndex = 0;
|
|
493
|
+
let unreachableIndex = 0;
|
|
494
|
+
let hasBranching = false;
|
|
495
|
+
|
|
496
|
+
// Maps from tree-sitter Node.id (unique per node within a tree) to the op/callsite
|
|
497
|
+
// ID assigned during the visit pass. Used post-visit to build the statement tree
|
|
498
|
+
// without a second DFS.
|
|
499
|
+
//
|
|
500
|
+
// Keyed by `node.id` rather than `startIndex` because chained-receiver expressions
|
|
501
|
+
// (e.g. `Helper(C).FindSet()`) produce nested `call_expression` nodes that share
|
|
502
|
+
// the same `startIndex` — keying by `startIndex` collapses them and silently drops
|
|
503
|
+
// the inner call. `Node.id` is a per-tree interned identifier that's unique even
|
|
504
|
+
// for nodes spanning the same source position.
|
|
505
|
+
const opIdByNodeId = new Map<number, string>();
|
|
506
|
+
const csIdByNodeId = new Map<number, string>();
|
|
507
|
+
|
|
508
|
+
/**
|
|
509
|
+
* Classify a statement node as an unconditional exit from the enclosing routine.
|
|
510
|
+
* Recognised shapes (case-insensitive):
|
|
511
|
+
* - `exit_statement` (covers `Exit;` and `Exit(value);`)
|
|
512
|
+
* - bare `call_expression` whose callee identifier is `Error` (any arity)
|
|
513
|
+
* - `call_expression` whose callee is a `member_expression` `<receiver>.Quit`
|
|
514
|
+
* where receiver is `CurrReport` (the report-runtime exit)
|
|
515
|
+
* Returns `undefined` for non-exit statements.
|
|
516
|
+
*/
|
|
517
|
+
function unconditionalExitKind(node: SyntaxNode): UnreachableStatement["exitKind"] | undefined {
  // Classifies a statement node as an unconditional exit. Returns:
  //   "exit"            - `exit_statement`
  //   "error"           - `call_expression` whose callee is the bare identifier
  //                       `Error` (case-insensitive)
  //   "currreport-quit" - `CurrReport.Quit` (case-insensitive), written either
  //                       as a `call_expression` (`CurrReport.Quit()`) or as a
  //                       bare `member_expression` (`CurrReport.Quit;`)
  //   undefined         - anything else
  //
  // The bare `member_expression` form is accepted because a member_expression
  // in statement position is a parameterless call written without parens;
  // previously only the parenthesised form was recognised.
  if (node.type === "exit_statement") return "exit";

  const isCall = node.type === "call_expression";
  if (!isCall && node.type !== "member_expression") return undefined;

  // For a call, inspect its callee; for a paren-less call, the node itself is
  // the `<receiver>.<method>` expression.
  const funcNode = isCall ? (node.childForFieldName("function") ?? node.namedChildren[0]) : node;
  if (!funcNode) return undefined;

  if (funcNode.type === "identifier" && funcNode.text.toLowerCase() === "error") {
    return "error";
  }

  if (funcNode.type === "member_expression") {
    const objNode = funcNode.childForFieldName("object") ?? funcNode.namedChildren[0];
    const memberNode = funcNode.childForFieldName("member") ?? funcNode.namedChildren[1];
    if (
      objNode &&
      memberNode &&
      objNode.text.toLowerCase() === "currreport" &&
      memberNode.text.toLowerCase() === "quit"
    ) {
      return "currreport-quit";
    }
  }
  return undefined;
}
|
|
539
|
+
|
|
540
|
+
function handleCallExpression(node: SyntaxNode, nodeType: string): void {
  // Registers one `call_expression`:
  //   - `<receiver>.<method>(...)` where the lower-cased method name is a key
  //     of RECORD_OP_MAP -> a RecordOperation plus an OperationSite
  //   - bare `Commit(...)` (case-insensitive) -> a "commit" OperationSite
  //   - any other member call or bare-identifier call -> a CallSite
  // It then visits the argument expressions and, for a chained call such as
  // `Helper(C).FindSet()`, the receiver call.
  const calleeNode = node.childForFieldName("function") ?? node.namedChildren[0];
  if (!calleeNode) return;

  // The generic visitor never walks the callee subtree (only the arguments
  // are visited further down), so identifier references inside the callee are
  // gathered here.
  collectIdentifiersFrom(calleeNode);

  const argumentList = node.namedChildren.find(
    (candidate) => candidate !== null && candidate.type === "argument_list",
  );

  // Shared CallSite emission for "member call that is not a record op" and
  // "bare call that is not Commit".
  const emitCallSite = (calleeText: string): void => {
    const argumentTexts: string[] = [];
    const argumentInfos: ExpressionInfo[] = [];
    const argumentNodes: SyntaxNode[] = [];
    if (argumentList) {
      for (const argument of argumentList.namedChildren) {
        if (argument === null) continue;
        argumentTexts.push(argument.text);
        argumentInfos.push(expressionInfoFromNode(argument));
        argumentNodes.push(argument);
      }
    }
    const callsiteId = encodeCallsiteId(routineId, csIndex++);
    csIdByNodeId.set(node.id, callsiteId);
    callSites.push({
      id: callsiteId,
      // Placeholder: the final operation id is assigned after the walk, once
      // every record-op / commit id in the routine has been handed out.
      operationId: "",
      calleeText,
      callee: calleeFromNode(node),
      argumentTexts,
      argumentInfos,
      argumentBindings: extractArgumentBindings(
        argumentNodes,
        enclosingParameters,
        enclosingRecordVariables,
        sourceUnitId,
        routineId,
      ),
      loopStack: loopStack.slice(),
      sourceAnchor: {
        sourceUnitId,
        range: nodeToSourceRange(node),
        enclosingRoutineId: routineId,
        syntaxKind: nodeType,
      },
    });
  };

  if (calleeNode.type === "member_expression") {
    const methodNode = calleeNode.childForFieldName("member") ?? calleeNode.namedChildren[1];
    if (!methodNode) return;

    const recordOp = RECORD_OP_MAP[methodNode.text.toLowerCase()];
    if (recordOp === undefined) {
      emitCallSite(calleeNode.text);
    } else {
      // Record-op site, e.g. `SalesLine.SetRange(...)`.
      const receiverNode = calleeNode.childForFieldName("object") ?? calleeNode.namedChildren[0];
      const receiverText = receiverNode ? receiverNode.text : "";

      // Argument texts/infos are captured only for ops in FIELD_ARGS_OPS.
      let fieldArguments: string[] | undefined;
      let fieldArgumentInfos: ExpressionInfo[] | undefined;
      if (FIELD_ARGS_OPS.has(recordOp) && argumentList) {
        const texts: string[] = [];
        const infos: ExpressionInfo[] = [];
        for (const argument of argumentList.namedChildren) {
          if (argument === null) continue;
          texts.push(argument.text);
          infos.push(expressionInfoFromNode(argument));
        }
        fieldArguments = texts;
        fieldArgumentInfos = infos;
      }

      // The anchor and the loop-stack snapshot are shared by both records.
      const sourceAnchor = {
        sourceUnitId,
        range: nodeToSourceRange(node),
        enclosingRoutineId: routineId,
        syntaxKind: nodeType,
      };
      const operationId = encodeOperationId(routineId, opIndex++);
      opIdByNodeId.set(node.id, operationId);
      const enclosingLoops = loopStack.slice();

      recordOperations.push({
        id: operationId,
        routineId,
        op: recordOp,
        recordVariableName: receiverText,
        tempState: { kind: "unknown" },
        fieldArguments,
        ...(fieldArgumentInfos !== undefined ? { fieldArgumentInfos } : {}),
        loopStack: enclosingLoops,
        sourceAnchor,
      });
      operationSites.push({
        id: operationId,
        routineId,
        kind: recordOp === "LockTable" ? "lock" : "record-op",
        sourceAnchor,
        loopStack: enclosingLoops,
      });
    }
  } else if (calleeNode.type === "identifier") {
    // Bare call through a plain identifier.
    const calleeName = calleeNode.text;
    if (calleeName.toLowerCase() !== "commit") {
      emitCallSite(calleeName);
    } else {
      const commitId = encodeOperationId(routineId, opIndex++);
      opIdByNodeId.set(node.id, commitId);
      operationSites.push({
        id: commitId,
        routineId,
        kind: "commit",
        sourceAnchor: {
          sourceUnitId,
          range: nodeToSourceRange(node),
          enclosingRoutineId: routineId,
          syntaxKind: nodeType,
        },
        loopStack: loopStack.slice(),
      });
    }
  }

  // Only the arguments are visited generically; the callee subtree was dealt
  // with above, and walking it again could surface its member_expression as a
  // stray field access.
  if (argumentList) {
    for (const argument of argumentList.namedChildren) {
      if (argument) visit(argument, argumentList);
    }
  }

  // Chained receiver: in `Helper(C).FindSet()` the inner `Helper(C)` call sits
  // in the callee's object position. Visit it so it is registered as well.
  if (calleeNode.type === "member_expression") {
    const receiverNode = calleeNode.childForFieldName("object") ?? calleeNode.namedChildren[0];
    if (receiverNode && receiverNode.type === "call_expression") {
      visit(receiverNode, calleeNode);
    }
  }
}
|
|
732
|
+
|
|
733
|
+
/**
|
|
734
|
+
* Parents whose direct child IS a statement (no nuance, no condition / body
|
|
735
|
+
* disambiguation needed). A member_expression with one of these parents is a
|
|
736
|
+
* bare method call written without parens (e.g. `Customer.SetRecFilter;`), not a
|
|
737
|
+
* field access — AL has no bare-expression statements that read a field for
|
|
738
|
+
* side-effect.
|
|
739
|
+
*/
|
|
740
|
+
const PURE_STATEMENT_PARENTS: ReadonlySet<string> = new Set(["code_block"]);
|
|
741
|
+
|
|
742
|
+
/**
|
|
743
|
+
* Parents that contain expression-position children (condition / variable / start /
|
|
744
|
+
* end / iterable / object) and statement-position children (body / then_branch /
|
|
745
|
+
* else_branch). The grammar uses positive fields for these — see grammar.js's
|
|
746
|
+
* `field('body', ...)` etc.
|
|
747
|
+
*
|
|
748
|
+
* Map: parentType → set of FIELD NAMES whose child is statement-position. A
|
|
749
|
+
* member_expression in any OTHER child position of the same parent is treated as
|
|
750
|
+
* expression-position (field access).
|
|
751
|
+
*
|
|
752
|
+
* `repeat_statement` is special: it has no body field. Its direct children
|
|
753
|
+
* `repeat($._statement)` are statement-position; its `condition` field child
|
|
754
|
+
* is expression-position. Treated below as "all children except condition are
|
|
755
|
+
* statements".
|
|
756
|
+
*/
|
|
757
|
+
const STATEMENT_FIELDS_BY_PARENT: ReadonlyMap<string, ReadonlySet<string>> = new Map([
|
|
758
|
+
["if_statement", new Set(["then_branch", "else_branch"])],
|
|
759
|
+
["for_statement", new Set(["body"])],
|
|
760
|
+
["while_statement", new Set(["body"])],
|
|
761
|
+
["foreach_statement", new Set(["body"])],
|
|
762
|
+
["with_statement", new Set(["body"])],
|
|
763
|
+
["case_branch", new Set(["body"])],
|
|
764
|
+
// repeat_statement handled via "not condition" logic in the visitor itself.
|
|
765
|
+
]);
|
|
766
|
+
|
|
767
|
+
function visit(node: SyntaxNode, parent: SyntaxNode | null): void {
  // Single-pass visitor over the routine body. For each node it:
  //   1. records value-position identifiers in `identifierRefSet`
  //   2. on a `code_block`, reports the first statement that follows an
  //      unconditional exit
  //   3. sets `hasBranching` on if / case / case-branch / try nodes
  //   4. registers loops and maintains `loopStack`
  //   5. hands `call_expression` nodes to `handleCallExpression`
  //   6. treats a `member_expression` as either a paren-less method call
  //      (statement position) or a field access (expression position)
  // and then recurses into the named children.
  const nodeType = node.type;
  const parentType = parent?.type ?? "";

  // (1) Identifier references. Skipped: the `member` field of a
  // `member_expression`, the `value` field of a `qualified_enum_value`, and
  // the `enum_type` field of a `qualified_enum_value` when it is a bare
  // identifier.
  if (nodeType === "identifier" && parent !== null) {
    let isValueRef = true;
    if (parentType === "member_expression") {
      const memberField = parent.childForFieldName("member");
      if (memberField !== null && memberField.startIndex === node.startIndex) {
        isValueRef = false;
      }
    } else if (parentType === "qualified_enum_value") {
      const valueField = parent.childForFieldName("value");
      const enumTypeField = parent.childForFieldName("enum_type");
      if (valueField !== null && valueField.startIndex === node.startIndex) {
        isValueRef = false;
      } else if (
        enumTypeField !== null &&
        enumTypeField.startIndex === node.startIndex &&
        enumTypeField.type === "identifier"
      ) {
        isValueRef = false;
      }
    }
    if (isValueRef) identifierRefSet.add(node.text.toLowerCase());
  }

  // (2) Unreachable-after-exit scan. Only the FIRST exit that has a following
  // sibling is reported, and only that one following sibling is anchored.
  // Nested code blocks get their own scan when the traversal reaches them.
  if (nodeType === "code_block") {
    const stmts = node.namedChildren.filter(
      (c): c is SyntaxNode =>
        c !== null && c.type !== "begin_keyword" && c.type !== "end_keyword",
    );
    for (let i = 0; i < stmts.length - 1; i++) {
      const s = stmts[i];
      if (s === undefined) continue;
      const exitKind = unconditionalExitKind(s);
      if (exitKind === undefined) continue;
      const next = stmts[i + 1];
      if (next === undefined) break;
      unreachableStatements.push({
        id: `${routineId}/u${unreachableIndex++}`,
        exitKind,
        exitAnchor: {
          sourceUnitId,
          range: nodeToSourceRange(s),
          enclosingRoutineId: routineId,
          syntaxKind: s.type,
        },
        unreachableAnchor: {
          sourceUnitId,
          range: nodeToSourceRange(next),
          enclosingRoutineId: routineId,
          syntaxKind: next.type,
        },
      });
      break;
    }
  }

  // (3) Branching constructs.
  if (
    nodeType === "if_statement" ||
    nodeType === "case_statement" ||
    nodeType === "case_branch" ||
    nodeType === "try_statement"
  ) {
    hasBranching = true;
  }

  // (4) Loop detection. The loop id stays on `loopStack` while the subtree is
  // visited and is popped on every exit path below.
  const loopType = LOOP_TYPE_MAP[nodeType];
  let pushedLoop = false;
  if (loopType !== undefined) {
    const id = encodeLoopId(routineId, loops.length);
    loops.push({
      id,
      type: loopType,
      sourceAnchor: {
        sourceUnitId,
        range: nodeToSourceRange(node),
        enclosingRoutineId: routineId,
        syntaxKind: nodeType,
      },
    });
    loopStack.push(id);
    pushedLoop = true;
  }

  // (5) Calls are handled (including their own recursion) by the helper.
  if (nodeType === "call_expression") {
    handleCallExpression(node, nodeType);
    if (pushedLoop) loopStack.pop();
    return;
  }

  // (6) Non-callee member_expression.
  if (nodeType === "member_expression") {
    const objNode = node.childForFieldName("object") ?? node.namedChildren[0];
    const memberNode = node.childForFieldName("member") ?? node.namedChildren[1];
    if (objNode && memberNode) {
      // Statement position: the parent is a code_block, or the node occupies
      // one of the parent's statement fields, or the parent is a
      // repeat_statement and the node is not its `condition`.
      let isStatementPosition = PURE_STATEMENT_PARENTS.has(parentType);
      if (!isStatementPosition && parent !== null) {
        const stmtFields = STATEMENT_FIELDS_BY_PARENT.get(parentType);
        if (stmtFields !== undefined) {
          for (const fieldName of stmtFields) {
            const fieldChild = parent.childForFieldName(fieldName);
            if (fieldChild !== null && fieldChild.startIndex === node.startIndex) {
              isStatementPosition = true;
              break;
            }
          }
        } else if (parentType === "repeat_statement") {
          const conditionChild = parent.childForFieldName("condition");
          if (conditionChild === null || conditionChild.startIndex !== node.startIndex) {
            isStatementPosition = true;
          }
        }
      }

      if (isStatementPosition) {
        // Paren-less parameterless method call (e.g. `Customer.SetRecFilter;`).
        //
        // This branch returns without walking the children, so the receiver
        // side must be recorded explicitly here. handleCallExpression does the
        // same for parenthesised calls; without this call the receiver of a
        // paren-less call never reached `identifierRefSet`.
        collectIdentifiersFrom(node);

        const methodLc = memberNode.text.toLowerCase();
        const opType = RECORD_OP_MAP[methodLc];
        const anchor = {
          sourceUnitId,
          range: nodeToSourceRange(node),
          enclosingRoutineId: routineId,
          syntaxKind: nodeType,
        };
        if (opType !== undefined) {
          // Record op: emit a RecordOperation and an OperationSite that share
          // the anchor and the loop-stack snapshot.
          const opId = encodeOperationId(routineId, opIndex++);
          opIdByNodeId.set(node.id, opId);
          const snapshotLoopStack = loopStack.slice();
          recordOperations.push({
            id: opId,
            routineId,
            op: opType,
            recordVariableName: objNode.text,
            tempState: { kind: "unknown" },
            loopStack: snapshotLoopStack,
            sourceAnchor: anchor,
          });
          operationSites.push({
            id: opId,
            routineId,
            kind: opType === "LockTable" ? "lock" : "record-op",
            sourceAnchor: anchor,
            loopStack: snapshotLoopStack,
          });
        } else {
          // Any other method: emit an argument-less CallSite. `operationId`
          // is a placeholder that is assigned after the walk.
          const csId3 = encodeCallsiteId(routineId, csIndex++);
          csIdByNodeId.set(node.id, csId3);
          callSites.push({
            id: csId3,
            operationId: "",
            calleeText: node.text,
            callee: calleeFromNode(node),
            argumentTexts: [],
            argumentInfos: [],
            argumentBindings: [],
            loopStack: loopStack.slice(),
            sourceAnchor: anchor,
          });
        }
        if (pushedLoop) loopStack.pop();
        return;
      }

      // Expression position: a field access, but only when the object is a
      // known record variable and the node is not the type side of a
      // `Record.Field::Option` reference (parent `qualified_enum_value`).
      const isEnumScopeRef = parentType === "qualified_enum_value";
      const recordVariableName = objNode.text;
      if (!isEnumScopeRef && recordVarNames.has(recordVariableName.toLowerCase())) {
        // Strip the surrounding double quotes of a `quoted_identifier`
        // member (`Customer."No."` -> `No.`); other members pass through.
        const fieldName =
          memberNode.type === "quoted_identifier" &&
          memberNode.text.length >= 2 &&
          memberNode.text.startsWith('"') &&
          memberNode.text.endsWith('"')
            ? memberNode.text.slice(1, -1)
            : memberNode.text;
        fieldAccesses.push({
          recordVariableName,
          fieldName,
          sourceAnchor: {
            sourceUnitId,
            range: nodeToSourceRange(node),
            enclosingRoutineId: routineId,
            syntaxKind: nodeType,
          },
        });
      }
    }
    // Fall through to the child walk to find chained accesses (`A.B.C`).
  }

  for (const child of node.namedChildren) {
    if (child) visit(child, node);
  }

  if (pushedLoop) loopStack.pop();
}
|
|
1010
|
+
|
|
1011
|
+
visit(bodyNode, null);
|
|
1012
|
+
|
|
1013
|
+
// Assign final operationIds to call sites: their range is opIndex..opIndex+csIndex-1
|
|
1014
|
+
// (call-sites come after every record-op/commit in the routine's id space, matching
|
|
1015
|
+
// the old two-pass behavior).
|
|
1016
|
+
for (let i = 0; i < callSites.length; i++) {
|
|
1017
|
+
const cs = callSites[i];
|
|
1018
|
+
if (cs) cs.operationId = encodeOperationId(routineId, opIndex + i);
|
|
1019
|
+
}
|
|
1020
|
+
|
|
1021
|
+
// Build the compact statement tree. This pass is purely structural — it reads the
|
|
1022
|
+
// op/callsite ID maps populated during `visit`, so all ids are final.
|
|
1023
|
+
const statementTree = buildCFNForBlock(bodyNode);
|
|
1024
|
+
|
|
1025
|
+
return {
|
|
1026
|
+
loops,
|
|
1027
|
+
operationSites,
|
|
1028
|
+
recordOperations,
|
|
1029
|
+
callSites,
|
|
1030
|
+
fieldAccesses,
|
|
1031
|
+
unreachableStatements,
|
|
1032
|
+
hasBranching,
|
|
1033
|
+
statementTree,
|
|
1034
|
+
// Deterministic, sorted order — model dumps stay byte-stable across runs.
|
|
1035
|
+
identifierReferences: [...identifierRefSet].sort(),
|
|
1036
|
+
varAssignments: collectVarAssignments(bodyNode, routineId, sourceUnitId),
|
|
1037
|
+
conditionReferences: collectConditionReferences(bodyNode, routineId, sourceUnitId),
|
|
1038
|
+
};
|
|
1039
|
+
|
|
1040
|
+
/**
|
|
1041
|
+
* Recursively build a ControlFlowNode for a code_block node. Returns a "block" node
|
|
1042
|
+
* whose children are the CFNs for each statement in the block (skipping begin/end keywords).
|
|
1043
|
+
*/
|
|
1044
|
+
function buildCFNForBlock(blockNode: SyntaxNode): ControlFlowNode {
  // Produces a "block" node whose children are the control-flow nodes built
  // for each named child of `blockNode`, in source order. The begin/end
  // keyword nodes are ignored, as is any child for which
  // `buildCFNForStatement` returns null.
  const statementNodes: ControlFlowNode[] = [];
  for (const statement of blockNode.namedChildren) {
    if (statement === null) continue;
    const isKeyword = statement.type === "begin_keyword" || statement.type === "end_keyword";
    if (isKeyword) continue;
    const built = buildCFNForStatement(statement);
    if (built !== null) statementNodes.push(built);
  }

  const sourceAnchor: ControlFlowNode["sourceAnchor"] = {
    sourceUnitId,
    range: nodeToSourceRange(blockNode),
    enclosingRoutineId: routineId,
    syntaxKind: blockNode.type,
  };
  return { kind: "block", sourceAnchor, children: statementNodes };
}
|
|
1064
|
+
|
|
1065
|
+
/**
|
|
1066
|
+
* Build a ControlFlowNode for a branch/loop body. The AL grammar allows the
|
|
1067
|
+
* `then_branch` / `else_branch` / loop `body` / case_branch `body` field to be
|
|
1068
|
+
* EITHER a `code_block` (when begin/end is present) OR the single statement
|
|
1069
|
+
* itself (when begin/end is omitted, e.g. `if X then Foo();`).
|
|
1070
|
+
*
|
|
1071
|
+
* If it's a code_block, delegate to `buildCFNForBlock`. Otherwise, synthesize
|
|
1072
|
+
* a one-statement block so the walker can treat all branch bodies uniformly.
|
|
1073
|
+
* The synthetic block's `sourceAnchor` covers the single statement (no enclosing
|
|
1074
|
+
* code_block exists in the source).
|
|
1075
|
+
*/
|
|
1076
|
+
function buildCFNForBranchBody(node: SyntaxNode): ControlFlowNode {
  // A real `code_block` is built as-is.
  if (node.type === "code_block") return buildCFNForBlock(node);

  // Otherwise `node` is a single statement written without begin/end. Wrap it
  // in a synthetic "block" so every branch body has the same shape. The
  // synthetic block is anchored on the statement itself and is empty when the
  // statement yields no control-flow node.
  const onlyStatement = buildCFNForStatement(node);
  const sourceAnchor: ControlFlowNode["sourceAnchor"] = {
    sourceUnitId,
    range: nodeToSourceRange(node),
    enclosingRoutineId: routineId,
    syntaxKind: node.type,
  };
  return {
    kind: "block",
    sourceAnchor,
    children: onlyStatement === null ? [] : [onlyStatement],
  };
}
|
|
1091
|
+
|
|
1092
|
+
/**
|
|
1093
|
+
* Harvest leaves from the receiver (function-side) of a chained call expression.
|
|
1094
|
+
* For `Helper(C).FindSet()`, the outer `call_expression`'s function field is a
|
|
1095
|
+
* `member_expression` whose object is another `call_expression` (`Helper(C)`).
|
|
1096
|
+
* That inner call is registered by the visit pass as a CallSite, but lives in
|
|
1097
|
+
* the receiver subtree — outside the outer's argument_list. AL evaluates the
|
|
1098
|
+
* receiver BEFORE the outer call, so the harvested leaf must be a SIBLING of
|
|
1099
|
+
* the outer leaf in the parent's conditionLeaves, ordered before it.
|
|
1100
|
+
*
|
|
1101
|
+
* This helper does nothing if the function-side is not a member_expression with
|
|
1102
|
+
* a call_expression / member_expression object (the common non-chained shape).
|
|
1103
|
+
*/
|
|
1104
|
+
function harvestReceiverLeaves(callNode: SyntaxNode, out: ControlFlowNode[]): void {
  // Appends to `out` the leaves found in the receiver of a chained call.
  // Nothing happens unless the callee is a `member_expression` whose object
  // is itself a `call_expression` or a `member_expression`.
  const callee = callNode.childForFieldName("function") ?? callNode.namedChildren[0];
  if (!callee) return;
  if (callee.type !== "member_expression") return;

  const receiver = callee.childForFieldName("object") ?? callee.namedChildren[0];
  if (!receiver) return;

  switch (receiver.type) {
    case "call_expression":
    case "member_expression":
      harvestExpressionLeaves(receiver, out);
      break;
    default:
      break;
  }
}
|
|
1112
|
+
|
|
1113
|
+
/**
 * Collect record-op / callsite leaves that occur in expression position
 * beneath `exprNode` (P7.5) and append them to `out`.
 *
 * A `call_expression` / `member_expression` whose `node.id` is present in
 * `opIdByNodeId` (checked first) or in `csIdByNodeId` becomes a single leaf:
 *   - leaves found inside its `argument_list` are attached to that leaf as
 *     its own `conditionLeaves` (only when at least one was found);
 *   - chained-receiver leaves are appended to `out` through
 *     `harvestReceiverLeaves` BEFORE the leaf itself, so they end up as
 *     preceding siblings (e.g. the `Helper(C)` in `Helper(C).FindSet()`);
 *   - no other child of the matched node is visited.
 * A callsite whose callee is a bare identifier spelled `error` (any casing)
 * is emitted with kind "error"; every other callsite gets kind "call".
 *
 * Any node that does not produce a leaf — including call / member
 * expressions that are registered in neither map — is traversed through all
 * of its named children so nested receivers / arguments can still surface.
 *
 * @param exprNode Root of the expression subtree; `null` is a no-op.
 * @param out Accumulator that receives the leaves in discovery order.
 */
function harvestExpressionLeaves(exprNode: SyntaxNode | null, out: ControlFlowNode[]): void {
  if (exprNode === null) return;

  // Alias with a non-null type so the local closures below can use it.
  const current: SyntaxNode = exprNode;
  const syntaxKind = current.type;

  if (syntaxKind === "call_expression" || syntaxKind === "member_expression") {
    const sourceAnchor: ControlFlowNode["sourceAnchor"] = {
      sourceUnitId,
      range: nodeToSourceRange(current),
      enclosingRoutineId: routineId,
      syntaxKind,
    };

    // Leaves nested inside this node's argument_list (empty when it has none).
    const harvestArguments = (): ControlFlowNode[] => {
      const nested: ControlFlowNode[] = [];
      const argumentList = current.namedChildren.find(
        (candidate) => candidate !== null && candidate.type === "argument_list",
      );
      for (const argument of argumentList?.namedChildren ?? []) {
        if (argument) harvestExpressionLeaves(argument, nested);
      }
      return nested;
    };

    // Attach the nested leaves (if any) and append the finished leaf to `out`.
    const emit = (leaf: ControlFlowNode, nested: ControlFlowNode[]): void => {
      if (nested.length > 0) leaf.conditionLeaves = nested;
      out.push(leaf);
    };

    // Record operations take precedence over callsites.
    const operationId = opIdByNodeId.get(current.id);
    if (operationId !== undefined) {
      const nested = harvestArguments();
      // Receiver leaves are siblings of this leaf, so they go to `out`.
      harvestReceiverLeaves(current, out);
      const opLeaf: ControlFlowNode = { kind: "op", sourceAnchor, operationId };
      emit(opLeaf, nested);
      return;
    }

    const callsiteId = csIdByNodeId.get(current.id);
    if (callsiteId !== undefined) {
      const nested = harvestArguments();
      const callee = current.childForFieldName("function") ?? current.namedChildren[0];
      // Same sibling treatment of the chained receiver as for record ops.
      harvestReceiverLeaves(current, out);
      const raisesError =
        callee?.type === "identifier" && callee.text.toLowerCase() === "error";
      const callLeaf: ControlFlowNode = raisesError
        ? { kind: "error", sourceAnchor, callsiteId }
        : { kind: "call", sourceAnchor, callsiteId };
      emit(callLeaf, nested);
      return;
    }
    // Registered in neither map: fall through to the generic descent.
  }

  // Generic descent over every named child (operands, argument lists, ...).
  for (const child of current.namedChildren) {
    if (child) harvestExpressionLeaves(child, out);
  }
}
|
|
1194
|
+
|
|
1195
|
+
/**
 * Translate one statement-position syntax node into a ControlFlowNode.
 *
 * Mapping by `node.type`:
 *   - if_statement: kind "if"; `then_branch` -> `children`, optional
 *     `else_branch` -> `elseChildren`; leaves harvested from `condition`.
 *   - case_statement: kind "case"; one child per case_branch /
 *     case_else_branch; leaves harvested from `expression`.
 *   - for_statement / foreach_statement / while_statement: kind "for" /
 *     "foreach" / "while"; `body` -> `children`; leaves harvested from
 *     `start` + `end`, `iterable`, and `condition` respectively.
 *   - repeat_statement: kind "repeat"; body statements are the named
 *     children that precede the `condition` node; leaves harvested from
 *     `condition`.
 *   - try_statement: kind "try" with no children (placeholder; the original
 *     notes say the AL grammar does not currently define this node).
 *   - exit_statement: kind "exit".
 *   - call_expression / member_expression: kind "op" when registered in
 *     `opIdByNodeId`, otherwise "call" / "error" when registered in
 *     `csIdByNodeId`, otherwise "other". Receiver leaves followed by
 *     argument leaves are attached as the leaf's `conditionLeaves`.
 *   - with_statement / asserterror_statement: kind "other" wrapping `body`.
 *   - keyword tokens and empty_statement: skipped (`null`).
 *   - anything else: opaque kind "other" without children.
 *
 * `conditionLeaves` is only set when at least one leaf was harvested. Per the
 * P7.5 design notes the downstream walker evaluates them pre-body for
 * if / case / while / for / foreach, post-body for repeat, and before the
 * leaf's own effect for call / op.
 *
 * @param node Statement-position syntax node.
 * @returns The control-flow node, or `null` when the node is skipped.
 */
function buildCFNForStatement(node: SyntaxNode): ControlFlowNode | null {
  const syntaxKind = node.type;
  const sourceAnchor: ControlFlowNode["sourceAnchor"] = {
    sourceUnitId,
    range: nodeToSourceRange(node),
    enclosingRoutineId: routineId,
    syntaxKind,
  };

  // Harvest expression-position leaves from the given fields, in order.
  const harvestFields = (fieldNames: readonly string[]): ControlFlowNode[] => {
    const leaves: ControlFlowNode[] = [];
    for (const fieldName of fieldNames) {
      harvestExpressionLeaves(node.childForFieldName(fieldName), leaves);
    }
    return leaves;
  };

  // Set `conditionLeaves` only when there is something to attach.
  const attachLeaves = (cfn: ControlFlowNode, leaves: ControlFlowNode[]): ControlFlowNode => {
    if (leaves.length > 0) cfn.conditionLeaves = leaves;
    return cfn;
  };

  // Shared shape of for / foreach / while: a `body` field (code_block or bare
  // statement) plus leaves from the loop's controlling expressions.
  const buildLoop = (
    kind: ControlFlowNode["kind"],
    controlFields: readonly string[],
  ): ControlFlowNode => {
    const body = node.childForFieldName("body");
    const children = body !== null ? [buildCFNForBranchBody(body)] : [];
    return attachLeaves({ kind, sourceAnchor, children }, harvestFields(controlFields));
  };

  switch (syntaxKind) {
    case "if_statement": {
      // Either branch may be a code_block or a bare statement;
      // buildCFNForBranchBody copes with both shapes.
      const thenBranch = node.childForFieldName("then_branch");
      const elseBranch = node.childForFieldName("else_branch");
      const children = thenBranch !== null ? [buildCFNForBranchBody(thenBranch)] : [];
      const elseChildren =
        elseBranch !== null ? [buildCFNForBranchBody(elseBranch)] : undefined;
      const ifNode: ControlFlowNode =
        elseChildren === undefined
          ? { kind: "if", sourceAnchor, children }
          : { kind: "if", sourceAnchor, children, elseChildren };
      // e.g. `if Cust.FindSet() then ...`
      return attachLeaves(ifNode, harvestFields(["condition"]));
    }

    case "case_statement": {
      const branches: ControlFlowNode[] = [];
      for (const child of node.namedChildren) {
        if (child === null) continue;
        if (child.type !== "case_branch" && child.type !== "case_else_branch") continue;
        const branch = buildCFNForCaseBranch(child);
        if (branch !== null) branches.push(branch);
      }
      // e.g. `case Rec.Find('-') of`
      return attachLeaves(
        { kind: "case", sourceAnchor, children: branches },
        harvestFields(["expression"]),
      );
    }

    case "while_statement":
      return buildLoop("while", ["condition"]);

    case "for_statement":
      // `start` is harvested before `end`.
      return buildLoop("for", ["start", "end"]);

    case "foreach_statement":
      return buildLoop("foreach", ["iterable"]);

    case "repeat_statement": {
      // No `body` field here: the body is every named child that precedes the
      // until-condition, keyword tokens excluded.
      const bodyNodes: ControlFlowNode[] = [];
      for (const child of node.namedChildren) {
        if (child === null || child.type === "until_keyword" || child.type === "repeat_keyword") {
          continue;
        }
        // Reaching the node that starts where the `condition` field starts
        // means the body is over.
        if (node.childForFieldName("condition")?.startIndex === child.startIndex) break;
        const built = buildCFNForStatement(child);
        if (built !== null) bodyNodes.push(built);
      }
      // e.g. `until Cust.Next() = 0`
      return attachLeaves(
        { kind: "repeat", sourceAnchor, children: bodyNodes },
        harvestFields(["condition"]),
      );
    }

    case "try_statement":
      // Defensive placeholder mirroring the branching-flag check in `visit`:
      // emit an empty wrapper rather than guessing at field names.
      // TODO P6.T2: if the AL grammar ever adds a real try_statement node,
      // verify its named fields with a grammar probe before populating `children`.
      return { kind: "try", sourceAnchor, children: [] };

    case "exit_statement":
      return { kind: "exit", sourceAnchor };

    case "call_expression":
    case "member_expression": {
      // At statement position there is no parent slot for sibling leaves, so
      // receiver leaves and argument leaves both live on this leaf itself.
      const argumentList = node.namedChildren.find(
        (candidate) => candidate !== null && candidate.type === "argument_list",
      );
      const preLeaves: ControlFlowNode[] = [];
      // Receiver first, then the arguments.
      harvestReceiverLeaves(node, preLeaves);
      for (const argument of argumentList?.namedChildren ?? []) {
        if (argument) harvestExpressionLeaves(argument, preLeaves);
      }

      const operationId = opIdByNodeId.get(node.id);
      if (operationId !== undefined) {
        return attachLeaves({ kind: "op", sourceAnchor, operationId }, preLeaves);
      }

      const callsiteId = csIdByNodeId.get(node.id);
      if (callsiteId === undefined) {
        // Registered in neither map: keep whatever nested leaves were found.
        return attachLeaves({ kind: "other", sourceAnchor }, preLeaves);
      }

      // A bare `Error(...)` call is its own leaf kind.
      const callee = node.childForFieldName("function") ?? node.namedChildren[0];
      const raisesError =
        callee?.type === "identifier" && callee.text.toLowerCase() === "error";
      const callLeaf: ControlFlowNode = raisesError
        ? { kind: "error", sourceAnchor, callsiteId }
        : { kind: "call", sourceAnchor, callsiteId };
      return attachLeaves(callLeaf, preLeaves);
    }

    case "with_statement":
    case "asserterror_statement": {
      // Compound statements with a `body` (code_block or bare statement).
      const body = node.childForFieldName("body");
      return body === null
        ? { kind: "other", sourceAnchor }
        : { kind: "other", sourceAnchor, children: [buildCFNForBranchBody(body)] };
    }

    // Pure syntax tokens and empty statements contribute nothing to the tree.
    case "begin_keyword":
    case "end_keyword":
    case "if_keyword":
    case "then_keyword":
    case "else_keyword":
    case "case_keyword":
    case "of_keyword":
    case "repeat_keyword":
    case "until_keyword":
    case "while_keyword":
    case "for_keyword":
    case "do_keyword":
    case "foreach_keyword":
    case "in_keyword":
    case "empty_statement":
      return null;

    default:
      // Assignments and other straight-line statements: opaque "other". Calls
      // nested in their sub-expressions are not exposed in the tree here.
      return { kind: "other", sourceAnchor };
  }
}
|
|
1429
|
+
|
|
1430
|
+
/**
 * Wrap a `case_branch` or `case_else_branch` node in a "case-branch"
 * ControlFlowNode.
 *
 * Body selection:
 *   - case_branch: the `body` field (code_block or bare statement).
 *   - case_else_branch: there is no body field, so take the first
 *     `code_block` named child; failing that, the first named child that is
 *     not the `else_keyword` (single-statement body without begin/end).
 *
 * The body, when found, is converted with `buildCFNForBranchBody` and becomes
 * the only entry of `children`; otherwise `children` is empty.
 *
 * @param node The case_branch / case_else_branch syntax node.
 * @returns The branch node (this implementation never yields `null`).
 */
function buildCFNForCaseBranch(node: SyntaxNode): ControlFlowNode | null {
  const sourceAnchor: ControlFlowNode["sourceAnchor"] = {
    sourceUnitId,
    range: nodeToSourceRange(node),
    enclosingRoutineId: routineId,
    syntaxKind: node.type,
  };

  const body: SyntaxNode | null =
    node.type === "case_branch"
      ? node.childForFieldName("body")
      : (node.namedChildren.find((candidate) => candidate?.type === "code_block") ??
        node.namedChildren.find(
          (candidate) => candidate !== null && candidate.type !== "else_keyword",
        ) ??
        null);

  return {
    kind: "case-branch",
    sourceAnchor,
    children: body !== null ? [buildCFNForBranchBody(body)] : [],
  };
}
|
|
1467
|
+
}
|