al-sem 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +361 -0
  3. package/package.json +64 -0
  4. package/scripts/d40-diff.ts +44 -0
  5. package/scripts/fetch-native-parser.ts +179 -0
  6. package/scripts/precision-sample.ts +99 -0
  7. package/scripts/precision-study.ts +42 -0
  8. package/scripts/precision-tabulate.ts +52 -0
  9. package/src/cli/baseline.ts +31 -0
  10. package/src/cli/diff.ts +199 -0
  11. package/src/cli/events-chains.ts +56 -0
  12. package/src/cli/events-fanout.ts +87 -0
  13. package/src/cli/exit-code.ts +30 -0
  14. package/src/cli/fingerprint-indexes.ts +130 -0
  15. package/src/cli/fingerprint-query.ts +543 -0
  16. package/src/cli/fingerprint-witness.ts +493 -0
  17. package/src/cli/fingerprint.ts +292 -0
  18. package/src/cli/format-compact-json.ts +45 -0
  19. package/src/cli/format-events.ts +77 -0
  20. package/src/cli/format-fingerprint.ts +295 -0
  21. package/src/cli/format-html.ts +503 -0
  22. package/src/cli/format-json.ts +13 -0
  23. package/src/cli/format-policy.ts +95 -0
  24. package/src/cli/format-sarif.ts +186 -0
  25. package/src/cli/format-terminal.ts +153 -0
  26. package/src/cli/index.ts +566 -0
  27. package/src/cli/policy.ts +204 -0
  28. package/src/config/roots-config.ts +302 -0
  29. package/src/deps/cache-versions.ts +74 -0
  30. package/src/deps/canonical-json.ts +27 -0
  31. package/src/deps/dependency-artifact.ts +144 -0
  32. package/src/deps/dependency-cache.ts +262 -0
  33. package/src/deps/dependency-dag.ts +128 -0
  34. package/src/deps/dependency-package-discovery.ts +85 -0
  35. package/src/deps/dependency-pipeline.ts +483 -0
  36. package/src/deps/dependency-projection.ts +211 -0
  37. package/src/deps/dependency-resolver.ts +154 -0
  38. package/src/deps/workspace-dependencies.ts +114 -0
  39. package/src/detectors/capability-query.ts +145 -0
  40. package/src/detectors/confidence.ts +52 -0
  41. package/src/detectors/d1-db-op-in-loop.ts +457 -0
  42. package/src/detectors/d10-self-modifying-loop.ts +114 -0
  43. package/src/detectors/d11-modify-without-get.ts +129 -0
  44. package/src/detectors/d12-dead-integration-event.ts +81 -0
  45. package/src/detectors/d13-cross-app-internal-call.ts +105 -0
  46. package/src/detectors/d14-dead-routine.ts +151 -0
  47. package/src/detectors/d16-obsolete-routine-call.ts +94 -0
  48. package/src/detectors/d17-min-version-drift.ts +157 -0
  49. package/src/detectors/d18-constant-filter-in-loop.ts +151 -0
  50. package/src/detectors/d19-unused-parameter.ts +116 -0
  51. package/src/detectors/d2-event-fanout-in-loop.ts +240 -0
  52. package/src/detectors/d20-unreachable-after-exit.ts +92 -0
  53. package/src/detectors/d21-read-without-load.ts +128 -0
  54. package/src/detectors/d22-flowfield-without-calcfields.ts +168 -0
  55. package/src/detectors/d29-subscriber-modify-on-event-record.ts +163 -0
  56. package/src/detectors/d3-load-state.ts +72 -0
  57. package/src/detectors/d3-missing-setloadfields.ts +234 -0
  58. package/src/detectors/d32-constant-boolean-parameter.ts +185 -0
  59. package/src/detectors/d33-unfiltered-bulk-write.ts +173 -0
  60. package/src/detectors/d34-commit-in-loop.ts +206 -0
  61. package/src/detectors/d35-commit-in-event-subscriber.ts +138 -0
  62. package/src/detectors/d36-late-setloadfields.ts +162 -0
  63. package/src/detectors/d37-validate-without-persist.ts +271 -0
  64. package/src/detectors/d38-subscriber-to-obsolete-event.ts +140 -0
  65. package/src/detectors/d39-record-left-dirty-across-chain.ts +165 -0
  66. package/src/detectors/d4-repeated-lookup-in-loop.ts +128 -0
  67. package/src/detectors/d40-transitive-load-missing.ts +217 -0
  68. package/src/detectors/d41-transitive-filter-loss.ts +200 -0
  69. package/src/detectors/d42-cross-call-wrong-setloadfields.ts +243 -0
  70. package/src/detectors/d43-event-ishandled-skip.ts +257 -0
  71. package/src/detectors/d44-event-multi-subscriber-overlap.ts +223 -0
  72. package/src/detectors/d45-event-transitive-table-exposure.ts +159 -0
  73. package/src/detectors/d5-set-based-opportunity.ts +162 -0
  74. package/src/detectors/d7-recursive-event-expansion.ts +151 -0
  75. package/src/detectors/d8-commit-in-transaction.ts +132 -0
  76. package/src/detectors/d9-transaction-span-summary.ts +107 -0
  77. package/src/detectors/detector-context.ts +121 -0
  78. package/src/detectors/finding-grouping.ts +61 -0
  79. package/src/detectors/path-merge.ts +174 -0
  80. package/src/detectors/registry.ts +176 -0
  81. package/src/detectors/table-display.ts +42 -0
  82. package/src/diff/diff-abi.ts +195 -0
  83. package/src/diff/diff-capabilities.ts +179 -0
  84. package/src/diff/diff-engine.ts +146 -0
  85. package/src/diff/diff-events.ts +323 -0
  86. package/src/diff/diff-identity.ts +73 -0
  87. package/src/diff/diff-indexes.ts +199 -0
  88. package/src/diff/diff-permissions.ts +260 -0
  89. package/src/diff/diff-policy.ts +101 -0
  90. package/src/diff/diff-preflight.ts +66 -0
  91. package/src/diff/diff-renames.ts +104 -0
  92. package/src/diff/diff-schema.ts +232 -0
  93. package/src/diff/format-diff.ts +148 -0
  94. package/src/engine/attribute-parser.ts +50 -0
  95. package/src/engine/capability-cone.ts +531 -0
  96. package/src/engine/combined-graph.ts +357 -0
  97. package/src/engine/control-flow-walker.ts +1317 -0
  98. package/src/engine/dispatch-sites.ts +199 -0
  99. package/src/engine/effect-lattice.ts +81 -0
  100. package/src/engine/entry-points.ts +57 -0
  101. package/src/engine/event-flow.ts +524 -0
  102. package/src/engine/event-relay.ts +92 -0
  103. package/src/engine/op-classification.ts +92 -0
  104. package/src/engine/path-walker.ts +189 -0
  105. package/src/engine/reverse-call-graph.ts +23 -0
  106. package/src/engine/root-classifier-overlay.ts +194 -0
  107. package/src/engine/root-classifier.ts +135 -0
  108. package/src/engine/scc.ts +110 -0
  109. package/src/engine/source-anchor.ts +25 -0
  110. package/src/engine/summary-context.ts +104 -0
  111. package/src/engine/summary-engine.ts +296 -0
  112. package/src/engine/summary-runner.ts +560 -0
  113. package/src/engine/transaction-spans.ts +112 -0
  114. package/src/engine/uncertainty-util.ts +54 -0
  115. package/src/hash.ts +31 -0
  116. package/src/index/attribute-from-node.ts +141 -0
  117. package/src/index/callee-from-node.ts +181 -0
  118. package/src/index/capability/background.ts +90 -0
  119. package/src/index/capability/commit.ts +44 -0
  120. package/src/index/capability/dispatch.ts +164 -0
  121. package/src/index/capability/events.ts +65 -0
  122. package/src/index/capability/extractor.ts +124 -0
  123. package/src/index/capability/file-blob.ts +137 -0
  124. package/src/index/capability/http.ts +159 -0
  125. package/src/index/capability/hyperlink.ts +60 -0
  126. package/src/index/capability/isolated-storage.ts +179 -0
  127. package/src/index/capability/table.ts +113 -0
  128. package/src/index/capability/telemetry.ts +84 -0
  129. package/src/index/capability/ui.ts +55 -0
  130. package/src/index/capability/value-source.ts +202 -0
  131. package/src/index/expression-from-node.ts +117 -0
  132. package/src/index/indexer.ts +102 -0
  133. package/src/index/intraprocedural-body.ts +1467 -0
  134. package/src/index/intraprocedural-ops.ts +253 -0
  135. package/src/index/intraprocedural-refs.ts +188 -0
  136. package/src/index/object-indexer.ts +279 -0
  137. package/src/index/routine-indexer.ts +282 -0
  138. package/src/index/routine-signature.ts +46 -0
  139. package/src/index/variable-indexer.ts +134 -0
  140. package/src/index/variable-initializer-extractor.ts +155 -0
  141. package/src/index/variable-type-normalizer.ts +83 -0
  142. package/src/index.ts +267 -0
  143. package/src/mcp/server.ts +72 -0
  144. package/src/mcp/session.ts +49 -0
  145. package/src/mcp/tools/explain-path.ts +75 -0
  146. package/src/mcp/tools/get-analysis-health.ts +62 -0
  147. package/src/mcp/tools/get-finding.ts +47 -0
  148. package/src/mcp/tools/get-routine-summary.ts +126 -0
  149. package/src/mcp/tools/list-findings.ts +85 -0
  150. package/src/mcp/tools/list-hotspots.ts +78 -0
  151. package/src/mcp/tools/list-rollups.ts +103 -0
  152. package/src/mcp/tools/validators.ts +25 -0
  153. package/src/model/attributes.ts +120 -0
  154. package/src/model/callee.ts +45 -0
  155. package/src/model/capability.ts +187 -0
  156. package/src/model/coverage.ts +85 -0
  157. package/src/model/entities.ts +628 -0
  158. package/src/model/expression.ts +98 -0
  159. package/src/model/finding.ts +110 -0
  160. package/src/model/graph-edge.ts +93 -0
  161. package/src/model/graph.ts +62 -0
  162. package/src/model/identity.ts +81 -0
  163. package/src/model/ids.ts +90 -0
  164. package/src/model/index.ts +13 -0
  165. package/src/model/model.ts +51 -0
  166. package/src/model/permission.ts +76 -0
  167. package/src/model/root-classification.ts +116 -0
  168. package/src/model/stable-identity.ts +102 -0
  169. package/src/model/summary.ts +96 -0
  170. package/src/parser/ast.ts +82 -0
  171. package/src/parser/native/ffi.ts +145 -0
  172. package/src/parser/native/parse-index-pool.ts +148 -0
  173. package/src/parser/native/parse-index-worker.ts +94 -0
  174. package/src/parser/native/wrapper.ts +353 -0
  175. package/src/parser/parser-init.ts +43 -0
  176. package/src/perf/profiler.ts +66 -0
  177. package/src/policy/policy-default.yaml +83 -0
  178. package/src/policy/policy-engine.ts +339 -0
  179. package/src/policy/policy-loader.ts +257 -0
  180. package/src/policy/policy-schema.json +379 -0
  181. package/src/policy/policy-types.ts +81 -0
  182. package/src/policy/predicate-compiler.ts +151 -0
  183. package/src/policy/predicate-evaluator.ts +267 -0
  184. package/src/policy/predicate-fields.ts +439 -0
  185. package/src/projection/actionable-anchor.ts +48 -0
  186. package/src/projection/finding-filters.ts +44 -0
  187. package/src/projection/finding-fingerprint.ts +54 -0
  188. package/src/projection/finding-groups.ts +41 -0
  189. package/src/projection/finding-summary.ts +110 -0
  190. package/src/projection/rollup-findings.ts +105 -0
  191. package/src/providers/discover.ts +88 -0
  192. package/src/providers/external.ts +46 -0
  193. package/src/providers/types.ts +36 -0
  194. package/src/providers/workspace.ts +117 -0
  195. package/src/resolve/call-resolver.ts +117 -0
  196. package/src/resolve/coverage.ts +61 -0
  197. package/src/resolve/event-graph.ts +166 -0
  198. package/src/resolve/implicit-edges.ts +53 -0
  199. package/src/resolve/record-types.ts +36 -0
  200. package/src/resolve/resolver.ts +23 -0
  201. package/src/resolve/semantic-graph.ts +29 -0
  202. package/src/resolve/symbol-table.ts +69 -0
  203. package/src/snapshot/app-snapshot.ts +74 -0
  204. package/src/snapshot/compose.ts +100 -0
  205. package/src/snapshot/derive/callsite-evidence.ts +76 -0
  206. package/src/snapshot/derive/capability-facts.ts +70 -0
  207. package/src/snapshot/derive/contracts.ts +131 -0
  208. package/src/snapshot/derive/coverage.ts +35 -0
  209. package/src/snapshot/derive/event-declarations.ts +140 -0
  210. package/src/snapshot/derive/identity-table.ts +58 -0
  211. package/src/snapshot/derive/inputs.ts +91 -0
  212. package/src/snapshot/derive/operation-evidence.ts +70 -0
  213. package/src/snapshot/derive/permissions.ts +186 -0
  214. package/src/snapshot/derive/root-classifications.ts +56 -0
  215. package/src/snapshot/derive/schema.ts +130 -0
  216. package/src/snapshot/derive/typed-edges.ts +60 -0
  217. package/src/snapshot/derive/workspace-fingerprint.ts +19 -0
  218. package/src/snapshot/deserialize.ts +40 -0
  219. package/src/snapshot/serialize-cbor-gz.ts +12 -0
  220. package/src/snapshot/serialize-cbor.ts +19 -0
  221. package/src/snapshot/serialize-json.ts +22 -0
  222. package/src/snapshot/shard.ts +134 -0
  223. package/src/snapshot/types.ts +181 -0
  224. package/src/symbols/app-manifest.ts +96 -0
  225. package/src/symbols/app-package-zip.ts +50 -0
  226. package/src/symbols/embedded-source-reader.ts +41 -0
  227. package/src/symbols/package-hash.ts +81 -0
  228. package/src/symbols/symbol-reader.ts +101 -0
  229. package/src/symbols/symbol-reference-parser.ts +378 -0
  230. package/src/symbols/symbol-reference-reader.ts +27 -0
  231. package/tsconfig.json +18 -0
@@ -0,0 +1,1467 @@
1
+ // src/index/intraprocedural-body.ts
2
+ // Single-DFS body walker: loops + record ops + commits + call sites + field accesses.
3
+ //
4
+ // Previously the routine-indexer drove TWO separate full-body traversals — one in
5
+ // `extractOpsAndLoops` (loops + record-ops + commits) and one in `extractRefs` (call
6
+ // sites + field accesses). Each traversal walked `namedChildren` recursively through
7
+ // FFI; combined, they did 2× the work. On Microsoft Base Application that was a
8
+ // significant chunk of the per-file cost.
9
+ //
10
+ // This module fuses both passes. The walker maintains an ambient `loopStack` (so
11
+ // call sites' `loopStack` is populated directly — no post-pass back-fill), and it
12
+ // handles `call_expression` nodes structurally: only the argument list is recursed
13
+ // into, so the function-callee subtree (`member_expression` or `identifier`) is
14
+ // never re-examined as a stray field access. That structural skip removes the
15
+ // previous `node.parent`/`.id`-based "is this a callee?" check that called FFI on
16
+ // every member-expression descendant.
17
+
18
+ import type {
19
+ CallArgumentBinding,
20
+ CallSite,
21
+ ConditionReference,
22
+ ControlFlowNode,
23
+ FieldAccess,
24
+ LoopNode,
25
+ LoopType,
26
+ OperationSite,
27
+ ParameterSymbol,
28
+ RecordOpType,
29
+ RecordOperation,
30
+ RecordVariable,
31
+ UnreachableStatement,
32
+ VarAssignment,
33
+ } from "../model/entities.ts";
34
+ import type { ExpressionInfo } from "../model/expression.ts";
35
+ import {
36
+ type LoopId,
37
+ type RoutineId,
38
+ encodeCallsiteId,
39
+ encodeLoopId,
40
+ encodeOperationId,
41
+ } from "../model/ids.ts";
42
+ import { nodeToSourceRange } from "../parser/ast.ts";
43
+ import type { Node as SyntaxNode } from "../parser/native/wrapper.ts";
44
+ import { calleeFromNode } from "./callee-from-node.ts";
45
+ import { expressionInfoFromNode } from "./expression-from-node.ts";
46
+
47
/** Syntax-node type of each loop statement -> the `LoopType` tag used for it. */
const LOOP_TYPE_MAP: Record<string, LoopType> = Object.fromEntries<LoopType>([
  ["repeat_statement", "repeat"],
  ["for_statement", "for"],
  ["foreach_statement", "foreach"],
  ["while_statement", "while"],
]);
53
+
54
/**
 * Canonical record-op name (lowercase) -> properly-cased RecordOpType.
 *
 * The table is created with a null prototype. A plain object literal inherits
 * `Object.prototype`, so indexing it with a key such as `"constructor"` or
 * `"__proto__"` yields an inherited (truthy) value rather than `undefined`.
 * With no prototype, any key that is not listed below misses cleanly.
 */
const RECORD_OP_MAP: Record<string, RecordOpType> = ((): Record<string, RecordOpType> => {
  // Typed separately so every value is still checked against RecordOpType.
  const entries: Record<string, RecordOpType> = {
    findset: "FindSet",
    findfirst: "FindFirst",
    findlast: "FindLast",
    find: "Find",
    get: "Get",
    calcfields: "CalcFields",
    calcsums: "CalcSums",
    testfield: "TestField",
    modify: "Modify",
    modifyall: "ModifyAll",
    insert: "Insert",
    delete: "Delete",
    deleteall: "DeleteAll",
    setloadfields: "SetLoadFields",
    addloadfields: "AddLoadFields",
    setrange: "SetRange",
    setfilter: "SetFilter",
    setcurrentkey: "SetCurrentKey",
    reset: "Reset",
    copy: "Copy",
    transferfields: "TransferFields",
    validate: "Validate",
    init: "Init",
    next: "Next",
    count: "Count",
    countapprox: "CountApprox",
    isempty: "IsEmpty",
    locktable: "LockTable",
  };
  return Object.assign(Object.create(null), entries);
})();
85
+
86
/**
 * Properly-cased record-op names whose field arguments are all captured.
 *
 * The lookup ops (`Get`, `Find`, `FindFirst`, `FindLast`, `FindSet`) are listed
 * so that D4 (repeated identical lookup in a loop) can read the first argument
 * (the key expression) and detect duplicate calls with the same literal key.
 */
const FIELD_ARGS_OPS = new Set<string>([
  // Filters, load-field lists, key selection and validation.
  "SetRange",
  "SetFilter",
  "SetLoadFields",
  "AddLoadFields",
  "SetCurrentKey",
  "Validate",
  // Lookups whose first argument D4 inspects.
  "Get",
  "Find",
  "FindFirst",
  "FindLast",
  "FindSet",
  // Remaining ops that take field arguments.
  "CalcFields",
  "CalcSums",
  "TestField",
]);
109
+
110
/** Everything `extractBodyFeatures` reports for a single routine body. */
export interface ExtractBodyResult {
  /** Loop nodes collected from the body. */
  loops: LoopNode[];
  /** Operation sites collected from the body. */
  operationSites: OperationSite[];
  /** Record operations collected from the body. */
  recordOperations: RecordOperation[];
  /** Call sites collected from the body. */
  callSites: CallSite[];
  /** Field accesses collected from the body. */
  fieldAccesses: FieldAccess[];
  /** Statements recorded as unreachable. */
  unreachableStatements: UnreachableStatement[];
  /** True iff the visitor saw any branching node (if/case/case_branch/try). */
  hasBranching: boolean;
  /**
   * Compact control-flow tree rooted at the routine's code_block. It is built
   * alongside the flat feature lists, shares their op/callsite IDs, and is
   * consumed by the path-aware walker in P6.T2.
   */
  statementTree: ControlFlowNode;
  /**
   * Names of identifiers referenced as values in the body — lowercased,
   * sorted, deduped. Not included: the member side of a member expression
   * (field/method names), the value side of a qualified enum value (enum
   * members), and a bare type name in enum-type position. Used by D19 (unused
   * parameter) and any detector that needs a cheap "is this name referenced
   * anywhere?" query.
   */
  identifierReferences: string[];
  /**
   * Var-assignment stream — one entry for every `<lhs> := <rhs>` statement in
   * the body whose LHS is an identifier or member_expression. The RHS literal
   * is captured when the RHS node is a boolean / integer / string_literal.
   * Sorted by source position. Phase 3 D43 consumes this.
   */
  varAssignments: VarAssignment[];
  /**
   * Identifiers referenced in a control-predicate position (if-test,
   * while-test, repeat-until, case-of subject). Sorted by referenceAnchor.
   * Phase 3.1 D43 dispatch-site guard detection consumes this.
   */
  conditionReferences: ConditionReference[];
}
147
+
148
/**
 * Build the per-argument structural bindings for a callsite. Caller-side fields
 * only — callee-side var-ness and bindingResolution upgrades happen later when
 * the call-resolver matches the callsite against a callee signature.
 *
 * @param argumentNodes Argument nodes of the call, in positional order; the
 *   array index becomes `parameterIndex`.
 * @param enclosingRoutineParameters Parameters of the routine containing the
 *   call; matched against identifier arguments by lowercased name.
 * @param enclosingRoutineRecordVariables Record variables of the routine
 *   containing the call; matched against identifier arguments by lowercased name.
 * @param sourceUnitId Source unit id stamped on every argument anchor.
 * @param routineId Enclosing routine id stamped on every argument anchor.
 * @returns One binding per argument node, in argument order.
 */
function extractArgumentBindings(
  argumentNodes: SyntaxNode[],
  enclosingRoutineParameters: ParameterSymbol[],
  enclosingRoutineRecordVariables: RecordVariable[],
  sourceUnitId: string,
  routineId: RoutineId,
): CallArgumentBinding[] {
  // A call without arguments needs neither lookup table.
  if (argumentNodes.length === 0) return [];

  const recVarByLcName = new Map(
    enclosingRoutineRecordVariables.map((rv) => [rv.name.toLowerCase(), rv]),
  );
  const paramByLcName = new Map(enclosingRoutineParameters.map((p) => [p.name.toLowerCase(), p]));

  return argumentNodes.map((argNode, parameterIndex): CallArgumentBinding => {
    const argumentAnchor: CallArgumentBinding["argumentAnchor"] = {
      sourceUnitId,
      range: nodeToSourceRange(argNode),
      enclosingRoutineId: routineId,
      syntaxKind: argNode.type,
    };
    // Only bare-identifier args can bind to a record/parameter symbol. The grammar
    // surfaces this directly as `argNode.type === "identifier"` — anything else
    // (literals, calls, member expressions, quoted identifiers, etc.) is a value
    // expression that flows by-value and can't carry record-flow semantics.
    if (argNode.type !== "identifier") {
      return {
        parameterIndex,
        sourceKind: "expression",
        calleeParameterIsVar: false,
        argumentAnchor,
        bindingResolution: "non-record-arg",
      };
    }
    // The node text is only needed for identifiers, so it is read after the
    // early return above; expression arguments never pay for the read.
    const lcName = argNode.text.trim().toLowerCase();
    const recVar = recVarByLcName.get(lcName);
    const param = paramByLcName.get(lcName);
    // Precedence: a parameter match wins over a record-variable match, which
    // wins over the implicit `Rec` / `xRec` names.
    const sourceKind: CallArgumentBinding["sourceKind"] =
      param !== undefined
        ? "parameter"
        : recVar !== undefined
          ? "local"
          : lcName === "rec" || lcName === "xrec"
            ? "implicit-rec"
            : "unknown";
    return {
      parameterIndex,
      sourceKind,
      // Per the field doc on CallArgumentBinding, `sourceVariableName` is set for
      // named-variable kinds only. An identifier we cannot resolve to any local /
      // parameter / implicit-Rec gets sourceKind === "unknown" and no name.
      sourceVariableName: sourceKind === "unknown" ? undefined : lcName,
      sourceRecordVariableId: recVar?.id,
      sourceParameterIndex: param?.index,
      callerSourceParameterIsVar: param?.isVar,
      // `undefined` here means "not yet resolved" — the Phase 2 call-resolver will
      // upgrade it. `"unknown"` is reserved for "tried and failed to resolve",
      // which is a Phase 2+ outcome.
      sourceTableId: recVar?.tableId,
      sourceTempState: recVar?.tempState,
      argumentAnchor,
      calleeParameterIsVar: false, // upgraded by call-resolver later
      // NOTE(review): an implicit `Rec`/`xRec` argument with no matching record
      // variable is reported as "non-record-arg" — confirm this is intended.
      bindingResolution: recVar !== undefined ? "unresolved-callee" : "non-record-arg",
    };
  });
}
217
+
218
/**
 * Name written on the left-hand side of an assignment target node.
 * - `identifier` → the identifier's own text.
 * - `member_expression` → the text of the trailing member: the `member` field
 *   when present, otherwise the last named child (`Rec.Field := ...` → "Field").
 * The text is returned as written; `collectVarAssignments` lowercases it.
 * Every other node shape yields `undefined`.
 */
function lhsIdentifierOf(target: SyntaxNode): string | undefined {
  switch (target.type) {
    case "identifier":
      return target.text;
    case "member_expression": {
      const trailing =
        target.childForFieldName("member") ?? target.namedChild(target.namedChildCount - 1);
      return trailing?.text;
    }
    default:
      return undefined;
  }
}
233
+
234
/**
 * Literal text of an RHS value node, or `undefined` when the node is not one
 * of the recognized literal types.
 *
 * Tree-sitter-al literal types (confirmed via probe):
 * - `boolean`        → the node text, lowercased ("true" / "false")
 * - `integer`        → the node text unchanged (decimal digits)
 * - `string_literal` → AL single-quoted string; one leading and one trailing
 *                      `'` are removed, then the content is lowercased
 */
function literalTextOf(value: SyntaxNode): string | undefined {
  switch (value.type) {
    case "boolean":
      return value.text.toLowerCase();
    case "integer":
      return value.text;
    case "string_literal": {
      // Only a text that both starts and ends with `'` (and is at least two
      // characters long) loses its quotes; anything else is lowercased as-is.
      // This matches the "lowercased" contract of `VarAssignment.rhsLiteralValue`.
      const raw = value.text;
      const isQuoted = raw.length >= 2 && raw.startsWith("'") && raw.endsWith("'");
      const content = isQuoted ? raw.slice(1, -1) : raw;
      return content.toLowerCase();
    }
    default:
      return undefined;
  }
}
258
+
259
/**
 * Gather every `<lhs> := <rhs>` assignment found anywhere under `bodyNode`
 * whose LHS is an `identifier` or `member_expression`. The result is ordered
 * by start line, then start column, so the output is deterministic.
 */
function collectVarAssignments(
  bodyNode: SyntaxNode,
  routineId: RoutineId,
  sourceUnitId: string,
): VarAssignment[] {
  const assignments: VarAssignment[] = [];

  // Record one assignment_statement, if its target has a supported shape.
  const recordAssignment = (stmt: SyntaxNode): void => {
    // Same accessor pattern as variable-initializer-extractor.ts: named fields
    // first, positional named children as the fallback, so a minor rename of a
    // grammar field does not break the indexer.
    const target =
      stmt.childForFieldName("left") ?? stmt.childForFieldName("target") ?? stmt.namedChild(0);
    const value =
      stmt.childForFieldName("right") ??
      stmt.childForFieldName("value") ??
      stmt.namedChild(stmt.namedChildCount - 1);
    if (!target || !value) return;

    const lhsName = lhsIdentifierOf(target);
    if (lhsName === undefined) return;

    assignments.push({
      lhsName: lhsName.toLowerCase(),
      rhsLiteralValue: literalTextOf(value),
      sourceAnchor: {
        sourceUnitId,
        range: nodeToSourceRange(stmt),
        enclosingRoutineId: routineId,
        syntaxKind: stmt.type,
      },
    });
  };

  // Explicit-stack DFS over named children; assignment statements are also
  // descended into, like every other node.
  const pending: SyntaxNode[] = [bodyNode];
  while (pending.length > 0) {
    const current = pending.pop();
    if (!current) continue;
    if (current.type === "assignment_statement") recordAssignment(current);
    const childCount = current.namedChildCount;
    for (let idx = 0; idx < childCount; idx++) {
      const child = current.namedChild(idx);
      if (child) pending.push(child);
    }
  }

  const bySourcePosition = (left: VarAssignment, right: VarAssignment): number => {
    const l = left.sourceAnchor.range;
    const r = right.sourceAnchor.range;
    return l.startLine - r.startLine || l.startColumn - r.startColumn;
  };
  assignments.sort(bySourcePosition);
  return assignments;
}
312
+
313
/**
 * Gather every identifier that appears in a control-predicate position anywhere
 * under `bodyNode`: if-test, while-test, repeat-until, case-of subject.
 *
 * Predicate expressions are walked recursively through their named children,
 * so `A and not IsHandled` yields both identifiers. A member expression
 * contributes only its trailing member (when that member is an `identifier`);
 * its receiver is not descended into. The result is ordered by the start line,
 * then start column, of each referenceAnchor.
 *
 * Grammar field names (confirmed by probe):
 *   if_statement     → childForFieldName("condition")
 *   while_statement  → childForFieldName("condition")
 *   repeat_statement → childForFieldName("condition")  (the until-expr)
 *   case_statement   → childForFieldName("expression") (NOT "subject")
 */
function collectConditionReferences(
  bodyNode: SyntaxNode,
  routineId: RoutineId,
  sourceUnitId: string,
): ConditionReference[] {
  type PredicateSlot = { field: string; kind: ConditionReference["conditionKind"] };

  // Statement type -> where its predicate lives and how it is labelled.
  const predicateSlots = new Map<string, PredicateSlot>([
    ["if_statement", { field: "condition", kind: "if" }],
    ["while_statement", { field: "condition", kind: "while" }],
    ["repeat_statement", { field: "condition", kind: "repeat-until" }],
    ["case_statement", { field: "expression", kind: "case" }],
  ]);

  const references: ConditionReference[] = [];

  const anchorFor = (node: SyntaxNode): ConditionReference["referenceAnchor"] => ({
    sourceUnitId,
    range: nodeToSourceRange(node),
    enclosingRoutineId: routineId,
    syntaxKind: node.type,
  });

  // Depth-first, left-to-right walk of one predicate expression.
  const harvest = (
    expr: SyntaxNode | null,
    kind: ConditionReference["conditionKind"],
    statementAnchor: ConditionReference["statementAnchor"],
  ): void => {
    if (!expr) return;
    switch (expr.type) {
      case "identifier":
        references.push({
          identifier: expr.text.toLowerCase(),
          conditionKind: kind,
          statementAnchor,
          referenceAnchor: anchorFor(expr),
        });
        return;
      case "member_expression": {
        const trailing =
          expr.childForFieldName("member") ?? expr.namedChild(expr.namedChildCount - 1);
        if (trailing?.type === "identifier") {
          references.push({
            identifier: trailing.text.toLowerCase(),
            conditionKind: kind,
            statementAnchor,
            referenceAnchor: anchorFor(trailing),
          });
        }
        return;
      }
      default:
        for (let idx = 0; idx < expr.namedChildCount; idx++) {
          harvest(expr.namedChild(idx), kind, statementAnchor);
        }
    }
  };

  const pending: SyntaxNode[] = [bodyNode];
  while (pending.length > 0) {
    const current = pending.pop();
    if (!current) continue;

    const slot = predicateSlots.get(current.type);
    if (slot !== undefined) {
      const predicate = current.childForFieldName(slot.field);
      // The statement anchor is only built when a predicate is actually present.
      if (predicate) harvest(predicate, slot.kind, anchorFor(current));
    }

    // Every node is descended into, so nested statements are found as well.
    for (let idx = 0; idx < current.namedChildCount; idx++) {
      const child = current.namedChild(idx);
      if (child) pending.push(child);
    }
  }

  references.sort((left, right) => {
    const l = left.referenceAnchor.range;
    const r = right.referenceAnchor.range;
    return l.startLine - r.startLine || l.startColumn - r.startColumn;
  });
  return references;
}
419
+
420
+ /**
421
+ * Walk a routine body once, emitting every L1 feature the indexer needs.
422
+ *
423
+ * Operation-id ordering matches the previous two-pass implementation: record-ops
424
+ * and commits get ids 0..N-1 (in body DFS order), call sites get ids N..N+M-1
425
+ * (also in body DFS order). The two streams are collected during the walk and
426
+ * assigned final operation ids after — that's why call-site `operationId` is
427
+ * filled in post-walk.
428
+ */
429
+ export function extractBodyFeatures(
430
+ bodyNode: SyntaxNode,
431
+ routineId: RoutineId,
432
+ sourceUnitId: string,
433
+ recordVarNames: Set<string>,
434
+ enclosingParameters: ParameterSymbol[],
435
+ enclosingRecordVariables: RecordVariable[],
436
+ ): ExtractBodyResult {
437
+ const loops: LoopNode[] = [];
438
+ const operationSites: OperationSite[] = [];
439
+ const recordOperations: RecordOperation[] = [];
440
+ const callSites: CallSite[] = [];
441
+ const fieldAccesses: FieldAccess[] = [];
442
+ const unreachableStatements: UnreachableStatement[] = [];
443
+ const identifierRefSet = new Set<string>();
444
+
445
+ /**
446
+ * Walk an arbitrary subtree and add every value-position identifier to
447
+ * `identifierRefSet`. Used inside `handleCallExpression` for the callee subtree
448
+ * (which the main visitor skips for performance) so the receiver in
449
+ * `Other.SetRange(...)` and the callee name in `Helper(x)` are captured.
450
+ *
451
+ * Same exclusion rules as the identifier hook in `visit()` — skips the
452
+ * `member` field of `member_expression`, the `value` field of
453
+ * `qualified_enum_value`, and bare type names in `enum_type` position.
454
+ */
455
+ function collectIdentifiersFrom(root: SyntaxNode): void {
456
+ const stack: { node: SyntaxNode; parent: SyntaxNode | null }[] = [{ node: root, parent: null }];
457
+ while (stack.length > 0) {
458
+ const item = stack.pop();
459
+ if (item === undefined) continue;
460
+ const { node, parent } = item;
461
+ if (node.type === "identifier" && parent !== null) {
462
+ const parentType = parent.type;
463
+ let isValueRef = true;
464
+ if (parentType === "member_expression") {
465
+ const memberField = parent.childForFieldName("member");
466
+ if (memberField !== null && memberField.startIndex === node.startIndex) {
467
+ isValueRef = false;
468
+ }
469
+ } else if (parentType === "qualified_enum_value") {
470
+ const valueField = parent.childForFieldName("value");
471
+ const enumTypeField = parent.childForFieldName("enum_type");
472
+ if (valueField !== null && valueField.startIndex === node.startIndex) {
473
+ isValueRef = false;
474
+ } else if (
475
+ enumTypeField !== null &&
476
+ enumTypeField.startIndex === node.startIndex &&
477
+ enumTypeField.type === "identifier"
478
+ ) {
479
+ isValueRef = false;
480
+ }
481
+ }
482
+ if (isValueRef) identifierRefSet.add(node.text.toLowerCase());
483
+ }
484
+ for (const child of node.namedChildren) {
485
+ if (child !== null) stack.push({ node: child, parent: node });
486
+ }
487
+ }
488
+ }
489
+
490
// Ids of the loops enclosing the node currently being visited. `visit` pushes an
// id when it enters a loop node and pops it when it leaves, so the innermost
// loop is last. Emitted sites store a `.slice()` snapshot of this array.
const loopStack: LoopId[] = [];
// Next index handed to `encodeOperationId` for a record-op / commit site.
let opIndex = 0;
// Next index handed to `encodeCallsiteId` for a call site.
let csIndex = 0;
// Next suffix used for `${routineId}/u<n>` unreachable-statement ids.
let unreachableIndex = 0;
// Set to true by `visit` when it meets an if / case / case_branch / try node.
let hasBranching = false;

// Maps from tree-sitter Node.id (unique per node within a tree) to the op/callsite
// ID assigned during the visit pass. Used post-visit to build the statement tree
// without a second DFS.
//
// Keyed by `node.id` rather than `startIndex` because chained-receiver expressions
// (e.g. `Helper(C).FindSet()`) produce nested `call_expression` nodes that share
// the same `startIndex` — keying by `startIndex` collapses them and silently drops
// the inner call. `Node.id` is a per-tree interned identifier that's unique even
// for nodes spanning the same source position.
const opIdByNodeId = new Map<number, string>();
const csIdByNodeId = new Map<number, string>();
507
+
508
/**
 * Classify a statement node as an unconditional exit from the enclosing routine.
 *
 * Recognised shapes (all name comparisons are case-insensitive):
 *   - `exit_statement` (covers `Exit;` and `Exit(value);`)            -> "exit"
 *   - `call_expression` whose callee is the bare identifier `Error`   -> "error"
 *   - `CurrReport.Quit`, written either with parentheses (a
 *     `call_expression` whose callee is that `member_expression`) or
 *     without them (the `member_expression` itself used as a
 *     statement, e.g. `CurrReport.Quit;`)                             -> "currreport-quit"
 *
 * The parenless form is only meaningful for nodes in statement position; the
 * caller is expected to pass direct statement children of a block.
 *
 * @param node Statement node to classify.
 * @returns The exit kind, or `undefined` when the node is not an unconditional exit.
 */
function unconditionalExitKind(node: SyntaxNode): UnreachableStatement["exitKind"] | undefined {
  if (node.type === "exit_statement") return "exit";

  // Locate the callee: the `function` child of a parenthesised call, or the
  // node itself when the call was written without parentheses.
  let calleeNode: SyntaxNode | null | undefined;
  if (node.type === "call_expression") {
    calleeNode = node.childForFieldName("function") ?? node.namedChildren[0];
  } else if (node.type === "member_expression") {
    calleeNode = node;
  }
  if (!calleeNode) return undefined;

  // `Error(...)` is only recognised in its parenthesised, bare-identifier form.
  if (
    node.type === "call_expression" &&
    calleeNode.type === "identifier" &&
    calleeNode.text.toLowerCase() === "error"
  ) {
    return "error";
  }

  if (calleeNode.type === "member_expression") {
    const objNode = calleeNode.childForFieldName("object") ?? calleeNode.namedChildren[0];
    const memberNode = calleeNode.childForFieldName("member") ?? calleeNode.namedChildren[1];
    if (
      objNode &&
      memberNode &&
      objNode.text.toLowerCase() === "currreport" &&
      memberNode.text.toLowerCase() === "quit"
    ) {
      return "currreport-quit";
    }
  }
  return undefined;
}
539
+
540
/**
 * Register one `call_expression` node and continue the traversal into the
 * parts of it that can contain further calls.
 *
 * Depending on the callee shape the node is recorded as:
 *   - a record operation plus an operation site, when the callee is
 *     `<receiver>.<method>` and `<method>` (lower-cased) is in `RECORD_OP_MAP`;
 *   - a "commit" operation site, when the callee is the bare identifier
 *     `Commit` (case-insensitive);
 *   - a call site, for every other member or bare-identifier callee.
 * Callees of any other node type are not registered, but their arguments are
 * still visited.
 *
 * @param node     The `call_expression` node.
 * @param nodeType The node's type string, stored as `syntaxKind` on anchors.
 */
function handleCallExpression(node: SyntaxNode, nodeType: string): void {
  const funcNode = node.childForFieldName("function") ?? node.namedChildren[0];
  if (!funcNode) return;

  // Collect identifier-uses from the callee subtree. The main visitor's
  // "recurse only into argument_list" optimization (below) intentionally
  // skips the function-callee subtree to avoid re-emitting its member
  // expressions as stray field accesses — but D19 still needs to see
  // receivers (`Other` in `Other.SetRange(...)`) and bare callee names
  // (`Helper` in `Helper(x)`) as identifier references.
  collectIdentifiersFrom(funcNode);

  const argListNode = node.namedChildren.find((c) => c !== null && c.type === "argument_list");

  // Snapshot the call's arguments: source text, expression info, and the nodes
  // themselves. All three arrays are empty when the call has no argument_list.
  const snapshotArguments = (): {
    argumentTexts: string[];
    argumentInfos: ExpressionInfo[];
    argNodes: SyntaxNode[];
  } => {
    const argumentTexts: string[] = [];
    const argumentInfos: ExpressionInfo[] = [];
    const argNodes: SyntaxNode[] = [];
    if (argListNode) {
      for (const arg of argListNode.namedChildren) {
        if (arg !== null) {
          argumentTexts.push(arg.text);
          argumentInfos.push(expressionInfoFromNode(arg));
          argNodes.push(arg);
        }
      }
    }
    return { argumentTexts, argumentInfos, argNodes };
  };

  // Emit this call as a CallSite. Shared by the non-record-op member call and
  // the bare-identifier call, which differ only in callee shape.
  const emitCallSite = (): void => {
    const { argumentTexts, argumentInfos, argNodes } = snapshotArguments();
    const csId = encodeCallsiteId(routineId, csIndex++);
    csIdByNodeId.set(node.id, csId);
    callSites.push({
      id: csId,
      operationId: "", // filled in after the walk, once the final op count is known
      calleeText: funcNode.text,
      callee: calleeFromNode(node),
      argumentTexts,
      argumentInfos,
      argumentBindings: extractArgumentBindings(
        argNodes,
        enclosingParameters,
        enclosingRecordVariables,
        sourceUnitId,
        routineId,
      ),
      loopStack: loopStack.slice(),
      sourceAnchor: {
        sourceUnitId,
        range: nodeToSourceRange(node),
        enclosingRoutineId: routineId,
        syntaxKind: nodeType,
      },
    });
  };

  if (funcNode.type === "member_expression") {
    const memberNode = funcNode.childForFieldName("member") ?? funcNode.namedChildren[1];
    if (!memberNode) return;
    const opType = RECORD_OP_MAP[memberNode.text.toLowerCase()];
    if (opType !== undefined) {
      // Record-op site (e.g. SalesLine.SetRange(...)).
      const objNode = funcNode.childForFieldName("object") ?? funcNode.namedChildren[0];
      const receiver = objNode ? objNode.text : "";
      // Field arguments are only captured for ops listed in FIELD_ARGS_OPS, and
      // only when the call actually has an argument_list.
      let fieldArguments: string[] | undefined;
      let fieldArgumentInfos: ExpressionInfo[] | undefined;
      if (FIELD_ARGS_OPS.has(opType) && argListNode) {
        const snapshot = snapshotArguments();
        fieldArguments = snapshot.argumentTexts;
        fieldArgumentInfos = snapshot.argumentInfos;
      }
      // One anchor object is shared by the record operation and its site.
      const anchor = {
        sourceUnitId,
        range: nodeToSourceRange(node),
        enclosingRoutineId: routineId,
        syntaxKind: nodeType,
      };
      const opId = encodeOperationId(routineId, opIndex++);
      opIdByNodeId.set(node.id, opId);
      const snapshotLoopStack = loopStack.slice();
      recordOperations.push({
        id: opId,
        routineId,
        op: opType,
        recordVariableName: receiver,
        tempState: { kind: "unknown" },
        fieldArguments,
        ...(fieldArgumentInfos !== undefined ? { fieldArgumentInfos } : {}),
        loopStack: snapshotLoopStack,
        sourceAnchor: anchor,
      });
      operationSites.push({
        id: opId,
        routineId,
        kind: opType === "LockTable" ? "lock" : "record-op",
        sourceAnchor: anchor,
        loopStack: snapshotLoopStack,
      });
    } else {
      // Member call that is NOT a record op — emit as a CallSite.
      emitCallSite();
    }
  } else if (funcNode.type === "identifier") {
    // Bare call: plain identifier (e.g. EnrichLine, Message, Error, Commit).
    if (funcNode.text.toLowerCase() === "commit") {
      const opId = encodeOperationId(routineId, opIndex++);
      opIdByNodeId.set(node.id, opId);
      operationSites.push({
        id: opId,
        routineId,
        kind: "commit",
        sourceAnchor: {
          sourceUnitId,
          range: nodeToSourceRange(node),
          enclosingRoutineId: routineId,
          syntaxKind: nodeType,
        },
        loopStack: loopStack.slice(),
      });
    } else {
      emitCallSite();
    }
  }

  // Recurse only into the argument_list children. The function-callee subtree is
  // already handled above — recursing into it would either re-visit it (no-op) or
  // risk treating its inner member_expression as a stray field access.
  if (argListNode) {
    for (const child of argListNode.namedChildren) {
      if (child) visit(child, argListNode);
    }
  }

  // Chained-receiver descent: if the callee is a member_expression whose
  // receiver is itself a call (e.g. `Helper(C).FindSet()` — outer is FindSet,
  // inner Helper lives in the receiver), visit the inner call so it gets
  // registered. The receiver may sit below further member accesses
  // (`Helper(C).Sub.FindSet()`), so step through intermediate
  // member_expression objects without visiting them — visiting them would emit
  // them as field accesses — until a call or a non-member node is reached.
  if (funcNode.type === "member_expression") {
    let receiverParent: SyntaxNode = funcNode;
    let receiver: SyntaxNode | null =
      funcNode.childForFieldName("object") ?? funcNode.namedChildren[0] ?? null;
    while (receiver !== null && receiver.type === "member_expression") {
      receiverParent = receiver;
      receiver = receiver.childForFieldName("object") ?? receiver.namedChildren[0] ?? null;
    }
    if (receiver !== null && receiver.type === "call_expression") {
      visit(receiver, receiverParent);
    }
  }
}
732
+
733
/**
 * Parent node types whose direct children are always statements. A
 * member_expression directly under one of these is a method call written
 * without parentheses (e.g. `Customer.SetRecFilter;`) rather than a field
 * access — AL has no bare-expression statement that reads a field for its
 * side effect.
 */
const PURE_STATEMENT_PARENTS: ReadonlySet<string> = new Set<string>(["code_block"]);

/**
 * For parents that mix expression-position children (condition / variable /
 * start / end / iterable / object) with statement-position children, the names
 * of the fields that hold a statement.
 *
 * Keyed by parent node type; the value is the set of statement-position field
 * names. A member_expression found in any other child position of the same
 * parent is treated as expression-position (a field access).
 *
 * `repeat_statement` is deliberately absent: it has no body field. Its body
 * statements are direct children and only its `condition` field is an
 * expression, so the visitor handles it as "every child except `condition` is
 * a statement".
 */
const STATEMENT_FIELDS_BY_PARENT: ReadonlyMap<string, ReadonlySet<string>> = new Map(
  Object.entries({
    if_statement: ["then_branch", "else_branch"],
    for_statement: ["body"],
    while_statement: ["body"],
    foreach_statement: ["body"],
    with_statement: ["body"],
    case_branch: ["body"],
  }).map(([parentType, fieldNames]): [string, ReadonlySet<string>] => [
    parentType,
    new Set<string>(fieldNames),
  ]),
);
766
+
767
/**
 * Depth-first visitor over the routine body. In one pass it:
 *   - records value-position identifiers in `identifierRefSet`;
 *   - scans each `code_block` for a statement that follows an unconditional exit;
 *   - sets `hasBranching` on if / case / case_branch / try nodes;
 *   - registers loops and maintains `loopStack` while inside them;
 *   - hands `call_expression` nodes to `handleCallExpression`;
 *   - classifies each `member_expression` as either a parenless method call
 *     (statement position) or a field access (expression position).
 *
 * @param node   Node being visited.
 * @param parent Its syntactic parent, or `null` for the traversal root.
 */
function visit(node: SyntaxNode, parent: SyntaxNode | null): void {
  const nodeType = node.type;
  const parentType = parent?.type ?? "";

  // Identifier-reference collection: record every `identifier` node that is in
  // VALUE position. Skipped:
  //   - `member` field of `member_expression` — field/method name, not a value
  //   - `value` field of `qualified_enum_value` — enum option name, not a value
  //   - direct `enum_type` field of `qualified_enum_value` when the type is a
  //     bare identifier — that's a type reference, not a variable reference
  //     (when `enum_type` is a `member_expression`, its inner identifier IS
  //     a value reference and is visited recursively).
  // Definition positions (`parameter_list`, `var_section`) are outside the body
  // code_block we walk, so no extra filter is needed for those.
  if (nodeType === "identifier" && parent !== null) {
    let isValueRef = true;
    if (parentType === "member_expression") {
      const memberField = parent.childForFieldName("member");
      if (memberField !== null && memberField.startIndex === node.startIndex) {
        isValueRef = false;
      }
    } else if (parentType === "qualified_enum_value") {
      const valueField = parent.childForFieldName("value");
      const enumTypeField = parent.childForFieldName("enum_type");
      if (valueField !== null && valueField.startIndex === node.startIndex) {
        isValueRef = false;
      } else if (
        enumTypeField !== null &&
        enumTypeField.startIndex === node.startIndex &&
        enumTypeField.type === "identifier"
      ) {
        isValueRef = false;
      }
    }
    if (isValueRef) identifierRefSet.add(node.text.toLowerCase());
  }

  // Unreachable-after-exit scan: when entering a code_block, walk its statement-position
  // children pairwise. The first one classified as an unconditional exit, with any
  // later sibling, marks the rest of the block unreachable. We emit only the FIRST
  // unreachable sibling (D20 reports per-block, not per-statement) and don't recurse
  // past the exit — the indexer's normal traversal continues into all children, so
  // nested code blocks (inside conditionals, loops, etc.) get their own scan.
  if (nodeType === "code_block") {
    const stmts = node.namedChildren.filter(
      (c): c is SyntaxNode =>
        c !== null && c.type !== "begin_keyword" && c.type !== "end_keyword",
    );
    for (let i = 0; i < stmts.length - 1; i++) {
      const s = stmts[i];
      if (s === undefined) continue;
      const exitKind = unconditionalExitKind(s);
      if (exitKind === undefined) continue;
      const next = stmts[i + 1];
      if (next === undefined) break;
      unreachableStatements.push({
        id: `${routineId}/u${unreachableIndex++}`,
        exitKind,
        exitAnchor: {
          sourceUnitId,
          range: nodeToSourceRange(s),
          enclosingRoutineId: routineId,
          syntaxKind: s.type,
        },
        unreachableAnchor: {
          sourceUnitId,
          range: nodeToSourceRange(next),
          enclosingRoutineId: routineId,
          syntaxKind: next.type,
        },
      });
      break;
    }
  }

  // Branching-control-flow detection. Loops are handled separately by `nestingDepth`;
  // unconditional early-exit shapes are handled by `unreachableStatements`. This flag
  // captures everything else that breaks the straight-line assumption — conditional
  // (`if`/`case`) and exception-handling (`try`) constructs. Used by the Phase 4
  // control-flow walker (and any future straight-line analysis).
  if (
    nodeType === "if_statement" ||
    nodeType === "case_statement" ||
    nodeType === "case_branch" ||
    nodeType === "try_statement"
  ) {
    hasBranching = true;
  }

  // Loop detection.
  const loopType = LOOP_TYPE_MAP[nodeType];
  let pushedLoop = false;
  if (loopType !== undefined) {
    const id = encodeLoopId(routineId, loops.length);
    loops.push({
      id,
      type: loopType,
      sourceAnchor: {
        sourceUnitId,
        range: nodeToSourceRange(node),
        enclosingRoutineId: routineId,
        syntaxKind: nodeType,
      },
    });
    loopStack.push(id);
    pushedLoop = true;
  }

  if (nodeType === "call_expression") {
    handleCallExpression(node, nodeType);
    if (pushedLoop) loopStack.pop();
    return;
  }

  if (nodeType === "member_expression") {
    // Non-callee member_expression (the callee shape exits via handleCallExpression
    // without recursing back here). Decide between two cases:
    //   - Statement position (parent is code_block, or a control-flow rule's body
    //     field): AL has no bare-field-read statements, so `Customer.X;` is a
    //     parameterless method call. Treat as record-op if X is in the op map,
    //     otherwise emit as a CallSite so callers/D14 see the dependency.
    //   - Expression position (parent is assignment_statement, argument_list, or
    //     a control-flow rule's condition field): treat as a field access on a
    //     known record var.
    const objNode = node.childForFieldName("object") ?? node.namedChildren[0];
    const memberNode = node.childForFieldName("member") ?? node.namedChildren[1];
    if (objNode && memberNode) {
      let isStatementPosition = PURE_STATEMENT_PARENTS.has(parentType);
      if (!isStatementPosition && parent !== null) {
        const stmtFields = STATEMENT_FIELDS_BY_PARENT.get(parentType);
        if (stmtFields !== undefined) {
          // Check whether the member_expression occupies one of the parent's
          // known statement-position fields.
          for (const fieldName of stmtFields) {
            const fieldChild = parent.childForFieldName(fieldName);
            if (fieldChild !== null && fieldChild.startIndex === node.startIndex) {
              isStatementPosition = true;
              break;
            }
          }
        } else if (parentType === "repeat_statement") {
          // repeat_statement has no body field — every direct child except
          // the `condition` field is a statement.
          const conditionChild = parent.childForFieldName("condition");
          if (conditionChild === null || conditionChild.startIndex !== node.startIndex) {
            isStatementPosition = true;
          }
        }
      }
      if (isStatementPosition) {
        // Parameterless method call written without parens (e.g.
        // `Customer.SetRecFilter;`). Mirror the `member_expression`-branch
        // of handleCallExpression, minus argument processing.
        const methodLc = memberNode.text.toLowerCase();
        const opType = RECORD_OP_MAP[methodLc];
        const anchor = {
          sourceUnitId,
          range: nodeToSourceRange(node),
          enclosingRoutineId: routineId,
          syntaxKind: nodeType,
        };
        if (opType !== undefined) {
          const opId = encodeOperationId(routineId, opIndex++);
          opIdByNodeId.set(node.id, opId);
          const snapshotLoopStack = loopStack.slice();
          recordOperations.push({
            id: opId,
            routineId,
            op: opType,
            recordVariableName: objNode.text,
            tempState: { kind: "unknown" },
            loopStack: snapshotLoopStack,
            sourceAnchor: anchor,
          });
          operationSites.push({
            id: opId,
            routineId,
            kind: opType === "LockTable" ? "lock" : "record-op",
            sourceAnchor: anchor,
            loopStack: snapshotLoopStack,
          });
        } else {
          const csId3 = encodeCallsiteId(routineId, csIndex++);
          csIdByNodeId.set(node.id, csId3);
          callSites.push({
            id: csId3,
            operationId: "",
            calleeText: node.text,
            callee: calleeFromNode(node),
            argumentTexts: [],
            argumentInfos: [],
            argumentBindings: [],
            loopStack: loopStack.slice(),
            sourceAnchor: anchor,
          });
        }
        // This branch returns without walking the node's children, so do what
        // handleCallExpression does for a parenthesised call: record the
        // receiver identifiers (`Customer` in `Customer.SetRecFilter;`) as
        // value references, and register a call found in the receiver
        // (`Helper(C)` in `Helper(C).FindSet;`). Intermediate
        // member_expression receivers are stepped through rather than
        // visited, so they are not emitted as field accesses.
        collectIdentifiersFrom(node);
        let receiverParent: SyntaxNode = node;
        let receiver: SyntaxNode | null = objNode;
        while (receiver !== null && receiver.type === "member_expression") {
          receiverParent = receiver;
          receiver = receiver.childForFieldName("object") ?? receiver.namedChildren[0] ?? null;
        }
        if (receiver !== null && receiver.type === "call_expression") {
          visit(receiver, receiverParent);
        }
        if (pushedLoop) loopStack.pop();
        return;
      }
      // Skip `Record.Field::Option` enum-scope references. Tree-sitter-al
      // wraps these in a `qualified_enum_value` parent; the inner
      // member_expression is a TYPE-side reference used to resolve the
      // enum option name at compile time, not a runtime field read. Emitting
      // it as a fieldAccess causes the path-aware walker (which raises
      // `requiresLoadedAtEntry` on any field-access against an unloaded
      // record) to false-positive on loader routines that filter by enum
      // fields, e.g. `AIField.SetRange(Type, AIField.Type::Header); ... AIField.FindFirst;`.
      const isEnumScopeRef = parentType === "qualified_enum_value";
      const recordVariableName = objNode.text;
      if (!isEnumScopeRef && recordVarNames.has(recordVariableName.toLowerCase())) {
        // Strip the surrounding double quotes when the field name is a
        // `quoted_identifier` (`Customer."No."`) so downstream consumers
        // (D22 lookupField, fingerprinting) compare against the canonical
        // `Field.name` without re-stripping. Plain `identifier` members
        // pass through unchanged.
        const fieldName =
          memberNode.type === "quoted_identifier" &&
          memberNode.text.length >= 2 &&
          memberNode.text.startsWith('"') &&
          memberNode.text.endsWith('"')
            ? memberNode.text.slice(1, -1)
            : memberNode.text;
        fieldAccesses.push({
          recordVariableName,
          fieldName,
          sourceAnchor: {
            sourceUnitId,
            range: nodeToSourceRange(node),
            enclosingRoutineId: routineId,
            syntaxKind: nodeType,
          },
        });
      }
    }
    // Continue into children to find chained accesses (`A.B.C`).
  }

  for (const child of node.namedChildren) {
    if (child) visit(child, node);
  }

  if (pushedLoop) loopStack.pop();
}
1010
+
1011
// Single traversal of the routine body; fills every collection returned below.
visit(bodyNode, null);

// Call sites take the operation-id slots that follow the record-op / commit
// ids: the i-th call site gets index `opIndex + i`, where `opIndex` is now the
// number of op ids handed out during the walk (matching the old two-pass
// behavior).
callSites.forEach((callSite, position) => {
  if (callSite) callSite.operationId = encodeOperationId(routineId, opIndex + position);
});

// Structural pass only: it looks up ids in the maps filled by `visit`, so every
// id it embeds is already final.
const statementTree = buildCFNForBlock(bodyNode);

return {
  loops,
  operationSites,
  recordOperations,
  callSites,
  fieldAccesses,
  unreachableStatements,
  hasBranching,
  statementTree,
  // Sorted so that model dumps stay byte-stable across runs.
  identifierReferences: Array.from(identifierRefSet).sort(),
  varAssignments: collectVarAssignments(bodyNode, routineId, sourceUnitId),
  conditionReferences: collectConditionReferences(bodyNode, routineId, sourceUnitId),
};
1039
+
1040
/**
 * Turn a code_block node into a "block" ControlFlowNode. Each named child other
 * than the begin/end keywords is converted with `buildCFNForStatement`;
 * children for which that returns null are left out.
 */
function buildCFNForBlock(blockNode: SyntaxNode): ControlFlowNode {
  const statementNodes = blockNode.namedChildren.filter(
    (candidate): candidate is SyntaxNode =>
      candidate !== null && candidate.type !== "begin_keyword" && candidate.type !== "end_keyword",
  );
  const children = statementNodes
    .map((statement) => buildCFNForStatement(statement))
    .filter((built): built is ControlFlowNode => built !== null);
  return {
    kind: "block",
    sourceAnchor: {
      sourceUnitId,
      range: nodeToSourceRange(blockNode),
      enclosingRoutineId: routineId,
      syntaxKind: blockNode.type,
    },
    children,
  };
}
1064
+
1065
/**
 * Build the ControlFlowNode for a branch or loop body.
 *
 * The AL grammar lets a `then_branch` / `else_branch` / loop `body` /
 * case_branch `body` be either a `code_block` (begin/end present) or the lone
 * statement itself (begin/end omitted, e.g. `if X then Foo();`).
 *
 * A `code_block` is delegated to `buildCFNForBlock`. Anything else is wrapped
 * in a synthetic one-statement block so the walker sees every body as a block;
 * that block's anchor covers just the single statement, since no enclosing
 * code_block exists in the source. The synthetic block has no children when
 * the statement converts to null.
 */
function buildCFNForBranchBody(node: SyntaxNode): ControlFlowNode {
  if (node.type !== "code_block") {
    const soleStatement = buildCFNForStatement(node);
    return {
      kind: "block",
      sourceAnchor: {
        sourceUnitId,
        range: nodeToSourceRange(node),
        enclosingRoutineId: routineId,
        syntaxKind: node.type,
      },
      children: soleStatement === null ? [] : [soleStatement],
    };
  }
  return buildCFNForBlock(node);
}
1091
+
1092
/**
 * Harvest leaves from the receiver (function-side) of a chained call expression.
 * For `Helper(C).FindSet()`, the outer `call_expression`'s function field is a
 * `member_expression` whose object is another `call_expression` (`Helper(C)`).
 * That inner call lives in the receiver subtree — outside the outer's
 * argument_list. AL evaluates the receiver BEFORE the outer call, so the
 * harvested leaf must be a SIBLING of the outer leaf in the parent's
 * conditionLeaves, ordered before it; callers therefore invoke this before
 * pushing the outer leaf onto `out`.
 *
 * `callNode` may also be a parenless call, i.e. a `member_expression` used as
 * a statement (`Helper(C).FindSet;`). Such a node has no `function` field and
 * is its own callee; its receiver is its `object` child.
 *
 * Does nothing when the callee is not a member_expression, or when its
 * receiver is neither a call_expression nor a member_expression (the common
 * non-chained shape).
 *
 * @param callNode A call node: `call_expression`, or a parenless `member_expression`.
 * @param out      Array that receives the receiver's leaves.
 */
function harvestReceiverLeaves(callNode: SyntaxNode, out: ControlFlowNode[]): void {
  // For a parenless call the node IS the callee. Falling back to
  // `namedChildren[0]` there would pick the receiver object instead and skip
  // one level of the receiver chain.
  const calleeNode =
    callNode.type === "member_expression"
      ? callNode
      : (callNode.childForFieldName("function") ?? callNode.namedChildren[0]);
  if (!calleeNode || calleeNode.type !== "member_expression") return;
  const objNode = calleeNode.childForFieldName("object") ?? calleeNode.namedChildren[0];
  if (!objNode) return;
  if (objNode.type !== "call_expression" && objNode.type !== "member_expression") return;
  harvestExpressionLeaves(objNode, out);
}
1112
+
1113
/**
 * Collect record-op / call-site leaves found in an expression subtree (P7.5).
 *
 * For every `call_expression` / `member_expression` met, the node's `id` is
 * looked up in the op map first and the call-site map second. On a hit:
 *   1. leaves found in the node's argument_list become the new leaf's own
 *      `conditionLeaves` (so the walker runs them BEFORE the leaf's effect);
 *   2. leaves from a chained receiver are appended to `out` as siblings, ahead
 *      of the new leaf (see `harvestReceiverLeaves`);
 *   3. the new leaf is appended to `out` and descent stops for this node.
 * A call-site whose callee is the bare identifier `Error` (case-insensitive)
 * yields an "error" leaf instead of a "call" leaf.
 *
 * Nodes that are not registered, and nodes of any other type, are simply
 * descended into through all of their named children.
 *
 * Used by: if / while / for / foreach / repeat / case condition harvest, and by
 * the call_expression leaf's own argument harvest in buildCFNForStatement.
 *
 * @param exprNode Root of the expression subtree; `null` is a no-op.
 * @param out      Array that receives the harvested leaves, in evaluation order.
 */
function harvestExpressionLeaves(exprNode: SyntaxNode | null, out: ControlFlowNode[]): void {
  if (exprNode === null) return;
  const current = exprNode;
  const kindOfNode = current.type;

  if (kindOfNode === "call_expression" || kindOfNode === "member_expression") {
    const anchor: ControlFlowNode["sourceAnchor"] = {
      sourceUnitId,
      range: nodeToSourceRange(current),
      enclosingRoutineId: routineId,
      syntaxKind: kindOfNode,
    };

    // Shared tail for both hit kinds: nest the argument leaves under the new
    // leaf, surface receiver leaves as earlier siblings, then emit the leaf.
    const emitLeaf = (leaf: ControlFlowNode): void => {
      const nested: ControlFlowNode[] = [];
      const argumentList = current.namedChildren.find(
        (c) => c !== null && c.type === "argument_list",
      );
      if (argumentList) {
        for (const argument of argumentList.namedChildren) {
          if (argument) harvestExpressionLeaves(argument, nested);
        }
      }
      // Receiver leaves go into `out` (sibling position), not into `nested`
      // (which holds the outer call's own argument leaves).
      harvestReceiverLeaves(current, out);
      if (nested.length > 0) leaf.conditionLeaves = nested;
      out.push(leaf);
    };

    const operationId = opIdByNodeId.get(current.id);
    if (operationId !== undefined) {
      emitLeaf({ kind: "op", sourceAnchor: anchor, operationId });
      return;
    }

    const callsiteId = csIdByNodeId.get(current.id);
    if (callsiteId !== undefined) {
      const calleeNode = current.childForFieldName("function") ?? current.namedChildren[0];
      const raisesError =
        calleeNode?.type === "identifier" && calleeNode.text.toLowerCase() === "error";
      emitLeaf(
        raisesError
          ? { kind: "error", sourceAnchor: anchor, callsiteId }
          : { kind: "call", sourceAnchor: anchor, callsiteId },
      );
      return;
    }
    // Registered as neither op nor call site: fall through and keep descending
    // so nested receiver / argument expressions can still surface their ops.
  }

  // Descend into every named child (argument_list, comparison operands, ...).
  // By construction of the grammar, statement-position children do not occur
  // inside expression subtrees (an if-condition cannot be a statement, etc.).
  for (const descendant of current.namedChildren) {
    if (descendant) harvestExpressionLeaves(descendant, out);
  }
}
1194
+
1195
+ /**
1196
+ * Build a ControlFlowNode for a single statement node. Returns null for nodes that
1197
+ * should be skipped (e.g. keywords, empty statements).
1198
+ *
1199
+ * Grammar-verified node types:
1200
+ * - if_statement: fields `condition` (expression), `then_branch` (code_block or
1201
+ * bare statement), `else_branch` (optional)
1202
+ * - case_statement: field `expression` (expression); namedChildren include
1203
+ * case_branch / case_else_branch nodes
1204
+ * - case_branch: field `body` (code_block); case_else_branch: second namedChild is code_block
1205
+ * - for_statement: fields `start`, `end`, `variable`, `body`
1206
+ * - foreach_statement: fields `iterable`, `variable`, `body`
1207
+ * - while_statement: fields `condition`, `body`
1208
+ * - repeat_statement: field `condition` (the until-expr); namedChildren before
1209
+ * until_keyword are the body statements
1210
+ * - exit_statement: leaf, kind "exit"
1211
+ * - call_expression: op leaf (op map hit) or call leaf (callsite); already visited
1212
+ * - member_expression in statement position: same as call_expression
1213
+ *
1214
+ * P7.5: branching / looping nodes harvest expression-position record-ops /
1215
+ * callsites from their condition / range / iterable / expression fields into
1216
+ * `conditionLeaves`. Call / op leaves harvest from their `argument_list`. The
1217
+ * walker processes `conditionLeaves` at the position dictated by AL semantics
1218
+ * (pre-body for if / case / while / for / foreach; post-body for repeat;
1219
+ * pre-effect for call / op).
1220
+ */
1221
function buildCFNForStatement(node: SyntaxNode): ControlFlowNode | null {
  // Translates one statement-position syntax node into a ControlFlowNode.
  // Returns null for keyword tokens and empty statements, which carry no
  // control flow of their own; every other node yields a non-null result.
  const type = node.type;
  const anchor: ControlFlowNode["sourceAnchor"] = {
    sourceUnitId,
    range: nodeToSourceRange(node),
    enclosingRoutineId: routineId,
    syntaxKind: type,
  };

  // Sets `conditionLeaves` only when something was harvested, so nodes without
  // expression-position work never carry an empty `conditionLeaves` array.
  const attachLeaves = (cfn: ControlFlowNode, leaves: ControlFlowNode[]): ControlFlowNode => {
    if (leaves.length > 0) cfn.conditionLeaves = leaves;
    return cfn;
  };

  // Harvests expression-position record-ops / callsites from the given fields
  // of `node`, in the order the field names are listed.
  const harvestFields = (...fieldNames: string[]): ControlFlowNode[] => {
    const leaves: ControlFlowNode[] = [];
    for (const fieldName of fieldNames) {
      harvestExpressionLeaves(node.childForFieldName(fieldName), leaves);
    }
    return leaves;
  };

  switch (type) {
    case "if_statement": {
      // then_branch / else_branch may be EITHER a code_block (begin/end present)
      // OR a bare statement (e.g. `if X then Foo();`); buildCFNForBranchBody
      // handles both shapes.
      const thenNode = node.childForFieldName("then_branch");
      const elseNode = node.childForFieldName("else_branch");
      const children = thenNode !== null ? [buildCFNForBranchBody(thenNode)] : [];
      const elseChildren = elseNode !== null ? [buildCFNForBranchBody(elseNode)] : undefined;
      // P7.5: condition field (e.g. `if Cust.FindSet() then ...`). Harvested
      // after the branches are built, matching the established build order.
      const conditionLeaves = harvestFields("condition");
      return attachLeaves(
        {
          kind: "if",
          sourceAnchor: anchor,
          children,
          ...(elseChildren !== undefined ? { elseChildren } : {}),
        },
        conditionLeaves,
      );
    }

    case "case_statement": {
      // Only case_branch / case_else_branch children contribute branches.
      const branches: ControlFlowNode[] = [];
      for (const child of node.namedChildren) {
        if (child === null) continue;
        if (child.type !== "case_branch" && child.type !== "case_else_branch") continue;
        const branch = buildCFNForCaseBranch(child);
        if (branch !== null) branches.push(branch);
      }
      // P7.5: the case-value expression (e.g. `case Rec.Find('-') of`).
      const conditionLeaves = harvestFields("expression");
      return attachLeaves(
        { kind: "case", sourceAnchor: anchor, children: branches },
        conditionLeaves,
      );
    }

    case "for_statement":
    case "foreach_statement":
    case "while_statement": {
      const kind: ControlFlowNode["kind"] =
        type === "for_statement" ? "for" : type === "foreach_statement" ? "foreach" : "while";
      // Loop `body` may be a code_block or a bare statement (no begin/end).
      const bodyNode = node.childForFieldName("body");
      const children = bodyNode !== null ? [buildCFNForBranchBody(bodyNode)] : [];
      // P7.5: controlling expressions per loop flavour:
      //   - while:   `condition` (evaluated before each iteration)
      //   - for:     `start` + `end` (`start` is evaluated once at entry; the
      //              walker's fixed-point re-applies both each iteration,
      //              which is sound)
      //   - foreach: `iterable` (evaluated once at entry; applying it
      //              per-iteration via the fixed-point is still sound)
      const conditionLeaves =
        type === "while_statement"
          ? harvestFields("condition")
          : type === "for_statement"
            ? harvestFields("start", "end")
            : harvestFields("iterable");
      return attachLeaves({ kind, sourceAnchor: anchor, children }, conditionLeaves);
    }

    case "repeat_statement": {
      // repeat_statement has no body field: the body is every named child that
      // precedes the `condition` field (the expression after `until`).
      // The condition lookup is loop-invariant, so resolve it once up front.
      const untilCondition = node.childForFieldName("condition");
      const conditionStart = untilCondition?.startIndex;
      const bodyChildren: ControlFlowNode[] = [];
      for (const child of node.namedChildren) {
        if (child === null) continue;
        if (child.type === "until_keyword" || child.type === "repeat_keyword") continue;
        // Reaching the until-condition marks the end of the body. When the
        // condition is absent, conditionStart is undefined and never matches.
        if (conditionStart === child.startIndex) break;
        const cfn = buildCFNForStatement(child);
        if (cfn !== null) bodyChildren.push(cfn);
      }
      // P7.5: until-condition (e.g. `until Cust.Next() = 0`). The walker
      // processes these AFTER the body each iteration (post-condition semantics).
      const conditionLeaves: ControlFlowNode[] = [];
      harvestExpressionLeaves(untilCondition, conditionLeaves);
      return attachLeaves(
        { kind: "repeat", sourceAnchor: anchor, children: bodyChildren },
        conditionLeaves,
      );
    }

    case "try_statement":
      // The AL grammar (tree-sitter-al / src/node-types.json) does not currently
      // define a `try_statement` node — AL's TryFunction is a [TryFunction]
      // attribute on a procedure declaration, and `asserterror_statement` is the
      // closest statement-level cousin. The branching-flag code in `visit`
      // checks for `try_statement` defensively; mirror that here by emitting a
      // "try" wrapper with no children rather than guessing the field structure.
      // TODO P6.T2: if the AL grammar ever adds a real try_statement node, verify
      // its named fields with a grammar probe before populating `children`.
      return { kind: "try", sourceAnchor: anchor, children: [] };

    case "exit_statement":
      return { kind: "exit", sourceAnchor: anchor };

    case "call_expression":
    case "member_expression": {
      // P7.5: harvest chained-receiver ops/calls (e.g. `Helper(C).FindSet()`)
      // AND argument-position ops/calls (e.g. `Helper(Cust.FindSet())`). The
      // walker processes both BEFORE the outer leaf's own effect. They live in
      // the outer leaf's `conditionLeaves` because there is no parent slot at
      // statement position.
      const argList = node.namedChildren.find((c) => c !== null && c.type === "argument_list");
      const preLeaves: ControlFlowNode[] = [];
      // Receiver first (evaluates first in AL), then the argument list.
      harvestReceiverLeaves(node, preLeaves);
      if (argList) {
        for (const arg of argList.namedChildren) {
          if (arg) harvestExpressionLeaves(arg, preLeaves);
        }
      }

      // A recorded record-operation takes precedence over a recorded callsite.
      const opId = opIdByNodeId.get(node.id);
      if (opId !== undefined) {
        return attachLeaves({ kind: "op", sourceAnchor: anchor, operationId: opId }, preLeaves);
      }

      const csId = csIdByNodeId.get(node.id);
      if (csId !== undefined) {
        // A bare `Error(...)` call (case-insensitive identifier) is an error leaf.
        const funcNode = node.childForFieldName("function") ?? node.namedChildren[0];
        const isError = funcNode?.type === "identifier" && funcNode.text.toLowerCase() === "error";
        const leaf: ControlFlowNode = isError
          ? { kind: "error", sourceAnchor: anchor, callsiteId: csId }
          : { kind: "call", sourceAnchor: anchor, callsiteId: csId };
        return attachLeaves(leaf, preLeaves);
      }

      // No op/callsite registered for this node: still surface any nested
      // receiver / argument leaves so they are not lost.
      return attachLeaves({ kind: "other", sourceAnchor: anchor }, preLeaves);
    }

    case "with_statement":
    case "asserterror_statement": {
      // Compound statements with a `body` field (code_block OR bare statement):
      // recurse so ops/calls inside remain visible.
      const bodyNode = node.childForFieldName("body");
      if (bodyNode === null) return { kind: "other", sourceAnchor: anchor };
      return { kind: "other", sourceAnchor: anchor, children: [buildCFNForBranchBody(bodyNode)] };
    }

    // Keyword tokens and empty statements contribute nothing to the tree.
    case "begin_keyword":
    case "end_keyword":
    case "if_keyword":
    case "then_keyword":
    case "else_keyword":
    case "case_keyword":
    case "of_keyword":
    case "repeat_keyword":
    case "until_keyword":
    case "while_keyword":
    case "for_keyword":
    case "do_keyword":
    case "foreach_keyword":
    case "in_keyword":
    case "empty_statement":
      return null;

    default:
      // Everything else (assignment, message, continue, break, preproc, …) is an
      // opaque straight-line "other". Sub-expressions containing calls are
      // captured separately by the visit pass but are not exposed in the tree.
      return { kind: "other", sourceAnchor: anchor };
  }
}
1429
+
1430
/**
 * Wraps a `case_branch` or `case_else_branch` syntax node in a "case-branch"
 * ControlFlowNode whose only child (when a body is found) is the branch body.
 *
 * Body lookup:
 * - `case_branch`: the `body` field, which may be a code_block OR a bare
 *   statement (no begin/end).
 * - `case_else_branch`: there is no named body field, so the first `code_block`
 *   named child is preferred; failing that, the first named child that is not
 *   `else_keyword` is taken as a single-statement body.
 *
 * The declared return type allows null, but this implementation always returns
 * a node (with empty `children` when no body is found).
 */
function buildCFNForCaseBranch(node: SyntaxNode): ControlFlowNode | null {
  const anchor: ControlFlowNode["sourceAnchor"] = {
    sourceUnitId,
    range: nodeToSourceRange(node),
    enclosingRoutineId: routineId,
    syntaxKind: node.type,
  };

  // Resolves the syntax node that acts as this branch's body, or null.
  const locateBody = (): SyntaxNode | null => {
    if (node.type === "case_branch") {
      return node.childForFieldName("body");
    }
    // case_else_branch: a begin/end block wins wherever it sits among the children.
    const block = node.namedChildren.find((c) => c?.type === "code_block") ?? null;
    if (block !== null) return block;
    // NOTE(review): only the first non-keyword child is used here — confirm the
    // grammar never yields several bare statements (or a leading comment node)
    // inside a case_else_branch.
    return node.namedChildren.find((c) => c !== null && c.type !== "else_keyword") ?? null;
  };

  const body = locateBody();
  const children = body === null ? [] : [buildCFNForBranchBody(body)];
  return { kind: "case-branch", sourceAnchor: anchor, children };
}
1467
+ }