gitnexus 1.4.7 → 1.4.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. package/README.md +29 -1
  2. package/dist/cli/ai-context.d.ts +1 -1
  3. package/dist/cli/ai-context.js +1 -1
  4. package/dist/cli/analyze.d.ts +2 -0
  5. package/dist/cli/analyze.js +54 -21
  6. package/dist/cli/index-repo.d.ts +15 -0
  7. package/dist/cli/index-repo.js +115 -0
  8. package/dist/cli/index.js +13 -3
  9. package/dist/cli/setup.js +90 -10
  10. package/dist/cli/wiki.d.ts +4 -0
  11. package/dist/cli/wiki.js +174 -53
  12. package/dist/config/supported-languages.d.ts +33 -1
  13. package/dist/config/supported-languages.js +32 -0
  14. package/dist/core/embeddings/embedder.d.ts +6 -1
  15. package/dist/core/embeddings/embedder.js +65 -5
  16. package/dist/core/embeddings/embedding-pipeline.js +11 -9
  17. package/dist/core/embeddings/http-client.d.ts +31 -0
  18. package/dist/core/embeddings/http-client.js +179 -0
  19. package/dist/core/embeddings/index.d.ts +1 -0
  20. package/dist/core/embeddings/index.js +1 -0
  21. package/dist/core/embeddings/types.d.ts +1 -1
  22. package/dist/core/graph/graph.js +9 -1
  23. package/dist/core/graph/types.d.ts +11 -2
  24. package/dist/core/ingestion/call-processor.d.ts +66 -2
  25. package/dist/core/ingestion/call-processor.js +650 -30
  26. package/dist/core/ingestion/call-routing.d.ts +9 -18
  27. package/dist/core/ingestion/call-routing.js +0 -19
  28. package/dist/core/ingestion/cobol/cobol-copy-expander.d.ts +57 -0
  29. package/dist/core/ingestion/cobol/cobol-copy-expander.js +385 -0
  30. package/dist/core/ingestion/cobol/cobol-preprocessor.d.ts +210 -0
  31. package/dist/core/ingestion/cobol/cobol-preprocessor.js +1509 -0
  32. package/dist/core/ingestion/cobol/jcl-parser.d.ts +68 -0
  33. package/dist/core/ingestion/cobol/jcl-parser.js +217 -0
  34. package/dist/core/ingestion/cobol/jcl-processor.d.ts +33 -0
  35. package/dist/core/ingestion/cobol/jcl-processor.js +229 -0
  36. package/dist/core/ingestion/cobol-processor.d.ts +54 -0
  37. package/dist/core/ingestion/cobol-processor.js +1186 -0
  38. package/dist/core/ingestion/entry-point-scoring.d.ts +17 -0
  39. package/dist/core/ingestion/entry-point-scoring.js +52 -28
  40. package/dist/core/ingestion/export-detection.d.ts +47 -8
  41. package/dist/core/ingestion/export-detection.js +29 -50
  42. package/dist/core/ingestion/field-extractor.d.ts +29 -0
  43. package/dist/core/ingestion/field-extractor.js +25 -0
  44. package/dist/core/ingestion/field-extractors/configs/c-cpp.d.ts +3 -0
  45. package/dist/core/ingestion/field-extractors/configs/c-cpp.js +108 -0
  46. package/dist/core/ingestion/field-extractors/configs/csharp.d.ts +8 -0
  47. package/dist/core/ingestion/field-extractors/configs/csharp.js +73 -0
  48. package/dist/core/ingestion/field-extractors/configs/dart.d.ts +8 -0
  49. package/dist/core/ingestion/field-extractors/configs/dart.js +76 -0
  50. package/dist/core/ingestion/field-extractors/configs/go.d.ts +11 -0
  51. package/dist/core/ingestion/field-extractors/configs/go.js +64 -0
  52. package/dist/core/ingestion/field-extractors/configs/helpers.d.ts +44 -0
  53. package/dist/core/ingestion/field-extractors/configs/helpers.js +134 -0
  54. package/dist/core/ingestion/field-extractors/configs/jvm.d.ts +3 -0
  55. package/dist/core/ingestion/field-extractors/configs/jvm.js +118 -0
  56. package/dist/core/ingestion/field-extractors/configs/php.d.ts +8 -0
  57. package/dist/core/ingestion/field-extractors/configs/php.js +67 -0
  58. package/dist/core/ingestion/field-extractors/configs/python.d.ts +12 -0
  59. package/dist/core/ingestion/field-extractors/configs/python.js +91 -0
  60. package/dist/core/ingestion/field-extractors/configs/ruby.d.ts +16 -0
  61. package/dist/core/ingestion/field-extractors/configs/ruby.js +75 -0
  62. package/dist/core/ingestion/field-extractors/configs/rust.d.ts +9 -0
  63. package/dist/core/ingestion/field-extractors/configs/rust.js +55 -0
  64. package/dist/core/ingestion/field-extractors/configs/swift.d.ts +8 -0
  65. package/dist/core/ingestion/field-extractors/configs/swift.js +63 -0
  66. package/dist/core/ingestion/field-extractors/configs/typescript-javascript.d.ts +3 -0
  67. package/dist/core/ingestion/field-extractors/configs/typescript-javascript.js +60 -0
  68. package/dist/core/ingestion/field-extractors/generic.d.ts +46 -0
  69. package/dist/core/ingestion/field-extractors/generic.js +111 -0
  70. package/dist/core/ingestion/field-extractors/typescript.d.ts +77 -0
  71. package/dist/core/ingestion/field-extractors/typescript.js +291 -0
  72. package/dist/core/ingestion/field-types.d.ts +59 -0
  73. package/dist/core/ingestion/field-types.js +2 -0
  74. package/dist/core/ingestion/framework-detection.d.ts +97 -2
  75. package/dist/core/ingestion/framework-detection.js +114 -14
  76. package/dist/core/ingestion/heritage-processor.js +62 -66
  77. package/dist/core/ingestion/import-processor.d.ts +9 -10
  78. package/dist/core/ingestion/import-processor.js +150 -196
  79. package/dist/core/ingestion/{resolvers → import-resolvers}/csharp.d.ts +6 -9
  80. package/dist/core/ingestion/{resolvers → import-resolvers}/csharp.js +20 -2
  81. package/dist/core/ingestion/import-resolvers/dart.d.ts +7 -0
  82. package/dist/core/ingestion/import-resolvers/dart.js +44 -0
  83. package/dist/core/ingestion/{resolvers → import-resolvers}/go.d.ts +4 -5
  84. package/dist/core/ingestion/{resolvers → import-resolvers}/go.js +17 -0
  85. package/dist/core/ingestion/{resolvers → import-resolvers}/jvm.d.ts +10 -1
  86. package/dist/core/ingestion/import-resolvers/jvm.js +159 -0
  87. package/dist/core/ingestion/import-resolvers/php.d.ts +25 -0
  88. package/dist/core/ingestion/import-resolvers/php.js +80 -0
  89. package/dist/core/ingestion/{resolvers → import-resolvers}/python.d.ts +9 -3
  90. package/dist/core/ingestion/{resolvers → import-resolvers}/python.js +35 -3
  91. package/dist/core/ingestion/{resolvers → import-resolvers}/ruby.d.ts +5 -2
  92. package/dist/core/ingestion/{resolvers → import-resolvers}/ruby.js +7 -2
  93. package/dist/core/ingestion/{resolvers → import-resolvers}/rust.d.ts +5 -2
  94. package/dist/core/ingestion/{resolvers → import-resolvers}/rust.js +41 -2
  95. package/dist/core/ingestion/{resolvers → import-resolvers}/standard.d.ts +15 -7
  96. package/dist/core/ingestion/{resolvers → import-resolvers}/standard.js +22 -3
  97. package/dist/core/ingestion/import-resolvers/swift.d.ts +7 -0
  98. package/dist/core/ingestion/import-resolvers/swift.js +23 -0
  99. package/dist/core/ingestion/import-resolvers/types.d.ts +44 -0
  100. package/dist/core/ingestion/import-resolvers/types.js +6 -0
  101. package/dist/core/ingestion/{resolvers → import-resolvers}/utils.d.ts +2 -0
  102. package/dist/core/ingestion/{resolvers → import-resolvers}/utils.js +7 -0
  103. package/dist/core/ingestion/language-config.d.ts +6 -0
  104. package/dist/core/ingestion/language-config.js +13 -0
  105. package/dist/core/ingestion/language-provider.d.ts +121 -0
  106. package/dist/core/ingestion/language-provider.js +24 -0
  107. package/dist/core/ingestion/languages/c-cpp.d.ts +12 -0
  108. package/dist/core/ingestion/languages/c-cpp.js +71 -0
  109. package/dist/core/ingestion/languages/cobol.d.ts +1 -0
  110. package/dist/core/ingestion/languages/cobol.js +26 -0
  111. package/dist/core/ingestion/languages/csharp.d.ts +8 -0
  112. package/dist/core/ingestion/languages/csharp.js +49 -0
  113. package/dist/core/ingestion/languages/dart.d.ts +12 -0
  114. package/dist/core/ingestion/languages/dart.js +58 -0
  115. package/dist/core/ingestion/languages/go.d.ts +11 -0
  116. package/dist/core/ingestion/languages/go.js +28 -0
  117. package/dist/core/ingestion/languages/index.d.ts +38 -0
  118. package/dist/core/ingestion/languages/index.js +63 -0
  119. package/dist/core/ingestion/languages/java.d.ts +9 -0
  120. package/dist/core/ingestion/languages/java.js +29 -0
  121. package/dist/core/ingestion/languages/kotlin.d.ts +9 -0
  122. package/dist/core/ingestion/languages/kotlin.js +53 -0
  123. package/dist/core/ingestion/languages/php.d.ts +8 -0
  124. package/dist/core/ingestion/languages/php.js +145 -0
  125. package/dist/core/ingestion/languages/python.d.ts +12 -0
  126. package/dist/core/ingestion/languages/python.js +39 -0
  127. package/dist/core/ingestion/languages/ruby.d.ts +9 -0
  128. package/dist/core/ingestion/languages/ruby.js +44 -0
  129. package/dist/core/ingestion/languages/rust.d.ts +12 -0
  130. package/dist/core/ingestion/languages/rust.js +44 -0
  131. package/dist/core/ingestion/languages/swift.d.ts +12 -0
  132. package/dist/core/ingestion/languages/swift.js +133 -0
  133. package/dist/core/ingestion/languages/typescript.d.ts +10 -0
  134. package/dist/core/ingestion/languages/typescript.js +60 -0
  135. package/dist/core/ingestion/markdown-processor.d.ts +17 -0
  136. package/dist/core/ingestion/markdown-processor.js +124 -0
  137. package/dist/core/ingestion/mro-processor.js +22 -18
  138. package/dist/core/ingestion/named-binding-processor.d.ts +18 -0
  139. package/dist/core/ingestion/named-binding-processor.js +42 -0
  140. package/dist/core/ingestion/named-bindings/csharp.d.ts +3 -0
  141. package/dist/core/ingestion/named-bindings/csharp.js +37 -0
  142. package/dist/core/ingestion/named-bindings/java.d.ts +3 -0
  143. package/dist/core/ingestion/named-bindings/java.js +29 -0
  144. package/dist/core/ingestion/named-bindings/kotlin.d.ts +3 -0
  145. package/dist/core/ingestion/named-bindings/kotlin.js +36 -0
  146. package/dist/core/ingestion/named-bindings/php.d.ts +3 -0
  147. package/dist/core/ingestion/named-bindings/php.js +61 -0
  148. package/dist/core/ingestion/named-bindings/python.d.ts +3 -0
  149. package/dist/core/ingestion/named-bindings/python.js +49 -0
  150. package/dist/core/ingestion/named-bindings/rust.d.ts +3 -0
  151. package/dist/core/ingestion/named-bindings/rust.js +64 -0
  152. package/dist/core/ingestion/named-bindings/types.d.ts +16 -0
  153. package/dist/core/ingestion/named-bindings/types.js +6 -0
  154. package/dist/core/ingestion/named-bindings/typescript.d.ts +3 -0
  155. package/dist/core/ingestion/named-bindings/typescript.js +58 -0
  156. package/dist/core/ingestion/parsing-processor.d.ts +6 -2
  157. package/dist/core/ingestion/parsing-processor.js +125 -85
  158. package/dist/core/ingestion/pipeline.d.ts +10 -0
  159. package/dist/core/ingestion/pipeline.js +1235 -317
  160. package/dist/core/ingestion/resolution-context.d.ts +5 -0
  161. package/dist/core/ingestion/resolution-context.js +8 -5
  162. package/dist/core/ingestion/route-extractors/expo.d.ts +1 -0
  163. package/dist/core/ingestion/route-extractors/expo.js +36 -0
  164. package/dist/core/ingestion/route-extractors/middleware.d.ts +47 -0
  165. package/dist/core/ingestion/route-extractors/middleware.js +143 -0
  166. package/dist/core/ingestion/route-extractors/nextjs.d.ts +3 -0
  167. package/dist/core/ingestion/route-extractors/nextjs.js +76 -0
  168. package/dist/core/ingestion/route-extractors/php.d.ts +7 -0
  169. package/dist/core/ingestion/route-extractors/php.js +21 -0
  170. package/dist/core/ingestion/route-extractors/response-shapes.d.ts +20 -0
  171. package/dist/core/ingestion/route-extractors/response-shapes.js +290 -0
  172. package/dist/core/ingestion/symbol-table.d.ts +16 -0
  173. package/dist/core/ingestion/symbol-table.js +20 -6
  174. package/dist/core/ingestion/tree-sitter-queries.d.ts +10 -9
  175. package/dist/core/ingestion/tree-sitter-queries.js +274 -11
  176. package/dist/core/ingestion/type-env.d.ts +42 -18
  177. package/dist/core/ingestion/type-env.js +481 -106
  178. package/dist/core/ingestion/type-extractors/c-cpp.d.ts +5 -0
  179. package/dist/core/ingestion/type-extractors/c-cpp.js +119 -0
  180. package/dist/core/ingestion/type-extractors/csharp.js +149 -16
  181. package/dist/core/ingestion/type-extractors/dart.d.ts +15 -0
  182. package/dist/core/ingestion/type-extractors/dart.js +371 -0
  183. package/dist/core/ingestion/type-extractors/jvm.js +169 -66
  184. package/dist/core/ingestion/type-extractors/rust.js +35 -1
  185. package/dist/core/ingestion/type-extractors/shared.d.ts +1 -15
  186. package/dist/core/ingestion/type-extractors/shared.js +14 -112
  187. package/dist/core/ingestion/type-extractors/swift.js +338 -7
  188. package/dist/core/ingestion/type-extractors/types.d.ts +40 -8
  189. package/dist/core/ingestion/type-extractors/typescript.js +141 -9
  190. package/dist/core/ingestion/utils/ast-helpers.d.ts +83 -0
  191. package/dist/core/ingestion/utils/ast-helpers.js +817 -0
  192. package/dist/core/ingestion/utils/call-analysis.d.ts +73 -0
  193. package/dist/core/ingestion/utils/call-analysis.js +527 -0
  194. package/dist/core/ingestion/utils/event-loop.d.ts +5 -0
  195. package/dist/core/ingestion/utils/event-loop.js +5 -0
  196. package/dist/core/ingestion/utils/language-detection.d.ts +9 -0
  197. package/dist/core/ingestion/utils/language-detection.js +70 -0
  198. package/dist/core/ingestion/utils/verbose.d.ts +1 -0
  199. package/dist/core/ingestion/utils/verbose.js +7 -0
  200. package/dist/core/ingestion/workers/parse-worker.d.ts +55 -5
  201. package/dist/core/ingestion/workers/parse-worker.js +415 -225
  202. package/dist/core/lbug/csv-generator.js +51 -1
  203. package/dist/core/lbug/lbug-adapter.d.ts +10 -0
  204. package/dist/core/lbug/lbug-adapter.js +75 -4
  205. package/dist/core/lbug/schema.d.ts +8 -4
  206. package/dist/core/lbug/schema.js +65 -4
  207. package/dist/core/tree-sitter/parser-loader.js +7 -1
  208. package/dist/core/wiki/cursor-client.d.ts +31 -0
  209. package/dist/core/wiki/cursor-client.js +127 -0
  210. package/dist/core/wiki/generator.d.ts +28 -9
  211. package/dist/core/wiki/generator.js +115 -18
  212. package/dist/core/wiki/graph-queries.d.ts +4 -0
  213. package/dist/core/wiki/graph-queries.js +7 -1
  214. package/dist/core/wiki/llm-client.d.ts +2 -0
  215. package/dist/core/wiki/llm-client.js +8 -4
  216. package/dist/core/wiki/prompts.d.ts +3 -3
  217. package/dist/core/wiki/prompts.js +6 -0
  218. package/dist/mcp/core/embedder.js +11 -3
  219. package/dist/mcp/core/lbug-adapter.d.ts +5 -0
  220. package/dist/mcp/core/lbug-adapter.js +23 -2
  221. package/dist/mcp/local/local-backend.d.ts +38 -5
  222. package/dist/mcp/local/local-backend.js +804 -63
  223. package/dist/mcp/resources.js +2 -0
  224. package/dist/mcp/tools.js +73 -4
  225. package/dist/server/api.d.ts +19 -1
  226. package/dist/server/api.js +66 -6
  227. package/dist/storage/git.d.ts +12 -0
  228. package/dist/storage/git.js +21 -0
  229. package/dist/storage/repo-manager.d.ts +3 -0
  230. package/package.json +25 -16
  231. package/dist/core/ingestion/named-binding-extraction.d.ts +0 -61
  232. package/dist/core/ingestion/named-binding-extraction.js +0 -363
  233. package/dist/core/ingestion/resolvers/index.d.ts +0 -18
  234. package/dist/core/ingestion/resolvers/index.js +0 -13
  235. package/dist/core/ingestion/resolvers/jvm.js +0 -87
  236. package/dist/core/ingestion/resolvers/php.d.ts +0 -15
  237. package/dist/core/ingestion/resolvers/php.js +0 -35
  238. package/dist/core/ingestion/type-extractors/index.d.ts +0 -22
  239. package/dist/core/ingestion/type-extractors/index.js +0 -31
  240. package/dist/core/ingestion/utils.d.ts +0 -138
  241. package/dist/core/ingestion/utils.js +0 -1290
  242. package/scripts/patch-tree-sitter-swift.cjs +0 -74
@@ -0,0 +1,1509 @@
1
+ /**
2
+ * COBOL source pre-processing and regex-based symbol extraction.
3
+ *
4
+ * DESIGN DECISION — Why regex instead of a full parser (ANTLR4, tree-sitter):
5
+ *
6
+ * 1. Performance: Regex processes ~1ms/file vs 50-200ms/file for ANTLR4/tree-sitter.
7
+ * On EPAGHE (14k COBOL files), this is ~14 seconds vs 12-47 minutes.
8
+ *
9
+ * 2. Reliability: tree-sitter-cobol@0.0.1's external scanner hangs indefinitely
10
+ * on ~5% of production files (no timeout possible). ANTLR4's proleap-cobol-parser
11
+ * is a Java project — using it from Node.js requires Java subprocesses or
12
+ * extracting .g4 grammars and generating JS/TS targets (significant effort).
13
+ *
14
+ * 3. Dialect compatibility: GnuCOBOL with Italian comments, patch markers in
15
+ * cols 1-6 (mzADD, estero, etc.), and vendor extensions. Formal grammars
16
+ * target COBOL-85 and would need dialect modifications.
17
+ *
18
+ * 4. Industry precedent: ctags, GitHub code navigation, and Sourcegraph all use
19
+ * regex-based extraction for code indexing. Full parsing is only needed for
20
+ * compilation or semantic analysis, not symbol extraction.
21
+ *
22
+ * 5. Determinism: Every regex pattern is tested with canonical COBOL input
23
+ * (see test/unit/cobol-preprocessor.test.ts). Same input always produces
24
+ * same output — no grammar ambiguity or parser state issues.
25
+ *
26
+ * This module provides:
27
+ * 1. preprocessCobolSource() — blanks the sequence-number area (cols 1-6: sequence numbers, patch markers) of fixed-format source; kept for potential future use
28
+ * 2. extractCobolSymbolsWithRegex() — single-pass state machine COBOL extraction
29
+ */
30
+ // ---------------------------------------------------------------------------
31
+ // Preserved exactly: preprocessCobolSource
32
+ // ---------------------------------------------------------------------------
33
/**
 * Normalize fixed-format COBOL source for regex-based extraction.
 *
 * Columns 1-6 (the sequence number area) carry no program text in
 * fixed-format COBOL. Any line whose first six characters contain a
 * non-space character gets those six characters replaced by exactly six
 * spaces, so position-sensitive regexes behave the same whether the area
 * holds numeric sequence numbers (000100), alphabetic patch markers
 * (mzADD, estero, #patch), or blanks.
 *
 * Notes on what this function does and does not touch:
 * - Free-format sources (a `>>SOURCE [FORMAT [IS]] FREE` directive within the
 *   first 10 lines) are returned unchanged, because cols 1-6 are program text.
 * - Lines shorter than 7 characters are left as they are.
 * - Line count and the column of every character from column 7 onward are
 *   preserved; on the fixed-format path CRLF line endings come back as LF.
 *
 * @param {string} content - Raw COBOL source text.
 * @returns {string} Source with the sequence area blanked out.
 */
export function preprocessCobolSource(content) {
    // Only the first 10 lines are inspected for the free-format directive.
    const firstLines = content.split('\n', 10).join('\n');
    if (/>>SOURCE\s+(?:FORMAT\s+(?:IS\s+)?)?FREE/i.test(firstLines)) {
        return content;
    }
    // Built with repeat() so the width of the blank prefix is explicit and
    // cannot be silently altered by whitespace-collapsing tools.
    const blankSequenceArea = ' '.repeat(6);
    const lines = content.split(/\r?\n/);
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        if (line.length < 7) {
            continue;
        }
        if (/\S/.test(line.substring(0, 6))) {
            // Same-width replacement keeps columns 7+ where they were.
            lines[i] = blankSequenceArea + line.substring(6);
        }
    }
    return lines.join('\n');
}
68
// ---------------------------------------------------------------------------
// Keyword sets: USING filters and EXCLUDED_PARA_NAMES
// ---------------------------------------------------------------------------
// COBOL calling-convention keywords to filter from USING parameter lists
const USING_KEYWORDS = new Set(['BY', 'VALUE', 'REFERENCE', 'CONTENT', 'ADDRESS', 'OF', 'RETURNING']);
// CALL ... USING keyword filter. Shares most entries with USING_KEYWORDS and adds
// the CALL-specific LENGTH / OMITTED forms; RETURNING is not part of this set.
const CALL_USING_FILTER = new Set([
    'BY', 'REFERENCE', 'CONTENT', 'VALUE',
    'ADDRESS', 'OF', 'LENGTH', 'OMITTED',
]);
// Words that look like "NAME." on a line of their own but are never paragraph names.
const EXCLUDED_PARA_NAMES = new Set([
    'DECLARATIVES', 'END', 'PROCEDURE', 'IDENTIFICATION',
    'ENVIRONMENT', 'DATA', 'WORKING-STORAGE', 'LINKAGE',
    'FILE', 'LOCAL-STORAGE', 'COMMUNICATION', 'REPORT',
    'SCREEN', 'INPUT-OUTPUT', 'CONFIGURATION',
    // COBOL verbs that appear alone on a line with period (false-positive in free-format)
    'GOBACK', 'STOP', 'EXIT', 'CONTINUE',
    'DISPLAY', 'ACCEPT', 'WRITE', 'READ', 'REWRITE', 'DELETE',
    'OPEN', 'CLOSE', 'RETURN', 'RELEASE', 'SORT', 'MERGE',
    // Explicit scope terminators: "END-IF." on its own line has the same
    // WORD-plus-period shape as a paragraph header. They are reserved words,
    // so excluding them can never hide a real paragraph.
    'END-ACCEPT', 'END-ADD', 'END-CALL', 'END-COMPUTE', 'END-DELETE',
    'END-DISPLAY', 'END-DIVIDE', 'END-EVALUATE', 'END-EXEC', 'END-IF',
    'END-MULTIPLY', 'END-PERFORM', 'END-READ', 'END-RETURN', 'END-REWRITE',
    'END-SEARCH', 'END-START', 'END-STRING', 'END-SUBTRACT', 'END-UNSTRING',
    'END-WRITE',
]);
88
// ---------------------------------------------------------------------------
// Regex constants (compiled once, reused across calls)
// ---------------------------------------------------------------------------
// NOTE: the patterns carrying the `g` flag (RE_PERFORM, RE_CALL, RE_CALL_DYNAMIC,
// RE_CANCEL, RE_CANCEL_DYNAMIC) keep `lastIndex` state between exec()/test()
// calls. Because they are shared module-level objects, callers need to reset
// `lastIndex` or iterate to exhaustion before reusing them on another string.
const RE_DIVISION = /\b(IDENTIFICATION|ENVIRONMENT|DATA|PROCEDURE)\s+DIVISION\b/i;
const RE_SECTION = /\b(WORKING-STORAGE|LINKAGE|FILE|LOCAL-STORAGE|SCREEN|INPUT-OUTPUT|CONFIGURATION)\s+SECTION\b/i;
// IDENTIFICATION DIVISION
const RE_PROGRAM_ID = /\bPROGRAM-ID\.\s*([A-Z][A-Z0-9-]*)(?:\s+IS\s+COMMON)?/i;
const RE_END_PROGRAM = /\bEND\s+PROGRAM\s+([A-Z][A-Z0-9-]*)\s*\./i;
const RE_AUTHOR = /^\s+AUTHOR\.\s*(.+)/i;
const RE_DATE_WRITTEN = /^\s+DATE-WRITTEN\.\s*(.+)/i;
const RE_DATE_COMPILED = /^\s+DATE-COMPILED\.\s*(.+)/i;
const RE_INSTALLATION = /^\s+INSTALLATION\.\s*(.+)/i;
// ENVIRONMENT DIVISION — SELECT
const RE_SELECT_START = /\bSELECT\s+(?:OPTIONAL\s+)?([A-Z][A-Z0-9-]+)/i;
// DATA DIVISION
// ^\s* (not ^\s+) to support both fixed-format (indented) and free-format (trimmed)
const RE_FD = /^\s*(?:FD|SD|RD)\s+([A-Z][A-Z0-9-]+)/i;
const RE_DATA_ITEM = /^\s*(\d{1,2})\s+([A-Z][A-Z0-9-]+)\s*(.*)/i;
const RE_ANONYMOUS_REDEFINES = /^\s*(\d{1,2})\s+REDEFINES\s+([A-Z][A-Z0-9-]+)/i;
// Both optional noise words are skipped ("VALUE IS 1", "VALUES ARE 1 2") so that
// group 2 starts at the first actual condition value.
const RE_88_LEVEL = /^\s*88\s+([A-Z][A-Z0-9-]+)\s+VALUES?\s+(?:(?:IS|ARE)\s+)?(.+)/i;
// PROCEDURE DIVISION
// These patterns support both fixed-format (7 leading spaces) and free-format (any indentation)
const RE_PROC_SECTION = /^\s*([A-Z][A-Z0-9-]+)\s+SECTION(?:\s+\d+)?\.\s*$/i;
const RE_PROC_PARAGRAPH = /^\s*([A-Z][A-Z0-9-]+)\.\s*$/i;
const RE_PERFORM = /\bPERFORM\s+([A-Z][A-Z0-9-]+)(?:\s+(?:THRU|THROUGH)\s+([A-Z][A-Z0-9-]+))?/gi;
// ALL DIVISIONS
// Both double-quoted ("PROG") and single-quoted ('PROG') targets are valid COBOL.
// Use separate alternation groups so quotes must match (prevents "PROG' false-matches).
const RE_CALL = /\bCALL\s+(?:"([^"]+)"|'([^']+)')/gi;
// Dynamic CALL via data item (no quotes): CALL WS-PROGRAM-NAME
const RE_CALL_DYNAMIC = /(?<![A-Z0-9-])\bCALL\s+([A-Z][A-Z0-9-]+)(?=\s|\.|$)/gi;
// The copybook name may be followed by whitespace, a period, or the end of the
// text (e.g. "COPY CUSTREC" with REPLACING continued on the next line).
const RE_COPY_UNQUOTED = /\bCOPY\s+([A-Z][A-Z0-9-]+)(?:\s|\.|$)/i;
const RE_COPY_QUOTED = /\bCOPY\s+(?:"([^"]+)"|'([^']+)')(?:\s|\.|$)/i;
// EXEC blocks
const RE_EXEC_SQL_START = /\bEXEC\s+SQL\b/i;
const RE_EXEC_CICS_START = /\bEXEC\s+CICS\b/i;
const RE_END_EXEC = /\bEND-EXEC\b/i;
// GO TO — control flow transfer (same graph semantics as PERFORM).
// Group 1 holds the whitespace-separated target list; an optional
// "DEPENDING ON x" tail is matched but not captured.
const RE_GOTO = /\bGO\s+TO\s+([A-Z][A-Z0-9-]+(?:\s+[A-Z][A-Z0-9-]+)*?)(?:\s+DEPENDING\s+ON\s+[A-Z][A-Z0-9-]+)?(?:\s*\.|$)/i;
// SORT/MERGE file references
const RE_SORT = /\bSORT\s+([A-Z][A-Z0-9-]+)/i;
const RE_MERGE = /\bMERGE\s+([A-Z][A-Z0-9-]+)/i;
// SEARCH — table access
const RE_SEARCH = /\bSEARCH\s+(?:ALL\s+)?([A-Z][A-Z0-9-]+)/i;
// CANCEL — program lifecycle
const RE_CANCEL = /\bCANCEL\s+(?:"([^"]+)"|'([^']+)')/gi;
const RE_CANCEL_DYNAMIC = /(?<![A-Z0-9-])\bCANCEL\s+([A-Z][A-Z0-9-]+)(?=\s|\.|$)/gi;
// Level 66 RENAMES
const RE_66_LEVEL = /^\s*66\s+([A-Z][A-Z0-9-]+)\s+RENAMES\s+([A-Z][A-Z0-9-]+)/i;
// DECLARATIVES boundary and USE AFTER EXCEPTION
const RE_DECLARATIVES_START = /^\s*DECLARATIVES\s*\.\s*$/i;
const RE_DECLARATIVES_END = /^\s*END\s+DECLARATIVES\s*\.\s*$/i;
const RE_USE_AFTER = /\bUSE\s+(?:AFTER\s+)?(?:STANDARD\s+)?(?:EXCEPTION|ERROR)\s+ON\s+([A-Z][A-Z0-9-]+|INPUT|OUTPUT|I-O|EXTEND)\b/i;
// SET statement (condition, index)
const RE_SET_TO_TRUE = /\bSET\s+((?:[A-Z][A-Z0-9-]+(?:\s+OF\s+[A-Z][A-Z0-9-]+)?\s+)+)TO\s+TRUE\b/i;
const RE_SET_INDEX = /\bSET\s+((?:[A-Z][A-Z0-9-]+\s+)+)(TO|UP\s+BY|DOWN\s+BY)\s+(\d+|[A-Z][A-Z0-9-]+)/i;
// INITIALIZE statement — data reset (captures targets before REPLACING/WITH clause)
const RE_INITIALIZE = /\bINITIALIZE\s+([\s\S]*?)(?=\bREPLACING\b|\bWITH\b|\.\s*$|$)/i;
const INITIALIZE_CLAUSE_KEYWORDS = new Set([
    'REPLACING', 'WITH', 'ALL', 'ALPHABETIC', 'ALPHANUMERIC',
    'NUMERIC', 'NATIONAL', 'DBCS', 'EGCS', 'FILLER',
]);
// EXEC DLI (IMS/DB)
const RE_EXEC_DLI_START = /\bEXEC\s+DLI\b/i;
// PROCEDURE DIVISION USING
const RE_PROC_USING = /\bPROCEDURE\s+DIVISION\s+USING\s+([\s\S]*?)(?:\.|$)/i;
// ENTRY point
const RE_ENTRY = /\bENTRY\s+(?:"([^"]+)"|'([^']+)')(?:\s+USING\s+([\s\S]*?))?(?:\.|$)/i;
157
// MOVE statement.
// Group 1: optional CORRESPONDING/CORR keyword, group 2: source data name,
// group 3: everything after TO (parsed by extractMoveTargets).
// Subscripts / reference modification and OF/IN qualifiers on the source are
// matched but not captured, so "MOVE WS-A(IDX) OF WS-REC TO WS-B" still yields
// WS-A as the source.
const RE_MOVE = /\bMOVE\s+((?:CORRESPONDING|CORR)\s+)?([A-Z][A-Z0-9-]+)(?:\s*\([^)]*\))*(?:\s+(?:OF|IN)\s+[A-Z][A-Z0-9-]+(?:\s*\([^)]*\))*)*\s+TO\s+(.+)/i;
// Figurative constants that are not data names (singular and plural spellings).
const MOVE_SKIP = new Set([
    'SPACES', 'SPACE', 'ZEROS', 'ZEROES', 'ZERO', 'LOW-VALUES', 'LOW-VALUE',
    'HIGH-VALUES', 'HIGH-VALUE', 'QUOTES', 'QUOTE', 'NULLS', 'NULL', 'ALL',
]);
/**
 * Parse the text after "MOVE ... TO" into an array of target variable names.
 *
 * Handles multiple targets, OF/IN qualifiers (the qualifier and the name that
 * follows it are dropped), parenthesized subscripts, trailing periods, and the
 * optional COBOL separator comma/semicolon between targets.
 *
 * @param {string} afterTo - Text captured after the TO keyword.
 * @returns {string[]} Target names in source order, original casing preserved;
 *   entries listed in MOVE_SKIP are omitted.
 */
function extractMoveTargets(afterTo) {
    // Strip the first period and everything after it
    const statement = afterTo.replace(/\..*$/, '').trim();
    if (!statement) {
        return [];
    }
    const tokens = statement
        // Remove subscript/reference-modification parenthesized suffixes
        .replace(/\([^)]*\)/g, '')
        // A comma or semicolon followed by whitespace (or end of text) is a
        // pure separator; without this "WS-A," fails the name test below.
        .replace(/[,;](?=\s|$)/g, ' ')
        .split(/\s+/)
        .filter((token) => token.length > 0);
    const targets = [];
    let skipNext = false;
    for (const token of tokens) {
        if (skipNext) {
            // Token following OF/IN is the qualifying group, not a target.
            skipNext = false;
            continue;
        }
        const upper = token.toUpperCase();
        if (upper === 'OF' || upper === 'IN') {
            skipNext = true;
            continue;
        }
        if (/^[A-Z][A-Z0-9-]+$/i.test(token) && !MOVE_SKIP.has(upper)) {
            targets.push(token);
        }
    }
    return targets;
}
194
// PERFORM: keywords that may follow PERFORM but are NOT paragraph/section names.
// Inline PERFORM loops (UNTIL, VARYING) and inline test clauses (WITH TEST,
// FOREVER) must not be stored as perform-target false positives.
const PERFORM_KEYWORD_SKIP = new Set([
    'UNTIL', 'VARYING', 'WITH', 'TEST', 'FOREVER',
]);
// SORT/MERGE clause keywords that should not be captured as file names
const SORT_CLAUSE_NOISE = new Set([
    'ON', 'ASCENDING', 'DESCENDING', 'KEY', 'WITH', 'DUPLICATES',
    'IN', 'ORDER', 'COLLATING', 'SEQUENCE', 'IS', 'THROUGH', 'THRU',
    'INPUT', 'OUTPUT', 'PROCEDURE', 'USING', 'GIVING',
]);
// COBOL statement verbs used as boundary detectors across accumulators.
// Shared by: callAccum flush trigger, inspectAccum flush trigger, and USING lookahead.
// Note: CALL is intentionally excluded — it's handled by the callAccum state machine.
// Including CALL here would cause the flush trigger to consume the new CALL line
// without re-detecting it as a CALL start.
// Entries are regex source fragments (hence the escaped \s+ in GO TO).
const COBOL_STATEMENT_VERBS = [
    'GO\\s+TO', 'PERFORM', 'MOVE', 'DISPLAY', 'ACCEPT',
    'INSPECT', 'SEARCH', 'SORT', 'MERGE', 'IF', 'EVALUATE',
    'SET', 'INITIALIZE', 'STOP', 'EXIT', 'GOBACK', 'CONTINUE',
    'READ', 'WRITE', 'REWRITE', 'DELETE', 'OPEN', 'CLOSE', 'START',
    'CANCEL', 'COMPUTE', 'ADD', 'SUBTRACT', 'MULTIPLY', 'DIVIDE',
    'STRING', 'UNSTRING',
];
/** Regex matching start of any COBOL statement verb (for accumulator flush triggers). */
const RE_STATEMENT_VERB_START = new RegExp(`^(?:${COBOL_STATEMENT_VERBS.join('|')})(?:\\s|$)`, 'i');
/**
 * Lookahead alternation for USING parameter extraction (stops before statement verbs).
 * Includes CALL (excluded from COBOL_STATEMENT_VERBS to avoid callAccum conflicts).
 *
 * Each verb is guarded by (?<![A-Z0-9-]): "\b" alone treats the hyphen as a word
 * boundary, so a parameter such as WS-DELETE or IN-STRING would otherwise be
 * mistaken for the start of a DELETE / STRING statement.
 */
const USING_VERB_LOOKAHEAD = [...COBOL_STATEMENT_VERBS, 'CALL']
    .filter((verb) => verb !== 'GO\\s+TO') // GO TO is listed explicitly in RE_USING_PARAMS
    .map((verb) => `(?<![A-Z0-9-])\\b${verb}(?=\\s|$)`)
    .join('|');
/**
 * Captures (group 1) the parameter text that follows USING, stopping before
 * RETURNING, ON EXCEPTION/OVERFLOW, NOT ON, END-CALL, GO TO, any statement verb,
 * a trailing period, or the end of the text. Every keyword stop is hyphen-aware
 * so data names that merely contain a keyword (RETURNING-AREA, WS-SET) are kept.
 */
const RE_USING_PARAMS = new RegExp(`(?<![A-Z0-9-])USING\\s+([\\s\\S]*?)(?=${[
    '(?<![A-Z0-9-])RETURNING(?![A-Z0-9-])',
    '(?<![A-Z0-9-])ON\\s+(?:EXCEPTION|OVERFLOW)\\b',
    '(?<![A-Z0-9-])NOT\\s+ON\\b',
    '(?<![A-Z0-9-])END-CALL(?![A-Z0-9-])',
    '(?<![A-Z0-9-])GO\\s+TO\\b',
    USING_VERB_LOOKAHEAD,
    '\\.\\s*$',
    '$',
].join('|')})`, 'i');
228
// ---------------------------------------------------------------------------
// Private helper: strip Italian inline comments (| and everything after)
// ---------------------------------------------------------------------------
/**
 * Cut a line at the first `|` that is not inside a quoted literal.
 *
 * A literal opens on `"` or `'` and closes on the next occurrence of the same
 * quote character; a `|` seen while a literal is open is kept as text.
 *
 * @param {string} line - A single source line.
 * @returns {string} The text before the comment marker, or the line unchanged
 *   when no unquoted `|` is present.
 */
function stripInlineComment(line) {
    let activeQuote = '';
    for (let pos = 0; pos < line.length; pos += 1) {
        const current = line.charAt(pos);
        if (activeQuote !== '') {
            // Inside a literal: only the matching quote character ends it.
            if (current === activeQuote) {
                activeQuote = '';
            }
            continue;
        }
        if (current === '|') {
            return line.slice(0, pos);
        }
        if (current === '"' || current === "'") {
            activeQuote = current;
        }
    }
    return line;
}
248
// ---------------------------------------------------------------------------
// Private helper: parse data item trailing clauses (PIC, USAGE, etc.)
// ---------------------------------------------------------------------------
/**
 * Extract the clauses that follow a data item's level number and name.
 *
 * Keyword detection is hyphen-aware and ignores quoted literals, so a data
 * name such as OLD-VALUE or WS-COMP (e.g. as a REDEFINES operand) and literal
 * text such as VALUE "GLOBAL" are never mistaken for clause keywords.
 *
 * @param {string} rest - Text after the data name, e.g. `PIC X(5) VALUE "A".`
 * @returns {object} Result with these keys:
 *   - `pic`         picture string (only when a PIC/PICTURE clause is found)
 *   - `usage`       upper-cased usage keyword (only when found)
 *   - `redefines`   redefined data name (only when found)
 *   - `occurs`      first OCCURS integer; `dependingOn` the DEPENDING ON name
 *                   with any subscript removed
 *   - `isExternal`, `isGlobal`  always present: `true` or `undefined`
 *   - `value`       VALUE literal as a string (only when one is recognized):
 *                   quoted text without quotes, `X'..'`-style for prefixed
 *                   literals, `ALL '..'`, a numeric string, or an upper-cased
 *                   figurative constant / identifier
 */
function parseDataItemClauses(rest) {
    const result = {};
    // Strip trailing period for easier parsing
    const text = rest.replace(/\.\s*$/, '');
    // Same-length copy with quoted literals blanked out. Keywords are searched
    // in this copy; indices remain valid for slicing the real text.
    const scan = text.replace(/"[^"]*"|'[^']*'/g, (literal) => ' '.repeat(literal.length));

    // PIC / PICTURE [IS] <picture-string>
    const picMatch = scan.match(/(?<![A-Z0-9-])PIC(?:TURE)?\s+(?:IS\s+)?(\S+)/i);
    if (picMatch) {
        result.pic = picMatch[1];
    }

    // USAGE [IS] <usage-type> — including non-standard COMP-6, COMP-X etc. —
    // falling back to a standalone COMP variant without the USAGE keyword.
    const usageMatch = scan.match(/(?<![A-Z0-9-])USAGE\s+(?:IS\s+)?(COMP(?:UTATIONAL)?(?:-[0-9X])?|BINARY|PACKED-DECIMAL|DISPLAY|INDEX|POINTER|NATIONAL)\b/i)
        ?? scan.match(/(?<![A-Z0-9-])(COMP(?:UTATIONAL)?(?:-[0-9X])?|BINARY|PACKED-DECIMAL)\b/i);
    if (usageMatch) {
        result.usage = usageMatch[1].toUpperCase();
    }

    // REDEFINES <name>
    const redefMatch = scan.match(/(?<![A-Z0-9-])REDEFINES\s+([A-Z][A-Z0-9-]+)/i);
    if (redefMatch) {
        result.redefines = redefMatch[1];
    }

    // OCCURS <n> [TO <m>] [TIMES] [DEPENDING ON <field>]
    const occursMatch = scan.match(/(?<![A-Z0-9-])OCCURS\s+(\d+)(?:\s+TO\s+(\d+))?\s*(?:TIMES\s*)?(?:DEPENDING\s+ON\s+([A-Z][A-Z0-9-]+(?:\s*\([^)]*\))?))?/i);
    if (occursMatch) {
        result.occurs = Number.parseInt(occursMatch[1], 10);
        if (occursMatch[3]) {
            // Strip any subscript from DEPENDING ON field
            result.dependingOn = occursMatch[3].replace(/\s*\([^)]*\)/, '').trim();
        }
    }

    // [IS] EXTERNAL / [IS] GLOBAL — the IS noise word is optional.
    result.isExternal = /(?<![A-Z0-9-])EXTERNAL(?![A-Z0-9-])/i.test(scan) || undefined;
    result.isGlobal = /(?<![A-Z0-9-])GLOBAL(?![A-Z0-9-])/i.test(scan) || undefined;

    // VALUE [IS] literal/constant
    const valueIdx = scan.search(/(?<![A-Z0-9-])VALUE(?![A-Z0-9-])/i);
    if (valueIdx < 0) {
        return result;
    }
    const afterValue = text.substring(valueIdx + 'VALUE'.length).replace(/^\s+IS\s+/i, '').trimStart();

    // Quoted: "..." or '...' (with optional type prefix X, N, G, B)
    const quotedMatch = afterValue.match(/^([XNGB])?(?:"([^"]*)"|'([^']*)')/i);
    if (quotedMatch) {
        const literal = quotedMatch[2] ?? quotedMatch[3];
        const prefix = quotedMatch[1] ? quotedMatch[1].toUpperCase() : '';
        result.value = prefix ? `${prefix}'${literal}'` : literal;
        return result;
    }
    // ALL "..." or ALL '...'
    const allMatch = afterValue.match(/^ALL\s+(?:"([^"]*)"|'([^']*)')/i);
    if (allMatch) {
        result.value = `ALL '${allMatch[1] ?? allMatch[2]}'`;
        return result;
    }
    // Numeric (optionally signed, optional decimal part)
    const numMatch = afterValue.match(/^([+-]?\d+\.?\d*)/);
    if (numMatch) {
        result.value = numMatch[1];
        return result;
    }
    // Figurative constant or identifier
    const identMatch = afterValue.match(/^([A-Z][A-Z0-9-]*)/i);
    if (identMatch) {
        result.value = identMatch[1].toUpperCase();
    }
    return result;
}
324
+ // ---------------------------------------------------------------------------
325
+ // Private helper: parse 88-level condition values
326
+ // ---------------------------------------------------------------------------
327
/**
 * Parse the value list of an 88-level condition entry into individual values.
 *
 * If the text contains any quoted literal, only the quoted contents are
 * returned (in source order). Otherwise the text is split on whitespace,
 * THRU/THROUGH keywords are dropped (both range endpoints stay in the list),
 * and COBOL separator commas/semicolons trailing a token are removed so that
 * `1, 2, 3` yields `['1', '2', '3']` rather than `['1,', '2,', '3']`.
 *
 * @param {string} valuesStr - Value-list text, optionally period-terminated.
 * @returns {string[]} The extracted values; empty when nothing is found.
 */
function parseConditionValues(valuesStr) {
    // Strip the statement-terminating period, if any.
    const text = valuesStr.replace(/\.\s*$/, '').trim();
    // Quoted literals: "O" "Y" 'I'
    const quoted = Array.from(text.matchAll(/(?:"([^"]*)"|'([^']*)')/g), (m) => m[1] ?? m[2]);
    if (quoted.length > 0) {
        return quoted;
    }
    // No quotes: 11 12 16 17 21, or 1 THRU 5, or 1, 2, 3
    const values = [];
    for (const rawToken of text.split(/\s+/)) {
        // A comma or semicolon followed by a space is a pure separator in COBOL.
        const token = rawToken.replace(/[,;]+$/, '');
        if (token.length === 0) {
            continue;
        }
        const upper = token.toUpperCase();
        if (upper === 'THRU' || upper === 'THROUGH') {
            // Range keyword itself is not a value; its endpoints are kept.
            continue;
        }
        values.push(token);
    }
    return values;
}
357
/**
 * Parse an accumulated SELECT statement (FILE-CONTROL entry) into a
 * file-declaration record.
 *
 * @param {string} stmt - Full SELECT statement text; may span several lines.
 * @param {number} startLine - Line number recorded on the result as `line`.
 * @returns {object|null} `null` when the text does not start with
 *   `SELECT [OPTIONAL] <name>`; otherwise an object with `selectName`,
 *   `assignTo` (empty string when no ASSIGN clause is recognised), `line`,
 *   and, when present, `organization`, `access`, `recordKey`,
 *   `alternateKeys`, `fileStatus`. `isOptional` is `true` or `undefined`.
 */
function parseSelectStatement(stmt, startLine) {
    // Normalize whitespace so every clause pattern can assume single spaces.
    const text = stmt.replace(/\s+/g, ' ').trim();
    const nameMatch = text.match(/^SELECT\s+(?:OPTIONAL\s+)?([A-Z][A-Z0-9-]+)/i);
    if (!nameMatch) {
        return null;
    }
    const result = {
        selectName: nameMatch[1],
        assignTo: '',
        line: startLine,
    };
    // ASSIGN [TO] "literal" | 'literal' | NAME — both quote styles are accepted.
    const assignMatch = text.match(/\bASSIGN\s+(?:TO\s+)?(?:"([^"]+)"|'([^']+)'|([A-Z][A-Z0-9-]*))/i);
    if (assignMatch) {
        result.assignTo = assignMatch[1] ?? assignMatch[2] ?? assignMatch[3] ?? '';
    }
    const orgMatch = text.match(/\bORGANIZATION\s+(?:IS\s+)?(SEQUENTIAL|INDEXED|RELATIVE|LINE\s+SEQUENTIAL)/i);
    if (orgMatch) {
        result.organization = orgMatch[1].toUpperCase();
    }
    const accessMatch = text.match(/\bACCESS\s+(?:MODE\s+)?(?:IS\s+)?(SEQUENTIAL|RANDOM|DYNAMIC)/i);
    if (accessMatch) {
        result.access = accessMatch[1].toUpperCase();
    }
    // Primary RECORD KEY only: the lookbehind rejects the "RECORD KEY" that is
    // part of an ALTERNATE RECORD KEY clause.
    const keyMatch = text.match(/(?<!\bALTERNATE\s)\bRECORD\s+KEY\s+(?:IS\s+)?([A-Z][A-Z0-9-]+)/i);
    if (keyMatch) {
        result.recordKey = keyMatch[1];
    }
    // ALTERNATE RECORD KEY (may occur several times)
    const alternateKeys = Array.from(
        text.matchAll(/\bALTERNATE\s+RECORD\s+KEY\s+(?:IS\s+)?([A-Z][A-Z0-9-]+)/gi),
        (m) => m[1],
    );
    if (alternateKeys.length > 0) {
        result.alternateKeys = alternateKeys;
    }
    // FILE STATUS IS / STATUS IS — the lookbehind prevents matching the tail of
    // a hyphenated data name such as CUST-STATUS.
    const statusMatch = text.match(/(?<![A-Z0-9-])\b(?:FILE\s+)?STATUS\s+(?:IS\s+)?([A-Z][A-Z0-9-]+)/i);
    if (statusMatch) {
        result.fileStatus = statusMatch[1];
    }
    // SELECT OPTIONAL flag (left undefined rather than false when absent)
    result.isOptional = /^SELECT\s+OPTIONAL\b/i.test(text) || undefined;
    return result;
}
400
/**
 * Parse an EXEC SQL ... END-EXEC block.
 *
 * @param {string} block - Full block text including the EXEC SQL / END-EXEC wrapper.
 * @param {number} line - Line number copied onto the result.
 * @returns {{line: number, tables: string[], cursors: string[],
 *   hostVariables: string[], operation: string, includeMember: (string|undefined)}}
 *   `operation` is the first SQL keyword when it is one of SELECT, INSERT,
 *   UPDATE, DELETE, DECLARE, OPEN, CLOSE, FETCH; otherwise `'OTHER'`.
 *   `tables` holds upper-cased, de-duplicated names found after FROM,
 *   INSERT INTO, UPDATE and JOIN. `includeMember` is set only for
 *   `INCLUDE <member>`.
 */
function parseExecSqlBlock(block, line) {
    // Strip the EXEC SQL ... END-EXEC wrapper and collapse whitespace.
    const body = block
        .replace(/\bEXEC\s+SQL\b/i, '')
        .replace(/\bEND-EXEC\b/i, '')
        .replace(/\s+/g, ' ')
        .trim();
    // Determine operation from the first SQL keyword.
    const firstWord = body.split(/\s+/)[0]?.toUpperCase() || '';
    const KNOWN_OPERATIONS = new Set([
        'SELECT', 'INSERT', 'UPDATE', 'DELETE',
        'DECLARE', 'OPEN', 'CLOSE', 'FETCH',
    ]);
    // INCLUDE is reported as OTHER; its member name is extracted below.
    const operation = KNOWN_OPERATIONS.has(firstWord) ? firstWord : 'OTHER';
    // EXEC SQL INCLUDE — extract the member name.
    let includeMember;
    if (firstWord === 'INCLUDE') {
        const includeMatch = body.match(/^INCLUDE\s+(?:'([^']+)'|"([^"]+)"|([A-Z][A-Z0-9_-]+))/i);
        if (includeMatch) {
            includeMember = includeMatch[1] ?? includeMatch[2] ?? includeMatch[3];
        }
    }
    // Table names after FROM, INSERT INTO, UPDATE, JOIN.
    const tablePatterns = [
        /\bFROM\s+([A-Z][A-Z0-9_]+)/gi,
        /\bINSERT\s+INTO\s+([A-Z][A-Z0-9_]+)/gi,
        // "FOR UPDATE [OF col]" is a cursor clause, not an UPDATE statement;
        // without the lookbehind "OF" would be recorded as a table.
        /(?<!\bFOR\s)\bUPDATE\s+([A-Z][A-Z0-9_]+)/gi,
        /\bJOIN\s+([A-Z][A-Z0-9_]+)/gi,
    ];
    // A Set keeps first-seen order while de-duplicating. Host variables
    // (":NAME") can never match because every pattern requires a leading letter.
    const tableSet = new Set();
    for (const re of tablePatterns) {
        for (const m of body.matchAll(re)) {
            tableSet.add(m[1].toUpperCase());
        }
    }
    const tables = [...tableSet];
    // Cursor names from DECLARE ... CURSOR (duplicates are kept).
    const cursors = Array.from(
        body.matchAll(/\bDECLARE\s+([A-Z][A-Z0-9_-]+)\s+CURSOR\b/gi),
        (m) => m[1],
    );
    // Host variables: :VARIABLE-NAME (colon stripped, de-duplicated, case kept).
    const hostVariables = [
        ...new Set(Array.from(body.matchAll(/:([A-Z][A-Z0-9-]+)/gi), (m) => m[1])),
    ];
    return { line, tables, cursors, hostVariables, operation, includeMember };
}
460
+ // ---------------------------------------------------------------------------
461
+ // Private helper: parse EXEC CICS block
462
+ // ---------------------------------------------------------------------------
463
/**
 * Parse an EXEC CICS ... END-EXEC block.
 *
 * @param {string} block - Full block text including the EXEC CICS / END-EXEC wrapper.
 * @param {number} line - Line number copied onto the result.
 * @returns {object} `{ line, command }` plus whichever of `mapName`,
 *   `programName`/`programIsLiteral`, `transId`, `fileName`/`fileIsLiteral`,
 *   `queueName`, `labelName`, `intoField`, `fromField` were found.
 */
function parseExecCicsBlock(block, line) {
    // Remove the wrapper keywords, then collapse whitespace.
    const unwrapped = block.replace(/\bEXEC\s+CICS\b/i, '').replace(/\bEND-EXEC\b/i, '');
    const body = unwrapped.replace(/\s+/g, ' ').trim();
    // Two-word commands are recognised by prefix; the first list entry that matches wins.
    const twoWordCommands = [
        'SEND MAP', 'RECEIVE MAP', 'SEND TEXT', 'SEND CONTROL',
        'READ NEXT', 'READ PREV',
        'WRITEQ TS', 'WRITEQ TD', 'READQ TS', 'READQ TD',
        'DELETEQ TS', 'DELETEQ TD',
        'HANDLE ABEND', 'HANDLE AID', 'HANDLE CONDITION',
        'START TRANSID',
    ];
    const upperBody = body.toUpperCase();
    const prefixHit = twoWordCommands.find((candidate) => upperBody.startsWith(candidate));
    // Otherwise the command is simply the first token of the body.
    const command = prefixHit || (body.split(/\s+/)[0]?.toUpperCase() || '');
    const result = { line, command };
    // Option whose argument is a quoted literal (group 1) or a bare name (group 2).
    const literalOrName = (pattern) => {
        const hit = body.match(pattern);
        return hit ? { value: hit[1] ?? hit[2], isLiteral: !!hit[1] } : null;
    };
    // Option whose argument is always a bare data name (group 1).
    const bareName = (pattern) => {
        const hit = body.match(pattern);
        return hit ? hit[1] : null;
    };
    // MAP('name') / MAP("name") / MAP(IDENTIFIER)
    const map = literalOrName(/\bMAP\s*\(\s*(?:['"]([^'"]+)['"]|([A-Z][A-Z0-9-]+))\s*\)/i);
    if (map) {
        result.mapName = map.value;
    }
    // PROGRAM('name') / PROGRAM(VARIABLE) — also records whether it was a literal
    const program = literalOrName(/\bPROGRAM\s*\(\s*(?:['"]([^'"]+)['"]|([A-Z][A-Z0-9-]+))\s*\)/i);
    if (program) {
        result.programName = program.value;
        result.programIsLiteral = program.isLiteral;
    }
    // TRANSID('name') / TRANSID(VARIABLE)
    const trans = literalOrName(/\bTRANSID\s*\(\s*(?:['"]([^'"]+)['"]|([A-Z][A-Z0-9-]+))\s*\)/i);
    if (trans) {
        result.transId = trans.value;
    }
    // FILE(...) or DATASET(...) — also records whether it was a literal
    const file = literalOrName(/\b(?:FILE|DATASET)\s*\(\s*(?:['"]([^'"]+)['"]|([A-Z][A-Z0-9-]+))\s*\)/i);
    if (file) {
        result.fileName = file.value;
        result.fileIsLiteral = file.isLiteral;
    }
    // QUEUE('name') / QUEUE(VARIABLE)
    const queue = literalOrName(/\bQUEUE\s*\(\s*(?:['"]([^'"]+)['"]|([A-Z][A-Z0-9-]+))\s*\)/i);
    if (queue) {
        result.queueName = queue.value;
    }
    // LABEL(paragraph-name)
    const label = bareName(/\bLABEL\s*\(\s*([A-Z][A-Z0-9-]+)\s*\)/i);
    if (label !== null) {
        result.labelName = label;
    }
    // INTO(data-area)
    const into = bareName(/\bINTO\s*\(\s*([A-Z][A-Z0-9-]+)\s*\)/i);
    if (into !== null) {
        result.intoField = into;
    }
    // FROM(data-area)
    const from = bareName(/\bFROM\s*\(\s*([A-Z][A-Z0-9-]+)\s*\)/i);
    if (from !== null) {
        result.fromField = from;
    }
    return result;
}
530
+ // ---------------------------------------------------------------------------
531
+ // Private helper: parse EXEC DLI block (IMS/DB)
532
+ // ---------------------------------------------------------------------------
533
/**
 * Parse an EXEC DLI ... END-EXEC block.
 *
 * @param {string} block - Full block text including the EXEC DLI / END-EXEC wrapper.
 * @param {number} line - Line number copied onto the result.
 * @returns {object} `{ line, verb }` plus whichever of `pcbNumber` (integer),
 *   `segmentName`, `intoField`, `fromField`, `psbName` were found.
 */
function parseExecDliBlock(block, line) {
    const unwrapped = block.replace(/\bEXEC\s+DLI\b/i, '').replace(/\bEND-EXEC\b/i, '');
    const body = unwrapped.replace(/\s+/g, ' ').trim();
    // The verb is the first token of the unwrapped body.
    const [firstToken] = body.split(/\s+/);
    const result = { line, verb: firstToken?.toUpperCase() || '' };
    // Each entry: result field, pattern whose group 1 is the value, optional converter.
    const extractors = [
        ['pcbNumber', /\bUSING\s+PCB\s*\(\s*(\d+)\s*\)/i, (digits) => parseInt(digits, 10)],
        ['segmentName', /\bSEGMENT\s*\(\s*([A-Z][A-Z0-9-]*)\s*\)/i],
        ['intoField', /\bINTO\s*\(\s*([A-Z][A-Z0-9-]+)\s*\)/i],
        ['fromField', /\bFROM\s*\(\s*([A-Z][A-Z0-9-]+)\s*\)/i],
        ['psbName', /\bPSB\s*\(\s*([A-Z][A-Z0-9-]+)\s*\)/i],
    ];
    for (const [field, pattern, convert] of extractors) {
        const hit = body.match(pattern);
        if (hit) {
            result[field] = convert ? convert(hit[1]) : hit[1];
        }
    }
    return result;
}
554
+ // ---------------------------------------------------------------------------
555
+ // Main extraction: single-pass state machine
556
+ // ---------------------------------------------------------------------------
557
+ /**
558
+ * Extract COBOL symbols using a single-pass state machine.
559
+ * Extracts program name, paragraphs, sections, CALL, PERFORM, COPY,
560
+ * data items, file declarations, FD entries, and program metadata.
561
+ */
562
+ export function extractCobolSymbolsWithRegex(content, _filePath) {
563
+ const rawLines = content.split(/\r?\n/);
564
+ const result = {
565
+ programName: null,
566
+ programs: [],
567
+ paragraphs: [],
568
+ sections: [],
569
+ performs: [],
570
+ calls: [],
571
+ copies: [],
572
+ dataItems: [],
573
+ fileDeclarations: [],
574
+ fdEntries: [],
575
+ programMetadata: {},
576
+ execSqlBlocks: [],
577
+ execCicsBlocks: [],
578
+ procedureUsing: [],
579
+ entryPoints: [],
580
+ moves: [],
581
+ gotos: [],
582
+ sorts: [],
583
+ searches: [],
584
+ cancels: [],
585
+ execDliBlocks: [],
586
+ declaratives: [],
587
+ sets: [],
588
+ inspects: [],
589
+ initializes: [],
590
+ };
591
+ // --- State ---
592
+ let currentDivision = null;
593
+ let currentDataSection = 'unknown';
594
+ let currentEnvSection = null;
595
+ let currentParagraph = null;
596
+ // Program boundary stack for nested PROGRAM-ID / END PROGRAM tracking
597
+ const programBoundaryStack = [];
598
+ // SELECT accumulator (multi-line)
599
+ let selectAccum = null;
600
+ let selectStartLine = 0;
601
+ // PROCEDURE DIVISION USING on next line
602
+ let pendingProcUsing = false;
603
+ // SORT/MERGE accumulator (multi-line SORT ... USING ... GIVING ...)
604
+ let sortAccum = null;
605
+ let sortStartLine = 0;
606
+ // EXEC block accumulator (multi-line EXEC SQL / EXEC CICS / EXEC DLI)
607
+ let execAccum = null;
608
+ // DECLARATIVES state
609
+ let inDeclaratives = false;
610
+ // INSPECT accumulator (multi-line)
611
+ let inspectAccum = null;
612
+ let inspectStartLine = 0;
613
+ // CALL accumulator (multi-line CALL ... USING on separate lines)
614
+ let callAccum = null;
615
+ let callAccumLine = 0;
616
+ // FD tracking: after seeing FD, the next 01-level data item is its record
617
+ let pendingFdName = null;
618
+ let pendingFdLine = 0;
619
+ // Continuation line buffer
620
+ let pendingLine = null;
621
+ let pendingLineNumber = 0;
622
+ // --- Detect source format: free vs fixed ---
623
+ // GnuCOBOL uses >>SOURCE FREE directive, typically in first 5 lines
624
+ let isFreeFormat = false;
625
+ for (let i = 0; i < Math.min(rawLines.length, 10); i++) {
626
+ if (/>>SOURCE\s+(?:FORMAT\s+(?:IS\s+)?)?FREE/i.test(rawLines[i])) {
627
+ isFreeFormat = true;
628
+ break;
629
+ }
630
+ }
631
+ // --- Process each raw line ---
632
+ for (let i = 0; i < rawLines.length; i++) {
633
+ const raw = rawLines[i];
634
+ if (isFreeFormat) {
635
+ // FREE FORMAT: no column-position rules
636
+ // Skip >>SOURCE directive lines
637
+ if (/^[ \t]*>>/.test(raw))
638
+ continue;
639
+ // Skip free-format comment lines (*> at start of content)
640
+ const trimmed = raw.trimStart();
641
+ if (trimmed.startsWith('*>') || trimmed.length === 0)
642
+ continue;
643
+ // Strip inline *> comments (quote-aware)
644
+ let commentIdx = -1;
645
+ let ffInQuote = null;
646
+ for (let ci = 0; ci < raw.length - 1; ci++) {
647
+ const c = raw[ci];
648
+ if (ffInQuote) {
649
+ if (c === ffInQuote)
650
+ ffInQuote = null;
651
+ }
652
+ else if (c === '"' || c === "'") {
653
+ ffInQuote = c;
654
+ }
655
+ else if (c === '*' && raw[ci + 1] === '>') {
656
+ commentIdx = ci;
657
+ break;
658
+ }
659
+ }
660
+ const line = commentIdx >= 0 ? raw.substring(0, commentIdx) : raw;
661
+ // Free-format lines are logical lines (no continuation indicator)
662
+ const lineNum = i + 1;
663
+ processLogicalLine(line.trim(), lineNum);
664
+ continue;
665
+ }
666
+ // FIXED FORMAT: column-position-based processing
667
+ // Skip lines too short to have indicator area
668
+ if (raw.length < 7) {
669
+ // If there's a pending continuation, flush it
670
+ if (pendingLine !== null) {
671
+ processLogicalLine(pendingLine, pendingLineNumber);
672
+ pendingLine = null;
673
+ }
674
+ continue;
675
+ }
676
+ const indicator = raw[6];
677
+ // Comment line: indicator is '*' or '/'
678
+ if (indicator === '*' || indicator === '/') {
679
+ continue;
680
+ }
681
+ // Continuation line: indicator is '-'
682
+ if (indicator === '-') {
683
+ if (pendingLine !== null) {
684
+ const continuation = raw.substring(7).trimStart();
685
+ // Handle literal continuation: if continuation starts with a quote,
686
+ // remove the trailing quote from the predecessor and skip the opening quote
687
+ if (continuation.length > 0 && (continuation[0] === '"' || continuation[0] === "'")) {
688
+ const quoteChar = continuation[0];
689
+ const lastQuoteIdx = pendingLine.lastIndexOf(quoteChar);
690
+ if (lastQuoteIdx >= 0) {
691
+ pendingLine = pendingLine.substring(0, lastQuoteIdx) + continuation.substring(1);
692
+ }
693
+ else {
694
+ pendingLine += continuation;
695
+ }
696
+ }
697
+ else {
698
+ pendingLine += continuation;
699
+ }
700
+ }
701
+ continue;
702
+ }
703
+ // Normal line — flush any pending continuation first
704
+ if (pendingLine !== null) {
705
+ processLogicalLine(pendingLine, pendingLineNumber);
706
+ pendingLine = null;
707
+ }
708
+ // Strip inline Italian comments, then use area A+B (from col 7 onwards,
709
+ // but keep full line for indentation-sensitive paragraph/section detection)
710
+ const cleaned = stripInlineComment(raw);
711
+ // Buffer as new pending logical line
712
+ pendingLine = cleaned;
713
+ pendingLineNumber = i + 1; // 1-indexed (consistent with free-format)
714
+ }
715
+ // Flush final pending line
716
+ if (pendingLine !== null) {
717
+ processLogicalLine(pendingLine, pendingLineNumber);
718
+ }
719
+ // Flush any pending SELECT
720
+ flushSelect();
721
+ // Flush any pending SORT/MERGE accumulator (truncated file without trailing period)
722
+ flushSort();
723
+ // Flush any pending INSPECT accumulator (truncated file without trailing period)
724
+ flushInspect();
725
+ // Flush any pending CALL accumulator (truncated file without trailing period)
726
+ flushCallAccum();
727
+ // Flush any pending EXEC block (truncated file without END-EXEC)
728
+ if (execAccum !== null) {
729
+ if (execAccum.type === 'sql') {
730
+ result.execSqlBlocks.push(parseExecSqlBlock(execAccum.lines, execAccum.startLine));
731
+ }
732
+ else if (execAccum.type === 'cics') {
733
+ result.execCicsBlocks.push(parseExecCicsBlock(execAccum.lines, execAccum.startLine));
734
+ }
735
+ else if (execAccum.type === 'dli') {
736
+ result.execDliBlocks.push(parseExecDliBlock(execAccum.lines, execAccum.startLine));
737
+ }
738
+ execAccum = null;
739
+ }
740
+ // If we saw an FD but never found its record, emit it without a record name
741
+ if (pendingFdName !== null) {
742
+ result.fdEntries.push({ fdName: pendingFdName, line: pendingFdLine });
743
+ pendingFdName = null;
744
+ }
745
+ // Finalize any remaining programs on the boundary stack (e.g., single-program
746
+ // files without END PROGRAM, or outermost programs in nested files)
747
+ while (programBoundaryStack.length > 0) {
748
+ const topProgram = programBoundaryStack.pop();
749
+ result.programs.push({
750
+ name: topProgram.name,
751
+ startLine: topProgram.startLine,
752
+ endLine: rawLines.length,
753
+ nestingDepth: programBoundaryStack.length,
754
+ procedureUsing: topProgram.procedureUsing,
755
+ isCommon: topProgram.isCommon,
756
+ });
757
+ }
758
+ // Sort by startLine so outer programs come first
759
+ if (result.programs.length > 1) {
760
+ result.programs.sort((a, b) => a.startLine - b.startLine);
761
+ }
762
+ return result;
763
+ // =========================================================================
764
+ // Inner function: process one logical line (after continuation merging)
765
+ // =========================================================================
766
/**
 * Process one logical line and update the enclosing extractor's state and
 * `result`.
 *
 * Checks are applied in this order, and most of them return early:
 *   1. an open EXEC block swallows the line (closed on END-EXEC);
 *   2. start of an EXEC SQL / CICS / DLI block;
 *   3. END PROGRAM, DECLARATIVES start/end, PROGRAM-ID outside the
 *      identification division;
 *   4. division and section headers (state transitions only);
 *   5. COPY extraction (does not return, the line is processed further);
 *   6. multi-line CALL accumulation;
 *   7. dispatch to the extractor for the current division.
 *
 * @param {string} line - Logical line text.
 * @param {number} lineNum - Line number recorded on extracted items.
 */
function processLogicalLine(line, lineNum) {
    // --- EXEC block accumulation (spans any division) ---
    if (execAccum !== null) {
        execAccum.lines += ' ' + line;
        if (RE_END_EXEC.test(line)) {
            if (execAccum.type === 'sql') {
                result.execSqlBlocks.push(parseExecSqlBlock(execAccum.lines, execAccum.startLine));
            }
            else if (execAccum.type === 'cics') {
                result.execCicsBlocks.push(parseExecCicsBlock(execAccum.lines, execAccum.startLine));
            }
            else if (execAccum.type === 'dli') {
                result.execDliBlocks.push(parseExecDliBlock(execAccum.lines, execAccum.startLine));
            }
            execAccum = null;
        }
        return; // While accumulating, skip normal processing
    }
    // Check for EXEC SQL / EXEC CICS start
    // Flush any pending CALL accumulator before entering EXEC block
    if (RE_EXEC_SQL_START.test(line)) {
        flushCallAccum();
        execAccum = { type: 'sql', lines: line, startLine: lineNum };
        // If END-EXEC is on the same line, finalize immediately
        if (RE_END_EXEC.test(line)) {
            result.execSqlBlocks.push(parseExecSqlBlock(execAccum.lines, execAccum.startLine));
            execAccum = null;
        }
        return;
    }
    if (RE_EXEC_CICS_START.test(line)) {
        flushCallAccum();
        execAccum = { type: 'cics', lines: line, startLine: lineNum };
        if (RE_END_EXEC.test(line)) {
            result.execCicsBlocks.push(parseExecCicsBlock(execAccum.lines, execAccum.startLine));
            execAccum = null;
        }
        return;
    }
    if (RE_EXEC_DLI_START.test(line)) {
        flushCallAccum();
        execAccum = { type: 'dli', lines: line, startLine: lineNum };
        if (RE_END_EXEC.test(line)) {
            result.execDliBlocks.push(parseExecDliBlock(execAccum.lines, execAccum.startLine));
            execAccum = null;
        }
        return;
    }
    // --- END PROGRAM boundary detection ---
    const endProgramMatch = line.match(RE_END_PROGRAM);
    if (endProgramMatch) {
        // Flush any pending accumulators at program boundary
        flushCallAccum();
        flushSort();
        flushInspect();
        // Close the innermost open program; after the pop, the remaining stack
        // length is that program's nesting depth.
        const topProgram = programBoundaryStack.pop();
        if (topProgram) {
            result.programs.push({
                name: topProgram.name,
                startLine: topProgram.startLine,
                endLine: lineNum,
                nestingDepth: programBoundaryStack.length,
                procedureUsing: topProgram.procedureUsing,
                isCommon: topProgram.isCommon,
            });
        }
        return;
    }
    // DECLARATIVES boundary detection
    if (RE_DECLARATIVES_START.test(line)) {
        inDeclaratives = true;
        return;
    }
    if (RE_DECLARATIVES_END.test(line)) {
        inDeclaratives = false;
        return;
    }
    // Detect PROGRAM-ID regardless of current division state (handles sibling
    // programs after END PROGRAM where IDENTIFICATION DIVISION header is omitted)
    if (currentDivision !== 'identification') {
        const pgmIdMatch = line.match(RE_PROGRAM_ID);
        if (pgmIdMatch) {
            flushCallAccum();
            flushSort();
            flushInspect();
            extractIdentification(line, lineNum);
            return;
        }
    }
    // --- Division transitions ---
    const divMatch = line.match(RE_DIVISION);
    if (divMatch) {
        // Flush any pending accumulators on division boundary
        flushSelect();
        flushCallAccum();
        flushSort();
        flushInspect();
        const divName = divMatch[1].toUpperCase();
        switch (divName) {
            case 'IDENTIFICATION':
                currentDivision = 'identification';
                break;
            case 'ENVIRONMENT':
                currentDivision = 'environment';
                currentEnvSection = null;
                break;
            case 'DATA':
                currentDivision = 'data';
                currentDataSection = 'unknown';
                break;
            case 'PROCEDURE': {
                currentDivision = 'procedure';
                currentParagraph = null;
                const procUsingMatch = line.match(RE_PROC_USING);
                if (procUsingMatch) {
                    // Keep only the text before RETURNING, then drop tokens listed in USING_KEYWORDS
                    const params = procUsingMatch[1].split(/\bRETURNING\b/i)[0].trim().split(/\s+/)
                        .filter(s => s.length > 0 && !USING_KEYWORDS.has(s.toUpperCase()));
                    result.procedureUsing = params;
                    // Store per-program on the boundary stack
                    const topProg = programBoundaryStack[programBoundaryStack.length - 1];
                    if (topProg)
                        topProg.procedureUsing = params;
                    pendingProcUsing = false;
                }
                else {
                    // USING may be on the next line — flag for extractProcedure to pick up
                    // Only set if the line is NOT period-terminated (period = no USING clause)
                    pendingProcUsing = !/\.\s*$/.test(line);
                }
                break;
            }
        }
        return;
    }
    // --- Section transitions ---
    const secMatch = line.match(RE_SECTION);
    if (secMatch) {
        flushSelect();
        const secName = secMatch[1].toUpperCase();
        // A recognised section header also sets the division it belongs to;
        // unrecognised section names change no state but still end processing of the line.
        switch (secName) {
            case 'WORKING-STORAGE':
                currentDivision = 'data';
                currentDataSection = 'working-storage';
                break;
            case 'LINKAGE':
                currentDivision = 'data';
                currentDataSection = 'linkage';
                break;
            case 'FILE':
                currentDivision = 'data';
                currentDataSection = 'file';
                break;
            case 'LOCAL-STORAGE':
                currentDivision = 'data';
                currentDataSection = 'local-storage';
                break;
            case 'SCREEN':
                currentDivision = 'data';
                currentDataSection = 'screen';
                break;
            case 'INPUT-OUTPUT':
                currentDivision = 'environment';
                currentEnvSection = 'input-output';
                break;
            case 'CONFIGURATION':
                currentDivision = 'environment';
                currentEnvSection = 'configuration';
                break;
        }
        return;
    }
    // --- COPY (all divisions) ---
    // No return here: a COPY line still goes through the CALL and division logic below.
    const copyQMatch = line.match(RE_COPY_QUOTED);
    if (copyQMatch) {
        result.copies.push({ target: copyQMatch[1] ?? copyQMatch[2], line: lineNum });
    }
    else {
        const copyUMatch = line.match(RE_COPY_UNQUOTED);
        if (copyUMatch) {
            result.copies.push({ target: copyUMatch[1], line: lineNum });
        }
    }
    // --- CALL (all divisions, typically procedure) ---
    // Multi-line CALL accumulator: accumulate CALL statement until period or END-CALL.
    // Continuation lines (not the start line) are consumed entirely — return after flush
    // to prevent false paragraph detection on lines like "WS-ADDR." or "WS-CUST-CODE."
    if (callAccum !== null) {
        // Check if this continuation line starts a new COBOL statement (not a USING parameter).
        // Use (?:\s|$) instead of \b to prevent matching hyphenated identifiers like MOVE-COUNT.
        // Only use RE_PROC_PARAGRAPH as flush trigger when in Area A (≤7 leading spaces, fixed-format).
        // In free-format, never use RE_PROC_PARAGRAPH (can't distinguish parameters from paragraphs).
        const trimmedLine = line.trimStart();
        const leadingSpaces = (line.match(/^(\s*)/)?.[1].length ?? 0);
        const isAreaAParagraph = RE_PROC_PARAGRAPH.test(line) && (!isFreeFormat ? leadingSpaces <= 7 : false);
        if (RE_STATEMENT_VERB_START.test(trimmedLine)
            || RE_PROC_SECTION.test(line) || isAreaAParagraph) {
            flushCallAccum(); // Flush CALL without this line's content
            // Fall through to process this line normally
        }
        else {
            callAccum += ' ' + line;
            if (/\.\s*$/.test(callAccum) || /\bEND-CALL\b/i.test(callAccum)) {
                flushCallAccum();
            }
            return; // continuation line consumed by CALL accumulator
        }
    }
    else if (currentDivision === 'procedure' && /(?<![A-Z0-9-])\bCALL\s+(?:"[^"]+"|'[^']+'|[A-Z][A-Z0-9-]+)/i.test(line)) {
        // Check if this is a complete single-line CALL (ends with period or END-CALL)
        if (/\.\s*$/.test(line) || /\bEND-CALL\b/i.test(line)) {
            // Single-line CALL — extract immediately via flushCallAccum
            callAccum = line;
            callAccumLine = lineNum;
            flushCallAccum();
        }
        else {
            // Multi-line CALL — start accumulating
            callAccum = line;
            callAccumLine = lineNum;
            return; // prevent CALL start line from feeding sortAccum/inspectAccum
        }
    }
    // --- Division-specific extraction ---
    // When currentDivision is still null no extractor runs.
    switch (currentDivision) {
        case 'identification':
            extractIdentification(line, lineNum);
            break;
        case 'environment':
            extractEnvironment(line, lineNum);
            break;
        case 'data':
            extractData(line, lineNum);
            break;
        case 'procedure':
            extractProcedure(line, lineNum);
            break;
    }
}
1004
+ // =========================================================================
1005
+ // IDENTIFICATION DIVISION extraction
1006
+ // =========================================================================
1007
/**
 * Handle one line of the IDENTIFICATION DIVISION.
 *
 * A PROGRAM-ID line resets the division/section/paragraph state, records the
 * first program name seen on `result.programName`, and pushes a new entry on
 * the program boundary stack. Any other line is checked against the author,
 * date-written, date-compiled and installation patterns; the first that
 * matches is stored (trailing period removed) on `result.programMetadata`.
 *
 * @param {string} line - Logical line text.
 * @param {number} lineNum - Line number used as the program's `startLine`.
 */
function extractIdentification(line, lineNum) {
    const programIdMatch = line.match(RE_PROGRAM_ID);
    if (programIdMatch) {
        const programName = programIdMatch[1];
        // Only the first PROGRAM-ID in the file becomes result.programName.
        if (result.programName === null) {
            result.programName = programName;
        }
        // Reset state machine for the new (nested or sibling) program.
        currentDivision = 'identification';
        currentDataSection = 'unknown';
        currentEnvSection = null;
        currentParagraph = null;
        // COMMON attribute: true when present, otherwise left undefined.
        const commonFlag = /\bIS\s+COMMON\b/i.test(line) ? true : undefined;
        programBoundaryStack.push({ name: programName, startLine: lineNum, isCommon: commonFlag });
        return;
    }
    // Metadata paragraphs, tried in this order; the first match wins.
    const metadataPatterns = [
        ['author', RE_AUTHOR],
        ['dateWritten', RE_DATE_WRITTEN],
        ['dateCompiled', RE_DATE_COMPILED],
        ['installation', RE_INSTALLATION],
    ];
    for (const [field, pattern] of metadataPatterns) {
        const hit = line.match(pattern);
        if (hit) {
            result.programMetadata[field] = hit[1].replace(/\.\s*$/, '').trim();
            return;
        }
    }
}
1044
+ // =========================================================================
1045
+ // ENVIRONMENT DIVISION extraction
1046
+ // =========================================================================
1047
/**
 * Handle one line of the ENVIRONMENT DIVISION.
 *
 * Only lines inside the INPUT-OUTPUT section are considered. A line that
 * starts a SELECT flushes any previous one and begins a new accumulator;
 * other lines are appended to the open accumulator. The accumulator is
 * flushed as soon as its text ends with a period.
 *
 * @param {string} line - Logical line text.
 * @param {number} lineNum - Line number recorded as the SELECT's start line.
 */
function extractEnvironment(line, lineNum) {
    if (currentEnvSection !== 'input-output') {
        return;
    }
    const startsSelect = line.match(RE_SELECT_START) !== null;
    if (startsSelect) {
        // A new SELECT implicitly ends the previous one.
        flushSelect();
        selectAccum = line.trim();
        selectStartLine = lineNum;
    }
    else if (selectAccum !== null) {
        // Continuation of the SELECT currently being accumulated.
        selectAccum = `${selectAccum} ${line.trim()}`;
    }
    // A trailing period terminates the statement.
    const isTerminated = selectAccum !== null && /\.\s*$/.test(selectAccum);
    if (isTerminated) {
        flushSelect();
    }
}
1067
/**
 * Parse the accumulated SELECT statement, if any, append the resulting
 * declaration to `result.fileDeclarations`, and clear the accumulator.
 * Does nothing when no SELECT is pending.
 */
function flushSelect() {
    if (selectAccum !== null) {
        const declaration = parseSelectStatement(selectAccum, selectStartLine);
        // parseSelectStatement yields null for text that is not a valid SELECT.
        if (declaration) {
            result.fileDeclarations.push(declaration);
        }
        selectAccum = null;
    }
}
1076
/**
 * Parse the accumulated SORT/MERGE statement, if any, and clear the
 * accumulator.
 *
 * When the text matches RE_SORT or RE_MERGE, a `result.sorts` entry is added
 * with the sort file, the USING files and the GIVING files. INPUT/OUTPUT
 * PROCEDURE targets (with optional THRU/THROUGH end) are added to
 * `result.performs` with the current paragraph as caller. All entries carry
 * the line on which the statement started.
 */
function flushSort() {
    if (sortAccum === null) {
        return;
    }
    const statement = sortAccum;
    sortAccum = null;
    const headMatch = statement.match(RE_SORT) || statement.match(RE_MERGE);
    if (!headMatch) {
        return;
    }
    // Split a clause body into file names: strip a trailing period from each
    // token, keep identifier-shaped tokens, drop SORT clause noise words.
    const toFileNames = (clauseText) => clauseText
        .trim()
        .split(/\s+/)
        .map((token) => token.replace(/\.$/, ''))
        .filter((token) => /^[A-Z][A-Z0-9-]+$/i.test(token) && !SORT_CLAUSE_NOISE.has(token.toUpperCase()));
    const upperStatement = statement.toUpperCase();
    const usingIdx = upperStatement.search(/\bUSING\s/);
    const givingIdx = upperStatement.search(/\bGIVING\s/);
    let usingFiles = [];
    if (usingIdx >= 0) {
        // USING files run up to the GIVING keyword, or to the end of the statement.
        const afterUsing = statement.substring(usingIdx + 6);
        const givingInTail = afterUsing.toUpperCase().search(/\bGIVING\b/);
        usingFiles = toFileNames(givingInTail >= 0 ? afterUsing.substring(0, givingInTail) : afterUsing);
    }
    const givingFiles = givingIdx >= 0 ? toFileNames(statement.substring(givingIdx + 7)) : [];
    // INPUT PROCEDURE IS / OUTPUT PROCEDURE IS are recorded as performs.
    const procedureMatches = [
        statement.match(/\bINPUT\s+PROCEDURE\s+(?:IS\s+)?([A-Z][A-Z0-9-]+)(?:\s+(?:THRU|THROUGH)\s+([A-Z][A-Z0-9-]+))?/i),
        statement.match(/\bOUTPUT\s+PROCEDURE\s+(?:IS\s+)?([A-Z][A-Z0-9-]+)(?:\s+(?:THRU|THROUGH)\s+([A-Z][A-Z0-9-]+))?/i),
    ];
    for (const procMatch of procedureMatches) {
        if (procMatch) {
            result.performs.push({
                caller: currentParagraph,
                target: procMatch[1],
                thruTarget: procMatch[2] || undefined,
                line: sortStartLine,
            });
        }
    }
    result.sorts.push({ sortFile: headMatch[1], usingFiles, givingFiles, line: sortStartLine });
}
1111
/**
 * Parse the accumulated INSPECT statement, if any, append an entry to
 * `result.inspects`, and clear the accumulator.
 *
 * The entry records the inspected field, the TALLYING counters (identifiers
 * immediately followed by FOR), the statement form ('converting',
 * 'tallying-replacing', 'tallying' or 'replacing'), the start line and the
 * current paragraph as caller. Nothing is recorded when no field name
 * follows INSPECT.
 */
function flushInspect() {
    if (inspectAccum === null)
        return;
    const text = inspectAccum;
    const fieldMatch = text.match(/\bINSPECT\s+([A-Z][A-Z0-9-]+)/i);
    if (!fieldMatch) {
        inspectAccum = null;
        return;
    }
    const counters = [];
    // The TALLYING section ends at REPLACING, CONVERTING, the terminating
    // period, or the end of the text. The final `$` alternative matters when
    // the statement is flushed without a period; without it the lazy capture
    // found no terminator and every counter was lost.
    const tallySection = text.match(/\bTALLYING\b([\s\S]+?)(?:\bREPLACING\b|\bCONVERTING\b|\.\s*$|$)/i);
    if (tallySection) {
        for (const counterMatch of tallySection[1].matchAll(/([A-Z][A-Z0-9-]+)\s+FOR\b/gi)) {
            counters.push(counterMatch[1]);
        }
    }
    const hasTallying = /\bTALLYING\b/i.test(text);
    const hasReplacing = /\bREPLACING\b/i.test(text);
    const hasConverting = /\bCONVERTING\b/i.test(text);
    // CONVERTING takes precedence; 'replacing' is also the fallback when no
    // clause keyword is present.
    const form = hasConverting ? 'converting'
        : hasTallying && hasReplacing ? 'tallying-replacing'
            : hasTallying ? 'tallying'
                : 'replacing';
    result.inspects.push({
        inspectedField: fieldMatch[1],
        counters,
        form,
        line: inspectStartLine,
        caller: currentParagraph,
    });
    inspectAccum = null;
}
1145
/**
 * Flush the accumulated multi-line CALL statement.
 *
 * Re-extracts every quoted CALL (RE_CALL) and dynamic CALL (RE_CALL_DYNAMIC)
 * from the full accumulated text, together with its USING parameters and
 * RETURNING item, and records any CANCEL found inside the block. All entries
 * are stamped with `callAccumLine`. Does nothing when no CALL is pending;
 * otherwise resets `callAccum` to null when done.
 *
 * A block can contain more than one CALL, so the text searched for
 * USING / RETURNING is cut off at the start of the next CALL in the block.
 * Without that bound an earlier CALL would pick up a later CALL's clauses.
 */
function flushCallAccum() {
    if (callAccum === null)
        return;
    const text = callAccum;
    const quotedCalls = [...text.matchAll(RE_CALL)];
    const dynamicCalls = [...text.matchAll(RE_CALL_DYNAMIC)];
    // Start offsets of every CALL in the block, used to bound each CALL's tail.
    const callStarts = [...quotedCalls, ...dynamicCalls].map((m) => m.index);
    const identifier = /^[A-Z][A-Z0-9-]+$/i;
    // Parse the USING / RETURNING clauses that belong to one CALL match.
    const parseCallTail = (callMatch) => {
        const tailStart = callMatch.index + callMatch[0].length;
        const tailEnd = callStarts.reduce((end, start) => (start >= tailStart && start < end ? start : end), text.length);
        const tail = text.substring(tailStart, tailEnd);
        const usingMatch = tail.match(RE_USING_PARAMS);
        const parameters = usingMatch
            ? usingMatch[1].split(/\bRETURNING\b/i)[0].trim().split(/\s+/)
                .filter((s) => s.length > 0 && !CALL_USING_FILTER.has(s.toUpperCase()) && identifier.test(s))
            : undefined;
        const retMatch = tail.match(/\bRETURNING\s+([A-Z][A-Z0-9-]+)/i);
        return { parameters, returning: retMatch ? retMatch[1] : undefined };
    };
    // Quoted CALLs: the target is whichever capture group matched.
    for (const callMatch of quotedCalls) {
        result.calls.push({
            target: callMatch[1] ?? callMatch[2],
            line: callAccumLine,
            isQuoted: true,
            ...parseCallTail(callMatch),
        });
    }
    // Dynamic CALLs (target is an identifier, not a literal).
    for (const dynCallMatch of dynamicCalls) {
        result.calls.push({
            target: dynCallMatch[1],
            line: callAccumLine,
            isQuoted: false,
            ...parseCallTail(dynCallMatch),
        });
    }
    // CANCELs from within the CALL block (common in ON EXCEPTION handlers).
    for (const cancelMatch of text.matchAll(RE_CANCEL)) {
        result.cancels.push({ target: cancelMatch[1] ?? cancelMatch[2], line: callAccumLine, isQuoted: true });
    }
    for (const dynCancelMatch of text.matchAll(RE_CANCEL_DYNAMIC)) {
        result.cancels.push({ target: dynCancelMatch[1], line: callAccumLine, isQuoted: false });
    }
    callAccum = null;
}
1187
+ // =========================================================================
1188
+ // DATA DIVISION extraction
1189
+ // =========================================================================
1190
/**
 * DATA DIVISION extraction for a single source line.
 *
 * Checks, in order: FD header, level-88 condition name, level-66 RENAMES,
 * anonymous REDEFINES (ignored) and ordinary data items (levels 01-49, 66,
 * 77; FILLER is ignored). Findings are appended to `result.dataItems` and
 * `result.fdEntries`. The closure state `pendingFdName` / `pendingFdLine`
 * links an FD to the first level-01 item recorded after it.
 *
 * @param {string} line    Source line to examine.
 * @param {number} lineNum Line number stored on everything extracted here.
 */
function extractData(line, lineNum) {
    // FD header: remember it until its record (first 01 level) shows up.
    const fdHeader = line.match(RE_FD);
    if (fdHeader) {
        // A previous FD that never got a record is emitted without one.
        if (pendingFdName !== null) {
            result.fdEntries.push({ fdName: pendingFdName, line: pendingFdLine });
        }
        pendingFdName = fdHeader[1];
        pendingFdLine = lineNum;
        return;
    }
    // Level 88: condition name with its value list.
    const conditionName = line.match(RE_88_LEVEL);
    if (conditionName) {
        result.dataItems.push({
            name: conditionName[1],
            level: 88,
            line: lineNum,
            values: parseConditionValues(conditionName[2]),
            section: currentDataSection,
        });
        return;
    }
    // Level 66: the RENAMES target is carried in the `redefines` field.
    const renames = line.match(RE_66_LEVEL);
    if (renames) {
        result.dataItems.push({
            name: renames[1],
            level: 66,
            line: lineNum,
            redefines: renames[2],
            section: currentDataSection,
        });
        return;
    }
    const anonymousRedefines = line.match(RE_ANONYMOUS_REDEFINES);
    const itemMatch = line.match(RE_DATA_ITEM);
    if (!itemMatch)
        return;
    // "NN REDEFINES target" with no name in between: the data-item regex
    // then captures the word REDEFINES as the name. No node for those.
    if (anonymousRedefines && itemMatch[2].toUpperCase() === 'REDEFINES')
        return;
    const name = itemMatch[2];
    if (name.toUpperCase() === 'FILLER')
        return;
    const level = parseInt(itemMatch[1], 10);
    const isSupportedLevel = (level >= 1 && level <= 49) || level === 66 || level === 77;
    if (!isSupportedLevel)
        return;
    const clauses = parseDataItemClauses(itemMatch[3] || '');
    const item = {
        name,
        level,
        line: lineNum,
        section: currentDataSection,
    };
    // Optional clauses are copied only when present, in a fixed key order.
    for (const key of ['pic', 'usage']) {
        if (clauses[key])
            item[key] = clauses[key];
    }
    if (clauses.occurs !== undefined)
        item.occurs = clauses.occurs;
    for (const key of ['dependingOn', 'redefines']) {
        if (clauses[key])
            item[key] = clauses[key];
    }
    if (clauses.value)
        item.values = [clauses.value];
    if (clauses.isExternal)
        item.isExternal = true;
    if (clauses.isGlobal)
        item.isGlobal = true;
    result.dataItems.push(item);
    // The first 01 level after an FD is that FD's record.
    if (pendingFdName !== null && level === 1) {
        result.fdEntries.push({
            fdName: pendingFdName,
            recordName: name,
            line: pendingFdLine,
        });
        pendingFdName = null;
    }
}
1290
+ // =========================================================================
1291
+ // PROCEDURE DIVISION extraction
1292
+ // =========================================================================
1293
/**
 * PROCEDURE DIVISION extraction for a single source line.
 *
 * Runs the extractors in a fixed order, several of which end processing of
 * the line early: USE AFTER (inside DECLARATIVES), a pending PROCEDURE
 * DIVISION USING continuation, section header, paragraph header, PERFORM,
 * ENTRY, MOVE, GO TO, the multi-line SORT/MERGE accumulator, the multi-line
 * INSPECT accumulator, SEARCH, CANCEL, SET and INITIALIZE. Results are
 * appended to the matching `result.*` arrays; `currentParagraph`,
 * `pendingProcUsing`, `sortAccum` / `sortStartLine` and `inspectAccum` /
 * `inspectStartLine` are closure state updated along the way.
 *
 * Two header-related rules matter for the accumulators:
 *  - A real section/paragraph header flushes a pending INSPECT first, so the
 *    INSPECT is attributed to the paragraph it was written in.
 *  - In fixed format, a line that merely looks like a paragraph but sits in
 *    Area B is not a header; it is processed as an ordinary statement line so
 *    that a multi-line SORT/INSPECT still sees its terminating line.
 *
 * @param {string} line    Source line to examine.
 * @param {number} lineNum Line number stored on everything extracted here.
 */
function extractProcedure(line, lineNum) {
    // USE AFTER EXCEPTION in DECLARATIVES
    if (inDeclaratives) {
        const useMatch = line.match(RE_USE_AFTER);
        if (useMatch) {
            // Attach the handler to the most recent section name
            const lastSection = result.sections[result.sections.length - 1];
            if (lastSection) {
                result.declaratives.push({
                    sectionName: lastSection.name,
                    target: useMatch[1],
                    line: lineNum,
                });
            }
            return;
        }
    }
    // Handle PROCEDURE DIVISION USING on a continuation line. The pending
    // flag is cleared whether or not this line carries the USING clause.
    if (pendingProcUsing) {
        const usingMatch = line.match(/\bUSING\s+([\s\S]*?)(?:\.|$)/i);
        pendingProcUsing = false;
        if (usingMatch) {
            const params = usingMatch[1].split(/\bRETURNING\b/i)[0].trim().split(/\s+/)
                .filter((s) => s.length > 0 && !USING_KEYWORDS.has(s.toUpperCase()));
            result.procedureUsing = params;
            const topProg = programBoundaryStack[programBoundaryStack.length - 1];
            if (topProg)
                topProg.procedureUsing = params;
            return; // consumed the USING line
        }
    }
    // Section header
    const secMatch = line.match(RE_PROC_SECTION);
    if (secMatch) {
        const name = secMatch[1];
        if (!EXCLUDED_PARA_NAMES.has(name.toUpperCase()) && !name.toUpperCase().includes('DIVISION')) {
            // A header ends any statement still being accumulated.
            flushInspect();
            result.sections.push({ name, line: lineNum });
            // Don't set currentParagraph to section name — sections are Namespaces,
            // not Functions. Setting it here would cause PERFORMs to be attributed
            // to the section instead of the containing paragraph.
        }
        return;
    }
    // Paragraph header
    const paraMatch = line.match(RE_PROC_PARAGRAPH);
    if (paraMatch) {
        // In fixed-format, paragraphs must start in Area A (col 8-11, max 7 leading spaces).
        // Deeply-indented lines (Area B, 8+ spaces) are data items or continuation
        // lines, not paragraphs. They fall through to the statement extractors
        // below instead of being dropped, so SORT/INSPECT accumulators still
        // receive the line that carries their terminating period.
        const leadingSpaces = line.match(/^(\s*)/)?.[1].length ?? 0;
        const isAreaB = !isFreeFormat && leadingSpaces > 7;
        if (!isAreaB) {
            const name = paraMatch[1];
            const upperName = name.toUpperCase();
            if (!EXCLUDED_PARA_NAMES.has(upperName) && !upperName.startsWith('END-') && upperName !== 'DIVISION' && upperName !== 'SECTION') {
                // Flush before switching paragraphs so the pending INSPECT keeps
                // the paragraph it was written in as its caller.
                flushInspect();
                result.paragraphs.push({ name, line: lineNum });
                currentParagraph = name;
            }
            return;
        }
    }
    // PERFORM (global — captures multiple PERFORMs on the same logical line)
    for (const perfMatch of line.matchAll(RE_PERFORM)) {
        const target = perfMatch[1];
        // Skip COBOL inline-perform keywords that are not paragraph names
        if (PERFORM_KEYWORD_SKIP.has(target.toUpperCase()))
            continue;
        // Also check for "PERFORM identifier TIMES" — the identifier is a
        // data item count, not a paragraph name (fundamental regex ambiguity).
        const matchEnd = perfMatch.index + perfMatch[0].length;
        const afterTarget = line.substring(matchEnd).trim();
        if (/^TIMES\b/i.test(afterTarget))
            continue;
        result.performs.push({
            caller: currentParagraph,
            target,
            thruTarget: perfMatch[2] || undefined,
            line: lineNum,
        });
    }
    // ENTRY point
    const entryMatch = line.match(RE_ENTRY);
    if (entryMatch) {
        const entryName = entryMatch[1] ?? entryMatch[2];
        const usingClause = entryMatch[3];
        if (entryName) {
            result.entryPoints.push({
                name: entryName,
                parameters: usingClause
                    ? usingClause.trim().split(/\s+/).filter((s) => s.length > 0 && !USING_KEYWORDS.has(s.toUpperCase()))
                    : [],
                line: lineNum,
            });
        }
    }
    // MOVE statement (skip literals and figurative constants)
    const moveMatch = line.match(RE_MOVE);
    if (moveMatch) {
        const from = moveMatch[2].toUpperCase();
        if (!MOVE_SKIP.has(from)) {
            const isCorresponding = !!moveMatch[1];
            // MOVE CORRESPONDING is always single-target per COBOL standard
            const targets = isCorresponding
                ? [moveMatch[3].replace(/\..*$/, '').trim().split(/\s+/)[0]].filter((t) => /^[A-Z][A-Z0-9-]+$/i.test(t))
                : extractMoveTargets(moveMatch[3]);
            if (targets.length > 0) {
                result.moves.push({
                    from: moveMatch[2],
                    targets,
                    line: lineNum,
                    caller: currentParagraph,
                    corresponding: isCorresponding,
                });
            }
        }
    }
    // GO TO — control flow transfer (handles GO TO p1 p2 p3 DEPENDING ON x)
    const gotoMatch = line.match(RE_GOTO);
    if (gotoMatch) {
        const targets = gotoMatch[1].trim().split(/\s+/).filter((t) => /^[A-Z][A-Z0-9-]+$/i.test(t));
        for (const target of targets) {
            result.gotos.push({ caller: currentParagraph, target, line: lineNum });
        }
    }
    // SORT / MERGE file references (multi-line: accumulate until period)
    if (sortAccum !== null) {
        // Continue accumulating SORT/MERGE statement
        sortAccum += ' ' + line;
        if (!/\.\s*$/.test(sortAccum))
            return; // still accumulating — skip other extractors
        // Period found — flush, then fall through to check whether this line
        // also starts a new SORT/MERGE
        flushSort();
    }
    const sortMatch = line.match(RE_SORT) || line.match(RE_MERGE);
    if (sortMatch && sortAccum === null) {
        sortAccum = line;
        sortStartLine = lineNum;
        if (!/\.\s*$/.test(sortAccum))
            return; // multi-line — wait for period
        flushSort();
    }
    // INSPECT — multi-line accumulator (like SORT)
    // If a statement verb arrives during accumulation, flush the INSPECT as-is
    // and process the line normally. Headers are already handled above; the
    // header checks here are kept as a guard.
    if (inspectAccum !== null) {
        const inspTrimmed = line.trimStart();
        const inspLeading = (line.match(/^(\s*)/)?.[1].length ?? 0);
        const inspIsAreaAPara = RE_PROC_PARAGRAPH.test(line) && (!isFreeFormat ? inspLeading <= 7 : false);
        if (RE_PROC_SECTION.test(line) || inspIsAreaAPara
            || RE_STATEMENT_VERB_START.test(inspTrimmed)
            || /^CALL(?:\s|$)/i.test(inspTrimmed)) {
            flushInspect();
            // Fall through to process this line normally
        }
        else {
            inspectAccum += ' ' + line;
            if (/\.\s*$/.test(inspectAccum)) {
                flushInspect();
            }
            else {
                return;
            }
        }
    }
    const inspectMatch = line.match(/\bINSPECT\s+([A-Z][A-Z0-9-]+)/i);
    if (inspectMatch && inspectAccum === null) {
        inspectAccum = line;
        inspectStartLine = lineNum;
        if (!/\.\s*$/.test(inspectAccum))
            return;
        flushInspect();
    }
    // SEARCH — table access
    const searchMatch = line.match(RE_SEARCH);
    if (searchMatch) {
        result.searches.push({ target: searchMatch[1], line: lineNum });
    }
    // CANCEL — program lifecycle (global matchAll captures multiple CANCELs on same line)
    for (const cancelMatch of line.matchAll(RE_CANCEL)) {
        result.cancels.push({ target: cancelMatch[1] ?? cancelMatch[2], line: lineNum, isQuoted: true });
    }
    // Dynamic CANCEL — RE_CANCEL_DYNAMIC cannot match quoted targets, no dedup guard needed
    for (const dynCancelMatch of line.matchAll(RE_CANCEL_DYNAMIC)) {
        result.cancels.push({ target: dynCancelMatch[1], line: lineNum, isQuoted: false });
    }
    // SET statement (condition, index)
    const setTrueMatch = line.match(RE_SET_TO_TRUE);
    if (setTrueMatch) {
        const targets = setTrueMatch[1].trim().split(/\s+/)
            .filter((t) => /^[A-Z][A-Z0-9-]+$/i.test(t) && t.toUpperCase() !== 'OF');
        if (targets.length > 0) {
            result.sets.push({ targets, form: 'to-true', line: lineNum, caller: currentParagraph });
        }
    }
    else {
        const setIdxMatch = line.match(RE_SET_INDEX);
        if (setIdxMatch) {
            const targets = setIdxMatch[1].trim().split(/\s+/)
                .filter((t) => /^[A-Z][A-Z0-9-]+$/i.test(t));
            const mode = setIdxMatch[2].toUpperCase();
            const form = mode === 'TO' ? 'to-value'
                : mode.startsWith('UP') ? 'up-by'
                    : 'down-by';
            result.sets.push({ targets, form, value: setIdxMatch[3], line: lineNum, caller: currentParagraph });
        }
    }
    // INITIALIZE — data reset (multi-target: INITIALIZE WS-A WS-B WS-C.)
    const initMatch = line.match(RE_INITIALIZE);
    if (initMatch) {
        const targets = initMatch[1].trim().split(/\s+/)
            .filter((t) => /^[A-Z][A-Z0-9-]+$/i.test(t) && !INITIALIZE_CLAUSE_KEYWORDS.has(t.toUpperCase()));
        for (const target of targets) {
            result.initializes.push({ target, line: lineNum, caller: currentParagraph });
        }
    }
}
1509
+ }