gitnexus 1.4.8 → 1.4.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/dist/cli/index-repo.d.ts +15 -0
- package/dist/cli/index-repo.js +115 -0
- package/dist/cli/index.js +11 -2
- package/dist/cli/setup.js +12 -9
- package/dist/cli/wiki.d.ts +4 -0
- package/dist/cli/wiki.js +174 -53
- package/dist/config/supported-languages.d.ts +7 -5
- package/dist/config/supported-languages.js +6 -4
- package/dist/core/graph/graph.js +9 -1
- package/dist/core/graph/types.d.ts +10 -2
- package/dist/core/ingestion/call-processor.d.ts +18 -1
- package/dist/core/ingestion/call-processor.js +297 -38
- package/dist/core/ingestion/call-routing.d.ts +3 -18
- package/dist/core/ingestion/call-routing.js +0 -19
- package/dist/core/ingestion/cobol/cobol-copy-expander.d.ts +57 -0
- package/dist/core/ingestion/cobol/cobol-copy-expander.js +385 -0
- package/dist/core/ingestion/cobol/cobol-preprocessor.d.ts +210 -0
- package/dist/core/ingestion/cobol/cobol-preprocessor.js +1509 -0
- package/dist/core/ingestion/cobol/jcl-parser.d.ts +68 -0
- package/dist/core/ingestion/cobol/jcl-parser.js +217 -0
- package/dist/core/ingestion/cobol/jcl-processor.d.ts +33 -0
- package/dist/core/ingestion/cobol/jcl-processor.js +229 -0
- package/dist/core/ingestion/cobol-processor.d.ts +54 -0
- package/dist/core/ingestion/cobol-processor.js +1186 -0
- package/dist/core/ingestion/entry-point-scoring.d.ts +17 -0
- package/dist/core/ingestion/entry-point-scoring.js +18 -4
- package/dist/core/ingestion/export-detection.d.ts +47 -8
- package/dist/core/ingestion/export-detection.js +29 -50
- package/dist/core/ingestion/field-extractor.d.ts +29 -0
- package/dist/core/ingestion/field-extractor.js +25 -0
- package/dist/core/ingestion/field-extractors/configs/c-cpp.d.ts +3 -0
- package/dist/core/ingestion/field-extractors/configs/c-cpp.js +108 -0
- package/dist/core/ingestion/field-extractors/configs/csharp.d.ts +8 -0
- package/dist/core/ingestion/field-extractors/configs/csharp.js +73 -0
- package/dist/core/ingestion/field-extractors/configs/dart.d.ts +8 -0
- package/dist/core/ingestion/field-extractors/configs/dart.js +76 -0
- package/dist/core/ingestion/field-extractors/configs/go.d.ts +11 -0
- package/dist/core/ingestion/field-extractors/configs/go.js +64 -0
- package/dist/core/ingestion/field-extractors/configs/helpers.d.ts +44 -0
- package/dist/core/ingestion/field-extractors/configs/helpers.js +134 -0
- package/dist/core/ingestion/field-extractors/configs/jvm.d.ts +3 -0
- package/dist/core/ingestion/field-extractors/configs/jvm.js +118 -0
- package/dist/core/ingestion/field-extractors/configs/php.d.ts +8 -0
- package/dist/core/ingestion/field-extractors/configs/php.js +67 -0
- package/dist/core/ingestion/field-extractors/configs/python.d.ts +12 -0
- package/dist/core/ingestion/field-extractors/configs/python.js +91 -0
- package/dist/core/ingestion/field-extractors/configs/ruby.d.ts +16 -0
- package/dist/core/ingestion/field-extractors/configs/ruby.js +75 -0
- package/dist/core/ingestion/field-extractors/configs/rust.d.ts +9 -0
- package/dist/core/ingestion/field-extractors/configs/rust.js +55 -0
- package/dist/core/ingestion/field-extractors/configs/swift.d.ts +8 -0
- package/dist/core/ingestion/field-extractors/configs/swift.js +63 -0
- package/dist/core/ingestion/field-extractors/configs/typescript-javascript.d.ts +3 -0
- package/dist/core/ingestion/field-extractors/configs/typescript-javascript.js +60 -0
- package/dist/core/ingestion/field-extractors/generic.d.ts +46 -0
- package/dist/core/ingestion/field-extractors/generic.js +111 -0
- package/dist/core/ingestion/field-extractors/typescript.d.ts +77 -0
- package/dist/core/ingestion/field-extractors/typescript.js +291 -0
- package/dist/core/ingestion/field-types.d.ts +59 -0
- package/dist/core/ingestion/field-types.js +2 -0
- package/dist/core/ingestion/framework-detection.d.ts +87 -0
- package/dist/core/ingestion/framework-detection.js +65 -2
- package/dist/core/ingestion/heritage-processor.js +15 -17
- package/dist/core/ingestion/import-processor.d.ts +9 -10
- package/dist/core/ingestion/import-processor.js +59 -14
- package/dist/core/ingestion/{resolvers → import-resolvers}/csharp.d.ts +6 -9
- package/dist/core/ingestion/{resolvers → import-resolvers}/csharp.js +20 -2
- package/dist/core/ingestion/import-resolvers/dart.d.ts +7 -0
- package/dist/core/ingestion/import-resolvers/dart.js +44 -0
- package/dist/core/ingestion/{resolvers → import-resolvers}/go.d.ts +4 -5
- package/dist/core/ingestion/{resolvers → import-resolvers}/go.js +17 -0
- package/dist/core/ingestion/{resolvers → import-resolvers}/jvm.d.ts +9 -1
- package/dist/core/ingestion/{resolvers → import-resolvers}/jvm.js +56 -0
- package/dist/core/ingestion/{resolvers → import-resolvers}/php.d.ts +6 -10
- package/dist/core/ingestion/{resolvers → import-resolvers}/php.js +7 -2
- package/dist/core/ingestion/{resolvers → import-resolvers}/python.d.ts +9 -3
- package/dist/core/ingestion/{resolvers → import-resolvers}/python.js +35 -3
- package/dist/core/ingestion/{resolvers → import-resolvers}/ruby.d.ts +5 -2
- package/dist/core/ingestion/{resolvers → import-resolvers}/ruby.js +7 -2
- package/dist/core/ingestion/{resolvers → import-resolvers}/rust.d.ts +5 -2
- package/dist/core/ingestion/{resolvers → import-resolvers}/rust.js +41 -2
- package/dist/core/ingestion/{resolvers → import-resolvers}/standard.d.ts +15 -7
- package/dist/core/ingestion/{resolvers → import-resolvers}/standard.js +22 -3
- package/dist/core/ingestion/import-resolvers/swift.d.ts +7 -0
- package/dist/core/ingestion/import-resolvers/swift.js +23 -0
- package/dist/core/ingestion/import-resolvers/types.d.ts +44 -0
- package/dist/core/ingestion/import-resolvers/types.js +6 -0
- package/dist/core/ingestion/{resolvers → import-resolvers}/utils.d.ts +0 -3
- package/dist/core/ingestion/{resolvers → import-resolvers}/utils.js +0 -9
- package/dist/core/ingestion/language-config.d.ts +4 -1
- package/dist/core/ingestion/language-provider.d.ts +121 -0
- package/dist/core/ingestion/language-provider.js +24 -0
- package/dist/core/ingestion/languages/c-cpp.d.ts +12 -0
- package/dist/core/ingestion/languages/c-cpp.js +71 -0
- package/dist/core/ingestion/languages/cobol.d.ts +1 -0
- package/dist/core/ingestion/languages/cobol.js +26 -0
- package/dist/core/ingestion/languages/csharp.d.ts +8 -0
- package/dist/core/ingestion/languages/csharp.js +49 -0
- package/dist/core/ingestion/languages/dart.d.ts +12 -0
- package/dist/core/ingestion/languages/dart.js +58 -0
- package/dist/core/ingestion/languages/go.d.ts +11 -0
- package/dist/core/ingestion/languages/go.js +28 -0
- package/dist/core/ingestion/languages/index.d.ts +38 -0
- package/dist/core/ingestion/languages/index.js +63 -0
- package/dist/core/ingestion/languages/java.d.ts +9 -0
- package/dist/core/ingestion/languages/java.js +29 -0
- package/dist/core/ingestion/languages/kotlin.d.ts +9 -0
- package/dist/core/ingestion/languages/kotlin.js +53 -0
- package/dist/core/ingestion/languages/php.d.ts +8 -0
- package/dist/core/ingestion/languages/php.js +145 -0
- package/dist/core/ingestion/languages/python.d.ts +12 -0
- package/dist/core/ingestion/languages/python.js +39 -0
- package/dist/core/ingestion/languages/ruby.d.ts +9 -0
- package/dist/core/ingestion/languages/ruby.js +44 -0
- package/dist/core/ingestion/languages/rust.d.ts +12 -0
- package/dist/core/ingestion/languages/rust.js +44 -0
- package/dist/core/ingestion/languages/swift.d.ts +12 -0
- package/dist/core/ingestion/languages/swift.js +133 -0
- package/dist/core/ingestion/languages/typescript.d.ts +10 -0
- package/dist/core/ingestion/languages/typescript.js +60 -0
- package/dist/core/ingestion/mro-processor.js +14 -15
- package/dist/core/ingestion/{named-binding-extraction.d.ts → named-binding-processor.d.ts} +0 -9
- package/dist/core/ingestion/named-binding-processor.js +42 -0
- package/dist/core/ingestion/named-bindings/csharp.d.ts +3 -0
- package/dist/core/ingestion/named-bindings/csharp.js +37 -0
- package/dist/core/ingestion/named-bindings/java.d.ts +3 -0
- package/dist/core/ingestion/named-bindings/java.js +29 -0
- package/dist/core/ingestion/named-bindings/kotlin.d.ts +3 -0
- package/dist/core/ingestion/named-bindings/kotlin.js +36 -0
- package/dist/core/ingestion/named-bindings/php.d.ts +3 -0
- package/dist/core/ingestion/named-bindings/php.js +61 -0
- package/dist/core/ingestion/named-bindings/python.d.ts +3 -0
- package/dist/core/ingestion/named-bindings/python.js +49 -0
- package/dist/core/ingestion/named-bindings/rust.d.ts +3 -0
- package/dist/core/ingestion/named-bindings/rust.js +64 -0
- package/dist/core/ingestion/named-bindings/types.d.ts +16 -0
- package/dist/core/ingestion/named-bindings/types.js +6 -0
- package/dist/core/ingestion/named-bindings/typescript.d.ts +3 -0
- package/dist/core/ingestion/named-bindings/typescript.js +58 -0
- package/dist/core/ingestion/parsing-processor.d.ts +5 -1
- package/dist/core/ingestion/parsing-processor.js +115 -16
- package/dist/core/ingestion/pipeline.js +925 -424
- package/dist/core/ingestion/resolution-context.js +1 -1
- package/dist/core/ingestion/route-extractors/expo.d.ts +1 -0
- package/dist/core/ingestion/route-extractors/expo.js +36 -0
- package/dist/core/ingestion/route-extractors/middleware.d.ts +47 -0
- package/dist/core/ingestion/route-extractors/middleware.js +143 -0
- package/dist/core/ingestion/route-extractors/nextjs.d.ts +3 -0
- package/dist/core/ingestion/route-extractors/nextjs.js +76 -0
- package/dist/core/ingestion/route-extractors/php.d.ts +7 -0
- package/dist/core/ingestion/route-extractors/php.js +21 -0
- package/dist/core/ingestion/route-extractors/response-shapes.d.ts +20 -0
- package/dist/core/ingestion/route-extractors/response-shapes.js +290 -0
- package/dist/core/ingestion/tree-sitter-queries.d.ts +8 -7
- package/dist/core/ingestion/tree-sitter-queries.js +231 -9
- package/dist/core/ingestion/type-env.d.ts +14 -17
- package/dist/core/ingestion/type-env.js +66 -14
- package/dist/core/ingestion/type-extractors/c-cpp.d.ts +1 -1
- package/dist/core/ingestion/type-extractors/csharp.js +1 -1
- package/dist/core/ingestion/type-extractors/dart.d.ts +15 -0
- package/dist/core/ingestion/type-extractors/dart.js +371 -0
- package/dist/core/ingestion/type-extractors/jvm.js +1 -1
- package/dist/core/ingestion/type-extractors/shared.d.ts +1 -13
- package/dist/core/ingestion/type-extractors/shared.js +9 -102
- package/dist/core/ingestion/type-extractors/swift.js +334 -4
- package/dist/core/ingestion/type-extractors/types.d.ts +3 -1
- package/dist/core/ingestion/{ast-helpers.d.ts → utils/ast-helpers.d.ts} +16 -13
- package/dist/core/ingestion/{ast-helpers.js → utils/ast-helpers.js} +111 -32
- package/dist/core/ingestion/{call-analysis.js → utils/call-analysis.js} +37 -0
- package/dist/core/ingestion/utils/event-loop.d.ts +5 -0
- package/dist/core/ingestion/utils/event-loop.js +5 -0
- package/dist/core/ingestion/utils/language-detection.d.ts +9 -0
- package/dist/core/ingestion/utils/language-detection.js +70 -0
- package/dist/core/ingestion/utils/verbose.d.ts +1 -0
- package/dist/core/ingestion/utils/verbose.js +7 -0
- package/dist/core/ingestion/workers/parse-worker.d.ts +43 -2
- package/dist/core/ingestion/workers/parse-worker.js +361 -150
- package/dist/core/lbug/csv-generator.js +34 -1
- package/dist/core/lbug/lbug-adapter.js +6 -0
- package/dist/core/lbug/schema.d.ts +5 -3
- package/dist/core/lbug/schema.js +39 -2
- package/dist/core/tree-sitter/parser-loader.js +7 -1
- package/dist/core/wiki/cursor-client.d.ts +31 -0
- package/dist/core/wiki/cursor-client.js +127 -0
- package/dist/core/wiki/generator.d.ts +28 -9
- package/dist/core/wiki/generator.js +115 -18
- package/dist/core/wiki/graph-queries.d.ts +4 -0
- package/dist/core/wiki/graph-queries.js +7 -1
- package/dist/core/wiki/llm-client.d.ts +2 -0
- package/dist/core/wiki/llm-client.js +8 -4
- package/dist/core/wiki/prompts.d.ts +3 -3
- package/dist/core/wiki/prompts.js +6 -0
- package/dist/mcp/core/lbug-adapter.d.ts +5 -0
- package/dist/mcp/core/lbug-adapter.js +11 -1
- package/dist/mcp/local/local-backend.d.ts +16 -5
- package/dist/mcp/local/local-backend.js +711 -74
- package/dist/mcp/tools.js +71 -2
- package/dist/storage/repo-manager.d.ts +3 -0
- package/package.json +17 -16
- package/dist/core/ingestion/import-resolution.d.ts +0 -101
- package/dist/core/ingestion/import-resolution.js +0 -251
- package/dist/core/ingestion/named-binding-extraction.js +0 -373
- package/dist/core/ingestion/resolvers/index.d.ts +0 -18
- package/dist/core/ingestion/resolvers/index.js +0 -13
- package/dist/core/ingestion/type-extractors/index.d.ts +0 -22
- package/dist/core/ingestion/type-extractors/index.js +0 -31
- package/dist/core/ingestion/utils.d.ts +0 -20
- package/dist/core/ingestion/utils.js +0 -242
- package/scripts/patch-tree-sitter-swift.cjs +0 -74
- /package/dist/core/ingestion/{call-analysis.d.ts → utils/call-analysis.d.ts} +0 -0
|
@@ -0,0 +1,1509 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* COBOL source pre-processing and regex-based symbol extraction.
|
|
3
|
+
*
|
|
4
|
+
* DESIGN DECISION — Why regex instead of a full parser (ANTLR4, tree-sitter):
|
|
5
|
+
*
|
|
6
|
+
* 1. Performance: Regex processes ~1ms/file vs 50-200ms/file for ANTLR4/tree-sitter.
|
|
7
|
+
* On EPAGHE (14k COBOL files), this is ~14 seconds vs 12-47 minutes.
|
|
8
|
+
*
|
|
9
|
+
* 2. Reliability: tree-sitter-cobol@0.0.1's external scanner hangs indefinitely
|
|
10
|
+
* on ~5% of production files (no timeout possible). ANTLR4's proleap-cobol-parser
|
|
11
|
+
* is a Java project — using it from Node.js requires Java subprocesses or
|
|
12
|
+
* extracting .g4 grammars and generating JS/TS targets (significant effort).
|
|
13
|
+
*
|
|
14
|
+
* 3. Dialect compatibility: GnuCOBOL with Italian comments, patch markers in
|
|
15
|
+
* cols 1-6 (mzADD, estero, etc.), and vendor extensions. Formal grammars
|
|
16
|
+
* target COBOL-85 and would need dialect modifications.
|
|
17
|
+
*
|
|
18
|
+
* 4. Industry precedent: ctags, GitHub code navigation, and Sourcegraph all use
|
|
19
|
+
* regex-based extraction for code indexing. Full parsing is only needed for
|
|
20
|
+
* compilation or semantic analysis, not symbol extraction.
|
|
21
|
+
*
|
|
22
|
+
* 5. Determinism: Every regex pattern is tested with canonical COBOL input
|
|
23
|
+
* (see test/unit/cobol-preprocessor.test.ts). Same input always produces
|
|
24
|
+
* same output — no grammar ambiguity or parser state issues.
|
|
25
|
+
*
|
|
26
|
+
* This module provides:
|
|
27
|
+
* 1. preprocessCobolSource() — cleans patch markers (kept for potential future use)
|
|
28
|
+
* 2. extractCobolSymbolsWithRegex() — single-pass state machine COBOL extraction
|
|
29
|
+
*/
|
|
30
|
+
// ---------------------------------------------------------------------------
// Source normalization: preprocessCobolSource
// ---------------------------------------------------------------------------
/**
 * Blank out the fixed-format sequence area (columns 1-6) of COBOL source.
 *
 * Columns 1-6 may carry numeric sequence numbers (000100) or alphabetic patch
 * markers (mzADD, estero, #patch). Every line of at least seven characters
 * whose first six characters contain a non-space character gets those six
 * characters replaced by exactly six spaces, so every later column keeps its
 * original position and column-sensitive regexes see the same layout
 * regardless of what the sequence area held.
 *
 * Free-format sources (a `>>SOURCE [FORMAT [IS]] FREE` directive within the
 * first 10 lines) are returned untouched, because their first six columns are
 * program text.
 *
 * The number of lines is preserved. For fixed-format input, CRLF line endings
 * come back as LF because the lines are re-joined with '\n'.
 *
 * @param {string} content - Raw COBOL source text.
 * @returns {string} The source with the sequence area blanked, or `content`
 *   itself for free-format input.
 */
export function preprocessCobolSource(content) {
    const SEQUENCE_AREA_WIDTH = 6;
    // Built with repeat() so the fill is unambiguously as wide as the area it replaces.
    const BLANK_SEQUENCE_AREA = ' '.repeat(SEQUENCE_AREA_WIDTH);
    // Only the first 10 lines are inspected for the free-format directive.
    const firstLines = content.split('\n', 10).join('\n');
    if (/>>SOURCE\s+(?:FORMAT\s+(?:IS\s+)?)?FREE/i.test(firstLines)) {
        return content;
    }
    const lines = content.split(/\r?\n/);
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        // Lines that end inside the sequence area carry no program text.
        if (line.length <= SEQUENCE_AREA_WIDTH) {
            continue;
        }
        if (/\S/.test(line.substring(0, SEQUENCE_AREA_WIDTH))) {
            lines[i] = BLANK_SEQUENCE_AREA + line.substring(SEQUENCE_AREA_WIDTH);
        }
    }
    return lines.join('\n');
}
|
|
68
|
+
// ---------------------------------------------------------------------------
// Keyword filter sets
// ---------------------------------------------------------------------------
// Calling-convention words that can appear inside a USING parameter list and
// must not be reported as parameter names.
const USING_KEYWORDS = new Set([
    'BY',
    'VALUE',
    'REFERENCE',
    'CONTENT',
    'ADDRESS',
    'OF',
    'RETURNING',
]);
// Filter for CALL ... USING lists. Maintained separately from USING_KEYWORDS:
// it adds LENGTH and OMITTED and does not contain RETURNING.
const CALL_USING_FILTER = new Set([
    'BY',
    'REFERENCE',
    'CONTENT',
    'VALUE',
    'ADDRESS',
    'OF',
    'LENGTH',
    'OMITTED',
]);
// Words that look like a paragraph header ("NAME.") but are not paragraph names.
const EXCLUDED_PARA_NAMES = new Set([
    // Structural keywords: divisions, sections, declaratives
    'DECLARATIVES',
    'END',
    'PROCEDURE',
    'IDENTIFICATION',
    'ENVIRONMENT',
    'DATA',
    'WORKING-STORAGE',
    'LINKAGE',
    'FILE',
    'LOCAL-STORAGE',
    'COMMUNICATION',
    'REPORT',
    'SCREEN',
    'INPUT-OUTPUT',
    'CONFIGURATION',
    // COBOL verbs that appear alone on a line with a period (false positive in free-format)
    'GOBACK',
    'STOP',
    'EXIT',
    'CONTINUE',
    'DISPLAY',
    'ACCEPT',
    'WRITE',
    'READ',
    'REWRITE',
    'DELETE',
    'OPEN',
    'CLOSE',
    'RETURN',
    'RELEASE',
    'SORT',
    'MERGE',
]);
|
|
88
|
+
// ---------------------------------------------------------------------------
// Regex constants (compiled once at module load, shared by every call)
// ---------------------------------------------------------------------------

// Division header; group 1 = division name.
const RE_DIVISION = /\b(IDENTIFICATION|ENVIRONMENT|DATA|PROCEDURE)\s+DIVISION\b/i;

// Section header of the ENVIRONMENT / DATA divisions; group 1 = section name.
const RE_SECTION = /\b(WORKING-STORAGE|LINKAGE|FILE|LOCAL-STORAGE|SCREEN|INPUT-OUTPUT|CONFIGURATION)\s+SECTION\b/i;

// --- IDENTIFICATION DIVISION ---

// PROGRAM-ID paragraph; group 1 = program name (an optional "IS COMMON" is consumed).
const RE_PROGRAM_ID = /\bPROGRAM-ID\.\s*([A-Z][A-Z0-9-]*)(?:\s+IS\s+COMMON)?/i;

// END PROGRAM marker; group 1 = program name.
const RE_END_PROGRAM = /\bEND\s+PROGRAM\s+([A-Z][A-Z0-9-]*)\s*\./i;

// Documentation paragraphs. Each needs leading whitespace before the keyword;
// group 1 = the rest of the line.
const RE_AUTHOR = /^\s+AUTHOR\.\s*(.+)/i;
const RE_DATE_WRITTEN = /^\s+DATE-WRITTEN\.\s*(.+)/i;
const RE_DATE_COMPILED = /^\s+DATE-COMPILED\.\s*(.+)/i;
const RE_INSTALLATION = /^\s+INSTALLATION\.\s*(.+)/i;

// --- ENVIRONMENT DIVISION ---

// Start of a SELECT entry; group 1 = file name (an OPTIONAL keyword is skipped).
const RE_SELECT_START = /\bSELECT\s+(?:OPTIONAL\s+)?([A-Z][A-Z0-9-]+)/i;
|
|
102
|
+
// DATA DIVISION
|
|
103
|
+
// ^\s* (not ^\s+) to support both fixed-format (indented) and free-format (trimmed)
|
|
104
|
+
const RE_FD = /^\s*(?:FD|SD|RD)\s+([A-Z][A-Z0-9-]+)/i;
|
|
105
|
+
const RE_DATA_ITEM = /^\s*(\d{1,2})\s+([A-Z][A-Z0-9-]+)\s*(.*)/i;
|
|
106
|
+
const RE_ANONYMOUS_REDEFINES = /^\s*(\d{1,2})\s+REDEFINES\s+([A-Z][A-Z0-9-]+)/i;
|
|
107
|
+
const RE_88_LEVEL = /^\s*88\s+([A-Z][A-Z0-9-]+)\s+VALUES?\s+(?:ARE\s+)?(.+)/i;
|
|
108
|
+
// --- PROCEDURE DIVISION ---
// Header patterns accept any amount of leading whitespace, so they match both
// fixed-format (7 leading spaces) and free-format (any indentation) lines.

// "NAME SECTION [nn]." alone on a line; group 1 = section name.
const RE_PROC_SECTION = /^\s*([A-Z][A-Z0-9-]+)\s+SECTION(?:\s+\d+)?\.\s*$/i;

// "NAME." alone on a line; group 1 = paragraph name.
const RE_PROC_PARAGRAPH = /^\s*([A-Z][A-Z0-9-]+)\.\s*$/i;

// PERFORM target; group 1 = target, group 2 = optional THRU/THROUGH end target.
// Global flag: lastIndex is stateful across exec()/test() calls.
const RE_PERFORM = /\bPERFORM\s+([A-Z][A-Z0-9-]+)(?:\s+(?:THRU|THROUGH)\s+([A-Z][A-Z0-9-]+))?/gi;

// --- ALL DIVISIONS ---

// Static CALL with a literal target. Double-quoted ("PROG") and single-quoted
// ('PROG') forms are separate alternatives so the opening and closing quote
// must be the same character; group 1 = double-quoted name, group 2 = single-quoted name.
const RE_CALL = /\bCALL\s+(?:"([^"]+)"|'([^']+)')/gi;

// Dynamic CALL through a data item (no quotes): CALL WS-PROGRAM-NAME.
// The lookbehind rejects CALL when it is the tail of a hyphenated word (END-CALL).
const RE_CALL_DYNAMIC = /(?<![A-Z0-9-])\bCALL\s+([A-Z][A-Z0-9-]+)(?=\s|\.|$)/gi;

// COPY with an unquoted copybook name; group 1 = name.
const RE_COPY_UNQUOTED = /\bCOPY\s+([A-Z][A-Z0-9-]+)(?:\s|\.)/i;

// COPY with a quoted copybook name; group 1 = double-quoted, group 2 = single-quoted.
const RE_COPY_QUOTED = /\bCOPY\s+(?:"([^"]+)"|'([^']+)')(?:\s|\.)/i;
|
|
121
|
+
// --- EXEC blocks ---
const RE_EXEC_SQL_START = /\bEXEC\s+SQL\b/i;
const RE_EXEC_CICS_START = /\bEXEC\s+CICS\b/i;
const RE_END_EXEC = /\bEND-EXEC\b/i;

// --- GO TO: control-flow transfer (same graph semantics as PERFORM) ---
// Group 1 = the whitespace-separated target list ("P1" or "P1 P2 P3"); an
// optional "DEPENDING ON x" phrase is consumed but not captured. The statement
// must end with a period or at the end of the input.
const RE_GOTO = /\bGO\s+TO\s+([A-Z][A-Z0-9-]+(?:\s+[A-Z][A-Z0-9-]+)*?)(?:\s+DEPENDING\s+ON\s+[A-Z][A-Z0-9-]+)?(?:\s*\.|$)/i;

// --- SORT / MERGE file references; group 1 = file name ---
const RE_SORT = /\bSORT\s+([A-Z][A-Z0-9-]+)/i;
const RE_MERGE = /\bMERGE\s+([A-Z][A-Z0-9-]+)/i;

// --- SEARCH [ALL]: table access; group 1 = table name ---
const RE_SEARCH = /\bSEARCH\s+(?:ALL\s+)?([A-Z][A-Z0-9-]+)/i;

// --- CANCEL: program lifecycle ---
// Literal target; group 1 = double-quoted name, group 2 = single-quoted name.
const RE_CANCEL = /\bCANCEL\s+(?:"([^"]+)"|'([^']+)')/gi;
// Target held in a data item; group 1 = data name.
const RE_CANCEL_DYNAMIC = /(?<![A-Z0-9-])\bCANCEL\s+([A-Z][A-Z0-9-]+)(?=\s|\.|$)/gi;

// --- Level 66 RENAMES; group 1 = new name, group 2 = first renamed item ---
const RE_66_LEVEL = /^\s*66\s+([A-Z][A-Z0-9-]+)\s+RENAMES\s+([A-Z][A-Z0-9-]+)/i;
|
|
138
|
+
// --- DECLARATIVES boundaries and the USE ... EXCEPTION/ERROR declarative ---

// "DECLARATIVES." alone on a line.
const RE_DECLARATIVES_START = /^\s*DECLARATIVES\s*\.\s*$/i;

// "END DECLARATIVES." alone on a line.
const RE_DECLARATIVES_END = /^\s*END\s+DECLARATIVES\s*\.\s*$/i;

// USE [GLOBAL] [AFTER] [STANDARD] {EXCEPTION|ERROR} [PROCEDURE] ON target.
// Group 1 = file name or open mode (INPUT, OUTPUT, I-O, EXTEND).
// GLOBAL and PROCEDURE are optional here so that the standard spelling
// "USE AFTER STANDARD ERROR PROCEDURE ON file" matches as well as the shorter
// "USE AFTER ERROR ON file" form; ON stays mandatory.
const RE_USE_AFTER = /\bUSE\s+(?:GLOBAL\s+)?(?:AFTER\s+)?(?:STANDARD\s+)?(?:EXCEPTION|ERROR)\s+(?:PROCEDURE\s+)?ON\s+([A-Z][A-Z0-9-]+|INPUT|OUTPUT|I-O|EXTEND)\b/i;
|
|
142
|
+
// --- SET statement ---
// SET cond-1 [OF group] ... TO TRUE; group 1 = the condition-name list, trailing space included.
const RE_SET_TO_TRUE = /\bSET\s+((?:[A-Z][A-Z0-9-]+(?:\s+OF\s+[A-Z][A-Z0-9-]+)?\s+)+)TO\s+TRUE\b/i;
// SET idx ... {TO | UP BY | DOWN BY} value;
// groups: 1 = target list (trailing space included), 2 = operator, 3 = number or data name.
const RE_SET_INDEX = /\bSET\s+((?:[A-Z][A-Z0-9-]+\s+)+)(TO|UP\s+BY|DOWN\s+BY)\s+(\d+|[A-Z][A-Z0-9-]+)/i;

// --- INITIALIZE statement: data reset ---
// Group 1 = everything between INITIALIZE and the first REPLACING / WITH
// clause, trailing period, or end of input.
const RE_INITIALIZE = /\bINITIALIZE\s+([\s\S]*?)(?=\bREPLACING\b|\bWITH\b|\.\s*$|$)/i;
const INITIALIZE_CLAUSE_KEYWORDS = new Set([
    'REPLACING',
    'WITH',
    'ALL',
    'ALPHABETIC',
    'ALPHANUMERIC',
    'NUMERIC',
    'NATIONAL',
    'DBCS',
    'EGCS',
    'FILLER',
]);

// --- EXEC DLI (IMS/DB) ---
const RE_EXEC_DLI_START = /\bEXEC\s+DLI\b/i;

// --- PROCEDURE DIVISION USING; group 1 = parameter text up to the period or end of input ---
const RE_PROC_USING = /\bPROCEDURE\s+DIVISION\s+USING\s+([\s\S]*?)(?:\.|$)/i;

// --- ENTRY point ---
// Groups: 1 = double-quoted name, 2 = single-quoted name, 3 = optional USING parameter text.
const RE_ENTRY = /\bENTRY\s+(?:"([^"]+)"|'([^']+)')(?:\s+USING\s+([\s\S]*?))?(?:\.|$)/i;

// --- MOVE statement ---
// Groups: 1 = optional CORRESPONDING/CORR prefix, 2 = source name,
// 3 = everything after TO (parsed later for multiple targets).
const RE_MOVE = /\bMOVE\s+((?:CORRESPONDING|CORR)\s+)?([A-Z][A-Z0-9-]+)\s+TO\s+(.+)/i;
|
|
159
|
+
// Figurative constants (plus ALL) that are never reported as MOVE targets.
const MOVE_SKIP = new Set([
    'SPACES',
    'ZEROS',
    'ZEROES',
    'LOW-VALUES',
    'LOW-VALUE',
    'HIGH-VALUES',
    'HIGH-VALUE',
    'QUOTES',
    'QUOTE',
    'ALL',
]);
/**
 * Turn the text that follows "MOVE ... TO" into the list of receiving names.
 *
 * Processing steps:
 *  - everything from the first period onwards is discarded;
 *  - parenthesised subscripts / reference modifiers are removed;
 *  - an OF or IN qualifier and the single word after it are skipped;
 *  - remaining words that look like COBOL names (a letter followed by at
 *    least one letter, digit or hyphen) and are not in MOVE_SKIP are kept,
 *    in source order and with their original casing.
 *
 * @param {string} afterTo - Text captured after the TO keyword.
 * @returns {string[]} Target names; empty when nothing usable remains.
 */
function extractMoveTargets(afterTo) {
    const statement = afterTo.replace(/\..*$/, '').trim();
    if (statement === '') {
        return [];
    }
    const words = statement
        .replace(/\([^)]*\)/g, '')
        .split(/\s+/)
        .filter(Boolean);
    const found = [];
    for (let idx = 0; idx < words.length; idx += 1) {
        const word = words[idx];
        const upper = word.toUpperCase();
        if (upper === 'OF' || upper === 'IN') {
            // Step over the qualifier's operand as well.
            idx += 1;
            continue;
        }
        if (/^[A-Z][A-Z0-9-]+$/i.test(word) && !MOVE_SKIP.has(upper)) {
            found.push(word);
        }
    }
    return found;
}
|
|
194
|
+
// PERFORM: keywords that may follow PERFORM but are NOT paragraph/section names.
// Inline PERFORM loops (UNTIL, VARYING) and inline test clauses (WITH TEST,
// FOREVER) must not be stored as perform-target false positives.
const PERFORM_KEYWORD_SKIP = new Set([
    'UNTIL', 'VARYING', 'WITH', 'TEST', 'FOREVER',
]);
// SORT/MERGE clause keywords that should not be captured as file names
const SORT_CLAUSE_NOISE = new Set([
    'ON', 'ASCENDING', 'DESCENDING', 'KEY', 'WITH', 'DUPLICATES',
    'IN', 'ORDER', 'COLLATING', 'SEQUENCE', 'IS', 'THROUGH', 'THRU',
    'INPUT', 'OUTPUT', 'PROCEDURE', 'USING', 'GIVING',
]);
// COBOL statement verbs used as boundary detectors across accumulators.
// Each entry is a regex fragment (note 'GO\\s+TO'), not a plain word.
// CALL is intentionally excluded: per the original design note it is handled
// by the callAccum state machine, and including it here would make the flush
// trigger consume a new CALL line without re-detecting it as a CALL start.
const COBOL_STATEMENT_VERBS = [
    'GO\\s+TO', 'PERFORM', 'MOVE', 'DISPLAY', 'ACCEPT',
    'INSPECT', 'SEARCH', 'SORT', 'MERGE', 'IF', 'EVALUATE',
    'SET', 'INITIALIZE', 'STOP', 'EXIT', 'GOBACK', 'CONTINUE',
    'READ', 'WRITE', 'REWRITE', 'DELETE', 'OPEN', 'CLOSE', 'START',
    'CANCEL', 'COMPUTE', 'ADD', 'SUBTRACT', 'MULTIPLY', 'DIVIDE',
    'STRING', 'UNSTRING',
];
/** Matches a string that starts with one of COBOL_STATEMENT_VERBS followed by whitespace or end of input. */
const RE_STATEMENT_VERB_START = new RegExp(`^(?:${COBOL_STATEMENT_VERBS.join('|')})(?:\\s|$)`, 'i');
/**
 * Alternation of statement verbs (plus CALL, minus GO TO which RE_USING_PARAMS
 * lists separately) used as a stop condition for USING parameter extraction.
 *
 * Each verb is guarded by (?<![A-Z0-9-]) in addition to \b, the same guard
 * RE_CALL_DYNAMIC uses: "-" is not a word character, so \b alone also matches
 * the verb at the tail of a hyphenated data name (WS-STRING, WS-START) and
 * would cut the parameter list short at "WS-".
 */
const USING_VERB_LOOKAHEAD = [...COBOL_STATEMENT_VERBS, 'CALL']
    .filter(v => v !== 'GO\\s+TO') // GO TO handled separately with \bGO\s+TO\b
    .map(v => `(?<![A-Z0-9-])\\b${v}(?=\\s|$)`)
    .join('|');
/**
 * USING parameter list; group 1 = the text after USING up to (not including)
 * RETURNING, ON EXCEPTION/OVERFLOW, NOT ON, END-CALL, GO TO, the next
 * statement verb, a trailing period, or the end of input.
 */
const RE_USING_PARAMS = new RegExp(`\\bUSING\\s+([\\s\\S]*?)(?=\\bRETURNING\\b|\\bON\\s+(?:EXCEPTION|OVERFLOW)\\b|\\bNOT\\s+ON\\b|\\bEND-CALL\\b|\\bGO\\s+TO\\b|${USING_VERB_LOOKAHEAD}|\\.\\s*$|$)`, 'i');
|
|
228
|
+
// ---------------------------------------------------------------------------
// Private helper: strip Italian inline comments (| and everything after)
// ---------------------------------------------------------------------------
/**
 * Cut a line at the first '|' that is not inside a quoted literal.
 *
 * A literal opens at a double or single quote and closes at the next
 * occurrence of the same quote character; a '|' seen in between is kept.
 *
 * @param {string} line - One source line.
 * @returns {string} The text before the comment marker, or the whole line when no marker is found.
 */
function stripInlineComment(line) {
    let openQuote = null;
    let cursor = 0;
    while (cursor < line.length) {
        const ch = line.charAt(cursor);
        if (openQuote !== null) {
            // Inside a literal: only the matching quote character matters.
            if (ch === openQuote) {
                openQuote = null;
            }
        }
        else if (ch === '|') {
            return line.slice(0, cursor);
        }
        else if (ch === '"' || ch === "'") {
            openQuote = ch;
        }
        cursor += 1;
    }
    return line;
}
|
|
248
|
+
// ---------------------------------------------------------------------------
// Private helper: parse data item trailing clauses (PIC, USAGE, etc.)
// ---------------------------------------------------------------------------
/**
 * Extract the clauses that follow a data item's level number and name.
 *
 * Recognised clauses: PIC/PICTURE, USAGE (or a standalone COMP variant /
 * BINARY / PACKED-DECIMAL), REDEFINES, OCCURS [.. DEPENDING ON], IS EXTERNAL,
 * IS GLOBAL and VALUE.
 *
 * Clause keywords are only recognised when they are standalone COBOL words:
 *  - quoted literals are masked before the keyword scan, so text inside
 *    VALUE 'USAGE COMP' is never read as a clause;
 *  - keywords are guarded by (?<![A-Z0-9-]) because "-" is not a regex word
 *    character, so a bare \b also matches the tail of a hyphenated data name
 *    (REDEFINES WS-BINARY, REDEFINES WS-VALUE).
 *
 * @param {string} rest - Clause text after the data name (may end with a period).
 * @returns {{pic?: string, usage?: string, redefines?: string, occurs?: number,
 *   dependingOn?: string, isExternal?: true, isGlobal?: true, value?: string}}
 *   Only the clauses that were found are set; `isExternal` / `isGlobal` are
 *   present with value `undefined` when the clause is absent.
 */
function parseDataItemClauses(rest) {
    const result = {};
    // Strip trailing period for easier parsing
    const text = rest.replace(/\.\s*$/, '');
    // Same-length copy with literal contents replaced by '#', so indexes found
    // in `masked` are valid in `text`.
    const masked = text.replace(/"[^"]*"|'[^']*'/g, (literal) => literal[0] + '#'.repeat(literal.length - 2) + literal[0]);
    // PIC / PICTURE [IS] <picture-string>
    const picMatch = masked.match(/(?<![A-Z0-9-])\bPIC(?:TURE)?\s+(?:IS\s+)?(\S+)/i);
    if (picMatch) {
        result.pic = picMatch[1];
    }
    // USAGE [IS] <usage-type> — including non-standard COMP-6, COMP-X etc.;
    // otherwise a standalone COMP variant without the USAGE keyword.
    const usageMatch = masked.match(/(?<![A-Z0-9-])\bUSAGE\s+(?:IS\s+)?(COMP(?:UTATIONAL)?(?:-[0-9X])?|BINARY|PACKED-DECIMAL|DISPLAY|INDEX|POINTER|NATIONAL)\b/i)
        ?? masked.match(/(?<![A-Z0-9-])\b(COMP(?:UTATIONAL)?(?:-[0-9X])?|BINARY|PACKED-DECIMAL)\b/i);
    if (usageMatch) {
        result.usage = usageMatch[1].toUpperCase();
    }
    // REDEFINES <name>
    const redefMatch = masked.match(/(?<![A-Z0-9-])\bREDEFINES\s+([A-Z][A-Z0-9-]+)/i);
    if (redefMatch) {
        result.redefines = redefMatch[1];
    }
    // OCCURS <n> [TO <m>] [TIMES] [DEPENDING ON <field>]
    const occursMatch = masked.match(/(?<![A-Z0-9-])\bOCCURS\s+(\d+)(?:\s+TO\s+(\d+))?\s*(?:TIMES\s*)?(?:DEPENDING\s+ON\s+([A-Z][A-Z0-9-]+(?:\s*\([^)]*\))?))?/i);
    if (occursMatch) {
        result.occurs = parseInt(occursMatch[1], 10);
        if (occursMatch[3]) {
            // Strip any subscript from DEPENDING ON field
            result.dependingOn = occursMatch[3].replace(/\s*\([^)]*\)/, '').trim();
        }
    }
    // IS EXTERNAL / IS GLOBAL (true when present, otherwise undefined)
    result.isExternal = /(?<![A-Z0-9-])\bIS\s+EXTERNAL\b/i.test(masked) || undefined;
    result.isGlobal = /(?<![A-Z0-9-])\bIS\s+GLOBAL\b/i.test(masked) || undefined;
    // VALUE [IS] literal/constant
    const valueIdx = masked.search(/(?<![A-Z0-9-])\bVALUE(?![A-Z0-9_-])/i);
    if (valueIdx < 0) {
        return result;
    }
    // The literal itself is read from the unmasked text.
    const afterValue = text.substring(valueIdx + 5).replace(/^\s+IS\s+/i, '').trimStart();
    // Quoted: "..." or '...' (with optional type prefix X, N, G, B)
    const quotedMatch = afterValue.match(/^([XNGB])?(?:"([^"]*)"|'([^']*)')/i);
    if (quotedMatch) {
        const prefix = quotedMatch[1] ? quotedMatch[1].toUpperCase() : '';
        const body = quotedMatch[2] ?? quotedMatch[3];
        result.value = prefix ? `${prefix}'${body}'` : body;
        return result;
    }
    // ALL "..." or ALL '...'
    const allMatch = afterValue.match(/^ALL\s+(?:"([^"]*)"|'([^']*)')/i);
    if (allMatch) {
        result.value = `ALL '${allMatch[1] ?? allMatch[2]}'`;
        return result;
    }
    // Numeric literal: optional sign, digits with optional fraction, or a leading-dot fraction
    const numMatch = afterValue.match(/^([+-]?(?:\d+\.?\d*|\.\d+))/);
    if (numMatch) {
        result.value = numMatch[1];
        return result;
    }
    // Figurative constant or identifier
    const identMatch = afterValue.match(/^([A-Z][A-Z0-9-]*)/i);
    if (identMatch) {
        result.value = identMatch[1].toUpperCase();
    }
    return result;
}
|
|
324
|
+
// ---------------------------------------------------------------------------
|
|
325
|
+
// Private helper: parse 88-level condition values
|
|
326
|
+
// ---------------------------------------------------------------------------
|
|
327
|
+
/**
 * Parse the value list of an 88-level condition entry into individual values.
 *
 * If the text contains any quoted literal ("..." or '...'), only the quoted
 * contents are returned (in source order). Otherwise the text is split on
 * whitespace, THRU/THROUGH keywords are dropped (both range endpoints are kept
 * as separate values), and COBOL separator commas/semicolons trailing a token
 * are removed so `11, 12, 16` yields `11`, `12`, `16`.
 *
 * @param {string} valuesStr - Raw value-list text; a trailing period is ignored.
 * @returns {string[]} Extracted values; empty when nothing usable is found.
 */
function parseConditionValues(valuesStr) {
    // Drop the statement-terminating period before tokenizing
    const text = valuesStr.replace(/\.\s*$/, '').trim();
    // Quoted literals win: "O" "Y" 'I'
    const quoted = [];
    for (const m of text.matchAll(/(?:"([^"]*)"|'([^']*)')/g)) {
        quoted.push(m[1] ?? m[2]);
    }
    if (quoted.length > 0) {
        return quoted;
    }
    // Unquoted list: 11 12 16 17 21, or 1 THRU 5, or 11, 12, 16
    const values = [];
    for (const rawToken of text.split(/\s+/)) {
        // A comma/semicolon followed by a space is a COBOL separator, not part
        // of the value. Only trailing ones are stripped so that a decimal comma
        // inside a number (e.g. 1,5) is left untouched.
        const token = rawToken.replace(/[,;]+$/, '');
        if (token.length === 0) {
            continue;
        }
        const upper = token.toUpperCase();
        if (upper === 'THRU' || upper === 'THROUGH') {
            // The range is represented by its two endpoints already in the array
            continue;
        }
        values.push(token);
    }
    return values;
}
|
|
357
|
+
/**
 * Parse an accumulated FILE-CONTROL `SELECT` statement.
 *
 * @param {string} stmt - Full SELECT statement text (may span several lines;
 *   whitespace is collapsed before matching).
 * @param {number} startLine - Line number recorded on the result as `line`.
 * @returns {object|null} `null` when the text does not start with
 *   `SELECT [OPTIONAL] <name>`; otherwise an object with `selectName`,
 *   `assignTo` ('' when no ASSIGN target is found), `line`, and, when the
 *   corresponding clause is present, `organization`, `access`, `recordKey`,
 *   `alternateKeys`, `fileStatus`. `isOptional` is `true` for SELECT OPTIONAL
 *   and `undefined` otherwise.
 */
function parseSelectStatement(stmt, startLine) {
    // Normalize whitespace so every clause regex can assume single spaces
    const text = stmt.replace(/\s+/g, ' ').trim();
    const nameMatch = text.match(/^SELECT\s+(?:OPTIONAL\s+)?([A-Z][A-Z0-9-]+)/i);
    if (!nameMatch) {
        return null;
    }
    const result = {
        selectName: nameMatch[1],
        assignTo: '',
        line: startLine,
    };
    // ASSIGN [TO] "literal" | 'literal' | identifier.
    // Both quote styles are accepted, and the bare keyword TO is never taken as
    // the target: without the lookahead, an unrecognised operand after
    // "ASSIGN TO" made the regex backtrack and capture "TO" itself.
    const assignMatch = text.match(/\bASSIGN\s+(?:TO\s+)?(?:"([^"]+)"|'([^']+)'|(?!TO(?:\s|$))([A-Z][A-Z0-9-]*))/i);
    if (assignMatch) {
        result.assignTo = assignMatch[1] ?? assignMatch[2] ?? assignMatch[3] ?? '';
    }
    const orgMatch = text.match(/\bORGANIZATION\s+(?:IS\s+)?(SEQUENTIAL|INDEXED|RELATIVE|LINE\s+SEQUENTIAL)/i);
    if (orgMatch) {
        result.organization = orgMatch[1].toUpperCase();
    }
    const accessMatch = text.match(/\bACCESS\s+(?:MODE\s+)?(?:IS\s+)?(SEQUENTIAL|RANDOM|DYNAMIC)/i);
    if (accessMatch) {
        result.access = accessMatch[1].toUpperCase();
    }
    // Primary key only: the lookbehind rejects the "RECORD KEY" that is part of
    // an "ALTERNATE RECORD KEY" clause (text is single-spaced at this point).
    const keyMatch = text.match(/(?<!\bALTERNATE\s)\bRECORD\s+KEY\s+(?:IS\s+)?([A-Z][A-Z0-9-]+)/i);
    if (keyMatch) {
        result.recordKey = keyMatch[1];
    }
    // ALTERNATE RECORD KEY (may appear several times)
    const alternateKeys = [];
    for (const m of text.matchAll(/\bALTERNATE\s+RECORD\s+KEY\s+(?:IS\s+)?([A-Z][A-Z0-9-]+)/gi)) {
        alternateKeys.push(m[1]);
    }
    if (alternateKeys.length > 0) {
        result.alternateKeys = alternateKeys;
    }
    // FILE STATUS IS / STATUS IS
    const statusMatch = text.match(/\b(?:FILE\s+)?STATUS\s+(?:IS\s+)?([A-Z][A-Z0-9-]+)/i);
    if (statusMatch) {
        result.fileStatus = statusMatch[1];
    }
    // SELECT OPTIONAL flag (property is always set; undefined when absent)
    result.isOptional = /^SELECT\s+OPTIONAL\b/i.test(text) || undefined;
    return result;
}
|
|
400
|
+
/**
 * Parse one accumulated `EXEC SQL ... END-EXEC` block.
 *
 * @param {string} block - Block text including the EXEC SQL / END-EXEC wrapper
 *   (the wrapper is stripped and whitespace collapsed before matching).
 * @param {number} line - Line number recorded on the result.
 * @returns {{line: number, tables: string[], cursors: string[],
 *   hostVariables: string[], operation: string, includeMember: (string|undefined)}}
 *   `tables` are upper-cased and de-duplicated; `cursors` are names found in
 *   `DECLARE <name> CURSOR`; `hostVariables` are `:NAME` references without the
 *   colon, de-duplicated; `operation` is the first SQL keyword when it is one of
 *   the known statement types, otherwise 'OTHER'; `includeMember` is set only
 *   for `INCLUDE <member>`.
 */
function parseExecSqlBlock(block, line) {
    // Strip EXEC SQL ... END-EXEC wrapper
    const body = block
        .replace(/\bEXEC\s+SQL\b/i, '')
        .replace(/\bEND-EXEC\b/i, '')
        .replace(/\s+/g, ' ')
        .trim();
    // Determine operation from first SQL keyword
    const firstWord = body.split(/\s+/)[0]?.toUpperCase() || '';
    const OP_MAP = {
        SELECT: 'SELECT', INSERT: 'INSERT', UPDATE: 'UPDATE', DELETE: 'DELETE',
        DECLARE: 'DECLARE', OPEN: 'OPEN', CLOSE: 'CLOSE', FETCH: 'FETCH',
        INCLUDE: 'OTHER', // INCLUDE is reported via includeMember below
    };
    const operation = OP_MAP[firstWord] || 'OTHER';
    // EXEC SQL INCLUDE: extract the member name
    let includeMember;
    if (firstWord === 'INCLUDE') {
        const includeMatch = body.match(/^INCLUDE\s+(?:'([^']+)'|"([^"]+)"|([A-Z][A-Z0-9_-]+))/i);
        if (includeMatch) {
            includeMember = includeMatch[1] ?? includeMatch[2] ?? includeMatch[3];
        }
    }
    // Extract table names from FROM, INSERT INTO, UPDATE, JOIN.
    // Words that can follow those keywords without naming a table:
    //   FOR UPDATE OF <column>, FROM TABLE(<function>), FROM/JOIN LATERAL (...)
    const NON_TABLE_KEYWORDS = new Set(['OF', 'TABLE', 'LATERAL']);
    const tableNames = new Set();
    // In a FETCH statement, FROM introduces the cursor name, never a table.
    if (operation !== 'FETCH') {
        const tablePatterns = [
            /\bFROM\s+([A-Z][A-Z0-9_]+)/gi,
            /\bINSERT\s+INTO\s+([A-Z][A-Z0-9_]+)/gi,
            /\bUPDATE\s+([A-Z][A-Z0-9_]+)/gi,
            /\bJOIN\s+([A-Z][A-Z0-9_]+)/gi,
        ];
        for (const re of tablePatterns) {
            for (const m of body.matchAll(re)) {
                const name = m[1].toUpperCase();
                if (!NON_TABLE_KEYWORDS.has(name)) {
                    tableNames.add(name); // Set keeps first-seen order
                }
            }
        }
    }
    // Extract cursor names from DECLARE ... CURSOR
    const cursors = [];
    for (const m of body.matchAll(/\bDECLARE\s+([A-Z][A-Z0-9_-]+)\s+CURSOR\b/gi)) {
        cursors.push(m[1]);
    }
    // Extract host variables: :VARIABLE-NAME (strip the colon)
    const hostNames = new Set();
    for (const m of body.matchAll(/:([A-Z][A-Z0-9-]+)/gi)) {
        hostNames.add(m[1]);
    }
    return {
        line,
        tables: [...tableNames],
        cursors,
        hostVariables: [...hostNames],
        operation,
        includeMember,
    };
}
|
|
460
|
+
// ---------------------------------------------------------------------------
|
|
461
|
+
// Private helper: parse EXEC CICS block
|
|
462
|
+
// ---------------------------------------------------------------------------
|
|
463
|
+
/**
 * Parse one accumulated `EXEC CICS ... END-EXEC` block.
 *
 * The result always carries `line` and `command`. The remaining properties are
 * added only when the matching option appears in the block:
 * `mapName`, `programName` + `programIsLiteral`, `transId`,
 * `fileName` + `fileIsLiteral`, `queueName`, `labelName`, `intoField`,
 * `fromField`. The `*IsLiteral` flags are true when the operand was quoted.
 *
 * @param {string} block - Block text including the EXEC CICS / END-EXEC wrapper.
 * @param {number} line - Line number recorded on the result.
 * @returns {object} Parsed command descriptor.
 */
function parseExecCicsBlock(block, line) {
    // Remove the wrapper keywords, then collapse whitespace
    const unwrapped = block.replace(/\bEXEC\s+CICS\b/i, '').replace(/\bEND-EXEC\b/i, '');
    const body = unwrapped.replace(/\s+/g, ' ').trim();
    const upperBody = body.toUpperCase();

    // Commands reported as two words; anything else is reported by its first token
    const twoWordCommands = [
        'SEND MAP', 'RECEIVE MAP', 'SEND TEXT', 'SEND CONTROL',
        'READ NEXT', 'READ PREV',
        'WRITEQ TS', 'WRITEQ TD', 'READQ TS', 'READQ TD',
        'DELETEQ TS', 'DELETEQ TD',
        'HANDLE ABEND', 'HANDLE AID', 'HANDLE CONDITION',
        'START TRANSID',
    ];
    const firstToken = body.split(/\s+/)[0]?.toUpperCase() || '';
    const command = twoWordCommands.find((candidate) => upperBody.startsWith(candidate)) ?? firstToken;
    const result = { line, command };

    // KEYWORD('literal') | KEYWORD("literal") | KEYWORD(IDENTIFIER)
    // -> match with [1] = quoted literal, [2] = bare identifier
    const matchOperand = (keyword) => body.match(
        new RegExp(`\\b${keyword}\\s*\\(\\s*(?:['"]([^'"]+)['"]|([A-Z][A-Z0-9-]+))\\s*\\)`, 'i'));
    // KEYWORD(IDENTIFIER) only -> the identifier, or undefined when absent
    const matchIdentifier = (keyword) => body.match(
        new RegExp(`\\b${keyword}\\s*\\(\\s*([A-Z][A-Z0-9-]+)\\s*\\)`, 'i'))?.[1];

    const map = matchOperand('MAP');
    if (map) {
        result.mapName = map[1] ?? map[2];
    }

    const program = matchOperand('PROGRAM');
    if (program) {
        result.programName = program[1] ?? program[2];
        result.programIsLiteral = Boolean(program[1]);
    }

    const transaction = matchOperand('TRANSID');
    if (transaction) {
        result.transId = transaction[1] ?? transaction[2];
    }

    // FILE and DATASET are treated as the same option
    const file = matchOperand('(?:FILE|DATASET)');
    if (file) {
        result.fileName = file[1] ?? file[2];
        result.fileIsLiteral = Boolean(file[1]);
    }

    const queue = matchOperand('QUEUE');
    if (queue) {
        result.queueName = queue[1] ?? queue[2];
    }

    // LABEL(paragraph-name), INTO(data-area), FROM(data-area)
    const label = matchIdentifier('LABEL');
    if (label !== undefined) {
        result.labelName = label;
    }
    const into = matchIdentifier('INTO');
    if (into !== undefined) {
        result.intoField = into;
    }
    const from = matchIdentifier('FROM');
    if (from !== undefined) {
        result.fromField = from;
    }
    return result;
}
|
|
530
|
+
// ---------------------------------------------------------------------------
|
|
531
|
+
// Private helper: parse EXEC DLI block (IMS/DB)
|
|
532
|
+
// ---------------------------------------------------------------------------
|
|
533
|
+
/**
 * Parse one accumulated `EXEC DLI ... END-EXEC` block.
 *
 * The result always carries `line` and `verb` (first token, upper-cased, or ''
 * for an empty body). `pcbNumber`, `segmentName`, `intoField`, `fromField` and
 * `psbName` are added only when the matching option appears in the block.
 *
 * @param {string} block - Block text including the EXEC DLI / END-EXEC wrapper.
 * @param {number} line - Line number recorded on the result.
 * @returns {object} Parsed DL/I call descriptor.
 */
function parseExecDliBlock(block, line) {
    // Remove the wrapper keywords, then collapse whitespace
    const unwrapped = block.replace(/\bEXEC\s+DLI\b/i, '').replace(/\bEND-EXEC\b/i, '');
    const body = unwrapped.replace(/\s+/g, ' ').trim();

    const [firstToken] = body.split(/\s+/);
    const result = { line, verb: firstToken?.toUpperCase() || '' };

    // USING PCB(n) is the only numeric operand
    const pcb = /\bUSING\s+PCB\s*\(\s*(\d+)\s*\)/i.exec(body);
    if (pcb !== null) {
        result.pcbNumber = parseInt(pcb[1], 10);
    }

    // Remaining operands are single identifiers copied verbatim, in this order
    const identifierOperands = [
        ['segmentName', /\bSEGMENT\s*\(\s*([A-Z][A-Z0-9-]*)\s*\)/i],
        ['intoField', /\bINTO\s*\(\s*([A-Z][A-Z0-9-]+)\s*\)/i],
        ['fromField', /\bFROM\s*\(\s*([A-Z][A-Z0-9-]+)\s*\)/i],
        ['psbName', /\bPSB\s*\(\s*([A-Z][A-Z0-9-]+)\s*\)/i],
    ];
    for (const [property, pattern] of identifierOperands) {
        const found = pattern.exec(body);
        if (found !== null) {
            result[property] = found[1];
        }
    }
    return result;
}
|
|
554
|
+
// ---------------------------------------------------------------------------
|
|
555
|
+
// Main extraction: single-pass state machine
|
|
556
|
+
// ---------------------------------------------------------------------------
|
|
557
|
+
/**
|
|
558
|
+
* Extract COBOL symbols using a single-pass state machine.
|
|
559
|
+
* Extracts program name, paragraphs, sections, CALL, PERFORM, COPY,
|
|
560
|
+
* data items, file declarations, FD entries, and program metadata.
|
|
561
|
+
*/
|
|
562
|
+
export function extractCobolSymbolsWithRegex(content, _filePath) {
|
|
563
|
+
const rawLines = content.split(/\r?\n/);
|
|
564
|
+
const result = {
|
|
565
|
+
programName: null,
|
|
566
|
+
programs: [],
|
|
567
|
+
paragraphs: [],
|
|
568
|
+
sections: [],
|
|
569
|
+
performs: [],
|
|
570
|
+
calls: [],
|
|
571
|
+
copies: [],
|
|
572
|
+
dataItems: [],
|
|
573
|
+
fileDeclarations: [],
|
|
574
|
+
fdEntries: [],
|
|
575
|
+
programMetadata: {},
|
|
576
|
+
execSqlBlocks: [],
|
|
577
|
+
execCicsBlocks: [],
|
|
578
|
+
procedureUsing: [],
|
|
579
|
+
entryPoints: [],
|
|
580
|
+
moves: [],
|
|
581
|
+
gotos: [],
|
|
582
|
+
sorts: [],
|
|
583
|
+
searches: [],
|
|
584
|
+
cancels: [],
|
|
585
|
+
execDliBlocks: [],
|
|
586
|
+
declaratives: [],
|
|
587
|
+
sets: [],
|
|
588
|
+
inspects: [],
|
|
589
|
+
initializes: [],
|
|
590
|
+
};
|
|
591
|
+
// --- State ---
|
|
592
|
+
let currentDivision = null;
|
|
593
|
+
let currentDataSection = 'unknown';
|
|
594
|
+
let currentEnvSection = null;
|
|
595
|
+
let currentParagraph = null;
|
|
596
|
+
// Program boundary stack for nested PROGRAM-ID / END PROGRAM tracking
|
|
597
|
+
const programBoundaryStack = [];
|
|
598
|
+
// SELECT accumulator (multi-line)
|
|
599
|
+
let selectAccum = null;
|
|
600
|
+
let selectStartLine = 0;
|
|
601
|
+
// PROCEDURE DIVISION USING on next line
|
|
602
|
+
let pendingProcUsing = false;
|
|
603
|
+
// SORT/MERGE accumulator (multi-line SORT ... USING ... GIVING ...)
|
|
604
|
+
let sortAccum = null;
|
|
605
|
+
let sortStartLine = 0;
|
|
606
|
+
// EXEC block accumulator (multi-line EXEC SQL / EXEC CICS / EXEC DLI)
|
|
607
|
+
let execAccum = null;
|
|
608
|
+
// DECLARATIVES state
|
|
609
|
+
let inDeclaratives = false;
|
|
610
|
+
// INSPECT accumulator (multi-line)
|
|
611
|
+
let inspectAccum = null;
|
|
612
|
+
let inspectStartLine = 0;
|
|
613
|
+
// CALL accumulator (multi-line CALL ... USING on separate lines)
|
|
614
|
+
let callAccum = null;
|
|
615
|
+
let callAccumLine = 0;
|
|
616
|
+
// FD tracking: after seeing FD, the next 01-level data item is its record
|
|
617
|
+
let pendingFdName = null;
|
|
618
|
+
let pendingFdLine = 0;
|
|
619
|
+
// Continuation line buffer
|
|
620
|
+
let pendingLine = null;
|
|
621
|
+
let pendingLineNumber = 0;
|
|
622
|
+
// --- Detect source format: free vs fixed ---
|
|
623
|
+
// GnuCOBOL uses >>SOURCE FREE directive, typically in first 5 lines
|
|
624
|
+
let isFreeFormat = false;
|
|
625
|
+
for (let i = 0; i < Math.min(rawLines.length, 10); i++) {
|
|
626
|
+
if (/>>SOURCE\s+(?:FORMAT\s+(?:IS\s+)?)?FREE/i.test(rawLines[i])) {
|
|
627
|
+
isFreeFormat = true;
|
|
628
|
+
break;
|
|
629
|
+
}
|
|
630
|
+
}
|
|
631
|
+
// --- Process each raw line ---
|
|
632
|
+
for (let i = 0; i < rawLines.length; i++) {
|
|
633
|
+
const raw = rawLines[i];
|
|
634
|
+
if (isFreeFormat) {
|
|
635
|
+
// FREE FORMAT: no column-position rules
|
|
636
|
+
// Skip >>SOURCE directive lines
|
|
637
|
+
if (/^[ \t]*>>/.test(raw))
|
|
638
|
+
continue;
|
|
639
|
+
// Skip free-format comment lines (*> at start of content)
|
|
640
|
+
const trimmed = raw.trimStart();
|
|
641
|
+
if (trimmed.startsWith('*>') || trimmed.length === 0)
|
|
642
|
+
continue;
|
|
643
|
+
// Strip inline *> comments (quote-aware)
|
|
644
|
+
let commentIdx = -1;
|
|
645
|
+
let ffInQuote = null;
|
|
646
|
+
for (let ci = 0; ci < raw.length - 1; ci++) {
|
|
647
|
+
const c = raw[ci];
|
|
648
|
+
if (ffInQuote) {
|
|
649
|
+
if (c === ffInQuote)
|
|
650
|
+
ffInQuote = null;
|
|
651
|
+
}
|
|
652
|
+
else if (c === '"' || c === "'") {
|
|
653
|
+
ffInQuote = c;
|
|
654
|
+
}
|
|
655
|
+
else if (c === '*' && raw[ci + 1] === '>') {
|
|
656
|
+
commentIdx = ci;
|
|
657
|
+
break;
|
|
658
|
+
}
|
|
659
|
+
}
|
|
660
|
+
const line = commentIdx >= 0 ? raw.substring(0, commentIdx) : raw;
|
|
661
|
+
// Free-format lines are logical lines (no continuation indicator)
|
|
662
|
+
const lineNum = i + 1;
|
|
663
|
+
processLogicalLine(line.trim(), lineNum);
|
|
664
|
+
continue;
|
|
665
|
+
}
|
|
666
|
+
// FIXED FORMAT: column-position-based processing
|
|
667
|
+
// Skip lines too short to have indicator area
|
|
668
|
+
if (raw.length < 7) {
|
|
669
|
+
// If there's a pending continuation, flush it
|
|
670
|
+
if (pendingLine !== null) {
|
|
671
|
+
processLogicalLine(pendingLine, pendingLineNumber);
|
|
672
|
+
pendingLine = null;
|
|
673
|
+
}
|
|
674
|
+
continue;
|
|
675
|
+
}
|
|
676
|
+
const indicator = raw[6];
|
|
677
|
+
// Comment line: indicator is '*' or '/'
|
|
678
|
+
if (indicator === '*' || indicator === '/') {
|
|
679
|
+
continue;
|
|
680
|
+
}
|
|
681
|
+
// Continuation line: indicator is '-'
|
|
682
|
+
if (indicator === '-') {
|
|
683
|
+
if (pendingLine !== null) {
|
|
684
|
+
const continuation = raw.substring(7).trimStart();
|
|
685
|
+
// Handle literal continuation: if continuation starts with a quote,
|
|
686
|
+
// remove the trailing quote from the predecessor and skip the opening quote
|
|
687
|
+
if (continuation.length > 0 && (continuation[0] === '"' || continuation[0] === "'")) {
|
|
688
|
+
const quoteChar = continuation[0];
|
|
689
|
+
const lastQuoteIdx = pendingLine.lastIndexOf(quoteChar);
|
|
690
|
+
if (lastQuoteIdx >= 0) {
|
|
691
|
+
pendingLine = pendingLine.substring(0, lastQuoteIdx) + continuation.substring(1);
|
|
692
|
+
}
|
|
693
|
+
else {
|
|
694
|
+
pendingLine += continuation;
|
|
695
|
+
}
|
|
696
|
+
}
|
|
697
|
+
else {
|
|
698
|
+
pendingLine += continuation;
|
|
699
|
+
}
|
|
700
|
+
}
|
|
701
|
+
continue;
|
|
702
|
+
}
|
|
703
|
+
// Normal line — flush any pending continuation first
|
|
704
|
+
if (pendingLine !== null) {
|
|
705
|
+
processLogicalLine(pendingLine, pendingLineNumber);
|
|
706
|
+
pendingLine = null;
|
|
707
|
+
}
|
|
708
|
+
// Strip inline Italian comments, then use area A+B (from col 7 onwards,
|
|
709
|
+
// but keep full line for indentation-sensitive paragraph/section detection)
|
|
710
|
+
const cleaned = stripInlineComment(raw);
|
|
711
|
+
// Buffer as new pending logical line
|
|
712
|
+
pendingLine = cleaned;
|
|
713
|
+
pendingLineNumber = i + 1; // 1-indexed (consistent with free-format)
|
|
714
|
+
}
|
|
715
|
+
// Flush final pending line
|
|
716
|
+
if (pendingLine !== null) {
|
|
717
|
+
processLogicalLine(pendingLine, pendingLineNumber);
|
|
718
|
+
}
|
|
719
|
+
// Flush any pending SELECT
|
|
720
|
+
flushSelect();
|
|
721
|
+
// Flush any pending SORT/MERGE accumulator (truncated file without trailing period)
|
|
722
|
+
flushSort();
|
|
723
|
+
// Flush any pending INSPECT accumulator (truncated file without trailing period)
|
|
724
|
+
flushInspect();
|
|
725
|
+
// Flush any pending CALL accumulator (truncated file without trailing period)
|
|
726
|
+
flushCallAccum();
|
|
727
|
+
// Flush any pending EXEC block (truncated file without END-EXEC)
|
|
728
|
+
if (execAccum !== null) {
|
|
729
|
+
if (execAccum.type === 'sql') {
|
|
730
|
+
result.execSqlBlocks.push(parseExecSqlBlock(execAccum.lines, execAccum.startLine));
|
|
731
|
+
}
|
|
732
|
+
else if (execAccum.type === 'cics') {
|
|
733
|
+
result.execCicsBlocks.push(parseExecCicsBlock(execAccum.lines, execAccum.startLine));
|
|
734
|
+
}
|
|
735
|
+
else if (execAccum.type === 'dli') {
|
|
736
|
+
result.execDliBlocks.push(parseExecDliBlock(execAccum.lines, execAccum.startLine));
|
|
737
|
+
}
|
|
738
|
+
execAccum = null;
|
|
739
|
+
}
|
|
740
|
+
// If we saw an FD but never found its record, emit it without a record name
|
|
741
|
+
if (pendingFdName !== null) {
|
|
742
|
+
result.fdEntries.push({ fdName: pendingFdName, line: pendingFdLine });
|
|
743
|
+
pendingFdName = null;
|
|
744
|
+
}
|
|
745
|
+
// Finalize any remaining programs on the boundary stack (e.g., single-program
|
|
746
|
+
// files without END PROGRAM, or outermost programs in nested files)
|
|
747
|
+
while (programBoundaryStack.length > 0) {
|
|
748
|
+
const topProgram = programBoundaryStack.pop();
|
|
749
|
+
result.programs.push({
|
|
750
|
+
name: topProgram.name,
|
|
751
|
+
startLine: topProgram.startLine,
|
|
752
|
+
endLine: rawLines.length,
|
|
753
|
+
nestingDepth: programBoundaryStack.length,
|
|
754
|
+
procedureUsing: topProgram.procedureUsing,
|
|
755
|
+
isCommon: topProgram.isCommon,
|
|
756
|
+
});
|
|
757
|
+
}
|
|
758
|
+
// Sort by startLine so outer programs come first
|
|
759
|
+
if (result.programs.length > 1) {
|
|
760
|
+
result.programs.sort((a, b) => a.startLine - b.startLine);
|
|
761
|
+
}
|
|
762
|
+
return result;
|
|
763
|
+
// =========================================================================
|
|
764
|
+
// Inner function: process one logical line (after continuation merging)
|
|
765
|
+
// =========================================================================
|
|
766
|
+
function processLogicalLine(line, lineNum) {
|
|
767
|
+
// --- EXEC block accumulation (spans any division) ---
|
|
768
|
+
if (execAccum !== null) {
|
|
769
|
+
execAccum.lines += ' ' + line;
|
|
770
|
+
if (RE_END_EXEC.test(line)) {
|
|
771
|
+
if (execAccum.type === 'sql') {
|
|
772
|
+
result.execSqlBlocks.push(parseExecSqlBlock(execAccum.lines, execAccum.startLine));
|
|
773
|
+
}
|
|
774
|
+
else if (execAccum.type === 'cics') {
|
|
775
|
+
result.execCicsBlocks.push(parseExecCicsBlock(execAccum.lines, execAccum.startLine));
|
|
776
|
+
}
|
|
777
|
+
else if (execAccum.type === 'dli') {
|
|
778
|
+
result.execDliBlocks.push(parseExecDliBlock(execAccum.lines, execAccum.startLine));
|
|
779
|
+
}
|
|
780
|
+
execAccum = null;
|
|
781
|
+
}
|
|
782
|
+
return; // While accumulating, skip normal processing
|
|
783
|
+
}
|
|
784
|
+
// Check for EXEC SQL / EXEC CICS start
|
|
785
|
+
// Flush any pending CALL accumulator before entering EXEC block
|
|
786
|
+
if (RE_EXEC_SQL_START.test(line)) {
|
|
787
|
+
flushCallAccum();
|
|
788
|
+
execAccum = { type: 'sql', lines: line, startLine: lineNum };
|
|
789
|
+
// If END-EXEC is on the same line, finalize immediately
|
|
790
|
+
if (RE_END_EXEC.test(line)) {
|
|
791
|
+
result.execSqlBlocks.push(parseExecSqlBlock(execAccum.lines, execAccum.startLine));
|
|
792
|
+
execAccum = null;
|
|
793
|
+
}
|
|
794
|
+
return;
|
|
795
|
+
}
|
|
796
|
+
if (RE_EXEC_CICS_START.test(line)) {
|
|
797
|
+
flushCallAccum();
|
|
798
|
+
execAccum = { type: 'cics', lines: line, startLine: lineNum };
|
|
799
|
+
if (RE_END_EXEC.test(line)) {
|
|
800
|
+
result.execCicsBlocks.push(parseExecCicsBlock(execAccum.lines, execAccum.startLine));
|
|
801
|
+
execAccum = null;
|
|
802
|
+
}
|
|
803
|
+
return;
|
|
804
|
+
}
|
|
805
|
+
if (RE_EXEC_DLI_START.test(line)) {
|
|
806
|
+
flushCallAccum();
|
|
807
|
+
execAccum = { type: 'dli', lines: line, startLine: lineNum };
|
|
808
|
+
if (RE_END_EXEC.test(line)) {
|
|
809
|
+
result.execDliBlocks.push(parseExecDliBlock(execAccum.lines, execAccum.startLine));
|
|
810
|
+
execAccum = null;
|
|
811
|
+
}
|
|
812
|
+
return;
|
|
813
|
+
}
|
|
814
|
+
// --- END PROGRAM boundary detection ---
|
|
815
|
+
const endProgramMatch = line.match(RE_END_PROGRAM);
|
|
816
|
+
if (endProgramMatch) {
|
|
817
|
+
// Flush any pending accumulators at program boundary
|
|
818
|
+
flushCallAccum();
|
|
819
|
+
flushSort();
|
|
820
|
+
flushInspect();
|
|
821
|
+
const topProgram = programBoundaryStack.pop();
|
|
822
|
+
if (topProgram) {
|
|
823
|
+
result.programs.push({
|
|
824
|
+
name: topProgram.name,
|
|
825
|
+
startLine: topProgram.startLine,
|
|
826
|
+
endLine: lineNum,
|
|
827
|
+
nestingDepth: programBoundaryStack.length,
|
|
828
|
+
procedureUsing: topProgram.procedureUsing,
|
|
829
|
+
isCommon: topProgram.isCommon,
|
|
830
|
+
});
|
|
831
|
+
}
|
|
832
|
+
return;
|
|
833
|
+
}
|
|
834
|
+
// DECLARATIVES boundary detection
|
|
835
|
+
if (RE_DECLARATIVES_START.test(line)) {
|
|
836
|
+
inDeclaratives = true;
|
|
837
|
+
return;
|
|
838
|
+
}
|
|
839
|
+
if (RE_DECLARATIVES_END.test(line)) {
|
|
840
|
+
inDeclaratives = false;
|
|
841
|
+
return;
|
|
842
|
+
}
|
|
843
|
+
// Detect PROGRAM-ID regardless of current division state (handles sibling
|
|
844
|
+
// programs after END PROGRAM where IDENTIFICATION DIVISION header is omitted)
|
|
845
|
+
if (currentDivision !== 'identification') {
|
|
846
|
+
const pgmIdMatch = line.match(RE_PROGRAM_ID);
|
|
847
|
+
if (pgmIdMatch) {
|
|
848
|
+
flushCallAccum();
|
|
849
|
+
flushSort();
|
|
850
|
+
flushInspect();
|
|
851
|
+
extractIdentification(line, lineNum);
|
|
852
|
+
return;
|
|
853
|
+
}
|
|
854
|
+
}
|
|
855
|
+
// --- Division transitions ---
|
|
856
|
+
const divMatch = line.match(RE_DIVISION);
|
|
857
|
+
if (divMatch) {
|
|
858
|
+
// Flush any pending accumulators on division boundary
|
|
859
|
+
flushSelect();
|
|
860
|
+
flushCallAccum();
|
|
861
|
+
flushSort();
|
|
862
|
+
flushInspect();
|
|
863
|
+
const divName = divMatch[1].toUpperCase();
|
|
864
|
+
switch (divName) {
|
|
865
|
+
case 'IDENTIFICATION':
|
|
866
|
+
currentDivision = 'identification';
|
|
867
|
+
break;
|
|
868
|
+
case 'ENVIRONMENT':
|
|
869
|
+
currentDivision = 'environment';
|
|
870
|
+
currentEnvSection = null;
|
|
871
|
+
break;
|
|
872
|
+
case 'DATA':
|
|
873
|
+
currentDivision = 'data';
|
|
874
|
+
currentDataSection = 'unknown';
|
|
875
|
+
break;
|
|
876
|
+
case 'PROCEDURE': {
|
|
877
|
+
currentDivision = 'procedure';
|
|
878
|
+
currentParagraph = null;
|
|
879
|
+
const procUsingMatch = line.match(RE_PROC_USING);
|
|
880
|
+
if (procUsingMatch) {
|
|
881
|
+
const params = procUsingMatch[1].split(/\bRETURNING\b/i)[0].trim().split(/\s+/)
|
|
882
|
+
.filter(s => s.length > 0 && !USING_KEYWORDS.has(s.toUpperCase()));
|
|
883
|
+
result.procedureUsing = params;
|
|
884
|
+
// Store per-program on the boundary stack
|
|
885
|
+
const topProg = programBoundaryStack[programBoundaryStack.length - 1];
|
|
886
|
+
if (topProg)
|
|
887
|
+
topProg.procedureUsing = params;
|
|
888
|
+
pendingProcUsing = false;
|
|
889
|
+
}
|
|
890
|
+
else {
|
|
891
|
+
// USING may be on the next line — flag for extractProcedure to pick up
|
|
892
|
+
// Only set if the line is NOT period-terminated (period = no USING clause)
|
|
893
|
+
pendingProcUsing = !/\.\s*$/.test(line);
|
|
894
|
+
}
|
|
895
|
+
break;
|
|
896
|
+
}
|
|
897
|
+
}
|
|
898
|
+
return;
|
|
899
|
+
}
|
|
900
|
+
// --- Section transitions ---
|
|
901
|
+
const secMatch = line.match(RE_SECTION);
|
|
902
|
+
if (secMatch) {
|
|
903
|
+
flushSelect();
|
|
904
|
+
const secName = secMatch[1].toUpperCase();
|
|
905
|
+
switch (secName) {
|
|
906
|
+
case 'WORKING-STORAGE':
|
|
907
|
+
currentDivision = 'data';
|
|
908
|
+
currentDataSection = 'working-storage';
|
|
909
|
+
break;
|
|
910
|
+
case 'LINKAGE':
|
|
911
|
+
currentDivision = 'data';
|
|
912
|
+
currentDataSection = 'linkage';
|
|
913
|
+
break;
|
|
914
|
+
case 'FILE':
|
|
915
|
+
currentDivision = 'data';
|
|
916
|
+
currentDataSection = 'file';
|
|
917
|
+
break;
|
|
918
|
+
case 'LOCAL-STORAGE':
|
|
919
|
+
currentDivision = 'data';
|
|
920
|
+
currentDataSection = 'local-storage';
|
|
921
|
+
break;
|
|
922
|
+
case 'SCREEN':
|
|
923
|
+
currentDivision = 'data';
|
|
924
|
+
currentDataSection = 'screen';
|
|
925
|
+
break;
|
|
926
|
+
case 'INPUT-OUTPUT':
|
|
927
|
+
currentDivision = 'environment';
|
|
928
|
+
currentEnvSection = 'input-output';
|
|
929
|
+
break;
|
|
930
|
+
case 'CONFIGURATION':
|
|
931
|
+
currentDivision = 'environment';
|
|
932
|
+
currentEnvSection = 'configuration';
|
|
933
|
+
break;
|
|
934
|
+
}
|
|
935
|
+
return;
|
|
936
|
+
}
|
|
937
|
+
// --- COPY (all divisions) ---
|
|
938
|
+
const copyQMatch = line.match(RE_COPY_QUOTED);
|
|
939
|
+
if (copyQMatch) {
|
|
940
|
+
result.copies.push({ target: copyQMatch[1] ?? copyQMatch[2], line: lineNum });
|
|
941
|
+
}
|
|
942
|
+
else {
|
|
943
|
+
const copyUMatch = line.match(RE_COPY_UNQUOTED);
|
|
944
|
+
if (copyUMatch) {
|
|
945
|
+
result.copies.push({ target: copyUMatch[1], line: lineNum });
|
|
946
|
+
}
|
|
947
|
+
}
|
|
948
|
+
// --- CALL (all divisions, typically procedure) ---
|
|
949
|
+
// Multi-line CALL accumulator: accumulate CALL statement until period or END-CALL.
|
|
950
|
+
// Continuation lines (not the start line) are consumed entirely — return after flush
|
|
951
|
+
// to prevent false paragraph detection on lines like "WS-ADDR." or "WS-CUST-CODE."
|
|
952
|
+
if (callAccum !== null) {
|
|
953
|
+
// Check if this continuation line starts a new COBOL statement (not a USING parameter).
|
|
954
|
+
// Use (?:\s|$) instead of \b to prevent matching hyphenated identifiers like MOVE-COUNT.
|
|
955
|
+
// Only use RE_PROC_PARAGRAPH as flush trigger when in Area A (≤7 leading spaces, fixed-format).
|
|
956
|
+
// In free-format, never use RE_PROC_PARAGRAPH (can't distinguish parameters from paragraphs).
|
|
957
|
+
const trimmedLine = line.trimStart();
|
|
958
|
+
const leadingSpaces = (line.match(/^(\s*)/)?.[1].length ?? 0);
|
|
959
|
+
const isAreaAParagraph = RE_PROC_PARAGRAPH.test(line) && (!isFreeFormat ? leadingSpaces <= 7 : false);
|
|
960
|
+
if (RE_STATEMENT_VERB_START.test(trimmedLine)
|
|
961
|
+
|| RE_PROC_SECTION.test(line) || isAreaAParagraph) {
|
|
962
|
+
flushCallAccum(); // Flush CALL without this line's content
|
|
963
|
+
// Fall through to process this line normally
|
|
964
|
+
}
|
|
965
|
+
else {
|
|
966
|
+
callAccum += ' ' + line;
|
|
967
|
+
if (/\.\s*$/.test(callAccum) || /\bEND-CALL\b/i.test(callAccum)) {
|
|
968
|
+
flushCallAccum();
|
|
969
|
+
}
|
|
970
|
+
return; // continuation line consumed by CALL accumulator
|
|
971
|
+
}
|
|
972
|
+
}
|
|
973
|
+
else if (currentDivision === 'procedure' && /(?<![A-Z0-9-])\bCALL\s+(?:"[^"]+"|'[^']+'|[A-Z][A-Z0-9-]+)/i.test(line)) {
|
|
974
|
+
// Check if this is a complete single-line CALL (ends with period or END-CALL)
|
|
975
|
+
if (/\.\s*$/.test(line) || /\bEND-CALL\b/i.test(line)) {
|
|
976
|
+
// Single-line CALL — extract immediately via flushCallAccum
|
|
977
|
+
callAccum = line;
|
|
978
|
+
callAccumLine = lineNum;
|
|
979
|
+
flushCallAccum();
|
|
980
|
+
}
|
|
981
|
+
else {
|
|
982
|
+
// Multi-line CALL — start accumulating
|
|
983
|
+
callAccum = line;
|
|
984
|
+
callAccumLine = lineNum;
|
|
985
|
+
return; // prevent CALL start line from feeding sortAccum/inspectAccum
|
|
986
|
+
}
|
|
987
|
+
}
|
|
988
|
+
// --- Division-specific extraction ---
|
|
989
|
+
switch (currentDivision) {
|
|
990
|
+
case 'identification':
|
|
991
|
+
extractIdentification(line, lineNum);
|
|
992
|
+
break;
|
|
993
|
+
case 'environment':
|
|
994
|
+
extractEnvironment(line, lineNum);
|
|
995
|
+
break;
|
|
996
|
+
case 'data':
|
|
997
|
+
extractData(line, lineNum);
|
|
998
|
+
break;
|
|
999
|
+
case 'procedure':
|
|
1000
|
+
extractProcedure(line, lineNum);
|
|
1001
|
+
break;
|
|
1002
|
+
}
|
|
1003
|
+
}
|
|
1004
|
+
// =========================================================================
|
|
1005
|
+
// IDENTIFICATION DIVISION extraction
|
|
1006
|
+
// =========================================================================
|
|
1007
|
+
/**
 * Process one line of the IDENTIFICATION DIVISION.
 *
 * A PROGRAM-ID line records the program name (only the first one is kept on
 * `result.programName`), resets the division/section/paragraph state and
 * pushes a new entry on `programBoundaryStack`. Any other line is checked, in
 * order, against AUTHOR, DATE-WRITTEN, DATE-COMPILED and INSTALLATION; the
 * first pattern that matches stores its value (trailing period removed) in
 * `result.programMetadata`.
 *
 * @param {string} line - Source line to examine.
 * @param {number} lineNum - Line number stored as the program's `startLine`.
 */
function extractIdentification(line, lineNum) {
    const programId = line.match(RE_PROGRAM_ID);
    if (programId) {
        const programName = programId[1];
        if (result.programName === null) {
            result.programName = programName;
        }
        // A PROGRAM-ID opens a new (nested or sibling) program: reset the state machine.
        currentDivision = 'identification';
        currentDataSection = 'unknown';
        currentEnvSection = null;
        currentParagraph = null;
        // isCommon is either true or left undefined, never false.
        const hasCommonAttribute = /\bIS\s+COMMON\b/i.test(line);
        programBoundaryStack.push({
            name: programName,
            startLine: lineNum,
            isCommon: hasCommonAttribute ? true : undefined,
        });
        return;
    }
    // Metadata paragraphs, tried in this order; the first hit wins.
    const metadataPatterns = [
        ['author', RE_AUTHOR],
        ['dateWritten', RE_DATE_WRITTEN],
        ['dateCompiled', RE_DATE_COMPILED],
        ['installation', RE_INSTALLATION],
    ];
    for (const [field, pattern] of metadataPatterns) {
        const found = line.match(pattern);
        if (found) {
            result.programMetadata[field] = found[1].replace(/\.\s*$/, '').trim();
            return;
        }
    }
}
|
|
1044
|
+
// =========================================================================
|
|
1045
|
+
// ENVIRONMENT DIVISION extraction
|
|
1046
|
+
// =========================================================================
|
|
1047
|
+
/**
 * Process one line of the ENVIRONMENT DIVISION.
 *
 * Lines are ignored unless the current environment section is
 * 'input-output'. A line that starts a SELECT flushes any SELECT still being
 * accumulated and begins a new one; other lines are appended to the SELECT in
 * progress. As soon as the accumulated text ends with a period it is flushed.
 *
 * @param {string} line - Source line to examine.
 * @param {number} lineNum - Line number remembered as the SELECT's start line.
 */
function extractEnvironment(line, lineNum) {
    if (currentEnvSection !== 'input-output') {
        return;
    }
    const trimmedLine = line.trim();
    if (line.match(RE_SELECT_START)) {
        // A new SELECT implicitly terminates the previous one.
        flushSelect();
        selectAccum = trimmedLine;
        selectStartLine = lineNum;
    }
    else if (selectAccum !== null) {
        // Continuation line of the SELECT currently being collected.
        selectAccum += ' ' + trimmedLine;
    }
    if (selectAccum === null) {
        return;
    }
    // A trailing period terminates the SELECT statement.
    if (/\.\s*$/.test(selectAccum)) {
        flushSelect();
    }
}
|
|
1067
|
+
/**
 * Parse the accumulated SELECT statement, if any, and clear the accumulator.
 *
 * The text in `selectAccum` is handed to `parseSelectStatement` together with
 * `selectStartLine`; a truthy result is appended to `result.fileDeclarations`.
 * Does nothing when no SELECT is being accumulated.
 */
function flushSelect() {
    if (selectAccum !== null) {
        const declaration = parseSelectStatement(selectAccum, selectStartLine);
        if (declaration) {
            result.fileDeclarations.push(declaration);
        }
        selectAccum = null;
    }
}
|
|
1076
|
+
/**
 * Flush the accumulated SORT / MERGE statement held in `sortAccum`.
 *
 * When the text matches RE_SORT or RE_MERGE this records:
 *   - one entry in `result.sorts` with the sort file (capture group 1), the
 *     USING files and the GIVING files;
 *   - one entry in `result.performs` for each INPUT PROCEDURE / OUTPUT
 *     PROCEDURE target (with optional THRU/THROUGH end), attributed to
 *     `currentParagraph`.
 * The accumulator is always cleared afterwards. Does nothing when no
 * statement is being accumulated.
 *
 * The USING file list ends at GIVING *or* at OUTPUT PROCEDURE: a SORT may
 * combine `USING file...` with `OUTPUT PROCEDURE IS proc`, and the procedure
 * names must not be reported as input files.
 */
function flushSort() {
    if (sortAccum === null)
        return;
    const fullSort = sortAccum;
    const smatch = fullSort.match(RE_SORT) || fullSort.match(RE_MERGE);
    if (smatch) {
        // Split a clause into candidate file names: drop a trailing period,
        // keep identifier-shaped tokens, discard clause keywords.
        const toFileNames = (clauseText) => clauseText.trim().split(/\s+/)
            .map((token) => token.replace(/\.$/, ''))
            .filter((token) => /^[A-Z][A-Z0-9-]+$/i.test(token) && !SORT_CLAUSE_NOISE.has(token.toUpperCase()));
        // Search the original text case-insensitively so the offsets are
        // always valid for `fullSort` itself (an upper-cased copy can differ
        // in length for some non-ASCII characters).
        const usingIdx = fullSort.search(/\bUSING\s/i);
        const givingIdx = fullSort.search(/\bGIVING\s/i);
        let usingFiles = [];
        let givingFiles = [];
        if (usingIdx >= 0) {
            const afterUsing = fullSort.substring(usingIdx + 'USING'.length);
            // The USING list stops at whichever output phrase follows it.
            const endIdx = afterUsing.search(/\bGIVING\b|\bOUTPUT\s+PROCEDURE\b/i);
            const usingText = endIdx >= 0 ? afterUsing.substring(0, endIdx) : afterUsing;
            usingFiles = toFileNames(usingText);
        }
        if (givingIdx >= 0) {
            givingFiles = toFileNames(fullSort.substring(givingIdx + 'GIVING'.length));
        }
        // INPUT PROCEDURE IS / OUTPUT PROCEDURE IS → control-flow targets (like PERFORM)
        // Supports optional THRU/THROUGH range: INPUT PROCEDURE IS proc-start THRU proc-end
        const inputProcMatch = fullSort.match(/\bINPUT\s+PROCEDURE\s+(?:IS\s+)?([A-Z][A-Z0-9-]+)(?:\s+(?:THRU|THROUGH)\s+([A-Z][A-Z0-9-]+))?/i);
        const outputProcMatch = fullSort.match(/\bOUTPUT\s+PROCEDURE\s+(?:IS\s+)?([A-Z][A-Z0-9-]+)(?:\s+(?:THRU|THROUGH)\s+([A-Z][A-Z0-9-]+))?/i);
        for (const procMatch of [inputProcMatch, outputProcMatch]) {
            if (procMatch) {
                result.performs.push({ caller: currentParagraph, target: procMatch[1], thruTarget: procMatch[2] || undefined, line: sortStartLine });
            }
        }
        result.sorts.push({ sortFile: smatch[1], usingFiles, givingFiles, line: sortStartLine });
    }
    sortAccum = null;
}
|
|
1111
|
+
/**
 * Flush the accumulated INSPECT statement held in `inspectAccum`.
 *
 * Records one entry in `result.inspects` containing the inspected field, the
 * TALLYING counter names (identifiers immediately followed by FOR), the
 * statement form ('converting', 'tallying-replacing', 'tallying' or
 * 'replacing'), the start line and the current paragraph. The accumulator is
 * always cleared. Does nothing when no INSPECT is being accumulated, and
 * records nothing when no field name follows INSPECT.
 *
 * The TALLYING phrase may end at REPLACING, at CONVERTING, at the terminating
 * period, or simply at the end of the text: the caller flushes an INSPECT
 * "as-is" when a new statement verb arrives, so the text is not guaranteed to
 * end with a period. Without the end-of-text alternative those counters would
 * be lost.
 */
function flushInspect() {
    if (inspectAccum === null)
        return;
    const text = inspectAccum;
    inspectAccum = null;
    const fieldMatch = text.match(/\bINSPECT\s+([A-Z][A-Z0-9-]+)/i);
    if (!fieldMatch)
        return;
    const tallySection = text.match(/\bTALLYING\b([\s\S]+?)(?:\bREPLACING\b|\bCONVERTING\b|\.\s*$|$)/i);
    const counters = tallySection
        ? Array.from(tallySection[1].matchAll(/([A-Z][A-Z0-9-]+)\s+FOR\b/gi), (counterMatch) => counterMatch[1])
        : [];
    const hasTallying = /\bTALLYING\b/i.test(text);
    const hasReplacing = /\bREPLACING\b/i.test(text);
    const hasConverting = /\bCONVERTING\b/i.test(text);
    // CONVERTING takes precedence; 'replacing' is the fallback form.
    let form = 'replacing';
    if (hasConverting) {
        form = 'converting';
    }
    else if (hasTallying) {
        form = hasReplacing ? 'tallying-replacing' : 'tallying';
    }
    result.inspects.push({
        inspectedField: fieldMatch[1],
        counters,
        form,
        line: inspectStartLine,
        caller: currentParagraph,
    });
}
|
|
1145
|
+
/**
 * Flush the accumulated (possibly multi-line) CALL statement in `callAccum`.
 *
 * From the full accumulated text this records, in order:
 *   1. every RE_CALL match as a quoted call,
 *   2. every RE_CALL_DYNAMIC match as a dynamic (non-quoted) call,
 *   3. every RE_CANCEL / RE_CANCEL_DYNAMIC match as a cancel.
 * For each call, the USING parameters and the RETURNING item are re-extracted
 * from the text that follows the match. All records carry `callAccumLine` as
 * their line. The accumulator is cleared at the end; nothing happens when it
 * is already empty.
 */
function flushCallAccum() {
    if (callAccum === null)
        return;
    const statement = callAccum;
    // Push one call record for `match`, reading USING / RETURNING from the text after it.
    const recordCall = (match, target, isQuoted) => {
        const tail = statement.substring(match.index + match[0].length);
        const usingClause = tail.match(RE_USING_PARAMS);
        let parameters;
        if (usingClause) {
            // Everything from RETURNING onwards is not part of the parameter list.
            parameters = usingClause[1].split(/\bRETURNING\b/i)[0].trim().split(/\s+/)
                .filter(token => token.length > 0 && !CALL_USING_FILTER.has(token.toUpperCase()) && /^[A-Z][A-Z0-9-]+$/i.test(token));
        }
        const returningClause = tail.match(/\bRETURNING\s+([A-Z][A-Z0-9-]+)/i);
        let returning;
        if (returningClause) {
            returning = returningClause[1];
        }
        result.calls.push({ target, line: callAccumLine, isQuoted, parameters, returning });
    };
    // Quoted CALLs
    for (const quotedCall of statement.matchAll(RE_CALL)) {
        recordCall(quotedCall, quotedCall[1] ?? quotedCall[2], true);
    }
    // Dynamic CALLs
    for (const dynamicCall of statement.matchAll(RE_CALL_DYNAMIC)) {
        recordCall(dynamicCall, dynamicCall[1], false);
    }
    // CANCELs embedded in the CALL block (common in ON EXCEPTION handlers)
    for (const quotedCancel of statement.matchAll(RE_CANCEL)) {
        result.cancels.push({ target: quotedCancel[1] ?? quotedCancel[2], line: callAccumLine, isQuoted: true });
    }
    for (const dynamicCancel of statement.matchAll(RE_CANCEL_DYNAMIC)) {
        result.cancels.push({ target: dynamicCancel[1], line: callAccumLine, isQuoted: false });
    }
    callAccum = null;
}
|
|
1187
|
+
// =========================================================================
|
|
1188
|
+
// DATA DIVISION extraction
|
|
1189
|
+
// =========================================================================
|
|
1190
|
+
/**
 * Process one line of the DATA DIVISION.
 *
 * Checks are made in this order, and the first that applies ends processing:
 *   1. FD entry: remembered as pending (a previous pending FD that never got
 *      a record is emitted without `recordName`).
 *   2. Level-88 condition name: pushed to `result.dataItems` with its values.
 *   3. Level-66 RENAMES: pushed with the RENAMES target stored in `redefines`.
 *   4. Anonymous REDEFINES ("NN REDEFINES target" with no name): skipped.
 *   5. Standard data item (levels 01-49, 66, 77; FILLER skipped): pushed with
 *      whichever clauses `parseDataItemClauses` reports. A level-01 item
 *      following a pending FD is recorded as that FD's record.
 *
 * @param {string} line - Source line to examine.
 * @param {number} lineNum - Line number stored on emitted entries.
 */
function extractData(line, lineNum) {
    // FD entry
    const fd = line.match(RE_FD);
    if (fd) {
        // A previous FD that never received a record is emitted on its own.
        if (pendingFdName !== null) {
            result.fdEntries.push({ fdName: pendingFdName, line: pendingFdLine });
        }
        pendingFdName = fd[1];
        pendingFdLine = lineNum;
        return;
    }
    // 88-level condition names
    const condition = line.match(RE_88_LEVEL);
    if (condition) {
        const conditionName = condition[1];
        const conditionValues = parseConditionValues(condition[2]);
        result.dataItems.push({
            name: conditionName,
            level: 88,
            line: lineNum,
            values: conditionValues,
            section: currentDataSection,
        });
        return;
    }
    // Level 66 RENAMES
    const renames = line.match(RE_66_LEVEL);
    if (renames) {
        result.dataItems.push({
            name: renames[1],
            level: 66,
            line: lineNum,
            redefines: renames[2], // RENAMES target stored as redefines
            section: currentDataSection,
        });
        return;
    }
    const anonymousRedefines = line.match(RE_ANONYMOUS_REDEFINES);
    const dataItem = line.match(RE_DATA_ITEM);
    // Anonymous REDEFINES (e.g. "01 REDEFINES WK-PERIVAL."): either the data-item
    // pattern does not match at all, or it captured REDEFINES itself as the name.
    // Such entries produce no node.
    if (anonymousRedefines && (!dataItem || dataItem[2].toUpperCase() === 'REDEFINES')) {
        return;
    }
    if (!dataItem) {
        return;
    }
    // Standard data items
    const level = Number.parseInt(dataItem[1], 10);
    const name = dataItem[2];
    if (name.toUpperCase() === 'FILLER') {
        return;
    }
    // Valid levels: 01-49, 66, 77
    const isValidLevel = (level >= 1 && level <= 49) || level === 66 || level === 77;
    if (!isValidLevel) {
        return;
    }
    const clauses = parseDataItemClauses(dataItem[3] || '');
    const entry = {
        name,
        level,
        line: lineNum,
        section: currentDataSection,
    };
    // Optional clauses are copied only when present, in a fixed order.
    for (const key of ['pic', 'usage']) {
        if (clauses[key]) {
            entry[key] = clauses[key];
        }
    }
    if (clauses.occurs !== undefined) {
        entry.occurs = clauses.occurs;
    }
    for (const key of ['dependingOn', 'redefines']) {
        if (clauses[key]) {
            entry[key] = clauses[key];
        }
    }
    if (clauses.value) {
        entry.values = [clauses.value];
    }
    if (clauses.isExternal) {
        entry.isExternal = true;
    }
    if (clauses.isGlobal) {
        entry.isGlobal = true;
    }
    result.dataItems.push(entry);
    // The first 01-level after an FD is that FD's record.
    if (pendingFdName !== null && level === 1) {
        result.fdEntries.push({
            fdName: pendingFdName,
            recordName: name,
            line: pendingFdLine,
        });
        pendingFdName = null;
    }
}
|
|
1290
|
+
// =========================================================================
|
|
1291
|
+
// PROCEDURE DIVISION extraction
|
|
1292
|
+
// =========================================================================
|
|
1293
|
+
/**
 * Process one line of the PROCEDURE DIVISION.
 *
 * The extractors run in a fixed order; several of them end processing of the
 * line early:
 *   - USE AFTER inside DECLARATIVES, a continuation-line PROCEDURE DIVISION
 *     USING, a section header and a paragraph header each consume the line.
 *   - PERFORM, ENTRY, MOVE and GO TO are then extracted from the line.
 *   - SORT/MERGE and INSPECT are multi-line: while one is being accumulated
 *     and not yet terminated by a period, the remaining extractors are
 *     skipped for the line.
 *   - SEARCH, CANCEL, SET and INITIALIZE are extracted last.
 *
 * @param {string} line - Source line to examine.
 * @param {number} lineNum - Line number stored on emitted entries.
 */
function extractProcedure(line, lineNum) {
    const isDataName = (token) => /^[A-Z][A-Z0-9-]+$/i.test(token);
    const endsWithPeriod = (text) => /\.\s*$/.test(text);
    const countLeadingWhitespace = () => line.match(/^(\s*)/)?.[1].length ?? 0;
    // USE AFTER EXCEPTION in DECLARATIVES
    if (inDeclaratives) {
        const useClause = line.match(RE_USE_AFTER);
        if (useClause) {
            // Attach the USE to the most recently seen section.
            const enclosingSection = result.sections[result.sections.length - 1];
            if (enclosingSection) {
                result.declaratives.push({
                    sectionName: enclosingSection.name,
                    target: useClause[1],
                    line: lineNum,
                });
            }
            return;
        }
    }
    // PROCEDURE DIVISION USING on a continuation line
    if (pendingProcUsing) {
        const usingClause = line.match(/\bUSING\s+([\s\S]*?)(?:\.|$)/i);
        pendingProcUsing = false;
        if (usingClause) {
            const params = usingClause[1].split(/\bRETURNING\b/i)[0].trim().split(/\s+/)
                .filter(token => token.length > 0 && !USING_KEYWORDS.has(token.toUpperCase()));
            result.procedureUsing = params;
            const innermostProgram = programBoundaryStack[programBoundaryStack.length - 1];
            if (innermostProgram) {
                innermostProgram.procedureUsing = params;
            }
            return; // consumed the USING line
        }
    }
    // Section header
    const sectionHeader = line.match(RE_PROC_SECTION);
    if (sectionHeader) {
        const sectionName = sectionHeader[1];
        const upperSection = sectionName.toUpperCase();
        if (!EXCLUDED_PARA_NAMES.has(upperSection) && !upperSection.includes('DIVISION')) {
            // currentParagraph is deliberately left alone: sections are Namespaces,
            // not Functions, and PERFORMs must stay attributed to the containing
            // paragraph rather than the section.
            result.sections.push({ name: sectionName, line: lineNum });
        }
        return;
    }
    // Paragraph header
    const paragraphHeader = line.match(RE_PROC_PARAGRAPH);
    if (paragraphHeader) {
        // Fixed format: paragraphs start in Area A (at most 7 leading spaces).
        // Deeper-indented matches (Area B) are data items or CALL USING parameters
        // on continuation lines, not paragraphs.
        if (!isFreeFormat && countLeadingWhitespace() > 7) {
            return; // Area B — not a paragraph
        }
        const paragraphName = paragraphHeader[1];
        const upperParagraph = paragraphName.toUpperCase();
        const isReservedName = EXCLUDED_PARA_NAMES.has(upperParagraph)
            || upperParagraph.startsWith('END-')
            || upperParagraph === 'DIVISION'
            || upperParagraph === 'SECTION';
        if (!isReservedName) {
            result.paragraphs.push({ name: paragraphName, line: lineNum });
            currentParagraph = paragraphName;
        }
        return;
    }
    // PERFORM — every occurrence on the line
    for (const perform of line.matchAll(RE_PERFORM)) {
        const target = perform[1];
        // Inline-perform keywords are not paragraph names.
        if (PERFORM_KEYWORD_SKIP.has(target.toUpperCase())) {
            continue;
        }
        // "PERFORM identifier TIMES": the identifier is a repeat count held in a
        // data item, not a paragraph name.
        const afterPerform = line.substring(perform.index + perform[0].length).trim();
        if (/^TIMES\b/i.test(afterPerform)) {
            continue;
        }
        result.performs.push({
            caller: currentParagraph,
            target,
            thruTarget: perform[2] || undefined,
            line: lineNum,
        });
    }
    // ENTRY point
    const entry = line.match(RE_ENTRY);
    if (entry) {
        const entryName = entry[1] ?? entry[2];
        if (entryName) {
            const entryUsing = entry[3];
            const parameters = entryUsing
                ? entryUsing.trim().split(/\s+/).filter(token => token.length > 0 && !USING_KEYWORDS.has(token.toUpperCase()))
                : [];
            result.entryPoints.push({
                name: entryName,
                parameters,
                line: lineNum,
            });
        }
    }
    // MOVE statement (sources listed in MOVE_SKIP are ignored)
    const move = line.match(RE_MOVE);
    if (move && !MOVE_SKIP.has(move[2].toUpperCase())) {
        const corresponding = Boolean(move[1]);
        let targets;
        if (corresponding) {
            // MOVE CORRESPONDING is always single-target per COBOL standard
            const firstToken = move[3].replace(/\..*$/, '').trim().split(/\s+/)[0];
            targets = [firstToken].filter(token => isDataName(token));
        }
        else {
            targets = extractMoveTargets(move[3]);
        }
        if (targets.length > 0) {
            result.moves.push({
                from: move[2],
                targets,
                line: lineNum,
                caller: currentParagraph,
                corresponding,
            });
        }
    }
    // GO TO — one entry per identifier-shaped token of the captured target list
    const goTo = line.match(RE_GOTO);
    if (goTo) {
        const gotoTargets = goTo[1].trim().split(/\s+/).filter(token => isDataName(token));
        for (const target of gotoTargets) {
            result.gotos.push({ caller: currentParagraph, target, line: lineNum });
        }
    }
    // SORT / MERGE (multi-line: accumulate until period)
    if (sortAccum !== null) {
        sortAccum += ' ' + line;
        if (!endsWithPeriod(sortAccum)) {
            return; // still accumulating — skip other extractors
        }
        // Terminated: flush, then let the line continue through the checks below.
        flushSort();
    }
    const sortStart = line.match(RE_SORT) || line.match(RE_MERGE);
    if (sortStart && sortAccum === null) {
        sortAccum = line;
        sortStartLine = lineNum;
        if (!endsWithPeriod(sortAccum)) {
            return; // multi-line — wait for period
        }
        flushSort();
    }
    // INSPECT — multi-line accumulator (like SORT). A section header, an Area A
    // paragraph header or the start of another statement ends the INSPECT
    // as-is; the line is then processed normally.
    if (inspectAccum !== null) {
        const withoutIndent = line.trimStart();
        const indentWidth = countLeadingWhitespace();
        const looksLikeParagraph = RE_PROC_PARAGRAPH.test(line);
        const isAreaAParagraph = looksLikeParagraph && !isFreeFormat && indentWidth <= 7;
        const interruptsInspect = RE_PROC_SECTION.test(line)
            || isAreaAParagraph
            || RE_STATEMENT_VERB_START.test(withoutIndent)
            || /^CALL(?:\s|$)/i.test(withoutIndent);
        if (interruptsInspect) {
            flushInspect();
            // Fall through to process this line normally
        }
        else {
            inspectAccum += ' ' + line;
            if (!endsWithPeriod(inspectAccum)) {
                return;
            }
            flushInspect();
        }
    }
    const inspectStart = line.match(/\bINSPECT\s+([A-Z][A-Z0-9-]+)/i);
    if (inspectStart && inspectAccum === null) {
        inspectAccum = line;
        inspectStartLine = lineNum;
        if (!endsWithPeriod(inspectAccum)) {
            return;
        }
        flushInspect();
    }
    // SEARCH — table access
    const search = line.match(RE_SEARCH);
    if (search) {
        result.searches.push({ target: search[1], line: lineNum });
    }
    // CANCEL — every quoted occurrence on the line
    for (const quotedCancel of line.matchAll(RE_CANCEL)) {
        result.cancels.push({ target: quotedCancel[1] ?? quotedCancel[2], line: lineNum, isQuoted: true });
    }
    // Dynamic CANCEL — RE_CANCEL_DYNAMIC cannot match quoted targets, no dedup guard needed
    for (const dynamicCancel of line.matchAll(RE_CANCEL_DYNAMIC)) {
        result.cancels.push({ target: dynamicCancel[1], line: lineNum, isQuoted: false });
    }
    // SET statement: "TO TRUE" form first, otherwise the index form
    const setToTrue = line.match(RE_SET_TO_TRUE);
    if (setToTrue) {
        const targets = setToTrue[1].trim().split(/\s+/)
            .filter(token => isDataName(token) && token.toUpperCase() !== 'OF');
        if (targets.length > 0) {
            result.sets.push({ targets, form: 'to-true', line: lineNum, caller: currentParagraph });
        }
    }
    else {
        const setIndex = line.match(RE_SET_INDEX);
        if (setIndex) {
            const targets = setIndex[1].trim().split(/\s+/)
                .filter(token => isDataName(token));
            const mode = setIndex[2].toUpperCase();
            let form;
            if (mode === 'TO') {
                form = 'to-value';
            }
            else if (mode.startsWith('UP')) {
                form = 'up-by';
            }
            else {
                form = 'down-by';
            }
            result.sets.push({ targets, form, value: setIndex[3], line: lineNum, caller: currentParagraph });
        }
    }
    // INITIALIZE — one entry per target (INITIALIZE WS-A WS-B WS-C.)
    const initialize = line.match(RE_INITIALIZE);
    if (initialize) {
        const initTargets = initialize[1].trim().split(/\s+/)
            .filter(token => isDataName(token) && !INITIALIZE_CLAUSE_KEYWORDS.has(token.toUpperCase()));
        for (const target of initTargets) {
            result.initializes.push({ target, line: lineNum, caller: currentParagraph });
        }
    }
}
|
|
1509
|
+
}
|