gitnexus 1.4.8 → 1.4.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. package/README.md +7 -0
  2. package/dist/cli/index-repo.d.ts +15 -0
  3. package/dist/cli/index-repo.js +115 -0
  4. package/dist/cli/index.js +11 -2
  5. package/dist/cli/setup.js +12 -9
  6. package/dist/cli/wiki.d.ts +4 -0
  7. package/dist/cli/wiki.js +174 -53
  8. package/dist/config/supported-languages.d.ts +7 -5
  9. package/dist/config/supported-languages.js +6 -4
  10. package/dist/core/graph/graph.js +9 -1
  11. package/dist/core/graph/types.d.ts +10 -2
  12. package/dist/core/ingestion/call-processor.d.ts +18 -1
  13. package/dist/core/ingestion/call-processor.js +297 -38
  14. package/dist/core/ingestion/call-routing.d.ts +3 -18
  15. package/dist/core/ingestion/call-routing.js +0 -19
  16. package/dist/core/ingestion/cobol/cobol-copy-expander.d.ts +57 -0
  17. package/dist/core/ingestion/cobol/cobol-copy-expander.js +385 -0
  18. package/dist/core/ingestion/cobol/cobol-preprocessor.d.ts +210 -0
  19. package/dist/core/ingestion/cobol/cobol-preprocessor.js +1509 -0
  20. package/dist/core/ingestion/cobol/jcl-parser.d.ts +68 -0
  21. package/dist/core/ingestion/cobol/jcl-parser.js +217 -0
  22. package/dist/core/ingestion/cobol/jcl-processor.d.ts +33 -0
  23. package/dist/core/ingestion/cobol/jcl-processor.js +229 -0
  24. package/dist/core/ingestion/cobol-processor.d.ts +54 -0
  25. package/dist/core/ingestion/cobol-processor.js +1186 -0
  26. package/dist/core/ingestion/entry-point-scoring.d.ts +17 -0
  27. package/dist/core/ingestion/entry-point-scoring.js +18 -4
  28. package/dist/core/ingestion/export-detection.d.ts +47 -8
  29. package/dist/core/ingestion/export-detection.js +29 -50
  30. package/dist/core/ingestion/field-extractor.d.ts +29 -0
  31. package/dist/core/ingestion/field-extractor.js +25 -0
  32. package/dist/core/ingestion/field-extractors/configs/c-cpp.d.ts +3 -0
  33. package/dist/core/ingestion/field-extractors/configs/c-cpp.js +108 -0
  34. package/dist/core/ingestion/field-extractors/configs/csharp.d.ts +8 -0
  35. package/dist/core/ingestion/field-extractors/configs/csharp.js +73 -0
  36. package/dist/core/ingestion/field-extractors/configs/dart.d.ts +8 -0
  37. package/dist/core/ingestion/field-extractors/configs/dart.js +76 -0
  38. package/dist/core/ingestion/field-extractors/configs/go.d.ts +11 -0
  39. package/dist/core/ingestion/field-extractors/configs/go.js +64 -0
  40. package/dist/core/ingestion/field-extractors/configs/helpers.d.ts +44 -0
  41. package/dist/core/ingestion/field-extractors/configs/helpers.js +134 -0
  42. package/dist/core/ingestion/field-extractors/configs/jvm.d.ts +3 -0
  43. package/dist/core/ingestion/field-extractors/configs/jvm.js +118 -0
  44. package/dist/core/ingestion/field-extractors/configs/php.d.ts +8 -0
  45. package/dist/core/ingestion/field-extractors/configs/php.js +67 -0
  46. package/dist/core/ingestion/field-extractors/configs/python.d.ts +12 -0
  47. package/dist/core/ingestion/field-extractors/configs/python.js +91 -0
  48. package/dist/core/ingestion/field-extractors/configs/ruby.d.ts +16 -0
  49. package/dist/core/ingestion/field-extractors/configs/ruby.js +75 -0
  50. package/dist/core/ingestion/field-extractors/configs/rust.d.ts +9 -0
  51. package/dist/core/ingestion/field-extractors/configs/rust.js +55 -0
  52. package/dist/core/ingestion/field-extractors/configs/swift.d.ts +8 -0
  53. package/dist/core/ingestion/field-extractors/configs/swift.js +63 -0
  54. package/dist/core/ingestion/field-extractors/configs/typescript-javascript.d.ts +3 -0
  55. package/dist/core/ingestion/field-extractors/configs/typescript-javascript.js +60 -0
  56. package/dist/core/ingestion/field-extractors/generic.d.ts +46 -0
  57. package/dist/core/ingestion/field-extractors/generic.js +111 -0
  58. package/dist/core/ingestion/field-extractors/typescript.d.ts +77 -0
  59. package/dist/core/ingestion/field-extractors/typescript.js +291 -0
  60. package/dist/core/ingestion/field-types.d.ts +59 -0
  61. package/dist/core/ingestion/field-types.js +2 -0
  62. package/dist/core/ingestion/framework-detection.d.ts +87 -0
  63. package/dist/core/ingestion/framework-detection.js +65 -2
  64. package/dist/core/ingestion/heritage-processor.js +15 -17
  65. package/dist/core/ingestion/import-processor.d.ts +9 -10
  66. package/dist/core/ingestion/import-processor.js +59 -14
  67. package/dist/core/ingestion/{resolvers → import-resolvers}/csharp.d.ts +6 -9
  68. package/dist/core/ingestion/{resolvers → import-resolvers}/csharp.js +20 -2
  69. package/dist/core/ingestion/import-resolvers/dart.d.ts +7 -0
  70. package/dist/core/ingestion/import-resolvers/dart.js +44 -0
  71. package/dist/core/ingestion/{resolvers → import-resolvers}/go.d.ts +4 -5
  72. package/dist/core/ingestion/{resolvers → import-resolvers}/go.js +17 -0
  73. package/dist/core/ingestion/{resolvers → import-resolvers}/jvm.d.ts +9 -1
  74. package/dist/core/ingestion/{resolvers → import-resolvers}/jvm.js +56 -0
  75. package/dist/core/ingestion/{resolvers → import-resolvers}/php.d.ts +6 -10
  76. package/dist/core/ingestion/{resolvers → import-resolvers}/php.js +7 -2
  77. package/dist/core/ingestion/{resolvers → import-resolvers}/python.d.ts +9 -3
  78. package/dist/core/ingestion/{resolvers → import-resolvers}/python.js +35 -3
  79. package/dist/core/ingestion/{resolvers → import-resolvers}/ruby.d.ts +5 -2
  80. package/dist/core/ingestion/{resolvers → import-resolvers}/ruby.js +7 -2
  81. package/dist/core/ingestion/{resolvers → import-resolvers}/rust.d.ts +5 -2
  82. package/dist/core/ingestion/{resolvers → import-resolvers}/rust.js +41 -2
  83. package/dist/core/ingestion/{resolvers → import-resolvers}/standard.d.ts +15 -7
  84. package/dist/core/ingestion/{resolvers → import-resolvers}/standard.js +22 -3
  85. package/dist/core/ingestion/import-resolvers/swift.d.ts +7 -0
  86. package/dist/core/ingestion/import-resolvers/swift.js +23 -0
  87. package/dist/core/ingestion/import-resolvers/types.d.ts +44 -0
  88. package/dist/core/ingestion/import-resolvers/types.js +6 -0
  89. package/dist/core/ingestion/{resolvers → import-resolvers}/utils.d.ts +0 -3
  90. package/dist/core/ingestion/{resolvers → import-resolvers}/utils.js +0 -9
  91. package/dist/core/ingestion/language-config.d.ts +4 -1
  92. package/dist/core/ingestion/language-provider.d.ts +121 -0
  93. package/dist/core/ingestion/language-provider.js +24 -0
  94. package/dist/core/ingestion/languages/c-cpp.d.ts +12 -0
  95. package/dist/core/ingestion/languages/c-cpp.js +71 -0
  96. package/dist/core/ingestion/languages/cobol.d.ts +1 -0
  97. package/dist/core/ingestion/languages/cobol.js +26 -0
  98. package/dist/core/ingestion/languages/csharp.d.ts +8 -0
  99. package/dist/core/ingestion/languages/csharp.js +49 -0
  100. package/dist/core/ingestion/languages/dart.d.ts +12 -0
  101. package/dist/core/ingestion/languages/dart.js +58 -0
  102. package/dist/core/ingestion/languages/go.d.ts +11 -0
  103. package/dist/core/ingestion/languages/go.js +28 -0
  104. package/dist/core/ingestion/languages/index.d.ts +38 -0
  105. package/dist/core/ingestion/languages/index.js +63 -0
  106. package/dist/core/ingestion/languages/java.d.ts +9 -0
  107. package/dist/core/ingestion/languages/java.js +29 -0
  108. package/dist/core/ingestion/languages/kotlin.d.ts +9 -0
  109. package/dist/core/ingestion/languages/kotlin.js +53 -0
  110. package/dist/core/ingestion/languages/php.d.ts +8 -0
  111. package/dist/core/ingestion/languages/php.js +145 -0
  112. package/dist/core/ingestion/languages/python.d.ts +12 -0
  113. package/dist/core/ingestion/languages/python.js +39 -0
  114. package/dist/core/ingestion/languages/ruby.d.ts +9 -0
  115. package/dist/core/ingestion/languages/ruby.js +44 -0
  116. package/dist/core/ingestion/languages/rust.d.ts +12 -0
  117. package/dist/core/ingestion/languages/rust.js +44 -0
  118. package/dist/core/ingestion/languages/swift.d.ts +12 -0
  119. package/dist/core/ingestion/languages/swift.js +133 -0
  120. package/dist/core/ingestion/languages/typescript.d.ts +10 -0
  121. package/dist/core/ingestion/languages/typescript.js +60 -0
  122. package/dist/core/ingestion/mro-processor.js +14 -15
  123. package/dist/core/ingestion/{named-binding-extraction.d.ts → named-binding-processor.d.ts} +0 -9
  124. package/dist/core/ingestion/named-binding-processor.js +42 -0
  125. package/dist/core/ingestion/named-bindings/csharp.d.ts +3 -0
  126. package/dist/core/ingestion/named-bindings/csharp.js +37 -0
  127. package/dist/core/ingestion/named-bindings/java.d.ts +3 -0
  128. package/dist/core/ingestion/named-bindings/java.js +29 -0
  129. package/dist/core/ingestion/named-bindings/kotlin.d.ts +3 -0
  130. package/dist/core/ingestion/named-bindings/kotlin.js +36 -0
  131. package/dist/core/ingestion/named-bindings/php.d.ts +3 -0
  132. package/dist/core/ingestion/named-bindings/php.js +61 -0
  133. package/dist/core/ingestion/named-bindings/python.d.ts +3 -0
  134. package/dist/core/ingestion/named-bindings/python.js +49 -0
  135. package/dist/core/ingestion/named-bindings/rust.d.ts +3 -0
  136. package/dist/core/ingestion/named-bindings/rust.js +64 -0
  137. package/dist/core/ingestion/named-bindings/types.d.ts +16 -0
  138. package/dist/core/ingestion/named-bindings/types.js +6 -0
  139. package/dist/core/ingestion/named-bindings/typescript.d.ts +3 -0
  140. package/dist/core/ingestion/named-bindings/typescript.js +58 -0
  141. package/dist/core/ingestion/parsing-processor.d.ts +5 -1
  142. package/dist/core/ingestion/parsing-processor.js +115 -16
  143. package/dist/core/ingestion/pipeline.js +925 -424
  144. package/dist/core/ingestion/resolution-context.js +1 -1
  145. package/dist/core/ingestion/route-extractors/expo.d.ts +1 -0
  146. package/dist/core/ingestion/route-extractors/expo.js +36 -0
  147. package/dist/core/ingestion/route-extractors/middleware.d.ts +47 -0
  148. package/dist/core/ingestion/route-extractors/middleware.js +143 -0
  149. package/dist/core/ingestion/route-extractors/nextjs.d.ts +3 -0
  150. package/dist/core/ingestion/route-extractors/nextjs.js +76 -0
  151. package/dist/core/ingestion/route-extractors/php.d.ts +7 -0
  152. package/dist/core/ingestion/route-extractors/php.js +21 -0
  153. package/dist/core/ingestion/route-extractors/response-shapes.d.ts +20 -0
  154. package/dist/core/ingestion/route-extractors/response-shapes.js +290 -0
  155. package/dist/core/ingestion/tree-sitter-queries.d.ts +8 -7
  156. package/dist/core/ingestion/tree-sitter-queries.js +231 -9
  157. package/dist/core/ingestion/type-env.d.ts +14 -17
  158. package/dist/core/ingestion/type-env.js +66 -14
  159. package/dist/core/ingestion/type-extractors/c-cpp.d.ts +1 -1
  160. package/dist/core/ingestion/type-extractors/csharp.js +1 -1
  161. package/dist/core/ingestion/type-extractors/dart.d.ts +15 -0
  162. package/dist/core/ingestion/type-extractors/dart.js +371 -0
  163. package/dist/core/ingestion/type-extractors/jvm.js +1 -1
  164. package/dist/core/ingestion/type-extractors/shared.d.ts +1 -13
  165. package/dist/core/ingestion/type-extractors/shared.js +9 -102
  166. package/dist/core/ingestion/type-extractors/swift.js +334 -4
  167. package/dist/core/ingestion/type-extractors/types.d.ts +3 -1
  168. package/dist/core/ingestion/{ast-helpers.d.ts → utils/ast-helpers.d.ts} +16 -13
  169. package/dist/core/ingestion/{ast-helpers.js → utils/ast-helpers.js} +111 -32
  170. package/dist/core/ingestion/{call-analysis.js → utils/call-analysis.js} +37 -0
  171. package/dist/core/ingestion/utils/event-loop.d.ts +5 -0
  172. package/dist/core/ingestion/utils/event-loop.js +5 -0
  173. package/dist/core/ingestion/utils/language-detection.d.ts +9 -0
  174. package/dist/core/ingestion/utils/language-detection.js +70 -0
  175. package/dist/core/ingestion/utils/verbose.d.ts +1 -0
  176. package/dist/core/ingestion/utils/verbose.js +7 -0
  177. package/dist/core/ingestion/workers/parse-worker.d.ts +43 -2
  178. package/dist/core/ingestion/workers/parse-worker.js +361 -150
  179. package/dist/core/lbug/csv-generator.js +34 -1
  180. package/dist/core/lbug/lbug-adapter.js +6 -0
  181. package/dist/core/lbug/schema.d.ts +5 -3
  182. package/dist/core/lbug/schema.js +39 -2
  183. package/dist/core/tree-sitter/parser-loader.js +7 -1
  184. package/dist/core/wiki/cursor-client.d.ts +31 -0
  185. package/dist/core/wiki/cursor-client.js +127 -0
  186. package/dist/core/wiki/generator.d.ts +28 -9
  187. package/dist/core/wiki/generator.js +115 -18
  188. package/dist/core/wiki/graph-queries.d.ts +4 -0
  189. package/dist/core/wiki/graph-queries.js +7 -1
  190. package/dist/core/wiki/llm-client.d.ts +2 -0
  191. package/dist/core/wiki/llm-client.js +8 -4
  192. package/dist/core/wiki/prompts.d.ts +3 -3
  193. package/dist/core/wiki/prompts.js +6 -0
  194. package/dist/mcp/core/lbug-adapter.d.ts +5 -0
  195. package/dist/mcp/core/lbug-adapter.js +11 -1
  196. package/dist/mcp/local/local-backend.d.ts +16 -5
  197. package/dist/mcp/local/local-backend.js +711 -74
  198. package/dist/mcp/tools.js +71 -2
  199. package/dist/storage/repo-manager.d.ts +3 -0
  200. package/package.json +17 -16
  201. package/dist/core/ingestion/import-resolution.d.ts +0 -101
  202. package/dist/core/ingestion/import-resolution.js +0 -251
  203. package/dist/core/ingestion/named-binding-extraction.js +0 -373
  204. package/dist/core/ingestion/resolvers/index.d.ts +0 -18
  205. package/dist/core/ingestion/resolvers/index.js +0 -13
  206. package/dist/core/ingestion/type-extractors/index.d.ts +0 -22
  207. package/dist/core/ingestion/type-extractors/index.js +0 -31
  208. package/dist/core/ingestion/utils.d.ts +0 -20
  209. package/dist/core/ingestion/utils.js +0 -242
  210. package/scripts/patch-tree-sitter-swift.cjs +0 -74
  211. /package/dist/core/ingestion/{call-analysis.d.ts → utils/call-analysis.d.ts} +0 -0
@@ -0,0 +1,57 @@
1
+ /**
2
+ * COBOL COPY statement expansion engine.
3
+ *
4
+ * Expands COPY statements by inlining copybook content, applying REPLACING
5
+ * transformations (LEADING, TRAILING, EXACT), and handling nested copies
6
+ * with cycle detection.
7
+ *
8
+ * This is a preprocessing step that runs BEFORE extractCobolSymbolsWithRegex.
9
+ * The caller should run preprocessCobolSource first to clean patch markers.
10
+ *
11
+ * Supported syntax:
12
+ * COPY CPSESP.
13
+ * COPY "WORKGRID.CPY".
14
+ * COPY CPSESP REPLACING LEADING "ESP-" BY "LK-ESP-"
15
+ * LEADING "KPSESPL" BY "LK-KPSESPL".
16
+ * COPY ANAZI REPLACING "ANAZI-KEY" BY "LK-KEY".
17
+ */
18
/** One `<from> BY <to>` pair parsed out of a `COPY ... REPLACING` clause. */
export interface CopyReplacing {
    /**
     * How `from` is matched: against the start of an identifier (`LEADING`),
     * the end of an identifier (`TRAILING`), or as a whole operand (`EXACT`).
     */
    type:
        | 'LEADING'
        | 'TRAILING'
        | 'EXACT';
    /** Text to look for (quotes / `==` delimiters already removed). */
    from: string;
    /** Text substituted for `from`. */
    to: string;
    /** Set to `true` when `from` was written as `==pseudo-text==`; otherwise absent. */
    isPseudotext?: boolean;
}
24
/** Metadata recorded for every COPY statement encountered during expansion. */
export interface CopyResolution {
    /** Copybook name exactly as written after the COPY keyword (quotes removed). */
    copyTarget: string;
    /** Path returned by the resolver callback, or `null` when it could not resolve the name. */
    resolvedPath: string | null;
    /** 1-based line, within the text being expanded, on which the COPY statement starts. */
    line: number;
    /** Parsed REPLACING pairs; empty when the statement has no REPLACING clause. */
    replacing: CopyReplacing[];
    /** Library name given via `IN` / `OF`, when present. */
    library?: string;
}
31
/** Return value of `expandCopies`. */
export interface CopyExpansionResult {
    /** Source text with every expandable COPY statement replaced by copybook content. */
    expandedContent: string;
    /** One entry per COPY statement seen, including those that were not expanded. */
    copyResolutions: CopyResolution[];
}
35
/** Default limit on how deeply nested COPY statements are expanded. */
export declare const DEFAULT_MAX_DEPTH = 10;
36
/**
 * Turn the text that follows the REPLACING keyword into structured pairs.
 *
 * Accepted operand forms (each pair is `[LEADING|TRAILING] <from> BY <to>`):
 *   LEADING "ESP-" BY "LK-ESP-" LEADING "KPSESPL" BY "LK-KPSESPL"
 *   "ANAZI-KEY" BY "LK-KEY"
 *   TRAILING "-IN" BY "-OUT"
 *   ==CUST-== BY ==WS-CUST-==
 *   ==OLD-TEXT== BY ====
 *
 * @param text - Clause text without the leading REPLACING keyword
 * @returns The parsed pairs, in source order; empty for blank input
 */
export declare function parseReplacingClause(text: string): CopyReplacing[];
47
/**
 * Inline copybook content in place of COBOL COPY statements.
 *
 * @param content - COBOL source text (expected to have been through preprocessCobolSource)
 * @param filePath - Path of the file `content` came from; used in diagnostics
 * @param resolveFile - Callback mapping a COPY target name to a path, or `null` when unknown
 * @param readFile - Callback returning the text at a path, or `null` when unreadable
 * @param maxDepth - Upper bound on nested expansion depth (default: 10)
 * @returns The expanded text together with per-statement resolution metadata
 */
export declare function expandCopies(
    content: string,
    filePath: string,
    resolveFile: (name: string) => string | null,
    readFile: (path: string) => string | null,
    maxDepth?: number,
): CopyExpansionResult;
@@ -0,0 +1,385 @@
1
+ /**
2
+ * COBOL COPY statement expansion engine.
3
+ *
4
+ * Expands COPY statements by inlining copybook content, applying REPLACING
5
+ * transformations (LEADING, TRAILING, EXACT), and handling nested copies
6
+ * with cycle detection.
7
+ *
8
+ * This is a preprocessing step that runs BEFORE extractCobolSymbolsWithRegex.
9
+ * The caller should run preprocessCobolSource first to clean patch markers.
10
+ *
11
+ * Supported syntax:
12
+ * COPY CPSESP.
13
+ * COPY "WORKGRID.CPY".
14
+ * COPY CPSESP REPLACING LEADING "ESP-" BY "LK-ESP-"
15
+ * LEADING "KPSESPL" BY "LK-KPSESPL".
16
+ * COPY ANAZI REPLACING "ANAZI-KEY" BY "LK-KEY".
17
+ */
18
+ // ---------------------------------------------------------------------------
19
+ // Constants
20
+ // ---------------------------------------------------------------------------
21
/** Default limit on how deeply nested COPY statements are expanded. */
export const DEFAULT_MAX_DEPTH = 10;

/**
 * Global, case-insensitive matcher for COBOL identifiers: a letter followed by
 * any run of letters, digits and hyphens, delimited by word boundaries.
 */
const RE_COBOL_IDENTIFIER = /\b([A-Z][A-Z0-9-]*)\b/gi;
24
+ // ---------------------------------------------------------------------------
25
+ // Private helpers
26
+ // ---------------------------------------------------------------------------
27
/**
 * Cut a line at its first `|` inline-comment marker.
 *
 * The line is scanned left to right while tracking whether the cursor sits
 * inside a single- or double-quoted literal. The first `|` found outside a
 * literal ends the line; everything before it is returned. A line with no
 * such `|` is returned unchanged.
 *
 * NOTE(review): no column check is performed, so a `|` anywhere outside
 * quotes (including columns 1-6) truncates the line.
 */
function stripInlineComment(line) {
    let openQuote = null;
    let pos = 0;
    while (pos < line.length) {
        const c = line[pos];
        if (openQuote) {
            // Inside a literal: only the matching quote character closes it.
            if (c === openQuote) {
                openQuote = null;
            }
        }
        else if (c === '|') {
            return line.substring(0, pos);
        }
        else if (c === '"' || c === "'") {
            openQuote = c;
        }
        pos += 1;
    }
    return line;
}
48
/**
 * Tell whether a fixed-format line is a comment, i.e. whether the indicator
 * character in column 7 (index 6) is `*` or `/`.
 */
function isCommentLine(line) {
    if (line.length < 7) {
        return false;
    }
    const indicator = line[6];
    return indicator === '*' || indicator === '/';
}
54
/**
 * Tell whether a fixed-format line continues the previous one, i.e. whether
 * the indicator character in column 7 (index 6) is `-`.
 */
function isContinuationLine(line) {
    return line.length >= 7 ? line.charAt(6) === '-' : false;
}
60
/**
 * Build one logical-line entry per raw line, folding continuation text into
 * the entry that precedes it.
 *
 * The result always has the same length as `rawLines`; every entry is
 * `{ text, lineNum }` with a 1-based `lineNum`:
 *   - comment lines become an empty-text placeholder;
 *   - continuation lines append their text after column 7 (left-trimmed) to
 *     the last entry pushed so far, then add their own empty placeholder;
 *   - any other line is stored with its inline `|` comment stripped.
 *
 * NOTE(review): "the last entry pushed" may itself be a placeholder (for a
 * comment or an earlier continuation line), so a second consecutive
 * continuation line lands on the first one's placeholder rather than on the
 * original statement line.
 */
function mergeLogicalLines(rawLines) {
    const merged = [];
    for (const [idx, rawLine] of rawLines.entries()) {
        const lineNum = idx + 1;
        if (isCommentLine(rawLine)) {
            merged.push({ text: '', lineNum });
            continue;
        }
        if (!isContinuationLine(rawLine)) {
            // Ordinary line: keep it, minus any inline comment.
            merged.push({ text: stripInlineComment(rawLine), lineNum });
            continue;
        }
        const target = merged[merged.length - 1];
        if (target !== undefined) {
            target.text += rawLine.length > 7 ? rawLine.substring(7).trimStart() : '';
        }
        // Placeholder keeps the entry count equal to the raw line count.
        merged.push({ text: '', lineNum });
    }
    return merged;
}
90
/**
 * Parse REPLACING clause text into structured replacements.
 *
 * Each replacement has the shape `[LEADING|TRAILING] <from> BY <to>`, where an
 * operand is one of:
 *   - pseudo-text `==...==` (surrounding whitespace trimmed; forces type EXACT),
 *   - a double-quoted literal `"..."`,
 *   - a single-quoted literal `'...'` (the same literal forms the COPY target
 *     parser accepts),
 *   - a bare whitespace-delimited word.
 *
 * Input examples:
 *   LEADING "ESP-" BY "LK-ESP-" LEADING "KPSESPL" BY "LK-KPSESPL"
 *   "ANAZI-KEY" BY "LK-KEY"
 *   TRAILING '-IN' BY '-OUT'
 *   ==CUST-== BY ==WS-CUST-==
 *   ==OLD-TEXT== BY ====
 *
 * Malformed input is tolerated: an operand that is not followed by BY is
 * dropped and parsing resumes at the next token; a truncated trailing pair is
 * ignored.
 *
 * @param text - Clause text without the leading REPLACING keyword
 * @returns Parsed replacements in source order (empty for blank input)
 */
export function parseReplacingClause(text) {
    const replacings = [];
    if (!text || text.trim().length === 0)
        return replacings;
    // Tokenize. Alternatives, in order: pseudo-text, "literal", 'literal', bare word.
    const tokenRe = /==((?:[^=]|=[^=])*)==|"([^"]*)"|'([^']*)'|(\S+)/g;
    const tokens = [];
    for (const tm of text.matchAll(tokenRe)) {
        if (tm[1] !== undefined) {
            tokens.push({ value: tm[1].trim(), isPseudotext: true });
        }
        else {
            tokens.push({ value: tm[2] ?? tm[3] ?? tm[4], isPseudotext: false });
        }
    }
    // Parse token stream: [LEADING|TRAILING]? <from> BY <to>
    let i = 0;
    while (i < tokens.length) {
        let type = 'EXACT';
        // A type modifier is only recognised on non-pseudotext tokens.
        if (!tokens[i].isPseudotext) {
            const upper = tokens[i].value.toUpperCase();
            if (upper === 'LEADING' || upper === 'TRAILING') {
                type = upper;
                i++;
            }
        }
        if (i >= tokens.length)
            break;
        const fromToken = tokens[i];
        i++;
        // Pseudotext always forces EXACT type
        if (fromToken.isPseudotext)
            type = 'EXACT';
        if (i >= tokens.length)
            break;
        if (tokens[i].value.toUpperCase() !== 'BY') {
            // Malformed: drop the consumed operand and resync on the next token.
            continue;
        }
        i++; // skip BY
        if (i >= tokens.length)
            break;
        const toToken = tokens[i];
        i++;
        replacings.push({
            type,
            from: fromToken.value,
            to: toToken.value,
            isPseudotext: fromToken.isPseudotext || undefined,
        });
    }
    return replacings;
}
158
/**
 * Collect the COPY statements found in a list of logical lines.
 *
 * A statement starts at the first `COPY` word on a line (text before it is
 * dropped) and keeps absorbing subsequent non-empty lines, joined by a single
 * space, until the accumulated text ends with a period. A statement still
 * open at the end of input is parsed as-is. Text that does not parse as a
 * COPY statement is discarded silently.
 *
 * @returns Parsed statements in source order, each carrying the 1-based
 *          start and end line numbers of the logical lines it spans
 */
function parseCopyStatements(logicalLines) {
    const statements = [];
    let pending = null;
    let firstLine = 0;
    let lastLine = 0;
    // Parse whatever has been accumulated and reset the accumulator.
    const flush = () => {
        const stmt = parseSingleCopyStatement(pending, firstLine, lastLine);
        if (stmt) {
            statements.push(stmt);
        }
        pending = null;
    };
    for (const { text, lineNum } of logicalLines) {
        if (text.length === 0)
            continue;
        if (pending !== null) {
            pending += ' ' + text.trim();
        }
        else {
            const hit = text.match(/\bCOPY\b/i);
            if (!hit)
                continue;
            pending = text.substring(hit.index);
            firstLine = lineNum;
        }
        lastLine = lineNum;
        // A trailing period terminates the statement.
        if (/\.\s*$/.test(pending)) {
            flush();
        }
    }
    if (pending !== null) {
        flush();
    }
    return statements;
}
205
/**
 * Parse the text of one complete COPY statement.
 *
 * Recognised shapes (case-insensitive, trailing period optional):
 *   COPY target.
 *   COPY "target".   /   COPY 'target'.
 *   COPY target IN library.   /   COPY target OF library.
 *   COPY target REPLACING ... .
 *
 * @param stmt - Statement text beginning with the COPY keyword
 * @param startLine - 1-based line on which the statement starts
 * @param endLine - 1-based line on which the statement ends
 * @returns `{ startLine, endLine, target, replacing, library }`, or `null`
 *          when the text does not begin with a recognisable COPY target
 */
function parseSingleCopyStatement(stmt, startLine, endLine) {
    // Drop the terminating period before matching.
    const body = stmt.replace(/\.\s*$/, '').trim();
    const header = /^COPY\s+(?:"([^"]+)"|'([^']+)'|([A-Z][A-Z0-9-]*))(?:\s+(?:IN|OF)\s+([A-Z][A-Z0-9-]*))?/i.exec(body);
    if (header === null) {
        return null;
    }
    const [, doubleQuoted, singleQuoted, bareName, libraryName] = header;
    const target = doubleQuoted ?? singleQuoted ?? bareName;
    const library = libraryName || undefined;
    // Everything after the REPLACING keyword is handed to the clause parser.
    const keywordAt = body.search(/\bREPLACING\b/i);
    const replacing = keywordAt < 0
        ? []
        : parseReplacingClause(body.substring(keywordAt + 'REPLACING'.length));
    return { startLine, endLine, target, replacing, library };
}
232
+ // ---------------------------------------------------------------------------
233
+ // REPLACING application
234
+ // ---------------------------------------------------------------------------
235
/**
 * Apply REPLACING transformations to copybook content.
 *
 * Two passes are made:
 *   1. Textual pass: EXACT entries that are pseudo-text, contain a space, or
 *      are not a plain COBOL identifier are replaced as literal,
 *      case-insensitive substrings, in the order given.
 *   2. Identifier pass: every COBOL identifier in the result is checked
 *      against the remaining entries in order; the first that applies wins.
 *        LEADING  - replace a matching identifier prefix
 *        TRAILING - replace a matching identifier suffix
 *        EXACT    - replace the whole identifier
 *      Matching is case-insensitive and the replacement text is upper-cased.
 *
 * Entries whose `from` is empty are ignored: an empty pattern would match at
 * every position and inject `to` throughout the content.
 *
 * @param content - Copybook text
 * @param replacings - Parsed REPLACING pairs
 * @returns The transformed text (`content` itself when nothing applies)
 */
function applyReplacing(content, replacings) {
    const active = replacings.filter(r => r.from.length > 0);
    if (active.length === 0)
        return content;
    // Entries that cannot be handled by identifier-level matching.
    const isTextual = (r) => r.type === 'EXACT' &&
        (r.isPseudotext || r.from.includes(' ') || !/^[A-Z][A-Z0-9-]*$/i.test(r.from));
    let result = content;
    const identifierReplacings = [];
    for (const r of active) {
        if (!isTextual(r)) {
            identifierReplacings.push(r);
            continue;
        }
        const escaped = r.from.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        // Replacer function: `$&`, `$1`, `$$` in r.to must be inserted literally.
        result = result.replace(new RegExp(escaped, 'gi'), () => r.to);
    }
    if (identifierReplacings.length === 0)
        return result;
    return result.replace(RE_COBOL_IDENTIFIER, (match) => {
        const upper = match.toUpperCase();
        for (const r of identifierReplacings) {
            const from = r.from.toUpperCase();
            const to = r.to.toUpperCase();
            switch (r.type) {
                case 'LEADING':
                    if (upper.startsWith(from)) {
                        return to + match.substring(from.length);
                    }
                    break;
                case 'TRAILING':
                    if (upper.endsWith(from)) {
                        return match.substring(0, match.length - from.length) + to;
                    }
                    break;
                case 'EXACT':
                    if (upper === from) {
                        return to;
                    }
                    break;
            }
        }
        return match;
    });
}
285
+ // ---------------------------------------------------------------------------
286
+ // Main expansion engine
287
+ // ---------------------------------------------------------------------------
288
/**
 * Expand COBOL COPY statements by inlining copybook content.
 *
 * Every COPY statement found is recorded in `copyResolutions`, whether or not
 * it ends up expanded. A statement is left in place when its target cannot be
 * resolved or read, when expanding it would revisit a copybook already on the
 * current include chain, when `maxDepth` has been reached, or when more than
 * 500 expansions have already been performed in this call.
 *
 * Statements are processed last-to-first within each file, so
 * `copyResolutions` lists them in that order.
 *
 * @param content - Source COBOL content (after preprocessCobolSource)
 * @param filePath - Path of the source file (for diagnostics)
 * @param resolveFile - Maps a COPY target name to a filesystem path, or null if not found
 * @param readFile - Reads file content by path, or null if unreadable
 * @param maxDepth - Maximum nesting depth for recursive expansion (default: 10)
 * @returns Expanded content and resolution metadata
 */
export function expandCopies(content, filePath, resolveFile, readFile, maxDepth = DEFAULT_MAX_DEPTH) {
    const MAX_TOTAL_EXPANSIONS = 500;
    // Used for both the host source and inlined copybooks so CRLF input never
    // leaves stray carriage returns on spliced lines.
    const LINE_BREAK = /\r?\n/;
    const allResolutions = [];
    // Keys already warned about (resolved paths, plus a sentinel for the total cap).
    const warnedCircular = new Set();
    let totalExpansions = 0;
    /**
     * Recursively expand COPY statements in content.
     *
     * @param src - Source content to expand
     * @param srcPath - Path of the file being expanded (for diagnostics)
     * @param depth - Current recursion depth
     * @param visited - Copybook paths on the current include chain (cycle detection)
     */
    function expandRecursive(src, srcPath, depth, visited) {
        const rawLines = src.split(LINE_BREAK);
        const logicalLines = mergeLogicalLines(rawLines);
        const copyStatements = parseCopyStatements(logicalLines);
        // No COPY statements: return as-is
        if (copyStatements.length === 0)
            return src;
        // Process COPY statements in reverse order so line numbers stay valid
        // as we splice content
        const outputLines = [...rawLines];
        for (let ci = copyStatements.length - 1; ci >= 0; ci--) {
            const cs = copyStatements[ci];
            const resolvedPath = resolveFile(cs.target);
            // Record resolution metadata, even if the statement is not expanded
            allResolutions.push({
                copyTarget: cs.target,
                resolvedPath,
                line: cs.startLine,
                replacing: cs.replacing,
                library: cs.library,
            });
            // Cannot resolve: keep original lines
            if (resolvedPath === null) {
                continue;
            }
            // Cycle detection
            if (visited.has(resolvedPath)) {
                if (!warnedCircular.has(resolvedPath)) {
                    warnedCircular.add(resolvedPath);
                    console.warn(`[cobol-copy-expander] Circular COPY detected: ${cs.target} (${resolvedPath}) ` +
                        `includes itself. Skipping expansion.`);
                }
                continue;
            }
            // Max depth exceeded: keep unexpanded
            if (depth >= maxDepth) {
                console.warn(`[cobol-copy-expander] Max expansion depth (${maxDepth}) reached for ` +
                    `COPY ${cs.target} in ${srcPath}. Skipping expansion.`);
                continue;
            }
            // Guard against exponential breadth amplification (N copybooks each with N COPYs)
            if (++totalExpansions > MAX_TOTAL_EXPANSIONS) {
                if (!warnedCircular.has('__max_total__')) {
                    warnedCircular.add('__max_total__');
                    console.warn(`[cobol-copy-expander] Max total expansions (${MAX_TOTAL_EXPANSIONS}) reached ` +
                        `in ${srcPath}. Skipping further expansions.`);
                }
                continue;
            }
            const copybookContent = readFile(resolvedPath);
            if (copybookContent === null) {
                continue;
            }
            const replaced = applyReplacing(copybookContent, cs.replacing);
            // Each branch gets its own copy of the chain so sibling COPYs of the
            // same copybook are not mistaken for cycles.
            const nestedVisited = new Set(visited);
            nestedVisited.add(resolvedPath);
            const expandedCopybook = expandRecursive(replaced, resolvedPath, depth + 1, nestedVisited);
            // Splice: replace the COPY statement lines with expanded content.
            // startLine/endLine are 1-based; convert to 0-based array index.
            const expansionLines = expandedCopybook.split(LINE_BREAK);
            const removeCount = cs.endLine - cs.startLine + 1;
            outputLines.splice(cs.startLine - 1, removeCount, ...expansionLines);
        }
        return outputLines.join('\n');
    }
    const expanded = expandRecursive(content, filePath, 0, new Set());
    return {
        expandedContent: expanded,
        copyResolutions: allResolutions,
    };
}
@@ -0,0 +1,210 @@
1
+ /**
2
+ * COBOL source pre-processing and regex-based symbol extraction.
3
+ *
4
+ * DESIGN DECISION — Why regex instead of a full parser (ANTLR4, tree-sitter):
5
+ *
6
+ * 1. Performance: Regex processes ~1ms/file vs 50-200ms/file for ANTLR4/tree-sitter.
7
+ * On EPAGHE (14k COBOL files), this is ~14 seconds vs 12-47 minutes.
8
+ *
9
+ * 2. Reliability: tree-sitter-cobol@0.0.1's external scanner hangs indefinitely
10
+ * on ~5% of production files (no timeout possible). ANTLR4's proleap-cobol-parser
11
+ * is a Java project — using it from Node.js requires Java subprocesses or
12
+ * extracting .g4 grammars and generating JS/TS targets (significant effort).
13
+ *
14
+ * 3. Dialect compatibility: GnuCOBOL with Italian comments, patch markers in
15
+ * cols 1-6 (mzADD, estero, etc.), and vendor extensions. Formal grammars
16
+ * target COBOL-85 and would need dialect modifications.
17
+ *
18
+ * 4. Industry precedent: ctags, GitHub code navigation, and Sourcegraph all use
19
+ * regex-based extraction for code indexing. Full parsing is only needed for
20
+ * compilation or semantic analysis, not symbol extraction.
21
+ *
22
+ * 5. Determinism: Every regex pattern is tested with canonical COBOL input
23
+ * (see test/unit/cobol-preprocessor.test.ts). Same input always produces
24
+ * same output — no grammar ambiguity or parser state issues.
25
+ *
26
+ * This module provides:
27
+ * 1. preprocessCobolSource() — cleans patch markers (kept for potential future use)
28
+ * 2. extractCobolSymbolsWithRegex() — single-pass state machine COBOL extraction
29
+ */
30
+ export interface CobolRegexResults { // Return type of extractCobolSymbolsWithRegex(). NOTE(review): mappings marked "presumably" below are inferred from field names only — confirm against the implementation.
31
+ programName: string | null; // Nullable by type; presumably null when no program name was found — confirm.
32
+ /** All programs in this file with line-range boundaries for per-program scoping. */
33
+ programs: Array<{
34
+ name: string;
35
+ startLine: number; // NOTE(review): line base (0- vs 1-based) is not stated in this declaration.
36
+ endLine: number;
37
+ nestingDepth: number;
38
+ procedureUsing?: string[];
39
+ isCommon?: boolean;
40
+ }>;
41
+ paragraphs: Array<{ // Paragraph entries: name plus line.
42
+ name: string;
43
+ line: number;
44
+ }>;
45
+ sections: Array<{ // Section entries: name plus line.
46
+ name: string;
47
+ line: number;
48
+ }>;
49
+ performs: Array<{ // PERFORM entries.
50
+ caller: string | null; // Nullable by type; presumably the enclosing paragraph/section name — confirm.
51
+ target: string;
52
+ thruTarget?: string; // Optional; presumably the end label of a PERFORM ... THRU range — confirm.
53
+ line: number;
54
+ }>;
55
+ calls: Array<{ // CALL entries.
56
+ target: string;
57
+ line: number;
58
+ isQuoted: boolean; // presumably true when the target was written as a quoted literal — confirm.
59
+ parameters?: string[];
60
+ returning?: string;
61
+ }>;
62
+ copies: Array<{ // COPY entries: target plus line.
63
+ target: string;
64
+ line: number;
65
+ }>;
66
+ dataItems: Array<{ // Data items; only name, level, line and section are required.
67
+ name: string;
68
+ level: number;
69
+ line: number;
70
+ pic?: string;
71
+ usage?: string;
72
+ occurs?: number;
73
+ dependingOn?: string;
74
+ redefines?: string;
75
+ values?: string[];
76
+ isExternal?: boolean;
77
+ isGlobal?: boolean;
78
+ section: 'working-storage' | 'linkage' | 'file' | 'local-storage' | 'screen' | 'unknown'; // Closed set of literals; 'unknown' is part of the type.
79
+ }>;
80
+ fileDeclarations: Array<{ // File declarations; selectName, assignTo and line are required, the rest optional.
81
+ selectName: string;
82
+ assignTo: string;
83
+ organization?: string;
84
+ access?: string;
85
+ recordKey?: string;
86
+ alternateKeys?: string[];
87
+ fileStatus?: string;
88
+ isOptional?: boolean;
89
+ line: number;
90
+ }>;
91
+ fdEntries: Array<{ // FD entries; recordName is optional.
92
+ fdName: string;
93
+ recordName?: string;
94
+ line: number;
95
+ }>;
96
+ programMetadata: { // Program metadata; every field is optional.
97
+ author?: string;
98
+ dateWritten?: string;
99
+ dateCompiled?: string;
100
+ installation?: string;
101
+ };
102
+ execSqlBlocks: Array<{ // presumably one entry per EXEC SQL block — confirm.
103
+ line: number;
104
+ tables: string[];
105
+ cursors: string[];
106
+ hostVariables: string[];
107
+ operation: 'SELECT' | 'INSERT' | 'UPDATE' | 'DELETE' | 'DECLARE' | 'OPEN' | 'CLOSE' | 'FETCH' | 'OTHER'; // Closed set of literals; 'OTHER' is part of the type.
108
+ includeMember?: string;
109
+ }>;
110
+ execCicsBlocks: Array<{ // presumably one entry per EXEC CICS block — confirm; only line and command are required.
111
+ line: number;
112
+ command: string;
113
+ mapName?: string;
114
+ programName?: string;
115
+ programIsLiteral?: boolean;
116
+ transId?: string;
117
+ fileName?: string;
118
+ fileIsLiteral?: boolean;
119
+ queueName?: string;
120
+ labelName?: string;
121
+ intoField?: string;
122
+ fromField?: string;
123
+ }>;
124
+ procedureUsing: string[]; // File-level list; programs[] entries carry their own optional procedureUsing. NOTE(review): which program this one refers to is not visible here.
125
+ entryPoints: Array<{ // presumably ENTRY statements — confirm.
126
+ name: string;
127
+ parameters: string[];
128
+ line: number;
129
+ }>;
130
+ moves: Array<{ // presumably MOVE statements — confirm.
131
+ from: string;
132
+ targets: string[];
133
+ line: number;
134
+ caller: string | null;
135
+ corresponding: boolean; // presumably flags MOVE CORRESPONDING — confirm.
136
+ }>;
137
+ gotos: Array<{ // presumably GO TO statements — confirm.
138
+ caller: string | null;
139
+ target: string;
140
+ line: number;
141
+ }>;
142
+ sorts: Array<{ // presumably SORT statements — confirm.
143
+ sortFile: string;
144
+ usingFiles: string[];
145
+ givingFiles: string[];
146
+ line: number;
147
+ }>;
148
+ searches: Array<{ // presumably SEARCH statements — confirm.
149
+ target: string;
150
+ line: number;
151
+ }>;
152
+ cancels: Array<{ // presumably CANCEL statements — confirm.
153
+ target: string;
154
+ line: number;
155
+ isQuoted: boolean;
156
+ }>;
157
+ execDliBlocks: Array<{ // presumably one entry per EXEC DLI block — confirm; only line and verb are required.
158
+ line: number;
159
+ verb: string;
160
+ pcbNumber?: number;
161
+ segmentName?: string;
162
+ intoField?: string;
163
+ fromField?: string;
164
+ psbName?: string;
165
+ }>;
166
+ declaratives: Array<{ // presumably entries from a DECLARATIVES part — confirm.
167
+ sectionName: string;
168
+ target: string;
169
+ line: number;
170
+ }>;
171
+ sets: Array<{ // presumably SET statements — confirm.
172
+ targets: string[];
173
+ form: 'to-true' | 'to-value' | 'up-by' | 'down-by'; // Closed set of four literals.
174
+ value?: string;
175
+ line: number;
176
+ caller: string | null;
177
+ }>;
178
+ inspects: Array<{ // presumably INSPECT statements — confirm.
179
+ inspectedField: string;
180
+ counters: string[];
181
+ form: 'tallying' | 'replacing' | 'converting' | 'tallying-replacing'; // Closed set of four literals.
182
+ line: number;
183
+ caller: string | null;
184
+ }>;
185
+ initializes: Array<{ // presumably INITIALIZE statements — confirm.
186
+ target: string;
187
+ line: number;
188
+ caller: string | null;
189
+ }>;
190
+ }
191
+ /**
192
+ * Normalize COBOL source for regex-based extraction.
193
+ *
194
+ * The COBOL fixed-format sequence number area (columns 1-6) is semantically
195
+ * irrelevant to parsing — compilers and tools always ignore it. This
196
+ * function replaces ANY non-space content in columns 1-6 with spaces
197
+ * so that position-sensitive regexes (paragraph/section detection, data-item
198
+ * anchors, etc.) work identically whether the file carries numeric sequence
199
+ * numbers (000100), alphabetic patch markers (mzADD, estero, #patch), or
200
+ * the COBOL default of all spaces.
201
+ *
202
+ * Preserves exact line count for position mapping.
+ *
+ * The module header describes this function as "kept for potential future use".
+ * NOTE(review): this is a declaration only; the behaviour above is as
+ * documented here and is not verified against the implementation.
+ *
+ * @param content COBOL source text to normalize.
+ * @returns The source text with columns 1-6 replaced by spaces, as described above.
203
+ */
204
+ export declare function preprocessCobolSource(content: string): string;
205
+ /**
206
+ * Extract COBOL symbols using a single-pass state machine.
207
+ * Extracts program name, paragraphs, sections, CALL, PERFORM, COPY,
208
+ * data items, file declarations, FD entries, and program metadata.
+ *
+ * The declared return type, CobolRegexResults, also carries fields beyond
+ * this list (for example execSqlBlocks, execCicsBlocks, moves, gotos, sorts).
+ * NOTE(review): whether every field is populated cannot be confirmed from
+ * this declaration alone.
+ *
+ * @param content COBOL source text to scan.
+ * @param _filePath Path of the source file. NOTE(review): the leading
+ * underscore suggests the implementation does not use it — confirm.
+ * @returns The extracted symbols as a CobolRegexResults object.
209
+ */
210
+ export declare function extractCobolSymbolsWithRegex(content: string, _filePath: string): CobolRegexResults;