@futpib/parser 1.0.3 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (262) hide show
  1. package/.claude/settings.local.json +24 -0
  2. package/.github/workflows/main.yml +1 -0
  3. package/build/androidPackageParser.js +30 -32
  4. package/build/arbitraryDalvikBytecode.d.ts +3 -3
  5. package/build/arbitraryDalvikBytecode.js +33 -27
  6. package/build/arbitraryDalvikExecutable.js +55 -17
  7. package/build/arbitraryJava.d.ts +31 -0
  8. package/build/arbitraryJava.js +532 -0
  9. package/build/arbitraryJavaScript.d.ts +3 -0
  10. package/build/arbitraryJavaScript.js +263 -0
  11. package/build/arbitraryJavascript.d.ts +3 -0
  12. package/build/arbitraryJavascript.js +263 -0
  13. package/build/arbitraryZig.d.ts +3 -0
  14. package/build/arbitraryZig.js +240 -0
  15. package/build/arbitraryZipStream.d.ts +1 -1
  16. package/build/arrayParser.js +72 -13
  17. package/build/backsmali.d.ts +4 -3
  18. package/build/backsmali.js +26 -6
  19. package/build/bash.d.ts +89 -0
  20. package/build/bash.js +1 -0
  21. package/build/bashParser.d.ts +6 -0
  22. package/build/bashParser.js +335 -0
  23. package/build/bashParser.test.d.ts +1 -0
  24. package/build/bashParser.test.js +343 -0
  25. package/build/bashParserEdgeCases.test.d.ts +1 -0
  26. package/build/bashParserEdgeCases.test.js +117 -0
  27. package/build/dalvikBytecodeParser/addressConversion.d.ts +110 -0
  28. package/build/dalvikBytecodeParser/addressConversion.js +334 -0
  29. package/build/dalvikBytecodeParser/formatParsers.d.ts +7 -6
  30. package/build/dalvikBytecodeParser/formatParsers.js +13 -14
  31. package/build/dalvikBytecodeParser.d.ts +60 -31
  32. package/build/dalvikBytecodeParser.js +92 -35
  33. package/build/dalvikBytecodeParser.test-d.d.ts +1 -0
  34. package/build/dalvikBytecodeParser.test-d.js +268 -0
  35. package/build/dalvikBytecodeUnparser/formatUnparsers.d.ts +9 -8
  36. package/build/dalvikBytecodeUnparser/formatUnparsers.js +13 -12
  37. package/build/dalvikBytecodeUnparser.d.ts +2 -2
  38. package/build/dalvikBytecodeUnparser.js +23 -23
  39. package/build/dalvikBytecodeUnparser.test.js +7 -7
  40. package/build/dalvikExecutable.d.ts +3 -3
  41. package/build/dalvikExecutable.test-d.d.ts +1 -0
  42. package/build/dalvikExecutable.test-d.js +59 -0
  43. package/build/dalvikExecutableParser/typedNumbers.d.ts +18 -0
  44. package/build/dalvikExecutableParser/typedNumbers.js +3 -0
  45. package/build/dalvikExecutableParser.d.ts +2 -1
  46. package/build/dalvikExecutableParser.js +96 -77
  47. package/build/dalvikExecutableParser.test.js +24 -3
  48. package/build/dalvikExecutableParserAgainstSmaliParser.test.js +3 -0
  49. package/build/dalvikExecutableUnparser/poolScanners.d.ts +2 -2
  50. package/build/dalvikExecutableUnparser/sectionUnparsers.d.ts +3 -3
  51. package/build/dalvikExecutableUnparser/sectionUnparsers.js +26 -11
  52. package/build/dalvikExecutableUnparser.d.ts +2 -2
  53. package/build/dalvikExecutableUnparser.test.js +2 -1
  54. package/build/disjunctionParser.d.ts +5 -3
  55. package/build/disjunctionParser.js +79 -17
  56. package/build/disjunctionParser.test-d.d.ts +1 -0
  57. package/build/disjunctionParser.test-d.js +72 -0
  58. package/build/elementSwitchParser.d.ts +4 -0
  59. package/build/{exactElementSwitchParser.js → elementSwitchParser.js} +3 -4
  60. package/build/elementSwitchParser.test-d.d.ts +1 -0
  61. package/build/elementSwitchParser.test-d.js +44 -0
  62. package/build/exactSequenceParser.d.ts +4 -2
  63. package/build/exactSequenceParser.test-d.d.ts +1 -0
  64. package/build/exactSequenceParser.test-d.js +36 -0
  65. package/build/fetchCid.js +2 -66
  66. package/build/index.d.ts +25 -2
  67. package/build/index.js +23 -1
  68. package/build/index.test.js +16 -1
  69. package/build/inputReader.d.ts +10 -0
  70. package/build/inputReader.js +36 -0
  71. package/build/java.d.ts +502 -0
  72. package/build/java.js +2 -0
  73. package/build/javaKeyStoreParser.js +14 -17
  74. package/build/javaParser.d.ts +51 -0
  75. package/build/javaParser.js +1538 -0
  76. package/build/javaParser.test.d.ts +1 -0
  77. package/build/javaParser.test.js +1287 -0
  78. package/build/javaScript.d.ts +35 -0
  79. package/build/javaScript.js +1 -0
  80. package/build/javaScriptParser.d.ts +9 -0
  81. package/build/javaScriptParser.js +34 -0
  82. package/build/javaScriptUnparser.d.ts +3 -0
  83. package/build/javaScriptUnparser.js +4 -0
  84. package/build/javaScriptUnparser.test.d.ts +1 -0
  85. package/build/javaScriptUnparser.test.js +24 -0
  86. package/build/javaUnparser.d.ts +2 -0
  87. package/build/javaUnparser.js +519 -0
  88. package/build/javaUnparser.test.d.ts +1 -0
  89. package/build/javaUnparser.test.js +24 -0
  90. package/build/javascript.d.ts +35 -0
  91. package/build/javascript.js +1 -0
  92. package/build/javascriptParser.d.ts +9 -0
  93. package/build/javascriptParser.js +34 -0
  94. package/build/javascriptUnparser.d.ts +3 -0
  95. package/build/javascriptUnparser.js +4 -0
  96. package/build/javascriptUnparser.test.d.ts +1 -0
  97. package/build/javascriptUnparser.test.js +24 -0
  98. package/build/jsonParser.js +2 -12
  99. package/build/lazyMessageError.d.ts +3 -0
  100. package/build/lookaheadParser.js +60 -3
  101. package/build/negativeLookaheadParser.js +70 -11
  102. package/build/nonEmptyArrayParser.js +72 -13
  103. package/build/objectParser.d.ts +12 -0
  104. package/build/objectParser.js +31 -0
  105. package/build/objectParser.test-d.d.ts +1 -0
  106. package/build/objectParser.test-d.js +112 -0
  107. package/build/objectParser.test.d.ts +1 -0
  108. package/build/objectParser.test.js +55 -0
  109. package/build/optionalParser.js +69 -10
  110. package/build/parser.d.ts +4 -0
  111. package/build/parser.js +3 -1
  112. package/build/parser.test.js +114 -1
  113. package/build/parserConsumedSequenceParser.js +66 -7
  114. package/build/parserContext.d.ts +6 -0
  115. package/build/parserContext.js +20 -11
  116. package/build/parserError.d.ts +119 -27
  117. package/build/parserError.js +16 -8
  118. package/build/regexpParser.d.ts +2 -0
  119. package/build/regexpParser.js +101 -0
  120. package/build/regexpParser.test.d.ts +1 -0
  121. package/build/regexpParser.test.js +114 -0
  122. package/build/regularExpression.d.ts +63 -0
  123. package/build/regularExpression.js +1 -0
  124. package/build/regularExpressionParser.d.ts +3 -0
  125. package/build/regularExpressionParser.js +600 -0
  126. package/build/regularExpressionParser.test.d.ts +1 -0
  127. package/build/regularExpressionParser.test.js +89 -0
  128. package/build/separatedArrayParser.js +73 -14
  129. package/build/separatedNonEmptyArrayParser.js +73 -14
  130. package/build/sliceBoundedParser.js +62 -5
  131. package/build/smaliParser.d.ts +7 -7
  132. package/build/smaliParser.js +185 -268
  133. package/build/smaliParser.test.js +58 -0
  134. package/build/stringEscapes.d.ts +5 -0
  135. package/build/stringEscapes.js +244 -0
  136. package/build/symbolicExpression.d.ts +29 -0
  137. package/build/symbolicExpression.js +1 -0
  138. package/build/symbolicExpressionParser.d.ts +4 -0
  139. package/build/symbolicExpressionParser.js +123 -0
  140. package/build/symbolicExpressionParser.test.d.ts +1 -0
  141. package/build/symbolicExpressionParser.test.js +289 -0
  142. package/build/terminatedArrayParser.js +113 -38
  143. package/build/terminatedArrayParser.test.js +4 -2
  144. package/build/tupleParser.d.ts +7 -15
  145. package/build/tupleParser.js +1 -0
  146. package/build/unionParser.d.ts +5 -3
  147. package/build/unionParser.js +7 -2
  148. package/build/unionParser.test-d.d.ts +1 -0
  149. package/build/unionParser.test-d.js +72 -0
  150. package/build/unionParser.test.js +10 -11
  151. package/build/zig.d.ts +280 -0
  152. package/build/zig.js +2 -0
  153. package/build/zigParser.d.ts +3 -0
  154. package/build/zigParser.js +1119 -0
  155. package/build/zigParser.test.d.ts +1 -0
  156. package/build/zigParser.test.js +1590 -0
  157. package/build/zigUnparser.d.ts +2 -0
  158. package/build/zigUnparser.js +460 -0
  159. package/build/zigUnparser.test.d.ts +1 -0
  160. package/build/zigUnparser.test.js +24 -0
  161. package/build/zipParser.js +19 -32
  162. package/build/zipUnparser.js +19 -7
  163. package/build/zipUnparser.test.js +1 -1
  164. package/node_modules-@types/s-expression/index.d.ts +5 -0
  165. package/package.json +25 -6
  166. package/src/androidPackageParser.ts +33 -60
  167. package/src/arbitraryDalvikBytecode.ts +39 -31
  168. package/src/arbitraryDalvikExecutable.ts +65 -20
  169. package/src/arbitraryJava.ts +804 -0
  170. package/src/arbitraryJavaScript.ts +410 -0
  171. package/src/arbitraryZig.ts +380 -0
  172. package/src/arrayParser.ts +1 -3
  173. package/src/backsmali.ts +35 -4
  174. package/src/bash.ts +127 -0
  175. package/src/bashParser.test.ts +590 -0
  176. package/src/bashParser.ts +498 -0
  177. package/src/dalvikBytecodeParser/addressConversion.ts +496 -0
  178. package/src/dalvikBytecodeParser/formatParsers.ts +19 -29
  179. package/src/dalvikBytecodeParser.test-d.ts +310 -0
  180. package/src/dalvikBytecodeParser.ts +194 -69
  181. package/src/dalvikBytecodeUnparser/formatUnparsers.ts +27 -26
  182. package/src/dalvikBytecodeUnparser.test.ts +7 -7
  183. package/src/dalvikBytecodeUnparser.ts +31 -30
  184. package/src/dalvikExecutable.test-d.ts +132 -0
  185. package/src/dalvikExecutable.ts +3 -3
  186. package/src/dalvikExecutableParser/typedNumbers.ts +11 -0
  187. package/src/dalvikExecutableParser.test.ts +37 -3
  188. package/src/dalvikExecutableParser.test.ts.md +163 -2
  189. package/src/dalvikExecutableParser.test.ts.snap +0 -0
  190. package/src/dalvikExecutableParser.ts +121 -139
  191. package/src/dalvikExecutableParserAgainstSmaliParser.test.ts +4 -0
  192. package/src/dalvikExecutableUnparser/poolScanners.ts +6 -6
  193. package/src/dalvikExecutableUnparser/sectionUnparsers.ts +38 -14
  194. package/src/dalvikExecutableUnparser.test.ts +3 -2
  195. package/src/dalvikExecutableUnparser.ts +4 -4
  196. package/src/disjunctionParser.test-d.ts +105 -0
  197. package/src/disjunctionParser.ts +18 -15
  198. package/src/elementSwitchParser.test-d.ts +74 -0
  199. package/src/elementSwitchParser.ts +51 -0
  200. package/src/exactSequenceParser.test-d.ts +43 -0
  201. package/src/exactSequenceParser.ts +13 -8
  202. package/src/fetchCid.ts +2 -76
  203. package/src/index.test.ts +22 -1
  204. package/src/index.ts +119 -2
  205. package/src/inputReader.ts +53 -0
  206. package/src/java.ts +708 -0
  207. package/src/javaKeyStoreParser.ts +18 -32
  208. package/src/javaParser.test.ts +1592 -0
  209. package/src/javaParser.ts +2640 -0
  210. package/src/javaScript.ts +36 -0
  211. package/src/javaScriptParser.ts +57 -0
  212. package/src/javaScriptUnparser.test.ts +37 -0
  213. package/src/javaScriptUnparser.ts +7 -0
  214. package/src/javaUnparser.test.ts +37 -0
  215. package/src/javaUnparser.ts +640 -0
  216. package/src/jsonParser.ts +6 -27
  217. package/src/lookaheadParser.ts +2 -6
  218. package/src/negativeLookaheadParser.ts +1 -3
  219. package/src/nonEmptyArrayParser.ts +1 -3
  220. package/src/objectParser.test-d.ts +152 -0
  221. package/src/objectParser.test.ts +71 -0
  222. package/src/objectParser.ts +69 -0
  223. package/src/optionalParser.ts +1 -3
  224. package/src/parser.test.ts +151 -4
  225. package/src/parser.ts +11 -1
  226. package/src/parserConsumedSequenceParser.ts +2 -4
  227. package/src/parserContext.ts +26 -11
  228. package/src/parserError.ts +17 -3
  229. package/src/regexpParser.test.ts +264 -0
  230. package/src/regexpParser.ts +126 -0
  231. package/src/regularExpression.ts +24 -0
  232. package/src/regularExpressionParser.test.ts +102 -0
  233. package/src/regularExpressionParser.ts +920 -0
  234. package/src/separatedArrayParser.ts +1 -3
  235. package/src/separatedNonEmptyArrayParser.ts +1 -3
  236. package/src/sliceBoundedParser.test.ts +2 -2
  237. package/src/sliceBoundedParser.ts +15 -19
  238. package/src/smaliParser.test.ts +64 -0
  239. package/src/smaliParser.test.ts.md +12 -12
  240. package/src/smaliParser.test.ts.snap +0 -0
  241. package/src/smaliParser.ts +246 -534
  242. package/src/stringEscapes.ts +253 -0
  243. package/src/symbolicExpression.ts +17 -0
  244. package/src/symbolicExpressionParser.test.ts +466 -0
  245. package/src/symbolicExpressionParser.ts +190 -0
  246. package/src/terminatedArrayParser.test.ts +9 -6
  247. package/src/terminatedArrayParser.ts +25 -29
  248. package/src/tupleParser.ts +21 -18
  249. package/src/unionParser.test-d.ts +105 -0
  250. package/src/unionParser.test.ts +18 -17
  251. package/src/unionParser.ts +28 -16
  252. package/src/zig.ts +411 -0
  253. package/src/zigParser.test.ts +1693 -0
  254. package/src/zigParser.ts +1745 -0
  255. package/src/zigUnparser.test.ts +37 -0
  256. package/src/zigUnparser.ts +615 -0
  257. package/src/zipParser.ts +20 -56
  258. package/src/zipUnparser.test.ts +1 -1
  259. package/src/zipUnparser.ts +22 -7
  260. package/tsconfig.json +2 -2
  261. package/build/exactElementSwitchParser.d.ts +0 -3
  262. package/src/exactElementSwitchParser.ts +0 -41
@@ -0,0 +1,920 @@
1
+ import { type Parser } from './parser.js';
2
+ import { createUnionParser } from './unionParser.js';
3
+ import { createExactSequenceParser } from './exactSequenceParser.js';
4
+ import { promiseCompose } from './promiseCompose.js';
5
+ import { createTupleParser } from './tupleParser.js';
6
+ import { createArrayParser } from './arrayParser.js';
7
+ import { createParserAccessorParser } from './parserAccessorParser.js';
8
+ import { createElementParser } from './elementParser.js';
9
+ import { parserCreatorCompose } from './parserCreatorCompose.js';
10
+ import { createOptionalParser } from './optionalParser.js';
11
+ import { createFixedLengthSequenceParser } from './fixedLengthSequenceParser.js';
12
+ import { createTerminatedArrayParser } from './terminatedArrayParser.js';
13
+ import { createDisjunctionParser } from './disjunctionParser.js';
14
+ import { createNegativeLookaheadParser } from './negativeLookaheadParser.js';
15
+ import { createObjectParser } from './objectParser.js';
16
+ import {
17
+ type CharacterSet,
18
+ type CodePointRange,
19
+ type RegularExpression,
20
+ type RepeatBounds,
21
+ } from './regularExpression.js';
22
+
23
+ // CharacterSet helpers
24
+
25
// Shared value for the character set that holds no ranges; used as the leaf of every tree node.
const emptyCharacterSet: CharacterSet = { type: 'empty' };
26
+
27
/**
 * Tells whether a range contains no code points at all.
 *
 * A range is inclusive on both sides, so it is empty exactly when its
 * `start` lies past its `end`.
 */
function codePointRangeIsEmpty(range: CodePointRange): boolean {
	const { start, end } = range;
	return end < start;
}
30
+
31
/**
 * Tells whether `rangeA` ends early enough that at least one code point
 * separates it from the start of `rangeB` (the ranges neither overlap nor touch).
 */
function codePointRangeIsStrictlyBefore(rangeA: CodePointRange, rangeB: CodePointRange): boolean {
	const firstCodePointPastA = rangeA.end + 1;
	return rangeB.start > firstCodePointPastA;
}
34
+
35
/**
 * Mirror image of `codePointRangeIsStrictlyBefore`: true when `rangeB`
 * is strictly before `rangeA`.
 */
function codePointRangeIsStrictlyAfter(rangeA: CodePointRange, rangeB: CodePointRange): boolean {
	const bPrecedesA = codePointRangeIsStrictlyBefore(rangeB, rangeA);
	return bPrecedesA;
}
38
+
39
/**
 * Returns the smallest single range that covers both arguments.
 *
 * An empty argument contributes nothing: if `rangeA` is empty the result is
 * `rangeB` itself (even when `rangeB` is empty too), and if only `rangeB`
 * is empty the result is `rangeA` itself.
 */
function codePointRangeLeastUpperBound(rangeA: CodePointRange, rangeB: CodePointRange): CodePointRange {
	if (codePointRangeIsEmpty(rangeA)) {
		return rangeB;
	}

	if (codePointRangeIsEmpty(rangeB)) {
		return rangeA;
	}

	const lowest = Math.min(rangeA.start, rangeB.start);
	const highest = Math.max(rangeA.end, rangeB.end);
	return { start: lowest, end: highest };
}
47
+
48
/**
 * True when the two ranges are separated by a gap in either direction,
 * i.e. they neither overlap nor sit directly next to each other.
 */
function codePointRangeStrictlyDisjoint(rangeA: CodePointRange, rangeB: CodePointRange): boolean {
	if (codePointRangeIsStrictlyBefore(rangeA, rangeB)) {
		return true;
	}

	return codePointRangeIsStrictlyAfter(rangeA, rangeB);
}
51
+
52
/** Builds a tree node holding `range` with the given left and right subtrees. */
function characterSetNode(range: CodePointRange, left: CharacterSet, right: CharacterSet): CharacterSet {
	const node: CharacterSet = {
		type: 'node',
		range,
		left,
		right,
	};
	return node;
}
55
+
56
/**
 * Lazily yields every range stored in `set` using an in-order walk:
 * the left subtree first, then the node's own range, then the right subtree.
 * An empty set yields nothing.
 */
function* characterSetGetRanges(set: CharacterSet): Generator<CodePointRange> {
	// Nodes whose left subtree is being visited and whose own range is still pending.
	const ancestors: Array<Extract<CharacterSet, { type: 'node' }>> = [];
	let cursor: CharacterSet = set;

	for (;;) {
		// Descend as far left as possible, remembering the path.
		while (cursor.type === 'node') {
			ancestors.push(cursor);
			cursor = cursor.left;
		}

		const node = ancestors.pop();
		if (node === undefined) {
			return;
		}

		yield node.range;
		cursor = node.right;
	}
}
63
+
64
/**
 * Pulls out of `set` every stored range that overlaps or directly touches `range`.
 *
 * @returns `restCharSet` — the set with those ranges removed — and
 * `extendedRange` — the bounding range covering `range` together with
 * everything that was removed.
 */
function characterSetExtractOverlap(set: CharacterSet, range: CodePointRange): { restCharSet: CharacterSet; extendedRange: CodePointRange } {
	if (set.type === 'empty') {
		return { restCharSet: set, extendedRange: range };
	}

	const { range: nodeRange } = set;
	let widened = range;
	let keptLeft = set.left;
	let keptRight = set.right;

	// Only descend left when `range` reaches below this node's range.
	if (nodeRange.start > range.start) {
		const fromLeft = characterSetExtractOverlap(set.left, range);
		widened = codePointRangeLeastUpperBound(widened, fromLeft.extendedRange);
		keptLeft = fromLeft.restCharSet;
	}

	// Only descend right when `range` reaches above this node's range.
	if (nodeRange.end < range.end) {
		const fromRight = characterSetExtractOverlap(set.right, range);
		widened = codePointRangeLeastUpperBound(widened, fromRight.extendedRange);
		keptRight = fromRight.restCharSet;
	}

	const touchesNode = !codePointRangeStrictlyDisjoint(range, nodeRange);

	if (touchesNode) {
		// This node's range is absorbed; what remains is the union of the pruned subtrees.
		return {
			extendedRange: codePointRangeLeastUpperBound(nodeRange, widened),
			restCharSet: characterSetUnion(keptLeft, keptRight),
		};
	}

	// This node survives untouched; rebuild it over the pruned subtrees.
	return {
		extendedRange: widened,
		restCharSet: characterSetNode(nodeRange, keptLeft, keptRight),
	};
}
97
+
98
/**
 * Returns a set containing everything in `set` plus `range`.
 *
 * Inserting an empty range returns `set` unchanged. When `range` overlaps or
 * touches the root's range, it is merged with the root and with any overlapping
 * ranges extracted from both subtrees into one node.
 */
function characterSetInsertRange(set: CharacterSet, range: CodePointRange): CharacterSet {
	if (codePointRangeIsEmpty(range)) {
		return set;
	}

	if (set.type === 'empty') {
		return characterSetNode(range, emptyCharacterSet, emptyCharacterSet);
	}

	const { range: rootRange } = set;

	if (codePointRangeIsStrictlyBefore(range, rootRange)) {
		const grownLeft = characterSetInsertRange(set.left, range);
		return characterSetNode(rootRange, grownLeft, set.right);
	}

	if (codePointRangeIsStrictlyAfter(range, rootRange)) {
		const grownRight = characterSetInsertRange(set.right, range);
		return characterSetNode(rootRange, set.left, grownRight);
	}

	// `range` overlaps or touches the root: merge it with whatever it reaches in both subtrees.
	const fromLeft = characterSetExtractOverlap(set.left, range);
	const fromRight = characterSetExtractOverlap(set.right, range);
	const mergedRange = codePointRangeLeastUpperBound(
		codePointRangeLeastUpperBound(rootRange, fromLeft.extendedRange),
		fromRight.extendedRange,
	);

	return codePointRangeIsEmpty(mergedRange)
		? emptyCharacterSet
		: characterSetNode(mergedRange, fromLeft.restCharSet, fromRight.restCharSet);
}
125
+
126
/** Returns the union of two sets by inserting each range of `setB`, in order, into `setA`. */
function characterSetUnion(setA: CharacterSet, setB: CharacterSet): CharacterSet {
	let combined = setA;

	for (const range of characterSetGetRanges(setB)) {
		combined = characterSetInsertRange(combined, range);
	}

	return combined;
}
129
+
130
/**
 * Cuts `range` immediately after `point`.
 *
 * @returns a pair: the part of `range` up to and including `point`, and the
 * part from `point + 1` onwards. Either part may come out empty (start > end).
 */
function codePointRangeSplitAt(point: number, range: CodePointRange): [CodePointRange, CodePointRange] {
	const { start, end } = range;
	const upToPoint: CodePointRange = { start, end: Math.min(end, point) };
	const pastPoint: CodePointRange = { start: Math.max(start, point + 1), end };
	return [upToPoint, pastPoint];
}
136
+
137
/**
 * Unites two ranges into a sorted list of zero, one or two ranges.
 *
 * Empty inputs are dropped. Ranges separated by a gap are returned as two
 * entries in ascending order; overlapping or touching ranges are fused into one.
 */
function codePointRangeUnion(rangeA: CodePointRange, rangeB: CodePointRange): CodePointRange[] {
	const aIsEmpty = codePointRangeIsEmpty(rangeA);
	const bIsEmpty = codePointRangeIsEmpty(rangeB);

	if (aIsEmpty || bIsEmpty) {
		if (aIsEmpty && bIsEmpty) {
			return [];
		}

		return aIsEmpty ? [rangeB] : [rangeA];
	}

	if (rangeB.start > rangeA.end + 1) {
		return [rangeA, rangeB];
	}

	if (rangeA.start > rangeB.end + 1) {
		return [rangeB, rangeA];
	}

	const fused: CodePointRange = {
		start: Math.min(rangeA.start, rangeB.start),
		end: Math.max(rangeA.end, rangeB.end),
	};
	return [fused];
}
148
+
149
/**
 * Removes `rangeB` from `rangeA`.
 *
 * `rangeA` is cut just before `rangeB` starts and just after `rangeB` ends;
 * the outer pieces are then combined, giving zero, one or two ranges.
 */
function codePointRangeDifference(rangeA: CodePointRange, rangeB: CodePointRange): CodePointRange[] {
	const [pieceBefore, remainder] = codePointRangeSplitAt(rangeB.start - 1, rangeA);
	const pieceAfter = codePointRangeSplitAt(rangeB.end, remainder)[1];
	return codePointRangeUnion(pieceBefore, pieceAfter);
}
154
+
155
/**
 * Returns a set containing everything in `set` except the code points in `range`.
 *
 * The part of `range` below the root's range is deleted from the left subtree,
 * the part above it from the right subtree, and the root's own range is
 * trimmed by whatever of `range` reaches it.
 */
function characterSetDeleteRange(set: CharacterSet, range: CodePointRange): CharacterSet {
	if (codePointRangeIsEmpty(range)) {
		return set;
	}

	if (set.type === 'empty') {
		return emptyCharacterSet;
	}

	const { range: rootRange } = set;
	const [partBelowRoot] = codePointRangeSplitAt(rootRange.start - 1, range);
	const [partUpToRootEnd, partAboveRoot] = codePointRangeSplitAt(rootRange.end, range);
	const prunedLeft = characterSetDeleteRange(set.left, partBelowRoot);
	const prunedRight = characterSetDeleteRange(set.right, partAboveRoot);
	const rootSurvivors = codePointRangeDifference(rootRange, partUpToRootEnd);

	switch (rootSurvivors.length) {
		case 0: {
			// The root's range vanished entirely; only the subtrees remain.
			return characterSetUnion(prunedLeft, prunedRight);
		}

		case 1: {
			return characterSetNode(rootSurvivors[0]!, prunedLeft, prunedRight);
		}

		default: {
			// The root's range was split in two: attach each half to the adjacent subtree.
			return characterSetUnion(
				characterSetInsertRange(prunedLeft, rootSurvivors[0]!),
				characterSetInsertRange(prunedRight, rootSurvivors[1]!),
			);
		}
	}
}
184
+
185
/** Returns `setA` with every range of `setB` deleted from it, one range at a time. */
function characterSetDifference(setA: CharacterSet, setB: CharacterSet): CharacterSet {
	let remaining = setA;

	for (const range of characterSetGetRanges(setB)) {
		remaining = characterSetDeleteRange(remaining, range);
	}

	return remaining;
}
188
+
189
/** Wraps a single range as a set; an empty range gives the empty set. */
function characterSetFromRange(range: CodePointRange): CharacterSet {
	return codePointRangeIsEmpty(range)
		? emptyCharacterSet
		: characterSetNode(range, emptyCharacterSet, emptyCharacterSet);
}
195
+
196
/** Builds the set containing only the first code point of `char`. */
function characterSetSingleton(char: string): CharacterSet {
	const codePoint = char.codePointAt(0)!;
	const singlePointRange: CodePointRange = { start: codePoint, end: codePoint };
	return characterSetFromRange(singlePointRange);
}
200
+
201
/**
 * Builds the set of all code points from the first code point of `startChar`
 * through the first code point of `endChar`, inclusive.
 */
function characterSetCharRange(startChar: string, endChar: string): CharacterSet {
	return characterSetFromRange({
		start: startChar.codePointAt(0)!,
		end: endChar.codePointAt(0)!,
	});
}
206
+
207
/** Builds the set containing the first code point of each string in `chars`. */
function characterSetFromArray(chars: string[]): CharacterSet {
	return chars.reduce<CharacterSet>(
		(accumulated, char) => characterSetUnion(accumulated, characterSetSingleton(char)),
		emptyCharacterSet,
	);
}
210
+
211
/**
 * Returns every code point of `alphabet` that is not in `set`.
 *
 * The complement is taken relative to `alphabet`, so the result never
 * contains a code point that `alphabet` itself leaves out.
 */
function characterSetComplement(set: CharacterSet): CharacterSet {
	return characterSetDifference(alphabet, set);
}
214
+
215
+ // Pre-defined character sets
216
// Universe used by characterSetComplement: code points 0..0x10FFFF
// minus '\r', '\n', U+2028 and U+2029.
const alphabet: CharacterSet = characterSetDifference(
	characterSetFromRange({ start: 0, end: 0x10FFFF }),
	characterSetFromArray(['\r', '\n', '\u2028', '\u2029']),
);

// Set used for the `.` wildcard.
// NOTE(review): `alphabet` already excludes these four characters, so this second
// subtraction looks redundant — confirm whether `alphabet` was meant to be the full range.
const wildcardCharacterSet: CharacterSet = characterSetDifference(
	alphabet,
	characterSetFromArray(['\r', '\n', '\u2028', '\u2029']),
);

// `\d` and its complement.
// NOTE(review): complements are taken against `alphabet`, so nonDigitChars and
// nonWordChars below do not contain '\r', '\n', U+2028 or U+2029 — confirm this is intended.
const digitChars: CharacterSet = characterSetCharRange('0', '9');
const nonDigitChars: CharacterSet = characterSetComplement(digitChars);

// `\w`: ASCII letters, ASCII digits and underscore; `\W` is its complement within `alphabet`.
const wordChars: CharacterSet = [
	characterSetCharRange('a', 'z'),
	characterSetCharRange('A', 'Z'),
	characterSetCharRange('0', '9'),
	characterSetSingleton('_'),
].reduce(characterSetUnion);
const nonWordChars: CharacterSet = characterSetComplement(wordChars);

// `\s`: the white-space and line-terminator characters listed below;
// `\S` is its complement within `alphabet`.
const whiteSpaceChars: CharacterSet = [
	characterSetSingleton('\f'),
	characterSetSingleton('\n'),
	characterSetSingleton('\r'),
	characterSetSingleton('\t'),
	characterSetSingleton('\v'),
	characterSetSingleton('\u0020'),
	characterSetSingleton('\u00a0'),
	characterSetSingleton('\u1680'),
	characterSetCharRange('\u2000', '\u200a'),
	characterSetSingleton('\u2028'),
	characterSetSingleton('\u2029'),
	characterSetSingleton('\u202f'),
	characterSetSingleton('\u205f'),
	characterSetSingleton('\u3000'),
	characterSetSingleton('\ufeff'),
].reduce(characterSetUnion);
const nonWhiteSpaceChars: CharacterSet = characterSetComplement(whiteSpaceChars);
255
+
256
+ // AST constructors
257
+
258
/** AST node for the expression that matches only the empty string. */
const epsilon: RegularExpression = { type: 'epsilon' };

/** AST node matching one character drawn from `charset`. */
function literal(charset: CharacterSet): RegularExpression {
	const node: RegularExpression = { type: 'literal', charset };
	return node;
}

/** AST node for `left` followed by `right`. */
function concat(left: RegularExpression, right: RegularExpression): RegularExpression {
	const node: RegularExpression = { type: 'concat', left, right };
	return node;
}

/** AST node for the alternation of `left` and `right`. */
function union(left: RegularExpression, right: RegularExpression): RegularExpression {
	const node: RegularExpression = { type: 'union', left, right };
	return node;
}

/** AST node of type `star` wrapping `inner`. */
function star(inner: RegularExpression): RegularExpression {
	const node: RegularExpression = { type: 'star', inner };
	return node;
}

/** AST node of type `plus` wrapping `inner`. */
function plus(inner: RegularExpression): RegularExpression {
	const node: RegularExpression = { type: 'plus', inner };
	return node;
}

/** AST node of type `optional` wrapping `inner`. */
function optional(inner: RegularExpression): RegularExpression {
	const node: RegularExpression = { type: 'optional', inner };
	return node;
}

/** AST node of type `repeat` wrapping `inner` with the given `bounds`. */
function repeat(inner: RegularExpression, bounds: RepeatBounds): RegularExpression {
	const node: RegularExpression = { type: 'repeat', inner, bounds };
	return node;
}

/**
 * AST node for a capture group around `inner`.
 * The `name` property is present on the node only when a name is supplied.
 */
function captureGroup(inner: RegularExpression, name?: string): RegularExpression {
	return name === undefined
		? { type: 'capture-group', inner }
		: { type: 'capture-group', inner, name };
}

/**
 * AST node of type `lookahead` holding the `isPositive` flag, the
 * `inner` expression and the `right` expression.
 */
function lookahead(isPositive: boolean, inner: RegularExpression, right: RegularExpression): RegularExpression {
	const node: RegularExpression = { type: 'lookahead', isPositive, inner, right };
	return node;
}

/** AST node of type `start-anchor` holding the expressions on either side of it. */
function startAnchor(left: RegularExpression, right: RegularExpression): RegularExpression {
	const node: RegularExpression = { type: 'start-anchor', left, right };
	return node;
}

/** AST node of type `end-anchor` holding the expressions on either side of it. */
function endAnchor(left: RegularExpression, right: RegularExpression): RegularExpression {
	const node: RegularExpression = { type: 'end-anchor', left, right };
	return node;
}
306
+
307
+ // Parser implementation
308
+
309
// Parser over string input that produces a string.
// NOTE(review): presumably consumes and returns exactly one input element — confirm against elementParser.js.
const elementParser: Parser<string, string> = createElementParser();

// Characters that literalCharacterParser refuses to accept as plain literal characters.
const metaCharacters = new Set(['\\', '^', '$', '.', '|', '?', '*', '+', '(', ')', '[', ']', '{', '}']);
312
+
313
+ // Escape sequences for control characters
314
/**
 * Builds a parser that accepts exactly `sequence` and produces a literal
 * matching the single character `char`.
 */
function createSingleCharacterEscapeParser(sequence: string, char: string): Parser<RegularExpression, string> {
	return promiseCompose(
		createExactSequenceParser(sequence),
		() => literal(characterSetSingleton(char)),
	);
}

/**
 * Builds a parser that accepts exactly `sequence` and produces a literal
 * matching any character in `charset`.
 */
function createCharacterSetEscapeParser(sequence: string, charset: CharacterSet): Parser<RegularExpression, string> {
	return promiseCompose(
		createExactSequenceParser(sequence),
		() => literal(charset),
	);
}

const escapeNParser: Parser<RegularExpression, string> = createSingleCharacterEscapeParser('\\n', '\n');

const escapeRParser: Parser<RegularExpression, string> = createSingleCharacterEscapeParser('\\r', '\r');

const escapeTParser: Parser<RegularExpression, string> = createSingleCharacterEscapeParser('\\t', '\t');

const escapeFParser: Parser<RegularExpression, string> = createSingleCharacterEscapeParser('\\f', '\f');

const escapeVParser: Parser<RegularExpression, string> = createSingleCharacterEscapeParser('\\v', '\v');

const escape0Parser: Parser<RegularExpression, string> = createSingleCharacterEscapeParser('\\0', '\0');

// Character class escapes
const escapeDigitParser: Parser<RegularExpression, string> = createCharacterSetEscapeParser('\\d', digitChars);

const escapeNonDigitParser: Parser<RegularExpression, string> = createCharacterSetEscapeParser('\\D', nonDigitChars);

const escapeWordParser: Parser<RegularExpression, string> = createCharacterSetEscapeParser('\\w', wordChars);

const escapeNonWordParser: Parser<RegularExpression, string> = createCharacterSetEscapeParser('\\W', nonWordChars);

const escapeSpaceParser: Parser<RegularExpression, string> = createCharacterSetEscapeParser('\\s', whiteSpaceChars);

const escapeNonSpaceParser: Parser<RegularExpression, string> = createCharacterSetEscapeParser('\\S', nonWhiteSpaceChars);
374
+
375
+ // Hex escape \xHH
376
/**
 * Parses `\x` followed by exactly two hexadecimal digits and produces a
 * literal for the character with that code.
 *
 * The two characters after `\x` are validated: without the check,
 * `Number.parseInt` would turn `\xZZ` into NaN (which `String.fromCharCode`
 * maps to U+0000) and would read `\x1g` as 0x1. On invalid digits this parser
 * fails, so an enclosing disjunction can fall through to its next alternative.
 */
const escapeHexParser: Parser<RegularExpression, string> = parserCreatorCompose(
	() => createTupleParser([
		createExactSequenceParser('\\x'),
		createFixedLengthSequenceParser<string>(2),
	]),
	([, hexCode]) => async parserContext => {
		parserContext.invariant(/^[\da-f]{2}$/i.test(hexCode), 'Invalid hex escape "\\x%s"', hexCode);
		return literal(characterSetSingleton(String.fromCharCode(Number.parseInt(hexCode, 16))));
	},
)();
383
+
384
+ // Unicode escape \uHHHH
385
/**
 * Parses `\u` followed by exactly four hexadecimal digits and produces a
 * literal for the UTF-16 code unit with that value.
 *
 * The four characters after `\u` are validated: without the check,
 * `Number.parseInt` would turn `\uZZZZ` into NaN (which `String.fromCharCode`
 * maps to U+0000) and would accept a partially hexadecimal sequence such as
 * `\u12xy` as 0x12. On invalid digits this parser fails, so an enclosing
 * disjunction can fall through to its next alternative.
 */
const escapeUnicodeParser: Parser<RegularExpression, string> = parserCreatorCompose(
	() => createTupleParser([
		createExactSequenceParser('\\u'),
		createFixedLengthSequenceParser<string>(4),
	]),
	([, hexCode]) => async parserContext => {
		parserContext.invariant(/^[\da-f]{4}$/i.test(hexCode), 'Invalid unicode escape "\\u%s"', hexCode);
		return literal(characterSetSingleton(String.fromCharCode(Number.parseInt(hexCode, 16))));
	},
)();
392
+
393
+ // Escaped metacharacter (e.g., \., \*, etc.)
394
/**
 * Parses a backslash followed by any single element and produces a literal
 * for that element, whatever it is.
 */
const escapeMetacharacterParser: Parser<RegularExpression, string> = promiseCompose(
	createTupleParser([
		createExactSequenceParser('\\'),
		elementParser,
	]),
	backslashAndChar => {
		const escapedChar = backslashAndChar[1];
		return literal(characterSetSingleton(escapedChar));
	},
);
401
+
402
+ // All escape sequences - use createDisjunctionParser to try specific escapes first
403
// Ordered alternatives for every escape handled outside a character class.
// escapeMetacharacterParser accepts a backslash followed by any element, so the
// more specific escapes are listed ahead of it.
const escapeParser: Parser<RegularExpression, string> = createDisjunctionParser([
	escapeNParser,
	escapeRParser,
	escapeTParser,
	escapeFParser,
	escapeVParser,
	escape0Parser,
	escapeDigitParser,
	escapeNonDigitParser,
	escapeWordParser,
	escapeNonWordParser,
	escapeSpaceParser,
	escapeNonSpaceParser,
	escapeHexParser,
	escapeUnicodeParser,
	escapeMetacharacterParser, // Must be last - matches any escaped char
]);
420
+
421
+ // Dot (matches any character except newline)
422
// Accepts a single '.' and produces a literal over wildcardCharacterSet.
const dotParser: Parser<RegularExpression, string> = promiseCompose(
	createExactSequenceParser('.'),
	() => literal(wildcardCharacterSet),
);
426
+
427
+ // Literal character (non-metacharacter)
428
/**
 * Reads one element and, provided it is not listed in `metaCharacters`,
 * produces a literal matching exactly that element. A metacharacter makes
 * the parser's invariant fail.
 */
const literalCharacterParser: Parser<RegularExpression, string> = parserCreatorCompose(
	() => elementParser,
	char => async parserContext => {
		const isMetaCharacter = metaCharacters.has(char);
		parserContext.invariant(!isMetaCharacter, 'Unexpected metacharacter "%s"', char);

		const charset = characterSetSingleton(char);
		return literal(charset);
	},
)();
435
+
436
+ // Character class internals
437
+
438
+ // Character in a character class (different rules than outside)
439
// Characters treated specially inside `[...]`: backslash, ']', '^' and '-'.
// NOTE(review): the code that consults this set lies outside this excerpt.
const charClassMetaCharacters = new Set(['\\', ']', '^', '-']);
440
+
441
+ // Escape sequences inside character class (returns CharacterSet)
442
+ const charClassEscapeNParser: Parser<CharacterSet, string> = promiseCompose(
443
+ createExactSequenceParser('\\n'),
444
+ () => characterSetSingleton('\n'),
445
+ );
446
+
447
+ const charClassEscapeRParser: Parser<CharacterSet, string> = promiseCompose(
448
+ createExactSequenceParser('\\r'),
449
+ () => characterSetSingleton('\r'),
450
+ );
451
+
452
+ const charClassEscapeTParser: Parser<CharacterSet, string> = promiseCompose(
453
+ createExactSequenceParser('\\t'),
454
+ () => characterSetSingleton('\t'),
455
+ );
456
+
457
+ const charClassEscapeFParser: Parser<CharacterSet, string> = promiseCompose(
458
+ createExactSequenceParser('\\f'),
459
+ () => characterSetSingleton('\f'),
460
+ );
461
+
462
+ const charClassEscapeVParser: Parser<CharacterSet, string> = promiseCompose(
463
+ createExactSequenceParser('\\v'),
464
+ () => characterSetSingleton('\v'),
465
+ );
466
+
467
+ const charClassEscape0Parser: Parser<CharacterSet, string> = promiseCompose(
468
+ createExactSequenceParser('\\0'),
469
+ () => characterSetSingleton('\0'),
470
+ );
471
+
472
+ const charClassEscapeDigitParser: Parser<CharacterSet, string> = promiseCompose(
473
+ createExactSequenceParser('\\d'),
474
+ () => digitChars,
475
+ );
476
+
477
+ const charClassEscapeNonDigitParser: Parser<CharacterSet, string> = promiseCompose(
478
+ createExactSequenceParser('\\D'),
479
+ () => nonDigitChars,
480
+ );
481
+
482
// `\w` inside a character class: contributes the shared `wordChars` set.
const charClassEscapeWordParser: Parser<CharacterSet, string> = promiseCompose(
	createExactSequenceParser('\\w'),
	() => {
		return wordChars;
	},
);
486
+
487
// `\W` inside a character class: contributes the shared `nonWordChars` set.
const charClassEscapeNonWordParser: Parser<CharacterSet, string> = promiseCompose(
	createExactSequenceParser('\\W'),
	() => {
		return nonWordChars;
	},
);
491
+
492
// `\s` inside a character class: contributes the shared `whiteSpaceChars` set.
const charClassEscapeSpaceParser: Parser<CharacterSet, string> = promiseCompose(
	createExactSequenceParser('\\s'),
	() => {
		return whiteSpaceChars;
	},
);
496
+
497
// `\S` inside a character class: contributes the shared `nonWhiteSpaceChars` set.
const charClassEscapeNonSpaceParser: Parser<CharacterSet, string> = promiseCompose(
	createExactSequenceParser('\\S'),
	() => {
		return nonWhiteSpaceChars;
	},
);
501
+
502
// `\xHH` inside a character class: backslash, `x`, then exactly two
// hexadecimal digits, yielding the singleton set for that code unit.
//
// The two characters after `\x` are validated before conversion.
// Number.parseInt alone would accept a partially valid pair (`\x1G` -> 0x01)
// and return NaN for a fully invalid one, which String.fromCharCode maps to
// U+0000. Failing the parse instead lets an enclosing ordered choice fall
// through to its later alternatives.
const charClassEscapeHexParser: Parser<CharacterSet, string> = parserCreatorCompose(
	() => createTupleParser([
		createExactSequenceParser('\\x'),
		createFixedLengthSequenceParser<string>(2),
	]),
	([, hexCode]) => async parserContext => {
		parserContext.invariant(
			/^[0-9a-f]{2}$/i.test(hexCode),
			'Expected two hexadecimal digits after "\\x", got "%s"',
			hexCode,
		);
		return characterSetSingleton(String.fromCharCode(Number.parseInt(hexCode, 16)));
	},
)();
509
+
510
// `\uHHHH` inside a character class: backslash, `u`, then exactly four
// hexadecimal digits, yielding the singleton set for that code unit.
//
// The four characters after `\u` are validated before conversion.
// Number.parseInt alone would accept a partially valid group (`\u12GZ` -> 0x12)
// and return NaN for a fully invalid one, which String.fromCharCode maps to
// U+0000. Failing the parse instead lets an enclosing ordered choice fall
// through to its later alternatives.
const charClassEscapeUnicodeParser: Parser<CharacterSet, string> = parserCreatorCompose(
	() => createTupleParser([
		createExactSequenceParser('\\u'),
		createFixedLengthSequenceParser<string>(4),
	]),
	([, hexCode]) => async parserContext => {
		parserContext.invariant(
			/^[0-9a-f]{4}$/i.test(hexCode),
			'Expected four hexadecimal digits after "\\u", got "%s"',
			hexCode,
		);
		return characterSetSingleton(String.fromCharCode(Number.parseInt(hexCode, 16)));
	},
)();
517
+
518
// Generic escape inside a character class: a backslash followed by any one
// input element yields the singleton set of that element.
const charClassEscapeMetacharacterParser: Parser<CharacterSet, string> = promiseCompose(
	createTupleParser([
		createExactSequenceParser('\\'),
		elementParser,
	]),
	([, escapedChar]) => {
		return characterSetSingleton(escapedChar);
	},
);
525
+
526
// Every escape form accepted inside a character class.
// createDisjunctionParser is used so the specific escapes are tried before
// the generic backslash escape, which would also match each of them.
const charClassEscapeParser: Parser<CharacterSet, string> = createDisjunctionParser([
	// Fixed single-character escapes
	charClassEscapeNParser,
	charClassEscapeRParser,
	charClassEscapeTParser,
	charClassEscapeFParser,
	charClassEscapeVParser,
	charClassEscape0Parser,

	// Predefined classes and their negations
	charClassEscapeDigitParser,
	charClassEscapeNonDigitParser,
	charClassEscapeWordParser,
	charClassEscapeNonWordParser,
	charClassEscapeSpaceParser,
	charClassEscapeNonSpaceParser,

	// Numeric escapes
	charClassEscapeHexParser,
	charClassEscapeUnicodeParser,

	// Catch-all: must stay last because it matches any escaped character
	charClassEscapeMetacharacterParser,
]);
544
+
545
// Plain character inside a character class: reads one input element,
// asserts that it is not in `charClassMetaCharacters`, and yields its
// singleton set.
const charClassLiteralParser: Parser<CharacterSet, string> = parserCreatorCompose(
	() => elementParser,
	element => async context => {
		const isClassMetaCharacter = charClassMetaCharacters.has(element);
		context.invariant(!isClassMetaCharacter, 'Unexpected character class metacharacter "%s"', element);

		return characterSetSingleton(element);
	},
)();
553
+
554
// Single char in character class (escape or literal) - returns the character
// string itself so it can serve as a range endpoint.
//
// - The alternatives are combined with createDisjunctionParser (ordered
//   choice): the generic backslash escape also matches every specific escape
//   listed before it, so the specific ones must be tried first.
// - `\xHH` and `\uHHHH` validate their digits. Number.parseInt would otherwise
//   accept a valid prefix or produce NaN, which String.fromCharCode turns
//   into U+0000.
// - The generic escape refuses `\d`, `\D`, `\w`, `\W`, `\s` and `\S`: those
//   denote whole sets, not one character, so they cannot be range endpoints
//   (otherwise `[\d-z]` would be read as the range `d-z`).
const charClassSingleCharParser: Parser<string, string> = createDisjunctionParser([
	// Escape sequences that produce single chars
	promiseCompose(createExactSequenceParser('\\n'), () => '\n'),
	promiseCompose(createExactSequenceParser('\\r'), () => '\r'),
	promiseCompose(createExactSequenceParser('\\t'), () => '\t'),
	promiseCompose(createExactSequenceParser('\\f'), () => '\f'),
	promiseCompose(createExactSequenceParser('\\v'), () => '\v'),
	promiseCompose(createExactSequenceParser('\\0'), () => '\0'),
	parserCreatorCompose(
		() => createTupleParser([
			createExactSequenceParser('\\x'),
			createFixedLengthSequenceParser<string>(2),
		]),
		([, hexCode]) => async parserContext => {
			parserContext.invariant(
				/^[0-9a-f]{2}$/i.test(hexCode),
				'Expected two hexadecimal digits after "\\x", got "%s"',
				hexCode,
			);
			return String.fromCharCode(Number.parseInt(hexCode, 16));
		},
	)(),
	parserCreatorCompose(
		() => createTupleParser([
			createExactSequenceParser('\\u'),
			createFixedLengthSequenceParser<string>(4),
		]),
		([, hexCode]) => async parserContext => {
			parserContext.invariant(
				/^[0-9a-f]{4}$/i.test(hexCode),
				'Expected four hexadecimal digits after "\\u", got "%s"',
				hexCode,
			);
			return String.fromCharCode(Number.parseInt(hexCode, 16));
		},
	)(),
	// Generic escape: any other escaped element stands for itself
	parserCreatorCompose(
		() => createTupleParser([
			createExactSequenceParser('\\'),
			elementParser,
		]),
		([, char]) => async parserContext => {
			parserContext.invariant(
				!/^[dDwWsS]$/.test(char),
				'Class escape "\\%s" is not a single character',
				char,
			);
			return char;
		},
	)(),
	// Literal char (not a character class metacharacter; that set includes '-')
	parserCreatorCompose(
		() => elementParser,
		char => async parserContext => {
			parserContext.invariant(
				!charClassMetaCharacters.has(char),
				'Unexpected character "%s"',
				char,
			);
			return char;
		},
	)(),
]);
597
+
598
// Character range such as `a-z`: a single character, a hyphen, and another
// single character, converted with characterSetCharRange(first, last).
const charClassRangeParser: Parser<CharacterSet, string> = promiseCompose(
	createTupleParser([
		charClassSingleCharParser,
		createExactSequenceParser('-'),
		charClassSingleCharParser,
	]),
	([firstChar, , lastChar]) => {
		return characterSetCharRange(firstChar, lastChar);
	},
);
607
+
608
// Character class element, tried in this order:
//   1. a range (`a-z`),
//   2. an escape (`\d`, `\w`, `\n`, `\]`, ...),
//   3. a plain literal character,
//   4. a literal hyphen.
//
// A hyphen only reaches the last alternative when the range parser has
// already failed on it, so it is accepted as a literal wherever it appears,
// including directly before the closing `]` (`[a-]`, `[+-]`). No lookahead
// is needed for that.
const charClassElementParser: Parser<CharacterSet, string> = createDisjunctionParser([
	charClassRangeParser,
	charClassEscapeParser,
	charClassLiteralParser,
	// Literal hyphen that is not the middle of a range
	promiseCompose(
		createExactSequenceParser('-'),
		() => characterSetSingleton('-'),
	),
]);
622
+
623
// Character class `[...]` or `[^...]`: unions all parsed elements into one
// set, complements it when the leading `^` was present, and wraps the result
// in literal(...).
const characterClassParser: Parser<RegularExpression, string> = promiseCompose(
	createTupleParser([
		createExactSequenceParser('['),
		createOptionalParser(createExactSequenceParser('^')),
		createTerminatedArrayParser(
			charClassElementParser,
			createExactSequenceParser(']'),
		),
	]),
	([, caret, [members]]) => {
		let combined: CharacterSet = emptyCharacterSet;
		for (const member of members) {
			combined = characterSetUnion(combined, member);
		}

		const isNegated = caret !== undefined;
		return literal(isNegated ? characterSetComplement(combined) : combined);
	},
);
644
+
645
+ // Quantifiers
646
// Parsed quantifier suffix. `repeat` carries the bounds taken from a
// `{...}` quantifier; the other variants carry no data.
type Quantifier =
	| {
		type: 'star';
	}
	| {
		type: 'plus';
	}
	| {
		type: 'optional';
	}
	| {
		type: 'repeat';
		bounds: RepeatBounds;
	};
651
+
652
// `*` quantifier: matching the literal `*` produces a quantifier tagged 'star'.
// NOTE(review): the `_marker` entry presumably only consumes input and is not
// part of the result - confirm against createObjectParser.
const starQuantifierParser: Parser<Quantifier, string> = createObjectParser({
	type: 'star' as const,
	_marker: createExactSequenceParser('*'),
});
656
+
657
// `+` quantifier: matching the literal `+` produces a quantifier tagged 'plus'.
// NOTE(review): the `_marker` entry presumably only consumes input and is not
// part of the result - confirm against createObjectParser.
const plusQuantifierParser: Parser<Quantifier, string> = createObjectParser({
	type: 'plus' as const,
	_marker: createExactSequenceParser('+'),
});
661
+
662
// `?` quantifier: matching the literal `?` produces a quantifier tagged 'optional'.
// NOTE(review): the `_marker` entry presumably only consumes input and is not
// part of the result - confirm against createObjectParser.
const optionalQuantifierParser: Parser<Quantifier, string> = createObjectParser({
	type: 'optional' as const,
	_marker: createExactSequenceParser('?'),
});
666
+
667
// Parse a decimal number for quantifiers: collects consecutive ASCII digits
// and requires at least one of them.
const numberParser: Parser<number, string> = parserCreatorCompose(
	() => {
		// One element that must be in the range '0'..'9'
		const digitParser = parserCreatorCompose(
			() => elementParser,
			element => async context => {
				const isDigit = element >= '0' && element <= '9';
				context.invariant(isDigit, 'Expected digit, got "%s"', element);
				return element;
			},
		)();

		return createArrayParser(digitParser);
	},
	digitList => async context => {
		context.invariant(digitList.length > 0, 'Expected at least one digit');

		const text = digitList.join('');
		return Number.parseInt(text, 10);
	},
)();
681
+
682
// Brace quantifier in one of three shapes:
//   {n}   - bounds is the bare number n
//   {n,}  - bounds is { min: n }
//   {n,m} - bounds is { min: n, max: m }
const braceQuantifierParser: Parser<Quantifier, string> = promiseCompose(
	createTupleParser([
		createExactSequenceParser('{'),
		numberParser,
		createOptionalParser(
			createTupleParser([
				createExactSequenceParser(','),
				createOptionalParser(numberParser),
			]),
		),
		createExactSequenceParser('}'),
	]),
	([, lowerBound, commaPart]): Quantifier => {
		// No comma at all: exactly n repetitions
		if (commaPart === undefined) {
			return { type: 'repeat', bounds: lowerBound };
		}

		const [, upperBound] = commaPart;

		// Comma present; the upper bound is optional
		return upperBound === undefined
			? { type: 'repeat', bounds: { min: lowerBound } }
			: { type: 'repeat', bounds: { min: lowerBound, max: upperBound } };
	},
);
709
+
710
// Any quantifier suffix: `*`, `+`, `?` or a brace quantifier. The four
// alternatives start with different characters.
const quantifierParser: Parser<Quantifier, string> = createUnionParser([
	starQuantifierParser, // *
	plusQuantifierParser, // +
	optionalQuantifierParser, // ?
	braceQuantifierParser, // {n} {n,} {n,m}
]);
716
+
717
+ // Groups
718
// Capture group `(...)`: an opening parenthesis that is not followed by `?`,
// a nested alternation (resolved lazily because alternationParser is defined
// later), and the closing parenthesis. The body is wrapped with captureGroup.
const captureGroupParser: Parser<RegularExpression, string> = promiseCompose(
	createTupleParser([
		createExactSequenceParser('('),
		createNegativeLookaheadParser(createExactSequenceParser('?')),
		createParserAccessorParser(() => alternationParser),
		createExactSequenceParser(')'),
	]),
	([, , body]) => {
		return captureGroup(body);
	},
);
728
+
729
// Named capture group `(?<name>...)`: the name is every element up to the
// first `>`, then a nested alternation and the closing parenthesis. The body
// is wrapped with captureGroup(body, name).
const namedCaptureGroupParser: Parser<RegularExpression, string> = promiseCompose(
	createTupleParser([
		createExactSequenceParser('(?<'),
		createTerminatedArrayParser(
			// One element of the name: anything except the terminating '>'
			parserCreatorCompose(
				() => elementParser,
				element => async context => {
					context.invariant(element !== '>', 'Unexpected ">"');
					return element;
				},
			)(),
			createExactSequenceParser('>'),
		),
		createParserAccessorParser(() => alternationParser),
		createExactSequenceParser(')'),
	]),
	([, [nameElements], body]) => {
		const groupName = nameElements.join('');
		return captureGroup(body, groupName);
	},
);
748
+
749
// Non-capture group `(?:...)`: returns the nested alternation unchanged,
// without any wrapper node.
const nonCaptureGroupParser: Parser<RegularExpression, string> = promiseCompose(
	createTupleParser([
		createExactSequenceParser('(?:'),
		createParserAccessorParser(() => alternationParser),
		createExactSequenceParser(')'),
	]),
	([, body]) => {
		return body;
	},
);
758
+
759
// Lookahead marker, used internally while parsing a sequence: records
// whether the lookahead is positive and the expression it contains.
type LookaheadMarker = {
	type: 'lookahead-marker';
	isPositive: boolean;
	inner: RegularExpression;
};
761
+
762
// Positive lookahead `(?=...)`: produces a lookahead marker with
// isPositive = true whose `inner` is the nested alternation.
// NOTE(review): `_open` / `_close` presumably only consume the delimiters and
// are not part of the result - confirm against createObjectParser.
const positiveLookaheadMarkerParser: Parser<LookaheadMarker, string> = createObjectParser({
	type: 'lookahead-marker' as const,
	isPositive: true as const,
	_open: createExactSequenceParser('(?='),
	inner: createParserAccessorParser(() => alternationParser),
	_close: createExactSequenceParser(')'),
});
770
+
771
// Negative lookahead `(?!...)`: produces a lookahead marker with
// isPositive = false whose `inner` is the nested alternation.
// NOTE(review): `_open` / `_close` presumably only consume the delimiters and
// are not part of the result - confirm against createObjectParser.
const negativeLookaheadMarkerParser: Parser<LookaheadMarker, string> = createObjectParser({
	type: 'lookahead-marker' as const,
	isPositive: false as const,
	_open: createExactSequenceParser('(?!'),
	inner: createParserAccessorParser(() => alternationParser),
	_close: createExactSequenceParser(')'),
});
779
+
780
// Any group that yields a regular expression directly. Lookaheads are not
// included here; they are parsed as markers by separate parsers.
const groupParser: Parser<RegularExpression, string> = createUnionParser([
	namedCaptureGroupParser, // (?<name>...)
	nonCaptureGroupParser, // (?:...)
	captureGroupParser, // (...)
]);
785
+
786
+ // Anchors
787
// Anchor markers, used internally while parsing a sequence: one variant
// for `^` and one for `$`.
type AnchorMarker =
	| {
		type: 'start-anchor-marker';
	}
	| {
		type: 'end-anchor-marker';
	};
789
// Anything that may appear in a parsed sequence before the markers are
// resolved into proper expression nodes.
type ParsedElement =
	| RegularExpression
	| AnchorMarker
	| LookaheadMarker;
790
+
791
// `^` outside a character class: produces a start-anchor marker.
// NOTE(review): the `_marker` entry presumably only consumes input and is not
// part of the result - confirm against createObjectParser.
const startAnchorMarkerParser: Parser<AnchorMarker, string> = createObjectParser({
	type: 'start-anchor-marker' as const,
	_marker: createExactSequenceParser('^'),
});
795
+
796
// `$`: produces an end-anchor marker.
// NOTE(review): the `_marker` entry presumably only consumes input and is not
// part of the result - confirm against createObjectParser.
const endAnchorMarkerParser: Parser<AnchorMarker, string> = createObjectParser({
	type: 'end-anchor-marker' as const,
	_marker: createExactSequenceParser('$'),
});
800
+
801
// Atom: the smallest unit a quantifier can apply to. Anchors and lookaheads
// are not atoms; they are handled at the sequence level.
const atomParser: Parser<RegularExpression, string> = createUnionParser([
	groupParser, // (...), (?:...), (?<name>...)
	characterClassParser, // [...]
	escapeParser, // backslash escapes
	dotParser, // .
	literalCharacterParser, // any non-metacharacter
]);
809
+
810
// Quantified atom: an atom followed by an optional quantifier. Without a
// quantifier the atom is returned as is; otherwise it is wrapped with
// star / plus / optional / repeat according to the quantifier's tag.
const quantifiedParser: Parser<RegularExpression, string> = promiseCompose(
	createTupleParser([
		atomParser,
		createOptionalParser(quantifierParser),
	]),
	([base, suffix]) => {
		if (suffix === undefined) {
			return base;
		}

		if (suffix.type === 'star') {
			return star(base);
		}

		if (suffix.type === 'plus') {
			return plus(base);
		}

		if (suffix.type === 'optional') {
			return optional(base);
		}

		// Only the 'repeat' variant is left, and it carries the bounds
		return repeat(base, suffix.bounds);
	},
);
832
+
833
// One element of a sequence: an anchor marker, a lookahead marker, or a
// (possibly quantified) atom. Markers are resolved later by processElements.
const sequenceElementParser: Parser<ParsedElement, string> = createUnionParser([
	startAnchorMarkerParser, // ^
	endAnchorMarkerParser, // $
	positiveLookaheadMarkerParser, // (?=...)
	negativeLookaheadMarkerParser, // (?!...)
	quantifiedParser,
]);
841
+
842
// Concatenates a list of RegularExpressions right-associatively:
// [a, b, c] becomes concat(a, concat(b, c)). An empty list gives `epsilon`
// and a single-element list gives that element unchanged.
function concatList(parts: RegularExpression[]): RegularExpression {
	let combined: RegularExpression | undefined;

	// Walk from the last part to the first so earlier parts wrap later ones
	for (const part of [...parts].reverse()) {
		combined = combined === undefined ? part : concat(part, combined);
	}

	return combined ?? epsilon;
}
849
+
850
// Turns a flat list of parsed elements (expressions, anchor markers and
// lookahead markers) into a proper AST.
// Anchors and lookaheads are treated as infix operators like @gruhn/regex-utils.
// Precedence order (lowest to highest): union -> start-anchor -> end-anchor -> lookahead -> concat
//
// The list is split at the first marker of the lowest-precedence kind that
// is present, and both sides are processed recursively. An empty list gives
// `epsilon`.
function processElements(elements: ParsedElement[]): RegularExpression {
	if (elements.length === 0) {
		return epsilon;
	}

	// Index of the first marker with the given tag, or -1 when there is none
	const firstIndexOf = (markerType: string): number =>
		elements.findIndex(element => 'type' in element && element.type === markerType);

	// The elements on either side of `index`, excluding the element at `index`
	const splitAround = (index: number) => ({
		before: elements.slice(0, index),
		after: elements.slice(index + 1),
	});

	// Start anchors bind loosest among the infix operators
	const startAnchorIndex = firstIndexOf('start-anchor-marker');
	if (startAnchorIndex >= 0) {
		const { before, after } = splitAround(startAnchorIndex);
		return startAnchor(processElements(before), processElements(after));
	}

	// End anchors come next
	const endAnchorIndex = firstIndexOf('end-anchor-marker');
	if (endAnchorIndex >= 0) {
		const { before, after } = splitAround(endAnchorIndex);
		return endAnchor(processElements(before), processElements(after));
	}

	// Lookaheads bind tighter than anchors
	const lookaheadIndex = firstIndexOf('lookahead-marker');
	if (lookaheadIndex >= 0) {
		const marker = elements[lookaheadIndex] as LookaheadMarker;
		const { before, after } = splitAround(lookaheadIndex);
		const lookaheadExpression = lookahead(marker.isPositive, marker.inner, processElements(after));

		// Content in front of the lookahead is concatenated with it
		return before.length === 0
			? lookaheadExpression
			: concat(processElements(before), lookaheadExpression);
	}

	// No markers left: everything is a regular expression, so concatenate
	return concatList(elements as RegularExpression[]);
}
892
+
893
// Concatenation: parses a run of sequence elements (quantified atoms,
// anchors, lookaheads) and folds it into one expression via processElements.
const concatParser: Parser<RegularExpression, string> = promiseCompose(
	createArrayParser(sequenceElementParser),
	parsedElements => processElements(parsedElements),
);
898
+
899
// Alternation: concat ('|' concat)*
// The branches are combined right-associatively (like @gruhn/regex-utils):
// a|b|c becomes union(a, union(b, c)). A single branch is returned unchanged.
const alternationParser: Parser<RegularExpression, string> = promiseCompose(
	createTupleParser([
		concatParser,
		createArrayParser(
			promiseCompose(
				createTupleParser([
					createExactSequenceParser('|'),
					concatParser,
				]),
				([, branch]) => {
					return branch;
				},
			),
		),
	]),
	([firstBranch, laterBranches]) => {
		if (laterBranches.length === 0) {
			return firstBranch;
		}

		// Fold the later branches from the right, then attach the first one
		const foldedTail = laterBranches.reduceRight((folded, branch) => union(branch, folded));
		return union(firstBranch, foldedTail);
	},
);
919
+
920
// Public entry point: a complete regular expression is parsed as an alternation.
export const regularExpressionParser: Parser<RegularExpression, string>
	= alternationParser;