@futpib/parser 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (311) hide show
  1. package/.github/copilot-instructions.md +149 -0
  2. package/.github/workflows/copilot-setup-steps.yml +18 -0
  3. package/.github/workflows/main.yml +29 -8
  4. package/.yarn/releases/yarn-4.9.4.cjs +942 -0
  5. package/.yarnrc.yml +1 -1
  6. package/build/allSettledStream.js +1 -1
  7. package/build/allSettledStream.test.js +2 -2
  8. package/build/androidPackageParser.d.ts +1 -1
  9. package/build/androidPackageParser.js +5 -3
  10. package/build/androidPackageParser.test.js +7 -7
  11. package/build/androidPackageUnparser.d.ts +2 -2
  12. package/build/androidPackageUnparser.js +18 -14
  13. package/build/androidPackageUnparser.test.js +7 -7
  14. package/build/arbitrarilySlicedAsyncInterator.js +2 -1
  15. package/build/arbitraryDalvikBytecode.d.ts +4 -0
  16. package/build/arbitraryDalvikBytecode.js +640 -0
  17. package/build/arbitraryDalvikExecutable.d.ts +3 -0
  18. package/build/arbitraryDalvikExecutable.js +282 -0
  19. package/build/arbitraryDosDateTime.js +1 -0
  20. package/build/arbitraryZipStream.js +1 -1
  21. package/build/arrayParser.js +2 -2
  22. package/build/arrayUnparser.d.ts +1 -1
  23. package/build/backsmali.d.ts +3 -1
  24. package/build/backsmali.js +31 -3
  25. package/build/bash.d.ts +84 -0
  26. package/build/bash.js +1 -0
  27. package/build/bashParser.d.ts +6 -0
  28. package/build/bashParser.js +294 -0
  29. package/build/bashParser.test.d.ts +1 -0
  30. package/build/bashParser.test.js +181 -0
  31. package/build/customInvariant.d.ts +2 -1
  32. package/build/customInvariant.js +4 -6
  33. package/build/dalvikBytecodeParser/formatParsers.d.ts +76 -2
  34. package/build/dalvikBytecodeParser/formatParsers.js +146 -11
  35. package/build/dalvikBytecodeParser/formatSizes.d.ts +34 -0
  36. package/build/dalvikBytecodeParser/formatSizes.js +34 -0
  37. package/build/dalvikBytecodeParser/operationFormats.d.ts +225 -0
  38. package/build/dalvikBytecodeParser/operationFormats.js +225 -0
  39. package/build/dalvikBytecodeParser.d.ts +1105 -5
  40. package/build/dalvikBytecodeParser.js +658 -205
  41. package/build/dalvikBytecodeUnparser/formatUnparsers.d.ts +152 -0
  42. package/build/dalvikBytecodeUnparser/formatUnparsers.js +225 -0
  43. package/build/dalvikBytecodeUnparser.d.ts +3 -0
  44. package/build/dalvikBytecodeUnparser.js +642 -0
  45. package/build/dalvikBytecodeUnparser.test.d.ts +1 -0
  46. package/build/dalvikBytecodeUnparser.test.js +25 -0
  47. package/build/dalvikExecutable.d.ts +65 -8
  48. package/build/dalvikExecutable.js +36 -0
  49. package/build/dalvikExecutableParser/stringSyntaxParser.d.ts +1 -1
  50. package/build/dalvikExecutableParser/stringSyntaxParser.js +17 -17
  51. package/build/dalvikExecutableParser/typeParsers.d.ts +2 -1
  52. package/build/dalvikExecutableParser/typeParsers.js +16 -11
  53. package/build/dalvikExecutableParser/typedNumbers.d.ts +85 -69
  54. package/build/dalvikExecutableParser/typedNumbers.js +0 -1
  55. package/build/dalvikExecutableParser.d.ts +2 -2
  56. package/build/dalvikExecutableParser.js +655 -337
  57. package/build/dalvikExecutableParser.test.js +24 -22
  58. package/build/dalvikExecutableParserAgainstSmaliParser.test.js +223 -246
  59. package/build/dalvikExecutableUnparser/annotationUnparsers.d.ts +14 -0
  60. package/build/dalvikExecutableUnparser/annotationUnparsers.js +97 -0
  61. package/build/dalvikExecutableUnparser/poolBuilders.d.ts +49 -0
  62. package/build/dalvikExecutableUnparser/poolBuilders.js +140 -0
  63. package/build/dalvikExecutableUnparser/poolScanners.d.ts +4 -0
  64. package/build/dalvikExecutableUnparser/poolScanners.js +220 -0
  65. package/build/dalvikExecutableUnparser/sectionUnparsers.d.ts +25 -0
  66. package/build/dalvikExecutableUnparser/sectionUnparsers.js +581 -0
  67. package/build/dalvikExecutableUnparser/utils.d.ts +10 -0
  68. package/build/dalvikExecutableUnparser/utils.js +108 -0
  69. package/build/dalvikExecutableUnparser.d.ts +4 -0
  70. package/build/dalvikExecutableUnparser.js +406 -0
  71. package/build/dalvikExecutableUnparser.test.d.ts +1 -0
  72. package/build/dalvikExecutableUnparser.test.js +31 -0
  73. package/build/debugLogInputParser.js +1 -1
  74. package/build/disjunctionParser.d.ts +2 -2
  75. package/build/disjunctionParser.js +2 -2
  76. package/build/elementTerminatedArrayParser.d.ts +2 -2
  77. package/build/elementTerminatedArrayParser.js +1 -1
  78. package/build/elementTerminatedArrayParser.test.js +5 -5
  79. package/build/elementTerminatedSequenceArrayParser.d.ts +2 -2
  80. package/build/elementTerminatedSequenceArrayParser.js +1 -1
  81. package/build/elementTerminatedSequenceArrayParser.test.js +2 -2
  82. package/build/elementTerminatedSequenceParser.d.ts +2 -2
  83. package/build/elementTerminatedSequenceParser.js +1 -1
  84. package/build/elementTerminatedSequenceParser.test.js +2 -2
  85. package/build/endOfInputParser.d.ts +1 -1
  86. package/build/exactElementSwitchParser.d.ts +3 -0
  87. package/build/exactElementSwitchParser.js +22 -0
  88. package/build/fetchCid.js +2 -6
  89. package/build/fetchCid.test.d.ts +1 -0
  90. package/build/fetchCid.test.js +16 -0
  91. package/build/fixedLengthSequenceParser.test.js +2 -2
  92. package/build/hasExecutable.js +2 -2
  93. package/build/highResolutionTimer.js +1 -1
  94. package/build/index.d.ts +24 -2
  95. package/build/index.js +22 -1
  96. package/build/inputReader.d.ts +1 -1
  97. package/build/inputReader.test.js +33 -45
  98. package/build/javaKeyStoreParser.test.js +6 -6
  99. package/build/jsonParser.js +8 -8
  100. package/build/lazyMessageError.d.ts +48 -0
  101. package/build/lazyMessageError.js +53 -0
  102. package/build/lazyMessageError.test.d.ts +1 -0
  103. package/build/lazyMessageError.test.js +15 -0
  104. package/build/leb128Parser.d.ts +1 -1
  105. package/build/leb128Parser.js +10 -10
  106. package/build/leb128Parser.test.js +7 -7
  107. package/build/negativeLookaheadParser.js +2 -2
  108. package/build/negativeLookaheadParser.test.js +4 -4
  109. package/build/noStackCaptureOverheadError.d.ts +4 -0
  110. package/build/noStackCaptureOverheadError.js +9 -0
  111. package/build/noStackCaptureOverheadError.test.d.ts +1 -0
  112. package/build/noStackCaptureOverheadError.test.js +15 -0
  113. package/build/nonEmptyArrayParser.js +2 -2
  114. package/build/nonEmptyArrayParser.test.js +2 -1
  115. package/build/optionalParser.js +2 -2
  116. package/build/parser.d.ts +2 -1
  117. package/build/parser.js +23 -8
  118. package/build/parser.test.js +78 -29
  119. package/build/parserConsumedSequenceParser.d.ts +1 -1
  120. package/build/parserConsumedSequenceParser.js +2 -2
  121. package/build/parserContext.d.ts +8 -6
  122. package/build/parserContext.js +60 -33
  123. package/build/parserContext.test.js +7 -3
  124. package/build/parserError.d.ts +603 -44
  125. package/build/parserError.js +98 -53
  126. package/build/parserImplementationInvariant.d.ts +1 -1
  127. package/build/parserImplementationInvariant.js +2 -2
  128. package/build/parserInputCompanion.js +2 -2
  129. package/build/promiseCompose.js +1 -2
  130. package/build/regexpParser.d.ts +2 -0
  131. package/build/regexpParser.js +71 -0
  132. package/build/regexpParser.test.d.ts +1 -0
  133. package/build/regexpParser.test.js +83 -0
  134. package/build/regularExpression.d.ts +63 -0
  135. package/build/regularExpression.js +1 -0
  136. package/build/regularExpressionParser.d.ts +3 -0
  137. package/build/regularExpressionParser.js +580 -0
  138. package/build/regularExpressionParser.test.d.ts +1 -0
  139. package/build/regularExpressionParser.test.js +89 -0
  140. package/build/separatedArrayParser.js +2 -2
  141. package/build/separatedNonEmptyArrayParser.d.ts +2 -0
  142. package/build/separatedNonEmptyArrayParser.js +40 -0
  143. package/build/separatedNonEmptyArrayParser.test.d.ts +1 -0
  144. package/build/separatedNonEmptyArrayParser.test.js +66 -0
  145. package/build/sequenceBuffer.js +1 -1
  146. package/build/sequenceTerminatedSequenceParser.d.ts +2 -2
  147. package/build/sequenceTerminatedSequenceParser.js +3 -3
  148. package/build/sequenceTerminatedSequenceParser.test.js +1 -1
  149. package/build/sequenceUnparser.d.ts +1 -1
  150. package/build/skipToParser.d.ts +1 -1
  151. package/build/skipToParser.js +2 -2
  152. package/build/sliceBoundedParser.test.js +4 -9
  153. package/build/smali.d.ts +1 -1
  154. package/build/smali.js +6 -2
  155. package/build/smaliParser.d.ts +62 -6
  156. package/build/smaliParser.js +1721 -296
  157. package/build/smaliParser.test.js +338 -43
  158. package/build/stringFromAsyncIterable.d.ts +1 -0
  159. package/build/stringFromAsyncIterable.js +7 -0
  160. package/build/terminatedArrayParser.js +4 -4
  161. package/build/terminatedArrayParser.test.js +7 -7
  162. package/build/toAsyncIterator.js +4 -4
  163. package/build/unionParser.d.ts +1 -1
  164. package/build/unionParser.js +2 -2
  165. package/build/unionParser.test.js +3 -3
  166. package/build/unparser.d.ts +3 -3
  167. package/build/unparser.js +6 -4
  168. package/build/unparser.test.js +7 -19
  169. package/build/unparserContext.d.ts +2 -2
  170. package/build/unparserContext.js +2 -3
  171. package/build/unparserError.d.ts +2 -1
  172. package/build/unparserError.js +2 -1
  173. package/build/unparserImplementationInvariant.d.ts +1 -1
  174. package/build/unparserOutputCompanion.d.ts +1 -1
  175. package/build/unparserOutputCompanion.js +1 -1
  176. package/build/zipParser.js +1 -1
  177. package/build/zipUnparser.d.ts +3 -3
  178. package/build/zipUnparser.js +9 -19
  179. package/build/zipUnparser.test.js +1 -1
  180. package/package.json +20 -26
  181. package/src/allSettledStream.test.ts +2 -2
  182. package/src/allSettledStream.ts +3 -3
  183. package/src/androidPackageParser.test.ts +17 -19
  184. package/src/androidPackageParser.ts +129 -171
  185. package/src/androidPackageUnparser.test.ts +19 -21
  186. package/src/androidPackageUnparser.ts +23 -17
  187. package/src/arbitrarilySlicedAsyncInterable.ts +1 -1
  188. package/src/arbitrarilySlicedAsyncInterator.ts +4 -4
  189. package/src/arbitraryDalvikBytecode.ts +992 -0
  190. package/src/arbitraryDalvikExecutable.ts +434 -0
  191. package/src/arbitraryDosDateTime.ts +1 -0
  192. package/src/arbitraryZipStream.ts +1 -1
  193. package/src/arrayParser.ts +2 -2
  194. package/src/arrayUnparser.ts +2 -2
  195. package/src/backsmali.ts +48 -4
  196. package/src/bash.ts +120 -0
  197. package/src/bashParser.test.ts +332 -0
  198. package/src/bashParser.ts +461 -0
  199. package/src/bsonParser.test.ts +12 -14
  200. package/src/customInvariant.ts +8 -12
  201. package/src/dalvikBytecodeParser/formatParsers.ts +376 -17
  202. package/src/dalvikBytecodeParser/formatSizes.ts +35 -0
  203. package/src/dalvikBytecodeParser/operationFormats.ts +226 -0
  204. package/src/dalvikBytecodeParser.ts +1042 -243
  205. package/src/dalvikBytecodeUnparser/formatUnparsers.ts +442 -0
  206. package/src/dalvikBytecodeUnparser.test.ts +44 -0
  207. package/src/dalvikBytecodeUnparser.ts +758 -0
  208. package/src/dalvikExecutable.ts +110 -48
  209. package/src/dalvikExecutableParser/stringSyntaxParser.ts +33 -33
  210. package/src/dalvikExecutableParser/typeParsers.ts +23 -14
  211. package/src/dalvikExecutableParser/typedNumbers.ts +19 -19
  212. package/src/dalvikExecutableParser.test.ts +60 -60
  213. package/src/dalvikExecutableParser.test.ts.md +6 -6
  214. package/src/dalvikExecutableParser.test.ts.snap +0 -0
  215. package/src/dalvikExecutableParser.ts +911 -434
  216. package/src/dalvikExecutableParserAgainstSmaliParser.test.ts +256 -239
  217. package/src/dalvikExecutableUnparser/annotationUnparsers.ts +135 -0
  218. package/src/dalvikExecutableUnparser/poolBuilders.ts +189 -0
  219. package/src/dalvikExecutableUnparser/poolScanners.ts +297 -0
  220. package/src/dalvikExecutableUnparser/sectionUnparsers.ts +683 -0
  221. package/src/dalvikExecutableUnparser/utils.ts +149 -0
  222. package/src/dalvikExecutableUnparser.test.ts +57 -0
  223. package/src/dalvikExecutableUnparser.ts +581 -0
  224. package/src/debugLogInputParser.ts +1 -1
  225. package/src/disjunctionParser.ts +5 -5
  226. package/src/elementTerminatedArrayParser.test.ts +8 -8
  227. package/src/elementTerminatedArrayParser.ts +2 -2
  228. package/src/elementTerminatedSequenceArrayParser.test.ts +4 -6
  229. package/src/elementTerminatedSequenceArrayParser.ts +2 -2
  230. package/src/elementTerminatedSequenceParser.test.ts +4 -6
  231. package/src/elementTerminatedSequenceParser.ts +2 -2
  232. package/src/endOfInputParser.ts +1 -1
  233. package/src/exactElementSwitchParser.ts +41 -0
  234. package/src/fetchCid.test.ts +20 -0
  235. package/src/fetchCid.ts +3 -7
  236. package/src/fixedLengthSequenceParser.test.ts +10 -12
  237. package/src/hasExecutable.ts +2 -2
  238. package/src/highResolutionTimer.ts +1 -1
  239. package/src/index.ts +113 -2
  240. package/src/inputReader.test.ts +39 -52
  241. package/src/inputReader.ts +2 -4
  242. package/src/inputReaderState.ts +1 -1
  243. package/src/inspect.ts +1 -1
  244. package/src/javaKeyStoreParser.test.ts +12 -14
  245. package/src/javaKeyStoreParser.ts +2 -6
  246. package/src/jsonParser.test.ts +2 -4
  247. package/src/jsonParser.ts +34 -38
  248. package/src/lazyMessageError.test.ts +21 -0
  249. package/src/lazyMessageError.ts +88 -0
  250. package/src/leb128Parser.test.ts +25 -23
  251. package/src/leb128Parser.ts +19 -19
  252. package/src/negativeLookaheadParser.test.ts +7 -11
  253. package/src/negativeLookaheadParser.ts +2 -2
  254. package/src/noStackCaptureOverheadError.test.ts +17 -0
  255. package/src/noStackCaptureOverheadError.ts +12 -0
  256. package/src/nonEmptyArrayParser.test.ts +3 -2
  257. package/src/nonEmptyArrayParser.ts +2 -2
  258. package/src/optionalParser.ts +2 -2
  259. package/src/parser.test.ts +96 -43
  260. package/src/parser.test.ts.md +13 -6
  261. package/src/parser.test.ts.snap +0 -0
  262. package/src/parser.ts +35 -12
  263. package/src/parserAccessorParser.ts +1 -1
  264. package/src/parserConsumedSequenceParser.ts +3 -3
  265. package/src/parserContext.test.ts +7 -3
  266. package/src/parserContext.ts +82 -48
  267. package/src/parserError.ts +143 -63
  268. package/src/parserImplementationInvariant.ts +3 -3
  269. package/src/parserInputCompanion.ts +2 -2
  270. package/src/promiseCompose.ts +2 -2
  271. package/src/regexpParser.test.ts +186 -0
  272. package/src/regexpParser.ts +94 -0
  273. package/src/regularExpression.ts +24 -0
  274. package/src/regularExpressionParser.test.ts +102 -0
  275. package/src/regularExpressionParser.ts +921 -0
  276. package/src/separatedArrayParser.ts +3 -3
  277. package/src/separatedNonEmptyArrayParser.test.ts +117 -0
  278. package/src/separatedNonEmptyArrayParser.ts +61 -0
  279. package/src/sequenceBuffer.test.ts +9 -9
  280. package/src/sequenceBuffer.ts +4 -4
  281. package/src/sequenceTerminatedSequenceParser.test.ts +3 -5
  282. package/src/sequenceTerminatedSequenceParser.ts +4 -4
  283. package/src/sequenceUnparser.ts +2 -2
  284. package/src/skipToParser.ts +2 -2
  285. package/src/sliceBoundedParser.test.ts +4 -12
  286. package/src/sliceBoundedParser.ts +2 -2
  287. package/src/smali.ts +8 -3
  288. package/src/smaliParser.test.ts +377 -66
  289. package/src/smaliParser.test.ts.md +1635 -48
  290. package/src/smaliParser.test.ts.snap +0 -0
  291. package/src/smaliParser.ts +2751 -569
  292. package/src/stringFromAsyncIterable.ts +9 -0
  293. package/src/terminatedArrayParser.test.ts +11 -11
  294. package/src/terminatedArrayParser.ts +5 -7
  295. package/src/toAsyncIterator.ts +8 -8
  296. package/src/uint8Array.ts +2 -3
  297. package/src/unionParser.test.ts +22 -23
  298. package/src/unionParser.ts +6 -8
  299. package/src/unparser.test.ts +18 -34
  300. package/src/unparser.ts +13 -9
  301. package/src/unparserContext.ts +9 -13
  302. package/src/unparserError.ts +2 -1
  303. package/src/unparserImplementationInvariant.ts +1 -1
  304. package/src/unparserOutputCompanion.ts +1 -1
  305. package/src/zip.ts +2 -6
  306. package/src/zipParser.ts +10 -18
  307. package/src/zipUnparser.test.ts +1 -1
  308. package/src/zipUnparser.ts +52 -64
  309. package/tsconfig.json +7 -1
  310. package/xo.config.ts +15 -0
  311. package/.yarn/releases/yarn-4.5.3.cjs +0 -934
@@ -0,0 +1,921 @@
1
+ import { type Parser } from './parser.js';
2
+ import { createUnionParser } from './unionParser.js';
3
+ import { createExactSequenceParser } from './exactSequenceParser.js';
4
+ import { promiseCompose } from './promiseCompose.js';
5
+ import { createTupleParser } from './tupleParser.js';
6
+ import { createArrayParser } from './arrayParser.js';
7
+ import { createParserAccessorParser } from './parserAccessorParser.js';
8
+ import { createElementParser } from './elementParser.js';
9
+ import { parserCreatorCompose } from './parserCreatorCompose.js';
10
+ import { createOptionalParser } from './optionalParser.js';
11
+ import { createFixedLengthSequenceParser } from './fixedLengthSequenceParser.js';
12
+ import { createTerminatedArrayParser } from './terminatedArrayParser.js';
13
+ import { createDisjunctionParser } from './disjunctionParser.js';
14
+ import { createNegativeLookaheadParser } from './negativeLookaheadParser.js';
15
+ import {
16
+ type CharacterSet,
17
+ type CodePointRange,
18
+ type RegularExpression,
19
+ type RepeatBounds,
20
+ } from './regularExpression.js';
21
+
22
+ // CharacterSet helpers
23
+
24
/** Shared leaf value representing a character set that contains no ranges. */
const emptyCharacterSet: CharacterSet = { type: 'empty' };
25
+
26
/**
 * Tells whether a range contains no code points.
 *
 * @param range - Range described by its `start` and `end` bounds.
 * @returns `true` when `start` lies past `end`; a range with `start === end` is not empty.
 */
function codePointRangeIsEmpty(range: CodePointRange): boolean {
	const { start, end } = range;
	return start > end;
}
29
+
30
/**
 * Tells whether `rangeA` ends before `rangeB` starts with at least one
 * code point left between them (ranges that merely touch do not qualify).
 *
 * @param rangeA - Candidate earlier range.
 * @param rangeB - Candidate later range.
 * @returns `true` when `rangeA.end + 1` is still below `rangeB.start`.
 */
function codePointRangeIsStrictlyBefore(rangeA: CodePointRange, rangeB: CodePointRange): boolean {
	const firstPointAfterA = rangeA.end + 1;
	return firstPointAfterA < rangeB.start;
}
33
+
34
/**
 * Mirror image of the "strictly before" test: tells whether `rangeA` starts
 * after `rangeB` ends with at least one code point left between them.
 *
 * @param rangeA - Candidate later range.
 * @param rangeB - Candidate earlier range.
 * @returns `true` when `rangeB.end + 1` is still below `rangeA.start`.
 */
function codePointRangeIsStrictlyAfter(rangeA: CodePointRange, rangeB: CodePointRange): boolean {
	return rangeB.end + 1 < rangeA.start;
}
37
+
38
/**
 * Computes the smallest single range that covers both inputs.
 *
 * An empty input (`start > end`) contributes nothing: the other range is
 * returned as-is (same object).
 *
 * @param rangeA - First range.
 * @param rangeB - Second range.
 * @returns The covering range, or the non-empty input when the other is empty.
 */
function codePointRangeLeastUpperBound(rangeA: CodePointRange, rangeB: CodePointRange): CodePointRange {
	if (rangeA.start > rangeA.end) {
		return rangeB;
	}

	if (rangeB.start > rangeB.end) {
		return rangeA;
	}

	const start = Math.min(rangeA.start, rangeB.start);
	const end = Math.max(rangeA.end, rangeB.end);
	return { start, end };
}
46
+
47
/**
 * Tells whether two ranges neither overlap nor touch, i.e. one lies strictly
 * before or strictly after the other.
 *
 * @param rangeA - First range.
 * @param rangeB - Second range.
 * @returns `true` when a gap of at least one code point separates the ranges.
 */
function codePointRangeStrictlyDisjoint(rangeA: CodePointRange, rangeB: CodePointRange): boolean {
	if (codePointRangeIsStrictlyBefore(rangeA, rangeB)) {
		return true;
	}

	return codePointRangeIsStrictlyAfter(rangeA, rangeB);
}
50
+
51
/**
 * Builds an inner node of the character-set tree.
 *
 * @param range - Range stored at this node.
 * @param left - Subtree stored under `left`.
 * @param right - Subtree stored under `right`.
 * @returns A `'node'` character set holding the three parts.
 */
function characterSetNode(range: CodePointRange, left: CharacterSet, right: CharacterSet): CharacterSet {
	const node: CharacterSet = { type: 'node', range, left, right };
	return node;
}
54
+
55
/**
 * Walks the tree in order (left subtree, node range, right subtree) and
 * yields every stored range. Sets that are not `'node'` yield nothing.
 *
 * @param set - Character set to enumerate.
 */
function* characterSetGetRanges(set: CharacterSet): Generator<CodePointRange> {
	if (set.type !== 'node') {
		return;
	}

	yield* characterSetGetRanges(set.left);
	yield set.range;
	yield* characterSetGetRanges(set.right);
}
62
+
63
/**
 * Pulls out of `set` the node ranges that are not strictly disjoint from
 * `range` (they overlap it or touch it) and reports the span they cover
 * together with `range`.
 *
 * The left subtree is only visited when `range` starts before the node's
 * range, and the right subtree only when `range` ends after it.
 *
 * @param set - Tree to search.
 * @param range - Range being inserted by the caller.
 * @returns `restCharSet`, the tree without the extracted nodes, and
 * `extendedRange`, the least upper bound of `range` and every extracted range.
 */
function characterSetExtractOverlap(set: CharacterSet, range: CodePointRange): { restCharSet: CharacterSet; extendedRange: CodePointRange } {
	if (set.type === 'empty') {
		return { restCharSet: set, extendedRange: range };
	}

	const nodeRange = set.range;
	let coveredSoFar = range;
	let remainingLeft = set.left;
	let remainingRight = set.right;

	if (range.start < nodeRange.start) {
		const fromLeft = characterSetExtractOverlap(set.left, range);
		coveredSoFar = codePointRangeLeastUpperBound(coveredSoFar, fromLeft.extendedRange);
		remainingLeft = fromLeft.restCharSet;
	}

	if (range.end > nodeRange.end) {
		const fromRight = characterSetExtractOverlap(set.right, range);
		coveredSoFar = codePointRangeLeastUpperBound(coveredSoFar, fromRight.extendedRange);
		remainingRight = fromRight.restCharSet;
	}

	// A node separated from `range` by a gap stays in the tree, above its (possibly pruned) children.
	const nodeSurvives = codePointRangeStrictlyDisjoint(range, nodeRange);
	if (nodeSurvives) {
		return {
			extendedRange: coveredSoFar,
			restCharSet: characterSetNode(nodeRange, remainingLeft, remainingRight),
		};
	}

	// Otherwise the node's range is absorbed and its children are merged into one tree.
	return {
		extendedRange: codePointRangeLeastUpperBound(nodeRange, coveredSoFar),
		restCharSet: characterSetUnion(remainingLeft, remainingRight),
	};
}
96
+
97
/**
 * Returns a character set that additionally contains `range`.
 *
 * An empty `range` leaves `set` untouched. A range strictly before/after the
 * node's range is inserted into the left/right subtree. Otherwise the range
 * overlaps or touches the node: overlapping ranges are extracted from both
 * subtrees and everything is fused into a single node range.
 *
 * @param set - Set to extend (not mutated).
 * @param range - Range to add.
 * @returns The extended set.
 */
function characterSetInsertRange(set: CharacterSet, range: CodePointRange): CharacterSet {
	if (codePointRangeIsEmpty(range)) {
		return set;
	}

	if (set.type === 'empty') {
		return characterSetNode(range, emptyCharacterSet, emptyCharacterSet);
	}

	const { range: nodeRange, left, right } = set;

	if (codePointRangeIsStrictlyBefore(range, nodeRange)) {
		return characterSetNode(nodeRange, characterSetInsertRange(left, range), right);
	}

	if (codePointRangeIsStrictlyAfter(range, nodeRange)) {
		return characterSetNode(nodeRange, left, characterSetInsertRange(right, range));
	}

	const fromLeft = characterSetExtractOverlap(left, range);
	const fromRight = characterSetExtractOverlap(right, range);
	const fusedRange = codePointRangeLeastUpperBound(
		codePointRangeLeastUpperBound(nodeRange, fromLeft.extendedRange),
		fromRight.extendedRange,
	);

	return codePointRangeIsEmpty(fusedRange)
		? emptyCharacterSet
		: characterSetNode(fusedRange, fromLeft.restCharSet, fromRight.restCharSet);
}
124
+
125
/**
 * Computes the union of two character sets by inserting every range of
 * `setB`, in order, into `setA`.
 *
 * @param setA - Starting set.
 * @param setB - Set whose ranges are added.
 * @returns A set containing the ranges of both inputs.
 */
function characterSetUnion(setA: CharacterSet, setB: CharacterSet): CharacterSet {
	let result = setA;
	for (const range of characterSetGetRanges(setB)) {
		result = characterSetInsertRange(result, range);
	}

	return result;
}
128
+
129
/**
 * Cuts `range` into the part up to and including `point` and the part after it.
 *
 * Either half may come out empty (`start > end`) when `point` lies outside `range`.
 *
 * @param point - Last code point that belongs to the first half.
 * @param range - Range to split.
 * @returns `[upToPoint, afterPoint]`.
 */
function codePointRangeSplitAt(point: number, range: CodePointRange): [CodePointRange, CodePointRange] {
	const upToPoint: CodePointRange = { start: range.start, end: Math.min(range.end, point) };
	const afterPoint: CodePointRange = { start: Math.max(range.start, point + 1), end: range.end };
	return [upToPoint, afterPoint];
}
135
+
136
/**
 * Unites two ranges into a sorted list of zero, one or two ranges.
 *
 * Empty inputs are dropped. Ranges separated by a gap are returned as two
 * entries in ascending order; overlapping or touching ranges are fused.
 *
 * @param rangeA - First range.
 * @param rangeB - Second range.
 * @returns The resulting ranges.
 */
function codePointRangeUnion(rangeA: CodePointRange, rangeB: CodePointRange): CodePointRange[] {
	const aIsEmpty = rangeA.start > rangeA.end;
	const bIsEmpty = rangeB.start > rangeB.end;

	if (aIsEmpty) {
		return bIsEmpty ? [] : [rangeB];
	}

	if (bIsEmpty) {
		return [rangeA];
	}

	if (rangeA.end + 1 < rangeB.start) {
		return [rangeA, rangeB];
	}

	if (rangeB.end + 1 < rangeA.start) {
		return [rangeB, rangeA];
	}

	const fused: CodePointRange = {
		start: Math.min(rangeA.start, rangeB.start),
		end: Math.max(rangeA.end, rangeB.end),
	};
	return [fused];
}
147
+
148
/**
 * Removes `rangeB` from `rangeA`.
 *
 * `rangeA` is split just before `rangeB.start` and just after `rangeB.end`;
 * the pieces on either side are what remains.
 *
 * @param rangeA - Range to subtract from.
 * @param rangeB - Range to remove.
 * @returns Zero, one or two remaining ranges, as produced by `codePointRangeUnion`.
 */
function codePointRangeDifference(rangeA: CodePointRange, rangeB: CodePointRange): CodePointRange[] {
	const [pieceBefore, fromBStart] = codePointRangeSplitAt(rangeB.start - 1, rangeA);
	const pieceAfter = codePointRangeSplitAt(rangeB.end, fromBStart)[1];
	return codePointRangeUnion(pieceBefore, pieceAfter);
}
153
+
154
/**
 * Returns a character set with every code point of `range` removed.
 *
 * The part of `range` below the node's range is deleted from the left
 * subtree, the part above it from the right subtree, and the rest is
 * subtracted from the node's own range, which may vanish, shrink, or split
 * in two.
 *
 * @param set - Set to remove from (not mutated).
 * @param range - Range to delete.
 * @returns The reduced set.
 */
function characterSetDeleteRange(set: CharacterSet, range: CodePointRange): CharacterSet {
	if (codePointRangeIsEmpty(range)) {
		return set;
	}

	if (set.type === 'empty') {
		return emptyCharacterSet;
	}

	const nodeRange = set.range;
	const [belowNode] = codePointRangeSplitAt(nodeRange.start - 1, range);
	const [upToNodeEnd, aboveNode] = codePointRangeSplitAt(nodeRange.end, range);
	const prunedLeft = characterSetDeleteRange(set.left, belowNode);
	const prunedRight = characterSetDeleteRange(set.right, aboveNode);
	const survivingPieces = codePointRangeDifference(nodeRange, upToNodeEnd);

	switch (survivingPieces.length) {
		case 0: {
			// The node's range was deleted entirely: merge what is left of the children.
			return characterSetUnion(prunedLeft, prunedRight);
		}

		case 1: {
			// The node's range was kept or trimmed on one side.
			return characterSetNode(survivingPieces[0]!, prunedLeft, prunedRight);
		}

		default: {
			// Two pieces: the deleted range sat strictly inside the node's range.
			return characterSetUnion(
				characterSetInsertRange(prunedLeft, survivingPieces[0]!),
				characterSetInsertRange(prunedRight, survivingPieces[1]!),
			);
		}
	}
}
183
+
184
/**
 * Computes `setA` minus `setB` by deleting every range of `setB`, in order,
 * from `setA`.
 *
 * @param setA - Set to subtract from.
 * @param setB - Set whose ranges are removed.
 * @returns The remaining set.
 */
function characterSetDifference(setA: CharacterSet, setB: CharacterSet): CharacterSet {
	let result = setA;
	for (const range of characterSetGetRanges(setB)) {
		result = characterSetDeleteRange(result, range);
	}

	return result;
}
187
+
188
/**
 * Wraps a single range in a character set.
 *
 * @param range - Range to wrap.
 * @returns The empty set for an empty range, otherwise a one-node set.
 */
function characterSetFromRange(range: CodePointRange): CharacterSet {
	return codePointRangeIsEmpty(range)
		? emptyCharacterSet
		: characterSetNode(range, emptyCharacterSet, emptyCharacterSet);
}
194
+
195
/**
 * Builds the set containing only the first code point of `char`.
 *
 * @param char - String whose first code point is used; assumed non-empty
 * (the lookup result is non-null-asserted, not checked).
 * @returns A set with the one-element range for that code point.
 */
function characterSetSingleton(char: string): CharacterSet {
	const only = char.codePointAt(0)!;
	const range: CodePointRange = { start: only, end: only };
	return characterSetFromRange(range);
}
199
+
200
/**
 * Builds the set of all code points from the first code point of `startChar`
 * through the first code point of `endChar`.
 *
 * @param startChar - String whose first code point is the lower bound.
 * @param endChar - String whose first code point is the upper bound.
 * @returns The set for that range (empty when the bounds are reversed).
 */
function characterSetCharRange(startChar: string, endChar: string): CharacterSet {
	return characterSetFromRange({
		start: startChar.codePointAt(0)!,
		end: endChar.codePointAt(0)!,
	});
}
205
+
206
/**
 * Builds the set containing the first code point of every string in `chars`.
 *
 * @param chars - Characters to include, processed in array order.
 * @returns The union of the singleton sets; the empty set for an empty array.
 */
function characterSetFromArray(chars: string[]): CharacterSet {
	let result: CharacterSet = emptyCharacterSet;
	for (const char of chars) {
		result = characterSetUnion(result, characterSetSingleton(char));
	}

	return result;
}
209
+
210
/**
 * Complements `set` relative to the module-level `alphabet`.
 *
 * @param set - Set to complement.
 * @returns Every code point of `alphabet` that is not in `set`.
 */
function characterSetComplement(set: CharacterSet): CharacterSet {
	const universe = alphabet;
	return characterSetDifference(universe, set);
}
213
+
214
// Pre-defined character sets

// Universe used by characterSetComplement: U+0000..U+10FFFF minus CR, LF, U+2028 and U+2029.
// NOTE(review): because the four line terminators are excluded here, the
// complements below (nonDigitChars, nonWordChars, nonWhiteSpaceChars) never
// contain them, whereas JavaScript's \D, \W and \S do match e.g. '\n' —
// confirm this is intended.
const alphabet: CharacterSet = characterSetDifference(
	characterSetFromRange({ start: 0, end: 0x10FFFF }),
	characterSetFromArray(['\r', '\n', '\u2028', '\u2029']),
);

// Set matched by `.`. The subtraction removes the same four characters that
// `alphabet` already lacks, so it takes nothing further away.
const wildcardCharacterSet: CharacterSet = characterSetDifference(
	alphabet,
	characterSetFromArray(['\r', '\n', '\u2028', '\u2029']),
);

// \d and \D
const digitChars: CharacterSet = characterSetCharRange('0', '9');
const nonDigitChars: CharacterSet = characterSetComplement(digitChars);

// \w and \W: ASCII letters, ASCII digits and underscore.
const wordChars: CharacterSet = [
	characterSetCharRange('a', 'z'),
	characterSetCharRange('A', 'Z'),
	characterSetCharRange('0', '9'),
	characterSetSingleton('_'),
].reduce(characterSetUnion);
const nonWordChars: CharacterSet = characterSetComplement(wordChars);

// \s and \S
const whiteSpaceChars: CharacterSet = [
	characterSetSingleton('\f'),
	characterSetSingleton('\n'),
	characterSetSingleton('\r'),
	characterSetSingleton('\t'),
	characterSetSingleton('\v'),
	characterSetSingleton('\u0020'),
	characterSetSingleton('\u00a0'),
	characterSetSingleton('\u1680'),
	characterSetCharRange('\u2000', '\u200a'),
	characterSetSingleton('\u2028'),
	characterSetSingleton('\u2029'),
	characterSetSingleton('\u202f'),
	characterSetSingleton('\u205f'),
	characterSetSingleton('\u3000'),
	characterSetSingleton('\ufeff'),
].reduce(characterSetUnion);
const nonWhiteSpaceChars: CharacterSet = characterSetComplement(whiteSpaceChars);
254
+
255
+ // AST constructors
256
+
257
/** Shared AST node of type `'epsilon'`. */
const epsilon: RegularExpression = { type: 'epsilon' };
258
+
259
/**
 * Creates a `'literal'` AST node.
 *
 * @param charset - Character set carried by the node.
 */
function literal(charset: CharacterSet): RegularExpression {
	const node: RegularExpression = { type: 'literal', charset };
	return node;
}
262
+
263
/**
 * Creates a `'concat'` AST node.
 *
 * @param left - First operand.
 * @param right - Second operand.
 */
function concat(left: RegularExpression, right: RegularExpression): RegularExpression {
	const node: RegularExpression = { type: 'concat', left, right };
	return node;
}
266
+
267
/**
 * Creates a `'union'` AST node.
 *
 * @param left - First alternative.
 * @param right - Second alternative.
 */
function union(left: RegularExpression, right: RegularExpression): RegularExpression {
	const node: RegularExpression = { type: 'union', left, right };
	return node;
}
270
+
271
/**
 * Creates a `'star'` AST node.
 *
 * @param inner - Expression being wrapped.
 */
function star(inner: RegularExpression): RegularExpression {
	const node: RegularExpression = { type: 'star', inner };
	return node;
}
274
+
275
/**
 * Creates a `'plus'` AST node.
 *
 * @param inner - Expression being wrapped.
 */
function plus(inner: RegularExpression): RegularExpression {
	const node: RegularExpression = { type: 'plus', inner };
	return node;
}
278
+
279
/**
 * Creates an `'optional'` AST node.
 *
 * @param inner - Expression being wrapped.
 */
function optional(inner: RegularExpression): RegularExpression {
	const node: RegularExpression = { type: 'optional', inner };
	return node;
}
282
+
283
/**
 * Creates a `'repeat'` AST node.
 *
 * @param inner - Expression being repeated.
 * @param bounds - Repetition bounds stored on the node unchanged.
 */
function repeat(inner: RegularExpression, bounds: RepeatBounds): RegularExpression {
	const node: RegularExpression = { type: 'repeat', inner, bounds };
	return node;
}
286
+
287
/**
 * Creates a `'capture-group'` AST node.
 *
 * The `name` property is only present on the node when a name is supplied,
 * so unnamed groups carry no `name: undefined` key.
 *
 * @param inner - Expression inside the group.
 * @param name - Optional group name.
 */
function captureGroup(inner: RegularExpression, name?: string): RegularExpression {
	return name === undefined
		? { type: 'capture-group', inner }
		: { type: 'capture-group', inner, name };
}
293
+
294
/**
 * Creates a `'lookahead'` AST node.
 *
 * @param isPositive - Flag stored on the node as `isPositive`.
 * @param inner - Expression stored as `inner`.
 * @param right - Expression stored as `right`.
 */
function lookahead(isPositive: boolean, inner: RegularExpression, right: RegularExpression): RegularExpression {
	const node: RegularExpression = { type: 'lookahead', isPositive, inner, right };
	return node;
}
297
+
298
/**
 * Creates a `'start-anchor'` AST node.
 *
 * @param left - Expression stored as `left`.
 * @param right - Expression stored as `right`.
 */
function startAnchor(left: RegularExpression, right: RegularExpression): RegularExpression {
	const node: RegularExpression = { type: 'start-anchor', left, right };
	return node;
}
301
+
302
/**
 * Creates an `'end-anchor'` AST node.
 *
 * @param left - Expression stored as `left`.
 * @param right - Expression stored as `right`.
 */
function endAnchor(left: RegularExpression, right: RegularExpression): RegularExpression {
	const node: RegularExpression = { type: 'end-anchor', left, right };
	return node;
}
305
+
306
+ // Parser implementation
307
+
308
/** Parser that yields one element of the input (one string element per step). */
const elementParser: Parser<string, string> = createElementParser();

/**
 * Characters that `literalCharacterParser` refuses to treat as plain
 * literals: \ ^ $ . | ? * + ( ) [ ] { }
 */
const metaCharacters = new Set<string>([...'\\^$.|?*+()[]{}']);
311
+
312
// Escape sequences for control characters

/**
 * Builds a parser that accepts exactly `escapeSequence` and produces a
 * literal node matching only the first code point of `character`.
 *
 * @param escapeSequence - Source text of the escape, backslash included.
 * @param character - Character the escape stands for.
 */
function createControlEscapeParser(escapeSequence: string, character: string): Parser<RegularExpression, string> {
	return promiseCompose(
		createExactSequenceParser(escapeSequence),
		() => literal(characterSetSingleton(character)),
	);
}

/** `\n` — line feed. */
const escapeNParser: Parser<RegularExpression, string> = createControlEscapeParser('\\n', '\n');

/** `\r` — carriage return. */
const escapeRParser: Parser<RegularExpression, string> = createControlEscapeParser('\\r', '\r');

/** `\t` — horizontal tab. */
const escapeTParser: Parser<RegularExpression, string> = createControlEscapeParser('\\t', '\t');

/** `\f` — form feed. */
const escapeFParser: Parser<RegularExpression, string> = createControlEscapeParser('\\f', '\f');

/** `\v` — vertical tab. */
const escapeVParser: Parser<RegularExpression, string> = createControlEscapeParser('\\v', '\v');

/** `\0` — NUL. */
const escape0Parser: Parser<RegularExpression, string> = createControlEscapeParser('\\0', '\0');
342
+
343
// Character class escapes

/**
 * Builds a parser that accepts exactly `escapeSequence` and produces a
 * literal node carrying `charset`.
 *
 * @param escapeSequence - Source text of the escape, backslash included.
 * @param charset - Pre-defined character set the escape stands for.
 */
function createCharacterClassEscapeParser(escapeSequence: string, charset: CharacterSet): Parser<RegularExpression, string> {
	return promiseCompose(
		createExactSequenceParser(escapeSequence),
		() => literal(charset),
	);
}

/** `\d` */
const escapeDigitParser: Parser<RegularExpression, string> = createCharacterClassEscapeParser('\\d', digitChars);

/** `\D` */
const escapeNonDigitParser: Parser<RegularExpression, string> = createCharacterClassEscapeParser('\\D', nonDigitChars);

/** `\w` */
const escapeWordParser: Parser<RegularExpression, string> = createCharacterClassEscapeParser('\\w', wordChars);

/** `\W` */
const escapeNonWordParser: Parser<RegularExpression, string> = createCharacterClassEscapeParser('\\W', nonWordChars);

/** `\s` */
const escapeSpaceParser: Parser<RegularExpression, string> = createCharacterClassEscapeParser('\\s', whiteSpaceChars);

/** `\S` */
const escapeNonSpaceParser: Parser<RegularExpression, string> = createCharacterClassEscapeParser('\\S', nonWhiteSpaceChars);
373
+
374
// Hex escape \xHH
/**
 * Parses `\x` followed by exactly two hexadecimal digits into a literal node
 * for the code unit they denote.
 *
 * The two characters are validated before conversion. Without the check,
 * `Number.parseInt` turns `\xZZ` into NaN (which `String.fromCharCode` maps
 * to U+0000) and reads `\x1g` as `\x01`, so malformed escapes silently
 * produced the wrong character. A failed check is reported through
 * `parserContext.invariant`, the same mechanism `literalCharacterParser`
 * uses to reject input — presumably letting an enclosing alternative try
 * another branch; verify against the disjunction parser.
 */
const escapeHexParser: Parser<RegularExpression, string> = parserCreatorCompose(
	() => createTupleParser([
		createExactSequenceParser('\\x'),
		createFixedLengthSequenceParser<string>(2),
	]),
	([, hexCode]) => async parserContext => {
		parserContext.invariant(/^[\da-f]{2}$/i.test(hexCode), 'Invalid hex escape "\\x%s"', hexCode);
		return literal(characterSetSingleton(String.fromCharCode(Number.parseInt(hexCode, 16))));
	},
)();
382
+
383
// Unicode escape \uHHHH
/**
 * Parses `\u` followed by exactly four hexadecimal digits into a literal
 * node for the code unit they denote.
 *
 * The four characters are validated before conversion. Without the check,
 * `Number.parseInt` accepts any valid hex prefix (`\u12zz` would be read as
 * U+0012) and yields NaN for none (mapped to U+0000 by
 * `String.fromCharCode`), so malformed escapes silently produced the wrong
 * character. A failed check is reported through `parserContext.invariant`,
 * the same mechanism `literalCharacterParser` uses to reject input —
 * presumably letting an enclosing alternative try another branch; verify
 * against the disjunction parser.
 */
const escapeUnicodeParser: Parser<RegularExpression, string> = parserCreatorCompose(
	() => createTupleParser([
		createExactSequenceParser('\\u'),
		createFixedLengthSequenceParser<string>(4),
	]),
	([, hexCode]) => async parserContext => {
		parserContext.invariant(/^[\da-f]{4}$/i.test(hexCode), 'Invalid unicode escape "\\u%s"', hexCode);
		return literal(characterSetSingleton(String.fromCharCode(Number.parseInt(hexCode, 16))));
	},
)();
391
+
392
// Escaped metacharacter (e.g., \., \*, etc.)
/**
 * Parses a backslash followed by one element and yields a literal node for
 * that element. No check restricts the element to metacharacters, so any
 * escaped character is accepted as itself.
 */
const escapeMetacharacterParser: Parser<RegularExpression, string> = promiseCompose(
	createTupleParser([
		createExactSequenceParser('\\'),
		elementParser,
	]),
	backslashAndCharacter => {
		const escapedCharacter = backslashAndCharacter[1];
		return literal(characterSetSingleton(escapedCharacter));
	},
);
400
+
401
// All escape sequences.
// A disjunction (ordered choice) is used so that the specific escapes are
// tried before the catch-all at the end.
const escapeParser: Parser<RegularExpression, string> = createDisjunctionParser([
	// Single-character control escapes
	escapeNParser,
	escapeRParser,
	escapeTParser,
	escapeFParser,
	escapeVParser,
	escape0Parser,

	// Character class shorthands
	escapeDigitParser,
	escapeNonDigitParser,
	escapeWordParser,
	escapeNonWordParser,
	escapeSpaceParser,
	escapeNonSpaceParser,

	// Numeric escapes
	escapeHexParser,
	escapeUnicodeParser,

	// Must be last - matches any escaped char
	escapeMetacharacterParser,
]);
419
+
420
// Dot: a bare `.` yields a literal over `wildcardCharacterSet`
// (any character except newline).
const dotParser: Parser<RegularExpression, string> = promiseCompose(createExactSequenceParser('.'), () => {
	const wildcard = literal(wildcardCharacterSet);
	return wildcard;
});
425
+
426
// Literal character (non-metacharacter).
// Reads one element and fails the parse if it is listed in `metaCharacters`;
// otherwise yields a literal over the singleton set for that element.
const literalCharacterParser: Parser<RegularExpression, string> = parserCreatorCompose(
	() => elementParser,
	candidate => async parserContext => {
		const isMetaCharacter = metaCharacters.has(candidate);
		parserContext.invariant(!isMetaCharacter, 'Unexpected metacharacter "%s"', candidate);

		const singleton = characterSetSingleton(candidate);
		return literal(singleton);
	},
)();
434
+
435
+ // Character class internals
436
+
437
// Characters that are special inside a character class (the rules differ
// from those outside of one): backslash, closing bracket, caret and hyphen.
const charClassMetaCharacters = new Set([
	'\\',
	']',
	'^',
	'-',
]);
439
+
440
// Escape sequences inside a character class.
// Unlike their top-level counterparts these return a bare CharacterSet
// rather than a RegularExpression literal.

// Control escapes -> singleton sets
const charClassEscapeNParser: Parser<CharacterSet, string> = promiseCompose(createExactSequenceParser('\\n'), () => characterSetSingleton('\n'));

const charClassEscapeRParser: Parser<CharacterSet, string> = promiseCompose(createExactSequenceParser('\\r'), () => characterSetSingleton('\r'));

const charClassEscapeTParser: Parser<CharacterSet, string> = promiseCompose(createExactSequenceParser('\\t'), () => characterSetSingleton('\t'));

const charClassEscapeFParser: Parser<CharacterSet, string> = promiseCompose(createExactSequenceParser('\\f'), () => characterSetSingleton('\f'));

const charClassEscapeVParser: Parser<CharacterSet, string> = promiseCompose(createExactSequenceParser('\\v'), () => characterSetSingleton('\v'));

const charClassEscape0Parser: Parser<CharacterSet, string> = promiseCompose(createExactSequenceParser('\\0'), () => characterSetSingleton('\0'));

// Shorthand classes -> predefined sets
const charClassEscapeDigitParser: Parser<CharacterSet, string> = promiseCompose(createExactSequenceParser('\\d'), () => digitChars);

const charClassEscapeNonDigitParser: Parser<CharacterSet, string> = promiseCompose(createExactSequenceParser('\\D'), () => nonDigitChars);

const charClassEscapeWordParser: Parser<CharacterSet, string> = promiseCompose(createExactSequenceParser('\\w'), () => wordChars);

const charClassEscapeNonWordParser: Parser<CharacterSet, string> = promiseCompose(createExactSequenceParser('\\W'), () => nonWordChars);

const charClassEscapeSpaceParser: Parser<CharacterSet, string> = promiseCompose(createExactSequenceParser('\\s'), () => whiteSpaceChars);

const charClassEscapeNonSpaceParser: Parser<CharacterSet, string> = promiseCompose(createExactSequenceParser('\\S'), () => nonWhiteSpaceChars);
500
+
501
// Hex escape \xHH inside a character class.
//
// Consumes `\x` plus exactly two characters and yields the singleton set for
// the character with that code. Both characters must be hexadecimal digits;
// otherwise the parser fails instead of yielding U+0000 (from NaN) or a value
// truncated at the first non-digit by `Number.parseInt`.
const charClassEscapeHexParser: Parser<CharacterSet, string> = parserCreatorCompose(
	() => createTupleParser([
		createExactSequenceParser('\\x'),
		createFixedLengthSequenceParser<string>(2),
	]),
	([, hexCode]) => async parserContext => {
		parserContext.invariant(
			/^[\da-fA-F]{2}$/.test(hexCode),
			'Expected two hexadecimal digits after "\\x", got "%s"',
			hexCode,
		);
		return characterSetSingleton(String.fromCharCode(Number.parseInt(hexCode, 16)));
	},
)();
508
+
509
// Unicode escape \uHHHH inside a character class.
//
// Consumes `\u` plus exactly four characters and yields the singleton set for
// the UTF-16 code unit with that value. All four characters must be
// hexadecimal digits; otherwise the parser fails instead of yielding U+0000
// (from NaN) or a value truncated at the first non-digit by `Number.parseInt`.
const charClassEscapeUnicodeParser: Parser<CharacterSet, string> = parserCreatorCompose(
	() => createTupleParser([
		createExactSequenceParser('\\u'),
		createFixedLengthSequenceParser<string>(4),
	]),
	([, hexCode]) => async parserContext => {
		parserContext.invariant(
			/^[\da-fA-F]{4}$/.test(hexCode),
			'Expected four hexadecimal digits after "\\u", got "%s"',
			hexCode,
		);
		return characterSetSingleton(String.fromCharCode(Number.parseInt(hexCode, 16)));
	},
)();
516
+
517
// Generic escape inside a character class: a backslash followed by any
// single element yields the singleton set for that element.
const charClassEscapeMetacharacterParser: Parser<CharacterSet, string> = promiseCompose(
	createTupleParser([createExactSequenceParser('\\'), elementParser]),
	parsed => {
		const escapedChar = parsed[1];
		return characterSetSingleton(escapedChar);
	},
);
524
+
525
// Any escape inside a character class.
// A disjunction (ordered choice) is used so the specific escapes are tried
// before the generic metacharacter escape.
const charClassEscapeParser: Parser<CharacterSet, string> = createDisjunctionParser([
	// Single-character control escapes
	charClassEscapeNParser,
	charClassEscapeRParser,
	charClassEscapeTParser,
	charClassEscapeFParser,
	charClassEscapeVParser,
	charClassEscape0Parser,

	// Character class shorthands
	charClassEscapeDigitParser,
	charClassEscapeNonDigitParser,
	charClassEscapeWordParser,
	charClassEscapeNonWordParser,
	charClassEscapeSpaceParser,
	charClassEscapeNonSpaceParser,

	// Numeric escapes
	charClassEscapeHexParser,
	charClassEscapeUnicodeParser,

	// Must be last - matches any escaped char
	charClassEscapeMetacharacterParser,
]);
543
+
544
// Single unescaped character inside a character class.
// Reads one element and fails the parse if it is listed in
// `charClassMetaCharacters`; otherwise yields its singleton set.
const charClassLiteralParser: Parser<CharacterSet, string> = parserCreatorCompose(
	() => elementParser,
	candidate => async parserContext => {
		const isClassMetaCharacter = charClassMetaCharacters.has(candidate);
		parserContext.invariant(!isClassMetaCharacter, 'Unexpected character class metacharacter "%s"', candidate);

		return characterSetSingleton(candidate);
	},
)();
552
+
553
// Single char in character class (escape or literal) - returns the character
// string so it can serve as an endpoint of a range such as `a-z`.
//
// The alternatives form a disjunction (ordered choice), matching
// charClassEscapeParser: every specific escape (`\n`, `\xHH`, ...) is also
// accepted by the generic "backslash + any element" alternative, so the
// specific ones have to be tried first.
const charClassSingleCharParser: Parser<string, string> = createDisjunctionParser([
	// Escape sequences that produce single chars
	promiseCompose(createExactSequenceParser('\\n'), () => '\n'),
	promiseCompose(createExactSequenceParser('\\r'), () => '\r'),
	promiseCompose(createExactSequenceParser('\\t'), () => '\t'),
	promiseCompose(createExactSequenceParser('\\f'), () => '\f'),
	promiseCompose(createExactSequenceParser('\\v'), () => '\v'),
	promiseCompose(createExactSequenceParser('\\0'), () => '\0'),
	// \xHH - both characters must be hex digits, otherwise Number.parseInt
	// would yield NaN (-> U+0000) or a silently truncated value.
	parserCreatorCompose(
		() => createTupleParser([
			createExactSequenceParser('\\x'),
			createFixedLengthSequenceParser<string>(2),
		]),
		([, hexCode]) => async parserContext => {
			parserContext.invariant(
				/^[\da-fA-F]{2}$/.test(hexCode),
				'Expected two hexadecimal digits after "\\x", got "%s"',
				hexCode,
			);
			return String.fromCharCode(Number.parseInt(hexCode, 16));
		},
	)(),
	// \uHHHH - all four characters must be hex digits (same reasoning).
	parserCreatorCompose(
		() => createTupleParser([
			createExactSequenceParser('\\u'),
			createFixedLengthSequenceParser<string>(4),
		]),
		([, hexCode]) => async parserContext => {
			parserContext.invariant(
				/^[\da-fA-F]{4}$/.test(hexCode),
				'Expected four hexadecimal digits after "\\u", got "%s"',
				hexCode,
			);
			return String.fromCharCode(Number.parseInt(hexCode, 16));
		},
	)(),
	// Any other escaped char stands for itself - except the class shorthands
	// \d \D \w \W \s \S, which denote whole sets and therefore cannot be a
	// range endpoint. Rejecting them here makes `[\d-z]` fall through to the
	// escape / literal-hyphen alternatives instead of becoming the range d-z.
	parserCreatorCompose(
		() => createTupleParser([
			createExactSequenceParser('\\'),
			elementParser,
		]),
		([, char]) => async parserContext => {
			parserContext.invariant(
				!/^[dDwWsS]$/.test(char),
				'Character class escape "\\%s" cannot be a range endpoint',
				char,
			);
			return char;
		},
	)(),
	// Literal char (not metacharacter, not -)
	parserCreatorCompose(
		() => elementParser,
		char => async parserContext => {
			parserContext.invariant(
				!charClassMetaCharacters.has(char) && char !== '-',
				'Unexpected character "%s"',
				char,
			);
			return char;
		},
	)(),
]);
596
+
597
// Character range (a-z): two single chars separated by a hyphen, turned
// into a set via characterSetCharRange(start, end).
const charClassRangeParser: Parser<CharacterSet, string> = promiseCompose(
	createTupleParser([charClassSingleCharParser, createExactSequenceParser('-'), charClassSingleCharParser]),
	parsed => {
		const rangeStart = parsed[0];
		const rangeEnd = parsed[2];
		return characterSetCharRange(rangeStart, rangeEnd);
	},
);
606
+
607
// Character class element: range, escape (for \d, \w, etc.), single char,
// or a literal hyphen.
//
// Alternatives are tried in order. A hyphen only reaches the last
// alternative when it is not the middle of a range (ranges are tried first),
// so there it is always a literal - whether it is the first element (`[-a]`),
// sits between elements (`[a-z-9]`) or is the last element before the
// closing bracket (`[a-]`, `[-]`). No lookahead is applied: rejecting a
// hyphen that is followed by `]` would make the trailing-hyphen form
// unparsable.
const charClassElementParser: Parser<CharacterSet, string> = createDisjunctionParser([
	charClassRangeParser,
	charClassEscapeParser,
	charClassLiteralParser,
	// Literal hyphen (at start, at end, or after negation)
	promiseCompose(
		createExactSequenceParser('-'),
		() => characterSetSingleton('-'),
	),
]);
621
+
622
// Character class [...]
// Parses `[`, an optional `^`, then elements up to the closing `]`. The
// element sets are unioned together (starting from the empty set) and the
// result is complemented when the `^` was present.
const characterClassParser: Parser<RegularExpression, string> = promiseCompose(
	createTupleParser([
		createExactSequenceParser('['),
		createOptionalParser(createExactSequenceParser('^')),
		createTerminatedArrayParser(charClassElementParser, createExactSequenceParser(']')),
	]),
	([, negation, [elements]]) => {
		let merged: CharacterSet = emptyCharacterSet;
		for (const element of elements) {
			merged = characterSetUnion(merged, element);
		}

		const isNegated = negation !== undefined;
		return literal(isNegated ? characterSetComplement(merged) : merged);
	},
);
643
+
644
// Quantifiers
// Intermediate representation of a parsed quantifier suffix; it is applied
// to an atom later on.
type Quantifier =
	| { type: 'star' }
	| { type: 'plus' }
	| { type: 'optional' }
	| { type: 'repeat'; bounds: RepeatBounds };

// `*`
const starQuantifierParser: Parser<Quantifier, string> = promiseCompose(
	createExactSequenceParser('*'),
	(): Quantifier => ({ type: 'star' }),
);

// `+`
const plusQuantifierParser: Parser<Quantifier, string> = promiseCompose(
	createExactSequenceParser('+'),
	(): Quantifier => ({ type: 'plus' }),
);

// `?`
const optionalQuantifierParser: Parser<Quantifier, string> = promiseCompose(
	createExactSequenceParser('?'),
	(): Quantifier => ({ type: 'optional' }),
);
665
+
666
// Parse a non-negative decimal integer for use in brace quantifiers.
// Collects a run of elements that are each between '0' and '9', requires the
// run to be non-empty, and converts it with base-10 parseInt.
const numberParser: Parser<number, string> = parserCreatorCompose(
	() => createArrayParser(
		parserCreatorCompose(
			() => elementParser,
			candidate => async parserContext => {
				const isDigit = candidate >= '0' && candidate <= '9';
				parserContext.invariant(isDigit, 'Expected digit, got "%s"', candidate);
				return candidate;
			},
		)(),
	),
	digitList => async parserContext => {
		const hasDigits = digitList.length > 0;
		parserContext.invariant(hasDigits, 'Expected at least one digit');

		const text = digitList.join('');
		return Number.parseInt(text, 10);
	},
)();
680
+
681
// Brace quantifier: {n}, {n,} or {n,m}
// The part after the first number is optional as a whole (the comma), and
// the second number is optional again after the comma.
const braceQuantifierParser: Parser<Quantifier, string> = promiseCompose(
	createTupleParser([
		createExactSequenceParser('{'),
		numberParser,
		createOptionalParser(createTupleParser([
			createExactSequenceParser(','),
			createOptionalParser(numberParser),
		])),
		createExactSequenceParser('}'),
	]),
	([, min, commaPart]): Quantifier => {
		let bounds: RepeatBounds;

		if (commaPart === undefined) {
			// {n} - exactly n
			bounds = min;
		} else {
			const max = commaPart[1];
			// {n,} - at least n; {n,m} - between n and m
			bounds = max === undefined ? { min } : { min, max };
		}

		return { type: 'repeat', bounds };
	},
);
708
+
709
// Any quantifier suffix. The four alternatives start with different
// characters (`*`, `+`, `?`, `{`).
const quantifierParser: Parser<Quantifier, string> = createUnionParser([
	starQuantifierParser, // *
	plusQuantifierParser, // +
	optionalQuantifierParser, // ?
	braceQuantifierParser, // {n}, {n,}, {n,m}
]);
715
+
716
// Groups

// Capture group (...)
// The negative lookahead rejects an opening parenthesis that is directly
// followed by `?`. The body is reached through an accessor because
// alternationParser is declared further down in the file.
const captureGroupParser: Parser<RegularExpression, string> = promiseCompose(
	createTupleParser([
		createExactSequenceParser('('),
		createNegativeLookaheadParser(createExactSequenceParser('?')),
		createParserAccessorParser(() => alternationParser),
		createExactSequenceParser(')'),
	]),
	parsed => {
		const body = parsed[2];
		return captureGroup(body);
	},
);
727
+
728
// Named capture group (?<name>...)
// The name is every element up to (not including) the terminating `>`.
const namedCaptureGroupParser: Parser<RegularExpression, string> = promiseCompose(
	createTupleParser([
		createExactSequenceParser('(?<'),
		createTerminatedArrayParser(
			parserCreatorCompose(
				() => elementParser,
				nameChar => async parserContext => {
					const isTerminator = nameChar === '>';
					parserContext.invariant(!isTerminator, 'Unexpected ">"');
					return nameChar;
				},
			)(),
			createExactSequenceParser('>'),
		),
		createParserAccessorParser(() => alternationParser),
		createExactSequenceParser(')'),
	]),
	parsed => {
		const [, [nameChars], body] = parsed;
		const groupName = nameChars.join('');
		return captureGroup(body, groupName);
	},
);
747
+
748
// Non-capture group (?:...)
// Yields the inner expression unchanged - no wrapper node is created.
const nonCaptureGroupParser: Parser<RegularExpression, string> = promiseCompose(
	createTupleParser([
		createExactSequenceParser('(?:'),
		createParserAccessorParser(() => alternationParser),
		createExactSequenceParser(')'),
	]),
	parsed => parsed[1],
);
757
+
758
// Lookahead markers for internal use during parsing.
// A marker records the lookahead's polarity and inner expression; it is
// turned into a real AST node when the surrounding sequence is processed.
type LookaheadMarker = {
	type: 'lookahead-marker';
	isPositive: boolean;
	inner: RegularExpression;
};

// Positive lookahead (?=...)
const positiveLookaheadMarkerParser: Parser<LookaheadMarker, string> = promiseCompose(
	createTupleParser([
		createExactSequenceParser('(?='),
		createParserAccessorParser(() => alternationParser),
		createExactSequenceParser(')'),
	]),
	([, inner]): LookaheadMarker => ({ type: 'lookahead-marker', isPositive: true, inner }),
);

// Negative lookahead (?!...)
const negativeLookaheadMarkerParser: Parser<LookaheadMarker, string> = promiseCompose(
	createTupleParser([
		createExactSequenceParser('(?!'),
		createParserAccessorParser(() => alternationParser),
		createExactSequenceParser(')'),
	]),
	([, inner]): LookaheadMarker => ({ type: 'lookahead-marker', isPositive: false, inner }),
);
780
+
781
// Any group that produces a RegularExpression directly. Lookahead groups are
// not included here; they are parsed as markers at the sequence level.
const groupParser: Parser<RegularExpression, string> = createUnionParser([
	namedCaptureGroupParser, // (?<name>...)
	nonCaptureGroupParser, // (?:...)
	captureGroupParser, // (...)
]);
786
+
787
// Anchors

// Anchor markers for internal use during parsing
type AnchorMarker =
	| { type: 'start-anchor-marker' }
	| { type: 'end-anchor-marker' };

// Anything that may appear in a parsed sequence before markers are resolved
type ParsedElement =
	| RegularExpression
	| AnchorMarker
	| LookaheadMarker;

// `^`
const startAnchorMarkerParser: Parser<AnchorMarker, string> = promiseCompose(
	createExactSequenceParser('^'),
	(): AnchorMarker => ({ type: 'start-anchor-marker' }),
);

// `$`
const endAnchorMarkerParser: Parser<AnchorMarker, string> = promiseCompose(
	createExactSequenceParser('$'),
	(): AnchorMarker => ({ type: 'end-anchor-marker' }),
);
801
+
802
// Atom: the basic unit that can be quantified (excluding anchors)
const atomParser: Parser<RegularExpression, string> = createUnionParser([
	groupParser, // (...), (?:...), (?<name>...)
	characterClassParser, // [...]
	escapeParser, // \n, \d, \xHH, ...
	dotParser, // .
	literalCharacterParser, // any non-metacharacter
]);
810
+
811
// Quantified atom: an atom optionally followed by one quantifier.
// Without a quantifier the atom is returned as-is; otherwise it is wrapped
// with the constructor matching the quantifier type.
const quantifiedParser: Parser<RegularExpression, string> = promiseCompose(
	createTupleParser([atomParser, createOptionalParser(quantifierParser)]),
	([atom, quantifier]) => {
		if (quantifier === undefined) {
			return atom;
		}

		if (quantifier.type === 'star') {
			return star(atom);
		}

		if (quantifier.type === 'plus') {
			return plus(atom);
		}

		if (quantifier.type === 'optional') {
			return optional(atom);
		}

		// Only 'repeat' is left at this point.
		return repeat(atom, quantifier.bounds);
	},
);
833
+
834
// Element in a sequence: an anchor marker, a lookahead marker, or a
// (possibly quantified) atom.
const sequenceElementParser: Parser<ParsedElement, string> = createUnionParser<ParsedElement, string>([
	startAnchorMarkerParser, // ^
	endAnchorMarkerParser, // $
	positiveLookaheadMarkerParser, // (?=...)
	negativeLookaheadMarkerParser, // (?!...)
	quantifiedParser,
]);
842
+
843
// Helper to concatenate a list of RegularExpressions (right-associative):
// [a, b, c] becomes concat(a, concat(b, c)). An empty list yields epsilon and
// a single-element list yields that element unchanged.
function concatList(parts: RegularExpression[]): RegularExpression {
	let combined: RegularExpression | undefined;

	// Walk from the last part to the first so each part wraps what follows it.
	for (const part of [...parts].reverse()) {
		combined = combined === undefined ? part : concat(part, combined);
	}

	return combined ?? epsilon;
}
850
+
851
// Turn a flat list of sequence elements (expressions plus anchor and
// lookahead markers) into a proper AST.
// Markers are handled as infix operators like @gruhn/regex-utils: the list is
// split at the first marker of the loosest-binding kind present and both
// sides are processed recursively.
// Precedence order (lowest to highest): union -> start-anchor -> end-anchor -> lookahead -> concat
function processElements(elements: ParsedElement[]): RegularExpression {
	if (elements.length === 0) {
		return epsilon;
	}

	// Index of the first element carrying the given marker type, or -1.
	const firstIndexOf = (markerType: string): number =>
		elements.findIndex(element => 'type' in element && element.type === markerType);

	// Start anchors bind loosest among the infix operators, so split on them first.
	const startAnchorIdx = firstIndexOf('start-anchor-marker');
	if (startAnchorIdx >= 0) {
		return startAnchor(
			processElements(elements.slice(0, startAnchorIdx)),
			processElements(elements.slice(startAnchorIdx + 1)),
		);
	}

	// Then end anchors.
	const endAnchorIdx = firstIndexOf('end-anchor-marker');
	if (endAnchorIdx >= 0) {
		return endAnchor(
			processElements(elements.slice(0, endAnchorIdx)),
			processElements(elements.slice(endAnchorIdx + 1)),
		);
	}

	// Then lookaheads (higher precedence than anchors).
	const lookaheadIdx = firstIndexOf('lookahead-marker');
	if (lookaheadIdx >= 0) {
		const marker = elements[lookaheadIdx] as LookaheadMarker;
		const before = elements.slice(0, lookaheadIdx);
		const after = elements.slice(lookaheadIdx + 1);
		const lookaheadExpr = lookahead(marker.isPositive, marker.inner, processElements(after));

		// Content before the lookahead, if any, is concatenated in front of it.
		return before.length === 0
			? lookaheadExpr
			: concat(processElements(before), lookaheadExpr);
	}

	// No markers left, so every element is a regular expression.
	return concatList(elements as RegularExpression[]);
}
893
+
894
// Concatenation: a (possibly empty) run of sequence elements, folded into a
// single expression by processElements.
const concatParser: Parser<RegularExpression, string> = promiseCompose(
	createArrayParser(sequenceElementParser),
	elements => processElements(elements),
);
899
+
900
// Alternation: concat ('|' concat)*
// The branches are combined into a right-associative union like
// @gruhn/regex-utils: a|b|c becomes union(a, union(b, c)). A single branch is
// returned as-is.
const alternationParser: Parser<RegularExpression, string> = promiseCompose(
	createTupleParser([
		concatParser,
		createArrayParser(promiseCompose(
			createTupleParser([createExactSequenceParser('|'), concatParser]),
			branch => branch[1],
		)),
	]),
	([first, rest]) => {
		if (rest.length === 0) {
			return first;
		}

		const restUnion = rest.reduceRight((acc, part) => union(part, acc));
		return union(first, restUnion);
	},
);
920
+
921
+ export const regularExpressionParser: Parser<RegularExpression, string> = alternationParser;