@futpib/parser 1.0.3 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (262)
  1. package/.claude/settings.local.json +24 -0
  2. package/.github/workflows/main.yml +1 -0
  3. package/build/androidPackageParser.js +30 -32
  4. package/build/arbitraryDalvikBytecode.d.ts +3 -3
  5. package/build/arbitraryDalvikBytecode.js +33 -27
  6. package/build/arbitraryDalvikExecutable.js +55 -17
  7. package/build/arbitraryJava.d.ts +31 -0
  8. package/build/arbitraryJava.js +532 -0
  9. package/build/arbitraryJavaScript.d.ts +3 -0
  10. package/build/arbitraryJavaScript.js +263 -0
  11. package/build/arbitraryJavascript.d.ts +3 -0
  12. package/build/arbitraryJavascript.js +263 -0
  13. package/build/arbitraryZig.d.ts +3 -0
  14. package/build/arbitraryZig.js +240 -0
  15. package/build/arbitraryZipStream.d.ts +1 -1
  16. package/build/arrayParser.js +72 -13
  17. package/build/backsmali.d.ts +4 -3
  18. package/build/backsmali.js +26 -6
  19. package/build/bash.d.ts +89 -0
  20. package/build/bash.js +1 -0
  21. package/build/bashParser.d.ts +6 -0
  22. package/build/bashParser.js +335 -0
  23. package/build/bashParser.test.d.ts +1 -0
  24. package/build/bashParser.test.js +343 -0
  25. package/build/bashParserEdgeCases.test.d.ts +1 -0
  26. package/build/bashParserEdgeCases.test.js +117 -0
  27. package/build/dalvikBytecodeParser/addressConversion.d.ts +110 -0
  28. package/build/dalvikBytecodeParser/addressConversion.js +334 -0
  29. package/build/dalvikBytecodeParser/formatParsers.d.ts +7 -6
  30. package/build/dalvikBytecodeParser/formatParsers.js +13 -14
  31. package/build/dalvikBytecodeParser.d.ts +60 -31
  32. package/build/dalvikBytecodeParser.js +92 -35
  33. package/build/dalvikBytecodeParser.test-d.d.ts +1 -0
  34. package/build/dalvikBytecodeParser.test-d.js +268 -0
  35. package/build/dalvikBytecodeUnparser/formatUnparsers.d.ts +9 -8
  36. package/build/dalvikBytecodeUnparser/formatUnparsers.js +13 -12
  37. package/build/dalvikBytecodeUnparser.d.ts +2 -2
  38. package/build/dalvikBytecodeUnparser.js +23 -23
  39. package/build/dalvikBytecodeUnparser.test.js +7 -7
  40. package/build/dalvikExecutable.d.ts +3 -3
  41. package/build/dalvikExecutable.test-d.d.ts +1 -0
  42. package/build/dalvikExecutable.test-d.js +59 -0
  43. package/build/dalvikExecutableParser/typedNumbers.d.ts +18 -0
  44. package/build/dalvikExecutableParser/typedNumbers.js +3 -0
  45. package/build/dalvikExecutableParser.d.ts +2 -1
  46. package/build/dalvikExecutableParser.js +96 -77
  47. package/build/dalvikExecutableParser.test.js +24 -3
  48. package/build/dalvikExecutableParserAgainstSmaliParser.test.js +3 -0
  49. package/build/dalvikExecutableUnparser/poolScanners.d.ts +2 -2
  50. package/build/dalvikExecutableUnparser/sectionUnparsers.d.ts +3 -3
  51. package/build/dalvikExecutableUnparser/sectionUnparsers.js +26 -11
  52. package/build/dalvikExecutableUnparser.d.ts +2 -2
  53. package/build/dalvikExecutableUnparser.test.js +2 -1
  54. package/build/disjunctionParser.d.ts +5 -3
  55. package/build/disjunctionParser.js +79 -17
  56. package/build/disjunctionParser.test-d.d.ts +1 -0
  57. package/build/disjunctionParser.test-d.js +72 -0
  58. package/build/elementSwitchParser.d.ts +4 -0
  59. package/build/{exactElementSwitchParser.js → elementSwitchParser.js} +3 -4
  60. package/build/elementSwitchParser.test-d.d.ts +1 -0
  61. package/build/elementSwitchParser.test-d.js +44 -0
  62. package/build/exactSequenceParser.d.ts +4 -2
  63. package/build/exactSequenceParser.test-d.d.ts +1 -0
  64. package/build/exactSequenceParser.test-d.js +36 -0
  65. package/build/fetchCid.js +2 -66
  66. package/build/index.d.ts +25 -2
  67. package/build/index.js +23 -1
  68. package/build/index.test.js +16 -1
  69. package/build/inputReader.d.ts +10 -0
  70. package/build/inputReader.js +36 -0
  71. package/build/java.d.ts +502 -0
  72. package/build/java.js +2 -0
  73. package/build/javaKeyStoreParser.js +14 -17
  74. package/build/javaParser.d.ts +51 -0
  75. package/build/javaParser.js +1538 -0
  76. package/build/javaParser.test.d.ts +1 -0
  77. package/build/javaParser.test.js +1287 -0
  78. package/build/javaScript.d.ts +35 -0
  79. package/build/javaScript.js +1 -0
  80. package/build/javaScriptParser.d.ts +9 -0
  81. package/build/javaScriptParser.js +34 -0
  82. package/build/javaScriptUnparser.d.ts +3 -0
  83. package/build/javaScriptUnparser.js +4 -0
  84. package/build/javaScriptUnparser.test.d.ts +1 -0
  85. package/build/javaScriptUnparser.test.js +24 -0
  86. package/build/javaUnparser.d.ts +2 -0
  87. package/build/javaUnparser.js +519 -0
  88. package/build/javaUnparser.test.d.ts +1 -0
  89. package/build/javaUnparser.test.js +24 -0
  90. package/build/javascript.d.ts +35 -0
  91. package/build/javascript.js +1 -0
  92. package/build/javascriptParser.d.ts +9 -0
  93. package/build/javascriptParser.js +34 -0
  94. package/build/javascriptUnparser.d.ts +3 -0
  95. package/build/javascriptUnparser.js +4 -0
  96. package/build/javascriptUnparser.test.d.ts +1 -0
  97. package/build/javascriptUnparser.test.js +24 -0
  98. package/build/jsonParser.js +2 -12
  99. package/build/lazyMessageError.d.ts +3 -0
  100. package/build/lookaheadParser.js +60 -3
  101. package/build/negativeLookaheadParser.js +70 -11
  102. package/build/nonEmptyArrayParser.js +72 -13
  103. package/build/objectParser.d.ts +12 -0
  104. package/build/objectParser.js +31 -0
  105. package/build/objectParser.test-d.d.ts +1 -0
  106. package/build/objectParser.test-d.js +112 -0
  107. package/build/objectParser.test.d.ts +1 -0
  108. package/build/objectParser.test.js +55 -0
  109. package/build/optionalParser.js +69 -10
  110. package/build/parser.d.ts +4 -0
  111. package/build/parser.js +3 -1
  112. package/build/parser.test.js +114 -1
  113. package/build/parserConsumedSequenceParser.js +66 -7
  114. package/build/parserContext.d.ts +6 -0
  115. package/build/parserContext.js +20 -11
  116. package/build/parserError.d.ts +119 -27
  117. package/build/parserError.js +16 -8
  118. package/build/regexpParser.d.ts +2 -0
  119. package/build/regexpParser.js +101 -0
  120. package/build/regexpParser.test.d.ts +1 -0
  121. package/build/regexpParser.test.js +114 -0
  122. package/build/regularExpression.d.ts +63 -0
  123. package/build/regularExpression.js +1 -0
  124. package/build/regularExpressionParser.d.ts +3 -0
  125. package/build/regularExpressionParser.js +600 -0
  126. package/build/regularExpressionParser.test.d.ts +1 -0
  127. package/build/regularExpressionParser.test.js +89 -0
  128. package/build/separatedArrayParser.js +73 -14
  129. package/build/separatedNonEmptyArrayParser.js +73 -14
  130. package/build/sliceBoundedParser.js +62 -5
  131. package/build/smaliParser.d.ts +7 -7
  132. package/build/smaliParser.js +185 -268
  133. package/build/smaliParser.test.js +58 -0
  134. package/build/stringEscapes.d.ts +5 -0
  135. package/build/stringEscapes.js +244 -0
  136. package/build/symbolicExpression.d.ts +29 -0
  137. package/build/symbolicExpression.js +1 -0
  138. package/build/symbolicExpressionParser.d.ts +4 -0
  139. package/build/symbolicExpressionParser.js +123 -0
  140. package/build/symbolicExpressionParser.test.d.ts +1 -0
  141. package/build/symbolicExpressionParser.test.js +289 -0
  142. package/build/terminatedArrayParser.js +113 -38
  143. package/build/terminatedArrayParser.test.js +4 -2
  144. package/build/tupleParser.d.ts +7 -15
  145. package/build/tupleParser.js +1 -0
  146. package/build/unionParser.d.ts +5 -3
  147. package/build/unionParser.js +7 -2
  148. package/build/unionParser.test-d.d.ts +1 -0
  149. package/build/unionParser.test-d.js +72 -0
  150. package/build/unionParser.test.js +10 -11
  151. package/build/zig.d.ts +280 -0
  152. package/build/zig.js +2 -0
  153. package/build/zigParser.d.ts +3 -0
  154. package/build/zigParser.js +1119 -0
  155. package/build/zigParser.test.d.ts +1 -0
  156. package/build/zigParser.test.js +1590 -0
  157. package/build/zigUnparser.d.ts +2 -0
  158. package/build/zigUnparser.js +460 -0
  159. package/build/zigUnparser.test.d.ts +1 -0
  160. package/build/zigUnparser.test.js +24 -0
  161. package/build/zipParser.js +19 -32
  162. package/build/zipUnparser.js +19 -7
  163. package/build/zipUnparser.test.js +1 -1
  164. package/node_modules-@types/s-expression/index.d.ts +5 -0
  165. package/package.json +25 -6
  166. package/src/androidPackageParser.ts +33 -60
  167. package/src/arbitraryDalvikBytecode.ts +39 -31
  168. package/src/arbitraryDalvikExecutable.ts +65 -20
  169. package/src/arbitraryJava.ts +804 -0
  170. package/src/arbitraryJavaScript.ts +410 -0
  171. package/src/arbitraryZig.ts +380 -0
  172. package/src/arrayParser.ts +1 -3
  173. package/src/backsmali.ts +35 -4
  174. package/src/bash.ts +127 -0
  175. package/src/bashParser.test.ts +590 -0
  176. package/src/bashParser.ts +498 -0
  177. package/src/dalvikBytecodeParser/addressConversion.ts +496 -0
  178. package/src/dalvikBytecodeParser/formatParsers.ts +19 -29
  179. package/src/dalvikBytecodeParser.test-d.ts +310 -0
  180. package/src/dalvikBytecodeParser.ts +194 -69
  181. package/src/dalvikBytecodeUnparser/formatUnparsers.ts +27 -26
  182. package/src/dalvikBytecodeUnparser.test.ts +7 -7
  183. package/src/dalvikBytecodeUnparser.ts +31 -30
  184. package/src/dalvikExecutable.test-d.ts +132 -0
  185. package/src/dalvikExecutable.ts +3 -3
  186. package/src/dalvikExecutableParser/typedNumbers.ts +11 -0
  187. package/src/dalvikExecutableParser.test.ts +37 -3
  188. package/src/dalvikExecutableParser.test.ts.md +163 -2
  189. package/src/dalvikExecutableParser.test.ts.snap +0 -0
  190. package/src/dalvikExecutableParser.ts +121 -139
  191. package/src/dalvikExecutableParserAgainstSmaliParser.test.ts +4 -0
  192. package/src/dalvikExecutableUnparser/poolScanners.ts +6 -6
  193. package/src/dalvikExecutableUnparser/sectionUnparsers.ts +38 -14
  194. package/src/dalvikExecutableUnparser.test.ts +3 -2
  195. package/src/dalvikExecutableUnparser.ts +4 -4
  196. package/src/disjunctionParser.test-d.ts +105 -0
  197. package/src/disjunctionParser.ts +18 -15
  198. package/src/elementSwitchParser.test-d.ts +74 -0
  199. package/src/elementSwitchParser.ts +51 -0
  200. package/src/exactSequenceParser.test-d.ts +43 -0
  201. package/src/exactSequenceParser.ts +13 -8
  202. package/src/fetchCid.ts +2 -76
  203. package/src/index.test.ts +22 -1
  204. package/src/index.ts +119 -2
  205. package/src/inputReader.ts +53 -0
  206. package/src/java.ts +708 -0
  207. package/src/javaKeyStoreParser.ts +18 -32
  208. package/src/javaParser.test.ts +1592 -0
  209. package/src/javaParser.ts +2640 -0
  210. package/src/javaScript.ts +36 -0
  211. package/src/javaScriptParser.ts +57 -0
  212. package/src/javaScriptUnparser.test.ts +37 -0
  213. package/src/javaScriptUnparser.ts +7 -0
  214. package/src/javaUnparser.test.ts +37 -0
  215. package/src/javaUnparser.ts +640 -0
  216. package/src/jsonParser.ts +6 -27
  217. package/src/lookaheadParser.ts +2 -6
  218. package/src/negativeLookaheadParser.ts +1 -3
  219. package/src/nonEmptyArrayParser.ts +1 -3
  220. package/src/objectParser.test-d.ts +152 -0
  221. package/src/objectParser.test.ts +71 -0
  222. package/src/objectParser.ts +69 -0
  223. package/src/optionalParser.ts +1 -3
  224. package/src/parser.test.ts +151 -4
  225. package/src/parser.ts +11 -1
  226. package/src/parserConsumedSequenceParser.ts +2 -4
  227. package/src/parserContext.ts +26 -11
  228. package/src/parserError.ts +17 -3
  229. package/src/regexpParser.test.ts +264 -0
  230. package/src/regexpParser.ts +126 -0
  231. package/src/regularExpression.ts +24 -0
  232. package/src/regularExpressionParser.test.ts +102 -0
  233. package/src/regularExpressionParser.ts +920 -0
  234. package/src/separatedArrayParser.ts +1 -3
  235. package/src/separatedNonEmptyArrayParser.ts +1 -3
  236. package/src/sliceBoundedParser.test.ts +2 -2
  237. package/src/sliceBoundedParser.ts +15 -19
  238. package/src/smaliParser.test.ts +64 -0
  239. package/src/smaliParser.test.ts.md +12 -12
  240. package/src/smaliParser.test.ts.snap +0 -0
  241. package/src/smaliParser.ts +246 -534
  242. package/src/stringEscapes.ts +253 -0
  243. package/src/symbolicExpression.ts +17 -0
  244. package/src/symbolicExpressionParser.test.ts +466 -0
  245. package/src/symbolicExpressionParser.ts +190 -0
  246. package/src/terminatedArrayParser.test.ts +9 -6
  247. package/src/terminatedArrayParser.ts +25 -29
  248. package/src/tupleParser.ts +21 -18
  249. package/src/unionParser.test-d.ts +105 -0
  250. package/src/unionParser.test.ts +18 -17
  251. package/src/unionParser.ts +28 -16
  252. package/src/zig.ts +411 -0
  253. package/src/zigParser.test.ts +1693 -0
  254. package/src/zigParser.ts +1745 -0
  255. package/src/zigUnparser.test.ts +37 -0
  256. package/src/zigUnparser.ts +615 -0
  257. package/src/zipParser.ts +20 -56
  258. package/src/zipUnparser.test.ts +1 -1
  259. package/src/zipUnparser.ts +22 -7
  260. package/tsconfig.json +2 -2
  261. package/build/exactElementSwitchParser.d.ts +0 -3
  262. package/src/exactElementSwitchParser.ts +0 -41
@@ -0,0 +1,600 @@
1
+ import { createUnionParser } from './unionParser.js';
2
+ import { createExactSequenceParser } from './exactSequenceParser.js';
3
+ import { promiseCompose } from './promiseCompose.js';
4
+ import { createTupleParser } from './tupleParser.js';
5
+ import { createArrayParser } from './arrayParser.js';
6
+ import { createParserAccessorParser } from './parserAccessorParser.js';
7
+ import { createElementParser } from './elementParser.js';
8
+ import { parserCreatorCompose } from './parserCreatorCompose.js';
9
+ import { createOptionalParser } from './optionalParser.js';
10
+ import { createFixedLengthSequenceParser } from './fixedLengthSequenceParser.js';
11
+ import { createTerminatedArrayParser } from './terminatedArrayParser.js';
12
+ import { createDisjunctionParser } from './disjunctionParser.js';
13
+ import { createNegativeLookaheadParser } from './negativeLookaheadParser.js';
14
+ import { createObjectParser } from './objectParser.js';
15
+ // CharacterSet helpers
16
+ const emptyCharacterSet = { type: 'empty' }; // Shared sentinel for a character set with no ranges; characterSetInsertRange/characterSetFromRange use it as both children of a freshly created node.
17
/**
 * Tells whether an inclusive code point range contains no code points.
 *
 * @param {{start: number, end: number}} range - Inclusive range to inspect.
 * @returns {boolean} `true` when `start` lies past `end`.
 */
function codePointRangeIsEmpty(range) {
    const { start, end } = range;
    return start > end;
}
20
/**
 * Tells whether `rangeA` ends before `rangeB` starts with at least one code
 * point in between, i.e. the two ranges neither overlap nor touch.
 *
 * @param {{start: number, end: number}} rangeA - Candidate earlier range.
 * @param {{start: number, end: number}} rangeB - Candidate later range.
 * @returns {boolean} `true` when there is a gap between `rangeA.end` and `rangeB.start`.
 */
function codePointRangeIsStrictlyBefore(rangeA, rangeB) {
    const firstCodePointPastA = rangeA.end + 1;
    return firstCodePointPastA < rangeB.start;
}
23
/**
 * Mirror of `codePointRangeIsStrictlyBefore`: tells whether `rangeA` starts
 * after `rangeB` ends with at least one code point in between.
 *
 * @param {{start: number, end: number}} rangeA - Candidate later range.
 * @param {{start: number, end: number}} rangeB - Candidate earlier range.
 * @returns {boolean} `true` when `rangeB` is strictly before `rangeA`.
 */
function codePointRangeIsStrictlyAfter(rangeA, rangeB) {
    const bPrecedesA = codePointRangeIsStrictlyBefore(rangeB, rangeA);
    return bPrecedesA;
}
26
/**
 * Computes the smallest single range that covers both arguments.
 * An empty argument contributes nothing: the other argument is returned as is
 * (the same object, not a copy).
 *
 * @param {{start: number, end: number}} rangeA - First range.
 * @param {{start: number, end: number}} rangeB - Second range.
 * @returns {{start: number, end: number}} Covering range.
 */
function codePointRangeLeastUpperBound(rangeA, rangeB) {
    if (codePointRangeIsEmpty(rangeA)) {
        return rangeB;
    }
    if (codePointRangeIsEmpty(rangeB)) {
        return rangeA;
    }
    const lowest = Math.min(rangeA.start, rangeB.start);
    const highest = Math.max(rangeA.end, rangeB.end);
    return { start: lowest, end: highest };
}
36
/**
 * Tells whether two ranges are separated by a gap in either order, meaning
 * they neither overlap nor sit directly next to each other.
 *
 * @param {{start: number, end: number}} rangeA - First range.
 * @param {{start: number, end: number}} rangeB - Second range.
 * @returns {boolean} `true` when one range is strictly before or strictly after the other.
 */
function codePointRangeStrictlyDisjoint(rangeA, rangeB) {
    if (codePointRangeIsStrictlyBefore(rangeA, rangeB)) {
        return true;
    }
    return codePointRangeIsStrictlyAfter(rangeA, rangeB);
}
39
/**
 * Creates a tree node of a character set.
 *
 * @param {{start: number, end: number}} range - Range stored at this node.
 * @param {object} left - Subtree passed through as the `left` child.
 * @param {object} right - Subtree passed through as the `right` child.
 * @returns {{type: 'node', range: object, left: object, right: object}} New node.
 */
function characterSetNode(range, left, right) {
    const node = { type: 'node', range, left, right };
    return node;
}
42
/**
 * Yields every range stored in a character set by walking the tree in order:
 * left subtree, the node's own range, then right subtree. Anything that is
 * not a `'node'` yields nothing.
 *
 * @param {object} set - Character set tree.
 * @yields {{start: number, end: number}} Stored ranges.
 */
function* characterSetGetRanges(set) {
    if (set.type !== 'node') {
        return;
    }
    const { left, range, right } = set;
    yield* characterSetGetRanges(left);
    yield range;
    yield* characterSetGetRanges(right);
}
49
/**
 * Removes from `set` every stored range that overlaps or directly touches
 * `range`, and reports the smallest range covering `range` together with
 * everything that was removed.
 *
 * @param {object} set - Character set tree to extract from.
 * @param {{start: number, end: number}} range - Range being merged in.
 * @returns {{restCharSet: object, extendedRange: {start: number, end: number}}}
 *   `restCharSet` is what remains of `set`; `extendedRange` is the covering range.
 */
function characterSetExtractOverlap(set, range) {
    if (set.type === 'empty') {
        return { restCharSet: set, extendedRange: range };
    }
    const { range: nodeRange, left, right } = set;
    let covered = range;
    let remainingLeft = left;
    let remainingRight = right;
    // A subtree is only visited when `range` reaches past this node's range on that side.
    if (range.start < nodeRange.start) {
        const fromLeft = characterSetExtractOverlap(left, range);
        covered = codePointRangeLeastUpperBound(covered, fromLeft.extendedRange);
        remainingLeft = fromLeft.restCharSet;
    }
    if (range.end > nodeRange.end) {
        const fromRight = characterSetExtractOverlap(right, range);
        covered = codePointRangeLeastUpperBound(covered, fromRight.extendedRange);
        remainingRight = fromRight.restCharSet;
    }
    const nodeIsUntouched = codePointRangeStrictlyDisjoint(range, nodeRange);
    if (nodeIsUntouched) {
        // This node survives; only its subtrees may have shrunk.
        return {
            extendedRange: covered,
            restCharSet: characterSetNode(nodeRange, remainingLeft, remainingRight),
        };
    }
    // This node is absorbed into the covering range, so its subtrees are rejoined without it.
    return {
        extendedRange: codePointRangeLeastUpperBound(nodeRange, covered),
        restCharSet: characterSetUnion(remainingLeft, remainingRight),
    };
}
77
/**
 * Returns a character set containing everything in `set` plus `range`.
 * Stored ranges that overlap or touch `range` are merged into a single node.
 * An empty `range` returns `set` itself.
 *
 * @param {object} set - Character set tree.
 * @param {{start: number, end: number}} range - Range to add.
 * @returns {object} Resulting character set tree.
 */
function characterSetInsertRange(set, range) {
    if (codePointRangeIsEmpty(range)) {
        return set;
    }
    if (set.type === 'empty') {
        return characterSetNode(range, emptyCharacterSet, emptyCharacterSet);
    }
    const { range: nodeRange, left, right } = set;
    if (codePointRangeIsStrictlyBefore(range, nodeRange)) {
        return characterSetNode(nodeRange, characterSetInsertRange(left, range), right);
    }
    if (codePointRangeIsStrictlyAfter(range, nodeRange)) {
        return characterSetNode(nodeRange, left, characterSetInsertRange(right, range));
    }
    // `range` overlaps or touches this node: pull every affected range out of
    // both subtrees and fold them all into one merged range stored at this node.
    const fromLeft = characterSetExtractOverlap(left, range);
    const fromRight = characterSetExtractOverlap(right, range);
    const mergedWithLeft = codePointRangeLeastUpperBound(nodeRange, fromLeft.extendedRange);
    const mergedRange = codePointRangeLeastUpperBound(mergedWithLeft, fromRight.extendedRange);
    if (codePointRangeIsEmpty(mergedRange)) {
        return emptyCharacterSet;
    }
    return characterSetNode(mergedRange, fromLeft.restCharSet, fromRight.restCharSet);
}
98
/**
 * Returns the union of two character sets by inserting every range of `setB`
 * into `setA`, in the order `characterSetGetRanges` yields them.
 *
 * @param {object} setA - Starting character set.
 * @param {object} setB - Character set whose ranges are added.
 * @returns {object} Character set covering both inputs.
 */
function characterSetUnion(setA, setB) {
    let merged = setA;
    for (const range of characterSetGetRanges(setB)) {
        merged = characterSetInsertRange(merged, range);
    }
    return merged;
}
101
/**
 * Splits `range` into the part at or below `point` and the part above it.
 * Either part may come back empty (`start > end`) when `point` lies outside `range`.
 *
 * @param {number} point - Last code point that belongs to the first part.
 * @param {{start: number, end: number}} range - Range to split.
 * @returns {[{start: number, end: number}, {start: number, end: number}]} The two parts, lower first.
 */
function codePointRangeSplitAt(point, range) {
    const upToPoint = { start: range.start, end: Math.min(range.end, point) };
    const pastPoint = { start: Math.max(range.start, point + 1), end: range.end };
    return [upToPoint, pastPoint];
}
107
/**
 * Unions two ranges into a list of zero, one or two ranges.
 * Empty inputs are dropped. Two non-empty ranges separated by a gap are
 * returned as a pair ordered by position. Overlapping or touching ranges
 * are merged into one.
 *
 * @param {{start: number, end: number}} rangeA - First range.
 * @param {{start: number, end: number}} rangeB - Second range.
 * @returns {Array<{start: number, end: number}>} Resulting ranges.
 */
function codePointRangeUnion(rangeA, rangeB) {
    const aIsEmpty = codePointRangeIsEmpty(rangeA);
    const bIsEmpty = codePointRangeIsEmpty(rangeB);
    if (aIsEmpty) {
        return bIsEmpty ? [] : [rangeB];
    }
    if (bIsEmpty) {
        return [rangeA];
    }
    const gapAfterA = rangeA.end + 1 < rangeB.start;
    if (gapAfterA) {
        return [rangeA, rangeB];
    }
    const gapAfterB = rangeB.end + 1 < rangeA.start;
    if (gapAfterB) {
        return [rangeB, rangeA];
    }
    const merged = {
        start: Math.min(rangeA.start, rangeB.start),
        end: Math.max(rangeA.end, rangeB.end),
    };
    return [merged];
}
123
/**
 * Removes `rangeB` from `rangeA`.
 *
 * @param {{start: number, end: number}} rangeA - Range to subtract from.
 * @param {{start: number, end: number}} rangeB - Range to remove.
 * @returns {Array<{start: number, end: number}>} What is left of `rangeA`:
 *   zero, one or two ranges, as produced by `codePointRangeUnion`.
 */
function codePointRangeDifference(rangeA, rangeB) {
    // Cut off the part of A below B, then the part of A above B.
    const lowerSplit = codePointRangeSplitAt(rangeB.start - 1, rangeA);
    const belowB = lowerSplit[0];
    const fromBStart = lowerSplit[1];
    const aboveB = codePointRangeSplitAt(rangeB.end, fromBStart)[1];
    return codePointRangeUnion(belowB, aboveB);
}
128
/**
 * Returns a character set equal to `set` with every code point of `range`
 * removed. An empty `range` returns `set` itself.
 *
 * @param {object} set - Character set tree.
 * @param {{start: number, end: number}} range - Range to remove.
 * @returns {object} Resulting character set tree.
 */
function characterSetDeleteRange(set, range) {
    if (codePointRangeIsEmpty(range)) {
        return set;
    }
    if (set.type === 'empty') {
        return emptyCharacterSet;
    }
    const { range: nodeRange, left, right } = set;
    // Part of `range` below this node's range; only this part is deleted from the left subtree.
    const belowNode = codePointRangeSplitAt(nodeRange.start - 1, range)[0];
    // `range` clipped to end at the node's upper bound, and the part above the
    // node's range, which is deleted from the right subtree.
    const [upToNodeEnd, aboveNode] = codePointRangeSplitAt(nodeRange.end, range);
    const prunedLeft = characterSetDeleteRange(left, belowNode);
    const prunedRight = characterSetDeleteRange(right, aboveNode);
    const survivors = codePointRangeDifference(nodeRange, upToNodeEnd);
    switch (survivors.length) {
        case 0:
            // The node's range vanished entirely: rejoin the two subtrees.
            return characterSetUnion(prunedLeft, prunedRight);
        case 1:
            return characterSetNode(survivors[0], prunedLeft, prunedRight);
        default:
            // Two pieces survive (the deleted range cut a hole in the middle).
            return characterSetUnion(
                characterSetInsertRange(prunedLeft, survivors[0]),
                characterSetInsertRange(prunedRight, survivors[1]),
            );
    }
}
149
/**
 * Returns `setA` with every range of `setB` removed, deleting the ranges in
 * the order `characterSetGetRanges` yields them.
 *
 * @param {object} setA - Character set to subtract from.
 * @param {object} setB - Character set whose ranges are removed.
 * @returns {object} Resulting character set.
 */
function characterSetDifference(setA, setB) {
    let remaining = setA;
    for (const range of characterSetGetRanges(setB)) {
        remaining = characterSetDeleteRange(remaining, range);
    }
    return remaining;
}
152
/**
 * Wraps a single range in a character set.
 *
 * @param {{start: number, end: number}} range - Inclusive code point range.
 * @returns {object} `emptyCharacterSet` for an empty range, otherwise a
 *   one-node tree holding `range`.
 */
function characterSetFromRange(range) {
    return codePointRangeIsEmpty(range)
        ? emptyCharacterSet
        : characterSetNode(range, emptyCharacterSet, emptyCharacterSet);
}
158
/**
 * Builds the character set holding only the first code point of `char`.
 *
 * @param {string} char - String whose first code point is used.
 * @returns {object} Character set for the one-code-point range.
 */
function characterSetSingleton(char) {
    const point = char.codePointAt(0);
    const range = { start: point, end: point };
    return characterSetFromRange(range);
}
162
/**
 * Builds the character set spanning from the first code point of `startChar`
 * to the first code point of `endChar`, both inclusive.
 *
 * @param {string} startChar - String whose first code point is the lower bound.
 * @param {string} endChar - String whose first code point is the upper bound.
 * @returns {object} Character set for the range, as built by `characterSetFromRange`.
 */
function characterSetCharRange(startChar, endChar) {
    const range = {
        start: startChar.codePointAt(0),
        end: endChar.codePointAt(0),
    };
    return characterSetFromRange(range);
}
167
/**
 * Builds the character set containing the first code point of every string in `chars`.
 *
 * @param {string[]} chars - Characters to include.
 * @returns {object} Union of the singleton sets; `emptyCharacterSet` for an empty array.
 */
function characterSetFromArray(chars) {
    return chars.reduce(
        (collected, char) => characterSetUnion(collected, characterSetSingleton(char)),
        emptyCharacterSet,
    );
}
170
/**
 * Returns every member of the module-level `alphabet` set that is not in `set`.
 *
 * @param {object} set - Character set to negate.
 * @returns {object} `alphabet` minus `set`.
 */
function characterSetComplement(set) {
    const universe = alphabet;
    return characterSetDifference(universe, set);
}
173
+ // Pre-defined character sets
174
// The four line terminator characters that are excluded from `alphabet`.
const fullCodePointRangeSet = characterSetFromRange({ start: 0, end: 0x10FFFF });
const lineTerminatorCharacterSet = characterSetFromArray(['\r', '\n', '\u2028', '\u2029']);
// Universe used by characterSetComplement: every code point except the line terminators.
const alphabet = characterSetDifference(fullCodePointRangeSet, lineTerminatorCharacterSet);
// NOTE(review): `alphabet` already lacks the line terminators, so this second
// subtraction removes nothing. It also means complement-based sets
// (nonDigitChars, nonWordChars, nonWhiteSpaceChars, negated classes) never
// contain '\r', '\n', '\u2028' or '\u2029' - confirm that is intended.
const wildcardCharacterSet = characterSetDifference(alphabet, lineTerminatorCharacterSet);
// \d and \D
const digitChars = characterSetCharRange('0', '9');
const nonDigitChars = characterSetComplement(digitChars);
// \w and \W
const wordChars = [
    characterSetCharRange('a', 'z'),
    characterSetCharRange('A', 'Z'),
    characterSetCharRange('0', '9'),
    characterSetSingleton('_'),
].reduce((merged, next) => characterSetUnion(merged, next));
const nonWordChars = characterSetComplement(wordChars);
// \s and \S
const whiteSpaceChars = [
    ...['\f', '\n', '\r', '\t', '\v', '\u0020', '\u00a0', '\u1680'].map(char => characterSetSingleton(char)),
    characterSetCharRange('\u2000', '\u200a'),
    ...['\u2028', '\u2029', '\u202f', '\u205f', '\u3000', '\ufeff'].map(char => characterSetSingleton(char)),
].reduce((merged, next) => characterSetUnion(merged, next));
const nonWhiteSpaceChars = characterSetComplement(whiteSpaceChars);
203
+ // AST constructors
204
+ const epsilon = { type: 'epsilon' }; // AST node of type 'epsilon'; presumably stands for the empty pattern - confirm against its uses later in the file.
205
/**
 * Creates a `'literal'` AST node.
 *
 * @param {object} charset - Character set stored on the node.
 * @returns {{type: 'literal', charset: object}} New AST node.
 */
function literal(charset) {
    const node = { type: 'literal', charset };
    return node;
}
208
/**
 * Creates a `'concat'` AST node from two sub-expressions.
 *
 * @param {object} left - First sub-expression.
 * @param {object} right - Second sub-expression.
 * @returns {{type: 'concat', left: object, right: object}} New AST node.
 */
function concat(left, right) {
    const node = { type: 'concat', left, right };
    return node;
}
211
/**
 * Creates a `'union'` AST node from two alternative sub-expressions.
 *
 * @param {object} left - First alternative.
 * @param {object} right - Second alternative.
 * @returns {{type: 'union', left: object, right: object}} New AST node.
 */
function union(left, right) {
    const node = { type: 'union', left, right };
    return node;
}
214
/**
 * Creates a `'star'` AST node wrapping `inner`.
 *
 * @param {object} inner - Sub-expression being quantified.
 * @returns {{type: 'star', inner: object}} New AST node.
 */
function star(inner) {
    const node = { type: 'star', inner };
    return node;
}
217
/**
 * Creates a `'plus'` AST node wrapping `inner`.
 *
 * @param {object} inner - Sub-expression being quantified.
 * @returns {{type: 'plus', inner: object}} New AST node.
 */
function plus(inner) {
    const node = { type: 'plus', inner };
    return node;
}
220
/**
 * Creates an `'optional'` AST node wrapping `inner`.
 *
 * @param {object} inner - Sub-expression being quantified.
 * @returns {{type: 'optional', inner: object}} New AST node.
 */
function optional(inner) {
    const node = { type: 'optional', inner };
    return node;
}
223
/**
 * Creates a `'repeat'` AST node.
 *
 * @param {object} inner - Sub-expression being repeated.
 * @param {*} bounds - Repetition bounds, stored on the node unchanged.
 * @returns {{type: 'repeat', inner: object, bounds: *}} New AST node.
 */
function repeat(inner, bounds) {
    const node = { type: 'repeat', inner, bounds };
    return node;
}
226
/**
 * Creates a `'capture-group'` AST node. The `name` property is only present
 * on the node when a name is supplied.
 *
 * @param {object} inner - Sub-expression inside the group.
 * @param {string} [name] - Group name; omit for an unnamed group.
 * @returns {{type: 'capture-group', inner: object, name?: string}} New AST node.
 */
function captureGroup(inner, name) {
    const node = { type: 'capture-group', inner };
    if (name !== undefined) {
        node.name = name;
    }
    return node;
}
232
/**
 * Creates a `'lookahead'` AST node.
 *
 * @param {boolean} isPositive - Stored as the node's `isPositive` flag.
 * @param {object} inner - Sub-expression inside the lookahead.
 * @param {object} right - Stored as the node's `right` property.
 * @returns {{type: 'lookahead', isPositive: boolean, inner: object, right: object}} New AST node.
 */
function lookahead(isPositive, inner, right) {
    const node = { type: 'lookahead', isPositive, inner, right };
    return node;
}
235
/**
 * Creates a `'start-anchor'` AST node.
 *
 * @param {object} left - Stored as the node's `left` property.
 * @param {object} right - Stored as the node's `right` property.
 * @returns {{type: 'start-anchor', left: object, right: object}} New AST node.
 */
function startAnchor(left, right) {
    const node = { type: 'start-anchor', left, right };
    return node;
}
238
/**
 * Creates an `'end-anchor'` AST node.
 *
 * @param {object} left - Stored as the node's `left` property.
 * @param {object} right - Stored as the node's `right` property.
 * @returns {{type: 'end-anchor', left: object, right: object}} New AST node.
 */
function endAnchor(left, right) {
    const node = { type: 'end-anchor', left, right };
    return node;
}
241
+ // Parser implementation
242
+ const elementParser = createElementParser(); // Shared parser from createElementParser(); its result is used below as a single character of the pattern text.
243
+ const metaCharacters = new Set(['\\', '^', '$', '.', '|', '?', '*', '+', '(', ')', '[', ']', '{', '}']); // Characters that literalCharacterParser refuses to accept as a plain literal.
244
+ // Escape sequences for control characters
245
/**
 * Builds a parser for one control-character escape: it matches the exact
 * pattern text `escapeSequence` and produces a literal node for `controlChar`.
 */
const createControlEscapeLiteralParser = (escapeSequence, controlChar) => promiseCompose(
    createExactSequenceParser(escapeSequence),
    () => literal(characterSetSingleton(controlChar)),
);
const escapeNParser = createControlEscapeLiteralParser('\\n', '\n');
const escapeRParser = createControlEscapeLiteralParser('\\r', '\r');
const escapeTParser = createControlEscapeLiteralParser('\\t', '\t');
const escapeFParser = createControlEscapeLiteralParser('\\f', '\f');
const escapeVParser = createControlEscapeLiteralParser('\\v', '\v');
const escape0Parser = createControlEscapeLiteralParser('\\0', '\0');
251
+ // Character class escapes
252
/**
 * Builds a parser for one character class escape: it matches the exact
 * pattern text `escapeSequence` and produces a literal node over `charset`.
 */
const createClassEscapeLiteralParser = (escapeSequence, charset) => promiseCompose(
    createExactSequenceParser(escapeSequence),
    () => literal(charset),
);
const escapeDigitParser = createClassEscapeLiteralParser('\\d', digitChars);
const escapeNonDigitParser = createClassEscapeLiteralParser('\\D', nonDigitChars);
const escapeWordParser = createClassEscapeLiteralParser('\\w', wordChars);
const escapeNonWordParser = createClassEscapeLiteralParser('\\W', nonWordChars);
const escapeSpaceParser = createClassEscapeLiteralParser('\\s', whiteSpaceChars);
const escapeNonSpaceParser = createClassEscapeLiteralParser('\\S', nonWhiteSpaceChars);
258
+ // Hex escape \xHH
259
// Matches `\x` followed by the next two elements of the pattern text and
// produces a literal for the character with that hexadecimal code.
// NOTE(review): the two elements are not checked to be hex digits. When
// Number.parseInt yields NaN, String.fromCharCode(NaN) returns U+0000, so a
// malformed escape silently becomes a NUL literal - confirm this is acceptable.
const escapeHexParser = promiseCompose(
    createTupleParser([
        createExactSequenceParser('\\x'),
        createFixedLengthSequenceParser(2),
    ]),
    ([, hexDigits]) => {
        const charCode = Number.parseInt(hexDigits, 16);
        return literal(characterSetSingleton(String.fromCharCode(charCode)));
    },
);
263
+ // Unicode escape \uHHHH
264
// Matches `\u` followed by the next four elements of the pattern text and
// produces a literal for the character with that hexadecimal code.
// NOTE(review): the four elements are not checked to be hex digits. When
// Number.parseInt yields NaN, String.fromCharCode(NaN) returns U+0000 -
// confirm this is acceptable.
const escapeUnicodeParser = promiseCompose(
    createTupleParser([
        createExactSequenceParser('\\u'),
        createFixedLengthSequenceParser(4),
    ]),
    ([, hexDigits]) => {
        const charCode = Number.parseInt(hexDigits, 16);
        return literal(characterSetSingleton(String.fromCharCode(charCode)));
    },
);
268
+ // Escaped metacharacter (e.g., \., \*, etc.)
269
// Matches a backslash followed by any single element and produces a literal
// for that element. The element is not restricted to metacharacters.
const escapeMetacharacterParser = promiseCompose(
    createTupleParser([
        createExactSequenceParser('\\'),
        elementParser,
    ]),
    ([, escapedChar]) => {
        const charset = characterSetSingleton(escapedChar);
        return literal(charset);
    },
);
273
+ // All escape sequences - use createDisjunctionParser to try specific escapes first
274
// Alternatives are listed from most to least specific; the generic
// backslash-plus-any-character alternative accepts every escape and
// therefore has to stay at the end of the list.
const escapeParser = createDisjunctionParser([
    // Control characters: \n \r \t \f \v \0
    escapeNParser,
    escapeRParser,
    escapeTParser,
    escapeFParser,
    escapeVParser,
    escape0Parser,
    // Character classes: \d \D \w \W \s \S
    escapeDigitParser,
    escapeNonDigitParser,
    escapeWordParser,
    escapeNonWordParser,
    escapeSpaceParser,
    escapeNonSpaceParser,
    // Numeric escapes: \xHH \uHHHH
    escapeHexParser,
    escapeUnicodeParser,
    // Fallback: any other escaped character
    escapeMetacharacterParser,
]);
291
+ // Dot (matches any character except newline)
292
+ const dotParser = promiseCompose(createExactSequenceParser('.'), () => literal(wildcardCharacterSet)); // Matches a '.' in the pattern text and produces a literal over wildcardCharacterSet.
293
+ // Literal character (non-metacharacter)
294
// Reads one element and accepts it as a literal only when it is not listed in
// `metaCharacters`; otherwise the invariant fails with the message below.
const literalCharacterParser = parserCreatorCompose(
    () => elementParser,
    char => async (parserContext) => {
        const isMetaCharacter = metaCharacters.has(char);
        parserContext.invariant(!isMetaCharacter, 'Unexpected metacharacter "%s"', char);
        const charset = characterSetSingleton(char);
        return literal(charset);
    },
)();
298
+ // Character class internals
299
+ // Character in a character class (different rules than outside)
300
+ const charClassMetaCharacters = new Set(['\\', ']', '^', '-']); // Characters that charClassLiteralParser and charClassSingleCharParser refuse to accept as a plain literal inside [...].
301
+ // Escape sequences inside character class (returns CharacterSet)
302
/**
 * Builds a character-class parser for a control-character escape: it matches
 * `escapeSequence` and produces the singleton character set for `controlChar`.
 */
const createCharClassControlEscapeParser = (escapeSequence, controlChar) => promiseCompose(
    createExactSequenceParser(escapeSequence),
    () => characterSetSingleton(controlChar),
);
/**
 * Builds a character-class parser for a class escape: it matches
 * `escapeSequence` and produces the given predefined `charset`.
 */
const createCharClassSetEscapeParser = (escapeSequence, charset) => promiseCompose(
    createExactSequenceParser(escapeSequence),
    () => charset,
);
const charClassEscapeNParser = createCharClassControlEscapeParser('\\n', '\n');
const charClassEscapeRParser = createCharClassControlEscapeParser('\\r', '\r');
const charClassEscapeTParser = createCharClassControlEscapeParser('\\t', '\t');
const charClassEscapeFParser = createCharClassControlEscapeParser('\\f', '\f');
const charClassEscapeVParser = createCharClassControlEscapeParser('\\v', '\v');
const charClassEscape0Parser = createCharClassControlEscapeParser('\\0', '\0');
const charClassEscapeDigitParser = createCharClassSetEscapeParser('\\d', digitChars);
const charClassEscapeNonDigitParser = createCharClassSetEscapeParser('\\D', nonDigitChars);
const charClassEscapeWordParser = createCharClassSetEscapeParser('\\w', wordChars);
const charClassEscapeNonWordParser = createCharClassSetEscapeParser('\\W', nonWordChars);
const charClassEscapeSpaceParser = createCharClassSetEscapeParser('\\s', whiteSpaceChars);
const charClassEscapeNonSpaceParser = createCharClassSetEscapeParser('\\S', nonWhiteSpaceChars);
314
// \xHH inside a character class: produces the singleton character set for
// the character with that hexadecimal code.
// NOTE(review): as with the top-level hex escape, the two elements are not
// checked to be hex digits; a NaN code becomes U+0000 via String.fromCharCode.
const charClassEscapeHexParser = promiseCompose(
    createTupleParser([
        createExactSequenceParser('\\x'),
        createFixedLengthSequenceParser(2),
    ]),
    ([, hexDigits]) => {
        const charCode = Number.parseInt(hexDigits, 16);
        return characterSetSingleton(String.fromCharCode(charCode));
    },
);
// \uHHHH inside a character class; same conversion with four elements.
const charClassEscapeUnicodeParser = promiseCompose(
    createTupleParser([
        createExactSequenceParser('\\u'),
        createFixedLengthSequenceParser(4),
    ]),
    ([, hexDigits]) => {
        const charCode = Number.parseInt(hexDigits, 16);
        return characterSetSingleton(String.fromCharCode(charCode));
    },
);
322
// Backslash followed by any single element inside a character class:
// produces the singleton character set for that element.
const charClassEscapeMetacharacterParser = promiseCompose(
    createTupleParser([
        createExactSequenceParser('\\'),
        elementParser,
    ]),
    ([, escapedChar]) => {
        const charset = characterSetSingleton(escapedChar);
        return charset;
    },
);
326
+ // Use createDisjunctionParser to try specific escapes before generic metacharacter escape
327
// Alternatives are listed from most to least specific; the generic
// backslash-plus-any-character alternative accepts every escape and
// therefore has to stay at the end of the list.
const charClassEscapeParser = createDisjunctionParser([
    // Control characters: \n \r \t \f \v \0
    charClassEscapeNParser,
    charClassEscapeRParser,
    charClassEscapeTParser,
    charClassEscapeFParser,
    charClassEscapeVParser,
    charClassEscape0Parser,
    // Character classes: \d \D \w \W \s \S
    charClassEscapeDigitParser,
    charClassEscapeNonDigitParser,
    charClassEscapeWordParser,
    charClassEscapeNonWordParser,
    charClassEscapeSpaceParser,
    charClassEscapeNonSpaceParser,
    // Numeric escapes: \xHH \uHHHH
    charClassEscapeHexParser,
    charClassEscapeUnicodeParser,
    // Fallback: any other escaped character
    charClassEscapeMetacharacterParser,
]);
344
+ // Single character (not escape, not ], not -)
345
// Reads one element and accepts it as a singleton character set only when it
// is not listed in `charClassMetaCharacters`.
const charClassLiteralParser = parserCreatorCompose(
    () => elementParser,
    char => async (parserContext) => {
        const isClassMetaCharacter = charClassMetaCharacters.has(char);
        parserContext.invariant(!isClassMetaCharacter, 'Unexpected character class metacharacter "%s"', char);
        return characterSetSingleton(char);
    },
)();
349
+ // Single char in character class (escape or literal) - returns the character string for range checking
350
// Parses one endpoint of a character range and returns it as a string (not a
// character set) so that charClassRangeParser can compare code points.
// NOTE(review): several alternatives accept the same input, e.g. `\n` matches
// both its dedicated alternative and the generic backslash alternative -
// confirm how createUnionParser treats more than one successful alternative.
const charClassSingleCharParser = createUnionParser([
    // Control-character escapes, each returning the character it denotes
    ...[
        ['\\n', '\n'],
        ['\\r', '\r'],
        ['\\t', '\t'],
        ['\\f', '\f'],
        ['\\v', '\v'],
        ['\\0', '\0'],
    ].map(([escapeSequence, controlChar]) => promiseCompose(
        createExactSequenceParser(escapeSequence),
        () => controlChar,
    )),
    // \xHH
    promiseCompose(
        createTupleParser([
            createExactSequenceParser('\\x'),
            createFixedLengthSequenceParser(2),
        ]),
        ([, hexDigits]) => String.fromCharCode(Number.parseInt(hexDigits, 16)),
    ),
    // \uHHHH
    promiseCompose(
        createTupleParser([
            createExactSequenceParser('\\u'),
            createFixedLengthSequenceParser(4),
        ]),
        ([, hexDigits]) => String.fromCharCode(Number.parseInt(hexDigits, 16)),
    ),
    // Backslash followed by any other element
    promiseCompose(
        createTupleParser([
            createExactSequenceParser('\\'),
            elementParser,
        ]),
        ([, escapedChar]) => escapedChar,
    ),
    // Plain character: anything that is neither a class metacharacter nor '-'
    parserCreatorCompose(
        () => elementParser,
        char => async (parserContext) => {
            const isRejected = charClassMetaCharacters.has(char) || char === '-';
            parserContext.invariant(!isRejected, 'Unexpected character "%s"', char);
            return char;
        },
    )(),
]);
376
+ // Character range (a-z)
377
// Parses `<char>-<char>` and produces the character set spanning the two
// endpoints (inclusive), as built by characterSetCharRange.
const charClassRangeParser = promiseCompose(
    createTupleParser([
        charClassSingleCharParser,
        createExactSequenceParser('-'),
        charClassSingleCharParser,
    ]),
    (parts) => {
        const [lowerChar, , upperChar] = parts;
        return characterSetCharRange(lowerChar, upperChar);
    },
);
382
// Matches a single '-' provided the next input is not ']' (enforced by the negative
// lookahead) and yields the one-character set containing '-'.
// NOTE(review): the original comment described this as "literal hyphen at end or after
// negation", yet the lookahead rejects a hyphen directly followed by ']' — confirm intent.
const charClassLiteralHyphenParser = promiseCompose(
	createTupleParser([
		createExactSequenceParser('-'),
		createNegativeLookaheadParser(createExactSequenceParser(']')),
	]),
	() => characterSetSingleton('-'),
);

// One member of a character class. Alternatives are listed as: range (a-z),
// escape (\d, \w, ...), single literal character, then the literal hyphen above.
const charClassElementAlternatives = [
	charClassRangeParser,
	charClassEscapeParser,
	charClassLiteralParser,
	charClassLiteralHyphenParser,
];
const charClassElementParser = createDisjunctionParser(charClassElementAlternatives);
393
/**
 * Parses a bracketed character class `[...]` or `[^...]`.
 *
 * All member sets are unioned together, starting from `emptyCharacterSet`. When the
 * optional `^` was present the union is complemented. The resulting set is wrapped
 * with `literal`.
 */
const characterClassParser = promiseCompose(
	createTupleParser([
		createExactSequenceParser('['),
		createOptionalParser(createExactSequenceParser('^')),
		createTerminatedArrayParser(charClassElementParser, createExactSequenceParser(']')),
	]),
	parsed => {
		const negationMarker = parsed[1];
		// The terminated-array result is destructured as [elements, ...]; only the elements are used
		const [members] = parsed[2];
		let combined = emptyCharacterSet;
		for (const member of members) {
			combined = characterSetUnion(combined, member);
		}
		const isNegated = negationMarker !== undefined;
		return literal(isNegated ? characterSetComplement(combined) : combined);
	},
);
405
// Builds an object parser for a one-symbol quantifier: a constant `type` tag plus a
// `_marker` field produced by matching `symbol` exactly.
const createSymbolQuantifierParser = (type, symbol) => createObjectParser({
	type,
	_marker: createExactSequenceParser(symbol),
});

// `*` quantifier marker
const starQuantifierParser = createSymbolQuantifierParser('star', '*');
// `+` quantifier marker
const plusQuantifierParser = createSymbolQuantifierParser('plus', '+');
// `?` quantifier marker
const optionalQuantifierParser = createSymbolQuantifierParser('optional', '?');
417
// Creates a parser for a single element that must lie between '0' and '9' (inclusive).
const createDecimalDigitParser = () => parserCreatorCompose(
	() => elementParser,
	candidate => async parserContext => {
		const isDigit = candidate >= '0' && candidate <= '9';
		parserContext.invariant(isDigit, 'Expected digit, got "%s"', candidate);
		return candidate;
	},
)();

/**
 * Parses a run of decimal digits (used by brace quantifiers) and yields it as a
 * base-10 integer. The parse is rejected when no digit was collected.
 */
const numberParser = parserCreatorCompose(
	() => createArrayParser(createDecimalDigitParser()),
	digitChars => async parserContext => {
		parserContext.invariant(digitChars.length > 0, 'Expected at least one digit');
		const digitText = digitChars.join('');
		return Number.parseInt(digitText, 10);
	},
)();
425
/**
 * Parses the brace quantifiers `{n}`, `{n,}` and `{n,m}` into
 * `{ type: 'repeat', bounds }`, where `bounds` is:
 *   - the bare number `n` for `{n}`
 *   - `{ min }` for `{n,}`
 *   - `{ min, max }` for `{n,m}`
 *
 * NOTE(review): nothing here checks that `max` is at least `min`.
 */
const braceQuantifierParser = promiseCompose(
	createTupleParser([
		createExactSequenceParser('{'),
		numberParser,
		createOptionalParser(createTupleParser([
			createExactSequenceParser(','),
			createOptionalParser(numberParser),
		])),
		createExactSequenceParser('}'),
	]),
	parsed => {
		const min = parsed[1];
		const commaPart = parsed[2];
		let bounds;
		if (commaPart === undefined) {
			// No comma at all: exactly `min` repetitions
			bounds = min;
		} else {
			const max = commaPart[1];
			// Comma without an upper bound leaves `max` out of the object entirely
			bounds = max === undefined ? { min } : { min, max };
		}
		return { type: 'repeat', bounds };
	},
);
447
// Any quantifier: `*`, `+`, `?`, or a brace form. Each alternative yields an object
// whose `type` field identifies the quantifier.
const quantifierAlternatives = [
	starQuantifierParser,
	plusQuantifierParser,
	optionalQuantifierParser,
	braceQuantifierParser,
];
const quantifierParser = createUnionParser(quantifierAlternatives);
453
// Groups

/**
 * Unnamed capture group `( ... )`.
 *
 * The negative lookahead refuses an opening `(` that is immediately followed by `?`,
 * leaving the `(?...)` forms to the other group parsers. The body is a full
 * alternation, looked up lazily because `alternationParser` is declared further down.
 */
const captureGroupParser = promiseCompose(
	createTupleParser([
		createExactSequenceParser('('),
		createNegativeLookaheadParser(createExactSequenceParser('?')),
		createParserAccessorParser(() => alternationParser),
		createExactSequenceParser(')'),
	]),
	groupParts => captureGroup(groupParts[2]),
);
461
/**
 * Named capture group `(?<name> ... )`.
 *
 * The name is every element up to (not including) the closing `>`; the body is a
 * full alternation, looked up lazily because `alternationParser` is declared later.
 *
 * `(?<=` and `(?<!` open lookbehind assertions in regular expression syntax and can
 * never start a group name, so they are rejected up front. Without that guard an
 * input such as `(?<=a>b)` would be accepted as a capture group named `=a`.
 */
const namedCaptureGroupParser = promiseCompose(
	createTupleParser([
		createExactSequenceParser('(?<'),
		// Lookbehind openers are not group names
		createNegativeLookaheadParser(createExactSequenceParser('=')),
		createNegativeLookaheadParser(createExactSequenceParser('!')),
		createTerminatedArrayParser(
			parserCreatorCompose(() => elementParser, char => async parserContext => {
				parserContext.invariant(char !== '>', 'Unexpected ">"');
				return char;
			})(),
			createExactSequenceParser('>'),
		),
		createParserAccessorParser(() => alternationParser),
		createExactSequenceParser(')'),
	]),
	([, , , [nameChars], inner]) => captureGroup(inner, nameChars.join('')),
);
471
/**
 * Non-capturing group `(?: ... )`. The group adds no node of its own: the parsed
 * inner alternation is returned as-is.
 */
const nonCaptureGroupParser = promiseCompose(
	createTupleParser([
		createExactSequenceParser('(?:'),
		createParserAccessorParser(() => alternationParser),
		createExactSequenceParser(')'),
	]),
	groupParts => groupParts[1],
);
477
// Builds an object parser for a lookahead group. The object carries the constant tag
// `type: 'lookahead-marker'`, the given `isPositive` flag, and the parsed `inner`
// alternation; `_open` / `_close` hold the matched delimiters. `processElements`
// later turns these markers into lookahead AST nodes.
const createLookaheadMarkerParser = (openingSequence, isPositive) => createObjectParser({
	type: 'lookahead-marker',
	isPositive,
	_open: createExactSequenceParser(openingSequence),
	inner: createParserAccessorParser(() => alternationParser),
	_close: createExactSequenceParser(')'),
});

// Positive lookahead (?=...)
const positiveLookaheadMarkerParser = createLookaheadMarkerParser('(?=', true);
// Negative lookahead (?!...)
const negativeLookaheadMarkerParser = createLookaheadMarkerParser('(?!', false);
493
// Any parenthesised group that yields a regular-expression node: named capture,
// non-capture, or plain capture. Lookahead groups are handled separately as markers.
const groupAlternatives = [
	namedCaptureGroupParser,
	nonCaptureGroupParser,
	captureGroupParser,
];
const groupParser = createUnionParser(groupAlternatives);
498
// Builds an object parser for an anchor symbol: a constant `type` tag plus a `_marker`
// field produced by matching `symbol` exactly. `processElements` later replaces these
// markers with anchor AST nodes.
const createAnchorMarkerParser = (type, symbol) => createObjectParser({
	type,
	_marker: createExactSequenceParser(symbol),
});

// `^` anchor marker
const startAnchorMarkerParser = createAnchorMarkerParser('start-anchor-marker', '^');
// `$` anchor marker
const endAnchorMarkerParser = createAnchorMarkerParser('end-anchor-marker', '$');
506
// Atom: the smallest unit a quantifier can attach to. Anchors and lookaheads are
// deliberately not atoms; they are parsed as sequence-level markers instead.
const atomAlternatives = [
	groupParser,
	characterClassParser,
	escapeParser,
	dotParser,
	literalCharacterParser,
];
const atomParser = createUnionParser(atomAlternatives);
514
/**
 * Parses an atom followed by an optional quantifier.
 *
 * Without a quantifier the atom is returned unchanged. Otherwise the atom is wrapped
 * according to the quantifier's `type`: `star`, `plus`, `optional`, or `repeat`
 * (which also forwards the parsed `bounds`).
 *
 * An unrecognised quantifier type throws instead of falling out of the switch and
 * silently yielding `undefined`, which would otherwise end up inside the AST.
 */
const quantifiedParser = promiseCompose(
	createTupleParser([
		atomParser,
		createOptionalParser(quantifierParser),
	]),
	([atom, quantifier]) => {
		if (quantifier === undefined) {
			return atom;
		}
		switch (quantifier.type) {
			case 'star':
				return star(atom);
			case 'plus':
				return plus(atom);
			case 'optional':
				return optional(atom);
			case 'repeat':
				return repeat(atom, quantifier.bounds);
			default:
				// Unreachable with the quantifier parsers defined above; guards future additions
				throw new Error(`Unexpected quantifier type "${quantifier.type}"`);
		}
	},
);
533
// One element of a concatenation: an anchor marker, a lookahead marker, or a
// (possibly quantified) atom. Markers are resolved afterwards by `processElements`.
const sequenceElementAlternatives = [
	startAnchorMarkerParser,
	endAnchorMarkerParser,
	positiveLookaheadMarkerParser,
	negativeLookaheadMarkerParser,
	quantifiedParser,
];
const sequenceElementParser = createUnionParser(sequenceElementAlternatives);
541
/**
 * Concatenates a list of regular expressions right-associatively:
 * `[a, b, c]` becomes `concat(a, concat(b, c))`.
 *
 * @param {Array} parts - Expressions to join, in order.
 * @returns `epsilon` for an empty list, the sole element for a one-element list,
 *   otherwise the nested `concat` chain.
 */
function concatList(parts) {
	const count = parts.length;
	if (count === 0) {
		return epsilon;
	}
	// Seed with the last part, then fold the remaining parts in from right to left
	let combined = parts[count - 1];
	for (let index = count - 2; index >= 0; index--) {
		combined = concat(parts[index], combined);
	}
	return combined;
}
548
/**
 * Turns a flat list of sequence elements (regular expressions mixed with anchor and
 * lookahead markers) into a proper AST.
 *
 * Anchors and lookaheads are treated as infix operators, like @gruhn/regex-utils.
 * Precedence order (lowest to highest): union -> start-anchor -> end-anchor ->
 * lookahead -> concat. The list is therefore split at the first start-anchor marker
 * if any, otherwise at the first end-anchor marker, otherwise at the first lookahead
 * marker; each side is processed recursively.
 *
 * @param {Array} elements - Parsed sequence elements in source order.
 * @returns `epsilon` for an empty list, otherwise the combined expression.
 */
function processElements(elements) {
	if (elements.length === 0) {
		return epsilon;
	}

	// Index of the first marker with the given type tag, or -1
	const findMarker = markerType => elements.findIndex(element => 'type' in element && element.type === markerType);
	// Everything before and everything after `index`, excluding the element at `index`
	const splitAround = index => [elements.slice(0, index), elements.slice(index + 1)];

	// Start anchors bind loosest among the infix markers
	const startAnchorIndex = findMarker('start-anchor-marker');
	if (startAnchorIndex >= 0) {
		const [before, after] = splitAround(startAnchorIndex);
		return startAnchor(processElements(before), processElements(after));
	}

	// Then end anchors
	const endAnchorIndex = findMarker('end-anchor-marker');
	if (endAnchorIndex >= 0) {
		const [before, after] = splitAround(endAnchorIndex);
		return endAnchor(processElements(before), processElements(after));
	}

	// Then lookaheads; with none left, only plain expressions remain to concatenate
	const lookaheadIndex = findMarker('lookahead-marker');
	if (lookaheadIndex < 0) {
		return concatList(elements);
	}

	const { isPositive, inner } = elements[lookaheadIndex];
	const [before, after] = splitAround(lookaheadIndex);
	const lookaheadExpression = lookahead(isPositive, inner, processElements(after));
	// Content in front of the lookahead is concatenated onto it
	return before.length === 0
		? lookaheadExpression
		: concat(processElements(before), lookaheadExpression);
}
586
// Concatenation: collects consecutive sequence elements (quantified atoms, anchor
// markers, lookahead markers) and hands the list to `processElements`.
const sequenceElementsParser = createArrayParser(sequenceElementParser);
const concatParser = promiseCompose(sequenceElementsParser, elements => processElements(elements));
588
/**
 * Alternation: `concat ('|' concat)*`.
 *
 * The branches are combined with `union` right-associatively, like
 * @gruhn/regex-utils: `a|b|c` becomes `union(a, union(b, c))`. A single branch is
 * returned unchanged.
 */
const alternationParser = promiseCompose(
	createTupleParser([
		concatParser,
		createArrayParser(promiseCompose(
			createTupleParser([
				createExactSequenceParser('|'),
				concatParser,
			]),
			pipeAndBranch => pipeAndBranch[1],
		)),
	]),
	parsed => {
		// Local copy of all branches; there is always at least the first one
		const branches = [parsed[0], ...parsed[1]];
		let combined = branches.pop();
		while (branches.length > 0) {
			combined = union(branches.pop(), combined);
		}
		return combined;
	},
);
600
+ export const regularExpressionParser = alternationParser;
@@ -0,0 +1 @@
1
+ export {};