@futpib/parser 1.0.3 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +24 -0
- package/.github/workflows/main.yml +1 -0
- package/build/androidPackageParser.js +30 -32
- package/build/arbitraryDalvikBytecode.d.ts +3 -3
- package/build/arbitraryDalvikBytecode.js +33 -27
- package/build/arbitraryDalvikExecutable.js +55 -17
- package/build/arbitraryJava.d.ts +31 -0
- package/build/arbitraryJava.js +532 -0
- package/build/arbitraryJavaScript.d.ts +3 -0
- package/build/arbitraryJavaScript.js +263 -0
- package/build/arbitraryJavascript.d.ts +3 -0
- package/build/arbitraryJavascript.js +263 -0
- package/build/arbitraryZig.d.ts +3 -0
- package/build/arbitraryZig.js +240 -0
- package/build/arbitraryZipStream.d.ts +1 -1
- package/build/arrayParser.js +72 -13
- package/build/backsmali.d.ts +4 -3
- package/build/backsmali.js +26 -6
- package/build/bash.d.ts +89 -0
- package/build/bash.js +1 -0
- package/build/bashParser.d.ts +6 -0
- package/build/bashParser.js +335 -0
- package/build/bashParser.test.d.ts +1 -0
- package/build/bashParser.test.js +343 -0
- package/build/bashParserEdgeCases.test.d.ts +1 -0
- package/build/bashParserEdgeCases.test.js +117 -0
- package/build/dalvikBytecodeParser/addressConversion.d.ts +110 -0
- package/build/dalvikBytecodeParser/addressConversion.js +334 -0
- package/build/dalvikBytecodeParser/formatParsers.d.ts +7 -6
- package/build/dalvikBytecodeParser/formatParsers.js +13 -14
- package/build/dalvikBytecodeParser.d.ts +60 -31
- package/build/dalvikBytecodeParser.js +92 -35
- package/build/dalvikBytecodeParser.test-d.d.ts +1 -0
- package/build/dalvikBytecodeParser.test-d.js +268 -0
- package/build/dalvikBytecodeUnparser/formatUnparsers.d.ts +9 -8
- package/build/dalvikBytecodeUnparser/formatUnparsers.js +13 -12
- package/build/dalvikBytecodeUnparser.d.ts +2 -2
- package/build/dalvikBytecodeUnparser.js +23 -23
- package/build/dalvikBytecodeUnparser.test.js +7 -7
- package/build/dalvikExecutable.d.ts +3 -3
- package/build/dalvikExecutable.test-d.d.ts +1 -0
- package/build/dalvikExecutable.test-d.js +59 -0
- package/build/dalvikExecutableParser/typedNumbers.d.ts +18 -0
- package/build/dalvikExecutableParser/typedNumbers.js +3 -0
- package/build/dalvikExecutableParser.d.ts +2 -1
- package/build/dalvikExecutableParser.js +96 -77
- package/build/dalvikExecutableParser.test.js +24 -3
- package/build/dalvikExecutableParserAgainstSmaliParser.test.js +3 -0
- package/build/dalvikExecutableUnparser/poolScanners.d.ts +2 -2
- package/build/dalvikExecutableUnparser/sectionUnparsers.d.ts +3 -3
- package/build/dalvikExecutableUnparser/sectionUnparsers.js +26 -11
- package/build/dalvikExecutableUnparser.d.ts +2 -2
- package/build/dalvikExecutableUnparser.test.js +2 -1
- package/build/disjunctionParser.d.ts +5 -3
- package/build/disjunctionParser.js +79 -17
- package/build/disjunctionParser.test-d.d.ts +1 -0
- package/build/disjunctionParser.test-d.js +72 -0
- package/build/elementSwitchParser.d.ts +4 -0
- package/build/{exactElementSwitchParser.js → elementSwitchParser.js} +3 -4
- package/build/elementSwitchParser.test-d.d.ts +1 -0
- package/build/elementSwitchParser.test-d.js +44 -0
- package/build/exactSequenceParser.d.ts +4 -2
- package/build/exactSequenceParser.test-d.d.ts +1 -0
- package/build/exactSequenceParser.test-d.js +36 -0
- package/build/fetchCid.js +2 -66
- package/build/index.d.ts +25 -2
- package/build/index.js +23 -1
- package/build/index.test.js +16 -1
- package/build/inputReader.d.ts +10 -0
- package/build/inputReader.js +36 -0
- package/build/java.d.ts +502 -0
- package/build/java.js +2 -0
- package/build/javaKeyStoreParser.js +14 -17
- package/build/javaParser.d.ts +51 -0
- package/build/javaParser.js +1538 -0
- package/build/javaParser.test.d.ts +1 -0
- package/build/javaParser.test.js +1287 -0
- package/build/javaScript.d.ts +35 -0
- package/build/javaScript.js +1 -0
- package/build/javaScriptParser.d.ts +9 -0
- package/build/javaScriptParser.js +34 -0
- package/build/javaScriptUnparser.d.ts +3 -0
- package/build/javaScriptUnparser.js +4 -0
- package/build/javaScriptUnparser.test.d.ts +1 -0
- package/build/javaScriptUnparser.test.js +24 -0
- package/build/javaUnparser.d.ts +2 -0
- package/build/javaUnparser.js +519 -0
- package/build/javaUnparser.test.d.ts +1 -0
- package/build/javaUnparser.test.js +24 -0
- package/build/javascript.d.ts +35 -0
- package/build/javascript.js +1 -0
- package/build/javascriptParser.d.ts +9 -0
- package/build/javascriptParser.js +34 -0
- package/build/javascriptUnparser.d.ts +3 -0
- package/build/javascriptUnparser.js +4 -0
- package/build/javascriptUnparser.test.d.ts +1 -0
- package/build/javascriptUnparser.test.js +24 -0
- package/build/jsonParser.js +2 -12
- package/build/lazyMessageError.d.ts +3 -0
- package/build/lookaheadParser.js +60 -3
- package/build/negativeLookaheadParser.js +70 -11
- package/build/nonEmptyArrayParser.js +72 -13
- package/build/objectParser.d.ts +12 -0
- package/build/objectParser.js +31 -0
- package/build/objectParser.test-d.d.ts +1 -0
- package/build/objectParser.test-d.js +112 -0
- package/build/objectParser.test.d.ts +1 -0
- package/build/objectParser.test.js +55 -0
- package/build/optionalParser.js +69 -10
- package/build/parser.d.ts +4 -0
- package/build/parser.js +3 -1
- package/build/parser.test.js +114 -1
- package/build/parserConsumedSequenceParser.js +66 -7
- package/build/parserContext.d.ts +6 -0
- package/build/parserContext.js +20 -11
- package/build/parserError.d.ts +119 -27
- package/build/parserError.js +16 -8
- package/build/regexpParser.d.ts +2 -0
- package/build/regexpParser.js +101 -0
- package/build/regexpParser.test.d.ts +1 -0
- package/build/regexpParser.test.js +114 -0
- package/build/regularExpression.d.ts +63 -0
- package/build/regularExpression.js +1 -0
- package/build/regularExpressionParser.d.ts +3 -0
- package/build/regularExpressionParser.js +600 -0
- package/build/regularExpressionParser.test.d.ts +1 -0
- package/build/regularExpressionParser.test.js +89 -0
- package/build/separatedArrayParser.js +73 -14
- package/build/separatedNonEmptyArrayParser.js +73 -14
- package/build/sliceBoundedParser.js +62 -5
- package/build/smaliParser.d.ts +7 -7
- package/build/smaliParser.js +185 -268
- package/build/smaliParser.test.js +58 -0
- package/build/stringEscapes.d.ts +5 -0
- package/build/stringEscapes.js +244 -0
- package/build/symbolicExpression.d.ts +29 -0
- package/build/symbolicExpression.js +1 -0
- package/build/symbolicExpressionParser.d.ts +4 -0
- package/build/symbolicExpressionParser.js +123 -0
- package/build/symbolicExpressionParser.test.d.ts +1 -0
- package/build/symbolicExpressionParser.test.js +289 -0
- package/build/terminatedArrayParser.js +113 -38
- package/build/terminatedArrayParser.test.js +4 -2
- package/build/tupleParser.d.ts +7 -15
- package/build/tupleParser.js +1 -0
- package/build/unionParser.d.ts +5 -3
- package/build/unionParser.js +7 -2
- package/build/unionParser.test-d.d.ts +1 -0
- package/build/unionParser.test-d.js +72 -0
- package/build/unionParser.test.js +10 -11
- package/build/zig.d.ts +280 -0
- package/build/zig.js +2 -0
- package/build/zigParser.d.ts +3 -0
- package/build/zigParser.js +1119 -0
- package/build/zigParser.test.d.ts +1 -0
- package/build/zigParser.test.js +1590 -0
- package/build/zigUnparser.d.ts +2 -0
- package/build/zigUnparser.js +460 -0
- package/build/zigUnparser.test.d.ts +1 -0
- package/build/zigUnparser.test.js +24 -0
- package/build/zipParser.js +19 -32
- package/build/zipUnparser.js +19 -7
- package/build/zipUnparser.test.js +1 -1
- package/node_modules-@types/s-expression/index.d.ts +5 -0
- package/package.json +25 -6
- package/src/androidPackageParser.ts +33 -60
- package/src/arbitraryDalvikBytecode.ts +39 -31
- package/src/arbitraryDalvikExecutable.ts +65 -20
- package/src/arbitraryJava.ts +804 -0
- package/src/arbitraryJavaScript.ts +410 -0
- package/src/arbitraryZig.ts +380 -0
- package/src/arrayParser.ts +1 -3
- package/src/backsmali.ts +35 -4
- package/src/bash.ts +127 -0
- package/src/bashParser.test.ts +590 -0
- package/src/bashParser.ts +498 -0
- package/src/dalvikBytecodeParser/addressConversion.ts +496 -0
- package/src/dalvikBytecodeParser/formatParsers.ts +19 -29
- package/src/dalvikBytecodeParser.test-d.ts +310 -0
- package/src/dalvikBytecodeParser.ts +194 -69
- package/src/dalvikBytecodeUnparser/formatUnparsers.ts +27 -26
- package/src/dalvikBytecodeUnparser.test.ts +7 -7
- package/src/dalvikBytecodeUnparser.ts +31 -30
- package/src/dalvikExecutable.test-d.ts +132 -0
- package/src/dalvikExecutable.ts +3 -3
- package/src/dalvikExecutableParser/typedNumbers.ts +11 -0
- package/src/dalvikExecutableParser.test.ts +37 -3
- package/src/dalvikExecutableParser.test.ts.md +163 -2
- package/src/dalvikExecutableParser.test.ts.snap +0 -0
- package/src/dalvikExecutableParser.ts +121 -139
- package/src/dalvikExecutableParserAgainstSmaliParser.test.ts +4 -0
- package/src/dalvikExecutableUnparser/poolScanners.ts +6 -6
- package/src/dalvikExecutableUnparser/sectionUnparsers.ts +38 -14
- package/src/dalvikExecutableUnparser.test.ts +3 -2
- package/src/dalvikExecutableUnparser.ts +4 -4
- package/src/disjunctionParser.test-d.ts +105 -0
- package/src/disjunctionParser.ts +18 -15
- package/src/elementSwitchParser.test-d.ts +74 -0
- package/src/elementSwitchParser.ts +51 -0
- package/src/exactSequenceParser.test-d.ts +43 -0
- package/src/exactSequenceParser.ts +13 -8
- package/src/fetchCid.ts +2 -76
- package/src/index.test.ts +22 -1
- package/src/index.ts +119 -2
- package/src/inputReader.ts +53 -0
- package/src/java.ts +708 -0
- package/src/javaKeyStoreParser.ts +18 -32
- package/src/javaParser.test.ts +1592 -0
- package/src/javaParser.ts +2640 -0
- package/src/javaScript.ts +36 -0
- package/src/javaScriptParser.ts +57 -0
- package/src/javaScriptUnparser.test.ts +37 -0
- package/src/javaScriptUnparser.ts +7 -0
- package/src/javaUnparser.test.ts +37 -0
- package/src/javaUnparser.ts +640 -0
- package/src/jsonParser.ts +6 -27
- package/src/lookaheadParser.ts +2 -6
- package/src/negativeLookaheadParser.ts +1 -3
- package/src/nonEmptyArrayParser.ts +1 -3
- package/src/objectParser.test-d.ts +152 -0
- package/src/objectParser.test.ts +71 -0
- package/src/objectParser.ts +69 -0
- package/src/optionalParser.ts +1 -3
- package/src/parser.test.ts +151 -4
- package/src/parser.ts +11 -1
- package/src/parserConsumedSequenceParser.ts +2 -4
- package/src/parserContext.ts +26 -11
- package/src/parserError.ts +17 -3
- package/src/regexpParser.test.ts +264 -0
- package/src/regexpParser.ts +126 -0
- package/src/regularExpression.ts +24 -0
- package/src/regularExpressionParser.test.ts +102 -0
- package/src/regularExpressionParser.ts +920 -0
- package/src/separatedArrayParser.ts +1 -3
- package/src/separatedNonEmptyArrayParser.ts +1 -3
- package/src/sliceBoundedParser.test.ts +2 -2
- package/src/sliceBoundedParser.ts +15 -19
- package/src/smaliParser.test.ts +64 -0
- package/src/smaliParser.test.ts.md +12 -12
- package/src/smaliParser.test.ts.snap +0 -0
- package/src/smaliParser.ts +246 -534
- package/src/stringEscapes.ts +253 -0
- package/src/symbolicExpression.ts +17 -0
- package/src/symbolicExpressionParser.test.ts +466 -0
- package/src/symbolicExpressionParser.ts +190 -0
- package/src/terminatedArrayParser.test.ts +9 -6
- package/src/terminatedArrayParser.ts +25 -29
- package/src/tupleParser.ts +21 -18
- package/src/unionParser.test-d.ts +105 -0
- package/src/unionParser.test.ts +18 -17
- package/src/unionParser.ts +28 -16
- package/src/zig.ts +411 -0
- package/src/zigParser.test.ts +1693 -0
- package/src/zigParser.ts +1745 -0
- package/src/zigUnparser.test.ts +37 -0
- package/src/zigUnparser.ts +615 -0
- package/src/zipParser.ts +20 -56
- package/src/zipUnparser.test.ts +1 -1
- package/src/zipUnparser.ts +22 -7
- package/tsconfig.json +2 -2
- package/build/exactElementSwitchParser.d.ts +0 -3
- package/src/exactElementSwitchParser.ts +0 -41
|
@@ -0,0 +1,600 @@
|
|
|
1
|
+
import { createUnionParser } from './unionParser.js';
|
|
2
|
+
import { createExactSequenceParser } from './exactSequenceParser.js';
|
|
3
|
+
import { promiseCompose } from './promiseCompose.js';
|
|
4
|
+
import { createTupleParser } from './tupleParser.js';
|
|
5
|
+
import { createArrayParser } from './arrayParser.js';
|
|
6
|
+
import { createParserAccessorParser } from './parserAccessorParser.js';
|
|
7
|
+
import { createElementParser } from './elementParser.js';
|
|
8
|
+
import { parserCreatorCompose } from './parserCreatorCompose.js';
|
|
9
|
+
import { createOptionalParser } from './optionalParser.js';
|
|
10
|
+
import { createFixedLengthSequenceParser } from './fixedLengthSequenceParser.js';
|
|
11
|
+
import { createTerminatedArrayParser } from './terminatedArrayParser.js';
|
|
12
|
+
import { createDisjunctionParser } from './disjunctionParser.js';
|
|
13
|
+
import { createNegativeLookaheadParser } from './negativeLookaheadParser.js';
|
|
14
|
+
import { createObjectParser } from './objectParser.js';
|
|
15
|
+
// CharacterSet helpers
|
|
16
|
+
// Shared sentinel for the character set with no members; the helpers below also use it
// as the left/right child of freshly created single-range nodes.
const emptyCharacterSet = { type: 'empty' };
|
|
17
|
+
/**
 * Tells whether a code point range contains no code points.
 *
 * @param {{ start: number, end: number }} range - Range with inclusive bounds.
 * @returns {boolean} `true` when `start` lies past `end`.
 */
function codePointRangeIsEmpty(range) {
    const { start, end } = range;
    return end < start;
}
|
|
20
|
+
/**
 * Tells whether `rangeA` ends before `rangeB` starts with at least one code point
 * between them, i.e. the two ranges neither overlap nor touch.
 *
 * @param {{ start: number, end: number }} rangeA
 * @param {{ start: number, end: number }} rangeB
 * @returns {boolean}
 */
function codePointRangeIsStrictlyBefore(rangeA, rangeB) {
    const firstCodePointPastA = rangeA.end + 1;
    return rangeB.start > firstCodePointPastA;
}
|
|
23
|
+
/**
 * Tells whether `rangeA` starts after `rangeB` ends with at least one code point
 * between them (the mirror image of the "strictly before" relation).
 *
 * @param {{ start: number, end: number }} rangeA
 * @param {{ start: number, end: number }} rangeB
 * @returns {boolean}
 */
function codePointRangeIsStrictlyAfter(rangeA, rangeB) {
    const firstCodePointPastB = rangeB.end + 1;
    return firstCodePointPastB < rangeA.start;
}
|
|
26
|
+
/**
 * Returns the smallest single range covering both inputs.
 * An empty input is ignored: the other range is returned as-is (same object).
 *
 * @param {{ start: number, end: number }} rangeA
 * @param {{ start: number, end: number }} rangeB
 * @returns {{ start: number, end: number }}
 */
function codePointRangeLeastUpperBound(rangeA, rangeB) {
    if (codePointRangeIsEmpty(rangeA)) {
        return rangeB;
    }
    if (codePointRangeIsEmpty(rangeB)) {
        return rangeA;
    }
    const start = Math.min(rangeA.start, rangeB.start);
    const end = Math.max(rangeA.end, rangeB.end);
    return { start, end };
}
|
|
36
|
+
/**
 * Tells whether two ranges neither overlap nor touch, in either order.
 *
 * @param {{ start: number, end: number }} rangeA
 * @param {{ start: number, end: number }} rangeB
 * @returns {boolean}
 */
function codePointRangeStrictlyDisjoint(rangeA, rangeB) {
    const aEndsWellBeforeB = rangeA.end + 1 < rangeB.start;
    const aStartsWellAfterB = rangeB.end + 1 < rangeA.start;
    return aEndsWellBeforeB || aStartsWellAfterB;
}
|
|
39
|
+
/**
 * Creates a tree node of a character set.
 *
 * @param {{ start: number, end: number }} range - Range stored at this node.
 * @param {object} left - Left child character set.
 * @param {object} right - Right child character set.
 * @returns {{ type: 'node', range: object, left: object, right: object }}
 */
function characterSetNode(range, left, right) {
    const node = { type: 'node', range, left, right };
    return node;
}
|
|
42
|
+
/**
 * Yields the ranges stored in a character set by in-order traversal:
 * left subtree, the node's own range, then right subtree.
 * Anything that is not a `'node'` yields nothing.
 *
 * @param {object} set - Character set (tree node or empty set).
 * @returns {Generator<{ start: number, end: number }>}
 */
function* characterSetGetRanges(set) {
    if (set.type !== 'node') {
        return;
    }
    yield* characterSetGetRanges(set.left);
    yield set.range;
    yield* characterSetGetRanges(set.right);
}
|
|
49
|
+
/**
 * Removes from `set` every stored range that overlaps or touches `range`.
 *
 * @param {object} set - Character set to search.
 * @param {{ start: number, end: number }} range - Range being merged in.
 * @returns {{ restCharSet: object, extendedRange: { start: number, end: number } }}
 *   `restCharSet` is the set without the removed ranges; `extendedRange` is the
 *   least upper bound of `range` and all removed ranges.
 */
function characterSetExtractOverlap(set, range) {
    if (set.type === 'empty') {
        return { restCharSet: set, extendedRange: range };
    }
    const nodeRange = set.range;
    let mergedRange = range;
    let remainingLeft = set.left;
    let remainingRight = set.right;
    // Only descend into a subtree when `range` reaches past the node's range on that side.
    if (nodeRange.start > range.start) {
        const fromLeft = characterSetExtractOverlap(set.left, range);
        mergedRange = codePointRangeLeastUpperBound(mergedRange, fromLeft.extendedRange);
        remainingLeft = fromLeft.restCharSet;
    }
    if (nodeRange.end < range.end) {
        const fromRight = characterSetExtractOverlap(set.right, range);
        mergedRange = codePointRangeLeastUpperBound(mergedRange, fromRight.extendedRange);
        remainingRight = fromRight.restCharSet;
    }
    if (!codePointRangeStrictlyDisjoint(range, nodeRange)) {
        // This node's range is absorbed too: drop the node and re-join its children.
        return {
            extendedRange: codePointRangeLeastUpperBound(nodeRange, mergedRange),
            restCharSet: characterSetUnion(remainingLeft, remainingRight),
        };
    }
    // The node itself is untouched; keep it on top of the pruned children.
    return {
        extendedRange: mergedRange,
        restCharSet: characterSetNode(nodeRange, remainingLeft, remainingRight),
    };
}
|
|
77
|
+
/**
 * Returns a character set that additionally contains `range`.
 * Ranges that overlap or touch the new one are merged into a single node.
 * The input set is not modified.
 *
 * @param {object} set - Character set to extend.
 * @param {{ start: number, end: number }} range - Range to add; an empty range returns `set` unchanged.
 * @returns {object} The resulting character set.
 */
function characterSetInsertRange(set, range) {
    if (codePointRangeIsEmpty(range)) {
        return set;
    }
    if (set.type === 'empty') {
        return characterSetNode(range, emptyCharacterSet, emptyCharacterSet);
    }
    const nodeRange = set.range;
    if (codePointRangeIsStrictlyBefore(range, nodeRange)) {
        const grownLeft = characterSetInsertRange(set.left, range);
        return characterSetNode(nodeRange, grownLeft, set.right);
    }
    if (codePointRangeIsStrictlyAfter(range, nodeRange)) {
        const grownRight = characterSetInsertRange(set.right, range);
        return characterSetNode(nodeRange, set.left, grownRight);
    }
    // `range` overlaps or touches this node: pull every affected range out of both
    // subtrees and fold them, together with the node's range, into one range.
    const fromLeft = characterSetExtractOverlap(set.left, range);
    const fromRight = characterSetExtractOverlap(set.right, range);
    const mergedRange = codePointRangeLeastUpperBound(
        codePointRangeLeastUpperBound(nodeRange, fromLeft.extendedRange),
        fromRight.extendedRange,
    );
    if (codePointRangeIsEmpty(mergedRange)) {
        return emptyCharacterSet;
    }
    return characterSetNode(mergedRange, fromLeft.restCharSet, fromRight.restCharSet);
}
|
|
98
|
+
/**
 * Returns the union of two character sets by inserting every range of `setB`
 * into `setA`, in `setB`'s traversal order.
 *
 * @param {object} setA
 * @param {object} setB
 * @returns {object} The combined character set.
 */
function characterSetUnion(setA, setB) {
    let combined = setA;
    for (const range of characterSetGetRanges(setB)) {
        combined = characterSetInsertRange(combined, range);
    }
    return combined;
}
|
|
101
|
+
/**
 * Splits `range` into the part up to and including `point` and the part after it.
 * Either part may come out empty (start > end) when `point` lies outside `range`.
 *
 * @param {number} point - Last code point that belongs to the first part.
 * @param {{ start: number, end: number }} range
 * @returns {[{ start: number, end: number }, { start: number, end: number }]}
 */
function codePointRangeSplitAt(point, range) {
    const { start, end } = range;
    const upToPoint = { start, end: Math.min(end, point) };
    const pastPoint = { start: Math.max(start, point + 1), end };
    return [upToPoint, pastPoint];
}
|
|
107
|
+
/**
 * Unites two ranges into a sorted list of zero, one or two ranges.
 * Empty inputs are dropped; ranges that overlap or touch are fused into one.
 *
 * @param {{ start: number, end: number }} rangeA
 * @param {{ start: number, end: number }} rangeB
 * @returns {Array<{ start: number, end: number }>}
 */
function codePointRangeUnion(rangeA, rangeB) {
    const aIsEmpty = codePointRangeIsEmpty(rangeA);
    const bIsEmpty = codePointRangeIsEmpty(rangeB);
    if (aIsEmpty) {
        return bIsEmpty ? [] : [rangeB];
    }
    if (bIsEmpty) {
        return [rangeA];
    }
    // A gap of at least one code point keeps the ranges separate.
    if (rangeA.end + 1 < rangeB.start) {
        return [rangeA, rangeB];
    }
    if (rangeB.end + 1 < rangeA.start) {
        return [rangeB, rangeA];
    }
    const start = Math.min(rangeA.start, rangeB.start);
    const end = Math.max(rangeA.end, rangeB.end);
    return [{ start, end }];
}
|
|
123
|
+
/**
 * Computes `rangeA` minus `rangeB` as a list of zero, one or two ranges.
 *
 * @param {{ start: number, end: number }} rangeA
 * @param {{ start: number, end: number }} rangeB
 * @returns {Array<{ start: number, end: number }>}
 */
function codePointRangeDifference(rangeA, rangeB) {
    // Cut off what lies below rangeB ...
    const lowerSplit = codePointRangeSplitAt(rangeB.start - 1, rangeA);
    const before = lowerSplit[0];
    // ... then, from the remainder, keep only what lies above rangeB.
    const upperSplit = codePointRangeSplitAt(rangeB.end, lowerSplit[1]);
    const after = upperSplit[1];
    return codePointRangeUnion(before, after);
}
|
|
128
|
+
/**
 * Returns a character set without the code points of `range`.
 * The input set is not modified.
 *
 * @param {object} set - Character set to remove from.
 * @param {{ start: number, end: number }} range - Range to remove; an empty range returns `set` unchanged.
 * @returns {object} The resulting character set.
 */
function characterSetDeleteRange(set, range) {
    if (codePointRangeIsEmpty(range)) {
        return set;
    }
    if (set.type === 'empty') {
        return emptyCharacterSet;
    }
    const nodeRange = set.range;
    // Portion of `range` below this node's range; it is deleted from the left subtree.
    const belowNode = codePointRangeSplitAt(nodeRange.start - 1, range)[0];
    // Portion up to the node's upper end (subtracted from the node's own range) and
    // the portion above it (deleted from the right subtree).
    const [upToNodeEnd, aboveNode] = codePointRangeSplitAt(nodeRange.end, range);
    const prunedLeft = characterSetDeleteRange(set.left, belowNode);
    const prunedRight = characterSetDeleteRange(set.right, aboveNode);
    const survivors = codePointRangeDifference(nodeRange, upToNodeEnd);
    switch (survivors.length) {
        case 0:
            // The node's range vanished completely: join what is left of the children.
            return characterSetUnion(prunedLeft, prunedRight);
        case 1:
            return characterSetNode(survivors[0], prunedLeft, prunedRight);
        default:
            // The node's range was cut in two: put one piece on each side, then join.
            return characterSetUnion(
                characterSetInsertRange(prunedLeft, survivors[0]),
                characterSetInsertRange(prunedRight, survivors[1]),
            );
    }
}
|
|
149
|
+
/**
 * Returns `setA` minus `setB` by deleting every range of `setB` from `setA`.
 *
 * @param {object} setA
 * @param {object} setB
 * @returns {object} The resulting character set.
 */
function characterSetDifference(setA, setB) {
    let remaining = setA;
    for (const range of characterSetGetRanges(setB)) {
        remaining = characterSetDeleteRange(remaining, range);
    }
    return remaining;
}
|
|
152
|
+
/**
 * Builds a character set holding exactly one range.
 *
 * @param {{ start: number, end: number }} range
 * @returns {object} A single-node set, or the shared empty set when `range` is empty.
 */
function characterSetFromRange(range) {
    return codePointRangeIsEmpty(range)
        ? emptyCharacterSet
        : characterSetNode(range, emptyCharacterSet, emptyCharacterSet);
}
|
|
158
|
+
/**
 * Builds a character set containing only the first code point of `char`.
 *
 * @param {string} char - String whose first code point becomes the sole member.
 * @returns {object} The resulting character set.
 */
function characterSetSingleton(char) {
    const only = char.codePointAt(0);
    return characterSetFromRange({ start: only, end: only });
}
|
|
162
|
+
/**
 * Builds a character set spanning from the first code point of `startChar`
 * to the first code point of `endChar`, both inclusive.
 * A reversed pair produces the empty set.
 *
 * @param {string} startChar
 * @param {string} endChar
 * @returns {object} The resulting character set.
 */
function characterSetCharRange(startChar, endChar) {
    return characterSetFromRange({
        start: startChar.codePointAt(0),
        end: endChar.codePointAt(0),
    });
}
|
|
167
|
+
/**
 * Builds a character set from an array of characters, adding them one by one
 * in array order.
 *
 * @param {string[]} chars
 * @returns {object} The resulting character set (the shared empty set for `[]`).
 */
function characterSetFromArray(chars) {
    let collected = emptyCharacterSet;
    chars.forEach((char) => {
        collected = characterSetUnion(collected, characterSetSingleton(char));
    });
    return collected;
}
|
|
170
|
+
/**
 * Returns the complement of `set` relative to `alphabet` (not relative to the full
 * code point space). `alphabet` is a `const` declared further down in this module,
 * so this function can only be called after that declaration has been evaluated.
 *
 * @param {object} set
 * @returns {object} Every member of `alphabet` that is not in `set`.
 */
function characterSetComplement(set) {
    const universe = alphabet;
    return characterSetDifference(universe, set);
}
|
|
173
|
+
// Pre-defined character sets

// All code points 0..0x10FFFF except the four line terminators \r, \n, U+2028, U+2029.
// Complements (see characterSetComplement) are taken relative to this set.
const alphabet = characterSetDifference(
    characterSetFromRange({ start: 0, end: 0x10FFFF }),
    characterSetFromArray(['\r', '\n', '\u2028', '\u2029']),
);
// Set used for `.`. `alphabet` already lacks the line terminators, so this second
// subtraction removes nothing; it is kept as in the original definition.
const wildcardCharacterSet = characterSetDifference(
    alphabet,
    characterSetFromArray(['\r', '\n', '\u2028', '\u2029']),
);

// \d and \D
const digitChars = characterSetCharRange('0', '9');
const nonDigitChars = characterSetComplement(digitChars);

// \w and \W: a-z, A-Z, 0-9 and underscore, united in this order.
const wordChars = characterSetUnion(
    characterSetUnion(
        characterSetUnion(
            characterSetCharRange('a', 'z'),
            characterSetCharRange('A', 'Z'),
        ),
        characterSetCharRange('0', '9'),
    ),
    characterSetSingleton('_'),
);
const nonWordChars = characterSetComplement(wordChars);

// \s and \S: the parts are united left to right, starting from the first one.
const whiteSpaceChars = [
    characterSetSingleton('\f'),
    characterSetSingleton('\n'),
    characterSetSingleton('\r'),
    characterSetSingleton('\t'),
    characterSetSingleton('\v'),
    characterSetSingleton('\u0020'),
    characterSetSingleton('\u00a0'),
    characterSetSingleton('\u1680'),
    characterSetCharRange('\u2000', '\u200a'),
    characterSetSingleton('\u2028'),
    characterSetSingleton('\u2029'),
    characterSetSingleton('\u202f'),
    characterSetSingleton('\u205f'),
    characterSetSingleton('\u3000'),
    characterSetSingleton('\ufeff'),
].reduce((united, part) => characterSetUnion(united, part));
const nonWhiteSpaceChars = characterSetComplement(whiteSpaceChars);
|
|
203
|
+
// AST constructors
|
|
204
|
+
// AST node tagged 'epsilon' (presumably the expression that matches only the empty
// string — confirm against the consumers of this AST). A single shared instance.
const epsilon = { type: 'epsilon' };
|
|
205
|
+
/**
 * Creates a 'literal' AST node that carries a character set.
 *
 * @param {object} charset - Character set stored on the node.
 * @returns {{ type: 'literal', charset: object }}
 */
function literal(charset) {
    const node = { type: 'literal', charset };
    return node;
}
|
|
208
|
+
/**
 * Creates a 'concat' AST node from two sub-expressions.
 *
 * @param {object} left
 * @param {object} right
 * @returns {{ type: 'concat', left: object, right: object }}
 */
function concat(left, right) {
    const node = { type: 'concat', left, right };
    return node;
}
|
|
211
|
+
/**
 * Creates a 'union' AST node from two alternatives.
 *
 * @param {object} left
 * @param {object} right
 * @returns {{ type: 'union', left: object, right: object }}
 */
function union(left, right) {
    const node = { type: 'union', left, right };
    return node;
}
|
|
214
|
+
/**
 * Creates a 'star' AST node around a sub-expression.
 *
 * @param {object} inner
 * @returns {{ type: 'star', inner: object }}
 */
function star(inner) {
    const node = { type: 'star', inner };
    return node;
}
|
|
217
|
+
/**
 * Creates a 'plus' AST node around a sub-expression.
 *
 * @param {object} inner
 * @returns {{ type: 'plus', inner: object }}
 */
function plus(inner) {
    const node = { type: 'plus', inner };
    return node;
}
|
|
220
|
+
/**
 * Creates an 'optional' AST node around a sub-expression.
 *
 * @param {object} inner
 * @returns {{ type: 'optional', inner: object }}
 */
function optional(inner) {
    const node = { type: 'optional', inner };
    return node;
}
|
|
223
|
+
/**
 * Creates a 'repeat' AST node around a sub-expression.
 *
 * @param {object} inner
 * @param {*} bounds - Repetition bounds, stored on the node unchanged.
 * @returns {{ type: 'repeat', inner: object, bounds: * }}
 */
function repeat(inner, bounds) {
    const node = { type: 'repeat', inner, bounds };
    return node;
}
|
|
226
|
+
/**
 * Creates a 'capture-group' AST node.
 *
 * @param {object} inner - Captured sub-expression.
 * @param {string} [name] - Group name; when `undefined` the node gets no `name` property at all.
 * @returns {{ type: 'capture-group', inner: object, name?: string }}
 */
function captureGroup(inner, name) {
    const node = { type: 'capture-group', inner };
    if (name !== undefined) {
        node.name = name;
    }
    return node;
}
|
|
232
|
+
/**
 * Creates a 'lookahead' AST node.
 *
 * @param {boolean} isPositive - Stored on the node as given.
 * @param {object} inner - Expression inside the lookahead.
 * @param {object} right - Expression stored as `right`.
 * @returns {{ type: 'lookahead', isPositive: boolean, inner: object, right: object }}
 */
function lookahead(isPositive, inner, right) {
    const node = { type: 'lookahead', isPositive, inner, right };
    return node;
}
|
|
235
|
+
/**
 * Creates a 'start-anchor' AST node with the expressions on either side of it.
 *
 * @param {object} left
 * @param {object} right
 * @returns {{ type: 'start-anchor', left: object, right: object }}
 */
function startAnchor(left, right) {
    const node = { type: 'start-anchor', left, right };
    return node;
}
|
|
238
|
+
/**
 * Creates an 'end-anchor' AST node with the expressions on either side of it.
 *
 * @param {object} left
 * @param {object} right
 * @returns {{ type: 'end-anchor', left: object, right: object }}
 */
function endAnchor(left, right) {
    const node = { type: 'end-anchor', left, right };
    return node;
}
|
|
241
|
+
// Parser implementation
|
|
242
|
+
// Single shared parser from createElementParser(); the parsers below use its output as
// one character (presumably it consumes exactly one input element — confirm in elementParser.js).
const elementParser = createElementParser();
|
|
243
|
+
// Characters that literalCharacterParser refuses to accept as a plain literal.
const metaCharacters = new Set(['\\', '^', '$', '.', '|', '?', '*', '+', '(', ')', '[', ']', '{', '}']);
|
|
244
|
+
// Escape sequences for control characters.
// Each table row pairs the escape as written in the pattern with the character it denotes;
// every parser yields a fresh single-character literal node on each successful parse.
const [
    escapeNParser,
    escapeRParser,
    escapeTParser,
    escapeFParser,
    escapeVParser,
    escape0Parser,
] = [
    ['\\n', '\n'],
    ['\\r', '\r'],
    ['\\t', '\t'],
    ['\\f', '\f'],
    ['\\v', '\v'],
    ['\\0', '\0'],
].map(([sequence, char]) => promiseCompose(
    createExactSequenceParser(sequence),
    () => literal(characterSetSingleton(char)),
));
|
|
251
|
+
// Character class escapes.
// Each table row pairs the escape as written in the pattern with the pre-defined
// character set it stands for; every parser yields a literal node over that set.
const [
    escapeDigitParser,
    escapeNonDigitParser,
    escapeWordParser,
    escapeNonWordParser,
    escapeSpaceParser,
    escapeNonSpaceParser,
] = [
    ['\\d', digitChars],
    ['\\D', nonDigitChars],
    ['\\w', wordChars],
    ['\\W', nonWordChars],
    ['\\s', whiteSpaceChars],
    ['\\S', nonWhiteSpaceChars],
].map(([sequence, charset]) => promiseCompose(
    createExactSequenceParser(sequence),
    () => literal(charset),
));
|
|
258
|
+
// Hex escape \xHH
// Parses `\x` followed by exactly two hexadecimal digits into a single-character literal.
// The digits are validated explicitly: Number.parseInt alone would turn "zz" into NaN
// (which String.fromCharCode maps to "\0") and would accept "4g" as 0x4, silently
// swallowing the "g". On invalid digits the parser fails, so an enclosing disjunction
// can fall through to the generic escaped-character parser.
const escapeHexParser = parserCreatorCompose(() => createTupleParser([
    createExactSequenceParser('\\x'),
    createFixedLengthSequenceParser(2),
]), ([, hexCode]) => async (parserContext) => {
    parserContext.invariant(/^[0-9a-fA-F]{2}$/.test(hexCode), 'Invalid hex escape "\\x%s"', hexCode);
    return literal(characterSetSingleton(String.fromCharCode(Number.parseInt(hexCode, 16))));
})();
|
|
263
|
+
// Unicode escape \uHHHH
// Parses `\u` followed by exactly four hexadecimal digits into a single-character literal.
// The digits are validated explicitly: Number.parseInt alone would turn "zzzz" into NaN
// (which String.fromCharCode maps to "\0") and would accept "12zz" as 0x12. On invalid
// digits the parser fails, so an enclosing disjunction can fall through to the generic
// escaped-character parser.
const escapeUnicodeParser = parserCreatorCompose(() => createTupleParser([
    createExactSequenceParser('\\u'),
    createFixedLengthSequenceParser(4),
]), ([, hexCode]) => async (parserContext) => {
    parserContext.invariant(/^[0-9a-fA-F]{4}$/.test(hexCode), 'Invalid unicode escape "\\u%s"', hexCode);
    return literal(characterSetSingleton(String.fromCharCode(Number.parseInt(hexCode, 16))));
})();
|
|
268
|
+
// Escaped metacharacter (e.g., \., \*, etc.)
// Accepts a backslash followed by any one element and yields that element as a literal.
const escapeMetacharacterParser = promiseCompose(
    createTupleParser([
        createExactSequenceParser('\\'),
        elementParser,
    ]),
    (parts) => {
        const escapedChar = parts[1];
        return literal(characterSetSingleton(escapedChar));
    },
);
|
|
273
|
+
// All escape sequences - use createDisjunctionParser to try specific escapes first
const escapeParser = createDisjunctionParser([
    // Control characters
    escapeNParser,
    escapeRParser,
    escapeTParser,
    escapeFParser,
    escapeVParser,
    escape0Parser,
    // Character classes
    escapeDigitParser,
    escapeNonDigitParser,
    escapeWordParser,
    escapeNonWordParser,
    escapeSpaceParser,
    escapeNonSpaceParser,
    // Numeric escapes
    escapeHexParser,
    escapeUnicodeParser,
    // Must be last - matches any escaped char
    escapeMetacharacterParser,
]);
|
|
291
|
+
// Dot (matches any character except newline)
|
|
292
|
+
// Consumes a single '.' and yields a literal node over wildcardCharacterSet.
const dotParser = promiseCompose(createExactSequenceParser('.'), () => literal(wildcardCharacterSet));
|
|
293
|
+
// Literal character (non-metacharacter)
// Reads one element and fails the parse when it is listed in `metaCharacters`;
// otherwise yields it as a single-character literal node.
const literalCharacterParser = parserCreatorCompose(
    () => elementParser,
    (char) => async (parserContext) => {
        const isMetaCharacter = metaCharacters.has(char);
        parserContext.invariant(!isMetaCharacter, 'Unexpected metacharacter "%s"', char);
        return literal(characterSetSingleton(char));
    },
)();
|
|
298
|
+
// Character class internals
|
|
299
|
+
// Character in a character class (different rules than outside)
|
|
300
|
+
// Characters that the character-class literal parsers below reject as plain literals.
const charClassMetaCharacters = new Set(['\\', ']', '^', '-']);
|
|
301
|
+
// Escape sequences inside character class (returns CharacterSet)
// Each table row pairs the escape as written in the pattern with a function producing
// the character set it denotes. Control-character rows build a fresh singleton per
// parse; class rows hand back the shared pre-defined set.
const [
    charClassEscapeNParser,
    charClassEscapeRParser,
    charClassEscapeTParser,
    charClassEscapeFParser,
    charClassEscapeVParser,
    charClassEscape0Parser,
    charClassEscapeDigitParser,
    charClassEscapeNonDigitParser,
    charClassEscapeWordParser,
    charClassEscapeNonWordParser,
    charClassEscapeSpaceParser,
    charClassEscapeNonSpaceParser,
] = [
    ['\\n', () => characterSetSingleton('\n')],
    ['\\r', () => characterSetSingleton('\r')],
    ['\\t', () => characterSetSingleton('\t')],
    ['\\f', () => characterSetSingleton('\f')],
    ['\\v', () => characterSetSingleton('\v')],
    ['\\0', () => characterSetSingleton('\0')],
    ['\\d', () => digitChars],
    ['\\D', () => nonDigitChars],
    ['\\w', () => wordChars],
    ['\\W', () => nonWordChars],
    ['\\s', () => whiteSpaceChars],
    ['\\S', () => nonWhiteSpaceChars],
].map(([sequence, toCharacterSet]) => promiseCompose(
    createExactSequenceParser(sequence),
    () => toCharacterSet(),
));
|
|
314
|
+
// Hex escape \xHH inside a character class (returns CharacterSet).
// The two digits are validated explicitly: Number.parseInt alone would turn "zz" into
// NaN (which String.fromCharCode maps to "\0") and would accept "4g" as 0x4. On invalid
// digits the parser fails, so an enclosing disjunction can fall through to the generic
// escaped-character parser.
const charClassEscapeHexParser = parserCreatorCompose(() => createTupleParser([
    createExactSequenceParser('\\x'),
    createFixedLengthSequenceParser(2),
]), ([, hexCode]) => async (parserContext) => {
    parserContext.invariant(/^[0-9a-fA-F]{2}$/.test(hexCode), 'Invalid hex escape "\\x%s"', hexCode);
    return characterSetSingleton(String.fromCharCode(Number.parseInt(hexCode, 16)));
})();
|
|
318
|
+
// Unicode escape \uHHHH inside a character class (returns CharacterSet).
// The four digits are validated explicitly: Number.parseInt alone would turn "zzzz"
// into NaN (which String.fromCharCode maps to "\0") and would accept "12zz" as 0x12.
// On invalid digits the parser fails, so an enclosing disjunction can fall through to
// the generic escaped-character parser.
const charClassEscapeUnicodeParser = parserCreatorCompose(() => createTupleParser([
    createExactSequenceParser('\\u'),
    createFixedLengthSequenceParser(4),
]), ([, hexCode]) => async (parserContext) => {
    parserContext.invariant(/^[0-9a-fA-F]{4}$/.test(hexCode), 'Invalid unicode escape "\\u%s"', hexCode);
    return characterSetSingleton(String.fromCharCode(Number.parseInt(hexCode, 16)));
})();
|
|
322
|
+
// Backslash followed by any one element inside a character class; yields the
// single-character set for that element.
const charClassEscapeMetacharacterParser = promiseCompose(
    createTupleParser([
        createExactSequenceParser('\\'),
        elementParser,
    ]),
    (parts) => {
        const escapedChar = parts[1];
        return characterSetSingleton(escapedChar);
    },
);
|
|
326
|
+
// Use createDisjunctionParser to try specific escapes before generic metacharacter escape
const charClassEscapeParser = createDisjunctionParser([
    // Control characters
    charClassEscapeNParser,
    charClassEscapeRParser,
    charClassEscapeTParser,
    charClassEscapeFParser,
    charClassEscapeVParser,
    charClassEscape0Parser,
    // Character classes
    charClassEscapeDigitParser,
    charClassEscapeNonDigitParser,
    charClassEscapeWordParser,
    charClassEscapeNonWordParser,
    charClassEscapeSpaceParser,
    charClassEscapeNonSpaceParser,
    // Numeric escapes
    charClassEscapeHexParser,
    charClassEscapeUnicodeParser,
    // Must be last - matches any escaped char
    charClassEscapeMetacharacterParser,
]);
|
|
344
|
+
// Single character (not escape, not ], not -)
// Reads one element and fails the parse when it is listed in `charClassMetaCharacters`;
// otherwise yields the single-character set for it.
const charClassLiteralParser = parserCreatorCompose(
    () => elementParser,
    (char) => async (parserContext) => {
        const isReserved = charClassMetaCharacters.has(char);
        parserContext.invariant(!isReserved, 'Unexpected character class metacharacter "%s"', char);
        return characterSetSingleton(char);
    },
)();
|
|
349
|
+
// Single char in character class (escape or literal) - returns the character string for range checking
//
// The alternatives overlap: `\n`, `\xHH` and `\uHHHH` are also accepted by the generic
// "backslash + any element" alternative. They are therefore combined with
// createDisjunctionParser, which (as with escapeParser and charClassEscapeParser) tries
// the specific escapes first and the generic one last, instead of createUnionParser.
// Order matters: keep the generic escape and the literal alternative at the end.
const charClassSingleCharParser = createDisjunctionParser([
    // Escape sequences that produce single chars
    promiseCompose(createExactSequenceParser('\\n'), () => '\n'),
    promiseCompose(createExactSequenceParser('\\r'), () => '\r'),
    promiseCompose(createExactSequenceParser('\\t'), () => '\t'),
    promiseCompose(createExactSequenceParser('\\f'), () => '\f'),
    promiseCompose(createExactSequenceParser('\\v'), () => '\v'),
    promiseCompose(createExactSequenceParser('\\0'), () => '\0'),
    // \xHH - digits are validated because Number.parseInt would accept a valid prefix
    // ("4g" -> 0x4) or yield NaN ("zz"), which String.fromCharCode maps to "\0".
    parserCreatorCompose(() => createTupleParser([
        createExactSequenceParser('\\x'),
        createFixedLengthSequenceParser(2),
    ]), ([, hexCode]) => async (parserContext) => {
        parserContext.invariant(/^[0-9a-fA-F]{2}$/.test(hexCode), 'Invalid hex escape "\\x%s"', hexCode);
        return String.fromCharCode(Number.parseInt(hexCode, 16));
    })(),
    // \uHHHH - validated for the same reason as \xHH.
    parserCreatorCompose(() => createTupleParser([
        createExactSequenceParser('\\u'),
        createFixedLengthSequenceParser(4),
    ]), ([, hexCode]) => async (parserContext) => {
        parserContext.invariant(/^[0-9a-fA-F]{4}$/.test(hexCode), 'Invalid unicode escape "\\u%s"', hexCode);
        return String.fromCharCode(Number.parseInt(hexCode, 16));
    })(),
    // Generic escape: backslash followed by any element yields that element.
    promiseCompose(createTupleParser([
        createExactSequenceParser('\\'),
        elementParser,
    ]), ([, char]) => char),
    // Literal char (not metacharacter, not -)
    parserCreatorCompose(() => elementParser, char => async (parserContext) => {
        parserContext.invariant(!charClassMetaCharacters.has(char) && char !== '-', 'Unexpected character "%s"', char);
        return char;
    })(),
]);
|
|
376
|
+
// Range inside a character class, e.g. a-z: single char, hyphen, single char.
// The two endpoint strings are handed to characterSetCharRange in source order.
const charClassRangeParser = promiseCompose(
    createTupleParser([
        charClassSingleCharParser,
        createExactSequenceParser('-'),
        charClassSingleCharParser,
    ]),
    (parsed) => {
        const rangeStart = parsed[0];
        const rangeEnd = parsed[2];
        return characterSetCharRange(rangeStart, rangeEnd);
    },
);
|
|
382
|
+
// One member of a character class, tried in this order: a range, an escape
// (including class escapes such as \d and \w), a literal, and finally a bare hyphen.
const charClassElementParser = createDisjunctionParser([
    charClassRangeParser,
    charClassEscapeParser,
    charClassLiteralParser,
    // Bare hyphen treated as a literal '-'.
    // NOTE(review): the lookahead here is negative, so this alternative only accepts a
    // '-' that is NOT immediately followed by ']' — confirm this matches the intended
    // "hyphen at end of class" handling.
    promiseCompose(
        createTupleParser([
            createExactSequenceParser('-'),
            createNegativeLookaheadParser(createExactSequenceParser(']')),
        ]),
        () => characterSetSingleton('-'),
    ),
]);
|
|
393
|
+
// Bracketed character class: '[', an optional '^', then members up to the closing ']'.
// The members are unioned into one character set, complemented when '^' was present,
// and wrapped with literal().
const characterClassParser = promiseCompose(
    createTupleParser([
        createExactSequenceParser('['),
        createOptionalParser(createExactSequenceParser('^')),
        createTerminatedArrayParser(charClassElementParser, createExactSequenceParser(']')),
    ]),
    (parsed) => {
        const negation = parsed[1];
        // The terminated-array result is destructured for its first entry, the member list.
        const [members] = parsed[2];
        let combined = emptyCharacterSet;
        for (const member of members) {
            combined = characterSetUnion(combined, member);
        }
        const isNegated = negation !== undefined;
        return literal(isNegated ? characterSetComplement(combined) : combined);
    },
);
|
|
405
|
+
// '*' quantifier marker; quantifiedParser dispatches on the 'star' type tag.
const starQuantifierParser = createObjectParser({
    type: 'star',
    _marker: createExactSequenceParser('*'),
});
|
|
409
|
+
// '+' quantifier marker; quantifiedParser dispatches on the 'plus' type tag.
const plusQuantifierParser = createObjectParser({
    type: 'plus',
    _marker: createExactSequenceParser('+'),
});
|
|
413
|
+
// '?' quantifier marker; quantifiedParser dispatches on the 'optional' type tag.
const optionalQuantifierParser = createObjectParser({
    type: 'optional',
    _marker: createExactSequenceParser('?'),
});
|
|
417
|
+
// Decimal integer used by brace quantifiers: collects consecutive '0'-'9' elements,
// requires at least one, and converts the joined digits with radix 10.
const numberParser = parserCreatorCompose(
    () => createArrayParser(parserCreatorCompose(
        () => elementParser,
        (character) => async (parserContext) => {
            const isDigit = character >= '0' && character <= '9';
            parserContext.invariant(isDigit, 'Expected digit, got "%s"', character);
            return character;
        },
    )()),
    (digitChars) => async (parserContext) => {
        parserContext.invariant(digitChars.length > 0, 'Expected at least one digit');
        const digitText = digitChars.join('');
        return Number.parseInt(digitText, 10);
    },
)();
|
|
425
|
+
// Brace quantifier in its three forms, all tagged type 'repeat':
//   {n}   -> bounds is the number n
//   {n,}  -> bounds is { min }
//   {n,m} -> bounds is { min, max }
const braceQuantifierParser = promiseCompose(
    createTupleParser([
        createExactSequenceParser('{'),
        numberParser,
        createOptionalParser(createTupleParser([
            createExactSequenceParser(','),
            createOptionalParser(numberParser),
        ])),
        createExactSequenceParser('}'),
    ]),
    (parsed) => {
        const min = parsed[1];
        const commaPart = parsed[2];
        // No comma at all: exact repetition count.
        if (commaPart === undefined) {
            return { type: 'repeat', bounds: min };
        }
        const max = commaPart[1];
        // Comma without an upper bound leaves max out of the bounds object entirely.
        const bounds = max === undefined ? { min } : { min, max };
        return { type: 'repeat', bounds };
    },
);
|
|
447
|
+
// Any single quantifier: '*', '+', '?', or a brace form.
const quantifierParser = createUnionParser([
    starQuantifierParser,
    plusQuantifierParser,
    optionalQuantifierParser,
    braceQuantifierParser,
]);
|
|
453
|
+
// Groups
// Plain capture group "(...)". The negative lookahead refuses a '?' right after the
// opening parenthesis, so the "(?..." group forms are not parsed by this alternative.
// The body is reached through an accessor because alternationParser is declared later.
const captureGroupParser = promiseCompose(
    createTupleParser([
        createExactSequenceParser('('),
        createNegativeLookaheadParser(createExactSequenceParser('?')),
        createParserAccessorParser(() => alternationParser),
        createExactSequenceParser(')'),
    ]),
    (parsed) => captureGroup(parsed[2]),
);
|
|
461
|
+
// Named capture group "(?<name>...)". The name is every element up to the first '>';
// the collected characters are joined and passed to captureGroup as its second argument.
const namedCaptureGroupParser = promiseCompose(
    createTupleParser([
        createExactSequenceParser('(?<'),
        createTerminatedArrayParser(
            parserCreatorCompose(
                () => elementParser,
                (character) => async (parserContext) => {
                    parserContext.invariant(character !== '>', 'Unexpected ">"');
                    return character;
                },
            )(),
            createExactSequenceParser('>'),
        ),
        createParserAccessorParser(() => alternationParser),
        createExactSequenceParser(')'),
    ]),
    (parsed) => {
        const [nameCharacters] = parsed[1];
        const body = parsed[2];
        return captureGroup(body, nameCharacters.join(''));
    },
);
|
|
471
|
+
// Non-capturing group "(?:...)": the inner expression is returned unwrapped.
const nonCaptureGroupParser = promiseCompose(
    createTupleParser([
        createExactSequenceParser('(?:'),
        createParserAccessorParser(() => alternationParser),
        createExactSequenceParser(')'),
    ]),
    (parsed) => parsed[1],
);
|
|
477
|
+
// Positive lookahead "(?=...)". Emits a 'lookahead-marker' record rather than a finished
// AST node; processElements later reads its isPositive and inner fields.
// NOTE(review): presumably createObjectParser drops the underscore-prefixed keys from
// the result — confirm against its implementation.
const positiveLookaheadMarkerParser = createObjectParser({
    type: 'lookahead-marker',
    isPositive: true,
    _open: createExactSequenceParser('(?='),
    inner: createParserAccessorParser(() => alternationParser),
    _close: createExactSequenceParser(')'),
});
|
|
485
|
+
// Negative lookahead "(?!...)". Same record shape as the positive marker, with
// isPositive set to false; processElements turns it into a lookahead node.
const negativeLookaheadMarkerParser = createObjectParser({
    type: 'lookahead-marker',
    isPositive: false,
    _open: createExactSequenceParser('(?!'),
    inner: createParserAccessorParser(() => alternationParser),
    _close: createExactSequenceParser(')'),
});
|
|
493
|
+
// Any group that yields a regular-expression node: named, non-capturing, or plain capture.
const groupParser = createUnionParser([
    namedCaptureGroupParser,
    nonCaptureGroupParser,
    captureGroupParser,
]);
|
|
498
|
+
// '^' anchor, emitted as a 'start-anchor-marker' record that processElements resolves.
const startAnchorMarkerParser = createObjectParser({
    type: 'start-anchor-marker',
    _marker: createExactSequenceParser('^'),
});
|
|
502
|
+
// '$' anchor, emitted as an 'end-anchor-marker' record that processElements resolves.
const endAnchorMarkerParser = createObjectParser({
    type: 'end-anchor-marker',
    _marker: createExactSequenceParser('$'),
});
|
|
506
|
+
// Atom: anything a quantifier may follow — a group, a character class, an escape,
// the dot, or a literal character. Anchors are deliberately not atoms.
const atomParser = createUnionParser([
    groupParser,
    characterClassParser,
    escapeParser,
    dotParser,
    literalCharacterParser,
]);
|
|
514
|
+
// An atom with an optional trailing quantifier. Without a quantifier the atom is
// returned unchanged; otherwise it is wrapped according to the quantifier's type tag.
const quantifiedParser = promiseCompose(
    createTupleParser([
        atomParser,
        createOptionalParser(quantifierParser),
    ]),
    (parsed) => {
        const atom = parsed[0];
        const quantifier = parsed[1];
        if (quantifier === undefined) {
            return atom;
        }
        const kind = quantifier.type;
        if (kind === 'star') {
            return star(atom);
        }
        if (kind === 'plus') {
            return plus(atom);
        }
        if (kind === 'optional') {
            return optional(atom);
        }
        if (kind === 'repeat') {
            return repeat(atom, quantifier.bounds);
        }
        // Any other type tag falls through and yields undefined.
        return undefined;
    },
);
|
|
533
|
+
// One item of a concatenation: an anchor marker, a lookahead marker, or a
// (possibly quantified) atom.
const sequenceElementParser = createUnionParser([
    startAnchorMarkerParser,
    endAnchorMarkerParser,
    positiveLookaheadMarkerParser,
    negativeLookaheadMarkerParser,
    quantifiedParser,
]);
|
|
541
|
+
// Folds a list of regular expressions into nested concat nodes, grouping to the right:
// [a, b, c] becomes concat(a, concat(b, c)). An empty list yields epsilon and a
// single-element list yields that element untouched.
function concatList(parts) {
    const count = parts.length;
    if (count === 0) {
        return epsilon;
    }
    // Seed with the last part, then wrap each earlier part around the accumulated tail.
    let tail = parts[count - 1];
    for (let index = count - 2; index >= 0; index--) {
        tail = concat(parts[index], tail);
    }
    return tail;
}
|
|
548
|
+
// Turns the flat element list produced by the sequence parser into an AST.
// Anchor and lookahead markers act as infix operators (as in @gruhn/regex-utils): the
// list is split at the first marker found and each side is processed recursively.
// Markers are searched in this order, lowest precedence first:
//   start anchor, then end anchor, then lookahead; plain concatenation binds tightest.
function processElements(elements) {
    if (elements.length === 0) {
        return epsilon;
    }

    // Position of the first element carrying the given marker type, or -1.
    const indexOfMarker = (markerType) => elements.findIndex((element) => 'type' in element && element.type === markerType);
    // Elements strictly before and strictly after the given position.
    const splitAround = (index) => [elements.slice(0, index), elements.slice(index + 1)];

    const startAnchorIndex = indexOfMarker('start-anchor-marker');
    if (startAnchorIndex !== -1) {
        const [before, after] = splitAround(startAnchorIndex);
        return startAnchor(processElements(before), processElements(after));
    }

    const endAnchorIndex = indexOfMarker('end-anchor-marker');
    if (endAnchorIndex !== -1) {
        const [before, after] = splitAround(endAnchorIndex);
        return endAnchor(processElements(before), processElements(after));
    }

    const lookaheadIndex = indexOfMarker('lookahead-marker');
    if (lookaheadIndex !== -1) {
        const { isPositive, inner } = elements[lookaheadIndex];
        const [before, after] = splitAround(lookaheadIndex);
        // The lookahead node owns everything that follows the marker.
        const lookaheadNode = lookahead(isPositive, inner, processElements(after));
        // Anything preceding the marker is concatenated in front of the lookahead node.
        return before.length === 0
            ? lookaheadNode
            : concat(processElements(before), lookaheadNode);
    }

    // No markers remain: every element is a regular expression, so concatenate them.
    return concatList(elements);
}
|
|
586
|
+
// Concatenation: parses zero or more sequence elements and hands the resulting
// list to processElements to resolve markers and build the AST.
const concatParser = promiseCompose(
    createArrayParser(sequenceElementParser),
    processElements,
);
|
|
588
|
+
// Alternation: concat ('|' concat)*
// The branches are combined with union, grouping to the right like @gruhn/regex-utils:
// a|b|c becomes union(a, union(b, c)). A single branch is returned as-is.
const alternationParser = promiseCompose(
    createTupleParser([
        concatParser,
        createArrayParser(promiseCompose(
            createTupleParser([
                createExactSequenceParser('|'),
                concatParser,
            ]),
            (pair) => pair[1],
        )),
    ]),
    (parsed) => {
        const branches = [parsed[0], ...parsed[1]];
        // Seed with the last branch and wrap each earlier branch around the accumulated tail.
        let combined = branches[branches.length - 1];
        for (let index = branches.length - 2; index >= 0; index--) {
            combined = union(branches[index], combined);
        }
        return combined;
    },
);
|
|
600
|
+
// Public entry point of this module: the top-level rule of the grammar is alternation,
// so the exported parser is simply an alias of alternationParser.
export const regularExpressionParser = alternationParser;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|