@futpib/parser 1.0.6 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/arbitraryBash.d.ts +3 -0
- package/build/arbitraryBash.js +142 -0
- package/build/arbitraryJavaScript.js +4 -4
- package/build/arbitraryZipStream.d.ts +1 -1
- package/build/bashParser.js +317 -75
- package/build/bashParser.test.js +71 -0
- package/build/bashUnparser.d.ts +3 -0
- package/build/bashUnparser.js +157 -0
- package/build/bashUnparser.test.d.ts +1 -0
- package/build/bashUnparser.test.js +24 -0
- package/build/bsonParser.js +3 -3
- package/build/dalvikExecutableParser/stringSyntaxParser.js +20 -31
- package/build/dalvikExecutableParser.js +2 -5
- package/build/hasExecutable.js +1 -1
- package/build/index.d.ts +1 -0
- package/build/index.js +1 -0
- package/build/jsonParser.js +2 -7
- package/build/predicateElementParser.d.ts +3 -0
- package/build/predicateElementParser.js +10 -0
- package/build/regularExpression.d.ts +12 -3
- package/build/regularExpression.js +10 -1
- package/build/regularExpressionParser.js +39 -25
- package/build/regularExpressionParser.test.js +2 -2
- package/build/smaliParser.js +5 -9
- package/build/symbolicExpressionParser.js +8 -3
- package/package.json +9 -9
- package/readme.md +468 -7
- package/src/arbitraryBash.ts +237 -0
- package/src/arbitraryJavaScript.ts +4 -4
- package/src/bashParser.test.ts +138 -0
- package/src/bashParser.ts +467 -139
- package/src/bashUnparser.test.ts +37 -0
- package/src/bashUnparser.ts +211 -0
- package/src/bsonParser.ts +4 -7
- package/src/dalvikExecutableParser/stringSyntaxParser.ts +27 -74
- package/src/dalvikExecutableParser.ts +4 -10
- package/src/hasExecutable.ts +1 -1
- package/src/index.ts +4 -0
- package/src/jsonParser.ts +2 -11
- package/src/predicateElementParser.ts +22 -0
- package/src/regularExpression.ts +11 -1
- package/src/regularExpressionParser.test.ts +3 -3
- package/src/regularExpressionParser.ts +49 -30
- package/src/smaliParser.ts +11 -23
- package/src/symbolicExpressionParser.ts +9 -3
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { testProp } from '@fast-check/ava';
|
|
2
|
+
import { arbitraryBashCommandList } from './arbitraryBash.js';
|
|
3
|
+
import { bashScriptUnparser } from './bashUnparser.js';
|
|
4
|
+
import { bashScriptParser } from './bashParser.js';
|
|
5
|
+
import { runParser } from './parser.js';
|
|
6
|
+
import { runUnparser } from './unparser.js';
|
|
7
|
+
import { stringParserInputCompanion } from './parserInputCompanion.js';
|
|
8
|
+
import { stringUnparserOutputCompanion } from './unparserOutputCompanion.js';
|
|
9
|
+
|
|
10
|
+
// Optional seed for the property test below: the numeric value of the SEED environment variable, or undefined when SEED is unset or empty.
const seed = process.env.SEED ? Number(process.env.SEED) : undefined;
|
|
11
|
+
|
|
12
|
+
/**
 * Drains an async iterable of string chunks and returns them concatenated in arrival order.
 *
 * @param asyncIterable - Source of string chunks; it is consumed to completion.
 * @returns The concatenation of every chunk, or the empty string when nothing is yielded.
 */
async function collectString(asyncIterable: AsyncIterable<string>): Promise<string> {
	const chunks: string[] = [];

	for await (const chunk of asyncIterable) {
		chunks.push(chunk);
	}

	return chunks.join('');
}
|
|
20
|
+
|
|
21
|
+
// Property: unparsing an arbitrary command list to text and parsing that text again yields a deeply equal AST.
testProp(
	'bash roundtrip',
	[arbitraryBashCommandList],
	async (t, command) => {
		const unparsedChunks = runUnparser(bashScriptUnparser, command, stringUnparserOutputCompanion);
		const source = await collectString(unparsedChunks);

		const reparsed = await runParser(bashScriptParser, source, stringParserInputCompanion);

		t.deepEqual(reparsed, command);
	},
	{
		verbose: true,
		seed,
	},
);
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
import { type Unparser } from './unparser.js';
|
|
2
|
+
import {
|
|
3
|
+
type BashWord,
|
|
4
|
+
type BashWordPart,
|
|
5
|
+
type BashSimpleCommand,
|
|
6
|
+
type BashSubshell,
|
|
7
|
+
type BashBraceGroup,
|
|
8
|
+
type BashCommandUnit,
|
|
9
|
+
type BashPipeline,
|
|
10
|
+
type BashCommandList,
|
|
11
|
+
type BashRedirect,
|
|
12
|
+
type BashAssignment,
|
|
13
|
+
type BashCommand,
|
|
14
|
+
} from './bash.js';
|
|
15
|
+
|
|
16
|
+
/**
 * Reports whether `ch` is `_` or lies within one of the ranges `a`–`z`, `A`–`Z`, `0`–`9`.
 *
 * @param ch - The string to classify (callers pass a single character).
 * @returns `true` when `ch` is accepted, `false` otherwise (including for the empty string).
 */
function isIdentChar(ch: string): boolean {
	if (ch === '_') {
		return true;
	}

	const isLowercase = ch >= 'a' && ch <= 'z';
	const isUppercase = ch >= 'A' && ch <= 'Z';
	const isDigit = ch >= '0' && ch <= '9';

	return isLowercase || isUppercase || isDigit;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Reports whether the part following `parts[index]` is a literal whose first character is an identifier character.
 *
 * @param parts - The parts of a word.
 * @param index - Index of the current part; the part at `index + 1` is inspected.
 * @returns `false` when there is no following part, when it is not a literal, or when the literal is empty.
 */
function nextPartStartsWithIdentChar(parts: BashWordPart[], index: number): boolean {
	const following = parts[index + 1];

	if (following?.type !== 'literal') {
		return false;
	}

	// `charAt(0)` is '' for an empty literal, which `isIdentChar` rejects.
	return isIdentChar(following.value.charAt(0));
}
|
|
32
|
+
|
|
33
|
+
/**
 * Renders a word by concatenating the text of each of its parts, with no separator between them.
 *
 * @param word - The word whose `parts` are rendered in order.
 * @returns The concatenated text; empty when the word has no parts.
 */
function unparseWord(word: BashWord): string {
	return word.parts.reduce(
		(text, part, index) => text + unparseWordPartInContext(part, word.parts, index),
		'',
	);
}
|
|
36
|
+
|
|
37
|
+
/**
 * Renders one part of a word, taking the part that follows it into account.
 *
 * An unbraced variable reference directly followed by a literal that starts with an identifier character would be
 * read back as one longer variable name (`$foo` followed by `bar` becomes `$foobar`). In that case the reference is
 * written with braces (`${foo}bar`) so the variable name stays delimited. Every other part is rendered by
 * `unparseWordPart` unchanged.
 *
 * Note that text produced this way re-parses as a braced reference rather than a plain one.
 *
 * @param part - The part to render.
 * @param parts - All parts of the enclosing word.
 * @param index - Position of `part` within `parts`.
 * @returns The shell text for `part`.
 */
function unparseWordPartInContext(part: BashWordPart, parts: BashWordPart[], index: number): string {
	if (part.type === 'variable' && nextPartStartsWithIdentChar(parts, index)) {
		return '${' + part.name + '}';
	}

	return unparseWordPart(part);
}
|
|
40
|
+
|
|
41
|
+
/**
 * Renders a single word part as shell text, independent of its neighbours.
 *
 * - `literal`: escaped with `escapeLiteral`.
 * - `singleQuoted`: the value is placed verbatim between single quotes (no escaping is applied).
 * - `doubleQuoted`: inner parts are rendered with `unparseDoubleQuotedPart` and wrapped in double quotes.
 * - `variable`: `$` followed by the name.
 * - `variableBraced`: `${name}`, with the operator and, if present, the operand word inserted after the name.
 * - `commandSubstitution`: `$( command )`, with a space inside each parenthesis.
 * - `backtickSubstitution`: the command text placed verbatim between backticks.
 * - `arithmeticExpansion`: `$((expression))`.
 * - `processSubstitution`: the direction followed by the parenthesised command.
 *
 * @param part - The part to render.
 * @returns The shell text for `part`.
 */
function unparseWordPart(part: BashWordPart): string {
	switch (part.type) {
		case 'literal': {
			return escapeLiteral(part.value);
		}

		case 'singleQuoted': {
			return `'${part.value}'`;
		}

		case 'doubleQuoted': {
			const inner = part.parts.map(innerPart => unparseDoubleQuotedPart(innerPart)).join('');
			return `"${inner}"`;
		}

		case 'variable': {
			return `$${part.name}`;
		}

		case 'variableBraced': {
			const { name, operator, operand } = part;
			const pieces = [ '${', name ];

			// The operand is only emitted when an operator is present.
			if (operator !== undefined) {
				pieces.push(operator);
				if (operand !== undefined) {
					pieces.push(unparseWord(operand));
				}
			}

			pieces.push('}');
			return pieces.join('');
		}

		case 'commandSubstitution': {
			return `$( ${unparseCommand(part.command)} )`;
		}

		case 'backtickSubstitution': {
			const backtick = '`';
			return backtick + unparseCommand(part.command) + backtick;
		}

		case 'arithmeticExpansion': {
			return `$((${part.expression}))`;
		}

		case 'processSubstitution': {
			return `${part.direction}(${unparseCommand(part.command)})`;
		}
	}
}
|
|
81
|
+
|
|
82
|
+
/**
 * Renders a part that sits inside a double-quoted string.
 *
 * Literal text gets a backslash in front of each `\`, `$`, `` ` `` and `"`; every other character is kept as is.
 * Non-literal parts are delegated to `unparseWordPart`.
 *
 * @param part - The part to render.
 * @returns The text to place between the surrounding double quotes.
 */
function unparseDoubleQuotedPart(part: BashWordPart): string {
	if (part.type !== 'literal') {
		return unparseWordPart(part);
	}

	return part.value.replace(/[\\$`"]/g, special => '\\' + special);
}
|
|
101
|
+
|
|
102
|
+
/**
 * Backslash-escapes the characters of an unquoted literal that this unparser treats as special.
 *
 * The escaped set is: space, tab, newline, `|`, `&`, `;`, `<`, `>`, `(`, `)`, `$`, `` ` ``, `"`, `'`, `\`, `{`, `}`
 * and `#`. Characters outside this set (for example `*` or `?`) are passed through unchanged.
 *
 * @param value - The raw literal text.
 * @returns `value` with a backslash inserted before each character of the set above.
 */
function escapeLiteral(value: string): string {
	return value.replace(/[ \t\n|&;<>()$`"'\\{}#]/g, special => '\\' + special);
}
|
|
114
|
+
|
|
115
|
+
/**
 * Renders a redirection as `[fd]operator target`, with no spaces between the three pieces.
 *
 * For a here-document target only the delimiter is written; this function does not emit any here-document body.
 * Any other target is rendered as a word.
 *
 * @param redirect - The redirection to render.
 * @returns The shell text for the redirection.
 */
function unparseRedirect(redirect: BashRedirect): string {
	const { fd, operator, target } = redirect;

	const fdText = fd === undefined ? '' : String(fd);
	const targetText = 'type' in target && target.type === 'hereDoc'
		? target.delimiter
		: unparseWord(target as BashWord);

	return fdText + operator + targetText;
}
|
|
130
|
+
|
|
131
|
+
/**
 * Renders a variable assignment as `name=value`; the value is omitted (leaving `name=`) when it is undefined.
 *
 * @param assignment - The assignment to render.
 * @returns The shell text for the assignment.
 */
function unparseAssignment(assignment: BashAssignment): string {
	const valueText = assignment.value === undefined ? '' : unparseWord(assignment.value);

	return `${assignment.name}=${valueText}`;
}
|
|
139
|
+
|
|
140
|
+
/**
 * Renders a simple command: assignments, then the command name (if any), then the arguments, then the redirections,
 * all separated by single spaces.
 *
 * @param cmd - The simple command to render.
 * @returns The shell text; when either the word section or the redirect section is empty no extra space is added.
 */
function unparseSimpleCommand(cmd: BashSimpleCommand): string {
	const words = [
		...cmd.assignments.map(assignment => unparseAssignment(assignment)),
		...(cmd.name === undefined ? [] : [ unparseWord(cmd.name) ]),
		...cmd.args.map(arg => unparseWord(arg)),
	];
	const redirects = cmd.redirects.map(redirect => unparseRedirect(redirect));

	// Join the two sections with one space, dropping whichever section is empty.
	return [ words.join(' '), redirects.join(' ') ]
		.filter(section => section !== '')
		.join(' ');
}
|
|
164
|
+
|
|
165
|
+
/**
 * Renders one pipeline stage: a simple command, a `( … )` subshell or a `{ … }` brace group.
 *
 * @param unit - The command unit to render.
 * @returns The shell text for the unit.
 */
function unparseCommandUnit(unit: BashCommandUnit): string {
	switch (unit.type) {
		case 'simple': {
			return unparseSimpleCommand(unit);
		}

		case 'subshell': {
			return `(${unparseCommand(unit.body)})`;
		}

		case 'braceGroup': {
			// NOTE(review): Bash requires the list inside `{ … }` to end with `;` or a newline. Nothing is added here,
			// so this presumably relies on the body's last entry carrying a separator — confirm against the parser.
			return `{ ${unparseCommand(unit.body)} }`;
		}
	}
}
|
|
177
|
+
|
|
178
|
+
/**
 * Renders a pipeline: its stages joined with ` | `, prefixed with `! ` when the pipeline is negated.
 *
 * @param pipeline - The pipeline to render.
 * @returns The shell text for the pipeline.
 */
function unparsePipeline(pipeline: BashPipeline): string {
	const negation = pipeline.negated ? '! ' : '';
	const stages = pipeline.commands.map(unit => unparseCommandUnit(unit));

	return negation + stages.join(' | ');
}
|
|
187
|
+
|
|
188
|
+
/**
 * Renders a command as shell text by delegating to `unparseCommandList`.
 *
 * @param command - The command to render.
 * @returns The shell text for the command.
 */
function unparseCommand(command: BashCommand): string {
	const text = unparseCommandList(command);

	return text;
}
|
|
191
|
+
|
|
192
|
+
/**
 * Renders a command list: each entry is its pipeline immediately followed by its separator (when defined), and
 * consecutive entries are separated by a single space.
 *
 * @param list - The command list to render.
 * @returns The shell text; empty when the list has no entries.
 */
function unparseCommandList(list: BashCommandList): string {
	return list.entries
		.map(entry => {
			const separatorText = entry.separator === undefined ? '' : entry.separator;
			return unparsePipeline(entry.pipeline) + separatorText;
		})
		.join(' ');
}
|
|
208
|
+
|
|
209
|
+
/**
 * Unparser for a whole Bash command: yields the complete rendered text as a single string chunk.
 */
export const bashScriptUnparser: Unparser<BashCommand, string> = async function * (command) {
	const scriptText = unparseCommand(command);

	yield scriptText;
};
|
package/src/bsonParser.ts
CHANGED
|
@@ -6,10 +6,10 @@ import { createTupleParser } from './tupleParser.js';
|
|
|
6
6
|
import { createSkipParser } from './skipParser.js';
|
|
7
7
|
import { createParserAccessorParser } from './parserAccessorParser.js';
|
|
8
8
|
import { createTerminatedArrayParser } from './terminatedArrayParser.js';
|
|
9
|
-
import { createElementParser } from './elementParser.js';
|
|
10
9
|
import { createExactElementParser } from './exactElementParser.js';
|
|
11
10
|
import { createUnionParser } from './unionParser.js';
|
|
12
11
|
import { parserCreatorCompose } from './parserCreatorCompose.js';
|
|
12
|
+
import { createPredicateElementParser } from './predicateElementParser.js';
|
|
13
13
|
|
|
14
14
|
// Builds a parser that reads a fixed-length sequence of `length` elements and returns it copied into a Buffer via `Buffer.from`.
const createFixedLengthBufferParser = (length: number): Parser<Buffer, Uint8Array> => promiseCompose(createFixedLengthSequenceParser<Uint8Array>(length), sequence => Buffer.from(sequence));
|
|
15
15
|
|
|
@@ -17,16 +17,13 @@ const buffer1Parser = createFixedLengthBufferParser(1);
|
|
|
17
17
|
const buffer4Parser = createFixedLengthBufferParser(4);
|
|
18
18
|
const buffer8Parser = createFixedLengthBufferParser(8);
|
|
19
19
|
|
|
20
|
-
const elementParser: Parser<number, Uint8Array> = createElementParser();
|
|
21
|
-
|
|
22
20
|
const nullByteParser: Parser<number, Uint8Array> = createExactElementParser(0);
|
|
23
21
|
|
|
22
|
+
// Parser for a single byte whose value is not 0; the predicate fails on a 0 byte.
const nonNullByteParser: Parser<number, Uint8Array> = createPredicateElementParser((byte: number) => byte !== 0);
|
|
23
|
+
|
|
24
24
|
const cstringParser: Parser<string, Uint8Array> = promiseCompose(
|
|
25
25
|
createTerminatedArrayParser(
|
|
26
|
-
|
|
27
|
-
() => elementParser,
|
|
28
|
-
(byte: number) => async parserContext => parserContext.invariant(byte, 'Expected non-null byte'),
|
|
29
|
-
)(),
|
|
26
|
+
nonNullByteParser,
|
|
30
27
|
nullByteParser,
|
|
31
28
|
),
|
|
32
29
|
([ sequence ]) => Buffer.from(sequence).toString('utf8'),
|
|
@@ -1,84 +1,37 @@
|
|
|
1
|
-
import invariant from 'invariant';
|
|
2
1
|
import { type Parser, setParserName } from '../parser.js';
|
|
3
|
-
import { type ParserContext } from '../parserContext.js';
|
|
4
2
|
import { promiseCompose } from '../promiseCompose.js';
|
|
5
3
|
import { createSeparatedArrayParser } from '../separatedArrayParser.js';
|
|
6
4
|
import { createExactSequenceParser } from '../exactSequenceParser.js';
|
|
7
5
|
import { createUnionParser } from '../unionParser.js';
|
|
8
6
|
import { createTupleParser } from '../tupleParser.js';
|
|
9
7
|
import { createArrayParser } from '../arrayParser.js';
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|| (
|
|
38
|
-
character === '-'
|
|
39
|
-
)
|
|
40
|
-
|| (
|
|
41
|
-
character === '_'
|
|
42
|
-
)
|
|
43
|
-
|| (
|
|
44
|
-
character === '\u00A0'
|
|
45
|
-
)
|
|
46
|
-
|| (
|
|
47
|
-
character >= '\u00A1' && character <= '\u1FFF'
|
|
48
|
-
)
|
|
49
|
-
|| (
|
|
50
|
-
character >= '\u2000' && character <= '\u200A'
|
|
51
|
-
)
|
|
52
|
-
|| (
|
|
53
|
-
character >= '\u2010' && character <= '\u2027'
|
|
54
|
-
)
|
|
55
|
-
|| (
|
|
56
|
-
character === '\u202F'
|
|
57
|
-
)
|
|
58
|
-
|| (
|
|
59
|
-
character >= '\u2030' && character <= '\uD7FF'
|
|
60
|
-
)
|
|
61
|
-
|| (
|
|
62
|
-
character >= '\uE000' && character <= '\uFFEF'
|
|
63
|
-
)
|
|
64
|
-
|| (
|
|
65
|
-
character >= '\uD800' && character <= '\uDBFF'
|
|
66
|
-
)
|
|
67
|
-
) {
|
|
68
|
-
parserContext.skip(1);
|
|
69
|
-
|
|
70
|
-
characters.push(character);
|
|
71
|
-
|
|
72
|
-
continue;
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
parserContext.invariant(characters.length > 0, 'Expected at least one character');
|
|
76
|
-
|
|
77
|
-
break;
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
return characters.join('');
|
|
81
|
-
};
|
|
8
|
+
import { createNonEmptyArrayParser } from '../nonEmptyArrayParser.js';
|
|
9
|
+
import { createPredicateElementParser } from '../predicateElementParser.js';
|
|
10
|
+
|
|
11
|
+
/**
 * Inclusive `[low, high]` bounds accepted by `isSmaliSimpleNameChar`.
 * Individually accepted characters are written as ranges whose two bounds are equal.
 */
const smaliSimpleNameCharRanges: ReadonlyArray<readonly [ string, string ]> = [
	[ 'a', 'z' ],
	[ 'A', 'Z' ],
	[ '0', '9' ],
	[ ' ', ' ' ],
	[ '$', '$' ],
	[ '-', '-' ],
	[ '_', '_' ],
	[ '\u00A0', '\u00A0' ],
	[ '\u00A1', '\u1FFF' ],
	[ '\u2000', '\u200A' ],
	[ '\u2010', '\u2027' ],
	[ '\u202F', '\u202F' ],
	[ '\u2030', '\uD7FF' ],
	[ '\uE000', '\uFFEF' ],
	[ '\uD800', '\uDBFF' ],
];

/**
 * Reports whether `character` is accepted as part of a smali simple name.
 *
 * Accepted are ASCII letters and digits, space, `$`, `-`, `_`, U+00A0, and the ranges U+00A1–U+1FFF,
 * U+2000–U+200A, U+2010–U+2027, U+202F, U+2030–U+D7FF, U+E000–U+FFEF and U+D800–U+DBFF.
 *
 * NOTE(review): U+DC00–U+DFFF (low surrogates) are not accepted. If elements arrive as UTF-16 code units rather
 * than whole code points, a supplementary character would be split after its first unit — confirm.
 *
 * @param character - The string to classify (callers pass a single element of the input).
 * @returns `true` when `character` lies within one of the accepted ranges.
 */
function isSmaliSimpleNameChar(character: string): boolean {
	return smaliSimpleNameCharRanges.some(([ low, high ]) => character >= low && character <= high);
}
|
|
30
|
+
|
|
31
|
+
/**
 * Parses a smali simple name: a non-empty run of elements accepted by `isSmaliSimpleNameChar`, joined into one string.
 */
export const smaliSimpleNameParser: Parser<string, string> = promiseCompose(
	createNonEmptyArrayParser(
		createPredicateElementParser(isSmaliSimpleNameChar),
	),
	nameCharacters => nameCharacters.join(''),
);
|
|
82
35
|
|
|
83
36
|
setParserName(smaliSimpleNameParser, 'smaliSimpleNameParser');
|
|
84
37
|
|
|
@@ -13,6 +13,7 @@ import { createTupleParser } from './tupleParser.js';
|
|
|
13
13
|
import { createParserAccessorParser } from './parserAccessorParser.js';
|
|
14
14
|
import { createSkipToParser } from './skipToParser.js';
|
|
15
15
|
import { createLookaheadParser } from './lookaheadParser.js';
|
|
16
|
+
import { createPredicateElementParser } from './predicateElementParser.js';
|
|
16
17
|
import {
|
|
17
18
|
getIsoTypedNumberArray,
|
|
18
19
|
type IndexIntoFieldIds,
|
|
@@ -843,16 +844,9 @@ type DalvikExecutableTaggedEncodedValue =
|
|
|
843
844
|
| { type: 'boolean'; value: boolean };
|
|
844
845
|
|
|
845
846
|
const createByteWith5LeastSignificantBitsEqualParser = (leastSignificant5: number): Parser<number, Uint8Array> => {
|
|
846
|
-
const byteWith5LeastSignificantBitsEqualParser: Parser<number, Uint8Array> =
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
(byte & 0b0001_1111) === leastSignificant5,
|
|
850
|
-
'Expected byte with 5 least significant bits equal to %s, but got %s',
|
|
851
|
-
leastSignificant5.toString(2).padStart(8, '0'),
|
|
852
|
-
byte.toString(2).padStart(8, '0'),
|
|
853
|
-
);
|
|
854
|
-
return byte;
|
|
855
|
-
};
|
|
847
|
+
const byteWith5LeastSignificantBitsEqualParser: Parser<number, Uint8Array> = createPredicateElementParser(
|
|
848
|
+
(byte: number) => (byte & 0b0001_1111) === leastSignificant5,
|
|
849
|
+
);
|
|
856
850
|
|
|
857
851
|
setParserName(byteWith5LeastSignificantBitsEqualParser, `createByteWith5LeastSignificantBitsEqualParser(${leastSignificant5.toString(2).padStart(5, '0')})`);
|
|
858
852
|
|
package/src/hasExecutable.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { execa } from 'execa';
|
|
2
2
|
|
|
3
3
|
export async function hasExecutable(executable: string) {
|
|
4
|
-
const hasExecutable = execa(executable).
|
|
4
|
+
const hasExecutable = await execa(executable).then(() => true).catch(() => false);
|
|
5
5
|
|
|
6
6
|
if (!hasExecutable) {
|
|
7
7
|
console.warn('Executable %o not found', executable);
|
package/src/index.ts
CHANGED
package/src/jsonParser.ts
CHANGED
|
@@ -11,8 +11,7 @@ import { createDisjunctionParser } from './disjunctionParser.js';
|
|
|
11
11
|
import { createTerminatedArrayParser } from './terminatedArrayParser.js';
|
|
12
12
|
import { createArrayParser } from './arrayParser.js';
|
|
13
13
|
import { createParserAccessorParser } from './parserAccessorParser.js';
|
|
14
|
-
import {
|
|
15
|
-
import { parserCreatorCompose } from './parserCreatorCompose.js';
|
|
14
|
+
import { createPredicateElementParser } from './predicateElementParser.js';
|
|
16
15
|
import { createSeparatedArrayParser } from './separatedArrayParser.js';
|
|
17
16
|
import { createRegExpParser } from './regexpParser.js';
|
|
18
17
|
|
|
@@ -52,17 +51,9 @@ const jsonStringEscapeSequenceParser: Parser<string, string> = createUnionParser
|
|
|
52
51
|
jsonUnicodeEscapeSequenceParser,
|
|
53
52
|
]);
|
|
54
53
|
|
|
55
|
-
const elementParser: Parser<string, string> = createElementParser();
|
|
56
|
-
|
|
57
54
|
const jsonStringCharacterParser: Parser<string, string> = createDisjunctionParser([
|
|
58
55
|
jsonStringEscapeSequenceParser,
|
|
59
|
-
|
|
60
|
-
() => elementParser,
|
|
61
|
-
character => async parserContext => {
|
|
62
|
-
parserContext.invariant(character !== '"', 'Unexpected """');
|
|
63
|
-
return character;
|
|
64
|
-
},
|
|
65
|
-
)(),
|
|
56
|
+
createPredicateElementParser((character: string) => character !== '"'),
|
|
66
57
|
]);
|
|
67
58
|
|
|
68
59
|
export const jsonStringParser: Parser<string, string> = promiseCompose(
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { setParserName, type Parser } from './parser.js';
|
|
2
|
+
import { type DeriveSequenceElement } from './sequence.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Creates a parser that reads one element via `parserContext.read(0)` and returns it if `predicate` accepts it.
 *
 * When the predicate rejects the element, `parserContext.invariant` is called with a falsy condition and the
 * message `'Element does not match predicate: %s'`.
 *
 * The parser is named `createPredicateElementParser(<predicate name>)`, using `anonymous` when the predicate
 * function has no name.
 *
 * @param predicate - Decides whether the element that was read is acceptable.
 * @returns A parser producing the accepted element.
 */
export const createPredicateElementParser = <Sequence, Element = DeriveSequenceElement<Sequence>>(
	predicate: (element: Element) => boolean,
): Parser<Element, Sequence, Element> => {
	const parserName = `createPredicateElementParser(${predicate.name || 'anonymous'})`;

	const predicateElementParser: Parser<Element, Sequence, Element> = async parserContext => {
		const candidate = await parserContext.read(0);
		const isAccepted = predicate(candidate);

		parserContext.invariant(isAccepted, 'Element does not match predicate: %s', candidate);

		return candidate;
	};

	setParserName(predicateElementParser, parserName);

	return predicateElementParser;
};
|
package/src/regularExpression.ts
CHANGED
|
@@ -9,6 +9,16 @@ export type CharacterSet =
|
|
|
9
9
|
|
|
10
10
|
export type RepeatBounds = number | { min: number; max?: number } | { min?: number; max: number };
|
|
11
11
|
|
|
12
|
+
/**
 * Sign of an `assertion` node in `RegularExpression`.
 *
 * The numeric values are spelled out so they stay fixed if members are ever reordered.
 * Presumably POSITIVE / NEGATIVE distinguish `(?=…)`-style from `(?!…)`-style assertions — confirm against the parser.
 */
export enum AssertionSign {
	/** Numeric value 0. */
	POSITIVE = 0,
	/** Numeric value 1. */
	NEGATIVE = 1,
}
|
|
16
|
+
|
|
17
|
+
/**
 * Direction of an `assertion` node in `RegularExpression`.
 *
 * The numeric values are spelled out so they stay fixed if members are ever reordered.
 * Presumably AHEAD / BEHIND distinguish lookahead from lookbehind assertions — confirm against the parser.
 */
export enum AssertionDir {
	/** Numeric value 0. */
	AHEAD = 0,
	/** Numeric value 1. */
	BEHIND = 1,
}
|
|
21
|
+
|
|
12
22
|
export type RegularExpression =
|
|
13
23
|
| { type: 'epsilon' }
|
|
14
24
|
| { type: 'literal'; charset: CharacterSet }
|
|
@@ -19,6 +29,6 @@ export type RegularExpression =
|
|
|
19
29
|
| { type: 'optional'; inner: RegularExpression }
|
|
20
30
|
| { type: 'repeat'; inner: RegularExpression; bounds: RepeatBounds }
|
|
21
31
|
| { type: 'capture-group'; inner: RegularExpression; name?: string }
|
|
22
|
-
| { type: '
|
|
32
|
+
| { type: 'assertion'; direction: AssertionDir; sign: AssertionSign; inner: RegularExpression; outer: RegularExpression }
|
|
23
33
|
| { type: 'start-anchor'; left: RegularExpression; right: RegularExpression }
|
|
24
34
|
| { type: 'end-anchor'; left: RegularExpression; right: RegularExpression };
|
|
@@ -9,7 +9,7 @@ import { parseRegExpString } from '../node_modules/@gruhn/regex-utils/dist/regex
|
|
|
9
9
|
import { runParser } from './parser.js';
|
|
10
10
|
import { stringParserInputCompanion } from './parserInputCompanion.js';
|
|
11
11
|
import { arbitrarilySlicedAsyncIterator } from './arbitrarilySlicedAsyncInterator.js';
|
|
12
|
-
import type
|
|
12
|
+
import { AssertionDir, AssertionSign, type RegularExpression, type CharacterSet } from './regularExpression.js';
|
|
13
13
|
|
|
14
14
|
// Normalize AST for comparison - removes hashes from CharSets and normalizes structure
|
|
15
15
|
function normalizeCharacterSet(charset: CharacterSet): CharacterSet {
|
|
@@ -47,8 +47,8 @@ function normalizeRegularExpression(ast: RegularExpression): RegularExpression {
|
|
|
47
47
|
return { type: 'capture-group', inner: normalizeRegularExpression(ast.inner), name: ast.name };
|
|
48
48
|
}
|
|
49
49
|
return { type: 'capture-group', inner: normalizeRegularExpression(ast.inner) };
|
|
50
|
-
case '
|
|
51
|
-
return { type: '
|
|
50
|
+
case 'assertion':
|
|
51
|
+
return { type: 'assertion', direction: ast.direction, sign: ast.sign, inner: normalizeRegularExpression(ast.inner), outer: normalizeRegularExpression(ast.outer) };
|
|
52
52
|
case 'start-anchor':
|
|
53
53
|
return { type: 'start-anchor', left: normalizeRegularExpression(ast.left), right: normalizeRegularExpression(ast.right) };
|
|
54
54
|
case 'end-anchor':
|