@futpib/parser 1.0.6 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/arbitraryBash.d.ts +3 -0
- package/build/arbitraryBash.js +142 -0
- package/build/arbitraryJavaScript.js +4 -4
- package/build/arbitraryZipStream.d.ts +1 -1
- package/build/bashParser.js +317 -75
- package/build/bashParser.test.js +71 -0
- package/build/bashUnparser.d.ts +3 -0
- package/build/bashUnparser.js +157 -0
- package/build/bashUnparser.test.d.ts +1 -0
- package/build/bashUnparser.test.js +24 -0
- package/build/bsonParser.js +3 -3
- package/build/dalvikExecutableParser/stringSyntaxParser.js +20 -31
- package/build/dalvikExecutableParser.js +2 -5
- package/build/hasExecutable.js +1 -1
- package/build/index.d.ts +1 -0
- package/build/index.js +1 -0
- package/build/jsonParser.js +2 -7
- package/build/predicateElementParser.d.ts +3 -0
- package/build/predicateElementParser.js +10 -0
- package/build/regularExpression.d.ts +12 -3
- package/build/regularExpression.js +10 -1
- package/build/regularExpressionParser.js +39 -25
- package/build/regularExpressionParser.test.js +2 -2
- package/build/smaliParser.js +5 -9
- package/build/symbolicExpressionParser.js +8 -3
- package/package.json +9 -9
- package/readme.md +468 -7
- package/src/arbitraryBash.ts +237 -0
- package/src/arbitraryJavaScript.ts +4 -4
- package/src/bashParser.test.ts +138 -0
- package/src/bashParser.ts +467 -139
- package/src/bashUnparser.test.ts +37 -0
- package/src/bashUnparser.ts +211 -0
- package/src/bsonParser.ts +4 -7
- package/src/dalvikExecutableParser/stringSyntaxParser.ts +27 -74
- package/src/dalvikExecutableParser.ts +4 -10
- package/src/hasExecutable.ts +1 -1
- package/src/index.ts +4 -0
- package/src/jsonParser.ts +2 -11
- package/src/predicateElementParser.ts +22 -0
- package/src/regularExpression.ts +11 -1
- package/src/regularExpressionParser.test.ts +3 -3
- package/src/regularExpressionParser.ts +49 -30
- package/src/smaliParser.ts +11 -23
- package/src/symbolicExpressionParser.ts +9 -3
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
/**
 * Tells whether `ch` counts as an identifier character: an ASCII letter,
 * an ASCII digit, or an underscore.
 *
 * @param {string} ch - Character to classify.
 * @returns {boolean} `true` when `ch` falls in one of the accepted ranges.
 */
function isIdentChar(ch) {
    const identRanges = [
        ['a', 'z'],
        ['A', 'Z'],
        ['0', '9'],
    ];
    const inSomeRange = identRanges.some(([low, high]) => ch >= low && ch <= high);
    return inSomeRange || ch === '_';
}
|
|
4
|
+
/**
 * Checks whether the part right after `parts[index]` is a non-empty literal
 * whose first character is an identifier character.
 *
 * @param {Array<object>} parts - Parts of the word being inspected.
 * @param {number} index - Position of the current part within `parts`.
 * @returns {boolean} `false` when there is no following part, when it is not
 *   a `literal`, or when its value is empty; otherwise the result of
 *   `isIdentChar` on its first character.
 */
function nextPartStartsWithIdentChar(parts, index) {
    const following = parts[index + 1];
    return following !== undefined
        && following.type === 'literal'
        && following.value.length > 0
        && isIdentChar(following.value[0]);
}
|
|
14
|
+
/**
 * Renders a word by rendering each of its parts (with positional context)
 * and concatenating the results without any separator.
 *
 * @param {{parts: Array<object>}} word - Word node to render.
 * @returns {string} Source text for the word.
 */
function unparseWord(word) {
    const { parts } = word;
    const rendered = parts.map((part, position) => unparseWordPartInContext(part, parts, position));
    return rendered.join('');
}
|
|
17
|
+
/**
 * Renders one part of a word, taking the following part into account.
 *
 * A bare `$name` directly followed by a literal that starts with an
 * identifier character would read back as a longer variable name
 * (`$foo` + `bar` -> `$foobar`). In that situation the braced form
 * `${name}` is emitted so the name boundary stays unambiguous.
 * Every other part is rendered by `unparseWordPart` unchanged.
 *
 * @param {object} part - The part to render.
 * @param {Array<object>} parts - All parts of the enclosing word.
 * @param {number} index - Position of `part` inside `parts`.
 * @returns {string} Source text for the part.
 */
function unparseWordPartInContext(part, parts, index) {
    if (part.type === 'variable' && nextPartStartsWithIdentChar(parts, index)) {
        return '${' + part.name + '}';
    }
    return unparseWordPart(part);
}
|
|
20
|
+
/**
 * Renders a single word part back to bash source text.
 *
 * Handled part types: `literal`, `singleQuoted`, `doubleQuoted`, `variable`,
 * `variableBraced`, `commandSubstitution`, `backtickSubstitution`,
 * `arithmeticExpansion` and `processSubstitution`. Any other type yields
 * `undefined` (there is no default branch).
 *
 * @param {object} part - Word part node; `part.type` selects the rendering.
 * @returns {string|undefined} Source text for the part.
 */
function unparseWordPart(part) {
    switch (part.type) {
        case 'literal':
            return escapeLiteral(part.value);
        case 'singleQuoted':
            // Nothing can be escaped inside single quotes, so an embedded quote
            // is written by closing the string, emitting \' and reopening it:
            // it's -> 'it'\''s'. Values without a quote are emitted unchanged.
            return "'" + String(part.value).split("'").join("'\\''") + "'";
        case 'doubleQuoted': {
            const innerParts = part.parts;
            const rendered = innerParts.map((innerPart, i) => {
                // "$foo" followed by a literal starting with an identifier
                // character would read back as one longer variable name, so
                // the braced form is used to keep the name boundary.
                if (innerPart.type === 'variable' && nextPartStartsWithIdentChar(innerParts, i)) {
                    return '${' + innerPart.name + '}';
                }
                return unparseDoubleQuotedPart(innerPart);
            });
            return '"' + rendered.join('') + '"';
        }
        case 'variable':
            return '$' + part.name;
        case 'variableBraced': {
            let result = '${' + part.name;
            // The operand is only considered when an operator is present.
            if (part.operator !== undefined) {
                result += part.operator;
                if (part.operand !== undefined) {
                    result += unparseWord(part.operand);
                }
            }
            result += '}';
            return result;
        }
        case 'commandSubstitution':
            // Inner padding keeps a nested subshell from producing "$((",
            // which is the opening of an arithmetic expansion.
            return '$( ' + unparseCommand(part.command) + ' )';
        case 'backtickSubstitution':
            return '`' + unparseCommand(part.command) + '`';
        case 'arithmeticExpansion':
            return '$((' + part.expression + '))';
        case 'processSubstitution':
            return part.direction + '(' + unparseCommand(part.command) + ')';
    }
}
|
|
51
|
+
/**
 * Renders a part that sits inside a double-quoted string.
 *
 * Literal text gets a backslash in front of each backslash, dollar sign,
 * backtick and double quote. Non-literal parts are delegated to
 * `unparseWordPart`.
 *
 * @param {object} part - Part nested in a `doubleQuoted` node.
 * @returns {string|undefined} Source text for the part.
 */
function unparseDoubleQuotedPart(part) {
    if (part.type !== 'literal') {
        return unparseWordPart(part);
    }
    const needsBackslash = new Set(['\\', '$', '`', '"']);
    return [...part.value]
        .map(ch => (needsBackslash.has(ch) ? '\\' + ch : ch))
        .join('');
}
|
|
69
|
+
/**
 * Backslash-escapes the characters of an unquoted literal that this unparser
 * treats as special: whitespace (space, tab, newline), `| & ; < > ( )`,
 * `$`, backtick, both quote characters, backslash, `{`, `}` and `#`.
 * All other characters are copied through untouched.
 *
 * NOTE(review): in POSIX shells a backslash followed by a newline is a line
 * continuation rather than an escaped newline — confirm the parser's handling
 * of that pair if literals may contain newlines.
 *
 * @param {string} value - Raw literal text.
 * @returns {string} Text with a backslash before every special character.
 */
function escapeLiteral(value) {
    const special = new Set([...' \t\n|&;<>()$`"\' \\', '{', '}', '#']);
    return Array.from(value, ch => (special.has(ch) ? '\\' + ch : ch)).join('');
}
|
|
81
|
+
/**
 * Renders a redirect as `[fd]operator target`, with no spaces in between.
 *
 * When the target carries `type === 'hereDoc'`, only its `delimiter` is
 * written; otherwise the target is rendered as a word.
 *
 * @param {{fd?: number, operator: string, target: object}} redirect - Redirect node.
 * @returns {string} Source text for the redirect.
 */
function unparseRedirect(redirect) {
    const { fd, operator, target } = redirect;
    const fdText = fd !== undefined ? String(fd) : '';
    const isHereDoc = 'type' in target && target.type === 'hereDoc';
    const targetText = isHereDoc ? target.delimiter : unparseWord(target);
    return fdText + operator + targetText;
}
|
|
95
|
+
/**
 * Renders a variable assignment as `name=value`; the value is omitted
 * (leaving `name=`) when the assignment has none.
 *
 * @param {{name: string, value?: object}} assignment - Assignment node.
 * @returns {string} Source text for the assignment.
 */
function unparseAssignment(assignment) {
    const valueText = assignment.value !== undefined ? unparseWord(assignment.value) : '';
    return assignment.name + '=' + valueText;
}
|
|
102
|
+
/**
 * Renders a simple command: assignments, then the command name (if any),
 * then the arguments, all separated by single spaces, followed by the
 * space-separated redirects.
 *
 * @param {{assignments: Array<object>, name?: object, args: Array<object>, redirects: Array<object>}} cmd
 *   Simple command node.
 * @returns {string} Source text; empty when the command has neither words
 *   nor redirects.
 */
function unparseSimpleCommand(cmd) {
    const words = [
        ...[...cmd.assignments].map(assignment => unparseAssignment(assignment)),
        ...(cmd.name !== undefined ? [unparseWord(cmd.name)] : []),
        ...[...cmd.args].map(arg => unparseWord(arg)),
    ];
    const wordText = words.join(' ');
    const redirectText = cmd.redirects.map(redirect => unparseRedirect(redirect)).join(' ');
    // Only non-empty sections take part, so no stray separator is produced.
    return [wordText, redirectText].filter(section => section).join(' ');
}
|
|
120
|
+
/**
 * Renders one pipeline member according to its `type`:
 * `simple` via `unparseSimpleCommand`, `subshell` as `(body)` and
 * `braceGroup` as `{ body }`. Other types yield `undefined`.
 *
 * NOTE(review): bash requires a `;` or newline before the closing `}` of a
 * brace group — presumably the body's last entry carries a separator;
 * verify against the parser/AST.
 *
 * @param {object} unit - Command unit node.
 * @returns {string|undefined} Source text for the unit.
 */
function unparseCommandUnit(unit) {
    const { type } = unit;
    if (type === 'simple') {
        return unparseSimpleCommand(unit);
    }
    if (type === 'subshell') {
        return '(' + unparseCommand(unit.body) + ')';
    }
    if (type === 'braceGroup') {
        return '{ ' + unparseCommand(unit.body) + ' }';
    }
    return undefined;
}
|
|
130
|
+
/**
 * Renders a pipeline: its commands joined with ` | `, prefixed by `! `
 * when the pipeline is negated.
 *
 * @param {{negated?: boolean, commands: Array<object>}} pipeline - Pipeline node.
 * @returns {string} Source text for the pipeline.
 */
function unparsePipeline(pipeline) {
    const prefix = pipeline.negated ? '! ' : '';
    const stages = pipeline.commands.map(cmd => unparseCommandUnit(cmd));
    return prefix + stages.join(' | ');
}
|
|
138
|
+
/**
 * Renders a command node. A command is handled as a command list, so this
 * simply forwards to `unparseCommandList`.
 *
 * @param {object} command - Command (list) node.
 * @returns {string} Source text for the command.
 */
function unparseCommand(command) {
    const source = unparseCommandList(command);
    return source;
}
|
|
141
|
+
/**
 * Renders a command list. Each entry becomes its pipeline text immediately
 * followed by its separator (when one is defined); consecutive entries are
 * joined with a single space.
 *
 * @param {{entries: Array<{pipeline: object, separator?: string}>}} list - Command list node.
 * @returns {string} Source text for the whole list; empty for no entries.
 */
function unparseCommandList(list) {
    const renderedEntries = Array.from(list.entries, (entry) => {
        const pipelineText = unparsePipeline(entry.pipeline);
        return entry.separator !== undefined ? pipelineText + entry.separator : pipelineText;
    });
    return renderedEntries.join(' ');
}
|
|
155
|
+
/**
 * Unparser entry point for bash scripts: an async generator that yields the
 * rendered source of `command` as a single string chunk.
 *
 * @param {object} command - Command (list) node to render.
 * @yields {string} The complete source text.
 */
export const bashScriptUnparser = async function* (command) {
    const source = unparseCommand(command);
    yield source;
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { testProp } from '@fast-check/ava';
|
|
2
|
+
import { arbitraryBashCommandList } from './arbitraryBash.js';
|
|
3
|
+
import { bashScriptUnparser } from './bashUnparser.js';
|
|
4
|
+
import { bashScriptParser } from './bashParser.js';
|
|
5
|
+
import { runParser } from './parser.js';
|
|
6
|
+
import { runUnparser } from './unparser.js';
|
|
7
|
+
import { stringParserInputCompanion } from './parserInputCompanion.js';
|
|
8
|
+
import { stringUnparserOutputCompanion } from './unparserOutputCompanion.js';
|
|
9
|
+
// Optional property-test seed taken from the SEED environment variable;
// left undefined when the variable is unset or empty.
const { SEED: rawSeed } = process.env;
const seed = rawSeed ? Number(rawSeed) : undefined;
|
|
10
|
+
/**
 * Drains an async iterable and concatenates everything it produces into
 * one string.
 *
 * @param {AsyncIterable<string>} asyncIterable - Source of chunks.
 * @returns {Promise<string>} All chunks appended in arrival order.
 */
async function collectString(asyncIterable) {
    let collected = '';
    for await (const piece of asyncIterable) {
        collected = collected + piece;
    }
    return collected;
}
|
|
17
|
+
// Round-trip property: unparsing a generated command list and parsing the
// resulting source must give back a structurally equal command list.
testProp(
    'bash roundtrip',
    [arbitraryBashCommandList],
    async (t, command) => {
        const unparsedChunks = runUnparser(bashScriptUnparser, command, stringUnparserOutputCompanion);
        const source = await collectString(unparsedChunks);
        const reparsed = await runParser(bashScriptParser, source, stringParserInputCompanion);
        t.deepEqual(reparsed, command);
    },
    {
        verbose: true,
        seed,
    },
);
|
package/build/bsonParser.js
CHANGED
|
@@ -5,17 +5,17 @@ import { createTupleParser } from './tupleParser.js';
|
|
|
5
5
|
import { createSkipParser } from './skipParser.js';
|
|
6
6
|
import { createParserAccessorParser } from './parserAccessorParser.js';
|
|
7
7
|
import { createTerminatedArrayParser } from './terminatedArrayParser.js';
|
|
8
|
-
import { createElementParser } from './elementParser.js';
|
|
9
8
|
import { createExactElementParser } from './exactElementParser.js';
|
|
10
9
|
import { createUnionParser } from './unionParser.js';
|
|
11
10
|
import { parserCreatorCompose } from './parserCreatorCompose.js';
|
|
11
|
+
import { createPredicateElementParser } from './predicateElementParser.js';
|
|
12
12
|
const createFixedLengthBufferParser = (length) => promiseCompose(createFixedLengthSequenceParser(length), sequence => Buffer.from(sequence));
|
|
13
13
|
const buffer1Parser = createFixedLengthBufferParser(1);
|
|
14
14
|
const buffer4Parser = createFixedLengthBufferParser(4);
|
|
15
15
|
const buffer8Parser = createFixedLengthBufferParser(8);
|
|
16
|
-
const elementParser = createElementParser();
|
|
17
16
|
const nullByteParser = createExactElementParser(0);
|
|
18
|
-
const
|
|
17
|
+
const nonNullByteParser = createPredicateElementParser((byte) => byte !== 0);
|
|
18
|
+
const cstringParser = promiseCompose(createTerminatedArrayParser(nonNullByteParser, nullByteParser), ([sequence]) => Buffer.from(sequence).toString('utf8'));
|
|
19
19
|
const doubleParser = promiseCompose(buffer8Parser, buffer => buffer.readDoubleLE(0));
|
|
20
20
|
setParserName(doubleParser, 'doubleParser');
|
|
21
21
|
const int32Parser = promiseCompose(buffer4Parser, buffer => buffer.readInt32LE(0));
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import invariant from 'invariant';
|
|
2
1
|
import { setParserName } from '../parser.js';
|
|
3
2
|
import { promiseCompose } from '../promiseCompose.js';
|
|
4
3
|
import { createSeparatedArrayParser } from '../separatedArrayParser.js';
|
|
@@ -6,36 +5,26 @@ import { createExactSequenceParser } from '../exactSequenceParser.js';
|
|
|
6
5
|
import { createUnionParser } from '../unionParser.js';
|
|
7
6
|
import { createTupleParser } from '../tupleParser.js';
|
|
8
7
|
import { createArrayParser } from '../arrayParser.js';
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|| (character >= '\uD800' && character <= '\uDBFF')) {
|
|
30
|
-
parserContext.skip(1);
|
|
31
|
-
characters.push(character);
|
|
32
|
-
continue;
|
|
33
|
-
}
|
|
34
|
-
parserContext.invariant(characters.length > 0, 'Expected at least one character');
|
|
35
|
-
break;
|
|
36
|
-
}
|
|
37
|
-
return characters.join('');
|
|
38
|
-
};
|
|
8
|
+
import { createNonEmptyArrayParser } from '../nonEmptyArrayParser.js';
|
|
9
|
+
import { createPredicateElementParser } from '../predicateElementParser.js';
|
|
10
|
+
/**
 * Tells whether `character` is accepted inside a smali simple name.
 *
 * Accepted: ASCII letters and digits, the single characters space, `$`,
 * `-`, `_`, U+00A0 and U+202F, and the ranges U+00A1–U+1FFF, U+2000–U+200A,
 * U+2010–U+2027, U+2030–U+D7FF, U+E000–U+FFEF and U+D800–U+DBFF.
 *
 * @param {string} character - Character to classify.
 * @returns {boolean} `true` when the character is accepted.
 */
function isSmaliSimpleNameChar(character) {
    const acceptedSingles = [' ', '$', '-', '_', '\u00A0', '\u202F'];
    const acceptedRanges = [
        ['a', 'z'],
        ['A', 'Z'],
        ['0', '9'],
        ['\u00A1', '\u1FFF'],
        ['\u2000', '\u200A'],
        ['\u2010', '\u2027'],
        ['\u2030', '\uD7FF'],
        ['\uE000', '\uFFEF'],
        ['\uD800', '\uDBFF'],
    ];
    return acceptedSingles.includes(character)
        || acceptedRanges.some(([low, high]) => character >= low && character <= high);
}
|
|
27
|
+
// Parses a simple name: one or more characters accepted by
// isSmaliSimpleNameChar, joined into a single string.
const smaliSimpleNameCharacterParser = createPredicateElementParser(isSmaliSimpleNameChar);
export const smaliSimpleNameParser = promiseCompose(
    createNonEmptyArrayParser(smaliSimpleNameCharacterParser),
    characters => characters.join(''),
);
|
|
39
28
|
setParserName(smaliSimpleNameParser, 'smaliSimpleNameParser');
|
|
40
29
|
export const smaliMemberNameParser = createUnionParser([
|
|
41
30
|
smaliSimpleNameParser,
|
|
@@ -12,6 +12,7 @@ import { createTupleParser } from './tupleParser.js';
|
|
|
12
12
|
import { createParserAccessorParser } from './parserAccessorParser.js';
|
|
13
13
|
import { createSkipToParser } from './skipToParser.js';
|
|
14
14
|
import { createLookaheadParser } from './lookaheadParser.js';
|
|
15
|
+
import { createPredicateElementParser } from './predicateElementParser.js';
|
|
15
16
|
import { getIsoTypedNumberArray, isoIndexIntoFieldIds, isoIndexIntoMethodIds, isoIndexIntoPrototypeIds, isoIndexIntoStringIds, isoIndexIntoTypeIds, isoOffsetFromEncodedCatchHandlerListToEncodedCatchHandler, isoOffsetToAnnotationItem, isoOffsetToAnnotationsDirectoryItem, isoOffsetToAnnotationSetItem, isoOffsetToAnnotationSetRefListItem, isoOffsetToClassDataItem, isoOffsetToCodeItem, isoOffsetToDebugInfoItem, isoOffsetToEncodedArrayItem, isoOffsetToStringDataItem, isoOffsetToTypeList, } from './dalvikExecutableParser/typedNumbers.js';
|
|
16
17
|
import { sleb128NumberParser, uleb128NumberParser } from './leb128Parser.js';
|
|
17
18
|
import { createDisjunctionParser } from './disjunctionParser.js';
|
|
@@ -354,11 +355,7 @@ const createSkipToThenClassDataItemsParser = (sizeOffset) => createSkipToThenIte
|
|
|
354
355
|
parserName: 'skipToThenClassDataItemsParser',
|
|
355
356
|
});
|
|
356
357
|
const createByteWith5LeastSignificantBitsEqualParser = (leastSignificant5) => {
|
|
357
|
-
const byteWith5LeastSignificantBitsEqualParser =
|
|
358
|
-
const byte = await parserContext.read(0);
|
|
359
|
-
parserContext.invariant((byte & 0b0001_1111) === leastSignificant5, 'Expected byte with 5 least significant bits equal to %s, but got %s', leastSignificant5.toString(2).padStart(8, '0'), byte.toString(2).padStart(8, '0'));
|
|
360
|
-
return byte;
|
|
361
|
-
};
|
|
358
|
+
const byteWith5LeastSignificantBitsEqualParser = createPredicateElementParser((byte) => (byte & 0b0001_1111) === leastSignificant5);
|
|
362
359
|
setParserName(byteWith5LeastSignificantBitsEqualParser, `createByteWith5LeastSignificantBitsEqualParser(${leastSignificant5.toString(2).padStart(5, '0')})`);
|
|
363
360
|
return byteWith5LeastSignificantBitsEqualParser;
|
|
364
361
|
};
|
package/build/hasExecutable.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { execa } from 'execa';
|
|
2
2
|
export async function hasExecutable(executable) {
|
|
3
|
-
const hasExecutable = execa(executable).
|
|
3
|
+
const hasExecutable = await execa(executable).then(() => true).catch(() => false);
|
|
4
4
|
if (!hasExecutable) {
|
|
5
5
|
console.warn('Executable %o not found', executable);
|
|
6
6
|
}
|
package/build/index.d.ts
CHANGED
|
@@ -16,6 +16,7 @@ export { createUnionParser, } from './unionParser.js';
|
|
|
16
16
|
export { createDisjunctionParser, } from './disjunctionParser.js';
|
|
17
17
|
export { createParserAccessorParser, } from './parserAccessorParser.js';
|
|
18
18
|
export { createElementParser, } from './elementParser.js';
|
|
19
|
+
export { createPredicateElementParser, } from './predicateElementParser.js';
|
|
19
20
|
export { createTerminatedArrayParser, } from './terminatedArrayParser.js';
|
|
20
21
|
export { createSliceBoundedParser, } from './sliceBoundedParser.js';
|
|
21
22
|
export { createExactElementParser, } from './exactElementParser.js';
|
package/build/index.js
CHANGED
|
@@ -14,6 +14,7 @@ export { createUnionParser, } from './unionParser.js';
|
|
|
14
14
|
export { createDisjunctionParser, } from './disjunctionParser.js';
|
|
15
15
|
export { createParserAccessorParser, } from './parserAccessorParser.js';
|
|
16
16
|
export { createElementParser, } from './elementParser.js';
|
|
17
|
+
export { createPredicateElementParser, } from './predicateElementParser.js';
|
|
17
18
|
export { createTerminatedArrayParser, } from './terminatedArrayParser.js';
|
|
18
19
|
export { createSliceBoundedParser, } from './sliceBoundedParser.js';
|
|
19
20
|
export { createExactElementParser, } from './exactElementParser.js';
|
package/build/jsonParser.js
CHANGED
|
@@ -8,8 +8,7 @@ import { createDisjunctionParser } from './disjunctionParser.js';
|
|
|
8
8
|
import { createTerminatedArrayParser } from './terminatedArrayParser.js';
|
|
9
9
|
import { createArrayParser } from './arrayParser.js';
|
|
10
10
|
import { createParserAccessorParser } from './parserAccessorParser.js';
|
|
11
|
-
import {
|
|
12
|
-
import { parserCreatorCompose } from './parserCreatorCompose.js';
|
|
11
|
+
import { createPredicateElementParser } from './predicateElementParser.js';
|
|
13
12
|
import { createSeparatedArrayParser } from './separatedArrayParser.js';
|
|
14
13
|
import { createRegExpParser } from './regexpParser.js';
|
|
15
14
|
const whitespaceParser = createArrayParser(createUnionParser([
|
|
@@ -41,13 +40,9 @@ const jsonStringEscapeSequenceParser = createUnionParser([
|
|
|
41
40
|
jsonTabEscapeSequenceParser,
|
|
42
41
|
jsonUnicodeEscapeSequenceParser,
|
|
43
42
|
]);
|
|
44
|
-
const elementParser = createElementParser();
|
|
45
43
|
const jsonStringCharacterParser = createDisjunctionParser([
|
|
46
44
|
jsonStringEscapeSequenceParser,
|
|
47
|
-
|
|
48
|
-
parserContext.invariant(character !== '"', 'Unexpected """');
|
|
49
|
-
return character;
|
|
50
|
-
})(),
|
|
45
|
+
createPredicateElementParser((character) => character !== '"'),
|
|
51
46
|
]);
|
|
52
47
|
export const jsonStringParser = promiseCompose(createTupleParser([
|
|
53
48
|
createExactSequenceParser('"'),
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import { type Parser } from './parser.js';
|
|
2
|
+
import { type DeriveSequenceElement } from './sequence.js';
|
|
3
|
+
export declare const createPredicateElementParser: <Sequence, Element = DeriveSequenceElement<Sequence>>(predicate: (element: Element) => boolean) => Parser<Element, Sequence, Element>;
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { setParserName } from './parser.js';
|
|
2
|
+
/**
 * Builds a parser that obtains one element through `parserContext.read(0)`
 * and returns it, after asserting via `parserContext.invariant` that the
 * element satisfies `predicate`.
 *
 * The parser is registered under the name
 * `createPredicateElementParser(<predicate name or "anonymous">)`.
 *
 * @param {(element: any) => boolean} predicate - Test the element must pass.
 * @returns {(parserContext: object) => Promise<any>} The element parser.
 */
export const createPredicateElementParser = (predicate) => {
    const predicateLabel = predicate.name || 'anonymous';
    const predicateElementParser = async (parserContext) => {
        const element = await parserContext.read(0);
        const matches = predicate(element);
        parserContext.invariant(matches, 'Element does not match predicate: %s', element);
        return element;
    };
    setParserName(predicateElementParser, `createPredicateElementParser(${predicateLabel})`);
    return predicateElementParser;
};
|
|
@@ -17,6 +17,14 @@ export type RepeatBounds = number | {
|
|
|
17
17
|
min?: number;
|
|
18
18
|
max: number;
|
|
19
19
|
};
|
|
20
|
+
export declare enum AssertionSign {
|
|
21
|
+
POSITIVE = 0,
|
|
22
|
+
NEGATIVE = 1
|
|
23
|
+
}
|
|
24
|
+
export declare enum AssertionDir {
|
|
25
|
+
AHEAD = 0,
|
|
26
|
+
BEHIND = 1
|
|
27
|
+
}
|
|
20
28
|
export type RegularExpression = {
|
|
21
29
|
type: 'epsilon';
|
|
22
30
|
} | {
|
|
@@ -48,10 +56,11 @@ export type RegularExpression = {
|
|
|
48
56
|
inner: RegularExpression;
|
|
49
57
|
name?: string;
|
|
50
58
|
} | {
|
|
51
|
-
type: '
|
|
52
|
-
|
|
59
|
+
type: 'assertion';
|
|
60
|
+
direction: AssertionDir;
|
|
61
|
+
sign: AssertionSign;
|
|
53
62
|
inner: RegularExpression;
|
|
54
|
-
|
|
63
|
+
outer: RegularExpression;
|
|
55
64
|
} | {
|
|
56
65
|
type: 'start-anchor';
|
|
57
66
|
left: RegularExpression;
|
|
@@ -1 +1,10 @@
|
|
|
1
|
-
export
|
|
1
|
+
// Polarity of an assertion, as a two-way enum object:
// name -> number (POSITIVE = 0, NEGATIVE = 1) and number -> name.
export var AssertionSign;
(function (AssertionSign) {
    for (const [value, label] of ["POSITIVE", "NEGATIVE"].entries()) {
        AssertionSign[label] = value;
        AssertionSign[value] = label;
    }
})(AssertionSign || (AssertionSign = {}));
|
|
6
|
+
// Direction of an assertion, as a two-way enum object:
// name -> number (AHEAD = 0, BEHIND = 1) and number -> name.
export var AssertionDir;
(function (AssertionDir) {
    for (const [value, label] of ["AHEAD", "BEHIND"].entries()) {
        AssertionDir[label] = value;
        AssertionDir[value] = label;
    }
})(AssertionDir || (AssertionDir = {}));
|
|
@@ -12,6 +12,7 @@ import { createTerminatedArrayParser } from './terminatedArrayParser.js';
|
|
|
12
12
|
import { createDisjunctionParser } from './disjunctionParser.js';
|
|
13
13
|
import { createNegativeLookaheadParser } from './negativeLookaheadParser.js';
|
|
14
14
|
import { createObjectParser } from './objectParser.js';
|
|
15
|
+
import { AssertionDir, AssertionSign, } from './regularExpression.js';
|
|
15
16
|
// CharacterSet helpers
|
|
16
17
|
const emptyCharacterSet = { type: 'empty' };
|
|
17
18
|
function codePointRangeIsEmpty(range) {
|
|
@@ -168,20 +169,21 @@ function characterSetFromArray(chars) {
|
|
|
168
169
|
return chars.map(characterSetSingleton).reduce(characterSetUnion, emptyCharacterSet);
|
|
169
170
|
}
|
|
170
171
|
function characterSetComplement(set) {
|
|
171
|
-
return characterSetDifference(
|
|
172
|
+
return characterSetDifference(bmpRange, set);
|
|
172
173
|
}
|
|
173
174
|
// Pre-defined character sets
|
|
174
|
-
const
|
|
175
|
+
const bmpRange = characterSetFromRange({ start: 0, end: 0xFFFF });
|
|
176
|
+
const alphabet = characterSetDifference(bmpRange, characterSetFromArray(['\r', '\n', '\u2028', '\u2029']));
|
|
175
177
|
const wildcardCharacterSet = characterSetDifference(alphabet, characterSetFromArray(['\r', '\n', '\u2028', '\u2029']));
|
|
176
178
|
const digitChars = characterSetCharRange('0', '9');
|
|
177
|
-
const nonDigitChars =
|
|
179
|
+
const nonDigitChars = characterSetDifference(bmpRange, digitChars);
|
|
178
180
|
const wordChars = [
|
|
179
181
|
characterSetCharRange('a', 'z'),
|
|
180
182
|
characterSetCharRange('A', 'Z'),
|
|
181
183
|
characterSetCharRange('0', '9'),
|
|
182
184
|
characterSetSingleton('_'),
|
|
183
185
|
].reduce(characterSetUnion);
|
|
184
|
-
const nonWordChars =
|
|
186
|
+
const nonWordChars = characterSetDifference(bmpRange, wordChars);
|
|
185
187
|
const whiteSpaceChars = [
|
|
186
188
|
characterSetSingleton('\f'),
|
|
187
189
|
characterSetSingleton('\n'),
|
|
@@ -199,7 +201,7 @@ const whiteSpaceChars = [
|
|
|
199
201
|
characterSetSingleton('\u3000'),
|
|
200
202
|
characterSetSingleton('\ufeff'),
|
|
201
203
|
].reduce(characterSetUnion);
|
|
202
|
-
const nonWhiteSpaceChars =
|
|
204
|
+
const nonWhiteSpaceChars = characterSetDifference(bmpRange, whiteSpaceChars);
|
|
203
205
|
// AST constructors
|
|
204
206
|
const epsilon = { type: 'epsilon' };
|
|
205
207
|
function literal(charset) {
|
|
@@ -229,8 +231,8 @@ function captureGroup(inner, name) {
|
|
|
229
231
|
}
|
|
230
232
|
return { type: 'capture-group', inner, name };
|
|
231
233
|
}
|
|
232
|
-
function
|
|
233
|
-
return { type: '
|
|
234
|
+
/**
 * AST constructor for an `assertion` node.
 *
 * @param {number} direction - Assertion direction value stored on the node.
 * @param {number} sign - Assertion sign value stored on the node.
 * @param {object} inner - Expression stored as `inner`.
 * @param {object} outer - Expression stored as `outer`.
 * @returns {{type: 'assertion', direction: number, sign: number, inner: object, outer: object}}
 *   A new node holding the given fields.
 */
function assertion(direction, sign, inner, outer) {
    const node = {
        type: 'assertion',
        direction,
        sign,
        inner,
        outer,
    };
    return node;
}
|
|
235
237
|
function startAnchor(left, right) {
|
|
236
238
|
return { type: 'start-anchor', left, right };
|
|
@@ -476,16 +478,18 @@ const nonCaptureGroupParser = promiseCompose(createTupleParser([
|
|
|
476
478
|
]), ([, inner]) => inner);
|
|
477
479
|
// Positive lookahead (?=...)
|
|
478
480
|
const positiveLookaheadMarkerParser = createObjectParser({
|
|
479
|
-
type: '
|
|
480
|
-
|
|
481
|
+
type: 'assertion-marker',
|
|
482
|
+
direction: AssertionDir.AHEAD,
|
|
483
|
+
sign: AssertionSign.POSITIVE,
|
|
481
484
|
_open: createExactSequenceParser('(?='),
|
|
482
485
|
inner: createParserAccessorParser(() => alternationParser),
|
|
483
486
|
_close: createExactSequenceParser(')'),
|
|
484
487
|
});
|
|
485
488
|
// Negative lookahead (?!...)
|
|
486
489
|
const negativeLookaheadMarkerParser = createObjectParser({
|
|
487
|
-
type: '
|
|
488
|
-
|
|
490
|
+
type: 'assertion-marker',
|
|
491
|
+
direction: AssertionDir.AHEAD,
|
|
492
|
+
sign: AssertionSign.NEGATIVE,
|
|
489
493
|
_open: createExactSequenceParser('(?!'),
|
|
490
494
|
inner: createParserAccessorParser(() => alternationParser),
|
|
491
495
|
_close: createExactSequenceParser(')'),
|
|
@@ -545,9 +549,9 @@ function concatList(parts) {
|
|
|
545
549
|
}
|
|
546
550
|
return parts.reduceRight((acc, part) => concat(part, acc));
|
|
547
551
|
}
|
|
548
|
-
// Process elements with anchor markers and
|
|
549
|
-
// Handles anchors and
|
|
550
|
-
// Precedence order (lowest to highest): union -> start-anchor -> end-anchor ->
|
|
552
|
+
// Process elements with anchor markers and assertion markers into proper AST
|
|
553
|
+
// Handles anchors and assertions as infix operators like @gruhn/regex-utils
|
|
554
|
+
// Precedence order (lowest to highest): union -> start-anchor -> end-anchor -> assertion -> concat
|
|
551
555
|
function processElements(elements) {
|
|
552
556
|
if (elements.length === 0) {
|
|
553
557
|
return epsilon;
|
|
@@ -566,18 +570,28 @@ function processElements(elements) {
|
|
|
566
570
|
const right = elements.slice(endAnchorIdx + 1);
|
|
567
571
|
return endAnchor(processElements(left), processElements(right));
|
|
568
572
|
}
|
|
569
|
-
// Then
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
const
|
|
575
|
-
const
|
|
576
|
-
|
|
577
|
-
|
|
573
|
+
// Then assertions (higher precedence than anchors)
|
|
574
|
+
// Special handling: Negative lookahead at the start with more content after it
|
|
575
|
+
// forms a concat with epsilon outer, instead of consuming everything into outer
|
|
576
|
+
const assertionIdx = elements.findIndex(e => 'type' in e && e.type === 'assertion-marker');
|
|
577
|
+
if (assertionIdx !== -1) {
|
|
578
|
+
const marker = elements[assertionIdx];
|
|
579
|
+
const left = elements.slice(0, assertionIdx);
|
|
580
|
+
const right = elements.slice(assertionIdx + 1);
|
|
581
|
+
// Special case: Negative lookahead at the start followed by more content
|
|
582
|
+
// Creates concat instead of nesting
|
|
583
|
+
if (left.length === 0 && marker.sign === AssertionSign.NEGATIVE && right.length > 0) {
|
|
584
|
+
const assertionExpr = assertion(marker.direction, marker.sign, marker.inner, epsilon);
|
|
585
|
+
return concat(assertionExpr, processElements(right));
|
|
578
586
|
}
|
|
579
|
-
//
|
|
580
|
-
|
|
587
|
+
// Assertion after content: always concat with epsilon outer
|
|
588
|
+
if (left.length > 0) {
|
|
589
|
+
const assertionExpr = assertion(marker.direction, marker.sign, marker.inner, epsilon);
|
|
590
|
+
return concat(processElements(left), concat(assertionExpr, processElements(right)));
|
|
591
|
+
}
|
|
592
|
+
// Assertion at start (not negative lookahead with content after): consume everything
|
|
593
|
+
const assertionExpr = assertion(marker.direction, marker.sign, marker.inner, processElements(right));
|
|
594
|
+
return assertionExpr;
|
|
581
595
|
}
|
|
582
596
|
// No markers, just regular expressions - concatenate them
|
|
583
597
|
const regexParts = elements;
|
|
@@ -42,8 +42,8 @@ function normalizeRegularExpression(ast) {
|
|
|
42
42
|
return { type: 'capture-group', inner: normalizeRegularExpression(ast.inner), name: ast.name };
|
|
43
43
|
}
|
|
44
44
|
return { type: 'capture-group', inner: normalizeRegularExpression(ast.inner) };
|
|
45
|
-
case '
|
|
46
|
-
return { type: '
|
|
45
|
+
case 'assertion':
|
|
46
|
+
return { type: 'assertion', direction: ast.direction, sign: ast.sign, inner: normalizeRegularExpression(ast.inner), outer: normalizeRegularExpression(ast.outer) };
|
|
47
47
|
case 'start-anchor':
|
|
48
48
|
return { type: 'start-anchor', left: normalizeRegularExpression(ast.left), right: normalizeRegularExpression(ast.right) };
|
|
49
49
|
case 'end-anchor':
|
package/build/smaliParser.js
CHANGED
|
@@ -14,6 +14,7 @@ import { createNonEmptyArrayParser } from './nonEmptyArrayParser.js';
|
|
|
14
14
|
import { createOptionalParser } from './optionalParser.js';
|
|
15
15
|
import { createNegativeLookaheadParser } from './negativeLookaheadParser.js';
|
|
16
16
|
import { createSeparatedArrayParser } from './separatedArrayParser.js';
|
|
17
|
+
import { createPredicateElementParser } from './predicateElementParser.js';
|
|
17
18
|
import { smaliMemberNameParser, smaliTypeDescriptorParser } from './dalvikExecutableParser/stringSyntaxParser.js';
|
|
18
19
|
import { createDisjunctionParser } from './disjunctionParser.js';
|
|
19
20
|
import { createSeparatedNonEmptyArrayParser } from './separatedNonEmptyArrayParser.js';
|
|
@@ -236,19 +237,14 @@ const smaliCharacterLiteralParser = promiseCompose(createTupleParser([
|
|
|
236
237
|
]), ([, character]) => character.charCodeAt(0));
|
|
237
238
|
setParserName(smaliCharacterLiteralParser, 'smaliCharacterLiteralParser');
|
|
238
239
|
// Parser that matches identifier continuation characters (letters, digits, $, -, _)
|
|
239
|
-
const smaliIdentifierContinuationParser =
|
|
240
|
-
|
|
241
|
-
parserContext.invariant(character !== undefined, 'Unexpected end of input');
|
|
242
|
-
invariant(character !== undefined, 'Unexpected end of input');
|
|
243
|
-
parserContext.invariant((character >= 'a' && character <= 'z')
|
|
240
|
+
const smaliIdentifierContinuationParser = createPredicateElementParser(function isSmaliIdentifierContinuation(character) {
|
|
241
|
+
return (character >= 'a' && character <= 'z')
|
|
244
242
|
|| (character >= 'A' && character <= 'Z')
|
|
245
243
|
|| (character >= '0' && character <= '9')
|
|
246
244
|
|| character === '$'
|
|
247
245
|
|| character === '-'
|
|
248
|
-
|| character === '_'
|
|
249
|
-
|
|
250
|
-
return character;
|
|
251
|
-
};
|
|
246
|
+
|| character === '_';
|
|
247
|
+
});
|
|
252
248
|
setParserName(smaliIdentifierContinuationParser, 'smaliIdentifierContinuationParser');
|
|
253
249
|
// Helper to create an access flag parser with word boundary check
|
|
254
250
|
const createAccessFlagParser = (keyword) => promiseCompose(createTupleParser([
|
|
@@ -34,11 +34,16 @@ const symbolicExpressionStringParser = promiseCompose(createRegExpParser(/"(?:[^
|
|
|
34
34
|
});
|
|
35
35
|
setParserName(symbolicExpressionStringParser, 'symbolicExpressionStringParser');
|
|
36
36
|
// Atom parser: unquoted symbols (any chars except whitespace, parens, quotes, etc.)
|
|
37
|
-
// Supports backslash escapes: \x becomes x
|
|
38
|
-
|
|
37
|
+
// Supports backslash escapes: \x becomes x
|
|
38
|
+
// Note: A lone backslash or one that produces an empty atom should fail parsing
|
|
39
|
+
const symbolicExpressionAtomParser = promiseCompose(createRegExpParser(/(?:[^\s()"'`,;\\]|\\.)+\\?/), match => {
|
|
39
40
|
const raw = match[0];
|
|
40
|
-
// Process backslash escapes: \x becomes x
|
|
41
|
+
// Process backslash escapes: \x becomes x
|
|
41
42
|
const value = raw.replace(/\\(.?)/g, '$1');
|
|
43
|
+
// Reject atoms that result in empty strings
|
|
44
|
+
if (value.length === 0) {
|
|
45
|
+
throw new Error('Atom cannot be empty');
|
|
46
|
+
}
|
|
42
47
|
return {
|
|
43
48
|
type: 'atom',
|
|
44
49
|
value,
|