@futpib/parser 1.0.7 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/arbitraryBash.d.ts +3 -0
- package/build/arbitraryBash.js +142 -0
- package/build/arbitraryJavaScript.js +4 -4
- package/build/arbitraryZipStream.d.ts +1 -1
- package/build/bashUnparser.d.ts +3 -0
- package/build/bashUnparser.js +157 -0
- package/build/bashUnparser.test.d.ts +1 -0
- package/build/bashUnparser.test.js +24 -0
- package/build/bsonParser.js +3 -3
- package/build/dalvikExecutableParser/stringSyntaxParser.js +20 -31
- package/build/dalvikExecutableParser.js +2 -5
- package/build/hasExecutable.js +1 -1
- package/build/jsonParser.js +2 -7
- package/build/regularExpression.d.ts +12 -3
- package/build/regularExpression.js +10 -1
- package/build/regularExpressionParser.js +39 -25
- package/build/regularExpressionParser.test.js +2 -2
- package/build/smaliParser.js +5 -9
- package/build/symbolicExpressionParser.js +8 -3
- package/package.json +9 -9
- package/readme.md +468 -7
- package/src/arbitraryBash.ts +237 -0
- package/src/arbitraryJavaScript.ts +4 -4
- package/src/bashUnparser.test.ts +37 -0
- package/src/bashUnparser.ts +211 -0
- package/src/bsonParser.ts +4 -7
- package/src/dalvikExecutableParser/stringSyntaxParser.ts +27 -74
- package/src/dalvikExecutableParser.ts +4 -10
- package/src/hasExecutable.ts +1 -1
- package/src/jsonParser.ts +2 -11
- package/src/regularExpression.ts +11 -1
- package/src/regularExpressionParser.test.ts +3 -3
- package/src/regularExpressionParser.ts +49 -30
- package/src/smaliParser.ts +11 -23
- package/src/symbolicExpressionParser.ts +9 -3
package/src/hasExecutable.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { execa } from 'execa';
|
|
2
2
|
|
|
3
3
|
export async function hasExecutable(executable: string) {
|
|
4
|
-
const hasExecutable = execa(executable).
|
|
4
|
+
const hasExecutable = await execa(executable).then(() => true).catch(() => false);
|
|
5
5
|
|
|
6
6
|
if (!hasExecutable) {
|
|
7
7
|
console.warn('Executable %o not found', executable);
|
package/src/jsonParser.ts
CHANGED
|
@@ -11,8 +11,7 @@ import { createDisjunctionParser } from './disjunctionParser.js';
|
|
|
11
11
|
import { createTerminatedArrayParser } from './terminatedArrayParser.js';
|
|
12
12
|
import { createArrayParser } from './arrayParser.js';
|
|
13
13
|
import { createParserAccessorParser } from './parserAccessorParser.js';
|
|
14
|
-
import {
|
|
15
|
-
import { parserCreatorCompose } from './parserCreatorCompose.js';
|
|
14
|
+
import { createPredicateElementParser } from './predicateElementParser.js';
|
|
16
15
|
import { createSeparatedArrayParser } from './separatedArrayParser.js';
|
|
17
16
|
import { createRegExpParser } from './regexpParser.js';
|
|
18
17
|
|
|
@@ -52,17 +51,9 @@ const jsonStringEscapeSequenceParser: Parser<string, string> = createUnionParser
|
|
|
52
51
|
jsonUnicodeEscapeSequenceParser,
|
|
53
52
|
]);
|
|
54
53
|
|
|
55
|
-
const elementParser: Parser<string, string> = createElementParser();
|
|
56
|
-
|
|
57
54
|
const jsonStringCharacterParser: Parser<string, string> = createDisjunctionParser([
|
|
58
55
|
jsonStringEscapeSequenceParser,
|
|
59
|
-
|
|
60
|
-
() => elementParser,
|
|
61
|
-
character => async parserContext => {
|
|
62
|
-
parserContext.invariant(character !== '"', 'Unexpected """');
|
|
63
|
-
return character;
|
|
64
|
-
},
|
|
65
|
-
)(),
|
|
56
|
+
createPredicateElementParser((character: string) => character !== '"'),
|
|
66
57
|
]);
|
|
67
58
|
|
|
68
59
|
export const jsonStringParser: Parser<string, string> = promiseCompose(
|
package/src/regularExpression.ts
CHANGED
|
@@ -9,6 +9,16 @@ export type CharacterSet =
|
|
|
9
9
|
|
|
10
10
|
export type RepeatBounds = number | { min: number; max?: number } | { min?: number; max: number };
|
|
11
11
|
|
|
12
|
+
export enum AssertionSign {
|
|
13
|
+
POSITIVE = 0,
|
|
14
|
+
NEGATIVE = 1,
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
export enum AssertionDir {
|
|
18
|
+
AHEAD = 0,
|
|
19
|
+
BEHIND = 1,
|
|
20
|
+
}
|
|
21
|
+
|
|
12
22
|
export type RegularExpression =
|
|
13
23
|
| { type: 'epsilon' }
|
|
14
24
|
| { type: 'literal'; charset: CharacterSet }
|
|
@@ -19,6 +29,6 @@ export type RegularExpression =
|
|
|
19
29
|
| { type: 'optional'; inner: RegularExpression }
|
|
20
30
|
| { type: 'repeat'; inner: RegularExpression; bounds: RepeatBounds }
|
|
21
31
|
| { type: 'capture-group'; inner: RegularExpression; name?: string }
|
|
22
|
-
| { type: '
|
|
32
|
+
| { type: 'assertion'; direction: AssertionDir; sign: AssertionSign; inner: RegularExpression; outer: RegularExpression }
|
|
23
33
|
| { type: 'start-anchor'; left: RegularExpression; right: RegularExpression }
|
|
24
34
|
| { type: 'end-anchor'; left: RegularExpression; right: RegularExpression };
|
package/src/regularExpressionParser.test.ts
CHANGED
|
@@ -9,7 +9,7 @@ import { parseRegExpString } from '../node_modules/@gruhn/regex-utils/dist/regex
|
|
|
9
9
|
import { runParser } from './parser.js';
|
|
10
10
|
import { stringParserInputCompanion } from './parserInputCompanion.js';
|
|
11
11
|
import { arbitrarilySlicedAsyncIterator } from './arbitrarilySlicedAsyncInterator.js';
|
|
12
|
-
import type
|
|
12
|
+
import { AssertionDir, AssertionSign, type RegularExpression, type CharacterSet } from './regularExpression.js';
|
|
13
13
|
|
|
14
14
|
// Normalize AST for comparison - removes hashes from CharSets and normalizes structure
|
|
15
15
|
function normalizeCharacterSet(charset: CharacterSet): CharacterSet {
|
|
@@ -47,8 +47,8 @@ function normalizeRegularExpression(ast: RegularExpression): RegularExpression {
|
|
|
47
47
|
return { type: 'capture-group', inner: normalizeRegularExpression(ast.inner), name: ast.name };
|
|
48
48
|
}
|
|
49
49
|
return { type: 'capture-group', inner: normalizeRegularExpression(ast.inner) };
|
|
50
|
-
case '
|
|
51
|
-
return { type: '
|
|
50
|
+
case 'assertion':
|
|
51
|
+
return { type: 'assertion', direction: ast.direction, sign: ast.sign, inner: normalizeRegularExpression(ast.inner), outer: normalizeRegularExpression(ast.outer) };
|
|
52
52
|
case 'start-anchor':
|
|
53
53
|
return { type: 'start-anchor', left: normalizeRegularExpression(ast.left), right: normalizeRegularExpression(ast.right) };
|
|
54
54
|
case 'end-anchor':
|
package/src/regularExpressionParser.ts
CHANGED
|
@@ -14,6 +14,8 @@ import { createDisjunctionParser } from './disjunctionParser.js';
|
|
|
14
14
|
import { createNegativeLookaheadParser } from './negativeLookaheadParser.js';
|
|
15
15
|
import { createObjectParser } from './objectParser.js';
|
|
16
16
|
import {
|
|
17
|
+
AssertionDir,
|
|
18
|
+
AssertionSign,
|
|
17
19
|
type CharacterSet,
|
|
18
20
|
type CodePointRange,
|
|
19
21
|
type RegularExpression,
|
|
@@ -209,12 +211,14 @@ function characterSetFromArray(chars: string[]): CharacterSet {
|
|
|
209
211
|
}
|
|
210
212
|
|
|
211
213
|
function characterSetComplement(set: CharacterSet): CharacterSet {
|
|
212
|
-
return characterSetDifference(
|
|
214
|
+
return characterSetDifference(bmpRange, set);
|
|
213
215
|
}
|
|
214
216
|
|
|
215
217
|
// Pre-defined character sets
|
|
218
|
+
const bmpRange: CharacterSet = characterSetFromRange({ start: 0, end: 0xFFFF });
|
|
219
|
+
|
|
216
220
|
const alphabet: CharacterSet = characterSetDifference(
|
|
217
|
-
|
|
221
|
+
bmpRange,
|
|
218
222
|
characterSetFromArray(['\r', '\n', '\u2028', '\u2029']),
|
|
219
223
|
);
|
|
220
224
|
|
|
@@ -224,7 +228,7 @@ const wildcardCharacterSet: CharacterSet = characterSetDifference(
|
|
|
224
228
|
);
|
|
225
229
|
|
|
226
230
|
const digitChars: CharacterSet = characterSetCharRange('0', '9');
|
|
227
|
-
const nonDigitChars: CharacterSet =
|
|
231
|
+
const nonDigitChars: CharacterSet = characterSetDifference(bmpRange, digitChars);
|
|
228
232
|
|
|
229
233
|
const wordChars: CharacterSet = [
|
|
230
234
|
characterSetCharRange('a', 'z'),
|
|
@@ -232,7 +236,7 @@ const wordChars: CharacterSet = [
|
|
|
232
236
|
characterSetCharRange('0', '9'),
|
|
233
237
|
characterSetSingleton('_'),
|
|
234
238
|
].reduce(characterSetUnion);
|
|
235
|
-
const nonWordChars: CharacterSet =
|
|
239
|
+
const nonWordChars: CharacterSet = characterSetDifference(bmpRange, wordChars);
|
|
236
240
|
|
|
237
241
|
const whiteSpaceChars: CharacterSet = [
|
|
238
242
|
characterSetSingleton('\f'),
|
|
@@ -251,7 +255,7 @@ const whiteSpaceChars: CharacterSet = [
|
|
|
251
255
|
characterSetSingleton('\u3000'),
|
|
252
256
|
characterSetSingleton('\ufeff'),
|
|
253
257
|
].reduce(characterSetUnion);
|
|
254
|
-
const nonWhiteSpaceChars: CharacterSet =
|
|
258
|
+
const nonWhiteSpaceChars: CharacterSet = characterSetDifference(bmpRange, whiteSpaceChars);
|
|
255
259
|
|
|
256
260
|
// AST constructors
|
|
257
261
|
|
|
@@ -292,8 +296,8 @@ function captureGroup(inner: RegularExpression, name?: string): RegularExpressio
|
|
|
292
296
|
return { type: 'capture-group', inner, name };
|
|
293
297
|
}
|
|
294
298
|
|
|
295
|
-
function
|
|
296
|
-
return { type: '
|
|
299
|
+
function assertion(direction: AssertionDir, sign: AssertionSign, inner: RegularExpression, outer: RegularExpression): RegularExpression {
|
|
300
|
+
return { type: 'assertion', direction, sign, inner, outer };
|
|
297
301
|
}
|
|
298
302
|
|
|
299
303
|
function startAnchor(left: RegularExpression, right: RegularExpression): RegularExpression {
|
|
@@ -756,22 +760,24 @@ const nonCaptureGroupParser: Parser<RegularExpression, string> = promiseCompose(
|
|
|
756
760
|
([, inner]) => inner,
|
|
757
761
|
);
|
|
758
762
|
|
|
759
|
-
//
|
|
760
|
-
type
|
|
763
|
+
// Assertion markers for internal use during parsing
|
|
764
|
+
type AssertionMarker = { type: 'assertion-marker'; direction: AssertionDir; sign: AssertionSign; inner: RegularExpression };
|
|
761
765
|
|
|
762
766
|
// Positive lookahead (?=...)
|
|
763
|
-
const positiveLookaheadMarkerParser: Parser<
|
|
764
|
-
type: '
|
|
765
|
-
|
|
767
|
+
const positiveLookaheadMarkerParser: Parser<AssertionMarker, string> = createObjectParser({
|
|
768
|
+
type: 'assertion-marker' as const,
|
|
769
|
+
direction: AssertionDir.AHEAD as const,
|
|
770
|
+
sign: AssertionSign.POSITIVE as const,
|
|
766
771
|
_open: createExactSequenceParser('(?='),
|
|
767
772
|
inner: createParserAccessorParser(() => alternationParser),
|
|
768
773
|
_close: createExactSequenceParser(')'),
|
|
769
774
|
});
|
|
770
775
|
|
|
771
776
|
// Negative lookahead (?!...)
|
|
772
|
-
const negativeLookaheadMarkerParser: Parser<
|
|
773
|
-
type: '
|
|
774
|
-
|
|
777
|
+
const negativeLookaheadMarkerParser: Parser<AssertionMarker, string> = createObjectParser({
|
|
778
|
+
type: 'assertion-marker' as const,
|
|
779
|
+
direction: AssertionDir.AHEAD as const,
|
|
780
|
+
sign: AssertionSign.NEGATIVE as const,
|
|
775
781
|
_open: createExactSequenceParser('(?!'),
|
|
776
782
|
inner: createParserAccessorParser(() => alternationParser),
|
|
777
783
|
_close: createExactSequenceParser(')'),
|
|
@@ -786,7 +792,7 @@ const groupParser: Parser<RegularExpression, string> = createUnionParser([
|
|
|
786
792
|
// Anchors
|
|
787
793
|
// Anchor markers for internal use during parsing
|
|
788
794
|
type AnchorMarker = { type: 'start-anchor-marker' } | { type: 'end-anchor-marker' };
|
|
789
|
-
type ParsedElement = RegularExpression | AnchorMarker |
|
|
795
|
+
type ParsedElement = RegularExpression | AnchorMarker | AssertionMarker;
|
|
790
796
|
|
|
791
797
|
const startAnchorMarkerParser: Parser<AnchorMarker, string> = createObjectParser({
|
|
792
798
|
type: 'start-anchor-marker' as const,
|
|
@@ -847,9 +853,9 @@ function concatList(parts: RegularExpression[]): RegularExpression {
|
|
|
847
853
|
return parts.reduceRight((acc, part) => concat(part, acc));
|
|
848
854
|
}
|
|
849
855
|
|
|
850
|
-
// Process elements with anchor markers and
|
|
851
|
-
// Handles anchors and
|
|
852
|
-
// Precedence order (lowest to highest): union -> start-anchor -> end-anchor ->
|
|
856
|
+
// Process elements with anchor markers and assertion markers into proper AST
|
|
857
|
+
// Handles anchors and assertions as infix operators like @gruhn/regex-utils
|
|
858
|
+
// Precedence order (lowest to highest): union -> start-anchor -> end-anchor -> assertion -> concat
|
|
853
859
|
function processElements(elements: ParsedElement[]): RegularExpression {
|
|
854
860
|
if (elements.length === 0) {
|
|
855
861
|
return epsilon;
|
|
@@ -871,18 +877,31 @@ function processElements(elements: ParsedElement[]): RegularExpression {
|
|
|
871
877
|
return endAnchor(processElements(left), processElements(right));
|
|
872
878
|
}
|
|
873
879
|
|
|
874
|
-
// Then
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
const
|
|
880
|
-
const
|
|
881
|
-
|
|
882
|
-
|
|
880
|
+
// Then assertions (higher precedence than anchors)
|
|
881
|
+
// Special handling: Negative lookahead at the start with more content after it
|
|
882
|
+
// forms a concat with epsilon outer, instead of consuming everything into outer
|
|
883
|
+
const assertionIdx = elements.findIndex(e => 'type' in e && e.type === 'assertion-marker');
|
|
884
|
+
if (assertionIdx !== -1) {
|
|
885
|
+
const marker = elements[assertionIdx] as AssertionMarker;
|
|
886
|
+
const left = elements.slice(0, assertionIdx);
|
|
887
|
+
const right = elements.slice(assertionIdx + 1);
|
|
888
|
+
|
|
889
|
+
// Special case: Negative lookahead at the start followed by more content
|
|
890
|
+
// Creates concat instead of nesting
|
|
891
|
+
if (left.length === 0 && marker.sign === AssertionSign.NEGATIVE && right.length > 0) {
|
|
892
|
+
const assertionExpr = assertion(marker.direction, marker.sign, marker.inner, epsilon);
|
|
893
|
+
return concat(assertionExpr, processElements(right));
|
|
894
|
+
}
|
|
895
|
+
|
|
896
|
+
// Assertion after content: always concat with epsilon outer
|
|
897
|
+
if (left.length > 0) {
|
|
898
|
+
const assertionExpr = assertion(marker.direction, marker.sign, marker.inner, epsilon);
|
|
899
|
+
return concat(processElements(left), concat(assertionExpr, processElements(right)));
|
|
883
900
|
}
|
|
884
|
-
|
|
885
|
-
|
|
901
|
+
|
|
902
|
+
// Assertion at start (not negative lookahead with content after): consume everything
|
|
903
|
+
const assertionExpr = assertion(marker.direction, marker.sign, marker.inner, processElements(right));
|
|
904
|
+
return assertionExpr;
|
|
886
905
|
}
|
|
887
906
|
|
|
888
907
|
// No markers, just regular expressions - concatenate them
|
package/src/smaliParser.ts
CHANGED
|
@@ -8,7 +8,6 @@ import {
|
|
|
8
8
|
import { createExactSequenceParser } from './exactSequenceParser.js';
|
|
9
9
|
import { createObjectParser } from './objectParser.js';
|
|
10
10
|
import { cloneParser, type Parser, setParserName } from './parser.js';
|
|
11
|
-
import { type ParserContext } from './parserContext.js';
|
|
12
11
|
import { promiseCompose } from './promiseCompose.js';
|
|
13
12
|
import { createTupleParser } from './tupleParser.js';
|
|
14
13
|
import { createUnionParser } from './unionParser.js';
|
|
@@ -18,6 +17,7 @@ import { createNonEmptyArrayParser } from './nonEmptyArrayParser.js';
|
|
|
18
17
|
import { createOptionalParser } from './optionalParser.js';
|
|
19
18
|
import { createNegativeLookaheadParser } from './negativeLookaheadParser.js';
|
|
20
19
|
import { createSeparatedArrayParser } from './separatedArrayParser.js';
|
|
20
|
+
import { createPredicateElementParser } from './predicateElementParser.js';
|
|
21
21
|
import { smaliMemberNameParser, smaliTypeDescriptorParser } from './dalvikExecutableParser/stringSyntaxParser.js';
|
|
22
22
|
import { createDisjunctionParser } from './disjunctionParser.js';
|
|
23
23
|
import { createSeparatedNonEmptyArrayParser } from './separatedNonEmptyArrayParser.js';
|
|
@@ -348,28 +348,16 @@ const smaliCharacterLiteralParser: Parser<number, string> = promiseCompose(
|
|
|
348
348
|
setParserName(smaliCharacterLiteralParser, 'smaliCharacterLiteralParser');
|
|
349
349
|
|
|
350
350
|
// Parser that matches identifier continuation characters (letters, digits, $, -, _)
|
|
351
|
-
const smaliIdentifierContinuationParser: Parser<string, string> =
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|| (character >= '0' && character <= '9')
|
|
362
|
-
|| character === '$'
|
|
363
|
-
|| character === '-'
|
|
364
|
-
|| character === '_',
|
|
365
|
-
'Expected identifier continuation character, got "%s"',
|
|
366
|
-
character,
|
|
367
|
-
);
|
|
368
|
-
|
|
369
|
-
parserContext.skip(1);
|
|
370
|
-
|
|
371
|
-
return character;
|
|
372
|
-
};
|
|
351
|
+
const smaliIdentifierContinuationParser: Parser<string, string> = createPredicateElementParser(
|
|
352
|
+
function isSmaliIdentifierContinuation(character: string) {
|
|
353
|
+
return (character >= 'a' && character <= 'z')
|
|
354
|
+
|| (character >= 'A' && character <= 'Z')
|
|
355
|
+
|| (character >= '0' && character <= '9')
|
|
356
|
+
|| character === '$'
|
|
357
|
+
|| character === '-'
|
|
358
|
+
|| character === '_';
|
|
359
|
+
},
|
|
360
|
+
);
|
|
373
361
|
|
|
374
362
|
setParserName(smaliIdentifierContinuationParser, 'smaliIdentifierContinuationParser');
|
|
375
363
|
|
package/src/symbolicExpressionParser.ts
CHANGED
|
@@ -60,13 +60,19 @@ const symbolicExpressionStringParser: Parser<SymbolicExpressionString, string> =
|
|
|
60
60
|
setParserName(symbolicExpressionStringParser, 'symbolicExpressionStringParser');
|
|
61
61
|
|
|
62
62
|
// Atom parser: unquoted symbols (any chars except whitespace, parens, quotes, etc.)
|
|
63
|
-
// Supports backslash escapes: \x becomes x
|
|
63
|
+
// Supports backslash escapes: \x becomes x
|
|
64
|
+
// Note: A lone backslash or one that produces an empty atom should fail parsing
|
|
64
65
|
const symbolicExpressionAtomParser: Parser<SymbolicExpressionAtom, string> = promiseCompose(
|
|
65
|
-
createRegExpParser(/(?:[^\s()"'`,;\\]|\\.)
|
|
66
|
+
createRegExpParser(/(?:[^\s()"'`,;\\]|\\.)+\\?/),
|
|
66
67
|
match => {
|
|
67
68
|
const raw = match[0];
|
|
68
|
-
// Process backslash escapes: \x becomes x
|
|
69
|
+
// Process backslash escapes: \x becomes x
|
|
69
70
|
const value = raw.replace(/\\(.?)/g, '$1');
|
|
71
|
+
// Reject atoms that result in empty strings
|
|
72
|
+
if (value.length === 0) {
|
|
73
|
+
throw new Error('Atom cannot be empty');
|
|
74
|
+
}
|
|
75
|
+
|
|
70
76
|
return {
|
|
71
77
|
type: 'atom' as const,
|
|
72
78
|
value,
|