@futpib/parser 1.0.7 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/arbitraryBash.d.ts +3 -0
- package/build/arbitraryBash.js +142 -0
- package/build/arbitraryJavaScript.js +4 -4
- package/build/arbitraryZipStream.d.ts +1 -1
- package/build/bashUnparser.d.ts +3 -0
- package/build/bashUnparser.js +157 -0
- package/build/bashUnparser.test.d.ts +1 -0
- package/build/bashUnparser.test.js +24 -0
- package/build/bsonParser.js +3 -3
- package/build/dalvikExecutableParser/stringSyntaxParser.js +20 -31
- package/build/dalvikExecutableParser.js +2 -5
- package/build/hasExecutable.js +1 -1
- package/build/jsonParser.js +2 -7
- package/build/regularExpression.d.ts +12 -3
- package/build/regularExpression.js +10 -1
- package/build/regularExpressionParser.js +39 -25
- package/build/regularExpressionParser.test.js +2 -2
- package/build/smaliParser.js +5 -9
- package/build/symbolicExpressionParser.js +8 -3
- package/package.json +9 -9
- package/readme.md +468 -7
- package/src/arbitraryBash.ts +237 -0
- package/src/arbitraryJavaScript.ts +4 -4
- package/src/bashUnparser.test.ts +37 -0
- package/src/bashUnparser.ts +211 -0
- package/src/bsonParser.ts +4 -7
- package/src/dalvikExecutableParser/stringSyntaxParser.ts +27 -74
- package/src/dalvikExecutableParser.ts +4 -10
- package/src/hasExecutable.ts +1 -1
- package/src/jsonParser.ts +2 -11
- package/src/regularExpression.ts +11 -1
- package/src/regularExpressionParser.test.ts +3 -3
- package/src/regularExpressionParser.ts +49 -30
- package/src/smaliParser.ts +11 -23
- package/src/symbolicExpressionParser.ts +9 -3
|
@@ -12,6 +12,7 @@ import { createTerminatedArrayParser } from './terminatedArrayParser.js';
|
|
|
12
12
|
import { createDisjunctionParser } from './disjunctionParser.js';
|
|
13
13
|
import { createNegativeLookaheadParser } from './negativeLookaheadParser.js';
|
|
14
14
|
import { createObjectParser } from './objectParser.js';
|
|
15
|
+
import { AssertionDir, AssertionSign, } from './regularExpression.js';
|
|
15
16
|
// CharacterSet helpers
|
|
16
17
|
const emptyCharacterSet = { type: 'empty' };
|
|
17
18
|
function codePointRangeIsEmpty(range) {
|
|
@@ -168,20 +169,21 @@ function characterSetFromArray(chars) {
|
|
|
168
169
|
return chars.map(characterSetSingleton).reduce(characterSetUnion, emptyCharacterSet);
|
|
169
170
|
}
|
|
170
171
|
function characterSetComplement(set) {
|
|
171
|
-
return characterSetDifference(
|
|
172
|
+
return characterSetDifference(bmpRange, set);
|
|
172
173
|
}
|
|
173
174
|
// Pre-defined character sets
|
|
174
|
-
const
|
|
175
|
+
const bmpRange = characterSetFromRange({ start: 0, end: 0xFFFF });
|
|
176
|
+
const alphabet = characterSetDifference(bmpRange, characterSetFromArray(['\r', '\n', '\u2028', '\u2029']));
|
|
175
177
|
const wildcardCharacterSet = characterSetDifference(alphabet, characterSetFromArray(['\r', '\n', '\u2028', '\u2029']));
|
|
176
178
|
const digitChars = characterSetCharRange('0', '9');
|
|
177
|
-
const nonDigitChars =
|
|
179
|
+
const nonDigitChars = characterSetDifference(bmpRange, digitChars);
|
|
178
180
|
const wordChars = [
|
|
179
181
|
characterSetCharRange('a', 'z'),
|
|
180
182
|
characterSetCharRange('A', 'Z'),
|
|
181
183
|
characterSetCharRange('0', '9'),
|
|
182
184
|
characterSetSingleton('_'),
|
|
183
185
|
].reduce(characterSetUnion);
|
|
184
|
-
const nonWordChars =
|
|
186
|
+
const nonWordChars = characterSetDifference(bmpRange, wordChars);
|
|
185
187
|
const whiteSpaceChars = [
|
|
186
188
|
characterSetSingleton('\f'),
|
|
187
189
|
characterSetSingleton('\n'),
|
|
@@ -199,7 +201,7 @@ const whiteSpaceChars = [
|
|
|
199
201
|
characterSetSingleton('\u3000'),
|
|
200
202
|
characterSetSingleton('\ufeff'),
|
|
201
203
|
].reduce(characterSetUnion);
|
|
202
|
-
const nonWhiteSpaceChars =
|
|
204
|
+
const nonWhiteSpaceChars = characterSetDifference(bmpRange, whiteSpaceChars);
|
|
203
205
|
// AST constructors
|
|
204
206
|
const epsilon = { type: 'epsilon' };
|
|
205
207
|
function literal(charset) {
|
|
@@ -229,8 +231,8 @@ function captureGroup(inner, name) {
|
|
|
229
231
|
}
|
|
230
232
|
return { type: 'capture-group', inner, name };
|
|
231
233
|
}
|
|
232
|
-
function
|
|
233
|
-
return { type: '
|
|
234
|
+
function assertion(direction, sign, inner, outer) {
|
|
235
|
+
return { type: 'assertion', direction, sign, inner, outer };
|
|
234
236
|
}
|
|
235
237
|
function startAnchor(left, right) {
|
|
236
238
|
return { type: 'start-anchor', left, right };
|
|
@@ -476,16 +478,18 @@ const nonCaptureGroupParser = promiseCompose(createTupleParser([
|
|
|
476
478
|
]), ([, inner]) => inner);
|
|
477
479
|
// Positive lookahead (?=...)
|
|
478
480
|
const positiveLookaheadMarkerParser = createObjectParser({
|
|
479
|
-
type: '
|
|
480
|
-
|
|
481
|
+
type: 'assertion-marker',
|
|
482
|
+
direction: AssertionDir.AHEAD,
|
|
483
|
+
sign: AssertionSign.POSITIVE,
|
|
481
484
|
_open: createExactSequenceParser('(?='),
|
|
482
485
|
inner: createParserAccessorParser(() => alternationParser),
|
|
483
486
|
_close: createExactSequenceParser(')'),
|
|
484
487
|
});
|
|
485
488
|
// Negative lookahead (?!...)
|
|
486
489
|
const negativeLookaheadMarkerParser = createObjectParser({
|
|
487
|
-
type: '
|
|
488
|
-
|
|
490
|
+
type: 'assertion-marker',
|
|
491
|
+
direction: AssertionDir.AHEAD,
|
|
492
|
+
sign: AssertionSign.NEGATIVE,
|
|
489
493
|
_open: createExactSequenceParser('(?!'),
|
|
490
494
|
inner: createParserAccessorParser(() => alternationParser),
|
|
491
495
|
_close: createExactSequenceParser(')'),
|
|
@@ -545,9 +549,9 @@ function concatList(parts) {
|
|
|
545
549
|
}
|
|
546
550
|
return parts.reduceRight((acc, part) => concat(part, acc));
|
|
547
551
|
}
|
|
548
|
-
// Process elements with anchor markers and
|
|
549
|
-
// Handles anchors and
|
|
550
|
-
// Precedence order (lowest to highest): union -> start-anchor -> end-anchor ->
|
|
552
|
+
// Process elements with anchor markers and assertion markers into proper AST
|
|
553
|
+
// Handles anchors and assertions as infix operators like @gruhn/regex-utils
|
|
554
|
+
// Precedence order (lowest to highest): union -> start-anchor -> end-anchor -> assertion -> concat
|
|
551
555
|
function processElements(elements) {
|
|
552
556
|
if (elements.length === 0) {
|
|
553
557
|
return epsilon;
|
|
@@ -566,18 +570,28 @@ function processElements(elements) {
|
|
|
566
570
|
const right = elements.slice(endAnchorIdx + 1);
|
|
567
571
|
return endAnchor(processElements(left), processElements(right));
|
|
568
572
|
}
|
|
569
|
-
// Then
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
const
|
|
575
|
-
const
|
|
576
|
-
|
|
577
|
-
|
|
573
|
+
// Then assertions (higher precedence than anchors)
|
|
574
|
+
// Special handling: Negative lookahead at the start with more content after it
|
|
575
|
+
// forms a concat with epsilon outer, instead of consuming everything into outer
|
|
576
|
+
const assertionIdx = elements.findIndex(e => 'type' in e && e.type === 'assertion-marker');
|
|
577
|
+
if (assertionIdx !== -1) {
|
|
578
|
+
const marker = elements[assertionIdx];
|
|
579
|
+
const left = elements.slice(0, assertionIdx);
|
|
580
|
+
const right = elements.slice(assertionIdx + 1);
|
|
581
|
+
// Special case: Negative lookahead at the start followed by more content
|
|
582
|
+
// Creates concat instead of nesting
|
|
583
|
+
if (left.length === 0 && marker.sign === AssertionSign.NEGATIVE && right.length > 0) {
|
|
584
|
+
const assertionExpr = assertion(marker.direction, marker.sign, marker.inner, epsilon);
|
|
585
|
+
return concat(assertionExpr, processElements(right));
|
|
578
586
|
}
|
|
579
|
-
//
|
|
580
|
-
|
|
587
|
+
// Assertion after content: always concat with epsilon outer
|
|
588
|
+
if (left.length > 0) {
|
|
589
|
+
const assertionExpr = assertion(marker.direction, marker.sign, marker.inner, epsilon);
|
|
590
|
+
return concat(processElements(left), concat(assertionExpr, processElements(right)));
|
|
591
|
+
}
|
|
592
|
+
// Assertion at start (not negative lookahead with content after): consume everything
|
|
593
|
+
const assertionExpr = assertion(marker.direction, marker.sign, marker.inner, processElements(right));
|
|
594
|
+
return assertionExpr;
|
|
581
595
|
}
|
|
582
596
|
// No markers, just regular expressions - concatenate them
|
|
583
597
|
const regexParts = elements;
|
|
@@ -42,8 +42,8 @@ function normalizeRegularExpression(ast) {
|
|
|
42
42
|
return { type: 'capture-group', inner: normalizeRegularExpression(ast.inner), name: ast.name };
|
|
43
43
|
}
|
|
44
44
|
return { type: 'capture-group', inner: normalizeRegularExpression(ast.inner) };
|
|
45
|
-
case '
|
|
46
|
-
return { type: '
|
|
45
|
+
case 'assertion':
|
|
46
|
+
return { type: 'assertion', direction: ast.direction, sign: ast.sign, inner: normalizeRegularExpression(ast.inner), outer: normalizeRegularExpression(ast.outer) };
|
|
47
47
|
case 'start-anchor':
|
|
48
48
|
return { type: 'start-anchor', left: normalizeRegularExpression(ast.left), right: normalizeRegularExpression(ast.right) };
|
|
49
49
|
case 'end-anchor':
|
package/build/smaliParser.js
CHANGED
|
@@ -14,6 +14,7 @@ import { createNonEmptyArrayParser } from './nonEmptyArrayParser.js';
|
|
|
14
14
|
import { createOptionalParser } from './optionalParser.js';
|
|
15
15
|
import { createNegativeLookaheadParser } from './negativeLookaheadParser.js';
|
|
16
16
|
import { createSeparatedArrayParser } from './separatedArrayParser.js';
|
|
17
|
+
import { createPredicateElementParser } from './predicateElementParser.js';
|
|
17
18
|
import { smaliMemberNameParser, smaliTypeDescriptorParser } from './dalvikExecutableParser/stringSyntaxParser.js';
|
|
18
19
|
import { createDisjunctionParser } from './disjunctionParser.js';
|
|
19
20
|
import { createSeparatedNonEmptyArrayParser } from './separatedNonEmptyArrayParser.js';
|
|
@@ -236,19 +237,14 @@ const smaliCharacterLiteralParser = promiseCompose(createTupleParser([
|
|
|
236
237
|
]), ([, character]) => character.charCodeAt(0));
|
|
237
238
|
setParserName(smaliCharacterLiteralParser, 'smaliCharacterLiteralParser');
|
|
238
239
|
// Parser that matches identifier continuation characters (letters, digits, $, -, _)
|
|
239
|
-
const smaliIdentifierContinuationParser =
|
|
240
|
-
|
|
241
|
-
parserContext.invariant(character !== undefined, 'Unexpected end of input');
|
|
242
|
-
invariant(character !== undefined, 'Unexpected end of input');
|
|
243
|
-
parserContext.invariant((character >= 'a' && character <= 'z')
|
|
240
|
+
const smaliIdentifierContinuationParser = createPredicateElementParser(function isSmaliIdentifierContinuation(character) {
|
|
241
|
+
return (character >= 'a' && character <= 'z')
|
|
244
242
|
|| (character >= 'A' && character <= 'Z')
|
|
245
243
|
|| (character >= '0' && character <= '9')
|
|
246
244
|
|| character === '$'
|
|
247
245
|
|| character === '-'
|
|
248
|
-
|| character === '_'
|
|
249
|
-
|
|
250
|
-
return character;
|
|
251
|
-
};
|
|
246
|
+
|| character === '_';
|
|
247
|
+
});
|
|
252
248
|
setParserName(smaliIdentifierContinuationParser, 'smaliIdentifierContinuationParser');
|
|
253
249
|
// Helper to create an access flag parser with word boundary check
|
|
254
250
|
const createAccessFlagParser = (keyword) => promiseCompose(createTupleParser([
|
|
@@ -34,11 +34,16 @@ const symbolicExpressionStringParser = promiseCompose(createRegExpParser(/"(?:[^
|
|
|
34
34
|
});
|
|
35
35
|
setParserName(symbolicExpressionStringParser, 'symbolicExpressionStringParser');
|
|
36
36
|
// Atom parser: unquoted symbols (any chars except whitespace, parens, quotes, etc.)
|
|
37
|
-
// Supports backslash escapes: \x becomes x
|
|
38
|
-
|
|
37
|
+
// Supports backslash escapes: \x becomes x
|
|
38
|
+
// Note: A lone backslash or one that produces an empty atom should fail parsing
|
|
39
|
+
const symbolicExpressionAtomParser = promiseCompose(createRegExpParser(/(?:[^\s()"'`,;\\]|\\.)+\\?/), match => {
|
|
39
40
|
const raw = match[0];
|
|
40
|
-
// Process backslash escapes: \x becomes x
|
|
41
|
+
// Process backslash escapes: \x becomes x
|
|
41
42
|
const value = raw.replace(/\\(.?)/g, '$1');
|
|
43
|
+
// Reject atoms that result in empty strings
|
|
44
|
+
if (value.length === 0) {
|
|
45
|
+
throw new Error('Atom cannot be empty');
|
|
46
|
+
}
|
|
42
47
|
return {
|
|
43
48
|
type: 'atom',
|
|
44
49
|
value,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@futpib/parser",
|
|
3
|
-
"version": "1.0.7",
|
|
3
|
+
"version": "1.0.8",
|
|
4
4
|
"main": "build/index.js",
|
|
5
5
|
"types": "build/index.d.ts",
|
|
6
6
|
"license": "GPL-3.0-only",
|
|
@@ -37,27 +37,27 @@
|
|
|
37
37
|
"@ava/typescript": "^6.0.0",
|
|
38
38
|
"@fast-check/ava": "^2.0.2",
|
|
39
39
|
"@futpib/fetch-cid": "^1.0.2",
|
|
40
|
-
"@gruhn/regex-utils": "
|
|
40
|
+
"@gruhn/regex-utils": "2.9.1",
|
|
41
41
|
"@types/estree": "^1.0.8",
|
|
42
42
|
"@types/invariant": "^2.2.37",
|
|
43
|
-
"@types/node": "^
|
|
43
|
+
"@types/node": "^25.2.3",
|
|
44
44
|
"ava": "^6.4.1",
|
|
45
|
-
"bson": "^7.
|
|
45
|
+
"bson": "^7.2.0",
|
|
46
46
|
"c8": "^10.1.3",
|
|
47
47
|
"coveralls": "^3.1.1",
|
|
48
|
-
"env-paths": "^
|
|
48
|
+
"env-paths": "^4.0.0",
|
|
49
49
|
"eslint-config-xo-typescript-overrides": "^2.0.3",
|
|
50
|
-
"execa": "^9.6.
|
|
51
|
-
"fast-check": "^4.3
|
|
50
|
+
"execa": "^9.6.1",
|
|
51
|
+
"fast-check": "^4.5.3",
|
|
52
52
|
"invariant": "^2.2.4",
|
|
53
53
|
"jszip": "^3.10.1",
|
|
54
54
|
"leb128": "^0.0.5",
|
|
55
55
|
"mutf-8": "^1.2.2",
|
|
56
56
|
"p-memoize": "^8.0.0",
|
|
57
57
|
"s-expression": "^3.1.1",
|
|
58
|
-
"tempy": "^3.
|
|
58
|
+
"tempy": "^3.2.0",
|
|
59
59
|
"tsd": "^0.33.0",
|
|
60
|
-
"type-fest": "^5.
|
|
60
|
+
"type-fest": "^5.4.4",
|
|
61
61
|
"typescript": "^5.9.3",
|
|
62
62
|
"xo": "^1.2.3"
|
|
63
63
|
},
|