npm - @futpib/parser - Versions diffs - 1.0.6 → 1.0.8 - Mend

@futpib/parser 1.0.6 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

package/build/arbitraryBash.d.ts +3 -0
package/build/arbitraryBash.js +142 -0
package/build/arbitraryJavaScript.js +4 -4
package/build/arbitraryZipStream.d.ts +1 -1
package/build/bashParser.js +317 -75
package/build/bashParser.test.js +71 -0
package/build/bashUnparser.d.ts +3 -0
package/build/bashUnparser.js +157 -0
package/build/bashUnparser.test.d.ts +1 -0
package/build/bashUnparser.test.js +24 -0
package/build/bsonParser.js +3 -3
package/build/dalvikExecutableParser/stringSyntaxParser.js +20 -31
package/build/dalvikExecutableParser.js +2 -5
package/build/hasExecutable.js +1 -1
package/build/index.d.ts +1 -0
package/build/index.js +1 -0
package/build/jsonParser.js +2 -7
package/build/predicateElementParser.d.ts +3 -0
package/build/predicateElementParser.js +10 -0
package/build/regularExpression.d.ts +12 -3
package/build/regularExpression.js +10 -1
package/build/regularExpressionParser.js +39 -25
package/build/regularExpressionParser.test.js +2 -2
package/build/smaliParser.js +5 -9
package/build/symbolicExpressionParser.js +8 -3
package/package.json +9 -9
package/readme.md +468 -7
package/src/arbitraryBash.ts +237 -0
package/src/arbitraryJavaScript.ts +4 -4
package/src/bashParser.test.ts +138 -0
package/src/bashParser.ts +467 -139
package/src/bashUnparser.test.ts +37 -0
package/src/bashUnparser.ts +211 -0
package/src/bsonParser.ts +4 -7
package/src/dalvikExecutableParser/stringSyntaxParser.ts +27 -74
package/src/dalvikExecutableParser.ts +4 -10
package/src/hasExecutable.ts +1 -1
package/src/index.ts +4 -0
package/src/jsonParser.ts +2 -11
package/src/predicateElementParser.ts +22 -0
package/src/regularExpression.ts +11 -1
package/src/regularExpressionParser.test.ts +3 -3
package/src/regularExpressionParser.ts +49 -30
package/src/smaliParser.ts +11 -23
package/src/symbolicExpressionParser.ts +9 -3

package/src/regularExpressionParser.ts CHANGED

@@ -14,6 +14,8 @@ import { createDisjunctionParser } from './disjunctionParser.js';
 import { createNegativeLookaheadParser } from './negativeLookaheadParser.js';
 import { createObjectParser } from './objectParser.js';
 import {
+	AssertionDir,
+	AssertionSign,
 	type CharacterSet,
 	type CodePointRange,
 	type RegularExpression,
@@ -209,12 +211,14 @@ function characterSetFromArray(chars: string[]): CharacterSet {
 }
 function characterSetComplement(set: CharacterSet): CharacterSet {
-	return characterSetDifference(alphabet, set);
+	return characterSetDifference(bmpRange, set);
 }
 // Pre-defined character sets
+const bmpRange: CharacterSet = characterSetFromRange({ start: 0, end: 0xFFFF });
 const alphabet: CharacterSet = characterSetDifference(
-	characterSetFromRange({ start: 0, end: 0x10FFFF }),
+	bmpRange,
 	characterSetFromArray(['\r', '\n', '\u2028', '\u2029']),
 );
@@ -224,7 +228,7 @@ const wildcardCharacterSet: CharacterSet = characterSetDifference(
 );
 const digitChars: CharacterSet = characterSetCharRange('0', '9');
-const nonDigitChars: CharacterSet = characterSetComplement(digitChars);
+const nonDigitChars: CharacterSet = characterSetDifference(bmpRange, digitChars);
 const wordChars: CharacterSet = [
 	characterSetCharRange('a', 'z'),
@@ -232,7 +236,7 @@ const wordChars: CharacterSet = [
 	characterSetCharRange('0', '9'),
 	characterSetSingleton('_'),
 ].reduce(characterSetUnion);
-const nonWordChars: CharacterSet = characterSetComplement(wordChars);
+const nonWordChars: CharacterSet = characterSetDifference(bmpRange, wordChars);
 const whiteSpaceChars: CharacterSet = [
 	characterSetSingleton('\f'),
@@ -251,7 +255,7 @@ const whiteSpaceChars: CharacterSet = [
 	characterSetSingleton('\u3000'),
 	characterSetSingleton('\ufeff'),
 ].reduce(characterSetUnion);
-const nonWhiteSpaceChars: CharacterSet = characterSetComplement(whiteSpaceChars);
+const nonWhiteSpaceChars: CharacterSet = characterSetDifference(bmpRange, whiteSpaceChars);
 // AST constructors
@@ -292,8 +296,8 @@ function captureGroup(inner: RegularExpression, name?: string): RegularExpressio
 	return { type: 'capture-group', inner, name };
 }
-function lookahead(isPositive: boolean, inner: RegularExpression, right: RegularExpression): RegularExpression {
-	return { type: 'lookahead', isPositive, inner, right };
+function assertion(direction: AssertionDir, sign: AssertionSign, inner: RegularExpression, outer: RegularExpression): RegularExpression {
+	return { type: 'assertion', direction, sign, inner, outer };
 }
 function startAnchor(left: RegularExpression, right: RegularExpression): RegularExpression {
@@ -756,22 +760,24 @@ const nonCaptureGroupParser: Parser<RegularExpression, string> = promiseCompose(
 	([, inner]) => inner,
 );
-// Lookahead markers for internal use during parsing
-type LookaheadMarker = { type: 'lookahead-marker'; isPositive: boolean; inner: RegularExpression };
+// Assertion markers for internal use during parsing
+type AssertionMarker = { type: 'assertion-marker'; direction: AssertionDir; sign: AssertionSign; inner: RegularExpression };
 // Positive lookahead (?=...)
-const positiveLookaheadMarkerParser: Parser<LookaheadMarker, string> = createObjectParser({
-	type: 'lookahead-marker' as const,
-	isPositive: true as const,
+const positiveLookaheadMarkerParser: Parser<AssertionMarker, string> = createObjectParser({
+	type: 'assertion-marker' as const,
+	direction: AssertionDir.AHEAD as const,
+	sign: AssertionSign.POSITIVE as const,
 	_open: createExactSequenceParser('(?='),
 	inner: createParserAccessorParser(() => alternationParser),
 	_close: createExactSequenceParser(')'),
 });
 // Negative lookahead (?!...)
-const negativeLookaheadMarkerParser: Parser<LookaheadMarker, string> = createObjectParser({
-	type: 'lookahead-marker' as const,
-	isPositive: false as const,
+const negativeLookaheadMarkerParser: Parser<AssertionMarker, string> = createObjectParser({
+	type: 'assertion-marker' as const,
+	direction: AssertionDir.AHEAD as const,
+	sign: AssertionSign.NEGATIVE as const,
 	_open: createExactSequenceParser('(?!'),
 	inner: createParserAccessorParser(() => alternationParser),
 	_close: createExactSequenceParser(')'),
@@ -786,7 +792,7 @@ const groupParser: Parser<RegularExpression, string> = createUnionParser([
 // Anchors
 // Anchor markers for internal use during parsing
 type AnchorMarker = { type: 'start-anchor-marker' } | { type: 'end-anchor-marker' };
-type ParsedElement = RegularExpression | AnchorMarker | LookaheadMarker;
+type ParsedElement = RegularExpression | AnchorMarker | AssertionMarker;
 const startAnchorMarkerParser: Parser<AnchorMarker, string> = createObjectParser({
 	type: 'start-anchor-marker' as const,
@@ -847,9 +853,9 @@ function concatList(parts: RegularExpression[]): RegularExpression {
 	return parts.reduceRight((acc, part) => concat(part, acc));
 }
-// Process elements with anchor markers and lookahead markers into proper AST
-// Handles anchors and lookahead as infix operators like @gruhn/regex-utils
-// Precedence order (lowest to highest): union -> start-anchor -> end-anchor -> lookahead -> concat
+// Process elements with anchor markers and assertion markers into proper AST
+// Handles anchors and assertions as infix operators like @gruhn/regex-utils
+// Precedence order (lowest to highest): union -> start-anchor -> end-anchor -> assertion -> concat
 function processElements(elements: ParsedElement[]): RegularExpression {
 	if (elements.length === 0) {
 		return epsilon;
@@ -871,18 +877,31 @@ function processElements(elements: ParsedElement[]): RegularExpression {
 		return endAnchor(processElements(left), processElements(right));
 	}
-	// Then lookaheads (higher precedence than anchors)
-	const lookaheadIdx = elements.findIndex(e => 'type' in e && e.type === 'lookahead-marker');
-	if (lookaheadIdx !== -1) {
-		const marker = elements[lookaheadIdx] as LookaheadMarker;
-		const left = elements.slice(0, lookaheadIdx);
-		const right = elements.slice(lookaheadIdx + 1);
-		const lookaheadExpr = lookahead(marker.isPositive, marker.inner, processElements(right));
-		if (left.length === 0) {
-			return lookaheadExpr;
+	// Then assertions (higher precedence than anchors)
+	// Special handling: Negative lookahead at the start with more content after it
+	// forms a concat with epsilon outer, instead of consuming everything into outer
+	const assertionIdx = elements.findIndex(e => 'type' in e && e.type === 'assertion-marker');
+	if (assertionIdx !== -1) {
+		const marker = elements[assertionIdx] as AssertionMarker;
+		const left = elements.slice(0, assertionIdx);
+		const right = elements.slice(assertionIdx + 1);
+		// Special case: Negative lookahead at the start followed by more content
+		// Creates concat instead of nesting
+		if (left.length === 0 && marker.sign === AssertionSign.NEGATIVE && right.length > 0) {
+			const assertionExpr = assertion(marker.direction, marker.sign, marker.inner, epsilon);
+			return concat(assertionExpr, processElements(right));
+		}
+		// Assertion after content: always concat with epsilon outer
+		if (left.length > 0) {
+			const assertionExpr = assertion(marker.direction, marker.sign, marker.inner, epsilon);
+			return concat(processElements(left), concat(assertionExpr, processElements(right)));
 		}
-		// If there's content before the lookahead, concatenate it
-		return concat(processElements(left), lookaheadExpr);
+		// Assertion at start (not negative lookahead with content after): consume everything
+		const assertionExpr = assertion(marker.direction, marker.sign, marker.inner, processElements(right));
+		return assertionExpr;
 	}
 	// No markers, just regular expressions - concatenate them

package/src/smaliParser.ts CHANGED

@@ -8,7 +8,6 @@ import {
 import { createExactSequenceParser } from './exactSequenceParser.js';
 import { createObjectParser } from './objectParser.js';
 import { cloneParser, type Parser, setParserName } from './parser.js';
-import { type ParserContext } from './parserContext.js';
 import { promiseCompose } from './promiseCompose.js';
 import { createTupleParser } from './tupleParser.js';
 import { createUnionParser } from './unionParser.js';
@@ -18,6 +17,7 @@ import { createNonEmptyArrayParser } from './nonEmptyArrayParser.js';
 import { createOptionalParser } from './optionalParser.js';
 import { createNegativeLookaheadParser } from './negativeLookaheadParser.js';
 import { createSeparatedArrayParser } from './separatedArrayParser.js';
+import { createPredicateElementParser } from './predicateElementParser.js';
 import { smaliMemberNameParser, smaliTypeDescriptorParser } from './dalvikExecutableParser/stringSyntaxParser.js';
 import { createDisjunctionParser } from './disjunctionParser.js';
 import { createSeparatedNonEmptyArrayParser } from './separatedNonEmptyArrayParser.js';
@@ -348,28 +348,16 @@ const smaliCharacterLiteralParser: Parser<number, string> = promiseCompose(
 setParserName(smaliCharacterLiteralParser, 'smaliCharacterLiteralParser');
 // Parser that matches identifier continuation characters (letters, digits, $, -, _)
-const smaliIdentifierContinuationParser: Parser<string, string> = async (parserContext: ParserContext<string, string>) => {
-	const character = await parserContext.peek(0);
-	parserContext.invariant(character !== undefined, 'Unexpected end of input');
-	invariant(character !== undefined, 'Unexpected end of input');
-	parserContext.invariant(
-		(character >= 'a' && character <= 'z')
-		|| (character >= 'A' && character <= 'Z')
-		|| (character >= '0' && character <= '9')
-		|| character === '$'
-		|| character === '-'
-		|| character === '_',
-		'Expected identifier continuation character, got "%s"',
-		character,
-	);
-	parserContext.skip(1);
-	return character;
-};
+const smaliIdentifierContinuationParser: Parser<string, string> = createPredicateElementParser(
+	function isSmaliIdentifierContinuation(character: string) {
+		return (character >= 'a' && character <= 'z')
+			|| (character >= 'A' && character <= 'Z')
+			|| (character >= '0' && character <= '9')
+			|| character === '$'
+			|| character === '-'
+			|| character === '_';
+	},
+);
 setParserName(smaliIdentifierContinuationParser, 'smaliIdentifierContinuationParser');

package/src/symbolicExpressionParser.ts CHANGED

@@ -60,13 +60,19 @@ const symbolicExpressionStringParser: Parser<SymbolicExpressionString, string> =
 setParserName(symbolicExpressionStringParser, 'symbolicExpressionStringParser');
 // Atom parser: unquoted symbols (any chars except whitespace, parens, quotes, etc.)
-// Supports backslash escapes: \x becomes x, trailing \ becomes nothing
+// Supports backslash escapes: \x becomes x
+// Note: A lone backslash or one that produces an empty atom should fail parsing
 const symbolicExpressionAtomParser: Parser<SymbolicExpressionAtom, string> = promiseCompose(
-	createRegExpParser(/(?:[^\s()"'`,;\\]|\\.)+\\?|\\$/),
+	createRegExpParser(/(?:[^\s()"'`,;\\]|\\.)+\\?/),
 	match => {
 		const raw = match[0];
-		// Process backslash escapes: \x becomes x, trailing \ becomes nothing
+		// Process backslash escapes: \x becomes x
 		const value = raw.replace(/\\(.?)/g, '$1');
+		// Reject atoms that result in empty strings
+		if (value.length === 0) {
+			throw new Error('Atom cannot be empty');
+		}
 		return {
 			type: 'atom' as const,
 			value,