npm - @futpib/parser - Versions diffs - 1.0.7 → 1.0.8 - Mend

@futpib/parser 1.0.7 → 1.0.8

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (35)

package/build/arbitraryBash.d.ts +3 -0
package/build/arbitraryBash.js +142 -0
package/build/arbitraryJavaScript.js +4 -4
package/build/arbitraryZipStream.d.ts +1 -1
package/build/bashUnparser.d.ts +3 -0
package/build/bashUnparser.js +157 -0
package/build/bashUnparser.test.d.ts +1 -0
package/build/bashUnparser.test.js +24 -0
package/build/bsonParser.js +3 -3
package/build/dalvikExecutableParser/stringSyntaxParser.js +20 -31
package/build/dalvikExecutableParser.js +2 -5
package/build/hasExecutable.js +1 -1
package/build/jsonParser.js +2 -7
package/build/regularExpression.d.ts +12 -3
package/build/regularExpression.js +10 -1
package/build/regularExpressionParser.js +39 -25
package/build/regularExpressionParser.test.js +2 -2
package/build/smaliParser.js +5 -9
package/build/symbolicExpressionParser.js +8 -3
package/package.json +9 -9
package/readme.md +468 -7
package/src/arbitraryBash.ts +237 -0
package/src/arbitraryJavaScript.ts +4 -4
package/src/bashUnparser.test.ts +37 -0
package/src/bashUnparser.ts +211 -0
package/src/bsonParser.ts +4 -7
package/src/dalvikExecutableParser/stringSyntaxParser.ts +27 -74
package/src/dalvikExecutableParser.ts +4 -10
package/src/hasExecutable.ts +1 -1
package/src/jsonParser.ts +2 -11
package/src/regularExpression.ts +11 -1
package/src/regularExpressionParser.test.ts +3 -3
package/src/regularExpressionParser.ts +49 -30
package/src/smaliParser.ts +11 -23
package/src/symbolicExpressionParser.ts +9 -3

package/build/regularExpressionParser.js CHANGED Viewed

@@ -12,6 +12,7 @@ import { createTerminatedArrayParser } from './terminatedArrayParser.js';
 import { createDisjunctionParser } from './disjunctionParser.js';
 import { createNegativeLookaheadParser } from './negativeLookaheadParser.js';
 import { createObjectParser } from './objectParser.js';
+import { AssertionDir, AssertionSign, } from './regularExpression.js';
 // CharacterSet helpers
 const emptyCharacterSet = { type: 'empty' };
 function codePointRangeIsEmpty(range) {
@@ -168,20 +169,21 @@ function characterSetFromArray(chars) {
     return chars.map(characterSetSingleton).reduce(characterSetUnion, emptyCharacterSet);
 }
 function characterSetComplement(set) {
-    return characterSetDifference(alphabet, set);
+    return characterSetDifference(bmpRange, set);
 }
 // Pre-defined character sets
-const alphabet = characterSetDifference(characterSetFromRange({ start: 0, end: 0x10FFFF }), characterSetFromArray(['\r', '\n', '\u2028', '\u2029']));
+const bmpRange = characterSetFromRange({ start: 0, end: 0xFFFF });
+const alphabet = characterSetDifference(bmpRange, characterSetFromArray(['\r', '\n', '\u2028', '\u2029']));
 const wildcardCharacterSet = characterSetDifference(alphabet, characterSetFromArray(['\r', '\n', '\u2028', '\u2029']));
 const digitChars = characterSetCharRange('0', '9');
-const nonDigitChars = characterSetComplement(digitChars);
+const nonDigitChars = characterSetDifference(bmpRange, digitChars);
 const wordChars = [
     characterSetCharRange('a', 'z'),
     characterSetCharRange('A', 'Z'),
     characterSetCharRange('0', '9'),
     characterSetSingleton('_'),
 ].reduce(characterSetUnion);
-const nonWordChars = characterSetComplement(wordChars);
+const nonWordChars = characterSetDifference(bmpRange, wordChars);
 const whiteSpaceChars = [
     characterSetSingleton('\f'),
     characterSetSingleton('\n'),
@@ -199,7 +201,7 @@ const whiteSpaceChars = [
     characterSetSingleton('\u3000'),
     characterSetSingleton('\ufeff'),
 ].reduce(characterSetUnion);
-const nonWhiteSpaceChars = characterSetComplement(whiteSpaceChars);
+const nonWhiteSpaceChars = characterSetDifference(bmpRange, whiteSpaceChars);
 // AST constructors
 const epsilon = { type: 'epsilon' };
 function literal(charset) {
@@ -229,8 +231,8 @@ function captureGroup(inner, name) {
     }
     return { type: 'capture-group', inner, name };
 }
-function lookahead(isPositive, inner, right) {
-    return { type: 'lookahead', isPositive, inner, right };
+function assertion(direction, sign, inner, outer) {
+    return { type: 'assertion', direction, sign, inner, outer };
 }
 function startAnchor(left, right) {
     return { type: 'start-anchor', left, right };
@@ -476,16 +478,18 @@ const nonCaptureGroupParser = promiseCompose(createTupleParser([
 ]), ([, inner]) => inner);
 // Positive lookahead (?=...)
 const positiveLookaheadMarkerParser = createObjectParser({
-    type: 'lookahead-marker',
-    isPositive: true,
+    type: 'assertion-marker',
+    direction: AssertionDir.AHEAD,
+    sign: AssertionSign.POSITIVE,
     _open: createExactSequenceParser('(?='),
     inner: createParserAccessorParser(() => alternationParser),
     _close: createExactSequenceParser(')'),
 });
 // Negative lookahead (?!...)
 const negativeLookaheadMarkerParser = createObjectParser({
-    type: 'lookahead-marker',
-    isPositive: false,
+    type: 'assertion-marker',
+    direction: AssertionDir.AHEAD,
+    sign: AssertionSign.NEGATIVE,
     _open: createExactSequenceParser('(?!'),
     inner: createParserAccessorParser(() => alternationParser),
     _close: createExactSequenceParser(')'),
@@ -545,9 +549,9 @@ function concatList(parts) {
     }
     return parts.reduceRight((acc, part) => concat(part, acc));
 }
-// Process elements with anchor markers and lookahead markers into proper AST
-// Handles anchors and lookahead as infix operators like @gruhn/regex-utils
-// Precedence order (lowest to highest): union -> start-anchor -> end-anchor -> lookahead -> concat
+// Process elements with anchor markers and assertion markers into proper AST
+// Handles anchors and assertions as infix operators like @gruhn/regex-utils
+// Precedence order (lowest to highest): union -> start-anchor -> end-anchor -> assertion -> concat
 function processElements(elements) {
     if (elements.length === 0) {
         return epsilon;
@@ -566,18 +570,28 @@ function processElements(elements) {
         const right = elements.slice(endAnchorIdx + 1);
         return endAnchor(processElements(left), processElements(right));
     }
-    // Then lookaheads (higher precedence than anchors)
-    const lookaheadIdx = elements.findIndex(e => 'type' in e && e.type === 'lookahead-marker');
-    if (lookaheadIdx !== -1) {
-        const marker = elements[lookaheadIdx];
-        const left = elements.slice(0, lookaheadIdx);
-        const right = elements.slice(lookaheadIdx + 1);
-        const lookaheadExpr = lookahead(marker.isPositive, marker.inner, processElements(right));
-        if (left.length === 0) {
-            return lookaheadExpr;
+    // Then assertions (higher precedence than anchors)
+    // Special handling: Negative lookahead at the start with more content after it
+    // forms a concat with epsilon outer, instead of consuming everything into outer
+    const assertionIdx = elements.findIndex(e => 'type' in e && e.type === 'assertion-marker');
+    if (assertionIdx !== -1) {
+        const marker = elements[assertionIdx];
+        const left = elements.slice(0, assertionIdx);
+        const right = elements.slice(assertionIdx + 1);
+        // Special case: Negative lookahead at the start followed by more content
+        // Creates concat instead of nesting
+        if (left.length === 0 && marker.sign === AssertionSign.NEGATIVE && right.length > 0) {
+            const assertionExpr = assertion(marker.direction, marker.sign, marker.inner, epsilon);
+            return concat(assertionExpr, processElements(right));
         }
-        // If there's content before the lookahead, concatenate it
-        return concat(processElements(left), lookaheadExpr);
+        // Assertion after content: always concat with epsilon outer
+        if (left.length > 0) {
+            const assertionExpr = assertion(marker.direction, marker.sign, marker.inner, epsilon);
+            return concat(processElements(left), concat(assertionExpr, processElements(right)));
+        }
+        // Assertion at start (not negative lookahead with content after): consume everything
+        const assertionExpr = assertion(marker.direction, marker.sign, marker.inner, processElements(right));
+        return assertionExpr;
     }
     // No markers, just regular expressions - concatenate them
     const regexParts = elements;

package/build/regularExpressionParser.test.js CHANGED Viewed

@@ -42,8 +42,8 @@ function normalizeRegularExpression(ast) {
                 return { type: 'capture-group', inner: normalizeRegularExpression(ast.inner), name: ast.name };
             }
             return { type: 'capture-group', inner: normalizeRegularExpression(ast.inner) };
-        case 'lookahead':
-            return { type: 'lookahead', isPositive: ast.isPositive, inner: normalizeRegularExpression(ast.inner), right: normalizeRegularExpression(ast.right) };
+        case 'assertion':
+            return { type: 'assertion', direction: ast.direction, sign: ast.sign, inner: normalizeRegularExpression(ast.inner), outer: normalizeRegularExpression(ast.outer) };
         case 'start-anchor':
             return { type: 'start-anchor', left: normalizeRegularExpression(ast.left), right: normalizeRegularExpression(ast.right) };
         case 'end-anchor':

package/build/smaliParser.js CHANGED Viewed

@@ -14,6 +14,7 @@ import { createNonEmptyArrayParser } from './nonEmptyArrayParser.js';
 import { createOptionalParser } from './optionalParser.js';
 import { createNegativeLookaheadParser } from './negativeLookaheadParser.js';
 import { createSeparatedArrayParser } from './separatedArrayParser.js';
+import { createPredicateElementParser } from './predicateElementParser.js';
 import { smaliMemberNameParser, smaliTypeDescriptorParser } from './dalvikExecutableParser/stringSyntaxParser.js';
 import { createDisjunctionParser } from './disjunctionParser.js';
 import { createSeparatedNonEmptyArrayParser } from './separatedNonEmptyArrayParser.js';
@@ -236,19 +237,14 @@ const smaliCharacterLiteralParser = promiseCompose(createTupleParser([
 ]), ([, character]) => character.charCodeAt(0));
 setParserName(smaliCharacterLiteralParser, 'smaliCharacterLiteralParser');
 // Parser that matches identifier continuation characters (letters, digits, $, -, _)
-const smaliIdentifierContinuationParser = async (parserContext) => {
-    const character = await parserContext.peek(0);
-    parserContext.invariant(character !== undefined, 'Unexpected end of input');
-    invariant(character !== undefined, 'Unexpected end of input');
-    parserContext.invariant((character >= 'a' && character <= 'z')
+const smaliIdentifierContinuationParser = createPredicateElementParser(function isSmaliIdentifierContinuation(character) {
+    return (character >= 'a' && character <= 'z')
         || (character >= 'A' && character <= 'Z')
         || (character >= '0' && character <= '9')
         || character === '$'
         || character === '-'
-        || character === '_', 'Expected identifier continuation character, got "%s"', character);
-    parserContext.skip(1);
-    return character;
-};
+        || character === '_';
+});
 setParserName(smaliIdentifierContinuationParser, 'smaliIdentifierContinuationParser');
 // Helper to create an access flag parser with word boundary check
 const createAccessFlagParser = (keyword) => promiseCompose(createTupleParser([

package/build/symbolicExpressionParser.js CHANGED Viewed

@@ -34,11 +34,16 @@ const symbolicExpressionStringParser = promiseCompose(createRegExpParser(/"(?:[^
 });
 setParserName(symbolicExpressionStringParser, 'symbolicExpressionStringParser');
 // Atom parser: unquoted symbols (any chars except whitespace, parens, quotes, etc.)
-// Supports backslash escapes: \x becomes x, trailing \ becomes nothing
-const symbolicExpressionAtomParser = promiseCompose(createRegExpParser(/(?:[^\s()"'`,;\\]|\\.)+\\?|\\$/), match => {
+// Supports backslash escapes: \x becomes x
+// Note: A lone backslash or one that produces an empty atom should fail parsing
+const symbolicExpressionAtomParser = promiseCompose(createRegExpParser(/(?:[^\s()"'`,;\\]|\\.)+\\?/), match => {
     const raw = match[0];
-    // Process backslash escapes: \x becomes x, trailing \ becomes nothing
+    // Process backslash escapes: \x becomes x
     const value = raw.replace(/\\(.?)/g, '$1');
+    // Reject atoms that result in empty strings
+    if (value.length === 0) {
+        throw new Error('Atom cannot be empty');
+    }
     return {
         type: 'atom',
         value,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@futpib/parser",
-  "version": "1.0.7",
+  "version": "1.0.8",
   "main": "build/index.js",
   "types": "build/index.d.ts",
   "license": "GPL-3.0-only",
@@ -37,27 +37,27 @@
     "@ava/typescript": "^6.0.0",
     "@fast-check/ava": "^2.0.2",
     "@futpib/fetch-cid": "^1.0.2",
-    "@gruhn/regex-utils": "^2.7.3",
+    "@gruhn/regex-utils": "2.9.1",
     "@types/estree": "^1.0.8",
     "@types/invariant": "^2.2.37",
-    "@types/node": "^24.10.1",
+    "@types/node": "^25.2.3",
     "ava": "^6.4.1",
-    "bson": "^7.0.0",
+    "bson": "^7.2.0",
     "c8": "^10.1.3",
     "coveralls": "^3.1.1",
-    "env-paths": "^3.0.0",
+    "env-paths": "^4.0.0",
     "eslint-config-xo-typescript-overrides": "^2.0.3",
-    "execa": "^9.6.0",
-    "fast-check": "^4.3.0",
+    "execa": "^9.6.1",
+    "fast-check": "^4.5.3",
     "invariant": "^2.2.4",
     "jszip": "^3.10.1",
     "leb128": "^0.0.5",
     "mutf-8": "^1.2.2",
     "p-memoize": "^8.0.0",
     "s-expression": "^3.1.1",
-    "tempy": "^3.1.0",
+    "tempy": "^3.2.0",
     "tsd": "^0.33.0",
-    "type-fest": "^5.2.0",
+    "type-fest": "^5.4.4",
     "typescript": "^5.9.3",
     "xo": "^1.2.3"
   },