@futpib/parser 1.0.7 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,7 @@ import { createTerminatedArrayParser } from './terminatedArrayParser.js';
12
12
  import { createDisjunctionParser } from './disjunctionParser.js';
13
13
  import { createNegativeLookaheadParser } from './negativeLookaheadParser.js';
14
14
  import { createObjectParser } from './objectParser.js';
15
+ import { AssertionDir, AssertionSign, } from './regularExpression.js';
15
16
  // CharacterSet helpers
16
17
  const emptyCharacterSet = { type: 'empty' };
17
18
  function codePointRangeIsEmpty(range) {
@@ -168,20 +169,21 @@ function characterSetFromArray(chars) {
168
169
  return chars.map(characterSetSingleton).reduce(characterSetUnion, emptyCharacterSet);
169
170
  }
170
171
  function characterSetComplement(set) {
171
- return characterSetDifference(alphabet, set);
172
+ return characterSetDifference(bmpRange, set);
172
173
  }
173
174
  // Pre-defined character sets
174
- const alphabet = characterSetDifference(characterSetFromRange({ start: 0, end: 0x10FFFF }), characterSetFromArray(['\r', '\n', '\u2028', '\u2029']));
175
+ const bmpRange = characterSetFromRange({ start: 0, end: 0xFFFF });
176
+ const alphabet = characterSetDifference(bmpRange, characterSetFromArray(['\r', '\n', '\u2028', '\u2029']));
175
177
  const wildcardCharacterSet = characterSetDifference(alphabet, characterSetFromArray(['\r', '\n', '\u2028', '\u2029']));
176
178
  const digitChars = characterSetCharRange('0', '9');
177
- const nonDigitChars = characterSetComplement(digitChars);
179
+ const nonDigitChars = characterSetDifference(bmpRange, digitChars);
178
180
  const wordChars = [
179
181
  characterSetCharRange('a', 'z'),
180
182
  characterSetCharRange('A', 'Z'),
181
183
  characterSetCharRange('0', '9'),
182
184
  characterSetSingleton('_'),
183
185
  ].reduce(characterSetUnion);
184
- const nonWordChars = characterSetComplement(wordChars);
186
+ const nonWordChars = characterSetDifference(bmpRange, wordChars);
185
187
  const whiteSpaceChars = [
186
188
  characterSetSingleton('\f'),
187
189
  characterSetSingleton('\n'),
@@ -199,7 +201,7 @@ const whiteSpaceChars = [
199
201
  characterSetSingleton('\u3000'),
200
202
  characterSetSingleton('\ufeff'),
201
203
  ].reduce(characterSetUnion);
202
- const nonWhiteSpaceChars = characterSetComplement(whiteSpaceChars);
204
+ const nonWhiteSpaceChars = characterSetDifference(bmpRange, whiteSpaceChars);
203
205
  // AST constructors
204
206
  const epsilon = { type: 'epsilon' };
205
207
  function literal(charset) {
@@ -229,8 +231,8 @@ function captureGroup(inner, name) {
229
231
  }
230
232
  return { type: 'capture-group', inner, name };
231
233
  }
232
- function lookahead(isPositive, inner, right) {
233
- return { type: 'lookahead', isPositive, inner, right };
234
+ function assertion(direction, sign, inner, outer) {
235
+ return { type: 'assertion', direction, sign, inner, outer };
234
236
  }
235
237
  function startAnchor(left, right) {
236
238
  return { type: 'start-anchor', left, right };
@@ -476,16 +478,18 @@ const nonCaptureGroupParser = promiseCompose(createTupleParser([
476
478
  ]), ([, inner]) => inner);
477
479
  // Positive lookahead (?=...)
478
480
  const positiveLookaheadMarkerParser = createObjectParser({
479
- type: 'lookahead-marker',
480
- isPositive: true,
481
+ type: 'assertion-marker',
482
+ direction: AssertionDir.AHEAD,
483
+ sign: AssertionSign.POSITIVE,
481
484
  _open: createExactSequenceParser('(?='),
482
485
  inner: createParserAccessorParser(() => alternationParser),
483
486
  _close: createExactSequenceParser(')'),
484
487
  });
485
488
  // Negative lookahead (?!...)
486
489
  const negativeLookaheadMarkerParser = createObjectParser({
487
- type: 'lookahead-marker',
488
- isPositive: false,
490
+ type: 'assertion-marker',
491
+ direction: AssertionDir.AHEAD,
492
+ sign: AssertionSign.NEGATIVE,
489
493
  _open: createExactSequenceParser('(?!'),
490
494
  inner: createParserAccessorParser(() => alternationParser),
491
495
  _close: createExactSequenceParser(')'),
@@ -545,9 +549,9 @@ function concatList(parts) {
545
549
  }
546
550
  return parts.reduceRight((acc, part) => concat(part, acc));
547
551
  }
548
- // Process elements with anchor markers and lookahead markers into proper AST
549
- // Handles anchors and lookahead as infix operators like @gruhn/regex-utils
550
- // Precedence order (lowest to highest): union -> start-anchor -> end-anchor -> lookahead -> concat
552
+ // Process elements with anchor markers and assertion markers into proper AST
553
+ // Handles anchors and assertions as infix operators like @gruhn/regex-utils
554
+ // Precedence order (lowest to highest): union -> start-anchor -> end-anchor -> assertion -> concat
551
555
  function processElements(elements) {
552
556
  if (elements.length === 0) {
553
557
  return epsilon;
@@ -566,18 +570,28 @@ function processElements(elements) {
566
570
  const right = elements.slice(endAnchorIdx + 1);
567
571
  return endAnchor(processElements(left), processElements(right));
568
572
  }
569
- // Then lookaheads (higher precedence than anchors)
570
- const lookaheadIdx = elements.findIndex(e => 'type' in e && e.type === 'lookahead-marker');
571
- if (lookaheadIdx !== -1) {
572
- const marker = elements[lookaheadIdx];
573
- const left = elements.slice(0, lookaheadIdx);
574
- const right = elements.slice(lookaheadIdx + 1);
575
- const lookaheadExpr = lookahead(marker.isPositive, marker.inner, processElements(right));
576
- if (left.length === 0) {
577
- return lookaheadExpr;
573
+ // Then assertions (higher precedence than anchors)
574
+ // Special handling: Negative lookahead at the start with more content after it
575
+ // forms a concat with epsilon outer, instead of consuming everything into outer
576
+ const assertionIdx = elements.findIndex(e => 'type' in e && e.type === 'assertion-marker');
577
+ if (assertionIdx !== -1) {
578
+ const marker = elements[assertionIdx];
579
+ const left = elements.slice(0, assertionIdx);
580
+ const right = elements.slice(assertionIdx + 1);
581
+ // Special case: Negative lookahead at the start followed by more content
582
+ // Creates concat instead of nesting
583
+ if (left.length === 0 && marker.sign === AssertionSign.NEGATIVE && right.length > 0) {
584
+ const assertionExpr = assertion(marker.direction, marker.sign, marker.inner, epsilon);
585
+ return concat(assertionExpr, processElements(right));
578
586
  }
579
- // If there's content before the lookahead, concatenate it
580
- return concat(processElements(left), lookaheadExpr);
587
+ // Assertion after content: always concat with epsilon outer
588
+ if (left.length > 0) {
589
+ const assertionExpr = assertion(marker.direction, marker.sign, marker.inner, epsilon);
590
+ return concat(processElements(left), concat(assertionExpr, processElements(right)));
591
+ }
592
+ // Assertion at start (not negative lookahead with content after): consume everything
593
+ const assertionExpr = assertion(marker.direction, marker.sign, marker.inner, processElements(right));
594
+ return assertionExpr;
581
595
  }
582
596
  // No markers, just regular expressions - concatenate them
583
597
  const regexParts = elements;
@@ -42,8 +42,8 @@ function normalizeRegularExpression(ast) {
42
42
  return { type: 'capture-group', inner: normalizeRegularExpression(ast.inner), name: ast.name };
43
43
  }
44
44
  return { type: 'capture-group', inner: normalizeRegularExpression(ast.inner) };
45
- case 'lookahead':
46
- return { type: 'lookahead', isPositive: ast.isPositive, inner: normalizeRegularExpression(ast.inner), right: normalizeRegularExpression(ast.right) };
45
+ case 'assertion':
46
+ return { type: 'assertion', direction: ast.direction, sign: ast.sign, inner: normalizeRegularExpression(ast.inner), outer: normalizeRegularExpression(ast.outer) };
47
47
  case 'start-anchor':
48
48
  return { type: 'start-anchor', left: normalizeRegularExpression(ast.left), right: normalizeRegularExpression(ast.right) };
49
49
  case 'end-anchor':
@@ -14,6 +14,7 @@ import { createNonEmptyArrayParser } from './nonEmptyArrayParser.js';
14
14
  import { createOptionalParser } from './optionalParser.js';
15
15
  import { createNegativeLookaheadParser } from './negativeLookaheadParser.js';
16
16
  import { createSeparatedArrayParser } from './separatedArrayParser.js';
17
+ import { createPredicateElementParser } from './predicateElementParser.js';
17
18
  import { smaliMemberNameParser, smaliTypeDescriptorParser } from './dalvikExecutableParser/stringSyntaxParser.js';
18
19
  import { createDisjunctionParser } from './disjunctionParser.js';
19
20
  import { createSeparatedNonEmptyArrayParser } from './separatedNonEmptyArrayParser.js';
@@ -236,19 +237,14 @@ const smaliCharacterLiteralParser = promiseCompose(createTupleParser([
236
237
  ]), ([, character]) => character.charCodeAt(0));
237
238
  setParserName(smaliCharacterLiteralParser, 'smaliCharacterLiteralParser');
238
239
  // Parser that matches identifier continuation characters (letters, digits, $, -, _)
239
- const smaliIdentifierContinuationParser = async (parserContext) => {
240
- const character = await parserContext.peek(0);
241
- parserContext.invariant(character !== undefined, 'Unexpected end of input');
242
- invariant(character !== undefined, 'Unexpected end of input');
243
- parserContext.invariant((character >= 'a' && character <= 'z')
240
+ const smaliIdentifierContinuationParser = createPredicateElementParser(function isSmaliIdentifierContinuation(character) {
241
+ return (character >= 'a' && character <= 'z')
244
242
  || (character >= 'A' && character <= 'Z')
245
243
  || (character >= '0' && character <= '9')
246
244
  || character === '$'
247
245
  || character === '-'
248
- || character === '_', 'Expected identifier continuation character, got "%s"', character);
249
- parserContext.skip(1);
250
- return character;
251
- };
246
+ || character === '_';
247
+ });
252
248
  setParserName(smaliIdentifierContinuationParser, 'smaliIdentifierContinuationParser');
253
249
  // Helper to create an access flag parser with word boundary check
254
250
  const createAccessFlagParser = (keyword) => promiseCompose(createTupleParser([
@@ -34,11 +34,16 @@ const symbolicExpressionStringParser = promiseCompose(createRegExpParser(/"(?:[^
34
34
  });
35
35
  setParserName(symbolicExpressionStringParser, 'symbolicExpressionStringParser');
36
36
  // Atom parser: unquoted symbols (any chars except whitespace, parens, quotes, etc.)
37
- // Supports backslash escapes: \x becomes x, trailing \ becomes nothing
38
- const symbolicExpressionAtomParser = promiseCompose(createRegExpParser(/(?:[^\s()"'`,;\\]|\\.)+\\?|\\$/), match => {
37
+ // Supports backslash escapes: \x becomes x
38
+ // Note: A lone backslash or one that produces an empty atom should fail parsing
39
+ const symbolicExpressionAtomParser = promiseCompose(createRegExpParser(/(?:[^\s()"'`,;\\]|\\.)+\\?/), match => {
39
40
  const raw = match[0];
40
- // Process backslash escapes: \x becomes x, trailing \ becomes nothing
41
+ // Process backslash escapes: \x becomes x
41
42
  const value = raw.replace(/\\(.?)/g, '$1');
43
+ // Reject atoms that result in empty strings
44
+ if (value.length === 0) {
45
+ throw new Error('Atom cannot be empty');
46
+ }
42
47
  return {
43
48
  type: 'atom',
44
49
  value,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@futpib/parser",
3
- "version": "1.0.7",
3
+ "version": "1.0.8",
4
4
  "main": "build/index.js",
5
5
  "types": "build/index.d.ts",
6
6
  "license": "GPL-3.0-only",
@@ -37,27 +37,27 @@
37
37
  "@ava/typescript": "^6.0.0",
38
38
  "@fast-check/ava": "^2.0.2",
39
39
  "@futpib/fetch-cid": "^1.0.2",
40
- "@gruhn/regex-utils": "^2.7.3",
40
+ "@gruhn/regex-utils": "2.9.1",
41
41
  "@types/estree": "^1.0.8",
42
42
  "@types/invariant": "^2.2.37",
43
- "@types/node": "^24.10.1",
43
+ "@types/node": "^25.2.3",
44
44
  "ava": "^6.4.1",
45
- "bson": "^7.0.0",
45
+ "bson": "^7.2.0",
46
46
  "c8": "^10.1.3",
47
47
  "coveralls": "^3.1.1",
48
- "env-paths": "^3.0.0",
48
+ "env-paths": "^4.0.0",
49
49
  "eslint-config-xo-typescript-overrides": "^2.0.3",
50
- "execa": "^9.6.0",
51
- "fast-check": "^4.3.0",
50
+ "execa": "^9.6.1",
51
+ "fast-check": "^4.5.3",
52
52
  "invariant": "^2.2.4",
53
53
  "jszip": "^3.10.1",
54
54
  "leb128": "^0.0.5",
55
55
  "mutf-8": "^1.2.2",
56
56
  "p-memoize": "^8.0.0",
57
57
  "s-expression": "^3.1.1",
58
- "tempy": "^3.1.0",
58
+ "tempy": "^3.2.0",
59
59
  "tsd": "^0.33.0",
60
- "type-fest": "^5.2.0",
60
+ "type-fest": "^5.4.4",
61
61
  "typescript": "^5.9.3",
62
62
  "xo": "^1.2.3"
63
63
  },