@futpib/parser 1.0.6 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/build/arbitraryBash.d.ts +3 -0
  2. package/build/arbitraryBash.js +142 -0
  3. package/build/arbitraryJavaScript.js +4 -4
  4. package/build/arbitraryZipStream.d.ts +1 -1
  5. package/build/bashParser.js +317 -75
  6. package/build/bashParser.test.js +71 -0
  7. package/build/bashUnparser.d.ts +3 -0
  8. package/build/bashUnparser.js +157 -0
  9. package/build/bashUnparser.test.d.ts +1 -0
  10. package/build/bashUnparser.test.js +24 -0
  11. package/build/bsonParser.js +3 -3
  12. package/build/dalvikExecutableParser/stringSyntaxParser.js +20 -31
  13. package/build/dalvikExecutableParser.js +2 -5
  14. package/build/hasExecutable.js +1 -1
  15. package/build/index.d.ts +1 -0
  16. package/build/index.js +1 -0
  17. package/build/jsonParser.js +2 -7
  18. package/build/predicateElementParser.d.ts +3 -0
  19. package/build/predicateElementParser.js +10 -0
  20. package/build/regularExpression.d.ts +12 -3
  21. package/build/regularExpression.js +10 -1
  22. package/build/regularExpressionParser.js +39 -25
  23. package/build/regularExpressionParser.test.js +2 -2
  24. package/build/smaliParser.js +5 -9
  25. package/build/symbolicExpressionParser.js +8 -3
  26. package/package.json +9 -9
  27. package/readme.md +468 -7
  28. package/src/arbitraryBash.ts +237 -0
  29. package/src/arbitraryJavaScript.ts +4 -4
  30. package/src/bashParser.test.ts +138 -0
  31. package/src/bashParser.ts +467 -139
  32. package/src/bashUnparser.test.ts +37 -0
  33. package/src/bashUnparser.ts +211 -0
  34. package/src/bsonParser.ts +4 -7
  35. package/src/dalvikExecutableParser/stringSyntaxParser.ts +27 -74
  36. package/src/dalvikExecutableParser.ts +4 -10
  37. package/src/hasExecutable.ts +1 -1
  38. package/src/index.ts +4 -0
  39. package/src/jsonParser.ts +2 -11
  40. package/src/predicateElementParser.ts +22 -0
  41. package/src/regularExpression.ts +11 -1
  42. package/src/regularExpressionParser.test.ts +3 -3
  43. package/src/regularExpressionParser.ts +49 -30
  44. package/src/smaliParser.ts +11 -23
  45. package/src/symbolicExpressionParser.ts +9 -3
@@ -14,6 +14,8 @@ import { createDisjunctionParser } from './disjunctionParser.js';
14
14
  import { createNegativeLookaheadParser } from './negativeLookaheadParser.js';
15
15
  import { createObjectParser } from './objectParser.js';
16
16
  import {
17
+ AssertionDir,
18
+ AssertionSign,
17
19
  type CharacterSet,
18
20
  type CodePointRange,
19
21
  type RegularExpression,
@@ -209,12 +211,14 @@ function characterSetFromArray(chars: string[]): CharacterSet {
209
211
  }
210
212
 
211
213
  function characterSetComplement(set: CharacterSet): CharacterSet {
212
- return characterSetDifference(alphabet, set);
214
+ return characterSetDifference(bmpRange, set);
213
215
  }
214
216
 
215
217
  // Pre-defined character sets
218
+ const bmpRange: CharacterSet = characterSetFromRange({ start: 0, end: 0xFFFF });
219
+
216
220
  const alphabet: CharacterSet = characterSetDifference(
217
- characterSetFromRange({ start: 0, end: 0x10FFFF }),
221
+ bmpRange,
218
222
  characterSetFromArray(['\r', '\n', '\u2028', '\u2029']),
219
223
  );
220
224
 
@@ -224,7 +228,7 @@ const wildcardCharacterSet: CharacterSet = characterSetDifference(
224
228
  );
225
229
 
226
230
  const digitChars: CharacterSet = characterSetCharRange('0', '9');
227
- const nonDigitChars: CharacterSet = characterSetComplement(digitChars);
231
+ const nonDigitChars: CharacterSet = characterSetDifference(bmpRange, digitChars);
228
232
 
229
233
  const wordChars: CharacterSet = [
230
234
  characterSetCharRange('a', 'z'),
@@ -232,7 +236,7 @@ const wordChars: CharacterSet = [
232
236
  characterSetCharRange('0', '9'),
233
237
  characterSetSingleton('_'),
234
238
  ].reduce(characterSetUnion);
235
- const nonWordChars: CharacterSet = characterSetComplement(wordChars);
239
+ const nonWordChars: CharacterSet = characterSetDifference(bmpRange, wordChars);
236
240
 
237
241
  const whiteSpaceChars: CharacterSet = [
238
242
  characterSetSingleton('\f'),
@@ -251,7 +255,7 @@ const whiteSpaceChars: CharacterSet = [
251
255
  characterSetSingleton('\u3000'),
252
256
  characterSetSingleton('\ufeff'),
253
257
  ].reduce(characterSetUnion);
254
- const nonWhiteSpaceChars: CharacterSet = characterSetComplement(whiteSpaceChars);
258
+ const nonWhiteSpaceChars: CharacterSet = characterSetDifference(bmpRange, whiteSpaceChars);
255
259
 
256
260
  // AST constructors
257
261
 
@@ -292,8 +296,8 @@ function captureGroup(inner: RegularExpression, name?: string): RegularExpressio
292
296
  return { type: 'capture-group', inner, name };
293
297
  }
294
298
 
295
- function lookahead(isPositive: boolean, inner: RegularExpression, right: RegularExpression): RegularExpression {
296
- return { type: 'lookahead', isPositive, inner, right };
299
+ function assertion(direction: AssertionDir, sign: AssertionSign, inner: RegularExpression, outer: RegularExpression): RegularExpression {
300
+ return { type: 'assertion', direction, sign, inner, outer };
297
301
  }
298
302
 
299
303
  function startAnchor(left: RegularExpression, right: RegularExpression): RegularExpression {
@@ -756,22 +760,24 @@ const nonCaptureGroupParser: Parser<RegularExpression, string> = promiseCompose(
756
760
  ([, inner]) => inner,
757
761
  );
758
762
 
759
- // Lookahead markers for internal use during parsing
760
- type LookaheadMarker = { type: 'lookahead-marker'; isPositive: boolean; inner: RegularExpression };
763
+ // Assertion markers for internal use during parsing
764
+ type AssertionMarker = { type: 'assertion-marker'; direction: AssertionDir; sign: AssertionSign; inner: RegularExpression };
761
765
 
762
766
  // Positive lookahead (?=...)
763
- const positiveLookaheadMarkerParser: Parser<LookaheadMarker, string> = createObjectParser({
764
- type: 'lookahead-marker' as const,
765
- isPositive: true as const,
767
+ const positiveLookaheadMarkerParser: Parser<AssertionMarker, string> = createObjectParser({
768
+ type: 'assertion-marker' as const,
769
+ direction: AssertionDir.AHEAD as const,
770
+ sign: AssertionSign.POSITIVE as const,
766
771
  _open: createExactSequenceParser('(?='),
767
772
  inner: createParserAccessorParser(() => alternationParser),
768
773
  _close: createExactSequenceParser(')'),
769
774
  });
770
775
 
771
776
  // Negative lookahead (?!...)
772
- const negativeLookaheadMarkerParser: Parser<LookaheadMarker, string> = createObjectParser({
773
- type: 'lookahead-marker' as const,
774
- isPositive: false as const,
777
+ const negativeLookaheadMarkerParser: Parser<AssertionMarker, string> = createObjectParser({
778
+ type: 'assertion-marker' as const,
779
+ direction: AssertionDir.AHEAD as const,
780
+ sign: AssertionSign.NEGATIVE as const,
775
781
  _open: createExactSequenceParser('(?!'),
776
782
  inner: createParserAccessorParser(() => alternationParser),
777
783
  _close: createExactSequenceParser(')'),
@@ -786,7 +792,7 @@ const groupParser: Parser<RegularExpression, string> = createUnionParser([
786
792
  // Anchors
787
793
  // Anchor markers for internal use during parsing
788
794
  type AnchorMarker = { type: 'start-anchor-marker' } | { type: 'end-anchor-marker' };
789
- type ParsedElement = RegularExpression | AnchorMarker | LookaheadMarker;
795
+ type ParsedElement = RegularExpression | AnchorMarker | AssertionMarker;
790
796
 
791
797
  const startAnchorMarkerParser: Parser<AnchorMarker, string> = createObjectParser({
792
798
  type: 'start-anchor-marker' as const,
@@ -847,9 +853,9 @@ function concatList(parts: RegularExpression[]): RegularExpression {
847
853
  return parts.reduceRight((acc, part) => concat(part, acc));
848
854
  }
849
855
 
850
- // Process elements with anchor markers and lookahead markers into proper AST
851
- // Handles anchors and lookahead as infix operators like @gruhn/regex-utils
852
- // Precedence order (lowest to highest): union -> start-anchor -> end-anchor -> lookahead -> concat
856
+ // Process elements with anchor markers and assertion markers into proper AST
857
+ // Handles anchors and assertions as infix operators like @gruhn/regex-utils
858
+ // Precedence order (lowest to highest): union -> start-anchor -> end-anchor -> assertion -> concat
853
859
  function processElements(elements: ParsedElement[]): RegularExpression {
854
860
  if (elements.length === 0) {
855
861
  return epsilon;
@@ -871,18 +877,31 @@ function processElements(elements: ParsedElement[]): RegularExpression {
871
877
  return endAnchor(processElements(left), processElements(right));
872
878
  }
873
879
 
874
- // Then lookaheads (higher precedence than anchors)
875
- const lookaheadIdx = elements.findIndex(e => 'type' in e && e.type === 'lookahead-marker');
876
- if (lookaheadIdx !== -1) {
877
- const marker = elements[lookaheadIdx] as LookaheadMarker;
878
- const left = elements.slice(0, lookaheadIdx);
879
- const right = elements.slice(lookaheadIdx + 1);
880
- const lookaheadExpr = lookahead(marker.isPositive, marker.inner, processElements(right));
881
- if (left.length === 0) {
882
- return lookaheadExpr;
880
+ // Then assertions (higher precedence than anchors)
881
+ // Special handling: Negative lookahead at the start with more content after it
882
+ // forms a concat with epsilon outer, instead of consuming everything into outer
883
+ const assertionIdx = elements.findIndex(e => 'type' in e && e.type === 'assertion-marker');
884
+ if (assertionIdx !== -1) {
885
+ const marker = elements[assertionIdx] as AssertionMarker;
886
+ const left = elements.slice(0, assertionIdx);
887
+ const right = elements.slice(assertionIdx + 1);
888
+
889
+ // Special case: Negative lookahead at the start followed by more content
890
+ // Creates concat instead of nesting
891
+ if (left.length === 0 && marker.sign === AssertionSign.NEGATIVE && right.length > 0) {
892
+ const assertionExpr = assertion(marker.direction, marker.sign, marker.inner, epsilon);
893
+ return concat(assertionExpr, processElements(right));
894
+ }
895
+
896
+ // Assertion after content: always concat with epsilon outer
897
+ if (left.length > 0) {
898
+ const assertionExpr = assertion(marker.direction, marker.sign, marker.inner, epsilon);
899
+ return concat(processElements(left), concat(assertionExpr, processElements(right)));
883
900
  }
884
- // If there's content before the lookahead, concatenate it
885
- return concat(processElements(left), lookaheadExpr);
901
+
902
+ // Assertion at start (not negative lookahead with content after): consume everything
903
+ const assertionExpr = assertion(marker.direction, marker.sign, marker.inner, processElements(right));
904
+ return assertionExpr;
886
905
  }
887
906
 
888
907
  // No markers, just regular expressions - concatenate them
@@ -8,7 +8,6 @@ import {
8
8
  import { createExactSequenceParser } from './exactSequenceParser.js';
9
9
  import { createObjectParser } from './objectParser.js';
10
10
  import { cloneParser, type Parser, setParserName } from './parser.js';
11
- import { type ParserContext } from './parserContext.js';
12
11
  import { promiseCompose } from './promiseCompose.js';
13
12
  import { createTupleParser } from './tupleParser.js';
14
13
  import { createUnionParser } from './unionParser.js';
@@ -18,6 +17,7 @@ import { createNonEmptyArrayParser } from './nonEmptyArrayParser.js';
18
17
  import { createOptionalParser } from './optionalParser.js';
19
18
  import { createNegativeLookaheadParser } from './negativeLookaheadParser.js';
20
19
  import { createSeparatedArrayParser } from './separatedArrayParser.js';
20
+ import { createPredicateElementParser } from './predicateElementParser.js';
21
21
  import { smaliMemberNameParser, smaliTypeDescriptorParser } from './dalvikExecutableParser/stringSyntaxParser.js';
22
22
  import { createDisjunctionParser } from './disjunctionParser.js';
23
23
  import { createSeparatedNonEmptyArrayParser } from './separatedNonEmptyArrayParser.js';
@@ -348,28 +348,16 @@ const smaliCharacterLiteralParser: Parser<number, string> = promiseCompose(
348
348
  setParserName(smaliCharacterLiteralParser, 'smaliCharacterLiteralParser');
349
349
 
350
350
  // Parser that matches identifier continuation characters (letters, digits, $, -, _)
351
- const smaliIdentifierContinuationParser: Parser<string, string> = async (parserContext: ParserContext<string, string>) => {
352
- const character = await parserContext.peek(0);
353
-
354
- parserContext.invariant(character !== undefined, 'Unexpected end of input');
355
-
356
- invariant(character !== undefined, 'Unexpected end of input');
357
-
358
- parserContext.invariant(
359
- (character >= 'a' && character <= 'z')
360
- || (character >= 'A' && character <= 'Z')
361
- || (character >= '0' && character <= '9')
362
- || character === '$'
363
- || character === '-'
364
- || character === '_',
365
- 'Expected identifier continuation character, got "%s"',
366
- character,
367
- );
368
-
369
- parserContext.skip(1);
370
-
371
- return character;
372
- };
351
+ const smaliIdentifierContinuationParser: Parser<string, string> = createPredicateElementParser(
352
+ function isSmaliIdentifierContinuation(character: string) {
353
+ return (character >= 'a' && character <= 'z')
354
+ || (character >= 'A' && character <= 'Z')
355
+ || (character >= '0' && character <= '9')
356
+ || character === '$'
357
+ || character === '-'
358
+ || character === '_';
359
+ },
360
+ );
373
361
 
374
362
  setParserName(smaliIdentifierContinuationParser, 'smaliIdentifierContinuationParser');
375
363
 
@@ -60,13 +60,19 @@ const symbolicExpressionStringParser: Parser<SymbolicExpressionString, string> =
60
60
  setParserName(symbolicExpressionStringParser, 'symbolicExpressionStringParser');
61
61
 
62
62
  // Atom parser: unquoted symbols (any chars except whitespace, parens, quotes, etc.)
63
- // Supports backslash escapes: \x becomes x, trailing \ becomes nothing
63
+ // Supports backslash escapes: \x becomes x
64
+ // Note: A lone backslash or one that produces an empty atom should fail parsing
64
65
  const symbolicExpressionAtomParser: Parser<SymbolicExpressionAtom, string> = promiseCompose(
65
- createRegExpParser(/(?:[^\s()"'`,;\\]|\\.)+\\?|\\$/),
66
+ createRegExpParser(/(?:[^\s()"'`,;\\]|\\.)+\\?/),
66
67
  match => {
67
68
  const raw = match[0];
68
- // Process backslash escapes: \x becomes x, trailing \ becomes nothing
69
+ // Process backslash escapes: \x becomes x
69
70
  const value = raw.replace(/\\(.?)/g, '$1');
71
+ // Reject atoms that result in empty strings
72
+ if (value.length === 0) {
73
+ throw new Error('Atom cannot be empty');
74
+ }
75
+
70
76
  return {
71
77
  type: 'atom' as const,
72
78
  value,