@futpib/parser 1.0.6 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/build/arbitraryBash.d.ts +3 -0
  2. package/build/arbitraryBash.js +142 -0
  3. package/build/arbitraryJavaScript.js +4 -4
  4. package/build/arbitraryZipStream.d.ts +1 -1
  5. package/build/bashParser.js +317 -75
  6. package/build/bashParser.test.js +71 -0
  7. package/build/bashUnparser.d.ts +3 -0
  8. package/build/bashUnparser.js +157 -0
  9. package/build/bashUnparser.test.d.ts +1 -0
  10. package/build/bashUnparser.test.js +24 -0
  11. package/build/bsonParser.js +3 -3
  12. package/build/dalvikExecutableParser/stringSyntaxParser.js +20 -31
  13. package/build/dalvikExecutableParser.js +2 -5
  14. package/build/hasExecutable.js +1 -1
  15. package/build/index.d.ts +1 -0
  16. package/build/index.js +1 -0
  17. package/build/jsonParser.js +2 -7
  18. package/build/predicateElementParser.d.ts +3 -0
  19. package/build/predicateElementParser.js +10 -0
  20. package/build/regularExpression.d.ts +12 -3
  21. package/build/regularExpression.js +10 -1
  22. package/build/regularExpressionParser.js +39 -25
  23. package/build/regularExpressionParser.test.js +2 -2
  24. package/build/smaliParser.js +5 -9
  25. package/build/symbolicExpressionParser.js +8 -3
  26. package/package.json +9 -9
  27. package/readme.md +468 -7
  28. package/src/arbitraryBash.ts +237 -0
  29. package/src/arbitraryJavaScript.ts +4 -4
  30. package/src/bashParser.test.ts +138 -0
  31. package/src/bashParser.ts +467 -139
  32. package/src/bashUnparser.test.ts +37 -0
  33. package/src/bashUnparser.ts +211 -0
  34. package/src/bsonParser.ts +4 -7
  35. package/src/dalvikExecutableParser/stringSyntaxParser.ts +27 -74
  36. package/src/dalvikExecutableParser.ts +4 -10
  37. package/src/hasExecutable.ts +1 -1
  38. package/src/index.ts +4 -0
  39. package/src/jsonParser.ts +2 -11
  40. package/src/predicateElementParser.ts +22 -0
  41. package/src/regularExpression.ts +11 -1
  42. package/src/regularExpressionParser.test.ts +3 -3
  43. package/src/regularExpressionParser.ts +49 -30
  44. package/src/smaliParser.ts +11 -23
  45. package/src/symbolicExpressionParser.ts +9 -3
@@ -0,0 +1,37 @@
1
+ import { testProp } from '@fast-check/ava';
2
+ import { arbitraryBashCommandList } from './arbitraryBash.js';
3
+ import { bashScriptUnparser } from './bashUnparser.js';
4
+ import { bashScriptParser } from './bashParser.js';
5
+ import { runParser } from './parser.js';
6
+ import { runUnparser } from './unparser.js';
7
+ import { stringParserInputCompanion } from './parserInputCompanion.js';
8
+ import { stringUnparserOutputCompanion } from './unparserOutputCompanion.js';
9
+
10
+ const seed = process.env.SEED ? Number(process.env.SEED) : undefined;
11
+
12
+ async function collectString(asyncIterable: AsyncIterable<string>): Promise<string> {
13
+ let result = '';
14
+ for await (const chunk of asyncIterable) {
15
+ result += chunk;
16
+ }
17
+
18
+ return result;
19
+ }
20
+
21
+ testProp(
22
+ 'bash roundtrip',
23
+ [arbitraryBashCommandList],
24
+ async (t, command) => {
25
+ const source = await collectString(runUnparser(
26
+ bashScriptUnparser, command, stringUnparserOutputCompanion));
27
+
28
+ const reparsed = await runParser(
29
+ bashScriptParser, source, stringParserInputCompanion);
30
+
31
+ t.deepEqual(reparsed, command);
32
+ },
33
+ {
34
+ verbose: true,
35
+ seed,
36
+ },
37
+ );
@@ -0,0 +1,211 @@
1
+ import { type Unparser } from './unparser.js';
2
+ import {
3
+ type BashWord,
4
+ type BashWordPart,
5
+ type BashSimpleCommand,
6
+ type BashSubshell,
7
+ type BashBraceGroup,
8
+ type BashCommandUnit,
9
+ type BashPipeline,
10
+ type BashCommandList,
11
+ type BashRedirect,
12
+ type BashAssignment,
13
+ type BashCommand,
14
+ } from './bash.js';
15
+
16
+ function isIdentChar(ch: string): boolean {
17
+ return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') || ch === '_';
18
+ }
19
+
20
+ function nextPartStartsWithIdentChar(parts: BashWordPart[], index: number): boolean {
21
+ const next = parts[index + 1];
22
+ if (next === undefined) {
23
+ return false;
24
+ }
25
+
26
+ if (next.type === 'literal') {
27
+ return next.value.length > 0 && isIdentChar(next.value[0]!);
28
+ }
29
+
30
+ return false;
31
+ }
32
+
33
+ function unparseWord(word: BashWord): string {
34
+ return word.parts.map((part, i) => unparseWordPartInContext(part, word.parts, i)).join('');
35
+ }
36
+
37
+ function unparseWordPartInContext(part: BashWordPart, parts: BashWordPart[], index: number): string {
38
+ return unparseWordPart(part);
39
+ }
40
+
41
+ function unparseWordPart(part: BashWordPart): string {
42
+ switch (part.type) {
43
+ case 'literal':
44
+ return escapeLiteral(part.value);
45
+
46
+ case 'singleQuoted':
47
+ return "'" + part.value + "'";
48
+
49
+ case 'doubleQuoted':
50
+ return '"' + part.parts.map(p => unparseDoubleQuotedPart(p)).join('') + '"';
51
+
52
+ case 'variable':
53
+ return '$' + part.name;
54
+
55
+ case 'variableBraced': {
56
+ let result = '${' + part.name;
57
+ if (part.operator !== undefined) {
58
+ result += part.operator;
59
+ if (part.operand !== undefined) {
60
+ result += unparseWord(part.operand);
61
+ }
62
+ }
63
+
64
+ result += '}';
65
+ return result;
66
+ }
67
+
68
+ case 'commandSubstitution':
69
+ return '$( ' + unparseCommand(part.command) + ' )';
70
+
71
+ case 'backtickSubstitution':
72
+ return '`' + unparseCommand(part.command) + '`';
73
+
74
+ case 'arithmeticExpansion':
75
+ return '$((' + part.expression + '))';
76
+
77
+ case 'processSubstitution':
78
+ return part.direction + '(' + unparseCommand(part.command) + ')';
79
+ }
80
+ }
81
+
82
+ function unparseDoubleQuotedPart(part: BashWordPart): string {
83
+ switch (part.type) {
84
+ case 'literal': {
85
+ let result = '';
86
+ for (const ch of part.value) {
87
+ if (ch === '\\' || ch === '$' || ch === '`' || ch === '"') {
88
+ result += '\\' + ch;
89
+ } else {
90
+ result += ch;
91
+ }
92
+ }
93
+
94
+ return result;
95
+ }
96
+
97
+ default:
98
+ return unparseWordPart(part);
99
+ }
100
+ }
101
+
102
+ function escapeLiteral(value: string): string {
103
+ let result = '';
104
+ for (const ch of value) {
105
+ if (' \t\n|&;<>()$`"\' \\'.includes(ch) || ch === '{' || ch === '}' || ch === '#') {
106
+ result += '\\' + ch;
107
+ } else {
108
+ result += ch;
109
+ }
110
+ }
111
+
112
+ return result;
113
+ }
114
+
115
+ function unparseRedirect(redirect: BashRedirect): string {
116
+ let result = '';
117
+ if (redirect.fd !== undefined) {
118
+ result += String(redirect.fd);
119
+ }
120
+
121
+ result += redirect.operator;
122
+ if ('type' in redirect.target && redirect.target.type === 'hereDoc') {
123
+ result += redirect.target.delimiter;
124
+ } else {
125
+ result += unparseWord(redirect.target as BashWord);
126
+ }
127
+
128
+ return result;
129
+ }
130
+
131
+ function unparseAssignment(assignment: BashAssignment): string {
132
+ let result = assignment.name + '=';
133
+ if (assignment.value !== undefined) {
134
+ result += unparseWord(assignment.value);
135
+ }
136
+
137
+ return result;
138
+ }
139
+
140
+ function unparseSimpleCommand(cmd: BashSimpleCommand): string {
141
+ const parts: string[] = [];
142
+
143
+ for (const assignment of cmd.assignments) {
144
+ parts.push(unparseAssignment(assignment));
145
+ }
146
+
147
+ if (cmd.name !== undefined) {
148
+ parts.push(unparseWord(cmd.name));
149
+ }
150
+
151
+ for (const arg of cmd.args) {
152
+ parts.push(unparseWord(arg));
153
+ }
154
+
155
+ const wordParts = parts.join(' ');
156
+ const redirectParts = cmd.redirects.map(r => unparseRedirect(r)).join(' ');
157
+
158
+ if (redirectParts) {
159
+ return wordParts ? wordParts + ' ' + redirectParts : redirectParts;
160
+ }
161
+
162
+ return wordParts;
163
+ }
164
+
165
+ function unparseCommandUnit(unit: BashCommandUnit): string {
166
+ switch (unit.type) {
167
+ case 'simple':
168
+ return unparseSimpleCommand(unit);
169
+
170
+ case 'subshell':
171
+ return '(' + unparseCommand(unit.body) + ')';
172
+
173
+ case 'braceGroup':
174
+ return '{ ' + unparseCommand(unit.body) + ' }';
175
+ }
176
+ }
177
+
178
+ function unparsePipeline(pipeline: BashPipeline): string {
179
+ let result = '';
180
+ if (pipeline.negated) {
181
+ result += '! ';
182
+ }
183
+
184
+ result += pipeline.commands.map(cmd => unparseCommandUnit(cmd)).join(' | ');
185
+ return result;
186
+ }
187
+
188
+ function unparseCommand(command: BashCommand): string {
189
+ return unparseCommandList(command);
190
+ }
191
+
192
+ function unparseCommandList(list: BashCommandList): string {
193
+ let result = '';
194
+ for (let i = 0; i < list.entries.length; i++) {
195
+ const entry = list.entries[i]!;
196
+ if (i > 0) {
197
+ result += ' ';
198
+ }
199
+
200
+ result += unparsePipeline(entry.pipeline);
201
+ if (entry.separator !== undefined) {
202
+ result += entry.separator;
203
+ }
204
+ }
205
+
206
+ return result;
207
+ }
208
+
209
+ export const bashScriptUnparser: Unparser<BashCommand, string> = async function * (command) {
210
+ yield unparseCommand(command);
211
+ };
package/src/bsonParser.ts CHANGED
@@ -6,10 +6,10 @@ import { createTupleParser } from './tupleParser.js';
6
6
  import { createSkipParser } from './skipParser.js';
7
7
  import { createParserAccessorParser } from './parserAccessorParser.js';
8
8
  import { createTerminatedArrayParser } from './terminatedArrayParser.js';
9
- import { createElementParser } from './elementParser.js';
10
9
  import { createExactElementParser } from './exactElementParser.js';
11
10
  import { createUnionParser } from './unionParser.js';
12
11
  import { parserCreatorCompose } from './parserCreatorCompose.js';
12
+ import { createPredicateElementParser } from './predicateElementParser.js';
13
13
 
14
14
  const createFixedLengthBufferParser = (length: number): Parser<Buffer, Uint8Array> => promiseCompose(createFixedLengthSequenceParser<Uint8Array>(length), sequence => Buffer.from(sequence));
15
15
 
@@ -17,16 +17,13 @@ const buffer1Parser = createFixedLengthBufferParser(1);
17
17
  const buffer4Parser = createFixedLengthBufferParser(4);
18
18
  const buffer8Parser = createFixedLengthBufferParser(8);
19
19
 
20
- const elementParser: Parser<number, Uint8Array> = createElementParser();
21
-
22
20
  const nullByteParser: Parser<number, Uint8Array> = createExactElementParser(0);
23
21
 
22
+ const nonNullByteParser: Parser<number, Uint8Array> = createPredicateElementParser((byte: number) => byte !== 0);
23
+
24
24
  const cstringParser: Parser<string, Uint8Array> = promiseCompose(
25
25
  createTerminatedArrayParser(
26
- parserCreatorCompose(
27
- () => elementParser,
28
- (byte: number) => async parserContext => parserContext.invariant(byte, 'Expected non-null byte'),
29
- )(),
26
+ nonNullByteParser,
30
27
  nullByteParser,
31
28
  ),
32
29
  ([ sequence ]) => Buffer.from(sequence).toString('utf8'),
@@ -1,84 +1,37 @@
1
- import invariant from 'invariant';
2
1
  import { type Parser, setParserName } from '../parser.js';
3
- import { type ParserContext } from '../parserContext.js';
4
2
  import { promiseCompose } from '../promiseCompose.js';
5
3
  import { createSeparatedArrayParser } from '../separatedArrayParser.js';
6
4
  import { createExactSequenceParser } from '../exactSequenceParser.js';
7
5
  import { createUnionParser } from '../unionParser.js';
8
6
  import { createTupleParser } from '../tupleParser.js';
9
7
  import { createArrayParser } from '../arrayParser.js';
10
-
11
- export const smaliSimpleNameParser: Parser<string, string> = async (parserContext: ParserContext<string, string>) => {
12
- const characters: string[] = [];
13
-
14
- while (true) {
15
- const character = await parserContext.peek(0);
16
-
17
- parserContext.invariant(character !== undefined, 'Unexpected end of input');
18
-
19
- invariant(character !== undefined, 'Unexpected end of input');
20
-
21
- if (
22
- (
23
- character >= 'a' && character <= 'z'
24
- )
25
- || (
26
- character >= 'A' && character <= 'Z'
27
- )
28
- || (
29
- character >= '0' && character <= '9'
30
- )
31
- || (
32
- character === ' '
33
- )
34
- || (
35
- character === '$'
36
- )
37
- || (
38
- character === '-'
39
- )
40
- || (
41
- character === '_'
42
- )
43
- || (
44
- character === '\u00A0'
45
- )
46
- || (
47
- character >= '\u00A1' && character <= '\u1FFF'
48
- )
49
- || (
50
- character >= '\u2000' && character <= '\u200A'
51
- )
52
- || (
53
- character >= '\u2010' && character <= '\u2027'
54
- )
55
- || (
56
- character === '\u202F'
57
- )
58
- || (
59
- character >= '\u2030' && character <= '\uD7FF'
60
- )
61
- || (
62
- character >= '\uE000' && character <= '\uFFEF'
63
- )
64
- || (
65
- character >= '\uD800' && character <= '\uDBFF'
66
- )
67
- ) {
68
- parserContext.skip(1);
69
-
70
- characters.push(character);
71
-
72
- continue;
73
- }
74
-
75
- parserContext.invariant(characters.length > 0, 'Expected at least one character');
76
-
77
- break;
78
- }
79
-
80
- return characters.join('');
81
- };
8
+ import { createNonEmptyArrayParser } from '../nonEmptyArrayParser.js';
9
+ import { createPredicateElementParser } from '../predicateElementParser.js';
10
+
11
+ function isSmaliSimpleNameChar(character: string): boolean {
12
+ return (
13
+ (character >= 'a' && character <= 'z')
14
+ || (character >= 'A' && character <= 'Z')
15
+ || (character >= '0' && character <= '9')
16
+ || character === ' '
17
+ || character === '$'
18
+ || character === '-'
19
+ || character === '_'
20
+ || character === '\u00A0'
21
+ || (character >= '\u00A1' && character <= '\u1FFF')
22
+ || (character >= '\u2000' && character <= '\u200A')
23
+ || (character >= '\u2010' && character <= '\u2027')
24
+ || character === '\u202F'
25
+ || (character >= '\u2030' && character <= '\uD7FF')
26
+ || (character >= '\uE000' && character <= '\uFFEF')
27
+ || (character >= '\uD800' && character <= '\uDBFF')
28
+ );
29
+ }
30
+
31
+ export const smaliSimpleNameParser: Parser<string, string> = promiseCompose(
32
+ createNonEmptyArrayParser(createPredicateElementParser(isSmaliSimpleNameChar)),
33
+ characters => characters.join(''),
34
+ );
82
35
 
83
36
  setParserName(smaliSimpleNameParser, 'smaliSimpleNameParser');
84
37
 
@@ -13,6 +13,7 @@ import { createTupleParser } from './tupleParser.js';
13
13
  import { createParserAccessorParser } from './parserAccessorParser.js';
14
14
  import { createSkipToParser } from './skipToParser.js';
15
15
  import { createLookaheadParser } from './lookaheadParser.js';
16
+ import { createPredicateElementParser } from './predicateElementParser.js';
16
17
  import {
17
18
  getIsoTypedNumberArray,
18
19
  type IndexIntoFieldIds,
@@ -843,16 +844,9 @@ type DalvikExecutableTaggedEncodedValue =
843
844
  | { type: 'boolean'; value: boolean };
844
845
 
845
846
  const createByteWith5LeastSignificantBitsEqualParser = (leastSignificant5: number): Parser<number, Uint8Array> => {
846
- const byteWith5LeastSignificantBitsEqualParser: Parser<number, Uint8Array> = async parserContext => {
847
- const byte = await parserContext.read(0);
848
- parserContext.invariant(
849
- (byte & 0b0001_1111) === leastSignificant5,
850
- 'Expected byte with 5 least significant bits equal to %s, but got %s',
851
- leastSignificant5.toString(2).padStart(8, '0'),
852
- byte.toString(2).padStart(8, '0'),
853
- );
854
- return byte;
855
- };
847
+ const byteWith5LeastSignificantBitsEqualParser: Parser<number, Uint8Array> = createPredicateElementParser(
848
+ (byte: number) => (byte & 0b0001_1111) === leastSignificant5,
849
+ );
856
850
 
857
851
  setParserName(byteWith5LeastSignificantBitsEqualParser, `createByteWith5LeastSignificantBitsEqualParser(${leastSignificant5.toString(2).padStart(5, '0')})`);
858
852
 
@@ -1,7 +1,7 @@
1
1
  import { execa } from 'execa';
2
2
 
3
3
  export async function hasExecutable(executable: string) {
4
- const hasExecutable = execa(executable).catch(() => false).then(() => true);
4
+ const hasExecutable = await execa(executable).then(() => true).catch(() => false);
5
5
 
6
6
  if (!hasExecutable) {
7
7
  console.warn('Executable %o not found', executable);
package/src/index.ts CHANGED
@@ -96,6 +96,10 @@ export {
96
96
  createElementParser,
97
97
  } from './elementParser.js';
98
98
 
99
+ export {
100
+ createPredicateElementParser,
101
+ } from './predicateElementParser.js';
102
+
99
103
  export {
100
104
  createTerminatedArrayParser,
101
105
  } from './terminatedArrayParser.js';
package/src/jsonParser.ts CHANGED
@@ -11,8 +11,7 @@ import { createDisjunctionParser } from './disjunctionParser.js';
11
11
  import { createTerminatedArrayParser } from './terminatedArrayParser.js';
12
12
  import { createArrayParser } from './arrayParser.js';
13
13
  import { createParserAccessorParser } from './parserAccessorParser.js';
14
- import { createElementParser } from './elementParser.js';
15
- import { parserCreatorCompose } from './parserCreatorCompose.js';
14
+ import { createPredicateElementParser } from './predicateElementParser.js';
16
15
  import { createSeparatedArrayParser } from './separatedArrayParser.js';
17
16
  import { createRegExpParser } from './regexpParser.js';
18
17
 
@@ -52,17 +51,9 @@ const jsonStringEscapeSequenceParser: Parser<string, string> = createUnionParser
52
51
  jsonUnicodeEscapeSequenceParser,
53
52
  ]);
54
53
 
55
- const elementParser: Parser<string, string> = createElementParser();
56
-
57
54
  const jsonStringCharacterParser: Parser<string, string> = createDisjunctionParser([
58
55
  jsonStringEscapeSequenceParser,
59
- parserCreatorCompose(
60
- () => elementParser,
61
- character => async parserContext => {
62
- parserContext.invariant(character !== '"', 'Unexpected """');
63
- return character;
64
- },
65
- )(),
56
+ createPredicateElementParser((character: string) => character !== '"'),
66
57
  ]);
67
58
 
68
59
  export const jsonStringParser: Parser<string, string> = promiseCompose(
@@ -0,0 +1,22 @@
1
+ import { setParserName, type Parser } from './parser.js';
2
+ import { type DeriveSequenceElement } from './sequence.js';
3
+
4
+ export const createPredicateElementParser = <Sequence, Element = DeriveSequenceElement<Sequence>>(
5
+ predicate: (element: Element) => boolean,
6
+ ): Parser<Element, Sequence, Element> => {
7
+ const predicateElementParser: Parser<Element, Sequence, Element> = async parserContext => {
8
+ const element = await parserContext.read(0);
9
+
10
+ parserContext.invariant(
11
+ predicate(element),
12
+ 'Element does not match predicate: %s',
13
+ element,
14
+ );
15
+
16
+ return element;
17
+ };
18
+
19
+ setParserName(predicateElementParser, `createPredicateElementParser(${predicate.name || 'anonymous'})`);
20
+
21
+ return predicateElementParser;
22
+ };
@@ -9,6 +9,16 @@ export type CharacterSet =
9
9
 
10
10
  export type RepeatBounds = number | { min: number; max?: number } | { min?: number; max: number };
11
11
 
12
+ export enum AssertionSign {
13
+ POSITIVE = 0,
14
+ NEGATIVE = 1,
15
+ }
16
+
17
+ export enum AssertionDir {
18
+ AHEAD = 0,
19
+ BEHIND = 1,
20
+ }
21
+
12
22
  export type RegularExpression =
13
23
  | { type: 'epsilon' }
14
24
  | { type: 'literal'; charset: CharacterSet }
@@ -19,6 +29,6 @@ export type RegularExpression =
19
29
  | { type: 'optional'; inner: RegularExpression }
20
30
  | { type: 'repeat'; inner: RegularExpression; bounds: RepeatBounds }
21
31
  | { type: 'capture-group'; inner: RegularExpression; name?: string }
22
- | { type: 'lookahead'; isPositive: boolean; inner: RegularExpression; right: RegularExpression }
32
+ | { type: 'assertion'; direction: AssertionDir; sign: AssertionSign; inner: RegularExpression; outer: RegularExpression }
23
33
  | { type: 'start-anchor'; left: RegularExpression; right: RegularExpression }
24
34
  | { type: 'end-anchor'; left: RegularExpression; right: RegularExpression };
@@ -9,7 +9,7 @@ import { parseRegExpString } from '../node_modules/@gruhn/regex-utils/dist/regex
9
9
  import { runParser } from './parser.js';
10
10
  import { stringParserInputCompanion } from './parserInputCompanion.js';
11
11
  import { arbitrarilySlicedAsyncIterator } from './arbitrarilySlicedAsyncInterator.js';
12
- import type { RegularExpression, CharacterSet } from './regularExpression.js';
12
+ import { AssertionDir, AssertionSign, type RegularExpression, type CharacterSet } from './regularExpression.js';
13
13
 
14
14
  // Normalize AST for comparison - removes hashes from CharSets and normalizes structure
15
15
  function normalizeCharacterSet(charset: CharacterSet): CharacterSet {
@@ -47,8 +47,8 @@ function normalizeRegularExpression(ast: RegularExpression): RegularExpression {
47
47
  return { type: 'capture-group', inner: normalizeRegularExpression(ast.inner), name: ast.name };
48
48
  }
49
49
  return { type: 'capture-group', inner: normalizeRegularExpression(ast.inner) };
50
- case 'lookahead':
51
- return { type: 'lookahead', isPositive: ast.isPositive, inner: normalizeRegularExpression(ast.inner), right: normalizeRegularExpression(ast.right) };
50
+ case 'assertion':
51
+ return { type: 'assertion', direction: ast.direction, sign: ast.sign, inner: normalizeRegularExpression(ast.inner), outer: normalizeRegularExpression(ast.outer) };
52
52
  case 'start-anchor':
53
53
  return { type: 'start-anchor', left: normalizeRegularExpression(ast.left), right: normalizeRegularExpression(ast.right) };
54
54
  case 'end-anchor':