@futpib/parser 1.0.6 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/build/arbitraryBash.d.ts +3 -0
  2. package/build/arbitraryBash.js +142 -0
  3. package/build/arbitraryJavaScript.js +4 -4
  4. package/build/arbitraryZipStream.d.ts +1 -1
  5. package/build/bashParser.js +317 -75
  6. package/build/bashParser.test.js +71 -0
  7. package/build/bashUnparser.d.ts +3 -0
  8. package/build/bashUnparser.js +157 -0
  9. package/build/bashUnparser.test.d.ts +1 -0
  10. package/build/bashUnparser.test.js +24 -0
  11. package/build/bsonParser.js +3 -3
  12. package/build/dalvikExecutableParser/stringSyntaxParser.js +20 -31
  13. package/build/dalvikExecutableParser.js +2 -5
  14. package/build/hasExecutable.js +1 -1
  15. package/build/index.d.ts +1 -0
  16. package/build/index.js +1 -0
  17. package/build/jsonParser.js +2 -7
  18. package/build/predicateElementParser.d.ts +3 -0
  19. package/build/predicateElementParser.js +10 -0
  20. package/build/regularExpression.d.ts +12 -3
  21. package/build/regularExpression.js +10 -1
  22. package/build/regularExpressionParser.js +39 -25
  23. package/build/regularExpressionParser.test.js +2 -2
  24. package/build/smaliParser.js +5 -9
  25. package/build/symbolicExpressionParser.js +8 -3
  26. package/package.json +9 -9
  27. package/readme.md +468 -7
  28. package/src/arbitraryBash.ts +237 -0
  29. package/src/arbitraryJavaScript.ts +4 -4
  30. package/src/bashParser.test.ts +138 -0
  31. package/src/bashParser.ts +467 -139
  32. package/src/bashUnparser.test.ts +37 -0
  33. package/src/bashUnparser.ts +211 -0
  34. package/src/bsonParser.ts +4 -7
  35. package/src/dalvikExecutableParser/stringSyntaxParser.ts +27 -74
  36. package/src/dalvikExecutableParser.ts +4 -10
  37. package/src/hasExecutable.ts +1 -1
  38. package/src/index.ts +4 -0
  39. package/src/jsonParser.ts +2 -11
  40. package/src/predicateElementParser.ts +22 -0
  41. package/src/regularExpression.ts +11 -1
  42. package/src/regularExpressionParser.test.ts +3 -3
  43. package/src/regularExpressionParser.ts +49 -30
  44. package/src/smaliParser.ts +11 -23
  45. package/src/symbolicExpressionParser.ts +9 -3
@@ -0,0 +1,237 @@
1
+ import * as fc from 'fast-check';
2
+ import {
3
+ type BashWord,
4
+ type BashWordPart,
5
+ type BashWordPartLiteral,
6
+ type BashWordPartSingleQuoted,
7
+ type BashWordPartDoubleQuoted,
8
+ type BashWordPartVariable,
9
+ type BashWordPartVariableBraced,
10
+ type BashWordPartArithmeticExpansion,
11
+ type BashSimpleCommand,
12
+ type BashSubshell,
13
+ type BashBraceGroup,
14
+ type BashCommandUnit,
15
+ type BashPipeline,
16
+ type BashCommandList,
17
+ type BashRedirect,
18
+ type BashAssignment,
19
+ } from './bash.js';
20
+
21
+ const arbitraryBashIdentifier: fc.Arbitrary<string> = fc.stringMatching(/^[a-zA-Z_][a-zA-Z0-9_]*$/); // Identifier-shaped strings: a letter or underscore, then any number of letters, digits, or underscores
22
+
23
+ // Safe unquoted literal part: first char is an ASCII letter or digit, the rest are letters, digits, or one of . _ @ % , : ^ ~ - (so no whitespace, quotes, braces, #, or =; an = would be parsed as assignment)
24
+ const arbitraryBashWordPartLiteral: fc.Arbitrary<BashWordPartLiteral> = fc.record({
25
+ type: fc.constant('literal' as const),
26
+ value: fc.stringMatching(/^[a-zA-Z0-9][a-zA-Z0-9._@%,:^~-]*$/),
27
+ });
28
+
29
+ // Single-quoted part: any string (possibly empty) that contains neither a single quote nor a newline (kept deliberately simple)
30
+ const arbitraryBashWordPartSingleQuoted: fc.Arbitrary<BashWordPartSingleQuoted> = fc.record({
31
+ type: fc.constant('singleQuoted' as const),
32
+ value: fc.stringMatching(/^[^'\n]*$/),
33
+ });
34
+
35
+ const arbitraryBashWordPartVariable: fc.Arbitrary<BashWordPartVariable> = fc.record({ // 'variable' part whose name is an identifier-shaped string
36
+ type: fc.constant('variable' as const),
37
+ name: arbitraryBashIdentifier,
38
+ });
39
+
40
+ // variableBraced part with no operator/operand: both optional keys are always present and set to undefined so deepEqual matches parser output
41
+ const arbitraryBashWordPartVariableBraced: fc.Arbitrary<BashWordPartVariableBraced> = fc.record({
42
+ type: fc.constant('variableBraced' as const),
43
+ name: arbitraryBashIdentifier,
44
+ operator: fc.constant(undefined),
45
+ operand: fc.constant(undefined),
46
+ });
47
+
48
+ const arbitraryBashWordPartArithmeticExpansion: fc.Arbitrary<BashWordPartArithmeticExpansion> = fc.record({ // 'arithmeticExpansion' part; expression is a (possibly empty) string of digits, '+', '-', and spaces
49
+ type: fc.constant('arithmeticExpansion' as const),
50
+ expression: fc.stringMatching(/^[0-9+\- ]*$/),
51
+ });
52
+
53
+ type RecursiveArbitraries = { // Name-to-value-type map used as the type argument of fc.letrec
54
+ commandList: BashCommandList;
55
+ };
56
+
57
+ const recursiveArbitraries = fc.letrec<RecursiveArbitraries>(tie => { // Recursive generators: command lists contain words, and word parts (command substitution) contain command lists again
58
+ const arbitraryCommandList = tie('commandList') as fc.Arbitrary<BashCommandList>; // Lazy reference to the 'commandList' arbitrary returned at the end of this callback
59
+
60
+ // Literal inside double quotes: one or more characters, none of which is a double quote, backslash, dollar sign, backtick, or newline
61
+ const arbitraryDoubleQuotedLiteral: fc.Arbitrary<BashWordPartLiteral> = fc.record({
62
+ type: fc.constant('literal' as const),
63
+ value: fc.stringMatching(/^[^"\\$`\n]+$/),
64
+ });
65
+
66
+ const arbitraryBashWordPartDoubleQuoted: fc.Arbitrary<BashWordPartDoubleQuoted> = fc.record({ // 'doubleQuoted' part holding 1-3 inner parts, weighted 3:1:1 toward literals
67
+ type: fc.constant('doubleQuoted' as const),
68
+ parts: fc.array(
69
+ fc.oneof(
70
+ { weight: 3, arbitrary: arbitraryDoubleQuotedLiteral as fc.Arbitrary<BashWordPart> },
71
+ { weight: 1, arbitrary: arbitraryBashWordPartVariable as fc.Arbitrary<BashWordPart> },
72
+ { weight: 1, arbitrary: arbitraryBashWordPartVariableBraced as fc.Arbitrary<BashWordPart> },
73
+ ),
74
+ { minLength: 1, maxLength: 3 },
75
+ ),
76
+ }).filter(dq => // Keep only part sequences in which every adjacent pair passes the checks below
77
+ dq.parts.every((part, i) => {
78
+ const next = dq.parts[i + 1]; // undefined for the last part
79
+ // Reject two literal parts in a row (they merge into one literal when re-parsed)
80
+ if (part.type === 'literal' && next !== undefined && next.type === 'literal') {
81
+ return false;
82
+ }
83
+
84
+ // Reject $var directly followed by a literal that starts with an identifier character (would be mis-parsed as one longer variable name)
85
+ if (part.type === 'variable' && next !== undefined && next.type === 'literal') {
86
+ return next.value.length === 0 || !isIdentChar(next.value[0]!);
87
+ }
88
+
89
+ return true;
90
+ }),
91
+ );
92
+
93
+ const arbitraryBashWordPartCommandSubstitution = fc.record({ // 'commandSubstitution' part wrapping a recursively generated command list
94
+ type: fc.constant('commandSubstitution' as const),
95
+ command: arbitraryCommandList,
96
+ });
97
+
98
+ const arbitraryBashWordPart: fc.Arbitrary<BashWordPart> = fc.oneof( // Weighted choice over every word-part kind; plain literals are the most likely
99
+ { weight: 4, arbitrary: arbitraryBashWordPartLiteral as fc.Arbitrary<BashWordPart> },
100
+ { weight: 2, arbitrary: arbitraryBashWordPartSingleQuoted as fc.Arbitrary<BashWordPart> },
101
+ { weight: 2, arbitrary: arbitraryBashWordPartDoubleQuoted as fc.Arbitrary<BashWordPart> },
102
+ { weight: 2, arbitrary: arbitraryBashWordPartVariable as fc.Arbitrary<BashWordPart> },
103
+ { weight: 1, arbitrary: arbitraryBashWordPartVariableBraced as fc.Arbitrary<BashWordPart> },
104
+ { weight: 1, arbitrary: arbitraryBashWordPartArithmeticExpansion as fc.Arbitrary<BashWordPart> },
105
+ { weight: 1, arbitrary: arbitraryBashWordPartCommandSubstitution as fc.Arbitrary<BashWordPart> },
106
+ );
107
+
108
+ function isIdentChar(ch: string): boolean { // True when ch is an ASCII letter, digit, or underscore; a function declaration, so the filter callbacks earlier in this closure can reference it
109
+ return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') || ch === '_';
110
+ }
111
+
112
+ const arbitraryWord: fc.Arbitrary<BashWord> = fc.record({ // A word made of 1-2 parts, filtered with the same adjacency checks as double-quoted parts
113
+ parts: fc.array(arbitraryBashWordPart, { minLength: 1, maxLength: 2 }),
114
+ }).filter(word =>
115
+ word.parts.every((part, i) => {
116
+ const next = word.parts[i + 1]; // undefined for the last part
117
+ // Reject two literal parts in a row (they merge into one literal when re-parsed)
118
+ if (part.type === 'literal' && next !== undefined && next.type === 'literal') {
119
+ return false;
120
+ }
121
+
122
+ // Reject $var directly followed by a literal that starts with an identifier character (would be mis-parsed as one variable); unquoted literals here always start with a letter or digit, so every variable-then-literal pair is rejected
123
+ if (part.type === 'variable' && next !== undefined && next.type === 'literal') {
124
+ return next.value.length === 0 || !isIdentChar(next.value[0]!);
125
+ }
126
+
127
+ return true;
128
+ }),
129
+ );
130
+
131
+ // Assignment: identifier name plus an optional word value; the value key is always present (even if undefined) to match createObjectParser behavior
132
+ const arbitraryBashAssignment: fc.Arbitrary<BashAssignment> = fc.record({
133
+ name: arbitraryBashIdentifier,
134
+ value: fc.option(arbitraryWord, { nil: undefined }),
135
+ });
136
+
137
+ // Redirect: operator is one of '>', '>>', '<'; the fd key is always present and undefined to match createObjectParser behavior
138
+ const arbitraryBashRedirect: fc.Arbitrary<BashRedirect> = fc.record({
139
+ fd: fc.constant(undefined),
140
+ operator: fc.oneof(
141
+ fc.constant('>' as const),
142
+ fc.constant('>>' as const),
143
+ fc.constant('<' as const),
144
+ ),
145
+ target: arbitraryWord,
146
+ });
147
+
148
+ const arbitraryBashSimpleCommandWithName: fc.Arbitrary<BashSimpleCommand> = fc.record({ // Named simple command: up to 2 args, at most 1 redirect, at most 1 assignment
149
+ type: fc.constant('simple' as const),
150
+ name: arbitraryWord,
151
+ args: fc.array(arbitraryWord, { maxLength: 2 }),
152
+ redirects: fc.array(arbitraryBashRedirect, { maxLength: 1 }),
153
+ assignments: fc.array(arbitraryBashAssignment, { maxLength: 1 }),
154
+ });
155
+
156
+ // Nameless simple command: 1-2 assignments, at most 1 redirect, name undefined, and always an empty args array
157
+ const arbitraryBashSimpleCommandNoName: fc.Arbitrary<BashSimpleCommand> = fc.record({
158
+ type: fc.constant('simple' as const),
159
+ name: fc.constant(undefined),
160
+ args: fc.constant([]),
161
+ redirects: fc.array(arbitraryBashRedirect, { maxLength: 1 }),
162
+ assignments: fc.array(arbitraryBashAssignment, { minLength: 1, maxLength: 2 }),
163
+ });
164
+
165
+ const arbitraryBashSimpleCommand: fc.Arbitrary<BashSimpleCommand> = fc.oneof( // Named commands are weighted 4:1 over nameless ones
166
+ { weight: 4, arbitrary: arbitraryBashSimpleCommandWithName },
167
+ { weight: 1, arbitrary: arbitraryBashSimpleCommandNoName },
168
+ );
169
+
170
+ const arbitraryBashSubshell: fc.Arbitrary<BashSubshell> = fc.record({ // 'subshell' whose body is a recursively generated command list
171
+ type: fc.constant('subshell' as const),
172
+ body: arbitraryCommandList,
173
+ });
174
+
175
+ // Brace group bodies need a trailing ';' on the last entry (required by "{ cmd; }" syntax), so a missing final separator is filled in
176
+ const arbitraryBraceGroupBody: fc.Arbitrary<BashCommandList> = arbitraryCommandList.map(list => {
177
+ const entries = list.entries.map((entry, i) => {
178
+ if (i === list.entries.length - 1 && entry.separator === undefined) { // Only an undefined separator is replaced; a generated '&&', '||', or ';' on the last entry is kept as-is
179
+ return { pipeline: entry.pipeline, separator: ';' as const };
180
+ }
181
+
182
+ return entry;
183
+ });
184
+ return { ...list, entries };
185
+ });
186
+
187
+ const arbitraryBashBraceGroup: fc.Arbitrary<BashBraceGroup> = fc.record({ // 'braceGroup' whose body always has a separator on its last entry
188
+ type: fc.constant('braceGroup' as const),
189
+ body: arbitraryBraceGroupBody,
190
+ });
191
+
192
+ const arbitraryBashCommandUnit: fc.Arbitrary<BashCommandUnit> = fc.oneof( // Simple commands are weighted 5:1:1 over subshells and brace groups
193
+ { weight: 5, arbitrary: arbitraryBashSimpleCommand as fc.Arbitrary<BashCommandUnit> },
194
+ { weight: 1, arbitrary: arbitraryBashSubshell as fc.Arbitrary<BashCommandUnit> },
195
+ { weight: 1, arbitrary: arbitraryBashBraceGroup as fc.Arbitrary<BashCommandUnit> },
196
+ );
197
+
198
+ const arbitraryBashPipeline: fc.Arbitrary<BashPipeline> = fc.record({ // Pipeline of 1-2 command units with a random negated flag
199
+ type: fc.constant('pipeline' as const),
200
+ negated: fc.boolean(),
201
+ commands: fc.array(arbitraryBashCommandUnit, { minLength: 1, maxLength: 2 }),
202
+ });
203
+
204
+ const commandListArbitrary: fc.Arbitrary<BashCommandList> = fc.record({ // 'list' of 1-2 entries, each a pipeline plus an optional '&&', '||', or ';' separator
205
+ type: fc.constant('list' as const),
206
+ entries: fc.array(
207
+ fc.record({
208
+ pipeline: arbitraryBashPipeline,
209
+ separator: fc.option(
210
+ fc.oneof(
211
+ fc.constant('&&' as const),
212
+ fc.constant('||' as const),
213
+ fc.constant(';' as const),
214
+ ),
215
+ { nil: undefined },
216
+ ),
217
+ }),
218
+ { minLength: 1, maxLength: 2 },
219
+ ),
220
+ }).map(list => { // Post-process so that every non-final entry carries a separator
221
+ const entries = list.entries.map((entry, i) => {
222
+ if (i < list.entries.length - 1 && entry.separator === undefined) { // A missing separator before another entry defaults to ';'
223
+ return { pipeline: entry.pipeline, separator: ';' as const };
224
+ }
225
+
226
+ return entry;
227
+ });
228
+ return { ...list, entries };
229
+ });
230
+
231
+ return { // The key must match the name passed to tie() at the top of this callback
232
+ commandList: commandListArbitrary,
233
+ };
234
+ });
235
+
236
+ export const arbitraryBashCommandList: fc.Arbitrary<BashCommandList> = // Public entry point: the 'commandList' arbitrary produced by the letrec
237
+ recursiveArbitraries.commandList as fc.Arbitrary<BashCommandList>;
@@ -256,7 +256,7 @@ const arbitraryArrowFunctionExpression = fc.oneof(
256
256
  fc.record({
257
257
  type: fc.constant('ArrowFunctionExpression' as const),
258
258
  id: fc.constant(null),
259
- params: fc.array(arbitraryIdentifier, { minLength: 0, maxLength: 3 }),
259
+ params: fc.uniqueArray(arbitraryIdentifier, { minLength: 0, maxLength: 3, selector: (id: Identifier) => id.name }),
260
260
  body: arbitraryFunctionBodyBlockStatement,
261
261
  expression: fc.constant(false),
262
262
  generator: fc.constant(false),
@@ -265,7 +265,7 @@ const arbitraryArrowFunctionExpression = fc.oneof(
265
265
  fc.record({
266
266
  type: fc.constant('ArrowFunctionExpression' as const),
267
267
  id: fc.constant(null),
268
- params: fc.array(arbitraryIdentifier, { minLength: 0, maxLength: 3 }),
268
+ params: fc.uniqueArray(arbitraryIdentifier, { minLength: 0, maxLength: 3, selector: (id: Identifier) => id.name }),
269
269
  body: arbitraryLeafExpression,
270
270
  expression: fc.constant(true),
271
271
  generator: fc.constant(false),
@@ -276,7 +276,7 @@ const arbitraryArrowFunctionExpression = fc.oneof(
276
276
  const arbitraryFunctionExpression = fc.record({
277
277
  type: fc.constant('FunctionExpression' as const),
278
278
  id: fc.constant(null),
279
- params: fc.array(arbitraryIdentifier, { minLength: 0, maxLength: 3 }),
279
+ params: fc.uniqueArray(arbitraryIdentifier, { minLength: 0, maxLength: 3, selector: (id: Identifier) => id.name }),
280
280
  body: arbitraryFunctionBodyBlockStatement,
281
281
  expression: fc.constant(false),
282
282
  generator: fc.constant(false),
@@ -359,7 +359,7 @@ const arbitraryTryStatement: fc.Arbitrary<Statement> = fc.oneof(
359
359
  const arbitraryFunctionDeclaration: fc.Arbitrary<Statement> = fc.record({
360
360
  type: fc.constant('FunctionDeclaration' as const),
361
361
  id: arbitraryIdentifier,
362
- params: fc.array(arbitraryIdentifier, { minLength: 0, maxLength: 3 }),
362
+ params: fc.uniqueArray(arbitraryIdentifier, { minLength: 0, maxLength: 3, selector: (id: Identifier) => id.name }),
363
363
  body: arbitraryFunctionBodyBlockStatement,
364
364
  expression: fc.constant(false),
365
365
  generator: fc.constant(false),
@@ -2,6 +2,7 @@ import test from 'ava';
2
2
  import { runParser, runParserWithRemainingInput } from './parser.js';
3
3
  import { stringParserInputCompanion } from './parserInputCompanion.js';
4
4
  import { bashScriptParser, bashWordParser, bashSimpleCommandParser } from './bashParser.js';
5
+ import type { BashSimpleCommand, BashWordPartLiteral } from './bash.js';
5
6
 
6
7
  test('simple command parser - single word', async t => {
7
8
  const result = await runParser(
@@ -588,3 +589,140 @@ test('if treated as command name', async t => {
588
589
  t.deepEqual(cmd.name, { parts: [{ type: 'literal', value: 'if' }] });
589
590
  }
590
591
  });
592
+
593
+ test('find -exec with {} placeholder', async t => { // Checks the command name is the literal 'find' and that the {} argument is kept as a literal word
594
+ const result = await runParser(
595
+ bashScriptParser,
596
+ 'find . -name "*.tmp" -exec rm {} \\;',
597
+ stringParserInputCompanion,
598
+ );
599
+
600
+ const cmd = result.entries[0].pipeline.commands[0] as BashSimpleCommand;
601
+ t.is(cmd.name!.parts[0].type, 'literal');
602
+ t.is((cmd.name!.parts[0] as BashWordPartLiteral).value, 'find');
603
+ // {} should be parsed as a single literal word argument
604
+ const braceArg = cmd.args[5]; // args in order: ., -name, "*.tmp", -exec, rm, {}, \; (so {} is index 5)
605
+ t.is(braceArg.parts[0].type, 'literal');
606
+ t.is((braceArg.parts[0] as BashWordPartLiteral).value, '{}');
607
+ });
608
+
609
+ test('lone open brace as argument', async t => { // 'echo {' must yield a first argument consisting of the single literal part '{'
610
+ const result = await runParser(
611
+ bashScriptParser,
612
+ 'echo {',
613
+ stringParserInputCompanion,
614
+ );
615
+
616
+ const cmd = result.entries[0].pipeline.commands[0] as BashSimpleCommand;
617
+ t.deepEqual(cmd.args[0], {
618
+ parts: [{ type: 'literal', value: '{' }],
619
+ });
620
+ });
621
+
622
+ test('close brace mid-word', async t => { // 'echo foo}bar' must produce exactly one argument whose first part is a literal
623
+ const result = await runParser(
624
+ bashScriptParser,
625
+ 'echo foo}bar',
626
+ stringParserInputCompanion,
627
+ );
628
+
629
+ const cmd = result.entries[0].pipeline.commands[0] as BashSimpleCommand;
630
+ t.is(cmd.args.length, 1);
631
+ t.is(cmd.args[0].parts[0].type, 'literal');
632
+ });
633
+
634
+ test('open brace mid-word', async t => { // 'echo foo{bar' must produce exactly one argument whose first part is a literal
635
+ const result = await runParser(
636
+ bashScriptParser,
637
+ 'echo foo{bar',
638
+ stringParserInputCompanion,
639
+ );
640
+
641
+ const cmd = result.entries[0].pipeline.commands[0] as BashSimpleCommand;
642
+ t.is(cmd.args.length, 1);
643
+ t.is(cmd.args[0].parts[0].type, 'literal');
644
+ });
645
+
646
+ test('braces mid-word like brace expansion', async t => { // 'echo file.{c,h}' must produce exactly one argument whose first part is a literal
647
+ const result = await runParser(
648
+ bashScriptParser,
649
+ 'echo file.{c,h}',
650
+ stringParserInputCompanion,
651
+ );
652
+
653
+ const cmd = result.entries[0].pipeline.commands[0] as BashSimpleCommand;
654
+ t.is(cmd.args.length, 1);
655
+ t.is(cmd.args[0].parts[0].type, 'literal');
656
+ });
657
+
658
+ test('find -exec with {.} placeholder variant', async t => { // NOTE(review): the title mentions find -exec, but the input below is 'echo {.}' - confirm the intended name
659
+ const result = await runParser(
660
+ bashScriptParser,
661
+ 'echo {.}',
662
+ stringParserInputCompanion,
663
+ );
664
+
665
+ const cmd = result.entries[0].pipeline.commands[0] as BashSimpleCommand;
666
+ t.is(cmd.args.length, 1); // {.} must stay a single argument
667
+ t.is(cmd.args[0].parts[0].type, 'literal');
668
+ });
669
+
670
+ test('lone close brace as argument', async t => { // 'echo }' must yield a first argument consisting of the single literal part '}'
671
+ const result = await runParser(
672
+ bashScriptParser,
673
+ 'echo }',
674
+ stringParserInputCompanion,
675
+ );
676
+
677
+ const cmd = result.entries[0].pipeline.commands[0] as BashSimpleCommand;
678
+ t.deepEqual(cmd.args[0], {
679
+ parts: [{ type: 'literal', value: '}' }],
680
+ });
681
+ });
682
+
683
+ test('close brace at start of word', async t => { // 'echo }hello' must produce exactly one argument whose first part is a literal
684
+ const result = await runParser(
685
+ bashScriptParser,
686
+ 'echo }hello',
687
+ stringParserInputCompanion,
688
+ );
689
+
690
+ const cmd = result.entries[0].pipeline.commands[0] as BashSimpleCommand;
691
+ t.is(cmd.args.length, 1);
692
+ t.is(cmd.args[0].parts[0].type, 'literal');
693
+ });
694
+
695
+ test('multi-line script with blank lines', async t => { // Two commands separated by an empty line must parse into two list entries
696
+ const result = await runParser(
697
+ bashScriptParser,
698
+ 'echo hello\n\necho world',
699
+ stringParserInputCompanion,
700
+ );
701
+
702
+ t.is(result.entries.length, 2);
703
+ });
704
+
705
+ test('mid-script comment', async t => { // A '# comment' line between two commands must not add an entry: exactly two are expected
706
+ const result = await runParser(
707
+ bashScriptParser,
708
+ 'echo hello\n# comment\necho world',
709
+ stringParserInputCompanion,
710
+ );
711
+
712
+ t.is(result.entries.length, 2);
713
+ });
714
+
715
+ test('nested parentheses in arithmetic expansion', async t => { // Inner parentheses must stay inside the expression instead of ending the $(( ... )) early
716
+ const result = await runParser(
717
+ bashScriptParser,
718
+ 'echo $((1 + (2 * 3)))',
719
+ stringParserInputCompanion,
720
+ );
721
+
722
+ const cmd = result.entries[0].pipeline.commands[0] as BashSimpleCommand;
723
+ const arith = cmd.args[0].parts[0];
724
+ t.is(arith.type, 'arithmeticExpansion');
725
+ if (arith.type === 'arithmeticExpansion') { // Narrows the union so .expression is accessible
726
+ t.is(arith.expression, '1 + (2 * 3)');
727
+ }
728
+ });
728
+ });