@futpib/parser 1.0.6 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,33 +1,88 @@
1
1
  import { setParserName } from './parser.js';
2
2
  import { createExactSequenceParser } from './exactSequenceParser.js';
3
+ import { createElementParser } from './elementParser.js';
4
+ import { createPredicateElementParser } from './predicateElementParser.js';
5
+ import { createNegativeLookaheadParser } from './negativeLookaheadParser.js';
6
+ import { createLookaheadParser } from './lookaheadParser.js';
3
7
  import { promiseCompose } from './promiseCompose.js';
4
8
  import { createTupleParser } from './tupleParser.js';
5
9
  import { createDisjunctionParser } from './disjunctionParser.js';
6
10
  import { createArrayParser } from './arrayParser.js';
7
11
  import { createParserAccessorParser } from './parserAccessorParser.js';
8
12
  import { createOptionalParser } from './optionalParser.js';
9
- import { createRegExpParser } from './regexpParser.js';
10
13
  import { createNonEmptyArrayParser } from './nonEmptyArrayParser.js';
11
14
  import { createSeparatedNonEmptyArrayParser } from './separatedNonEmptyArrayParser.js';
12
15
  import { createObjectParser } from './objectParser.js';
16
+ // Character predicates
17
+ function isDigit(ch) {
18
+ return ch >= '0' && ch <= '9';
19
+ }
20
+ function isLetter(ch) {
21
+ return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
22
+ }
23
+ function isIdentStart(ch) {
24
+ return isLetter(ch) || ch === '_';
25
+ }
26
+ function isIdentChar(ch) {
27
+ return isIdentStart(ch) || isDigit(ch);
28
+ }
13
29
  // Whitespace (spaces, tabs, and line continuations - not bare newlines which are significant)
14
- const bashInlineWhitespaceParser = promiseCompose(createRegExpParser(/(?:[ \t]|\\\n)+/), match => match[0]);
15
- const bashOptionalInlineWhitespaceParser = promiseCompose(createRegExpParser(/(?:[ \t]|\\\n)*/), match => match[0]);
16
- // Newline
17
- const bashNewlineParser = promiseCompose(createRegExpParser(/\n/), match => match[0]);
30
+ const bashInlineWhitespaceUnitParser = createDisjunctionParser([
31
+ promiseCompose(createExactSequenceParser(' '), () => ' '),
32
+ promiseCompose(createExactSequenceParser('\t'), () => '\t'),
33
+ promiseCompose(createExactSequenceParser('\\\n'), () => '\\\n'),
34
+ ]);
35
+ const bashInlineWhitespaceParser = promiseCompose(createNonEmptyArrayParser(bashInlineWhitespaceUnitParser), parts => parts.join(''));
36
+ const bashOptionalInlineWhitespaceParser = promiseCompose(createArrayParser(bashInlineWhitespaceUnitParser), parts => parts.join(''));
18
37
  // Word characters (unquoted, no special chars)
19
- // Note: {} are excluded so brace groups are parsed correctly
20
- // # is excluded from the first character (starts a comment) but allowed mid-word
21
- const bashUnquotedWordCharsParser = promiseCompose(createRegExpParser(/[^\s\n|&;<>(){}$`"'\\#][^\s\n|&;<>(){}$`"'\\]*/), match => match[0]);
38
+ // Note: {} and # are excluded from the first character so brace groups and comments are parsed correctly,
39
+ // but allowed as continuation characters for mid-word braces (e.g., file.{c,h}, foo}bar) and hash (foo#bar)
40
+ const bashSpecialCharParser = createDisjunctionParser([...' \t\n|&;<>()$`"\'\\'].map(ch => createExactSequenceParser(ch)));
41
+ const bashWordStartExcludeParser = createDisjunctionParser([
42
+ bashSpecialCharParser,
43
+ createExactSequenceParser('{'),
44
+ createExactSequenceParser('}'),
45
+ createExactSequenceParser('#'),
46
+ ]);
47
+ const bashUnquotedWordStartCharParser = promiseCompose(createTupleParser([
48
+ createNegativeLookaheadParser(bashWordStartExcludeParser),
49
+ createElementParser(),
50
+ ]), ([, ch]) => ch);
51
+ const bashUnquotedWordContinueCharParser = promiseCompose(createTupleParser([
52
+ createNegativeLookaheadParser(bashSpecialCharParser),
53
+ createElementParser(),
54
+ ]), ([, ch]) => ch);
55
+ const bashUnquotedWordCharsParser = promiseCompose(createTupleParser([
56
+ bashUnquotedWordStartCharParser,
57
+ createArrayParser(bashUnquotedWordContinueCharParser),
58
+ ]), ([first, rest]) => first + rest.join(''));
59
+ // Consume characters until a given terminator, returning the accumulated string
60
+ function createUntilCharParser(terminator) {
61
+ return promiseCompose(createArrayParser(promiseCompose(createTupleParser([
62
+ createNegativeLookaheadParser(createExactSequenceParser(terminator)),
63
+ createElementParser(),
64
+ ]), ([, ch]) => ch)), chars => chars.join(''));
65
+ }
22
66
  // Single quoted string: '...'
23
67
  const bashSingleQuotedParser = createObjectParser({
24
68
  type: 'singleQuoted',
25
69
  _open: createExactSequenceParser("'"),
26
- value: promiseCompose(createRegExpParser(/[^']*/), match => match[0]),
70
+ value: createUntilCharParser("'"),
27
71
  _close: createExactSequenceParser("'"),
28
72
  });
29
- // Variable name
30
- const bashVariableNameParser = promiseCompose(createRegExpParser(/[a-zA-Z_][a-zA-Z0-9_]*|[0-9]+|[@*#?$!-]/), match => match[0]);
73
+ // Variable name: identifiers, positional params ($0, $1...), or special params ($@, $*, $#, $?, $$, $!, $-)
74
+ const bashSpecialParams = new Set(['@', '*', '#', '?', '$', '!', '-']);
75
+ const bashIdentifierParser = promiseCompose(createTupleParser([
76
+ createPredicateElementParser(isIdentStart),
77
+ createArrayParser(createPredicateElementParser(isIdentChar)),
78
+ ]), ([first, rest]) => first + rest.join(''));
79
+ const bashDigitsParser = promiseCompose(createNonEmptyArrayParser(createPredicateElementParser(isDigit)), chars => chars.join(''));
80
+ const bashSpecialParamParser = createPredicateElementParser(ch => bashSpecialParams.has(ch));
81
+ const bashVariableNameParser = createDisjunctionParser([
82
+ bashIdentifierParser,
83
+ bashDigitsParser,
84
+ bashSpecialParamParser,
85
+ ]);
31
86
  // Simple variable: $var
32
87
  const bashSimpleVariableParser = createObjectParser({
33
88
  type: 'variable',
@@ -50,40 +105,148 @@ const bashBacktickSubstitutionParser = createObjectParser({
50
105
  command: createParserAccessorParser(() => bashCommandParser),
51
106
  _close: createExactSequenceParser('`'),
52
107
  });
108
+ // Word characters for use inside ${...} operands ({ and } are both excluded from continuation so the closing brace is not consumed)
109
+ const bashBracedVarContinueExcludeParser = createDisjunctionParser([
110
+ bashSpecialCharParser,
111
+ createExactSequenceParser('{'),
112
+ createExactSequenceParser('}'),
113
+ ]);
114
+ const bashBracedVarUnquotedWordCharsParser = promiseCompose(createTupleParser([
115
+ bashUnquotedWordStartCharParser,
116
+ createArrayParser(promiseCompose(createTupleParser([
117
+ createNegativeLookaheadParser(bashBracedVarContinueExcludeParser),
118
+ createElementParser(),
119
+ ]), ([, ch]) => ch)),
120
+ ]), ([first, rest]) => first + rest.join(''));
121
+ const bashBracedVarLiteralWordPartParser = createObjectParser({
122
+ type: 'literal',
123
+ value: bashBracedVarUnquotedWordCharsParser,
124
+ });
53
125
  // Braced variable expansion: ${VAR} or ${VAR:-default}
54
126
  const bashBracedVariableParser = createObjectParser({
55
127
  type: 'variableBraced',
56
128
  _open: createExactSequenceParser('${'),
57
129
  name: bashVariableNameParser,
58
- operator: createOptionalParser(promiseCompose(createRegExpParser(/:-|:=|:\+|:\?|-|=|\+|\?|##|#|%%|%/), match => match[0])),
59
- operand: createOptionalParser(createParserAccessorParser(() => bashWordParser)),
130
+ operator: createOptionalParser(createDisjunctionParser([
131
+ promiseCompose(createExactSequenceParser(':-'), () => ':-'),
132
+ promiseCompose(createExactSequenceParser(':='), () => ':='),
133
+ promiseCompose(createExactSequenceParser(':+'), () => ':+'),
134
+ promiseCompose(createExactSequenceParser(':?'), () => ':?'),
135
+ promiseCompose(createExactSequenceParser('##'), () => '##'),
136
+ promiseCompose(createExactSequenceParser('%%'), () => '%%'),
137
+ promiseCompose(createExactSequenceParser('-'), () => '-'),
138
+ promiseCompose(createExactSequenceParser('='), () => '='),
139
+ promiseCompose(createExactSequenceParser('+'), () => '+'),
140
+ promiseCompose(createExactSequenceParser('?'), () => '?'),
141
+ promiseCompose(createExactSequenceParser('#'), () => '#'),
142
+ promiseCompose(createExactSequenceParser('%'), () => '%'),
143
+ ])),
144
+ operand: createOptionalParser(createParserAccessorParser(() => bashBracedVarWordParser)),
60
145
  _close: createExactSequenceParser('}'),
61
146
  });
62
- // Arithmetic expansion: $((expression))
147
+ // Arithmetic expansion: $((expression)) - handles nested parentheses
148
+ const bashArithmeticExpressionParser = async (parserContext) => {
149
+ let result = '';
150
+ let depth = 0;
151
+ for (;;) {
152
+ const ch = await parserContext.peek(0);
153
+ if (ch === undefined) {
154
+ break;
155
+ }
156
+ if (ch === '(') {
157
+ depth++;
158
+ result += ch;
159
+ parserContext.skip(1);
160
+ continue;
161
+ }
162
+ if (ch === ')') {
163
+ if (depth > 0) {
164
+ depth--;
165
+ result += ch;
166
+ parserContext.skip(1);
167
+ continue;
168
+ }
169
+ // At depth 0, a ')' means we've hit the closing '))' of $((
170
+ break;
171
+ }
172
+ result += ch;
173
+ parserContext.skip(1);
174
+ }
175
+ return result;
176
+ };
63
177
  const bashArithmeticExpansionParser = createObjectParser({
64
178
  type: 'arithmeticExpansion',
65
179
  _open: createExactSequenceParser('$(('),
66
- expression: promiseCompose(createRegExpParser(/(?:[^)]|\)(?!\)))*/), match => match[0]),
180
+ expression: bashArithmeticExpressionParser,
67
181
  _close: createExactSequenceParser('))'),
68
182
  });
69
- // ANSI-C quoting: $'...'
183
+ // ANSI-C quoting: $'...' - content can include \' escapes
184
+ // Each unit is either a backslash-escape pair or a non-quote character
185
+ const bashAnsiCContentUnitParser = createDisjunctionParser([
186
+ // Backslash escape: \x (any char after backslash)
187
+ promiseCompose(createTupleParser([
188
+ createExactSequenceParser('\\'),
189
+ createElementParser(),
190
+ ]), ([bs, ch]) => bs + ch),
191
+ // Any character that isn't ' (backslash-escape pairs are matched by the alternative listed above; only ' is rejected here)
192
+ promiseCompose(createTupleParser([
193
+ createNegativeLookaheadParser(createExactSequenceParser("'")),
194
+ createElementParser(),
195
+ ]), ([, ch]) => ch),
196
+ ]);
197
+ const bashAnsiCContentParser = promiseCompose(createArrayParser(bashAnsiCContentUnitParser), parts => parts.join(''));
70
198
  const bashAnsiCQuotedParser = createObjectParser({
71
199
  type: 'singleQuoted',
72
200
  _prefix: createExactSequenceParser('$'),
73
201
  _open: createExactSequenceParser("'"),
74
- value: promiseCompose(createRegExpParser(/(?:[^'\\]|\\.)*/), match => match[0]),
202
+ value: bashAnsiCContentParser,
75
203
  _close: createExactSequenceParser("'"),
76
204
  });
77
205
  // Process substitution: <(cmd) or >(cmd)
206
+ const bashProcessSubstitutionDirectionParser = promiseCompose(createTupleParser([
207
+ createDisjunctionParser([
208
+ createExactSequenceParser('<'),
209
+ createExactSequenceParser('>'),
210
+ ]),
211
+ createLookaheadParser(createExactSequenceParser('(')),
212
+ ]), ([dir]) => dir);
78
213
  const bashProcessSubstitutionParser = createObjectParser({
79
214
  type: 'processSubstitution',
80
- direction: promiseCompose(createRegExpParser(/[<>](?=\()/), match => match[0]),
215
+ direction: bashProcessSubstitutionDirectionParser,
81
216
  _open: createExactSequenceParser('('),
82
217
  _ws1: bashOptionalInlineWhitespaceParser,
83
218
  command: createParserAccessorParser(() => bashCommandParser),
84
219
  _ws2: bashOptionalInlineWhitespaceParser,
85
220
  _close: createExactSequenceParser(')'),
86
221
  });
222
+ // Escape sequences in double quotes: \\ \$ \` \" \! \newline
223
+ const bashDoubleQuotedEscapeCharParser = createDisjunctionParser([
224
+ createExactSequenceParser('\\'),
225
+ createExactSequenceParser('$'),
226
+ createExactSequenceParser('`'),
227
+ createExactSequenceParser('"'),
228
+ createExactSequenceParser('!'),
229
+ createExactSequenceParser('\n'),
230
+ ]);
231
+ const bashDoubleQuotedEscapeParser = promiseCompose(createTupleParser([
232
+ createExactSequenceParser('\\'),
233
+ bashDoubleQuotedEscapeCharParser,
234
+ ]), ([, ch]) => ({ type: 'literal', value: ch }));
235
+ // Literal text inside double quotes (no special chars)
236
+ const bashDoubleQuotedLiteralCharParser = promiseCompose(createTupleParser([
237
+ createNegativeLookaheadParser(createDisjunctionParser([
238
+ createExactSequenceParser('$'),
239
+ createExactSequenceParser('`'),
240
+ createExactSequenceParser('"'),
241
+ createExactSequenceParser('\\'),
242
+ ])),
243
+ createElementParser(),
244
+ ]), ([, ch]) => ch);
245
+ const bashDoubleQuotedLiteralParser = promiseCompose(createNonEmptyArrayParser(bashDoubleQuotedLiteralCharParser), chars => ({ type: 'literal', value: chars.join('') }));
246
+ // Bare $ not followed by a valid expansion start
247
+ const bashBareDollarParser = promiseCompose(createExactSequenceParser('$'), () => ({ type: 'literal', value: '$' }));
248
+ // Bare \ not followed by a recognized escape character
249
+ const bashBareBackslashParser = promiseCompose(createExactSequenceParser('\\'), () => ({ type: 'literal', value: '\\' }));
87
250
  // Double quoted string parts (inside "...")
88
251
  const bashDoubleQuotedPartParser = createDisjunctionParser([
89
252
  bashBracedVariableParser,
@@ -91,26 +254,10 @@ const bashDoubleQuotedPartParser = createDisjunctionParser([
91
254
  bashSimpleVariableParser,
92
255
  bashCommandSubstitutionParser,
93
256
  bashBacktickSubstitutionParser,
94
- // Escape sequences in double quotes
95
- promiseCompose(createRegExpParser(/\\[\\$`"!\n]/), match => ({
96
- type: 'literal',
97
- value: match[0].slice(1),
98
- })),
99
- // Literal text (no special chars)
100
- promiseCompose(createRegExpParser(/[^$`"\\]+/), match => ({
101
- type: 'literal',
102
- value: match[0],
103
- })),
104
- // Bare $ not followed by a valid expansion start (e.g. $" at end of double-quoted string)
105
- promiseCompose(createRegExpParser(/\$/), () => ({
106
- type: 'literal',
107
- value: '$',
108
- })),
109
- // Bare \ not followed by a recognized escape character (treated as literal backslash in bash)
110
- promiseCompose(createRegExpParser(/\\/), () => ({
111
- type: 'literal',
112
- value: '\\',
113
- })),
257
+ bashDoubleQuotedEscapeParser,
258
+ bashDoubleQuotedLiteralParser,
259
+ bashBareDollarParser,
260
+ bashBareBackslashParser,
114
261
  ]);
115
262
  // Double quoted string: "..."
116
263
  const bashDoubleQuotedParser = createObjectParser({
@@ -124,12 +271,45 @@ const bashLiteralWordPartParser = createObjectParser({
124
271
  type: 'literal',
125
272
  value: bashUnquotedWordCharsParser,
126
273
  });
127
- // Escape sequence outside quotes
128
- const bashEscapeParser = promiseCompose(createRegExpParser(/\\./), match => ({
274
+ // Bare {} treated as a literal word (e.g., find -exec cmd {} \;)
275
+ const bashBraceWordPartParser = promiseCompose(createExactSequenceParser('{}'), () => ({
276
+ type: 'literal',
277
+ value: '{}',
278
+ }));
279
+ // Bare { treated as a literal word part (e.g., echo {, echo {.})
280
+ // Note: } is NOT included here because it would break brace group closing
281
+ const bashOpenBraceWordPartParser = promiseCompose(createExactSequenceParser('{'), () => ({
129
282
  type: 'literal',
130
- value: match[0].slice(1),
283
+ value: '{',
131
284
  }));
132
- // Word part (any part of a word)
285
+ // Bare } treated as a literal word part (e.g., echo }, echo }hello)
286
+ const bashCloseBraceWordPartParser = promiseCompose(createExactSequenceParser('}'), () => ({
287
+ type: 'literal',
288
+ value: '}',
289
+ }));
290
+ // Escape sequence outside quotes: backslash followed by any character
291
+ const bashEscapeParser = promiseCompose(createTupleParser([
292
+ createExactSequenceParser('\\'),
293
+ createElementParser(),
294
+ ]), ([, ch]) => ({ type: 'literal', value: ch }));
295
+ // Word part for use inside ${...} operands (uses literal parser that excludes } from continuation)
296
+ const bashBracedVarWordPartParser = createDisjunctionParser([
297
+ bashAnsiCQuotedParser,
298
+ bashSingleQuotedParser,
299
+ bashDoubleQuotedParser,
300
+ bashBracedVariableParser,
301
+ bashArithmeticExpansionParser,
302
+ bashCommandSubstitutionParser,
303
+ bashBacktickSubstitutionParser,
304
+ bashSimpleVariableParser,
305
+ bashEscapeParser,
306
+ bashBracedVarLiteralWordPartParser,
307
+ bashBareDollarParser,
308
+ ]);
309
+ const bashBracedVarWordParser = createObjectParser({
310
+ parts: createNonEmptyArrayParser(bashBracedVarWordPartParser),
311
+ });
312
+ // Word part (any part of a word, } excluded from first position so brace groups work)
133
313
  const bashWordPartParser = createDisjunctionParser([
134
314
  bashAnsiCQuotedParser,
135
315
  bashSingleQuotedParser,
@@ -141,21 +321,46 @@ const bashWordPartParser = createDisjunctionParser([
141
321
  bashSimpleVariableParser,
142
322
  bashProcessSubstitutionParser,
143
323
  bashEscapeParser,
324
+ bashBraceWordPartParser,
325
+ bashOpenBraceWordPartParser,
144
326
  bashLiteralWordPartParser,
145
- // Bare $ not followed by a valid expansion start
146
- promiseCompose(createRegExpParser(/\$/), () => ({
147
- type: 'literal',
148
- value: '$',
149
- })),
327
+ bashBareDollarParser,
328
+ ]);
329
+ // Word part including } as a starter (for argument positions where } is not reserved)
330
+ const bashArgWordPartParser = createDisjunctionParser([
331
+ bashAnsiCQuotedParser,
332
+ bashSingleQuotedParser,
333
+ bashDoubleQuotedParser,
334
+ bashBracedVariableParser,
335
+ bashArithmeticExpansionParser,
336
+ bashCommandSubstitutionParser,
337
+ bashBacktickSubstitutionParser,
338
+ bashSimpleVariableParser,
339
+ bashProcessSubstitutionParser,
340
+ bashEscapeParser,
341
+ bashBraceWordPartParser,
342
+ bashOpenBraceWordPartParser,
343
+ bashCloseBraceWordPartParser,
344
+ bashLiteralWordPartParser,
345
+ bashBareDollarParser,
150
346
  ]);
151
347
  // Word (sequence of word parts)
152
348
  export const bashWordParser = createObjectParser({
153
349
  parts: createNonEmptyArrayParser(bashWordPartParser),
154
350
  });
351
+ // Argument word (allows } as first character)
352
+ const bashArgWordParser = createObjectParser({
353
+ parts: createNonEmptyArrayParser(bashArgWordPartParser),
354
+ });
155
355
  setParserName(bashWordParser, 'bashWordParser');
356
+ // Assignment name: identifier followed by =
357
+ const bashAssignmentNameParser = promiseCompose(createTupleParser([
358
+ bashIdentifierParser,
359
+ createExactSequenceParser('='),
360
+ ]), ([name]) => name);
156
361
  // Assignment: NAME=value or NAME=
157
362
  const bashAssignmentParser = createObjectParser({
158
- name: promiseCompose(createRegExpParser(/[a-zA-Z_][a-zA-Z0-9_]*=/), match => match[0].slice(0, -1)),
363
+ name: bashAssignmentNameParser,
159
364
  value: createOptionalParser(bashWordParser),
160
365
  });
161
366
  // Redirect operators
@@ -169,9 +374,11 @@ const bashRedirectOperatorParser = createDisjunctionParser([
169
374
  promiseCompose(createExactSequenceParser('<&'), () => '<&'),
170
375
  promiseCompose(createExactSequenceParser('<'), () => '<'),
171
376
  ]);
377
+ // File descriptor number
378
+ const bashFdParser = promiseCompose(bashDigitsParser, digits => Number.parseInt(digits, 10));
172
379
  // Redirect: [n]op word
173
380
  const bashRedirectParser = createObjectParser({
174
- fd: createOptionalParser(promiseCompose(createRegExpParser(/[0-9]+/), match => Number.parseInt(match[0], 10))),
381
+ fd: createOptionalParser(bashFdParser),
175
382
  operator: bashRedirectOperatorParser,
176
383
  _ws: bashOptionalInlineWhitespaceParser,
177
384
  target: bashWordParser,
@@ -181,37 +388,48 @@ const bashWordWithWhitespaceParser = promiseCompose(createTupleParser([
181
388
  bashWordParser,
182
389
  bashOptionalInlineWhitespaceParser,
183
390
  ]), ([word]) => word);
391
+ // Arg word (allows }) with optional trailing whitespace
392
+ const bashArgWordWithWhitespaceParser = promiseCompose(createTupleParser([
393
+ bashArgWordParser,
394
+ bashOptionalInlineWhitespaceParser,
395
+ ]), ([word]) => word);
184
396
  // Redirect with optional trailing whitespace
185
397
  const bashRedirectWithWhitespaceParser = promiseCompose(createTupleParser([
186
398
  bashRedirectParser,
187
399
  bashOptionalInlineWhitespaceParser,
188
400
  ]), ([redirect]) => redirect);
189
- // Word or redirect - for interleaved parsing in simple commands
190
- const bashWordOrRedirectParser = createDisjunctionParser([
401
+ // Word or redirect for argument position (} allowed)
402
+ const bashArgWordOrRedirectParser = createDisjunctionParser([
191
403
  createObjectParser({ type: 'redirect', redirect: bashRedirectWithWhitespaceParser }),
192
- createObjectParser({ type: 'word', word: bashWordWithWhitespaceParser }),
404
+ createObjectParser({ type: 'word', word: bashArgWordWithWhitespaceParser }),
193
405
  ]);
194
406
  // Simple command: [assignments] [name] [args] [redirects]
195
- export const bashSimpleCommandParser = promiseCompose(createTupleParser([
196
- // Assignments at the start
197
- createArrayParser(promiseCompose(createTupleParser([
407
+ export const bashSimpleCommandParser = async (parserContext) => {
408
+ // Parse assignments at the start
409
+ const assignmentsParser = createArrayParser(promiseCompose(createTupleParser([
198
410
  bashAssignmentParser,
199
411
  bashOptionalInlineWhitespaceParser,
200
- ]), ([assignment]) => assignment)),
201
- // Command name, args, and redirects (interleaved)
202
- createArrayParser(bashWordOrRedirectParser),
203
- ]), ([assignments, items]) => {
204
- const words = [];
205
- const redirects = [];
206
- for (const item of items) {
207
- if (item.type === 'word') {
208
- words.push(item.word);
209
- }
210
- else {
211
- redirects.push(item.redirect);
412
+ ]), ([assignment]) => assignment));
413
+ const assignments = await assignmentsParser(parserContext);
414
+ // Parse leading redirects before command name
415
+ const leadingRedirectsParser = createArrayParser(bashRedirectWithWhitespaceParser);
416
+ const leadingRedirects = await leadingRedirectsParser(parserContext);
417
+ // Parse command name (} not allowed here, so brace group closing works)
418
+ const name = await createOptionalParser(bashWordWithWhitespaceParser)(parserContext);
419
+ // Only parse args if we have a command name
420
+ const args = [];
421
+ const redirects = [...leadingRedirects];
422
+ if (name !== undefined) {
423
+ const argItems = await createArrayParser(bashArgWordOrRedirectParser)(parserContext);
424
+ for (const item of argItems) {
425
+ if (item.type === 'word') {
426
+ args.push(item.word);
427
+ }
428
+ else {
429
+ redirects.push(item.redirect);
430
+ }
212
431
  }
213
432
  }
214
- const [name, ...args] = words;
215
433
  return {
216
434
  type: 'simple',
217
435
  name,
@@ -219,7 +437,7 @@ export const bashSimpleCommandParser = promiseCompose(createTupleParser([
219
437
  redirects,
220
438
  assignments,
221
439
  };
222
- });
440
+ };
223
441
  setParserName(bashSimpleCommandParser, 'bashSimpleCommandParser');
224
442
  // Subshell: ( command )
225
443
  const bashSubshellParser = createObjectParser({
@@ -251,7 +469,10 @@ const bashCommandUnitParser = createDisjunctionParser([
251
469
  ]);
252
470
  setParserName(bashCommandUnitParser, 'bashCommandUnitParser');
253
471
  // Single pipe (not ||) - matches | only when not followed by another |
254
- const bashSinglePipeParser = promiseCompose(createRegExpParser(/\|(?!\|)/), match => match[0]);
472
+ const bashSinglePipeParser = promiseCompose(createTupleParser([
473
+ createExactSequenceParser('|'),
474
+ createNegativeLookaheadParser(createExactSequenceParser('|')),
475
+ ]), () => '|');
255
476
  // Pipeline: [!] cmd [| cmd]...
256
477
  const bashPipelineParser = promiseCompose(createTupleParser([
257
478
  createOptionalParser(promiseCompose(createTupleParser([
@@ -269,13 +490,35 @@ const bashPipelineParser = promiseCompose(createTupleParser([
269
490
  commands,
270
491
  }));
271
492
  setParserName(bashPipelineParser, 'bashPipelineParser');
493
+ // Non-newline character
494
+ const bashNonNewlineCharParser = promiseCompose(createTupleParser([
495
+ createNegativeLookaheadParser(createExactSequenceParser('\n')),
496
+ createElementParser(),
497
+ ]), ([, ch]) => ch);
498
+ // Comment: # through end of line (not consuming the newline)
499
+ const bashCommentParser = promiseCompose(createTupleParser([
500
+ createExactSequenceParser('#'),
501
+ createArrayParser(bashNonNewlineCharParser),
502
+ ]), ([hash, chars]) => hash + chars.join(''));
503
+ // Blank line filler: whitespace, newlines, and comments
504
+ const bashBlankLineFillerParser = promiseCompose(createArrayParser(createDisjunctionParser([
505
+ bashInlineWhitespaceUnitParser,
506
+ promiseCompose(createExactSequenceParser('\n'), () => '\n'),
507
+ bashCommentParser,
508
+ ])), () => { });
509
+ // Newline separator: consumes a newline plus any following blank lines, comments, and whitespace
510
+ // This allows multi-line scripts with blank lines and mid-script comments
511
+ const bashNewlineSeparatorParser = promiseCompose(createTupleParser([
512
+ createExactSequenceParser('\n'),
513
+ bashBlankLineFillerParser,
514
+ ]), () => '\n');
272
515
  // Command list separator
273
516
  const bashListSeparatorParser = createDisjunctionParser([
274
517
  promiseCompose(createExactSequenceParser('&&'), () => '&&'),
275
518
  promiseCompose(createExactSequenceParser('||'), () => '||'),
276
519
  promiseCompose(createExactSequenceParser(';'), () => ';'),
277
520
  promiseCompose(createExactSequenceParser('&'), () => '&'),
278
- promiseCompose(bashNewlineParser, () => '\n'),
521
+ bashNewlineSeparatorParser,
279
522
  ]);
280
523
  // Command list: pipeline [sep pipeline]...
281
524
  const bashCommandListParser = promiseCompose(createTupleParser([
@@ -323,13 +566,12 @@ setParserName(bashCommandListParser, 'bashCommandListParser');
323
566
  // Top-level command parser
324
567
  export const bashCommandParser = bashCommandListParser;
325
568
  setParserName(bashCommandParser, 'bashCommandParser');
326
- // Comment: # through end of line
327
- const bashOptionalCommentParser = createOptionalParser(promiseCompose(createRegExpParser(/#[^\n]*/), match => match[0]));
569
+ // Trailing whitespace/comments/blank lines at end of script
570
+ const bashTrailingWhitespaceAndCommentsParser = promiseCompose(bashBlankLineFillerParser, () => undefined);
328
571
  // Script parser (handles leading/trailing whitespace and comments)
329
572
  export const bashScriptParser = promiseCompose(createTupleParser([
330
573
  bashOptionalInlineWhitespaceParser,
331
574
  bashCommandParser,
332
- bashOptionalInlineWhitespaceParser,
333
- bashOptionalCommentParser,
575
+ bashTrailingWhitespaceAndCommentsParser,
334
576
  ]), ([, command]) => command);
335
577
  setParserName(bashScriptParser, 'bashScriptParser');
@@ -341,3 +341,74 @@ test('if treated as command name', async (t) => {
341
341
  t.deepEqual(cmd.name, { parts: [{ type: 'literal', value: 'if' }] });
342
342
  }
343
343
  });
344
+ test('find -exec with {} placeholder', async (t) => {
345
+ const result = await runParser(bashScriptParser, 'find . -name "*.tmp" -exec rm {} \\;', stringParserInputCompanion);
346
+ const cmd = result.entries[0].pipeline.commands[0];
347
+ t.is(cmd.name.parts[0].type, 'literal');
348
+ t.is(cmd.name.parts[0].value, 'find');
349
+ // {} should be parsed as a literal word argument
350
+ const braceArg = cmd.args[5]; // ., -name, "*.tmp", -exec, rm, {}, \;
351
+ t.is(braceArg.parts[0].type, 'literal');
352
+ t.is(braceArg.parts[0].value, '{}');
353
+ });
354
+ test('lone open brace as argument', async (t) => {
355
+ const result = await runParser(bashScriptParser, 'echo {', stringParserInputCompanion);
356
+ const cmd = result.entries[0].pipeline.commands[0];
357
+ t.deepEqual(cmd.args[0], {
358
+ parts: [{ type: 'literal', value: '{' }],
359
+ });
360
+ });
361
+ test('close brace mid-word', async (t) => {
362
+ const result = await runParser(bashScriptParser, 'echo foo}bar', stringParserInputCompanion);
363
+ const cmd = result.entries[0].pipeline.commands[0];
364
+ t.is(cmd.args.length, 1);
365
+ t.is(cmd.args[0].parts[0].type, 'literal');
366
+ });
367
+ test('open brace mid-word', async (t) => {
368
+ const result = await runParser(bashScriptParser, 'echo foo{bar', stringParserInputCompanion);
369
+ const cmd = result.entries[0].pipeline.commands[0];
370
+ t.is(cmd.args.length, 1);
371
+ t.is(cmd.args[0].parts[0].type, 'literal');
372
+ });
373
+ test('braces mid-word like brace expansion', async (t) => {
374
+ const result = await runParser(bashScriptParser, 'echo file.{c,h}', stringParserInputCompanion);
375
+ const cmd = result.entries[0].pipeline.commands[0];
376
+ t.is(cmd.args.length, 1);
377
+ t.is(cmd.args[0].parts[0].type, 'literal');
378
+ });
379
+ test('find -exec with {.} placeholder variant', async (t) => {
380
+ const result = await runParser(bashScriptParser, 'echo {.}', stringParserInputCompanion);
381
+ const cmd = result.entries[0].pipeline.commands[0];
382
+ t.is(cmd.args.length, 1);
383
+ t.is(cmd.args[0].parts[0].type, 'literal');
384
+ });
385
+ test('lone close brace as argument', async (t) => {
386
+ const result = await runParser(bashScriptParser, 'echo }', stringParserInputCompanion);
387
+ const cmd = result.entries[0].pipeline.commands[0];
388
+ t.deepEqual(cmd.args[0], {
389
+ parts: [{ type: 'literal', value: '}' }],
390
+ });
391
+ });
392
+ test('close brace at start of word', async (t) => {
393
+ const result = await runParser(bashScriptParser, 'echo }hello', stringParserInputCompanion);
394
+ const cmd = result.entries[0].pipeline.commands[0];
395
+ t.is(cmd.args.length, 1);
396
+ t.is(cmd.args[0].parts[0].type, 'literal');
397
+ });
398
+ test('multi-line script with blank lines', async (t) => {
399
+ const result = await runParser(bashScriptParser, 'echo hello\n\necho world', stringParserInputCompanion);
400
+ t.is(result.entries.length, 2);
401
+ });
402
+ test('mid-script comment', async (t) => {
403
+ const result = await runParser(bashScriptParser, 'echo hello\n# comment\necho world', stringParserInputCompanion);
404
+ t.is(result.entries.length, 2);
405
+ });
406
+ test('nested parentheses in arithmetic expansion', async (t) => {
407
+ const result = await runParser(bashScriptParser, 'echo $((1 + (2 * 3)))', stringParserInputCompanion);
408
+ const cmd = result.entries[0].pipeline.commands[0];
409
+ const arith = cmd.args[0].parts[0];
410
+ t.is(arith.type, 'arithmeticExpansion');
411
+ if (arith.type === 'arithmeticExpansion') {
412
+ t.is(arith.expression, '1 + (2 * 3)');
413
+ }
414
+ });
package/build/index.d.ts CHANGED
@@ -16,6 +16,7 @@ export { createUnionParser, } from './unionParser.js';
16
16
  export { createDisjunctionParser, } from './disjunctionParser.js';
17
17
  export { createParserAccessorParser, } from './parserAccessorParser.js';
18
18
  export { createElementParser, } from './elementParser.js';
19
+ export { createPredicateElementParser, } from './predicateElementParser.js';
19
20
  export { createTerminatedArrayParser, } from './terminatedArrayParser.js';
20
21
  export { createSliceBoundedParser, } from './sliceBoundedParser.js';
21
22
  export { createExactElementParser, } from './exactElementParser.js';
package/build/index.js CHANGED
@@ -14,6 +14,7 @@ export { createUnionParser, } from './unionParser.js';
14
14
  export { createDisjunctionParser, } from './disjunctionParser.js';
15
15
  export { createParserAccessorParser, } from './parserAccessorParser.js';
16
16
  export { createElementParser, } from './elementParser.js';
17
+ export { createPredicateElementParser, } from './predicateElementParser.js';
17
18
  export { createTerminatedArrayParser, } from './terminatedArrayParser.js';
18
19
  export { createSliceBoundedParser, } from './sliceBoundedParser.js';
19
20
  export { createExactElementParser, } from './exactElementParser.js';
@@ -0,0 +1,3 @@
1
+ import { type Parser } from './parser.js';
2
+ import { type DeriveSequenceElement } from './sequence.js';
3
+ export declare const createPredicateElementParser: <Sequence, Element = DeriveSequenceElement<Sequence>>(predicate: (element: Element) => boolean) => Parser<Element, Sequence, Element>;
@@ -0,0 +1,10 @@
1
+ import { setParserName } from './parser.js';
2
+ export const createPredicateElementParser = (predicate) => {
3
+ const predicateElementParser = async (parserContext) => {
4
+ const element = await parserContext.read(0);
5
+ parserContext.invariant(predicate(element), 'Element does not match predicate: %s', element);
6
+ return element;
7
+ };
8
+ setParserName(predicateElementParser, `createPredicateElementParser(${predicate.name || 'anonymous'})`);
9
+ return predicateElementParser;
10
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@futpib/parser",
3
- "version": "1.0.6",
3
+ "version": "1.0.7",
4
4
  "main": "build/index.js",
5
5
  "types": "build/index.d.ts",
6
6
  "license": "GPL-3.0-only",