@futpib/parser 1.0.6 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/bashParser.ts CHANGED
@@ -1,13 +1,15 @@
1
1
  import { type Parser, setParserName } from './parser.js';
2
- import { createUnionParser } from './unionParser.js';
3
2
  import { createExactSequenceParser } from './exactSequenceParser.js';
3
+ import { createElementParser } from './elementParser.js';
4
+ import { createPredicateElementParser } from './predicateElementParser.js';
5
+ import { createNegativeLookaheadParser } from './negativeLookaheadParser.js';
6
+ import { createLookaheadParser } from './lookaheadParser.js';
4
7
  import { promiseCompose } from './promiseCompose.js';
5
8
  import { createTupleParser } from './tupleParser.js';
6
9
  import { createDisjunctionParser } from './disjunctionParser.js';
7
10
  import { createArrayParser } from './arrayParser.js';
8
11
  import { createParserAccessorParser } from './parserAccessorParser.js';
9
12
  import { createOptionalParser } from './optionalParser.js';
10
- import { createRegExpParser } from './regexpParser.js';
11
13
  import { createNonEmptyArrayParser } from './nonEmptyArrayParser.js';
12
14
  import { createSeparatedNonEmptyArrayParser } from './separatedNonEmptyArrayParser.js';
13
15
  import { createObjectParser } from './objectParser.js';
@@ -34,48 +36,126 @@ import {
34
36
  type BashCommand,
35
37
  } from './bash.js';
36
38
 
39
+ // Character predicates
40
+ function isDigit(ch: string): boolean {
41
+ return ch >= '0' && ch <= '9';
42
+ }
43
+
44
+ function isLetter(ch: string): boolean {
45
+ return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
46
+ }
47
+
48
+ function isIdentStart(ch: string): boolean {
49
+ return isLetter(ch) || ch === '_';
50
+ }
51
+
52
+ function isIdentChar(ch: string): boolean {
53
+ return isIdentStart(ch) || isDigit(ch);
54
+ }
55
+
37
56
  // Whitespace (spaces, tabs, and line continuations - not bare newlines which are significant)
57
+ const bashInlineWhitespaceUnitParser: Parser<string, string> = createDisjunctionParser([
58
+ promiseCompose(createExactSequenceParser(' '), () => ' '),
59
+ promiseCompose(createExactSequenceParser('\t'), () => '\t'),
60
+ promiseCompose(createExactSequenceParser('\\\n'), () => '\\\n'),
61
+ ]);
62
+
38
63
  const bashInlineWhitespaceParser: Parser<string, string> = promiseCompose(
39
- createRegExpParser(/(?:[ \t]|\\\n)+/),
40
- match => match[0],
64
+ createNonEmptyArrayParser(bashInlineWhitespaceUnitParser),
65
+ parts => parts.join(''),
41
66
  );
42
67
 
43
68
  const bashOptionalInlineWhitespaceParser: Parser<string, string> = promiseCompose(
44
- createRegExpParser(/(?:[ \t]|\\\n)*/),
45
- match => match[0],
69
+ createArrayParser(bashInlineWhitespaceUnitParser),
70
+ parts => parts.join(''),
46
71
  );
47
72
 
48
- // Newline
49
- const bashNewlineParser: Parser<string, string> = promiseCompose(
50
- createRegExpParser(/\n/),
51
- match => match[0],
73
+ // Word characters (unquoted, no special chars)
74
+ // Note: {} and # are excluded from the first character so brace groups and comments are parsed correctly,
75
+ // but allowed as continuation characters for mid-word braces (e.g., file.{c,h}, foo}bar) and hash (foo#bar)
76
+ const bashSpecialCharParser: Parser<unknown, string> = createDisjunctionParser(
77
+ [...' \t\n|&;<>()$`"\'\\'].map(ch => createExactSequenceParser(ch)),
78
+ );
79
+
80
+ const bashWordStartExcludeParser: Parser<unknown, string> = createDisjunctionParser([
81
+ bashSpecialCharParser,
82
+ createExactSequenceParser('{'),
83
+ createExactSequenceParser('}'),
84
+ createExactSequenceParser('#'),
85
+ ]);
86
+
87
+ const bashUnquotedWordStartCharParser: Parser<string, string> = promiseCompose(
88
+ createTupleParser([
89
+ createNegativeLookaheadParser(bashWordStartExcludeParser),
90
+ createElementParser<string>(),
91
+ ]),
92
+ ([, ch]) => ch,
93
+ );
94
+
95
+ const bashUnquotedWordContinueCharParser: Parser<string, string> = promiseCompose(
96
+ createTupleParser([
97
+ createNegativeLookaheadParser(bashSpecialCharParser),
98
+ createElementParser<string>(),
99
+ ]),
100
+ ([, ch]) => ch,
52
101
  );
53
102
 
54
- // Word characters (unquoted, no special chars)
55
- // Note: {} are excluded so brace groups are parsed correctly
56
- // # is excluded from the first character (starts a comment) but allowed mid-word
57
103
  const bashUnquotedWordCharsParser: Parser<string, string> = promiseCompose(
58
- createRegExpParser(/[^\s\n|&;<>(){}$`"'\\#][^\s\n|&;<>(){}$`"'\\]*/),
59
- match => match[0],
104
+ createTupleParser([
105
+ bashUnquotedWordStartCharParser,
106
+ createArrayParser(bashUnquotedWordContinueCharParser),
107
+ ]),
108
+ ([first, rest]) => first + rest.join(''),
60
109
  );
61
110
 
111
+ // Consume characters until a given terminator, returning the accumulated string
112
+ function createUntilCharParser(terminator: string): Parser<string, string> {
113
+ return promiseCompose(
114
+ createArrayParser(promiseCompose(
115
+ createTupleParser([
116
+ createNegativeLookaheadParser(createExactSequenceParser(terminator)),
117
+ createElementParser<string>(),
118
+ ]),
119
+ ([, ch]) => ch,
120
+ )),
121
+ chars => chars.join(''),
122
+ );
123
+ }
124
+
62
125
  // Single quoted string: '...'
63
126
  const bashSingleQuotedParser: Parser<BashWordPartSingleQuoted, string> = createObjectParser({
64
127
  type: 'singleQuoted' as const,
65
128
  _open: createExactSequenceParser("'"),
66
- value: promiseCompose(
67
- createRegExpParser(/[^']*/),
68
- match => match[0],
69
- ),
129
+ value: createUntilCharParser("'"),
70
130
  _close: createExactSequenceParser("'"),
71
131
  });
72
132
 
73
- // Variable name
74
- const bashVariableNameParser: Parser<string, string> = promiseCompose(
75
- createRegExpParser(/[a-zA-Z_][a-zA-Z0-9_]*|[0-9]+|[@*#?$!-]/),
76
- match => match[0],
133
+ // Variable name: identifiers, positional params ($0, $1...), or special params ($@, $*, $#, $?, $$, $!, $-)
134
+ const bashSpecialParams = new Set(['@', '*', '#', '?', '$', '!', '-']);
135
+
136
+ const bashIdentifierParser: Parser<string, string> = promiseCompose(
137
+ createTupleParser([
138
+ createPredicateElementParser<string>(isIdentStart),
139
+ createArrayParser(createPredicateElementParser<string>(isIdentChar)),
140
+ ]),
141
+ ([first, rest]) => first + rest.join(''),
142
+ );
143
+
144
+ const bashDigitsParser: Parser<string, string> = promiseCompose(
145
+ createNonEmptyArrayParser(createPredicateElementParser<string>(isDigit)),
146
+ chars => chars.join(''),
147
+ );
148
+
149
+ const bashSpecialParamParser: Parser<string, string> = createPredicateElementParser<string>(
150
+ ch => bashSpecialParams.has(ch),
77
151
  );
78
152
 
153
+ const bashVariableNameParser: Parser<string, string> = createDisjunctionParser([
154
+ bashIdentifierParser,
155
+ bashDigitsParser,
156
+ bashSpecialParamParser,
157
+ ]);
158
+
79
159
  // Simple variable: $var
80
160
  const bashSimpleVariableParser: Parser<BashWordPartVariable, string> = createObjectParser({
81
161
  type: 'variable' as const,
@@ -101,49 +181,147 @@ const bashBacktickSubstitutionParser: Parser<BashWordPartBacktickSubstitution, s
101
181
  _close: createExactSequenceParser('`'),
102
182
  });
103
183
 
184
+ // Word characters for use inside ${...} operands (} excluded from continuation to not consume the closing brace)
185
+ const bashBracedVarContinueExcludeParser: Parser<unknown, string> = createDisjunctionParser([
186
+ bashSpecialCharParser,
187
+ createExactSequenceParser('{'),
188
+ createExactSequenceParser('}'),
189
+ ]);
190
+
191
+ const bashBracedVarUnquotedWordCharsParser: Parser<string, string> = promiseCompose(
192
+ createTupleParser([
193
+ bashUnquotedWordStartCharParser,
194
+ createArrayParser(promiseCompose(
195
+ createTupleParser([
196
+ createNegativeLookaheadParser(bashBracedVarContinueExcludeParser),
197
+ createElementParser<string>(),
198
+ ]),
199
+ ([, ch]) => ch,
200
+ )),
201
+ ]),
202
+ ([first, rest]) => first + rest.join(''),
203
+ );
204
+
205
+ const bashBracedVarLiteralWordPartParser: Parser<BashWordPartLiteral, string> = createObjectParser({
206
+ type: 'literal' as const,
207
+ value: bashBracedVarUnquotedWordCharsParser,
208
+ });
209
+
104
210
  // Braced variable expansion: ${VAR} or ${VAR:-default}
105
211
  const bashBracedVariableParser: Parser<BashWordPartVariableBraced, string> = createObjectParser({
106
212
  type: 'variableBraced' as const,
107
213
  _open: createExactSequenceParser('${'),
108
214
  name: bashVariableNameParser,
109
- operator: createOptionalParser(promiseCompose(
110
- createRegExpParser(/:-|:=|:\+|:\?|-|=|\+|\?|##|#|%%|%/),
111
- match => match[0],
112
- )),
113
- operand: createOptionalParser(createParserAccessorParser(() => bashWordParser)),
215
+ operator: createOptionalParser(createDisjunctionParser([
216
+ promiseCompose(createExactSequenceParser(':-'), () => ':-'),
217
+ promiseCompose(createExactSequenceParser(':='), () => ':='),
218
+ promiseCompose(createExactSequenceParser(':+'), () => ':+'),
219
+ promiseCompose(createExactSequenceParser(':?'), () => ':?'),
220
+ promiseCompose(createExactSequenceParser('##'), () => '##'),
221
+ promiseCompose(createExactSequenceParser('%%'), () => '%%'),
222
+ promiseCompose(createExactSequenceParser('-'), () => '-'),
223
+ promiseCompose(createExactSequenceParser('='), () => '='),
224
+ promiseCompose(createExactSequenceParser('+'), () => '+'),
225
+ promiseCompose(createExactSequenceParser('?'), () => '?'),
226
+ promiseCompose(createExactSequenceParser('#'), () => '#'),
227
+ promiseCompose(createExactSequenceParser('%'), () => '%'),
228
+ ])),
229
+ operand: createOptionalParser(createParserAccessorParser(() => bashBracedVarWordParser)),
114
230
  _close: createExactSequenceParser('}'),
115
231
  });
116
232
 
117
- // Arithmetic expansion: $((expression))
233
+ // Arithmetic expansion: $((expression)) - handles nested parentheses
234
+ const bashArithmeticExpressionParser: Parser<string, string> = async (parserContext) => {
235
+ let result = '';
236
+ let depth = 0;
237
+ for (;;) {
238
+ const ch = await parserContext.peek(0);
239
+ if (ch === undefined) {
240
+ break;
241
+ }
242
+
243
+ if (ch === '(') {
244
+ depth++;
245
+ result += ch;
246
+ parserContext.skip(1);
247
+ continue;
248
+ }
249
+
250
+ if (ch === ')') {
251
+ if (depth > 0) {
252
+ depth--;
253
+ result += ch;
254
+ parserContext.skip(1);
255
+ continue;
256
+ }
257
+
258
+ // At depth 0, a ')' means we've hit the closing '))' of $((
259
+ break;
260
+ }
261
+
262
+ result += ch;
263
+ parserContext.skip(1);
264
+ }
265
+
266
+ return result;
267
+ };
268
+
118
269
  const bashArithmeticExpansionParser: Parser<BashWordPartArithmeticExpansion, string> = createObjectParser({
119
270
  type: 'arithmeticExpansion' as const,
120
271
  _open: createExactSequenceParser('$(('),
121
- expression: promiseCompose(
122
- createRegExpParser(/(?:[^)]|\)(?!\)))*/),
123
- match => match[0],
124
- ),
272
+ expression: bashArithmeticExpressionParser,
125
273
  _close: createExactSequenceParser('))'),
126
274
  });
127
275
 
128
- // ANSI-C quoting: $'...'
276
+ // ANSI-C quoting: $'...' - content can include \' escapes
277
+ // Each unit is either a backslash-escape pair or a non-quote character
278
+ const bashAnsiCContentUnitParser: Parser<string, string> = createDisjunctionParser([
279
+ // Backslash escape: \x (any char after backslash)
280
+ promiseCompose(
281
+ createTupleParser([
282
+ createExactSequenceParser('\\'),
283
+ createElementParser<string>(),
284
+ ]),
285
+ ([bs, ch]) => bs + ch,
286
+ ),
287
+ // Any character that isn't ' (and isn't \ which is handled above)
288
+ promiseCompose(
289
+ createTupleParser([
290
+ createNegativeLookaheadParser(createExactSequenceParser("'")),
291
+ createElementParser<string>(),
292
+ ]),
293
+ ([, ch]) => ch,
294
+ ),
295
+ ]);
296
+
297
+ const bashAnsiCContentParser: Parser<string, string> = promiseCompose(
298
+ createArrayParser(bashAnsiCContentUnitParser),
299
+ parts => parts.join(''),
300
+ );
301
+
129
302
  const bashAnsiCQuotedParser: Parser<BashWordPartSingleQuoted, string> = createObjectParser({
130
303
  type: 'singleQuoted' as const,
131
304
  _prefix: createExactSequenceParser('$'),
132
305
  _open: createExactSequenceParser("'"),
133
- value: promiseCompose(
134
- createRegExpParser(/(?:[^'\\]|\\.)*/),
135
- match => match[0],
136
- ),
306
+ value: bashAnsiCContentParser,
137
307
  _close: createExactSequenceParser("'"),
138
308
  });
139
309
 
140
310
  // Process substitution: <(cmd) or >(cmd)
311
+ const bashProcessSubstitutionDirectionParser: Parser<'<' | '>', string> = promiseCompose(
312
+ createTupleParser([
313
+ createDisjunctionParser([
314
+ createExactSequenceParser('<' as const),
315
+ createExactSequenceParser('>' as const),
316
+ ]),
317
+ createLookaheadParser(createExactSequenceParser('(')),
318
+ ]),
319
+ ([dir]) => dir as '<' | '>',
320
+ );
321
+
141
322
  const bashProcessSubstitutionParser: Parser<BashWordPartProcessSubstitution, string> = createObjectParser({
142
323
  type: 'processSubstitution' as const,
143
- direction: promiseCompose(
144
- createRegExpParser(/[<>](?=\()/),
145
- match => match[0] as '<' | '>',
146
- ),
324
+ direction: bashProcessSubstitutionDirectionParser,
147
325
  _open: createExactSequenceParser('('),
148
326
  _ws1: bashOptionalInlineWhitespaceParser,
149
327
  command: createParserAccessorParser(() => bashCommandParser),
@@ -151,6 +329,55 @@ const bashProcessSubstitutionParser: Parser<BashWordPartProcessSubstitution, str
151
329
  _close: createExactSequenceParser(')'),
152
330
  });
153
331
 
332
+ // Escape sequences in double quotes: \\ \$ \` \" \! \newline
333
+ const bashDoubleQuotedEscapeCharParser: Parser<string, string> = createDisjunctionParser([
334
+ createExactSequenceParser('\\'),
335
+ createExactSequenceParser('$'),
336
+ createExactSequenceParser('`'),
337
+ createExactSequenceParser('"'),
338
+ createExactSequenceParser('!'),
339
+ createExactSequenceParser('\n'),
340
+ ]);
341
+
342
+ const bashDoubleQuotedEscapeParser: Parser<BashWordPartLiteral, string> = promiseCompose(
343
+ createTupleParser([
344
+ createExactSequenceParser('\\'),
345
+ bashDoubleQuotedEscapeCharParser,
346
+ ]),
347
+ ([, ch]) => ({ type: 'literal' as const, value: ch }),
348
+ );
349
+
350
+ // Literal text inside double quotes (no special chars)
351
+ const bashDoubleQuotedLiteralCharParser: Parser<string, string> = promiseCompose(
352
+ createTupleParser([
353
+ createNegativeLookaheadParser(createDisjunctionParser([
354
+ createExactSequenceParser('$'),
355
+ createExactSequenceParser('`'),
356
+ createExactSequenceParser('"'),
357
+ createExactSequenceParser('\\'),
358
+ ])),
359
+ createElementParser<string>(),
360
+ ]),
361
+ ([, ch]) => ch,
362
+ );
363
+
364
+ const bashDoubleQuotedLiteralParser: Parser<BashWordPartLiteral, string> = promiseCompose(
365
+ createNonEmptyArrayParser(bashDoubleQuotedLiteralCharParser),
366
+ chars => ({ type: 'literal' as const, value: chars.join('') }),
367
+ );
368
+
369
+ // Bare $ not followed by a valid expansion start
370
+ const bashBareDollarParser: Parser<BashWordPartLiteral, string> = promiseCompose(
371
+ createExactSequenceParser('$'),
372
+ () => ({ type: 'literal' as const, value: '$' }),
373
+ );
374
+
375
+ // Bare \ not followed by a recognized escape character
376
+ const bashBareBackslashParser: Parser<BashWordPartLiteral, string> = promiseCompose(
377
+ createExactSequenceParser('\\'),
378
+ () => ({ type: 'literal' as const, value: '\\' }),
379
+ );
380
+
154
381
  // Double quoted string parts (inside "...")
155
382
  const bashDoubleQuotedPartParser: Parser<BashWordPart, string> = createDisjunctionParser([
156
383
  bashBracedVariableParser,
@@ -158,38 +385,10 @@ const bashDoubleQuotedPartParser: Parser<BashWordPart, string> = createDisjuncti
158
385
  bashSimpleVariableParser,
159
386
  bashCommandSubstitutionParser,
160
387
  bashBacktickSubstitutionParser,
161
- // Escape sequences in double quotes
162
- promiseCompose(
163
- createRegExpParser(/\\[\\$`"!\n]/),
164
- match => ({
165
- type: 'literal' as const,
166
- value: match[0].slice(1),
167
- }),
168
- ),
169
- // Literal text (no special chars)
170
- promiseCompose(
171
- createRegExpParser(/[^$`"\\]+/),
172
- match => ({
173
- type: 'literal' as const,
174
- value: match[0],
175
- }),
176
- ),
177
- // Bare $ not followed by a valid expansion start (e.g. $" at end of double-quoted string)
178
- promiseCompose(
179
- createRegExpParser(/\$/),
180
- () => ({
181
- type: 'literal' as const,
182
- value: '$',
183
- }),
184
- ),
185
- // Bare \ not followed by a recognized escape character (treated as literal backslash in bash)
186
- promiseCompose(
187
- createRegExpParser(/\\/),
188
- () => ({
189
- type: 'literal' as const,
190
- value: '\\',
191
- }),
192
- ),
388
+ bashDoubleQuotedEscapeParser,
389
+ bashDoubleQuotedLiteralParser,
390
+ bashBareDollarParser,
391
+ bashBareBackslashParser,
193
392
  ]);
194
393
 
195
394
  // Double quoted string: "..."
@@ -206,16 +405,63 @@ const bashLiteralWordPartParser: Parser<BashWordPartLiteral, string> = createObj
206
405
  value: bashUnquotedWordCharsParser,
207
406
  });
208
407
 
209
- // Escape sequence outside quotes
210
- const bashEscapeParser: Parser<BashWordPartLiteral, string> = promiseCompose(
211
- createRegExpParser(/\\./),
212
- match => ({
408
+ // Bare {} treated as a literal word (e.g., find -exec cmd {} \;)
409
+ const bashBraceWordPartParser: Parser<BashWordPartLiteral, string> = promiseCompose(
410
+ createExactSequenceParser('{}'),
411
+ () => ({
412
+ type: 'literal' as const,
413
+ value: '{}',
414
+ }),
415
+ );
416
+
417
+ // Bare { treated as a literal word part (e.g., echo {, echo {.})
418
+ // Note: } is NOT included here because it would break brace group closing
419
+ const bashOpenBraceWordPartParser: Parser<BashWordPartLiteral, string> = promiseCompose(
420
+ createExactSequenceParser('{'),
421
+ () => ({
213
422
  type: 'literal' as const,
214
- value: match[0].slice(1),
423
+ value: '{',
215
424
  }),
216
425
  );
217
426
 
218
- // Word part (any part of a word)
427
+ // Bare } treated as a literal word part (e.g., echo }, echo }hello)
428
+ const bashCloseBraceWordPartParser: Parser<BashWordPartLiteral, string> = promiseCompose(
429
+ createExactSequenceParser('}'),
430
+ () => ({
431
+ type: 'literal' as const,
432
+ value: '}',
433
+ }),
434
+ );
435
+
436
+ // Escape sequence outside quotes: backslash followed by any character
437
+ const bashEscapeParser: Parser<BashWordPartLiteral, string> = promiseCompose(
438
+ createTupleParser([
439
+ createExactSequenceParser('\\'),
440
+ createElementParser<string>(),
441
+ ]),
442
+ ([, ch]) => ({ type: 'literal' as const, value: ch }),
443
+ );
444
+
445
+ // Word part for use inside ${...} operands (uses literal parser that excludes } from continuation)
446
+ const bashBracedVarWordPartParser: Parser<BashWordPart, string> = createDisjunctionParser([
447
+ bashAnsiCQuotedParser,
448
+ bashSingleQuotedParser,
449
+ bashDoubleQuotedParser,
450
+ bashBracedVariableParser,
451
+ bashArithmeticExpansionParser,
452
+ bashCommandSubstitutionParser,
453
+ bashBacktickSubstitutionParser,
454
+ bashSimpleVariableParser,
455
+ bashEscapeParser,
456
+ bashBracedVarLiteralWordPartParser,
457
+ bashBareDollarParser,
458
+ ]);
459
+
460
+ const bashBracedVarWordParser: Parser<BashWord, string> = createObjectParser({
461
+ parts: createNonEmptyArrayParser(bashBracedVarWordPartParser),
462
+ });
463
+
464
+ // Word part (any part of a word, } excluded from first position so brace groups work)
219
465
  const bashWordPartParser: Parser<BashWordPart, string> = createDisjunctionParser([
220
466
  bashAnsiCQuotedParser,
221
467
  bashSingleQuotedParser,
@@ -227,15 +473,29 @@ const bashWordPartParser: Parser<BashWordPart, string> = createDisjunctionParser
227
473
  bashSimpleVariableParser,
228
474
  bashProcessSubstitutionParser,
229
475
  bashEscapeParser,
476
+ bashBraceWordPartParser,
477
+ bashOpenBraceWordPartParser,
230
478
  bashLiteralWordPartParser,
231
- // Bare $ not followed by a valid expansion start
232
- promiseCompose(
233
- createRegExpParser(/\$/),
234
- () => ({
235
- type: 'literal' as const,
236
- value: '$',
237
- }),
238
- ),
479
+ bashBareDollarParser,
480
+ ]);
481
+
482
+ // Word part including } as a starter (for argument positions where } is not reserved)
483
+ const bashArgWordPartParser: Parser<BashWordPart, string> = createDisjunctionParser([
484
+ bashAnsiCQuotedParser,
485
+ bashSingleQuotedParser,
486
+ bashDoubleQuotedParser,
487
+ bashBracedVariableParser,
488
+ bashArithmeticExpansionParser,
489
+ bashCommandSubstitutionParser,
490
+ bashBacktickSubstitutionParser,
491
+ bashSimpleVariableParser,
492
+ bashProcessSubstitutionParser,
493
+ bashEscapeParser,
494
+ bashBraceWordPartParser,
495
+ bashOpenBraceWordPartParser,
496
+ bashCloseBraceWordPartParser,
497
+ bashLiteralWordPartParser,
498
+ bashBareDollarParser,
239
499
  ]);
240
500
 
241
501
  // Word (sequence of word parts)
@@ -243,14 +503,25 @@ export const bashWordParser: Parser<BashWord, string> = createObjectParser({
243
503
  parts: createNonEmptyArrayParser(bashWordPartParser),
244
504
  });
245
505
 
506
+ // Argument word (allows } as first character)
507
+ const bashArgWordParser: Parser<BashWord, string> = createObjectParser({
508
+ parts: createNonEmptyArrayParser(bashArgWordPartParser),
509
+ });
510
+
246
511
  setParserName(bashWordParser, 'bashWordParser');
247
512
 
513
+ // Assignment name: identifier followed by =
514
+ const bashAssignmentNameParser: Parser<string, string> = promiseCompose(
515
+ createTupleParser([
516
+ bashIdentifierParser,
517
+ createExactSequenceParser('='),
518
+ ]),
519
+ ([name]) => name,
520
+ );
521
+
248
522
  // Assignment: NAME=value or NAME=
249
523
  const bashAssignmentParser: Parser<BashAssignment, string> = createObjectParser({
250
- name: promiseCompose(
251
- createRegExpParser(/[a-zA-Z_][a-zA-Z0-9_]*=/),
252
- match => match[0].slice(0, -1),
253
- ),
524
+ name: bashAssignmentNameParser,
254
525
  value: createOptionalParser(bashWordParser),
255
526
  });
256
527
 
@@ -266,12 +537,15 @@ const bashRedirectOperatorParser: Parser<BashRedirect['operator'], string> = cre
266
537
  promiseCompose(createExactSequenceParser('<'), () => '<' as const),
267
538
  ]);
268
539
 
540
+ // File descriptor number
541
+ const bashFdParser: Parser<number, string> = promiseCompose(
542
+ bashDigitsParser,
543
+ digits => Number.parseInt(digits, 10),
544
+ );
545
+
269
546
  // Redirect: [n]op word
270
547
  const bashRedirectParser: Parser<BashRedirect, string> = createObjectParser({
271
- fd: createOptionalParser(promiseCompose(
272
- createRegExpParser(/[0-9]+/),
273
- match => Number.parseInt(match[0], 10),
274
- )),
548
+ fd: createOptionalParser(bashFdParser),
275
549
  operator: bashRedirectOperatorParser,
276
550
  _ws: bashOptionalInlineWhitespaceParser,
277
551
  target: bashWordParser,
@@ -286,6 +560,15 @@ const bashWordWithWhitespaceParser: Parser<BashWord, string> = promiseCompose(
286
560
  ([word]) => word,
287
561
  );
288
562
 
563
+ // Arg word (allows }) with optional trailing whitespace
564
+ const bashArgWordWithWhitespaceParser: Parser<BashWord, string> = promiseCompose(
565
+ createTupleParser([
566
+ bashArgWordParser,
567
+ bashOptionalInlineWhitespaceParser,
568
+ ]),
569
+ ([word]) => word,
570
+ );
571
+
289
572
  // Redirect with optional trailing whitespace
290
573
  const bashRedirectWithWhitespaceParser: Parser<BashRedirect, string> = promiseCompose(
291
574
  createTupleParser([
@@ -295,49 +578,54 @@ const bashRedirectWithWhitespaceParser: Parser<BashRedirect, string> = promiseCo
295
578
  ([redirect]) => redirect,
296
579
  );
297
580
 
298
- // Word or redirect - for interleaved parsing in simple commands
299
- const bashWordOrRedirectParser: Parser<{ type: 'word'; word: BashWord } | { type: 'redirect'; redirect: BashRedirect }, string> = createDisjunctionParser([
581
+ // Word or redirect for argument position (} allowed)
582
+ const bashArgWordOrRedirectParser: Parser<{ type: 'word'; word: BashWord } | { type: 'redirect'; redirect: BashRedirect }, string> = createDisjunctionParser([
300
583
  createObjectParser({ type: 'redirect' as const, redirect: bashRedirectWithWhitespaceParser }),
301
- createObjectParser({ type: 'word' as const, word: bashWordWithWhitespaceParser }),
584
+ createObjectParser({ type: 'word' as const, word: bashArgWordWithWhitespaceParser }),
302
585
  ]);
303
586
 
304
587
  // Simple command: [assignments] [name] [args] [redirects]
305
- export const bashSimpleCommandParser: Parser<BashSimpleCommand, string> = promiseCompose(
306
- createTupleParser([
307
- // Assignments at the start
308
- createArrayParser(promiseCompose(
309
- createTupleParser([
310
- bashAssignmentParser,
311
- bashOptionalInlineWhitespaceParser,
312
- ]),
313
- ([assignment]) => assignment,
314
- )),
315
- // Command name, args, and redirects (interleaved)
316
- createArrayParser(bashWordOrRedirectParser),
317
- ]),
318
- ([assignments, items]) => {
319
- const words: BashWord[] = [];
320
- const redirects: BashRedirect[] = [];
321
-
322
- for (const item of items) {
588
+ export const bashSimpleCommandParser: Parser<BashSimpleCommand, string> = async (parserContext) => {
589
+ // Parse assignments at the start
590
+ const assignmentsParser = createArrayParser(promiseCompose(
591
+ createTupleParser([
592
+ bashAssignmentParser,
593
+ bashOptionalInlineWhitespaceParser,
594
+ ]),
595
+ ([assignment]) => assignment,
596
+ ));
597
+ const assignments = await assignmentsParser(parserContext);
598
+
599
+ // Parse leading redirects before command name
600
+ const leadingRedirectsParser = createArrayParser(bashRedirectWithWhitespaceParser);
601
+ const leadingRedirects = await leadingRedirectsParser(parserContext);
602
+
603
+ // Parse command name (} not allowed here, so brace group closing works)
604
+ const name = await createOptionalParser(bashWordWithWhitespaceParser)(parserContext);
605
+
606
+ // Only parse args if we have a command name
607
+ const args: BashWord[] = [];
608
+ const redirects: BashRedirect[] = [...leadingRedirects];
609
+
610
+ if (name !== undefined) {
611
+ const argItems = await createArrayParser(bashArgWordOrRedirectParser)(parserContext);
612
+ for (const item of argItems) {
323
613
  if (item.type === 'word') {
324
- words.push(item.word);
614
+ args.push(item.word);
325
615
  } else {
326
616
  redirects.push(item.redirect);
327
617
  }
328
618
  }
619
+ }
329
620
 
330
- const [name, ...args] = words;
331
-
332
- return {
333
- type: 'simple' as const,
334
- name,
335
- args,
336
- redirects,
337
- assignments,
338
- };
339
- },
340
- );
621
+ return {
622
+ type: 'simple' as const,
623
+ name,
624
+ args,
625
+ redirects,
626
+ assignments,
627
+ };
628
+ };
341
629
 
342
630
  setParserName(bashSimpleCommandParser, 'bashSimpleCommandParser');
343
631
 
@@ -378,8 +666,11 @@ setParserName(bashCommandUnitParser, 'bashCommandUnitParser');
378
666
 
379
667
  // Single pipe (not ||) - matches | only when not followed by another |
380
668
  const bashSinglePipeParser: Parser<string, string> = promiseCompose(
381
- createRegExpParser(/\|(?!\|)/),
382
- match => match[0],
669
+ createTupleParser([
670
+ createExactSequenceParser('|'),
671
+ createNegativeLookaheadParser(createExactSequenceParser('|')),
672
+ ]),
673
+ () => '|',
383
674
  );
384
675
 
385
676
  // Pipeline: [!] cmd [| cmd]...
@@ -410,13 +701,51 @@ const bashPipelineParser: Parser<BashPipeline, string> = promiseCompose(
410
701
 
411
702
  setParserName(bashPipelineParser, 'bashPipelineParser');
412
703
 
704
+ // Non-newline character
705
+ const bashNonNewlineCharParser: Parser<string, string> = promiseCompose(
706
+ createTupleParser([
707
+ createNegativeLookaheadParser(createExactSequenceParser('\n')),
708
+ createElementParser<string>(),
709
+ ]),
710
+ ([, ch]) => ch,
711
+ );
712
+
713
+ // Comment: # through end of line (not consuming the newline)
714
+ const bashCommentParser: Parser<string, string> = promiseCompose(
715
+ createTupleParser([
716
+ createExactSequenceParser('#'),
717
+ createArrayParser(bashNonNewlineCharParser),
718
+ ]),
719
+ ([hash, chars]) => hash + chars.join(''),
720
+ );
721
+
722
+ // Blank line filler: whitespace, newlines, and comments
723
+ const bashBlankLineFillerParser: Parser<void, string> = promiseCompose(
724
+ createArrayParser(createDisjunctionParser([
725
+ bashInlineWhitespaceUnitParser,
726
+ promiseCompose(createExactSequenceParser('\n'), () => '\n'),
727
+ bashCommentParser,
728
+ ])),
729
+ () => {},
730
+ );
731
+
732
+ // Newline separator: consumes a newline plus any following blank lines, comments, and whitespace
733
+ // This allows multi-line scripts with blank lines and mid-script comments
734
+ const bashNewlineSeparatorParser: Parser<'\n', string> = promiseCompose(
735
+ createTupleParser([
736
+ createExactSequenceParser('\n'),
737
+ bashBlankLineFillerParser,
738
+ ]),
739
+ () => '\n' as const,
740
+ );
741
+
413
742
  // Command list separator
414
743
  const bashListSeparatorParser: Parser<'&&' | '||' | ';' | '&' | '\n', string> = createDisjunctionParser([
415
744
  promiseCompose(createExactSequenceParser('&&'), () => '&&' as const),
416
745
  promiseCompose(createExactSequenceParser('||'), () => '||' as const),
417
746
  promiseCompose(createExactSequenceParser(';'), () => ';' as const),
418
747
  promiseCompose(createExactSequenceParser('&'), () => '&' as const),
419
- promiseCompose(bashNewlineParser, () => '\n' as const),
748
+ bashNewlineSeparatorParser,
420
749
  ]);
421
750
 
422
751
  // Command list: pipeline [sep pipeline]...
@@ -478,19 +807,18 @@ export const bashCommandParser: Parser<BashCommand, string> = bashCommandListPar
478
807
 
479
808
  setParserName(bashCommandParser, 'bashCommandParser');
480
809
 
481
- // Comment: # through end of line
482
- const bashOptionalCommentParser: Parser<string | undefined, string> = createOptionalParser(promiseCompose(
483
- createRegExpParser(/#[^\n]*/),
484
- match => match[0],
485
- ));
810
+ // Trailing whitespace/comments/blank lines at end of script
811
+ const bashTrailingWhitespaceAndCommentsParser: Parser<undefined, string> = promiseCompose(
812
+ bashBlankLineFillerParser,
813
+ () => undefined,
814
+ );
486
815
 
487
816
  // Script parser (handles leading/trailing whitespace and comments)
488
817
  export const bashScriptParser: Parser<BashCommand, string> = promiseCompose(
489
818
  createTupleParser([
490
819
  bashOptionalInlineWhitespaceParser,
491
820
  bashCommandParser,
492
- bashOptionalInlineWhitespaceParser,
493
- bashOptionalCommentParser,
821
+ bashTrailingWhitespaceAndCommentsParser,
494
822
  ]),
495
823
  ([, command]) => command,
496
824
  );