just-bash 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. just_bash/ast/factory.py +3 -1
  2. just_bash/bash.py +28 -6
  3. just_bash/commands/awk/awk.py +112 -7
  4. just_bash/commands/cat/cat.py +5 -1
  5. just_bash/commands/echo/echo.py +33 -1
  6. just_bash/commands/grep/grep.py +30 -1
  7. just_bash/commands/od/od.py +144 -30
  8. just_bash/commands/printf/printf.py +289 -87
  9. just_bash/commands/pwd/pwd.py +32 -2
  10. just_bash/commands/read/read.py +243 -64
  11. just_bash/commands/readlink/readlink.py +3 -9
  12. just_bash/commands/registry.py +24 -0
  13. just_bash/commands/rmdir/__init__.py +5 -0
  14. just_bash/commands/rmdir/rmdir.py +160 -0
  15. just_bash/commands/sed/sed.py +142 -31
  16. just_bash/commands/stat/stat.py +9 -0
  17. just_bash/commands/time/__init__.py +5 -0
  18. just_bash/commands/time/time.py +74 -0
  19. just_bash/commands/touch/touch.py +118 -8
  20. just_bash/commands/whoami/__init__.py +5 -0
  21. just_bash/commands/whoami/whoami.py +18 -0
  22. just_bash/fs/in_memory_fs.py +22 -0
  23. just_bash/fs/overlay_fs.py +14 -0
  24. just_bash/interpreter/__init__.py +1 -1
  25. just_bash/interpreter/builtins/__init__.py +2 -0
  26. just_bash/interpreter/builtins/control.py +4 -8
  27. just_bash/interpreter/builtins/declare.py +321 -24
  28. just_bash/interpreter/builtins/getopts.py +163 -0
  29. just_bash/interpreter/builtins/let.py +2 -2
  30. just_bash/interpreter/builtins/local.py +71 -5
  31. just_bash/interpreter/builtins/misc.py +22 -6
  32. just_bash/interpreter/builtins/readonly.py +38 -10
  33. just_bash/interpreter/builtins/set.py +58 -8
  34. just_bash/interpreter/builtins/test.py +136 -19
  35. just_bash/interpreter/builtins/unset.py +62 -10
  36. just_bash/interpreter/conditionals.py +29 -4
  37. just_bash/interpreter/control_flow.py +61 -17
  38. just_bash/interpreter/expansion.py +1647 -104
  39. just_bash/interpreter/interpreter.py +424 -70
  40. just_bash/interpreter/types.py +263 -2
  41. just_bash/parser/__init__.py +2 -0
  42. just_bash/parser/lexer.py +295 -26
  43. just_bash/parser/parser.py +523 -64
  44. just_bash/types.py +11 -0
  45. {just_bash-0.1.8.dist-info → just_bash-0.1.10.dist-info}/METADATA +40 -1
  46. {just_bash-0.1.8.dist-info → just_bash-0.1.10.dist-info}/RECORD +47 -40
  47. {just_bash-0.1.8.dist-info → just_bash-0.1.10.dist-info}/WHEEL +0 -0
@@ -42,6 +42,7 @@ from ..ast import (
42
42
  IfNode,
43
43
  IfClause,
44
44
  ForNode,
45
+ CStyleForNode,
45
46
  WhileNode,
46
47
  UntilNode,
47
48
  CaseNode,
@@ -110,6 +111,124 @@ class ParseException(Exception):
110
111
  super().__init__(f"{message} at line {line}, column {column}")
111
112
 
112
113
 
114
+ def _decode_ansi_c_escapes(s: str) -> str:
115
+ """Decode ANSI-C escape sequences from $'...' strings.
116
+
117
+ Supports: \\a \\b \\e \\E \\f \\n \\r \\t \\v \\\\ \\' \\" \\0NNN \\NNN \\xHH \\uNNNN \\UNNNNNNNN
118
+ """
119
+ result: list[str] = []
120
+ i = 0
121
+ while i < len(s):
122
+ if s[i] == "\\" and i + 1 < len(s):
123
+ c = s[i + 1]
124
+ if c == "a":
125
+ result.append("\a")
126
+ i += 2
127
+ elif c == "b":
128
+ result.append("\b")
129
+ i += 2
130
+ elif c in ("e", "E"):
131
+ result.append("\x1b")
132
+ i += 2
133
+ elif c == "f":
134
+ result.append("\f")
135
+ i += 2
136
+ elif c == "n":
137
+ result.append("\n")
138
+ i += 2
139
+ elif c == "r":
140
+ result.append("\r")
141
+ i += 2
142
+ elif c == "t":
143
+ result.append("\t")
144
+ i += 2
145
+ elif c == "v":
146
+ result.append("\v")
147
+ i += 2
148
+ elif c == "\\":
149
+ result.append("\\")
150
+ i += 2
151
+ elif c == "'":
152
+ result.append("'")
153
+ i += 2
154
+ elif c == '"':
155
+ result.append('"')
156
+ i += 2
157
+ elif c == "x":
158
+ # Hex escape: \xHH (1-2 hex digits)
159
+ j = i + 2
160
+ while j < len(s) and j < i + 4 and s[j] in "0123456789abcdefABCDEF":
161
+ j += 1
162
+ if j > i + 2:
163
+ val = int(s[i + 2 : j], 16)
164
+ if val == 0:
165
+ pass # NUL bytes are stripped
166
+ else:
167
+ result.append(chr(val))
168
+ else:
169
+ result.append("\\x")
170
+ i = j
171
+ elif c == "u":
172
+ # Unicode escape: \uNNNN (1-4 hex digits)
173
+ j = i + 2
174
+ while j < len(s) and j < i + 6 and s[j] in "0123456789abcdefABCDEF":
175
+ j += 1
176
+ if j > i + 2:
177
+ val = int(s[i + 2 : j], 16)
178
+ if val == 0:
179
+ pass # NUL bytes are stripped
180
+ else:
181
+ result.append(chr(val))
182
+ else:
183
+ result.append("\\u")
184
+ i = j
185
+ elif c == "U":
186
+ # Unicode escape: \UNNNNNNNN (1-8 hex digits)
187
+ j = i + 2
188
+ while j < len(s) and j < i + 10 and s[j] in "0123456789abcdefABCDEF":
189
+ j += 1
190
+ if j > i + 2:
191
+ val = int(s[i + 2 : j], 16)
192
+ if val == 0:
193
+ pass # NUL bytes are stripped
194
+ else:
195
+ result.append(chr(val))
196
+ else:
197
+ result.append("\\U")
198
+ i = j
199
+ elif c == "0":
200
+ # Octal escape: \0NNN (0-3 octal digits after the 0)
201
+ j = i + 2
202
+ while j < len(s) and j < i + 5 and s[j] in "01234567":
203
+ j += 1
204
+ val = int(s[i + 1 : j], 8) if j > i + 1 else 0
205
+ if val == 0:
206
+ pass # NUL bytes are stripped
207
+ else:
208
+ result.append(chr(val))
209
+ i = j
210
+ elif c in "1234567":
211
+ # Octal escape: \NNN (1-3 octal digits)
212
+ j = i + 1
213
+ while j < len(s) and j < i + 4 and s[j] in "01234567":
214
+ j += 1
215
+ val = int(s[i + 1 : j], 8)
216
+ if val == 0:
217
+ pass # NUL bytes are stripped
218
+ else:
219
+ result.append(chr(val))
220
+ i = j
221
+ else:
222
+ # Unknown escape: keep backslash + char
223
+ result.append("\\")
224
+ result.append(c)
225
+ i += 2
226
+ else:
227
+ result.append(s[i])
228
+ i += 1
229
+ return "".join(result)
230
+
231
+
113
232
  class Parser:
114
233
  """Parser class - transforms tokens into AST."""
115
234
 
@@ -118,6 +237,7 @@ class Parser:
118
237
  self.pos = 0
119
238
  self.pending_heredocs: list[dict] = []
120
239
  self.parse_iterations = 0
240
+ self._consumed_heredoc_positions: set[int] = set()
121
241
 
122
242
  def _check_iteration_limit(self) -> None:
123
243
  """Check parse iteration limit to prevent infinite loops."""
@@ -153,6 +273,7 @@ class Parser:
153
273
  self.pos = 0
154
274
  self.pending_heredocs = []
155
275
  self.parse_iterations = 0
276
+ self._consumed_heredoc_positions = set()
156
277
  return self._parse_script()
157
278
 
158
279
  def parse_tokens(self, tokens: list[Token]) -> ScriptNode:
@@ -161,6 +282,7 @@ class Parser:
161
282
  self.pos = 0
162
283
  self.pending_heredocs = []
163
284
  self.parse_iterations = 0
285
+ self._consumed_heredoc_positions = set()
164
286
  return self._parse_script()
165
287
 
166
288
  # =========================================================================
@@ -206,13 +328,21 @@ class Parser:
206
328
  return ParseException(message, token.line, token.column, token)
207
329
 
208
330
  def _skip_newlines(self) -> None:
209
- """Skip newlines and comments."""
210
- while self._check(TokenType.NEWLINE, TokenType.COMMENT):
331
+ """Skip newlines, comments, and consumed heredoc content."""
332
+ while True:
211
333
  if self._check(TokenType.NEWLINE):
212
334
  self._advance()
213
335
  self._process_heredocs()
214
- else:
336
+ continue
337
+ if self._check(TokenType.COMMENT):
338
+ self._advance()
339
+ continue
340
+ # Skip heredoc content tokens already consumed by scan-ahead resolution
341
+ if (self._check(TokenType.HEREDOC_CONTENT)
342
+ and self.pos in self._consumed_heredoc_positions):
215
343
  self._advance()
344
+ continue
345
+ break
216
346
 
217
347
  def _skip_separators(self) -> None:
218
348
  """Skip statement separators (newlines, semicolons, comments)."""
@@ -224,6 +354,11 @@ class Parser:
224
354
  if self._check(TokenType.SEMICOLON, TokenType.COMMENT):
225
355
  self._advance()
226
356
  continue
357
+ # Skip heredoc content tokens already consumed by scan-ahead resolution
358
+ if (self._check(TokenType.HEREDOC_CONTENT)
359
+ and self.pos in self._consumed_heredoc_positions):
360
+ self._advance()
361
+ continue
227
362
  break
228
363
 
229
364
  def _is_statement_end(self) -> bool:
@@ -299,30 +434,36 @@ class Parser:
299
434
  def _resolve_pending_heredocs(
300
435
  self, redirections: list[RedirectionNode]
301
436
  ) -> list[RedirectionNode]:
302
- """Resolve pending heredocs by reading their content and updating redirections."""
437
+ """Resolve pending heredocs by reading their content and updating redirections.
438
+
439
+ Scans ahead in the token stream without moving the parser position,
440
+ so this works even when called before the full pipeline is parsed
441
+ (e.g., for 'cat << EOF | grep hello').
442
+ """
303
443
  if not self.pending_heredocs:
304
444
  return redirections
305
445
 
306
- # We need to skip past the current line to find heredoc content
307
- # Save position and scan for heredoc content
308
- saved_pos = self.pos
309
-
310
- # Skip to find HEREDOC_CONTENT tokens (they come after newline)
311
- while self.pos < len(self.tokens):
312
- token = self.tokens[self.pos]
313
- if token.type == TokenType.HEREDOC_CONTENT:
314
- break
315
- elif token.type == TokenType.NEWLINE:
316
- self.pos += 1
317
- else:
318
- break
446
+ # Scan ahead to find HEREDOC_CONTENT tokens without moving self.pos
447
+ # They may be past pipes, semicolons, etc. on the same line
448
+ # Skip positions already consumed by earlier resolutions
449
+ content_tokens = []
450
+ content_positions = []
451
+ scan_pos = self.pos
452
+ while scan_pos < len(self.tokens) and len(content_tokens) < len(self.pending_heredocs):
453
+ if (self.tokens[scan_pos].type == TokenType.HEREDOC_CONTENT
454
+ and scan_pos not in self._consumed_heredoc_positions):
455
+ content_tokens.append(self.tokens[scan_pos])
456
+ content_positions.append(scan_pos)
457
+ scan_pos += 1
458
+
459
+ # Mark these positions as consumed
460
+ self._consumed_heredoc_positions.update(content_positions)
319
461
 
320
462
  # Process each pending heredoc
321
463
  new_redirections = list(redirections)
322
- heredoc_idx = 0
323
- for heredoc_info in self.pending_heredocs:
324
- if self._check(TokenType.HEREDOC_CONTENT):
325
- content_token = self._advance()
464
+ for idx, heredoc_info in enumerate(self.pending_heredocs):
465
+ if idx < len(content_tokens):
466
+ content_token = content_tokens[idx]
326
467
  # If delimiter was quoted, treat content as literal (no expansion)
327
468
  content_word = self._parse_word_from_string(
328
469
  content_token.value,
@@ -347,7 +488,6 @@ class Parser:
347
488
  redir.operator, heredoc_node, redir.fd
348
489
  )
349
490
  break
350
- heredoc_idx += 1
351
491
 
352
492
  self.pending_heredocs = []
353
493
  return new_redirections
@@ -363,6 +503,13 @@ class Parser:
363
503
 
364
504
  while not self._check(TokenType.EOF):
365
505
  self._check_iteration_limit()
506
+
507
+ # Skip heredoc content tokens already consumed by scan-ahead resolution
508
+ if (self._check(TokenType.HEREDOC_CONTENT)
509
+ and self.pos in self._consumed_heredoc_positions):
510
+ self._advance()
511
+ continue
512
+
366
513
  stmt = self._parse_statement()
367
514
  if stmt:
368
515
  statements.append(stmt)
@@ -473,6 +620,7 @@ class Parser:
473
620
  name: Optional[WordNode] = None
474
621
  args: list[WordNode] = []
475
622
  redirections: list[RedirectionNode] = []
623
+ start_line = self._current().line
476
624
 
477
625
  # Parse leading redirections and assignments
478
626
  while True:
@@ -578,7 +726,7 @@ class Parser:
578
726
  if self.pending_heredocs:
579
727
  redirections = self._resolve_pending_heredocs(redirections)
580
728
 
581
- return AST.simple_command(name, args, assignments, redirections)
729
+ return AST.simple_command(name, args, assignments, redirections, line=start_line)
582
730
 
583
731
  def _parse_assignment(self) -> AssignmentNode:
584
732
  """Parse a variable assignment."""
@@ -605,12 +753,12 @@ class Parser:
605
753
  if value_str.startswith("("):
606
754
  # TODO: Parse array assignment
607
755
  # For now, treat as simple value
608
- value_word = self._parse_word_from_string(value_str, quoted=False)
756
+ value_word = self._parse_word_from_string(value_str, quoted=False, in_assignment=True)
609
757
  return AST.assignment(name, value_word, append)
610
758
 
611
759
  # Simple value
612
760
  if value_str:
613
- value_word = self._parse_word_from_string(value_str, quoted=False)
761
+ value_word = self._parse_word_from_string(value_str, quoted=False, in_assignment=True)
614
762
  else:
615
763
  value_word = None
616
764
 
@@ -732,6 +880,16 @@ class Parser:
732
880
  delimiter = delimiter[1:-1]
733
881
  quoted = True
734
882
 
883
+ # Check for embedded quotes (e.g., E'O'F or E"O"F)
884
+ if not quoted and ("'" in delimiter or '"' in delimiter):
885
+ delimiter = delimiter.replace("'", "").replace('"', "")
886
+ quoted = True
887
+
888
+ # Check for backslash-escaped delimiter (e.g., \EOF)
889
+ # The lexer removes backslashes, so the token span is longer than the value
890
+ if not quoted and (delim_token.end - delim_token.start) > len(delimiter):
891
+ quoted = True
892
+
735
893
  # Create placeholder target (will be filled when heredoc content is read)
736
894
  placeholder = AST.word([AST.literal("")])
737
895
 
@@ -834,13 +992,17 @@ class Parser:
834
992
 
835
993
  return AST.if_node(clauses, else_body, redirections)
836
994
 
837
- def _parse_for(self) -> ForNode:
838
- """Parse a for loop."""
995
+ def _parse_for(self) -> "ForNode | CStyleForNode":
996
+ """Parse a for loop (standard or C-style)."""
839
997
  self._expect(TokenType.FOR)
840
998
  self._skip_newlines()
841
999
 
842
- # Get variable name
843
- if not self._check(TokenType.NAME, TokenType.WORD):
1000
+ # Check for C-style for: for (( ... ))
1001
+ if self._check(TokenType.DPAREN_START):
1002
+ return self._parse_c_style_for()
1003
+
1004
+ # Get variable name (bash allows 'in' and other keywords as variable names here)
1005
+ if not self._check(TokenType.NAME, TokenType.WORD, TokenType.IN):
844
1006
  raise self._error("Expected variable name after 'for'")
845
1007
  variable = self._advance().value
846
1008
 
@@ -884,6 +1046,98 @@ class Parser:
884
1046
 
885
1047
  return AST.for_node(variable, words, body, redirections)
886
1048
 
1049
def _parse_c_style_for(self) -> CStyleForNode:
    """Parse ``for (( init; cond; update )); do body; done``.

    The tokens between the opening ``((`` and its matching ``))`` are
    concatenated back into raw text, split on ``;`` and each of the first
    three sections is parsed as an arithmetic expression. An empty section,
    or one that fails to parse, is stored as ``None``.

    Returns:
        A CStyleForNode carrying the three optional expressions, the loop
        body, any trailing redirections and the line of the opening ``((``.
    """
    start_line = self._current().line
    self._expect(TokenType.DPAREN_START)

    # Rebuild the header text token by token until the matching "))".
    pieces: list[str] = []
    nesting = 1
    while nesting > 0 and not self._check(TokenType.EOF):
        if self._check(TokenType.DPAREN_START):
            nesting += 1
            pieces.append("((")
        elif self._check(TokenType.DPAREN_END):
            nesting -= 1
            # The outermost "))" terminates the header and is not kept.
            if nesting > 0:
                pieces.append("))")
        elif self._check(TokenType.LPAREN):
            pieces.append("(")
        elif self._check(TokenType.RPAREN):
            pieces.append(")")
        else:
            pieces.append(self._current().value)
        self._advance()

    # Only the first three ';'-separated sections are used; pad if fewer.
    sections = [chunk.strip() for chunk in "".join(pieces).split(";")[:3]]
    sections.extend([""] * (3 - len(sections)))

    def to_arith(text: str):
        # Best effort: a blank or unparsable section yields None.
        if not text:
            return None
        try:
            return ArithmeticExpressionNode(
                expression=self._parse_arithmetic_expression(text)
            )
        except Exception:
            return None

    init_node = to_arith(sections[0])
    cond_node = to_arith(sections[1])
    update_node = to_arith(sections[2])

    # Header done: move on to "do", the body and the closing "done".
    self._skip_separators()
    self._expect(TokenType.DO, "Expected 'do' in for loop")
    self._skip_newlines()

    body = self._parse_compound_list()

    self._skip_newlines()
    self._expect(TokenType.DONE, "Expected 'done' to close for loop")

    # Collect any redirections that follow "done".
    redirections: list[RedirectionNode] = []
    redir = self._try_parse_redirection()
    while redir:
        redirections.append(redir)
        redir = self._try_parse_redirection()

    return CStyleForNode(
        init=init_node,
        condition=cond_node,
        update=update_node,
        body=tuple(body),
        redirections=tuple(redirections),
        line=start_line,
    )
1140
+
887
1141
  def _parse_while(self) -> WhileNode:
888
1142
  """Parse a while loop."""
889
1143
  self._expect(TokenType.WHILE)
@@ -1030,7 +1284,21 @@ class Parser:
1030
1284
  body = self._parse_compound_list()
1031
1285
 
1032
1286
  self._skip_newlines()
1033
- self._expect(TokenType.RPAREN, "Expected ')' to close subshell")
1287
+ # Handle )) being tokenized as DPAREN_END instead of two RPARENs
1288
+ if self._check(TokenType.DPAREN_END):
1289
+ tok = self._advance()
1290
+ # Insert a synthetic RPAREN token for the outer parser
1291
+ synthetic = Token(
1292
+ type=TokenType.RPAREN,
1293
+ value=")",
1294
+ start=tok.start + 1,
1295
+ end=tok.end,
1296
+ line=tok.line,
1297
+ column=tok.column + 1,
1298
+ )
1299
+ self.tokens.insert(self.pos, synthetic)
1300
+ else:
1301
+ self._expect(TokenType.RPAREN, "Expected ')' to close subshell")
1034
1302
 
1035
1303
  # Parse optional redirections
1036
1304
  redirections: list[RedirectionNode] = []
@@ -1358,24 +1626,47 @@ class Parser:
1358
1626
def _parse_word(self) -> WordNode:
    """Consume the current token and turn it into a WordNode.

    When the token carries ``segments`` (a word with mixed quoting such as
    "pre"{a,b}"suf"), the word is built from those segments. Otherwise it
    is built from the token's value and its quoted / single_quoted flags.
    """
    tok = self._advance()
    segments = tok.segments
    if not segments:
        return self._parse_word_from_string(
            tok.value,
            quoted=tok.quoted,
            single_quoted=tok.single_quoted,
        )
    return self._parse_word_from_segments(segments)
1366
1637
 
1367
- def _parse_word_from_string(self, value: str, quoted: bool = False, single_quoted: bool = False) -> WordNode:
1638
def _parse_word_from_segments(self, segments: list[tuple[str, str]]) -> WordNode:
    """Build a WordNode from lexer segments with mixed quoting.

    Args:
        segments: ``(text, mode)`` pairs in source order, where mode is
            'single', 'double', or anything else (treated as unquoted).

    Returns:
        A word whose parts are: one SingleQuotedPart per single-quoted
        segment, one DoubleQuotedPart wrapping the parsed parts of each
        double-quoted segment, and the parsed parts of each unquoted
        segment spliced in directly.
    """
    collected: list = []
    for segment_text, quoting in segments:
        if quoting == "single":
            # Single-quoted text is stored as-is, without parsing.
            collected.append(SingleQuotedPart(value=segment_text))
            continue
        if quoting == "double":
            nested = self._parse_word_parts(segment_text, quoted=True)
            collected.append(DoubleQuotedPart(parts=tuple(nested)))
            continue
        # Unquoted: parts join the word at top level.
        collected.extend(self._parse_word_parts(segment_text, quoted=False))
    return AST.word(collected)
1654
+
1655
+ def _parse_word_from_string(self, value: str, quoted: bool = False, single_quoted: bool = False, in_assignment: bool = False) -> WordNode:
1368
1656
  """Parse a string into a WordNode with appropriate parts."""
1369
- parts = self._parse_word_parts(value, quoted, single_quoted)
1657
+ parts = self._parse_word_parts(value, quoted, single_quoted, in_assignment=in_assignment)
1370
1658
  # Wrap double-quoted content in DoubleQuotedPart to preserve quote context
1371
1659
  if quoted and not single_quoted:
1372
1660
  return AST.word([DoubleQuotedPart(parts=tuple(parts))])
1373
1661
  # Wrap single-quoted content in SingleQuotedPart
1374
- if single_quoted and len(parts) == 1 and isinstance(parts[0], LiteralPart):
1375
- return AST.word([SingleQuotedPart(value=parts[0].value)])
1662
+ if single_quoted:
1663
+ if len(parts) == 1 and isinstance(parts[0], LiteralPart):
1664
+ return AST.word([SingleQuotedPart(value=parts[0].value)])
1665
+ elif len(parts) == 0:
1666
+ return AST.word([SingleQuotedPart(value="")])
1376
1667
  return AST.word(parts)
1377
1668
 
1378
- def _parse_word_parts(self, value: str, quoted: bool = False, single_quoted: bool = False) -> list[WordPart]:
1669
+ def _parse_word_parts(self, value: str, quoted: bool = False, single_quoted: bool = False, in_assignment: bool = False) -> list[WordPart]:
1379
1670
  """Parse word parts from a string value."""
1380
1671
  # Single-quoted strings are completely literal - no expansions
1381
1672
  if single_quoted:
@@ -1429,6 +1720,29 @@ class Parser:
1429
1720
  i = j
1430
1721
  continue
1431
1722
 
1723
+ # Handle $[...] legacy arithmetic expansion (equivalent to $((...)))
1724
+ if c == "$" and i + 1 < len(value) and value[i + 1] == "[":
1725
+ flush_literal()
1726
+ # Find matching closing ]
1727
+ depth = 1
1728
+ start = i + 2
1729
+ j = start
1730
+ while j < len(value) and depth > 0:
1731
+ if value[j] == "[":
1732
+ depth += 1
1733
+ elif value[j] == "]":
1734
+ depth -= 1
1735
+ j += 1
1736
+ arith_expr = value[start : j - 1]
1737
+ arith_node = self._parse_arithmetic_expression(arith_expr)
1738
+ parts.append(
1739
+ ArithmeticExpansionPart(
1740
+ expression=ArithmeticExpressionNode(expression=arith_node),
1741
+ )
1742
+ )
1743
+ i = j
1744
+ continue
1745
+
1432
1746
  # Handle $(...) command substitution
1433
1747
  if c == "$" and i + 1 < len(value) and value[i + 1] == "(":
1434
1748
  flush_literal()
@@ -1438,6 +1752,7 @@ class Parser:
1438
1752
  j = start
1439
1753
  while j < len(value) and depth > 0:
1440
1754
  if value[j] == "(":
1755
+
1441
1756
  depth += 1
1442
1757
  elif value[j] == ")":
1443
1758
  depth -= 1
@@ -1477,6 +1792,24 @@ class Parser:
1477
1792
  i = j
1478
1793
  continue
1479
1794
 
1795
+ # Handle $'...' ANSI-C quoting
1796
+ if c == "$" and i + 1 < len(value) and value[i + 1] == "'" and not quoted:
1797
+ flush_literal()
1798
+ # Find closing single quote, respecting backslash escapes
1799
+ j = i + 2
1800
+ while j < len(value):
1801
+ if value[j] == "\\" and j + 1 < len(value):
1802
+ j += 2 # Skip escaped character
1803
+ elif value[j] == "'":
1804
+ break
1805
+ else:
1806
+ j += 1
1807
+ raw_content = value[i + 2 : j]
1808
+ decoded = _decode_ansi_c_escapes(raw_content)
1809
+ parts.append(SingleQuotedPart(value=decoded))
1810
+ i = j + 1 if j < len(value) else j
1811
+ continue
1812
+
1480
1813
  # Handle simple $VAR expansion
1481
1814
  if c == "$" and i + 1 < len(value):
1482
1815
  next_c = value[i + 1]
@@ -1573,19 +1906,24 @@ class Parser:
1573
1906
  i += 1
1574
1907
  continue
1575
1908
 
1576
- # Handle tilde expansion at start
1577
- if c == "~" and i == 0 and not quoted:
1909
+ # Handle tilde expansion at start or after : in assignments
1910
+ if c == "~" and not quoted and (i == 0 or (in_assignment and i > 0 and value[i - 1] == ":")):
1578
1911
  flush_literal()
1579
- # Check for ~user
1580
- j = 1
1581
- while j < len(value) and (value[j].isalnum() or value[j] == "_"):
1582
- j += 1
1583
- if j > 1:
1584
- user = value[1:j]
1585
- parts.append(TildeExpansionPart(user=user))
1912
+ # Check for ~user, ~+, ~-
1913
+ j = i + 1
1914
+ if j < len(value) and value[j] in "+-":
1915
+ # ~+ or ~-
1916
+ parts.append(TildeExpansionPart(user=value[j]))
1917
+ i = j + 1
1586
1918
  else:
1587
- parts.append(TildeExpansionPart(user=None))
1588
- i = j
1919
+ while j < len(value) and (value[j].isalnum() or value[j] == "_"):
1920
+ j += 1
1921
+ if j > i + 1:
1922
+ user = value[i + 1:j]
1923
+ parts.append(TildeExpansionPart(user=user))
1924
+ else:
1925
+ parts.append(TildeExpansionPart(user=None))
1926
+ i = j
1589
1927
  continue
1590
1928
 
1591
1929
  # Handle escape sequences (only in unquoted context - lexer already handled quoted escapes)
@@ -1642,6 +1980,16 @@ class Parser:
1642
1980
  # Regular variable name
1643
1981
  while i < len(content) and (content[i].isalnum() or content[i] == "_"):
1644
1982
  i += 1
1983
+ # Handle array subscript: name[subscript]
1984
+ if i < len(content) and content[i] == "[":
1985
+ bracket_depth = 1
1986
+ i += 1
1987
+ while i < len(content) and bracket_depth > 0:
1988
+ if content[i] == "[":
1989
+ bracket_depth += 1
1990
+ elif content[i] == "]":
1991
+ bracket_depth -= 1
1992
+ i += 1
1645
1993
  param = content[:i]
1646
1994
 
1647
1995
  # If no operation follows, return simple expansion
@@ -1754,6 +2102,7 @@ class Parser:
1754
2102
  )
1755
2103
 
1756
2104
  # Handle pattern replacement ${VAR/pattern/replacement} ${VAR//pattern/replacement}
2105
+ # Also handle anchored patterns: ${VAR/#pattern/repl} (start) ${VAR/%pattern/repl} (end)
1757
2106
  if rest.startswith("//"):
1758
2107
  slash_pos = rest.find("/", 2)
1759
2108
  if slash_pos >= 0:
@@ -1765,7 +2114,37 @@ class Parser:
1765
2114
  return ParameterExpansionPart(
1766
2115
  parameter=param,
1767
2116
  operation=PatternReplacementOp(
1768
- pattern=pattern, replacement=replacement, replace_all=True
2117
+ pattern=pattern, replacement=replacement, all=True
2118
+ ),
2119
+ )
2120
+ if rest.startswith("/#"):
2121
+ # Anchored at start (prefix)
2122
+ slash_pos = rest.find("/", 2)
2123
+ if slash_pos >= 0:
2124
+ pattern = self._parse_word_from_string(rest[2:slash_pos])
2125
+ replacement = self._parse_word_from_string(rest[slash_pos + 1:])
2126
+ else:
2127
+ pattern = self._parse_word_from_string(rest[2:])
2128
+ replacement = self._parse_word_from_string("")
2129
+ return ParameterExpansionPart(
2130
+ parameter=param,
2131
+ operation=PatternReplacementOp(
2132
+ pattern=pattern, replacement=replacement, all=False, anchor="start"
2133
+ ),
2134
+ )
2135
+ if rest.startswith("/%"):
2136
+ # Anchored at end (suffix)
2137
+ slash_pos = rest.find("/", 2)
2138
+ if slash_pos >= 0:
2139
+ pattern = self._parse_word_from_string(rest[2:slash_pos])
2140
+ replacement = self._parse_word_from_string(rest[slash_pos + 1:])
2141
+ else:
2142
+ pattern = self._parse_word_from_string(rest[2:])
2143
+ replacement = self._parse_word_from_string("")
2144
+ return ParameterExpansionPart(
2145
+ parameter=param,
2146
+ operation=PatternReplacementOp(
2147
+ pattern=pattern, replacement=replacement, all=False, anchor="end"
1769
2148
  ),
1770
2149
  )
1771
2150
  if rest.startswith("/"):
@@ -1779,7 +2158,7 @@ class Parser:
1779
2158
  return ParameterExpansionPart(
1780
2159
  parameter=param,
1781
2160
  operation=PatternReplacementOp(
1782
- pattern=pattern, replacement=replacement, replace_all=False
2161
+ pattern=pattern, replacement=replacement, all=False
1783
2162
  ),
1784
2163
  )
1785
2164
 
@@ -1845,13 +2224,23 @@ class Parser:
1845
2224
 
1846
2225
  # Scan right-to-left for assignment operator (right-associative)
1847
2226
  depth = 0
2227
+ brace_depth = 0
2228
+ bracket_depth = 0
1848
2229
  for i in range(len(expr) - 1, -1, -1):
1849
2230
  c = expr[i]
1850
2231
  if c == ')':
1851
2232
  depth += 1
1852
2233
  elif c == '(':
1853
2234
  depth -= 1
1854
- elif depth == 0:
2235
+ elif c == '}':
2236
+ brace_depth += 1
2237
+ elif c == '{' and brace_depth > 0:
2238
+ brace_depth -= 1
2239
+ elif c == ']':
2240
+ bracket_depth += 1
2241
+ elif c == '[' and bracket_depth > 0:
2242
+ bracket_depth -= 1
2243
+ elif depth == 0 and brace_depth == 0 and bracket_depth == 0:
1855
2244
  for op in assign_ops:
1856
2245
  op_start = i - len(op) + 1
1857
2246
  if op_start >= 0 and expr[op_start:i + 1] == op:
@@ -1882,30 +2271,65 @@ class Parser:
1882
2271
  """Parse ternary: cond ? a : b"""
1883
2272
  # Find unquoted ? and : for ternary
1884
2273
  depth = 0
2274
+ brace_depth = 0
2275
+ bracket_depth = 0
1885
2276
  question_pos = -1
1886
- for i, c in enumerate(expr):
1887
- if c == '(':
2277
+ i = 0
2278
+ while i < len(expr):
2279
+ c = expr[i]
2280
+ if c == '$' and i + 1 < len(expr) and expr[i + 1] == '{':
2281
+ brace_depth += 1
2282
+ i += 2
2283
+ continue
2284
+ elif c == '{' and brace_depth > 0:
2285
+ brace_depth += 1
2286
+ elif c == '}' and brace_depth > 0:
2287
+ brace_depth -= 1
2288
+ elif brace_depth > 0:
2289
+ i += 1
2290
+ continue
2291
+ elif c == '[':
2292
+ bracket_depth += 1
2293
+ elif c == ']' and bracket_depth > 0:
2294
+ bracket_depth -= 1
2295
+ elif c == '(':
1888
2296
  depth += 1
1889
2297
  elif c == ')':
1890
2298
  depth -= 1
1891
- elif c == '?' and depth == 0:
2299
+ elif c == '?' and depth == 0 and bracket_depth == 0:
1892
2300
  question_pos = i
2301
+ i += 1
1893
2302
  break
2303
+ i += 1
1894
2304
 
1895
2305
  if question_pos > 0:
1896
2306
  # Find the matching : (must track nested ternary depth)
1897
2307
  colon_pos = -1
1898
2308
  ternary_depth = 0
1899
2309
  paren_depth = 0
2310
+ brace_depth2 = 0
2311
+ bracket_depth2 = 0
1900
2312
  for i in range(question_pos + 1, len(expr)):
1901
2313
  c = expr[i]
1902
- if c == '(':
2314
+ if c == '$' and i + 1 < len(expr) and expr[i + 1] == '{':
2315
+ brace_depth2 += 1
2316
+ elif c == '{' and brace_depth2 > 0:
2317
+ brace_depth2 += 1
2318
+ elif c == '}' and brace_depth2 > 0:
2319
+ brace_depth2 -= 1
2320
+ elif brace_depth2 > 0:
2321
+ continue
2322
+ elif c == '[':
2323
+ bracket_depth2 += 1
2324
+ elif c == ']' and bracket_depth2 > 0:
2325
+ bracket_depth2 -= 1
2326
+ elif c == '(':
1903
2327
  paren_depth += 1
1904
2328
  elif c == ')':
1905
2329
  paren_depth -= 1
1906
- elif c == '?' and paren_depth == 0:
2330
+ elif c == '?' and paren_depth == 0 and bracket_depth2 == 0:
1907
2331
  ternary_depth += 1 # Nested ternary
1908
- elif c == ':' and paren_depth == 0:
2332
+ elif c == ':' and paren_depth == 0 and bracket_depth2 == 0:
1909
2333
  if ternary_depth > 0:
1910
2334
  ternary_depth -= 1 # Close nested ternary
1911
2335
  else:
@@ -1969,23 +2393,44 @@ class Parser:
1969
2393
  """Parse binary operators at a given precedence level."""
1970
2394
  expr = expr.strip()
1971
2395
  depth = 0
2396
+ brace_depth = 0 # Track ${...} depth
2397
+ bracket_depth = 0 # Track [...] depth
1972
2398
 
1973
2399
  # Sort operators by length (longest first) to match ** before *
1974
2400
  ops = sorted(operators, key=len, reverse=True)
1975
2401
  exclude = exclude or []
1976
2402
 
2403
+ # Characters that indicate the preceding context is an operator (not a value)
2404
+ # Used to distinguish binary +/- from unary +/-
2405
+ _op_chars = frozenset('+-*/%=<>!&|^~(,?:')
2406
+
1977
2407
  # Scan for operator (right-to-left for left-assoc, left-to-right for right-assoc)
1978
2408
  positions = []
1979
2409
  i = 0
1980
2410
  while i < len(expr):
1981
2411
  c = expr[i]
1982
- if c == '(':
2412
+ if c == '(' and brace_depth == 0:
1983
2413
  depth += 1
1984
2414
  i += 1
1985
- elif c == ')':
2415
+ elif c == ')' and brace_depth == 0:
1986
2416
  depth -= 1
1987
2417
  i += 1
1988
- elif depth == 0:
2418
+ elif c == '$' and i + 1 < len(expr) and expr[i + 1] == '{':
2419
+ brace_depth += 1
2420
+ i += 2
2421
+ elif c == '{' and brace_depth > 0:
2422
+ brace_depth += 1
2423
+ i += 1
2424
+ elif c == '}' and brace_depth > 0:
2425
+ brace_depth -= 1
2426
+ i += 1
2427
+ elif c == '[' and depth == 0 and brace_depth == 0:
2428
+ bracket_depth += 1
2429
+ i += 1
2430
+ elif c == ']' and bracket_depth > 0:
2431
+ bracket_depth -= 1
2432
+ i += 1
2433
+ elif depth == 0 and brace_depth == 0 and bracket_depth == 0:
1989
2434
  # First check exclusions - skip past them entirely
1990
2435
  skip_len = 0
1991
2436
  for ex in exclude:
@@ -1999,6 +2444,20 @@ class Parser:
1999
2444
  matched = False
2000
2445
  for op in ops:
2001
2446
  if expr[i:i+len(op)] == op:
2447
+ # For single-char + or -, check if this is a binary or unary operator.
2448
+ # It's unary if preceded by nothing or by an operator character.
2449
+ if op in ('+', '-') and len(op) == 1:
2450
+ # Find the last non-whitespace char before this position
2451
+ prev_nonws = ''
2452
+ for k in range(i - 1, -1, -1):
2453
+ if expr[k] not in ' \t':
2454
+ prev_nonws = expr[k]
2455
+ break
2456
+ if not prev_nonws or prev_nonws in _op_chars:
2457
+ # This is a unary operator, not binary - skip it
2458
+ i += 1
2459
+ matched = True
2460
+ break
2002
2461
  positions.append((i, op))
2003
2462
  matched = True
2004
2463
  i += len(op)
@@ -2067,11 +2526,7 @@ class Parser:
2067
2526
  # Parenthesized expression
2068
2527
  if expr.startswith('(') and expr.endswith(')'):
2069
2528
  inner = expr[1:-1].strip()
2070
- return ArithGroupNode(expression=self._parse_arith_ternary(inner))
2071
-
2072
- # Number
2073
- if expr.isdigit() or (expr.startswith('-') and expr[1:].isdigit()):
2074
- return ArithNumberNode(value=int(expr))
2529
+ return ArithGroupNode(expression=self._parse_arith_comma(inner))
2075
2530
 
2076
2531
  # Hex number
2077
2532
  if expr.startswith('0x') or expr.startswith('0X'):
@@ -2080,13 +2535,17 @@ class Parser:
2080
2535
  except ValueError:
2081
2536
  pass
2082
2537
 
2083
- # Octal number
2538
+ # Octal number (starts with 0, more than one digit)
2084
2539
  if expr.startswith('0') and len(expr) > 1 and expr[1:].isdigit():
2085
2540
  try:
2086
2541
  return ArithNumberNode(value=int(expr, 8))
2087
2542
  except ValueError:
2088
2543
  pass
2089
2544
 
2545
+ # Number
2546
+ if expr.isdigit() or (expr.startswith('-') and expr[1:].isdigit()):
2547
+ return ArithNumberNode(value=int(expr))
2548
+
2090
2549
  # Base N constant: base#value (e.g., 2#101, 16#ff, 36#z)
2091
2550
  base_match = re.match(r'^(\d+)#([a-zA-Z0-9@_]+)$', expr)
2092
2551
  if base_match: