just-bash 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193) hide show
  1. just_bash/__init__.py +55 -0
  2. just_bash/ast/__init__.py +213 -0
  3. just_bash/ast/factory.py +320 -0
  4. just_bash/ast/types.py +953 -0
  5. just_bash/bash.py +220 -0
  6. just_bash/commands/__init__.py +23 -0
  7. just_bash/commands/argv/__init__.py +5 -0
  8. just_bash/commands/argv/argv.py +21 -0
  9. just_bash/commands/awk/__init__.py +5 -0
  10. just_bash/commands/awk/awk.py +1168 -0
  11. just_bash/commands/base64/__init__.py +5 -0
  12. just_bash/commands/base64/base64.py +138 -0
  13. just_bash/commands/basename/__init__.py +5 -0
  14. just_bash/commands/basename/basename.py +72 -0
  15. just_bash/commands/bash/__init__.py +5 -0
  16. just_bash/commands/bash/bash.py +188 -0
  17. just_bash/commands/cat/__init__.py +5 -0
  18. just_bash/commands/cat/cat.py +173 -0
  19. just_bash/commands/checksum/__init__.py +5 -0
  20. just_bash/commands/checksum/checksum.py +179 -0
  21. just_bash/commands/chmod/__init__.py +5 -0
  22. just_bash/commands/chmod/chmod.py +216 -0
  23. just_bash/commands/column/__init__.py +5 -0
  24. just_bash/commands/column/column.py +180 -0
  25. just_bash/commands/comm/__init__.py +5 -0
  26. just_bash/commands/comm/comm.py +150 -0
  27. just_bash/commands/compression/__init__.py +5 -0
  28. just_bash/commands/compression/compression.py +298 -0
  29. just_bash/commands/cp/__init__.py +5 -0
  30. just_bash/commands/cp/cp.py +149 -0
  31. just_bash/commands/curl/__init__.py +5 -0
  32. just_bash/commands/curl/curl.py +801 -0
  33. just_bash/commands/cut/__init__.py +5 -0
  34. just_bash/commands/cut/cut.py +327 -0
  35. just_bash/commands/date/__init__.py +5 -0
  36. just_bash/commands/date/date.py +258 -0
  37. just_bash/commands/diff/__init__.py +5 -0
  38. just_bash/commands/diff/diff.py +118 -0
  39. just_bash/commands/dirname/__init__.py +5 -0
  40. just_bash/commands/dirname/dirname.py +56 -0
  41. just_bash/commands/du/__init__.py +5 -0
  42. just_bash/commands/du/du.py +150 -0
  43. just_bash/commands/echo/__init__.py +5 -0
  44. just_bash/commands/echo/echo.py +125 -0
  45. just_bash/commands/env/__init__.py +5 -0
  46. just_bash/commands/env/env.py +163 -0
  47. just_bash/commands/expand/__init__.py +5 -0
  48. just_bash/commands/expand/expand.py +299 -0
  49. just_bash/commands/expr/__init__.py +5 -0
  50. just_bash/commands/expr/expr.py +273 -0
  51. just_bash/commands/file/__init__.py +5 -0
  52. just_bash/commands/file/file.py +274 -0
  53. just_bash/commands/find/__init__.py +5 -0
  54. just_bash/commands/find/find.py +623 -0
  55. just_bash/commands/fold/__init__.py +5 -0
  56. just_bash/commands/fold/fold.py +160 -0
  57. just_bash/commands/grep/__init__.py +5 -0
  58. just_bash/commands/grep/grep.py +418 -0
  59. just_bash/commands/head/__init__.py +5 -0
  60. just_bash/commands/head/head.py +167 -0
  61. just_bash/commands/help/__init__.py +5 -0
  62. just_bash/commands/help/help.py +67 -0
  63. just_bash/commands/hostname/__init__.py +5 -0
  64. just_bash/commands/hostname/hostname.py +21 -0
  65. just_bash/commands/html_to_markdown/__init__.py +5 -0
  66. just_bash/commands/html_to_markdown/html_to_markdown.py +191 -0
  67. just_bash/commands/join/__init__.py +5 -0
  68. just_bash/commands/join/join.py +252 -0
  69. just_bash/commands/jq/__init__.py +5 -0
  70. just_bash/commands/jq/jq.py +280 -0
  71. just_bash/commands/ln/__init__.py +5 -0
  72. just_bash/commands/ln/ln.py +127 -0
  73. just_bash/commands/ls/__init__.py +5 -0
  74. just_bash/commands/ls/ls.py +280 -0
  75. just_bash/commands/mkdir/__init__.py +5 -0
  76. just_bash/commands/mkdir/mkdir.py +92 -0
  77. just_bash/commands/mv/__init__.py +5 -0
  78. just_bash/commands/mv/mv.py +142 -0
  79. just_bash/commands/nl/__init__.py +5 -0
  80. just_bash/commands/nl/nl.py +180 -0
  81. just_bash/commands/od/__init__.py +5 -0
  82. just_bash/commands/od/od.py +157 -0
  83. just_bash/commands/paste/__init__.py +5 -0
  84. just_bash/commands/paste/paste.py +100 -0
  85. just_bash/commands/printf/__init__.py +5 -0
  86. just_bash/commands/printf/printf.py +157 -0
  87. just_bash/commands/pwd/__init__.py +5 -0
  88. just_bash/commands/pwd/pwd.py +23 -0
  89. just_bash/commands/read/__init__.py +5 -0
  90. just_bash/commands/read/read.py +185 -0
  91. just_bash/commands/readlink/__init__.py +5 -0
  92. just_bash/commands/readlink/readlink.py +86 -0
  93. just_bash/commands/registry.py +844 -0
  94. just_bash/commands/rev/__init__.py +5 -0
  95. just_bash/commands/rev/rev.py +74 -0
  96. just_bash/commands/rg/__init__.py +5 -0
  97. just_bash/commands/rg/rg.py +1048 -0
  98. just_bash/commands/rm/__init__.py +5 -0
  99. just_bash/commands/rm/rm.py +106 -0
  100. just_bash/commands/search_engine/__init__.py +13 -0
  101. just_bash/commands/search_engine/matcher.py +170 -0
  102. just_bash/commands/search_engine/regex.py +159 -0
  103. just_bash/commands/sed/__init__.py +5 -0
  104. just_bash/commands/sed/sed.py +863 -0
  105. just_bash/commands/seq/__init__.py +5 -0
  106. just_bash/commands/seq/seq.py +190 -0
  107. just_bash/commands/shell/__init__.py +5 -0
  108. just_bash/commands/shell/shell.py +206 -0
  109. just_bash/commands/sleep/__init__.py +5 -0
  110. just_bash/commands/sleep/sleep.py +62 -0
  111. just_bash/commands/sort/__init__.py +5 -0
  112. just_bash/commands/sort/sort.py +411 -0
  113. just_bash/commands/split/__init__.py +5 -0
  114. just_bash/commands/split/split.py +237 -0
  115. just_bash/commands/sqlite3/__init__.py +5 -0
  116. just_bash/commands/sqlite3/sqlite3_cmd.py +505 -0
  117. just_bash/commands/stat/__init__.py +5 -0
  118. just_bash/commands/stat/stat.py +150 -0
  119. just_bash/commands/strings/__init__.py +5 -0
  120. just_bash/commands/strings/strings.py +150 -0
  121. just_bash/commands/tac/__init__.py +5 -0
  122. just_bash/commands/tac/tac.py +158 -0
  123. just_bash/commands/tail/__init__.py +5 -0
  124. just_bash/commands/tail/tail.py +180 -0
  125. just_bash/commands/tar/__init__.py +5 -0
  126. just_bash/commands/tar/tar.py +1067 -0
  127. just_bash/commands/tee/__init__.py +5 -0
  128. just_bash/commands/tee/tee.py +63 -0
  129. just_bash/commands/timeout/__init__.py +5 -0
  130. just_bash/commands/timeout/timeout.py +188 -0
  131. just_bash/commands/touch/__init__.py +5 -0
  132. just_bash/commands/touch/touch.py +91 -0
  133. just_bash/commands/tr/__init__.py +5 -0
  134. just_bash/commands/tr/tr.py +297 -0
  135. just_bash/commands/tree/__init__.py +5 -0
  136. just_bash/commands/tree/tree.py +139 -0
  137. just_bash/commands/true/__init__.py +5 -0
  138. just_bash/commands/true/true.py +32 -0
  139. just_bash/commands/uniq/__init__.py +5 -0
  140. just_bash/commands/uniq/uniq.py +323 -0
  141. just_bash/commands/wc/__init__.py +5 -0
  142. just_bash/commands/wc/wc.py +169 -0
  143. just_bash/commands/which/__init__.py +5 -0
  144. just_bash/commands/which/which.py +52 -0
  145. just_bash/commands/xan/__init__.py +5 -0
  146. just_bash/commands/xan/xan.py +1663 -0
  147. just_bash/commands/xargs/__init__.py +5 -0
  148. just_bash/commands/xargs/xargs.py +136 -0
  149. just_bash/commands/yq/__init__.py +5 -0
  150. just_bash/commands/yq/yq.py +848 -0
  151. just_bash/fs/__init__.py +29 -0
  152. just_bash/fs/in_memory_fs.py +621 -0
  153. just_bash/fs/mountable_fs.py +504 -0
  154. just_bash/fs/overlay_fs.py +894 -0
  155. just_bash/fs/read_write_fs.py +455 -0
  156. just_bash/interpreter/__init__.py +37 -0
  157. just_bash/interpreter/builtins/__init__.py +92 -0
  158. just_bash/interpreter/builtins/alias.py +154 -0
  159. just_bash/interpreter/builtins/cd.py +76 -0
  160. just_bash/interpreter/builtins/control.py +127 -0
  161. just_bash/interpreter/builtins/declare.py +336 -0
  162. just_bash/interpreter/builtins/export.py +56 -0
  163. just_bash/interpreter/builtins/let.py +44 -0
  164. just_bash/interpreter/builtins/local.py +57 -0
  165. just_bash/interpreter/builtins/mapfile.py +152 -0
  166. just_bash/interpreter/builtins/misc.py +378 -0
  167. just_bash/interpreter/builtins/readonly.py +80 -0
  168. just_bash/interpreter/builtins/set.py +234 -0
  169. just_bash/interpreter/builtins/shopt.py +201 -0
  170. just_bash/interpreter/builtins/source.py +136 -0
  171. just_bash/interpreter/builtins/test.py +290 -0
  172. just_bash/interpreter/builtins/unset.py +53 -0
  173. just_bash/interpreter/conditionals.py +387 -0
  174. just_bash/interpreter/control_flow.py +381 -0
  175. just_bash/interpreter/errors.py +116 -0
  176. just_bash/interpreter/expansion.py +1156 -0
  177. just_bash/interpreter/interpreter.py +813 -0
  178. just_bash/interpreter/types.py +134 -0
  179. just_bash/network/__init__.py +1 -0
  180. just_bash/parser/__init__.py +39 -0
  181. just_bash/parser/lexer.py +948 -0
  182. just_bash/parser/parser.py +2162 -0
  183. just_bash/py.typed +0 -0
  184. just_bash/query_engine/__init__.py +83 -0
  185. just_bash/query_engine/builtins/__init__.py +1283 -0
  186. just_bash/query_engine/evaluator.py +578 -0
  187. just_bash/query_engine/parser.py +525 -0
  188. just_bash/query_engine/tokenizer.py +329 -0
  189. just_bash/query_engine/types.py +373 -0
  190. just_bash/types.py +180 -0
  191. just_bash-0.1.5.dist-info/METADATA +410 -0
  192. just_bash-0.1.5.dist-info/RECORD +193 -0
  193. just_bash-0.1.5.dist-info/WHEEL +4 -0
@@ -0,0 +1,2162 @@
1
+ """
2
+ Recursive Descent Parser for Bash Scripts
3
+
4
+ This parser consumes tokens from the lexer and produces an AST.
5
+ It follows the bash grammar structure for correctness.
6
+
7
+ Grammar (simplified):
8
+ script ::= statement*
9
+ statement ::= pipeline ((&&|'||') pipeline)* [&]
10
+ pipeline ::= [!] command (| command)*
11
+ command ::= simple_command | compound_command | function_def
12
+ simple_command ::= (assignment)* [word] (word)* (redirection)*
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import re
18
+ from typing import Optional, Sequence
19
+
20
+ from ..ast import (
21
+ AST,
22
+ ScriptNode,
23
+ StatementNode,
24
+ PipelineNode,
25
+ SimpleCommandNode,
26
+ CommandNode,
27
+ WordNode,
28
+ WordPart,
29
+ LiteralPart,
30
+ SingleQuotedPart,
31
+ DoubleQuotedPart,
32
+ EscapedPart,
33
+ ParameterExpansionPart,
34
+ CommandSubstitutionPart,
35
+ GlobPart,
36
+ TildeExpansionPart,
37
+ AssignmentNode,
38
+ RedirectionNode,
39
+ RedirectionOperator,
40
+ HereDocNode,
41
+ # Compound command nodes
42
+ IfNode,
43
+ IfClause,
44
+ ForNode,
45
+ WhileNode,
46
+ UntilNode,
47
+ CaseNode,
48
+ CaseItemNode,
49
+ SubshellNode,
50
+ GroupNode,
51
+ FunctionDefNode,
52
+ CompoundCommandNode,
53
+ # Conditional command nodes
54
+ ConditionalCommandNode,
55
+ ArithmeticCommandNode,
56
+ CondBinaryNode,
57
+ CondUnaryNode,
58
+ CondNotNode,
59
+ CondAndNode,
60
+ CondOrNode,
61
+ CondGroupNode,
62
+ CondWordNode,
63
+ # Arithmetic nodes
64
+ ArithmeticExpansionPart,
65
+ ArithmeticExpressionNode,
66
+ ArithNumberNode,
67
+ ArithVariableNode,
68
+ ArithBinaryNode,
69
+ ArithUnaryNode,
70
+ ArithGroupNode,
71
+ ArithTernaryNode,
72
+ ArithAssignmentNode,
73
+ ArithExpr,
74
+ # Parameter expansion operations
75
+ DefaultValueOp,
76
+ AssignDefaultOp,
77
+ ErrorIfUnsetOp,
78
+ UseAlternativeOp,
79
+ LengthOp,
80
+ SubstringOp,
81
+ PatternRemovalOp,
82
+ PatternReplacementOp,
83
+ CaseModificationOp,
84
+ TransformOp,
85
+ ParameterOperation,
86
+ )
87
+ from .lexer import Lexer, Token, TokenType
88
+
89
+
90
# Limits to prevent runaway parsing
# Largest script accepted by Parser.parse(). The check compares this against
# len(input_text), i.e. a character count (about 1 MB for ASCII-only input).
MAX_INPUT_SIZE = 1_000_000  # 1MB
# Largest number of tokens Parser.parse() accepts back from the lexer.
MAX_TOKENS = 100_000
# Number of Parser._check_iteration_limit() calls allowed in a single parse
# before it is aborted as a possible infinite loop.
MAX_PARSE_ITERATIONS = 1_000_000
94
+
95
+
96
class ParseException(Exception):
    """Error raised when the parser rejects its input.

    The constructor arguments are kept as attributes (``message``, ``line``,
    ``column``, ``token``); ``str(exc)`` is the message followed by the
    location, e.g. ``"Expected FI at line 3, column 1"``.
    """

    def __init__(
        self,
        message: str,
        line: int = 1,
        column: int = 1,
        token: Optional[Token] = None,
    ) -> None:
        located_message = f"{message} at line {line}, column {column}"
        super().__init__(located_message)
        # Keep the raw pieces so callers can build their own diagnostics.
        self.message, self.line, self.column = message, line, column
        self.token = token
111
+
112
+
113
+ class Parser:
114
+ """Parser class - transforms tokens into AST."""
115
+
116
+ def __init__(self) -> None:
117
+ self.tokens: list[Token] = []
118
+ self.pos = 0
119
+ self.pending_heredocs: list[dict] = []
120
+ self.parse_iterations = 0
121
+
122
def _check_iteration_limit(self) -> None:
    """Count one parse step and abort once the step budget is used up.

    Raises:
        ParseException: when more than MAX_PARSE_ITERATIONS steps have been
            counted since the counter was last reset; the error is located
            at the current token.
    """
    self.parse_iterations += 1
    if self.parse_iterations <= MAX_PARSE_ITERATIONS:
        return
    here = self._current()
    raise ParseException(
        "Maximum parse iterations exceeded (possible infinite loop)",
        here.line,
        here.column,
    )
131
+
132
def parse(self, input_text: str) -> ScriptNode:
    """Tokenize *input_text* and parse it into a script node.

    Raises:
        ParseException: if the text is longer than MAX_INPUT_SIZE, if the
            lexer produces more than MAX_TOKENS tokens, or if parsing fails.
    """
    # Reject oversized input before doing any lexing work.
    if len(input_text) > MAX_INPUT_SIZE:
        raise ParseException(
            f"Input too large: {len(input_text)} bytes exceeds limit of {MAX_INPUT_SIZE}",
            line=1,
            column=1,
        )

    self.tokens = Lexer(input_text).tokenize()

    # Reject token streams that are too long to parse safely.
    if len(self.tokens) > MAX_TOKENS:
        raise ParseException(
            f"Too many tokens: {len(self.tokens)} exceeds limit of {MAX_TOKENS}",
            line=1,
            column=1,
        )

    # Start from a clean cursor and clean bookkeeping for this run.
    self.pos, self.parse_iterations = 0, 0
    self.pending_heredocs = []
    return self._parse_script()
157
+
158
def parse_tokens(self, tokens: list[Token]) -> ScriptNode:
    """Parse from pre-tokenized input.

    Resets the parser state and parses *tokens*. Unlike parse(), no
    MAX_TOKENS check is applied here.

    Args:
        tokens: Token list to parse. The helper methods fall back to the
            last token once the cursor runs off the end, so the list is
            presumably expected to end with an EOF token (as lexer output
            does) — TODO confirm against Lexer.tokenize().

    Returns:
        The parsed script; an empty token list yields an empty script.
    """
    self.tokens = tokens
    self.pos = 0
    self.pending_heredocs = []
    self.parse_iterations = 0

    if not tokens:
        # _current() indexes tokens[-1] as its fallback, which raises
        # IndexError on an empty list; nothing to parse means no statements.
        return AST.script([])

    return self._parse_script()
165
+
166
+ # =========================================================================
167
+ # Helper methods
168
+ # =========================================================================
169
+
170
+ def _current(self) -> Token:
171
+ """Get current token."""
172
+ if self.pos < len(self.tokens):
173
+ return self.tokens[self.pos]
174
+ return self.tokens[-1]
175
+
176
+ def _peek(self, offset: int = 0) -> Token:
177
+ """Peek at token at offset from current position."""
178
+ idx = self.pos + offset
179
+ if idx < len(self.tokens):
180
+ return self.tokens[idx]
181
+ return self.tokens[-1]
182
+
183
+ def _advance(self) -> Token:
184
+ """Advance to next token and return current."""
185
+ token = self._current()
186
+ if self.pos < len(self.tokens) - 1:
187
+ self.pos += 1
188
+ return token
189
+
190
+ def _check(self, *types: TokenType) -> bool:
191
+ """Check if current token matches any of the given types."""
192
+ current_type = self._current().type
193
+ return current_type in types
194
+
195
+ def _expect(self, type_: TokenType, message: Optional[str] = None) -> Token:
196
+ """Expect current token to be of given type, advance if so."""
197
+ if self._check(type_):
198
+ return self._advance()
199
+ token = self._current()
200
+ msg = message or f"Expected {type_.name}, got {token.type.name}"
201
+ raise ParseException(msg, token.line, token.column, token)
202
+
203
def _error(self, message: str) -> ParseException:
    """Build (but do not raise) a ParseException located at the current token."""
    here = self._current()
    return ParseException(message, line=here.line, column=here.column, token=here)
207
+
208
def _skip_newlines(self) -> None:
    """Consume any run of NEWLINE and COMMENT tokens.

    After each NEWLINE, pending here-documents are processed.
    """
    while self._check(TokenType.NEWLINE, TokenType.COMMENT):
        ended_line = self._check(TokenType.NEWLINE)
        self._advance()
        if ended_line:
            self._process_heredocs()
216
+
217
def _skip_separators(self) -> None:
    """Consume any run of NEWLINE, SEMICOLON and COMMENT tokens.

    After each NEWLINE, pending here-documents are processed.
    """
    while self._check(TokenType.NEWLINE, TokenType.SEMICOLON, TokenType.COMMENT):
        ended_line = self._check(TokenType.NEWLINE)
        self._advance()
        if ended_line:
            self._process_heredocs()
228
+
229
def _is_statement_end(self) -> bool:
    """Return True when the current token ends the argument list of a simple command."""
    terminators = (
        TokenType.EOF,
        TokenType.NEWLINE,
        TokenType.SEMICOLON,
        TokenType.AMP,
        TokenType.AND_AND,
        TokenType.OR_OR,
        TokenType.RPAREN,
        TokenType.RBRACE,
        TokenType.DSEMI,
        TokenType.SEMI_AND,
        TokenType.SEMI_SEMI_AND,
    )
    return self._current().type in terminators
244
+
245
def _is_command_start(self) -> bool:
    """Return True when the current token is one that may begin a command."""
    return self._check(
        # Plain words and assignments
        TokenType.WORD,
        TokenType.NAME,
        TokenType.NUMBER,
        TokenType.ASSIGNMENT_WORD,
        # Compound-command openers and keywords
        TokenType.IF,
        TokenType.FOR,
        TokenType.WHILE,
        TokenType.UNTIL,
        TokenType.CASE,
        TokenType.LPAREN,
        TokenType.LBRACE,
        TokenType.DPAREN_START,
        TokenType.DBRACK_START,
        TokenType.FUNCTION,
        TokenType.BANG,
        TokenType.IN,
        # Redirection operators, which may precede the command name
        TokenType.LESS,
        TokenType.GREAT,
        TokenType.DLESS,
        TokenType.DGREAT,
        TokenType.LESSAND,
        TokenType.GREATAND,
        TokenType.LESSGREAT,
        TokenType.DLESSDASH,
        TokenType.CLOBBER,
        TokenType.TLESS,
        TokenType.AND_GREAT,
        TokenType.AND_DGREAT,
    )
279
+
280
+ def _process_heredocs(self) -> None:
281
+ """Process pending here-documents (old method, no longer used directly)."""
282
+ for heredoc in self.pending_heredocs:
283
+ if self._check(TokenType.HEREDOC_CONTENT):
284
+ content_token = self._advance()
285
+ # If delimiter was quoted, treat content as literal (no expansion)
286
+ content_word = self._parse_word_from_string(
287
+ content_token.value,
288
+ quoted=False,
289
+ single_quoted=heredoc["quoted"]
290
+ )
291
+ heredoc["redirect_target"] = AST.here_doc(
292
+ heredoc["delimiter"],
293
+ content_word,
294
+ heredoc["strip_tabs"],
295
+ heredoc["quoted"],
296
+ )
297
+ self.pending_heredocs = []
298
+
299
+ def _resolve_pending_heredocs(
300
+ self, redirections: list[RedirectionNode]
301
+ ) -> list[RedirectionNode]:
302
+ """Resolve pending heredocs by reading their content and updating redirections."""
303
+ if not self.pending_heredocs:
304
+ return redirections
305
+
306
+ # We need to skip past the current line to find heredoc content
307
+ # Save position and scan for heredoc content
308
+ saved_pos = self.pos
309
+
310
+ # Skip to find HEREDOC_CONTENT tokens (they come after newline)
311
+ while self.pos < len(self.tokens):
312
+ token = self.tokens[self.pos]
313
+ if token.type == TokenType.HEREDOC_CONTENT:
314
+ break
315
+ elif token.type == TokenType.NEWLINE:
316
+ self.pos += 1
317
+ else:
318
+ break
319
+
320
+ # Process each pending heredoc
321
+ new_redirections = list(redirections)
322
+ heredoc_idx = 0
323
+ for heredoc_info in self.pending_heredocs:
324
+ if self._check(TokenType.HEREDOC_CONTENT):
325
+ content_token = self._advance()
326
+ # If delimiter was quoted, treat content as literal (no expansion)
327
+ content_word = self._parse_word_from_string(
328
+ content_token.value,
329
+ quoted=False,
330
+ single_quoted=heredoc_info["quoted"]
331
+ )
332
+ heredoc_node = AST.here_doc(
333
+ heredoc_info["delimiter"],
334
+ content_word,
335
+ heredoc_info["strip_tabs"],
336
+ heredoc_info["quoted"],
337
+ )
338
+ # Find the corresponding placeholder redirection and replace it
339
+ for i, redir in enumerate(new_redirections):
340
+ if redir.operator in ("<<", "<<-"):
341
+ # Check if this looks like our placeholder
342
+ if (redir.target and redir.target.parts and
343
+ len(redir.target.parts) == 1 and
344
+ hasattr(redir.target.parts[0], 'value') and
345
+ redir.target.parts[0].value == ""):
346
+ new_redirections[i] = AST.redirection(
347
+ redir.operator, heredoc_node, redir.fd
348
+ )
349
+ break
350
+ heredoc_idx += 1
351
+
352
+ self.pending_heredocs = []
353
+ return new_redirections
354
+
355
+ # =========================================================================
356
+ # Main parsing methods
357
+ # =========================================================================
358
+
359
def _parse_script(self) -> ScriptNode:
    """Parse a complete script: statements separated by newlines/semicolons.

    Returns:
        A script node holding every parsed statement in order.

    Raises:
        ParseException: when the current token can neither start a command
            nor be skipped as a separator (for example a stray ``)``), or
            when a nested parse step fails.
    """
    statements: list[StatementNode] = []
    self._skip_newlines()

    while not self._check(TokenType.EOF):
        self._check_iteration_limit()
        start_pos = self.pos

        stmt = self._parse_statement()
        if stmt:
            statements.append(stmt)
        self._skip_separators()

        if self.pos == start_pos:
            # Nothing was consumed, so the next pass would see exactly the
            # same token. Fail now instead of spinning until the iteration
            # limit trips with a misleading "possible infinite loop" error.
            raise self._error(
                f"Unexpected token {self._current().type.name}"
            )

    return AST.script(statements)
372
+
373
def _parse_statement(self) -> Optional[StatementNode]:
    """Parse one statement: pipelines joined by ``&&`` / ``||``, optionally ending in ``&``.

    Returns:
        The statement node, or None when the current token cannot start a
        command or no pipeline could be parsed.

    Raises:
        ParseException: when an ``&&`` / ``||`` operator is not followed by
            a command.
    """
    if not self._is_command_start():
        return None

    first = self._parse_pipeline()
    if not first:
        return None

    pipelines: list[PipelineNode] = [first]
    operators: list[str] = []

    # Each && / || must be followed by another pipeline; newlines may
    # separate the operator from it.
    while self._check(TokenType.AND_AND, TokenType.OR_OR):
        self._check_iteration_limit()
        operators.append(self._advance().value)
        self._skip_newlines()

        following = self._parse_pipeline()
        if not following:
            raise self._error("Expected command after operator")
        pipelines.append(following)

    # A trailing & marks the whole statement as a background job.
    background = self._check(TokenType.AMP)
    if background:
        self._advance()

    return AST.statement(pipelines, operators, background)
406
+
407
def _parse_pipeline(self) -> Optional[PipelineNode]:
    """Parse ``[!] command (| command)*``.

    Both PIPE and PIPE_AMP tokens are accepted as the connector.

    Returns:
        The pipeline node, or None when no command is present.

    Raises:
        ParseException: when ``!`` or a pipe is not followed by a command.
    """
    negated = self._check(TokenType.BANG)
    if negated:
        self._advance()
        self._skip_newlines()

    head = self._parse_command()
    if not head:
        if negated:
            raise self._error("Expected command after !")
        return None

    commands: list[CommandNode] = [head]

    while self._check(TokenType.PIPE, TokenType.PIPE_AMP):
        self._check_iteration_limit()
        self._advance()
        self._skip_newlines()

        stage = self._parse_command()
        if not stage:
            raise self._error("Expected command after pipe")
        commands.append(stage)

    return AST.pipeline(commands, negated)
437
+
438
def _parse_command(self) -> Optional[CommandNode]:
    """Parse one command: compound command, function definition or simple command."""
    # Token type that opens a compound command -> name of the method that
    # parses it. The method is looked up only when its token is seen.
    compound_parsers = {
        TokenType.IF: "_parse_if",
        TokenType.FOR: "_parse_for",
        TokenType.WHILE: "_parse_while",
        TokenType.UNTIL: "_parse_until",
        TokenType.CASE: "_parse_case",
        TokenType.LPAREN: "_parse_subshell",
        TokenType.LBRACE: "_parse_group",
        TokenType.DBRACK_START: "_parse_conditional_command",
        TokenType.DPAREN_START: "_parse_arithmetic_command",
        TokenType.FUNCTION: "_parse_function_def",
    }
    parser_name = compound_parsers.get(self._current().type)
    if parser_name is not None:
        return getattr(self, parser_name)()

    # name() { ... } style function definition
    starts_with_word = self._check(TokenType.NAME, TokenType.WORD)
    if starts_with_word and self._peek(1).type == TokenType.LPAREN:
        return self._parse_function_def()

    # Anything else is treated as a simple command.
    return self._parse_simple_command()
469
+
470
def _parse_simple_command(self) -> Optional[SimpleCommandNode]:
    """Parse ``(assignment | redirection)* [name] (argument | redirection)*``.

    Returns:
        The simple-command node, or None when there is no assignment, no
        redirection and no command name.
    """
    assignments: list[AssignmentNode] = []
    redirections: list[RedirectionNode] = []
    args: list[WordNode] = []
    name: Optional[WordNode] = None

    # --- Prefix: assignments and redirections before the command name ---
    while True:
        self._check_iteration_limit()

        if self._check(TokenType.ASSIGNMENT_WORD):
            # VAR=(...) is an array assignment, anything else a scalar one.
            if self._peek(1).type == TokenType.LPAREN:
                assignments.append(self._parse_array_assignment())
            else:
                assignments.append(self._parse_assignment())
            continue

        leading_redir = self._try_parse_redirection()
        if not leading_redir:
            break
        redirections.append(leading_redir)

    # --- Command name ---
    if self._check(
        TokenType.WORD, TokenType.NAME, TokenType.NUMBER, TokenType.IN
    ):
        name = self._parse_word()

    # Token types accepted as arguments. Reserved words are only special in
    # command position, and ASSIGNMENT_WORD is a valid argument for builtins
    # such as declare, export and local.
    argument_types = (
        TokenType.WORD,
        TokenType.NAME,
        TokenType.NUMBER,
        TokenType.ASSIGNMENT_WORD,
        TokenType.IN,
        TokenType.DO,
        TokenType.DONE,
        TokenType.IF,
        TokenType.THEN,
        TokenType.ELSE,
        TokenType.ELIF,
        TokenType.FI,
        TokenType.FOR,
        TokenType.WHILE,
        TokenType.UNTIL,
        TokenType.CASE,
        TokenType.ESAC,
        TokenType.FUNCTION,
    )

    # --- Arguments and trailing redirections ---
    while not self._is_statement_end():
        self._check_iteration_limit()

        trailing_redir = self._try_parse_redirection()
        if trailing_redir:
            redirections.append(trailing_redir)
            continue

        # VAR=(...) used as an argument (e.g. declare -a arr=(a b c)) is
        # folded into one literal word "VAR=(a b c)".
        if (
            self._check(TokenType.ASSIGNMENT_WORD)
            and self._peek(1).type == TokenType.LPAREN
        ):
            prefix = self._advance().value  # the ASSIGNMENT_WORD
            self._advance()  # the LPAREN

            pieces = []
            while not self._check(TokenType.RPAREN, TokenType.EOF):
                pieces.append(self._advance().value)

            array_text = prefix + "(" + " ".join(pieces)
            if self._check(TokenType.RPAREN):
                self._advance()
                array_text += ")"

            args.append(AST.word([LiteralPart(value=array_text)]))
            continue

        if not self._check(*argument_types):
            break
        args.append(self._parse_word())

    # Nothing at all was parsed: this is not a simple command.
    if name is None and not assignments and not redirections:
        return None

    # Heredoc placeholders must be filled in before the node is built.
    if self.pending_heredocs:
        redirections = self._resolve_pending_heredocs(redirections)

    return AST.simple_command(name, args, assignments, redirections)
582
+
583
def _parse_assignment(self) -> AssignmentNode:
    """Parse a scalar variable assignment (``NAME=value`` or ``NAME+=value``).

    Returns:
        An assignment node. The value word is None when nothing follows the
        ``=``; the append flag is set for the ``+=`` form.

    Raises:
        ParseException: when the current token is not an ASSIGNMENT_WORD or
            its text contains no ``=``.
    """
    token = self._expect(TokenType.ASSIGNMENT_WORD)
    value = token.value

    # Find the = sign
    eq_idx = value.find("=")
    if eq_idx == -1:
        # Report at the assignment token itself: _expect() has already moved
        # the cursor on, so self._error() would point at the next token.
        raise ParseException(
            f"Invalid assignment: {value}", token.line, token.column, token
        )

    # NAME+=value appends; the "+" is not part of the variable name.
    append = eq_idx > 0 and value[eq_idx - 1] == "+"
    name = value[: eq_idx - 1] if append else value[:eq_idx]

    value_str = value[eq_idx + 1 :]

    # TODO: a value starting with "(" (inline array text) is not parsed as an
    # array here; it is treated like any other simple value.
    if value_str:
        value_word = self._parse_word_from_string(value_str, quoted=False)
    else:
        value_word = None

    return AST.assignment(name, value_word, append)
618
+
619
def _parse_array_assignment(self) -> AssignmentNode:
    """Parse an array assignment: ``VAR=(elem1 elem2 ...)`` or ``VAR+=(...)``.

    Returns:
        An assignment node with no scalar value and the collected element
        words passed as ``array``.

    Raises:
        ParseException: when the current token is not an ASSIGNMENT_WORD,
            its text contains no ``=``, or it is not followed by ``(``.
    """
    token = self._expect(TokenType.ASSIGNMENT_WORD)
    value = token.value

    # Find the = sign
    eq_idx = value.find("=")
    if eq_idx == -1:
        # Report at the assignment token itself: _expect() has already moved
        # the cursor on, so self._error() would point at the next token.
        raise ParseException(
            f"Invalid assignment: {value}", token.line, token.column, token
        )

    # VAR+=(...) appends; the "+" is not part of the variable name.
    append = eq_idx > 0 and value[eq_idx - 1] == "+"
    name = value[: eq_idx - 1] if append else value[:eq_idx]

    self._expect(TokenType.LPAREN)

    # Collect array elements until RPAREN (or EOF for an unterminated array).
    elements: list[WordNode] = []
    while not self._check(TokenType.RPAREN, TokenType.EOF):
        if self._check(
            TokenType.WORD,
            TokenType.NAME,
            TokenType.NUMBER,
            TokenType.ASSIGNMENT_WORD,
        ):
            elements.append(self._parse_word())
        else:
            # Best effort: tokens that cannot be elements are skipped.
            self._advance()

    # A missing closing parenthesis is tolerated rather than reported.
    if self._check(TokenType.RPAREN):
        self._advance()

    return AST.assignment(name, None, append, array=elements)
655
+
656
def _try_parse_redirection(self) -> Optional[RedirectionNode]:
    """Parse a redirection at the cursor, or return None if there is none.

    A leading NUMBER is taken as the file descriptor only when the operator
    starts in the column right after it ("3>file", not "3 >file").

    Raises:
        ParseException: when a non-heredoc operator is not followed by a
            WORD, NAME or NUMBER target, or when heredoc parsing fails.
    """
    fd: Optional[int] = None
    if self._check(TokenType.NUMBER):
        number_token = self._current()
        following = self._peek(1)
        fd_operators = (
            TokenType.LESS,
            TokenType.GREAT,
            TokenType.DGREAT,
            TokenType.LESSAND,
            TokenType.GREATAND,
            TokenType.LESSGREAT,
            TokenType.CLOBBER,
        )
        # Adjacent means the operator begins where the number's text ends.
        if (
            following.type in fd_operators
            and following.column == number_token.column + len(number_token.value)
        ):
            fd = int(self._advance().value)

    op_map: dict[TokenType, RedirectionOperator] = {
        TokenType.LESS: "<",
        TokenType.GREAT: ">",
        TokenType.DGREAT: ">>",
        TokenType.LESSAND: "<&",
        TokenType.GREATAND: ">&",
        TokenType.LESSGREAT: "<>",
        TokenType.CLOBBER: ">|",
        TokenType.TLESS: "<<<",
        TokenType.DLESS: "<<",
        TokenType.DLESSDASH: "<<-",
        TokenType.AND_GREAT: "&>",
        TokenType.AND_DGREAT: "&>>",
    }

    op = op_map.get(self._current().type)
    if op is None:
        return None
    self._advance()

    # Here-documents take a delimiter, not a target word.
    if op in ("<<", "<<-"):
        return self._parse_heredoc_start(op, fd)

    if not self._check(TokenType.WORD, TokenType.NAME, TokenType.NUMBER):
        raise self._error(f"Expected target for redirection {op}")

    return AST.redirection(op, self._parse_word(), fd)
712
+
713
def _parse_heredoc_start(
    self, op: RedirectionOperator, fd: Optional[int]
) -> RedirectionNode:
    """Parse a here-document delimiter and register the heredoc as pending.

    The returned redirection carries an empty literal word as its target;
    it is a placeholder until the heredoc content has been read.

    Raises:
        ParseException: when the current token is not a WORD or NAME.
    """
    if not self._check(TokenType.WORD, TokenType.NAME):
        raise self._error("Expected here-document delimiter")

    delim_token = self._advance()
    delimiter = delim_token.value
    quoted = delim_token.quoted or delim_token.single_quoted

    # A delimiter wrapped in matching quotes loses them and counts as quoted.
    for quote_char in ("'", '"'):
        if delimiter.startswith(quote_char) and delimiter.endswith(quote_char):
            delimiter = delimiter[1:-1]
            quoted = True
            break

    placeholder = AST.word([AST.literal("")])

    self.pending_heredocs.append(
        {
            "delimiter": delimiter,
            "strip_tabs": op == "<<-",
            "quoted": quoted,
            "redirect_target": None,
        }
    )

    return AST.redirection(op, placeholder, fd)
748
+
749
+ # =========================================================================
750
+ # Compound command parsing
751
+ # =========================================================================
752
+
753
def _parse_compound_list(self) -> list[StatementNode]:
    """Collect the statements forming the body of a compound command.

    Parsing stops at EOF, at any token that closes or continues an enclosing
    construct (then/else/elif/fi/do/done/esac/'}'/')'), or at the first token
    that cannot start a command.
    """
    stop_tokens = (
        TokenType.EOF,
        TokenType.THEN,
        TokenType.ELSE,
        TokenType.ELIF,
        TokenType.FI,
        TokenType.DO,
        TokenType.DONE,
        TokenType.ESAC,
        TokenType.RBRACE,
        TokenType.RPAREN,
    )

    collected: list[StatementNode] = []
    self._skip_newlines()

    while True:
        if self._check(*stop_tokens):
            break
        self._check_iteration_limit()
        if not self._is_command_start():
            break
        node = self._parse_statement()
        if node:
            collected.append(node)
        self._skip_separators()

    return collected
779
+
780
def _parse_if(self) -> IfNode:
    """Parse ``if ... then ... [elif ... then ...]* [else ...] fi``.

    Each condition must contain at least one statement. Redirections that
    follow ``fi`` are attached to the resulting node.
    """
    self._expect(TokenType.IF)

    branches: list[IfClause] = []
    missing_condition = "Expected condition after 'if'"

    # The leading 'if' branch and every 'elif' branch share the same shape.
    while True:
        self._skip_newlines()

        test = self._parse_compound_list()
        if not test:
            raise self._error(missing_condition)

        self._skip_newlines()
        self._expect(TokenType.THEN, "Expected 'then' after condition")
        self._skip_newlines()

        consequence = self._parse_compound_list()
        branches.append(AST.if_clause(test, consequence))

        if not self._check(TokenType.ELIF):
            break
        self._advance()
        missing_condition = "Expected condition after 'elif'"

    # Optional else branch
    fallback: Optional[list[StatementNode]] = None
    if self._check(TokenType.ELSE):
        self._advance()
        self._skip_newlines()
        fallback = self._parse_compound_list()

    self._skip_newlines()
    self._expect(TokenType.FI, "Expected 'fi' to close if statement")

    # Redirections written after 'fi'
    trailing: list[RedirectionNode] = []
    while redir := self._try_parse_redirection():
        trailing.append(redir)

    return AST.if_node(branches, fallback, trailing)
836
+
837
def _parse_for(self) -> ForNode:
    """Parse ``for NAME [in WORD...] do ... done`` with trailing redirections.

    The word list passed to the node is None when there is no ``in`` clause
    and a (possibly empty) list when there is one.
    """
    self._expect(TokenType.FOR)
    self._skip_newlines()

    # Loop variable
    if not self._check(TokenType.NAME, TokenType.WORD):
        raise self._error("Expected variable name after 'for'")
    loop_var = self._advance().value

    self._skip_newlines()

    # Optional 'in word...' list
    items: Optional[list[WordNode]] = None
    if self._check(TokenType.IN):
        self._advance()
        items = []
        while True:
            if self._check(
                TokenType.SEMICOLON,
                TokenType.NEWLINE,
                TokenType.DO,
                TokenType.EOF,
            ):
                break
            self._check_iteration_limit()
            if not self._check(TokenType.WORD, TokenType.NAME, TokenType.NUMBER):
                break
            items.append(self._parse_word())

    # Separators before 'do'
    self._skip_separators()
    self._expect(TokenType.DO, "Expected 'do' in for loop")
    self._skip_newlines()

    loop_body = self._parse_compound_list()

    self._skip_newlines()
    self._expect(TokenType.DONE, "Expected 'done' to close for loop")

    # Redirections written after 'done'
    trailing: list[RedirectionNode] = []
    while redir := self._try_parse_redirection():
        trailing.append(redir)

    return AST.for_node(loop_var, items, loop_body, trailing)
886
+
887
def _parse_while(self) -> WhileNode:
    """Parse ``while <condition> do <body> done`` with trailing redirections.

    The condition must contain at least one statement.
    """
    self._expect(TokenType.WHILE)
    self._skip_newlines()

    test = self._parse_compound_list()
    if not test:
        raise self._error("Expected condition after 'while'")

    self._skip_newlines()
    self._expect(TokenType.DO, "Expected 'do' after condition")
    self._skip_newlines()

    loop_body = self._parse_compound_list()

    self._skip_newlines()
    self._expect(TokenType.DONE, "Expected 'done' to close while loop")

    # Redirections written after 'done'
    trailing: list[RedirectionNode] = []
    while redir := self._try_parse_redirection():
        trailing.append(redir)

    return AST.while_node(test, loop_body, trailing)
916
+
917
def _parse_until(self) -> UntilNode:
    """Parse ``until <condition> do <body> done`` with trailing redirections.

    The condition must contain at least one statement.
    """
    self._expect(TokenType.UNTIL)
    self._skip_newlines()

    test = self._parse_compound_list()
    if not test:
        raise self._error("Expected condition after 'until'")

    self._skip_newlines()
    self._expect(TokenType.DO, "Expected 'do' after condition")
    self._skip_newlines()

    loop_body = self._parse_compound_list()

    self._skip_newlines()
    self._expect(TokenType.DONE, "Expected 'done' to close until loop")

    # Redirections written after 'done'
    trailing: list[RedirectionNode] = []
    while redir := self._try_parse_redirection():
        trailing.append(redir)

    return AST.until_node(test, loop_body, trailing)
946
+
947
def _parse_case(self) -> CaseNode:
    """Parse ``case WORD in [(]PATTERN[|PATTERN]...) BODY TERMINATOR ... esac``.

    A case item may end with ``;;``, ``;&`` or ``;;&``; when no terminator
    token is present the item is recorded with ``;;``. Redirections after
    ``esac`` are attached to the node.
    """
    self._expect(TokenType.CASE)
    self._skip_newlines()

    # Subject of the case statement
    if not self._check(TokenType.WORD, TokenType.NAME, TokenType.NUMBER):
        raise self._error("Expected word after 'case'")
    subject = self._parse_word()

    self._skip_newlines()
    self._expect(TokenType.IN, "Expected 'in' after case word")
    self._skip_newlines()

    # Terminator tokens in the order they are tested, with their spelling.
    terminator_tokens = (
        (TokenType.DSEMI, ";;"),
        (TokenType.SEMI_AND, ";&"),
        (TokenType.SEMI_SEMI_AND, ";;&"),
    )

    entries: list[CaseItemNode] = []
    while not self._check(TokenType.ESAC, TokenType.EOF):
        self._check_iteration_limit()
        self._skip_newlines()

        if self._check(TokenType.ESAC):
            break

        # An opening parenthesis before the pattern list is optional.
        if self._check(TokenType.LPAREN):
            self._advance()

        # Pattern alternatives separated by '|'
        alternatives: list[WordNode] = []
        while True:
            self._check_iteration_limit()
            if not self._check(TokenType.WORD, TokenType.NAME, TokenType.NUMBER):
                if not alternatives:
                    raise self._error("Expected pattern in case item")
                break
            alternatives.append(self._parse_word())

            if not self._check(TokenType.PIPE):
                break
            self._advance()

        self._expect(TokenType.RPAREN, "Expected ')' after patterns")
        self._skip_newlines()

        entry_body = self._parse_compound_list()

        # Terminator (;;, ;&, ;;&); ';;' is assumed when none is present.
        ending = ";;"
        for token_type, spelling in terminator_tokens:
            if self._check(token_type):
                self._advance()
                ending = spelling
                break

        entries.append(AST.case_item(alternatives, entry_body, ending))
        self._skip_newlines()

    self._expect(TokenType.ESAC, "Expected 'esac' to close case statement")

    # Redirections written after 'esac'
    trailing: list[RedirectionNode] = []
    while redir := self._try_parse_redirection():
        trailing.append(redir)

    return AST.case_node(subject, entries, trailing)
1023
+
1024
def _parse_subshell(self) -> SubshellNode:
    """Parse ``( ... )`` and any redirections that follow the closing paren."""
    self._expect(TokenType.LPAREN)
    self._skip_newlines()

    inner = self._parse_compound_list()

    self._skip_newlines()
    self._expect(TokenType.RPAREN, "Expected ')' to close subshell")

    # Redirections written after ')'
    trailing: list[RedirectionNode] = []
    while redir := self._try_parse_redirection():
        trailing.append(redir)

    return AST.subshell(inner, trailing)
1044
+
1045
def _parse_group(self) -> GroupNode:
    """Parse ``{ ...; }`` and any redirections that follow the closing brace."""
    self._expect(TokenType.LBRACE)
    self._skip_newlines()

    inner = self._parse_compound_list()

    # Separators (not only newlines) are skipped before the closing brace.
    self._skip_separators()
    self._expect(TokenType.RBRACE, "Expected '}' to close command group")

    # Redirections written after '}'
    trailing: list[RedirectionNode] = []
    while redir := self._try_parse_redirection():
        trailing.append(redir)

    return AST.group(inner, trailing)
1065
+
1066
+ # Unary operators for conditional expressions
1067
+ _COND_UNARY_OPS = {
1068
+ "-a", "-b", "-c", "-d", "-e", "-f", "-g", "-h", "-k", "-p",
1069
+ "-r", "-s", "-t", "-u", "-w", "-x", "-G", "-L", "-N", "-O",
1070
+ "-S", "-z", "-n", "-o", "-v", "-R",
1071
+ }
1072
+
1073
+ # Binary operators for conditional expressions
1074
+ _COND_BINARY_OPS = {
1075
+ "==", "!=", "=~", "<", ">", "=",
1076
+ "-eq", "-ne", "-lt", "-le", "-gt", "-ge",
1077
+ "-nt", "-ot", "-ef",
1078
+ }
1079
+
1080
def _parse_conditional_command(self) -> ConditionalCommandNode:
    """Parse ``[[ expr ]]`` and any redirections that follow ``]]``.

    The node records the line of the token that was current on entry.
    """
    start_line = self._current().line
    self._expect(TokenType.DBRACK_START)
    self._skip_newlines()

    # The expression grammar is entered at its lowest-precedence level (||).
    expression = self._parse_cond_or()

    self._skip_newlines()
    self._expect(TokenType.DBRACK_END, "Expected ']]' to close conditional")

    # Redirections written after ']]'
    trailing: list[RedirectionNode] = []
    while redir := self._try_parse_redirection():
        trailing.append(redir)

    return ConditionalCommandNode(
        expression=expression,
        redirections=tuple(trailing),
        line=start_line,
    )
1105
+
1106
def _parse_cond_or(self):
    """Parse a left-associative chain of ``||`` inside ``[[ ]]``.

    Newlines around each operator are skipped.
    """
    result = self._parse_cond_and()

    while True:
        self._skip_newlines()
        if not self._check(TokenType.OR_OR):
            return result
        self._advance()
        self._skip_newlines()
        # Fold to the left: (a || b) || c
        result = CondOrNode(left=result, right=self._parse_cond_and())
1119
+
1120
def _parse_cond_and(self):
    """Parse a left-associative chain of ``&&`` inside ``[[ ]]``.

    Newlines around each operator are skipped.
    """
    result = self._parse_cond_not()

    while True:
        self._skip_newlines()
        if not self._check(TokenType.AND_AND):
            return result
        self._advance()
        self._skip_newlines()
        # Fold to the left: (a && b) && c
        result = CondAndNode(left=result, right=self._parse_cond_not())
1133
+
1134
def _parse_cond_not(self):
    """Parse an optional ``!`` prefix; repeated ``!`` nests via recursion."""
    self._skip_newlines()

    if not self._check(TokenType.BANG):
        return self._parse_cond_primary()

    self._advance()
    self._skip_newlines()
    return CondNotNode(operand=self._parse_cond_not())
1144
+
1145
def _parse_cond_primary(self):
    """Parse a primary ``[[ ]]`` expression.

    Handles, in this order: a parenthesised group, a unary test
    (``-f file``), a binary test (``a == b``, ``a -lt b``, ``a < b``,
    ``a =~ regex``), and a bare word (non-empty string test).

    A word is treated as a unary operator only when it is unquoted, so a
    quoted ``"-f"`` or ``'-z'`` is parsed as an ordinary string operand.

    Raises:
        The error built by ``self._error`` when no expression can start at
        the current token, or when a unary operator is directly followed
        by ``]]``.
    """
    # Grouping: ( expr )
    if self._check(TokenType.LPAREN):
        self._advance()
        expr = self._parse_cond_or()
        self._expect(TokenType.RPAREN)
        return CondGroupNode(expression=expr)

    # Anything that is not word-like (including ']]') cannot start a primary.
    if not self._check(TokenType.WORD, TokenType.NAME, TokenType.NUMBER):
        raise self._error("Expected conditional expression")

    first_token = self._current()
    first = first_token.value

    # Unary operators: -f file, -z string, etc. Quoting a word turns it into
    # plain data, so quoted tokens never act as operators.
    is_quoted = first_token.quoted or first_token.single_quoted
    if (
        first_token.type in (TokenType.WORD, TokenType.NAME)
        and not is_quoted
        and first in self._COND_UNARY_OPS
    ):
        self._advance()
        # Unary operators require an operand
        if self._check(TokenType.DBRACK_END):
            raise self._error(f"Expected operand after {first}")
        # The operand can be any word, including a quoted empty string
        operand = self._parse_word()
        return CondUnaryNode(operator=first, operand=operand)

    # Otherwise parse a word, then look for a binary operator after it
    left = self._parse_word()

    # Binary operators that arrive as word tokens
    if self._check(TokenType.WORD, TokenType.NAME):
        op_token = self._current()
        if op_token.value in self._COND_BINARY_OPS:
            self._advance()
            # The RHS of =~ is a regex and may contain unquoted parens/pipes
            if op_token.value == "=~":
                right = self._parse_cond_regex_pattern()
            else:
                right = self._parse_word()
            # Normalize = to ==
            op = "==" if op_token.value == "=" else op_token.value
            return CondBinaryNode(operator=op, left=left, right=right)

    # < and > are tokenized as LESS and GREAT rather than as words
    if self._check(TokenType.LESS):
        self._advance()
        right = self._parse_word()
        return CondBinaryNode(operator="<", left=left, right=right)
    if self._check(TokenType.GREAT):
        self._advance()
        right = self._parse_word()
        return CondBinaryNode(operator=">", left=left, right=right)

    # Just a word (non-empty string test)
    return CondWordNode(word=left)
1204
+
1205
def _parse_cond_regex_pattern(self) -> Optional[WordNode]:
    """Gather the right-hand side of ``=~`` into a single word.

    Parentheses and pipes are taken as literal regex characters rather than
    shell operators. Collection stops at ``]]``, ``&&``, ``||``, a newline,
    EOF, or any token that is neither word-like nor one of those literals.

    Returns:
        A WordNode built from the collected parts, or None when nothing was
        collected.
    """
    stop_types = (
        TokenType.DBRACK_END,
        TokenType.AND_AND,
        TokenType.OR_OR,
        TokenType.NEWLINE,
    )
    # Operator tokens that are plain regex characters in this context
    literal_spelling = {
        TokenType.LPAREN: "(",
        TokenType.RPAREN: ")",
        TokenType.PIPE: "|",
    }
    word_types = (
        TokenType.WORD,
        TokenType.NAME,
        TokenType.NUMBER,
        TokenType.ASSIGNMENT_WORD,
    )

    pieces: list = []
    start_line = self._current().line

    while not self._check(TokenType.EOF):
        kind = self._current().type

        if kind in stop_types:
            break

        if kind in literal_spelling:
            pieces.append(LiteralPart(value=literal_spelling[kind]))
            self._advance()
        elif kind in word_types:
            parsed = self._parse_word()
            if parsed and parsed.parts:
                pieces.extend(parsed.parts)
        else:
            # Unknown token, stop
            break

    if not pieces:
        return None

    return WordNode(parts=tuple(pieces), line=start_line)
1255
+
1256
def _is_cond_word_token(self) -> bool:
    """Report whether the current token can serve as a word inside ``[[ ]]``."""
    word_like = (
        TokenType.WORD,
        TokenType.NAME,
        TokenType.NUMBER,
        TokenType.ASSIGNMENT_WORD,  # may show up inside conditionals
    )
    return self._check(*word_like)
1262
+
1263
def _parse_arithmetic_command(self) -> ArithmeticCommandNode:
    """Parse an arithmetic command: ``(( expr ))``.

    Token values between the delimiters are concatenated (without
    separators) and handed to ``self._parse_arithmetic_expression``.

    Returns:
        A node whose ``expression`` is None for an empty ``(( ))``; otherwise
        an ArithmeticExpressionNode, whose own ``expression`` is None when
        the text could not be parsed.

    Raises:
        The error built by ``self._error`` when the input ends before the
        closing ``))``.
    """
    line = self._current().line
    self._expect(TokenType.DPAREN_START)

    # Collect everything until the matching ))
    chunks: list[str] = []
    depth = 1  # We've consumed one ((

    while depth > 0:
        if self._check(TokenType.EOF):
            # Like the other compound commands, a missing closer is a syntax
            # error rather than something to accept silently.
            raise self._error("Expected '))' to close arithmetic command")

        if self._check(TokenType.DPAREN_START):
            depth += 1
            chunks.append("((")
        elif self._check(TokenType.DPAREN_END):
            depth -= 1
            # The outermost )) is the delimiter, not part of the expression
            if depth > 0:
                chunks.append("))")
        elif self._check(TokenType.LPAREN):
            chunks.append("(")
        elif self._check(TokenType.RPAREN):
            chunks.append(")")
        else:
            chunks.append(self._current().value)
        self._advance()

    # Parse the arithmetic expression
    expr_text = "".join(chunks).strip()
    if not expr_text:
        expr_node = None
    else:
        try:
            arith_expr = self._parse_arithmetic_expression(expr_text)
            expr_node = ArithmeticExpressionNode(expression=arith_expr)
        except Exception:
            # Deliberate best effort: an expression that cannot be parsed is
            # kept as a node with no expression instead of aborting the parse.
            expr_node = ArithmeticExpressionNode(expression=None)

    return ArithmeticCommandNode(
        expression=expr_node,
        line=line,
    )
1308
+
1309
def _parse_function_def(self) -> FunctionDefNode:
    """Parse a function definition.

    Accepts an optional leading ``function`` keyword, the function name, an
    optional ``()`` pair, and a compound-command body (``{ }``, ``( )``,
    ``if``, ``for``, ``while``, ``until`` or ``case``), followed by optional
    redirections.

    Raises:
        The error built by ``self._error`` when the name is missing or the
        body is not one of the supported compound commands.
    """
    # The 'function' keyword is optional and is not recorded in the AST.
    if self._check(TokenType.FUNCTION):
        self._advance()
        self._skip_newlines()

    # Get function name
    if not self._check(TokenType.NAME, TokenType.WORD):
        raise self._error("Expected function name")
    name = self._advance().value

    # Optional () after name
    if self._check(TokenType.LPAREN):
        self._advance()
        self._expect(TokenType.RPAREN, "Expected ')' after '(' in function definition")

    self._skip_newlines()

    # Body parsers keyed by the token that opens the compound command,
    # tried in this order.
    body_parsers = (
        (TokenType.LBRACE, self._parse_group),
        (TokenType.LPAREN, self._parse_subshell),
        (TokenType.IF, self._parse_if),
        (TokenType.FOR, self._parse_for),
        (TokenType.WHILE, self._parse_while),
        (TokenType.UNTIL, self._parse_until),
        (TokenType.CASE, self._parse_case),
    )
    for opening_token, parse_body in body_parsers:
        if self._check(opening_token):
            body = parse_body()
            break
    else:
        raise self._error("Expected compound command as function body")

    # Parse optional redirections (after function body)
    redirections: list[RedirectionNode] = []
    while redir := self._try_parse_redirection():
        redirections.append(redir)

    return AST.function_def(name, body, redirections)
1357
+
1358
+ def _parse_word(self) -> WordNode:
1359
+ """Parse a word token into a WordNode with parts."""
1360
+ token = self._advance()
1361
+ return self._parse_word_from_string(
1362
+ token.value,
1363
+ quoted=token.quoted,
1364
+ single_quoted=token.single_quoted,
1365
+ )
1366
+
1367
def _parse_word_from_string(self, value: str, quoted: bool = False, single_quoted: bool = False) -> WordNode:
    """Build a WordNode from raw text, preserving its quoting context.

    Single-quoted text that parsed to exactly one literal is wrapped in a
    SingleQuotedPart; double-quoted text is wrapped in a DoubleQuotedPart;
    anything else is returned as the parsed parts unchanged.
    """
    pieces = self._parse_word_parts(value, quoted, single_quoted)

    if single_quoted:
        # Only a lone literal is re-wrapped; other shapes pass through as-is.
        if len(pieces) == 1 and isinstance(pieces[0], LiteralPart):
            pieces = [SingleQuotedPart(value=pieces[0].value)]
    elif quoted:
        # Keep the double-quote context around the inner parts.
        pieces = [DoubleQuotedPart(parts=tuple(pieces))]

    return AST.word(pieces)
1377
+
1378
def _parse_word_parts(self, value: str, quoted: bool = False, single_quoted: bool = False) -> list[WordPart]:
    """Split the text of one word into literal and expansion parts.

    At each position the following constructs are tried in order:
    ``$((...))``, ``$(...)``, ``${...}``, ``$name`` and special parameters,
    backtick substitution, embedded single quotes, embedded double quotes,
    glob characters, a leading tilde and backslash escapes. Any other
    character is accumulated as literal text.

    Args:
        value: Raw text of the word.
        quoted: True when the text comes from inside double quotes; embedded
            quotes, globs, tilde and backslash escapes are then not handled
            here.
        single_quoted: True when the text comes from inside single quotes;
            it is returned as one literal without any expansion.

    Returns:
        The parts in source order. An empty single-quoted value yields an
        empty list; otherwise at least one (possibly empty) literal is
        returned.
    """
    # Single-quoted strings are completely literal - no expansions
    if single_quoted:
        return [AST.literal(value)] if value else []

    parts: list[WordPart] = []
    pending: list[str] = []  # characters of the literal being accumulated
    n = len(value)
    i = 0

    def flush_literal() -> None:
        """Emit the accumulated literal text, if any, as one part."""
        if pending:
            parts.append(AST.literal("".join(pending)))
            pending.clear()

    def scan_balanced(start: int, opener: str, closer: str):
        """Return (inner text, index after the span) for a nested span.

        ``start`` is the index just after the opening delimiter.
        """
        depth = 1
        j = start
        while j < n and depth > 0:
            if value[j] == opener:
                depth += 1
            elif value[j] == closer:
                depth -= 1
            j += 1
        # With no closer in sight keep the last character instead of
        # slicing it off as if it were the delimiter.
        end = j - 1 if depth == 0 else j
        return value[start:end], j

    while i < n:
        c = value[i]

        # Handle $((...)) arithmetic expansion - MUST come before $(...) check
        if value.startswith("$((", i):
            flush_literal()
            depth = 2  # two parentheses are open after "$(("
            start = i + 3
            j = start
            while j < n:
                if value.startswith("((", j):
                    depth += 2
                    j += 2
                elif value.startswith("))", j):
                    depth -= 2
                    j += 2
                elif value[j] == "(":
                    depth += 1
                    j += 1
                elif value[j] == ")":
                    depth -= 1
                    j += 1
                else:
                    j += 1
                    continue
                # Stop as soon as the expansion is closed, whichever branch
                # closed it; otherwise text after "...)))" would be swallowed.
                if depth <= 0:
                    break
            # The last two consumed characters are the closing "))" unless
            # the expansion was never closed.
            end = j - 2 if depth <= 0 else j
            arith_node = self._parse_arithmetic_expression(value[start:end])
            parts.append(
                ArithmeticExpansionPart(
                    expression=ArithmeticExpressionNode(expression=arith_node),
                )
            )
            i = j
            continue

        # Handle $(...) command substitution
        if value.startswith("$(", i):
            flush_literal()
            cmd_body, i = scan_balanced(i + 2, "(", ")")
            # Recursively parse the command body
            try:
                parsed_body = Parser().parse(cmd_body)
                parts.append(
                    CommandSubstitutionPart(
                        body=parsed_body,
                        legacy=False,
                    )
                )
            except Exception:
                # Best effort: if parsing fails, treat as literal
                parts.append(AST.literal(f"$({cmd_body})"))
            continue

        # Handle ${...} parameter expansion
        if value.startswith("${", i):
            flush_literal()
            param_content, i = scan_balanced(i + 2, "{", "}")
            parts.append(self._parse_parameter_expansion(param_content))
            continue

        # Handle simple $VAR expansion
        if c == "$" and i + 1 < n:
            next_c = value[i + 1]
            # Special parameters. "_" is deliberately absent: it is a valid
            # first character of a name, so "$_" and "$_name" are both
            # handled by the variable-name branch below.
            if next_c in "?$#@*!-0123456789":
                flush_literal()
                parts.append(ParameterExpansionPart(parameter=next_c))
                i += 2
                continue
            # Variable name
            if next_c.isalpha() or next_c == "_":
                flush_literal()
                j = i + 1
                while j < n and (value[j].isalnum() or value[j] == "_"):
                    j += 1
                var_name = value[i + 1 : j]
                parts.append(ParameterExpansionPart(parameter=var_name))
                i = j
                continue

        # Handle backtick command substitution
        if c == "`":
            flush_literal()
            j = i + 1
            while j < n and value[j] != "`":
                if value[j] == "\\" and j + 1 < n:
                    j += 2
                else:
                    j += 1
            cmd_raw = value[i + 1 : j]
            # Process backslash escapes in backtick substitution
            # Only \`, \\, and \$ are special inside backticks
            cmd = []
            k = 0
            while k < len(cmd_raw):
                if cmd_raw[k] == "\\" and k + 1 < len(cmd_raw):
                    next_c = cmd_raw[k + 1]
                    if next_c in "`\\$":
                        cmd.append(next_c)
                        k += 2
                    else:
                        cmd.append(cmd_raw[k])
                        k += 1
                else:
                    cmd.append(cmd_raw[k])
                    k += 1
            cmd_body = "".join(cmd)
            try:
                parsed_body = Parser().parse(cmd_body)
                parts.append(
                    CommandSubstitutionPart(
                        body=parsed_body,
                        legacy=True,  # Mark as backtick style
                    )
                )
            except Exception:
                # Best effort: if parsing fails, treat as literal
                parts.append(AST.literal(f"`{cmd_body}`"))
            i = j + 1
            continue

        # Handle single-quoted strings - completely literal, no expansions
        if c == "'" and not quoted:
            flush_literal()
            j = i + 1
            while j < n and value[j] != "'":
                j += 1
            content = value[i + 1 : j]
            parts.append(SingleQuotedPart(value=content))
            i = j + 1 if j < n else j
            continue

        # Handle double-quoted strings - expansions occur but no word splitting
        if c == '"' and not quoted:
            flush_literal()
            j = i + 1
            # Find matching close quote, respecting escapes
            while j < n and value[j] != '"':
                if value[j] == "\\" and j + 1 < n:
                    j += 2
                else:
                    j += 1
            content = value[i + 1 : j]
            # Recursively parse the content with quoted=True
            inner_parts = self._parse_word_parts(content, quoted=True)
            parts.append(DoubleQuotedPart(parts=tuple(inner_parts)))
            i = j + 1 if j < n else j
            continue

        # Handle glob patterns (only if unquoted)
        if not quoted and c in "*?[":
            flush_literal()
            parts.append(GlobPart(pattern=c))
            i += 1
            continue

        # Handle tilde expansion at start
        if c == "~" and i == 0 and not quoted:
            flush_literal()
            # Check for ~user
            j = 1
            while j < n and (value[j].isalnum() or value[j] == "_"):
                j += 1
            if j > 1:
                user = value[1:j]
                parts.append(TildeExpansionPart(user=user))
            else:
                parts.append(TildeExpansionPart(user=None))
            i = j
            continue

        # Handle escape sequences (only in unquoted context - lexer already handled quoted escapes)
        if c == "\\" and i + 1 < n and not quoted:
            flush_literal()
            parts.append(EscapedPart(value=value[i + 1]))
            i += 2
            continue

        # Regular character
        pending.append(c)
        i += 1

    flush_literal()
    return parts if parts else [AST.literal("")]
1604
+
1605
+ def _parse_parameter_expansion(self, content: str) -> ParameterExpansionPart:
1606
+ """Parse the content inside ${...} into a ParameterExpansionPart.
1607
+
1608
+ Handles:
1609
+ - ${VAR} - simple expansion
1610
+ - ${VAR:-default} - use default if unset
1611
+ - ${VAR:=default} - assign default if unset
1612
+ - ${VAR:?error} - error if unset
1613
+ - ${VAR:+alt} - use alternative if set
1614
+ - ${#VAR} - string length
1615
+ - ${VAR:offset:length} - substring
1616
+ - ${VAR#pattern} - remove shortest prefix
1617
+ - ${VAR##pattern} - remove longest prefix
1618
+ - ${VAR%pattern} - remove shortest suffix
1619
+ - ${VAR%%pattern} - remove longest suffix
1620
+ - ${VAR/pattern/replacement} - replace first match
1621
+ - ${VAR//pattern/replacement} - replace all matches
1622
+ - ${VAR^} - uppercase first char
1623
+ - ${VAR^^} - uppercase all
1624
+ - ${VAR,} - lowercase first char
1625
+ - ${VAR,,} - lowercase all
1626
+ """
1627
+ if not content:
1628
+ return ParameterExpansionPart(parameter="")
1629
+
1630
+ # Handle length operator ${#VAR}
1631
+ if content.startswith("#"):
1632
+ param = content[1:]
1633
+ return ParameterExpansionPart(parameter=param, operation=LengthOp())
1634
+
1635
+ # Find the parameter name (alphanumeric, _, or special chars)
1636
+ i = 0
1637
+ # Handle special parameters like @, *, ?, $, #, !, -, 0-9
1638
+ if content and content[0] in "@*?$#!-0123456789":
1639
+ param = content[0]
1640
+ i = 1
1641
+ else:
1642
+ # Regular variable name
1643
+ while i < len(content) and (content[i].isalnum() or content[i] == "_"):
1644
+ i += 1
1645
+ param = content[:i]
1646
+
1647
+ # If no operation follows, return simple expansion
1648
+ if i >= len(content):
1649
+ return ParameterExpansionPart(parameter=param)
1650
+
1651
+ rest = content[i:]
1652
+
1653
+ # Handle :- := :? :+ (with colon = check empty too)
1654
+ if rest.startswith(":-"):
1655
+ word = self._parse_word_from_string(rest[2:])
1656
+ return ParameterExpansionPart(
1657
+ parameter=param,
1658
+ operation=DefaultValueOp(word=word, check_empty=True),
1659
+ )
1660
+ if rest.startswith("-"):
1661
+ word = self._parse_word_from_string(rest[1:])
1662
+ return ParameterExpansionPart(
1663
+ parameter=param,
1664
+ operation=DefaultValueOp(word=word, check_empty=False),
1665
+ )
1666
+ if rest.startswith(":="):
1667
+ word = self._parse_word_from_string(rest[2:])
1668
+ return ParameterExpansionPart(
1669
+ parameter=param,
1670
+ operation=AssignDefaultOp(word=word, check_empty=True),
1671
+ )
1672
+ if rest.startswith("="):
1673
+ word = self._parse_word_from_string(rest[1:])
1674
+ return ParameterExpansionPart(
1675
+ parameter=param,
1676
+ operation=AssignDefaultOp(word=word, check_empty=False),
1677
+ )
1678
+ if rest.startswith(":?"):
1679
+ word = self._parse_word_from_string(rest[2:])
1680
+ return ParameterExpansionPart(
1681
+ parameter=param,
1682
+ operation=ErrorIfUnsetOp(word=word, check_empty=True),
1683
+ )
1684
+ if rest.startswith("?"):
1685
+ word = self._parse_word_from_string(rest[1:])
1686
+ return ParameterExpansionPart(
1687
+ parameter=param,
1688
+ operation=ErrorIfUnsetOp(word=word, check_empty=False),
1689
+ )
1690
+ if rest.startswith(":+"):
1691
+ word = self._parse_word_from_string(rest[2:])
1692
+ return ParameterExpansionPart(
1693
+ parameter=param,
1694
+ operation=UseAlternativeOp(word=word, check_empty=True),
1695
+ )
1696
+ if rest.startswith("+"):
1697
+ word = self._parse_word_from_string(rest[1:])
1698
+ return ParameterExpansionPart(
1699
+ parameter=param,
1700
+ operation=UseAlternativeOp(word=word, check_empty=False),
1701
+ )
1702
+
1703
+ # Handle substring ${VAR:offset} or ${VAR:offset:length}
1704
+ if rest.startswith(":"):
1705
+ # Find offset and length
1706
+ parts_str = rest[1:]
1707
+ colon_pos = parts_str.find(":")
1708
+ if colon_pos >= 0:
1709
+ offset_str = parts_str[:colon_pos]
1710
+ length_str = parts_str[colon_pos + 1:]
1711
+ try:
1712
+ offset = int(offset_str) if offset_str else 0
1713
+ length = int(length_str) if length_str else None
1714
+ return ParameterExpansionPart(
1715
+ parameter=param,
1716
+ operation=SubstringOp(offset=offset, length=length),
1717
+ )
1718
+ except ValueError:
1719
+ pass # Not a valid substring, fall through
1720
+ else:
1721
+ try:
1722
+ offset = int(parts_str) if parts_str else 0
1723
+ return ParameterExpansionPart(
1724
+ parameter=param,
1725
+ operation=SubstringOp(offset=offset, length=None),
1726
+ )
1727
+ except ValueError:
1728
+ pass
1729
+
1730
+ # Handle pattern removal ${VAR#pattern} ${VAR##pattern} ${VAR%pattern} ${VAR%%pattern}
1731
+ if rest.startswith("##"):
1732
+ pattern = self._parse_word_from_string(rest[2:])
1733
+ return ParameterExpansionPart(
1734
+ parameter=param,
1735
+ operation=PatternRemovalOp(pattern=pattern, greedy=True, side="prefix"),
1736
+ )
1737
+ if rest.startswith("#"):
1738
+ pattern = self._parse_word_from_string(rest[1:])
1739
+ return ParameterExpansionPart(
1740
+ parameter=param,
1741
+ operation=PatternRemovalOp(pattern=pattern, greedy=False, side="prefix"),
1742
+ )
1743
+ if rest.startswith("%%"):
1744
+ pattern = self._parse_word_from_string(rest[2:])
1745
+ return ParameterExpansionPart(
1746
+ parameter=param,
1747
+ operation=PatternRemovalOp(pattern=pattern, greedy=True, side="suffix"),
1748
+ )
1749
+ if rest.startswith("%"):
1750
+ pattern = self._parse_word_from_string(rest[1:])
1751
+ return ParameterExpansionPart(
1752
+ parameter=param,
1753
+ operation=PatternRemovalOp(pattern=pattern, greedy=False, side="suffix"),
1754
+ )
1755
+
1756
+ # Handle pattern replacement ${VAR/pattern/replacement} ${VAR//pattern/replacement}
1757
+ if rest.startswith("//"):
1758
+ slash_pos = rest.find("/", 2)
1759
+ if slash_pos >= 0:
1760
+ pattern = self._parse_word_from_string(rest[2:slash_pos])
1761
+ replacement = self._parse_word_from_string(rest[slash_pos + 1:])
1762
+ else:
1763
+ pattern = self._parse_word_from_string(rest[2:])
1764
+ replacement = self._parse_word_from_string("")
1765
+ return ParameterExpansionPart(
1766
+ parameter=param,
1767
+ operation=PatternReplacementOp(
1768
+ pattern=pattern, replacement=replacement, replace_all=True
1769
+ ),
1770
+ )
1771
+ if rest.startswith("/"):
1772
+ slash_pos = rest.find("/", 1)
1773
+ if slash_pos >= 0:
1774
+ pattern = self._parse_word_from_string(rest[1:slash_pos])
1775
+ replacement = self._parse_word_from_string(rest[slash_pos + 1:])
1776
+ else:
1777
+ pattern = self._parse_word_from_string(rest[1:])
1778
+ replacement = self._parse_word_from_string("")
1779
+ return ParameterExpansionPart(
1780
+ parameter=param,
1781
+ operation=PatternReplacementOp(
1782
+ pattern=pattern, replacement=replacement, replace_all=False
1783
+ ),
1784
+ )
1785
+
1786
+ # Handle case modification ${VAR^} ${VAR^^} ${VAR,} ${VAR,,}
1787
+ if rest.startswith("^^"):
1788
+ return ParameterExpansionPart(
1789
+ parameter=param,
1790
+ operation=CaseModificationOp(direction="upper", all=True),
1791
+ )
1792
+ if rest.startswith("^"):
1793
+ return ParameterExpansionPart(
1794
+ parameter=param,
1795
+ operation=CaseModificationOp(direction="upper", all=False),
1796
+ )
1797
+ if rest.startswith(",,"):
1798
+ return ParameterExpansionPart(
1799
+ parameter=param,
1800
+ operation=CaseModificationOp(direction="lower", all=True),
1801
+ )
1802
+ if rest.startswith(","):
1803
+ return ParameterExpansionPart(
1804
+ parameter=param,
1805
+ operation=CaseModificationOp(direction="lower", all=False),
1806
+ )
1807
+
1808
+ # Handle transforms ${VAR@Q} ${VAR@a} ${VAR@A} ${VAR@E} ${VAR@P} ${VAR@K}
1809
+ if rest.startswith("@") and len(rest) >= 2 and rest[1] in "QaAEPK":
1810
+ op_char = rest[1]
1811
+ return ParameterExpansionPart(
1812
+ parameter=param,
1813
+ operation=TransformOp(operator=op_char),
1814
+ )
1815
+
1816
+ # Default: treat the whole thing as parameter name (for compatibility)
1817
+ return ParameterExpansionPart(parameter=content)
1818
+
1819
+ def _parse_arithmetic_expression(self, expr: str) -> ArithExpr:
1820
+ """Parse an arithmetic expression string into an ArithExpr AST.
1821
+
1822
+ This is a simple recursive descent parser supporting:
1823
+ - Numbers (integers)
1824
+ - Variables
1825
+ - Binary operators: + - * / % ** < > <= >= == != && || & | ^ ,
1826
+ - Unary operators: - + ! ~ ++ --
1827
+ - Parentheses
1828
+ - Ternary: cond ? a : b
1829
+ """
1830
+ expr = expr.strip()
1831
+ if not expr:
1832
+ return ArithNumberNode(value=0)
1833
+
1834
+ return self._parse_arith_comma(expr)
1835
+
1836
+ def _parse_arith_comma(self, expr: str) -> ArithExpr:
1837
+ """Parse comma operator (lowest precedence, left-to-right)."""
1838
+ return self._parse_arith_binary(expr, [','], self._parse_arith_assignment)
1839
+
1840
+ def _parse_arith_assignment(self, expr: str) -> ArithExpr:
1841
+ """Parse assignment operators: = += -= *= /= %= <<= >>= &= |= ^="""
1842
+ expr = expr.strip()
1843
+ # Assignment operators (right-to-left, check longest first)
1844
+ assign_ops = ['<<=', '>>=', '+=', '-=', '*=', '/=', '%=', '&=', '|=', '^=', '=']
1845
+
1846
+ # Scan right-to-left for assignment operator (right-associative)
1847
+ depth = 0
1848
+ for i in range(len(expr) - 1, -1, -1):
1849
+ c = expr[i]
1850
+ if c == ')':
1851
+ depth += 1
1852
+ elif c == '(':
1853
+ depth -= 1
1854
+ elif depth == 0:
1855
+ for op in assign_ops:
1856
+ op_start = i - len(op) + 1
1857
+ if op_start >= 0 and expr[op_start:i + 1] == op:
1858
+ # Make sure it's not == or != or <= or >=
1859
+ if op == '=' and op_start > 0 and expr[op_start - 1] in '=!<>':
1860
+ continue
1861
+ left = expr[:op_start].strip()
1862
+ right = expr[i + 1:].strip()
1863
+ if left and right:
1864
+ # Left must be a variable name (or array access)
1865
+ var_match = re.match(r'^([a-zA-Z_][a-zA-Z0-9_]*)(\[.+\])?$', left)
1866
+ if var_match:
1867
+ var_name = var_match.group(1)
1868
+ subscript = var_match.group(2)
1869
+ subscript_expr = None
1870
+ if subscript:
1871
+ subscript_expr = self._parse_arith_ternary(subscript[1:-1])
1872
+ value_expr = self._parse_arith_assignment(right)
1873
+ return ArithAssignmentNode(
1874
+ operator=op,
1875
+ variable=var_name,
1876
+ subscript=subscript_expr,
1877
+ value=value_expr
1878
+ )
1879
+ return self._parse_arith_ternary(expr)
1880
+
1881
+ def _parse_arith_ternary(self, expr: str) -> ArithExpr:
1882
+ """Parse ternary: cond ? a : b"""
1883
+ # Find unquoted ? and : for ternary
1884
+ depth = 0
1885
+ question_pos = -1
1886
+ for i, c in enumerate(expr):
1887
+ if c == '(':
1888
+ depth += 1
1889
+ elif c == ')':
1890
+ depth -= 1
1891
+ elif c == '?' and depth == 0:
1892
+ question_pos = i
1893
+ break
1894
+
1895
+ if question_pos > 0:
1896
+ # Find the matching : (must track nested ternary depth)
1897
+ colon_pos = -1
1898
+ ternary_depth = 0
1899
+ paren_depth = 0
1900
+ for i in range(question_pos + 1, len(expr)):
1901
+ c = expr[i]
1902
+ if c == '(':
1903
+ paren_depth += 1
1904
+ elif c == ')':
1905
+ paren_depth -= 1
1906
+ elif c == '?' and paren_depth == 0:
1907
+ ternary_depth += 1 # Nested ternary
1908
+ elif c == ':' and paren_depth == 0:
1909
+ if ternary_depth > 0:
1910
+ ternary_depth -= 1 # Close nested ternary
1911
+ else:
1912
+ colon_pos = i
1913
+ break
1914
+
1915
+ if colon_pos > 0:
1916
+ condition = self._parse_arith_or(expr[:question_pos].strip())
1917
+ consequent = self._parse_arith_ternary(expr[question_pos + 1:colon_pos].strip())
1918
+ alternate = self._parse_arith_ternary(expr[colon_pos + 1:].strip())
1919
+ return ArithTernaryNode(condition=condition, consequent=consequent, alternate=alternate)
1920
+
1921
+ return self._parse_arith_or(expr)
1922
+
1923
+ def _parse_arith_or(self, expr: str) -> ArithExpr:
1924
+ """Parse ||"""
1925
+ return self._parse_arith_binary(expr, ['||'], self._parse_arith_and)
1926
+
1927
+ def _parse_arith_and(self, expr: str) -> ArithExpr:
1928
+ """Parse &&"""
1929
+ return self._parse_arith_binary(expr, ['&&'], self._parse_arith_bitor)
1930
+
1931
+ def _parse_arith_bitor(self, expr: str) -> ArithExpr:
1932
+ """Parse |"""
1933
+ return self._parse_arith_binary(expr, ['|'], self._parse_arith_bitxor, exclude=['||'])
1934
+
1935
+ def _parse_arith_bitxor(self, expr: str) -> ArithExpr:
1936
+ """Parse ^"""
1937
+ return self._parse_arith_binary(expr, ['^'], self._parse_arith_bitand)
1938
+
1939
+ def _parse_arith_bitand(self, expr: str) -> ArithExpr:
1940
+ """Parse &"""
1941
+ return self._parse_arith_binary(expr, ['&'], self._parse_arith_equality, exclude=['&&'])
1942
+
1943
+ def _parse_arith_equality(self, expr: str) -> ArithExpr:
1944
+ """Parse == !="""
1945
+ return self._parse_arith_binary(expr, ['==', '!='], self._parse_arith_comparison)
1946
+
1947
+ def _parse_arith_comparison(self, expr: str) -> ArithExpr:
1948
+ """Parse < > <= >="""
1949
+ return self._parse_arith_binary(expr, ['<=', '>=', '<', '>'], self._parse_arith_shift, exclude=['<<', '>>'])
1950
+
1951
+ def _parse_arith_shift(self, expr: str) -> ArithExpr:
1952
+ """Parse << >>"""
1953
+ return self._parse_arith_binary(expr, ['<<', '>>'], self._parse_arith_additive)
1954
+
1955
+ def _parse_arith_additive(self, expr: str) -> ArithExpr:
1956
+ """Parse + -"""
1957
+ return self._parse_arith_binary(expr, ['+', '-'], self._parse_arith_multiplicative, exclude=['++', '--'])
1958
+
1959
+ def _parse_arith_multiplicative(self, expr: str) -> ArithExpr:
1960
+ """Parse * / %"""
1961
+ return self._parse_arith_binary(expr, ['*', '/', '%'], self._parse_arith_power, exclude=['**'])
1962
+
1963
+ def _parse_arith_power(self, expr: str) -> ArithExpr:
1964
+ """Parse ** (right associative)"""
1965
+ return self._parse_arith_binary(expr, ['**'], self._parse_arith_unary, right_assoc=True)
1966
+
1967
+ def _parse_arith_binary(self, expr: str, operators: list[str], next_level,
1968
+ exclude: list[str] | None = None, right_assoc: bool = False) -> ArithExpr:
1969
+ """Parse binary operators at a given precedence level."""
1970
+ expr = expr.strip()
1971
+ depth = 0
1972
+
1973
+ # Sort operators by length (longest first) to match ** before *
1974
+ ops = sorted(operators, key=len, reverse=True)
1975
+ exclude = exclude or []
1976
+
1977
+ # Scan for operator (right-to-left for left-assoc, left-to-right for right-assoc)
1978
+ positions = []
1979
+ i = 0
1980
+ while i < len(expr):
1981
+ c = expr[i]
1982
+ if c == '(':
1983
+ depth += 1
1984
+ i += 1
1985
+ elif c == ')':
1986
+ depth -= 1
1987
+ i += 1
1988
+ elif depth == 0:
1989
+ # First check exclusions - skip past them entirely
1990
+ skip_len = 0
1991
+ for ex in exclude:
1992
+ if expr[i:i+len(ex)] == ex:
1993
+ skip_len = len(ex)
1994
+ break
1995
+ if skip_len:
1996
+ i += skip_len
1997
+ continue
1998
+ # Check for operators
1999
+ matched = False
2000
+ for op in ops:
2001
+ if expr[i:i+len(op)] == op:
2002
+ positions.append((i, op))
2003
+ matched = True
2004
+ i += len(op)
2005
+ break
2006
+ if not matched:
2007
+ i += 1
2008
+ else:
2009
+ i += 1
2010
+
2011
+ if positions:
2012
+ # For left-associative, take rightmost; for right-associative, take leftmost
2013
+ pos, op = positions[-1] if not right_assoc else positions[0]
2014
+ left = expr[:pos].strip()
2015
+ right = expr[pos + len(op):].strip()
2016
+ if left and right:
2017
+ left_node = self._parse_arith_binary(left, operators, next_level, exclude, right_assoc) if not right_assoc else next_level(left)
2018
+ right_node = next_level(right) if not right_assoc else self._parse_arith_binary(right, operators, next_level, exclude, right_assoc)
2019
+ return ArithBinaryNode(operator=op, left=left_node, right=right_node)
2020
+
2021
+ return next_level(expr)
2022
+
2023
+ def _parse_arith_unary(self, expr: str) -> ArithExpr:
2024
+ """Parse unary operators: - + ! ~ ++ --"""
2025
+ expr = expr.strip()
2026
+ # Pre-increment/decrement (must check before single +/-)
2027
+ if expr.startswith('++'):
2028
+ operand = self._parse_arith_unary(expr[2:].strip())
2029
+ return ArithUnaryNode(operator='++', operand=operand, prefix=True)
2030
+ if expr.startswith('--'):
2031
+ operand = self._parse_arith_unary(expr[2:].strip())
2032
+ return ArithUnaryNode(operator='--', operand=operand, prefix=True)
2033
+ if expr.startswith('-') and not expr[1:].lstrip().startswith('-'):
2034
+ operand = self._parse_arith_unary(expr[1:].strip())
2035
+ return ArithUnaryNode(operator='-', operand=operand, prefix=True)
2036
+ if expr.startswith('+') and len(expr) > 1:
2037
+ operand = self._parse_arith_unary(expr[1:].strip())
2038
+ return ArithUnaryNode(operator='+', operand=operand, prefix=True)
2039
+ if expr.startswith('!'):
2040
+ operand = self._parse_arith_unary(expr[1:].strip())
2041
+ return ArithUnaryNode(operator='!', operand=operand, prefix=True)
2042
+ if expr.startswith('~'):
2043
+ operand = self._parse_arith_unary(expr[1:].strip())
2044
+ return ArithUnaryNode(operator='~', operand=operand, prefix=True)
2045
+ return self._parse_arith_postfix(expr)
2046
+
2047
+ def _parse_arith_postfix(self, expr: str) -> ArithExpr:
2048
+ """Parse postfix operators: ++ --"""
2049
+ expr = expr.strip()
2050
+ # Post-increment/decrement
2051
+ if expr.endswith('++'):
2052
+ operand = self._parse_arith_primary(expr[:-2].strip())
2053
+ return ArithUnaryNode(operator='++', operand=operand, prefix=False)
2054
+ if expr.endswith('--'):
2055
+ operand = self._parse_arith_primary(expr[:-2].strip())
2056
+ return ArithUnaryNode(operator='--', operand=operand, prefix=False)
2057
+ return self._parse_arith_primary(expr)
2058
+
2059
def _parse_arith_primary(self, expr: str) -> ArithExpr:
    """Parse a primary: numbers, variables, parentheses.

    Forms are tried in this order:
    1. empty text -> ``ArithNumberNode(0)``
    2. ``( ... )`` -> ``ArithGroupNode`` around the parsed contents
    3. hexadecimal literal (``0x``/``0X`` prefix)
    4. octal literal (leading ``0`` followed by digits)
    5. decimal literal
    6. ``base#value`` constant for bases 2-64
    7. variable name, optionally written as ``$name`` or ``${name}``
    8. anything ``int()`` accepts, else a variable node holding the raw text

    The octal test must run before the decimal one: ``"010".isdigit()`` is
    true and ``int("010")`` is 10, so testing decimal first would make the
    octal branch unreachable and read ``010`` as ten instead of eight.
    """
    expr = expr.strip()

    # Empty expression
    if not expr:
        return ArithNumberNode(value=0)

    # Parenthesized expression
    if expr.startswith('(') and expr.endswith(')'):
        inner = expr[1:-1].strip()
        return ArithGroupNode(expression=self._parse_arith_ternary(inner))

    # Hex number
    if expr.startswith(('0x', '0X')):
        try:
            return ArithNumberNode(value=int(expr, 16))
        except ValueError:
            pass  # not valid hex; try the remaining forms

    # Octal number (checked before decimal, see docstring)
    if expr.startswith('0') and len(expr) > 1 and expr[1:].isdigit():
        try:
            return ArithNumberNode(value=int(expr, 8))
        except ValueError:
            pass  # contains 8 or 9; falls through to the decimal branch

    # Decimal number
    if expr.isdigit() or (expr.startswith('-') and expr[1:].isdigit()):
        return ArithNumberNode(value=int(expr))

    # Base N constant: base#value (e.g., 2#101, 16#ff, 36#z)
    base_match = re.match(r'^(\d+)#([a-zA-Z0-9@_]+)$', expr)
    if base_match:
        base = int(base_match.group(1))
        value_str = base_match.group(2)  # Keep case for bases > 36
        if 2 <= base <= 64:
            try:
                result = self._parse_base_n_value(value_str, base)
                return ArithNumberNode(value=result)
            except ValueError:
                pass  # digit invalid for this base; try the remaining forms

    # Variable (possibly with $)
    var_name = expr
    if var_name.startswith('$'):
        var_name = var_name[1:]
    if var_name.startswith('{') and var_name.endswith('}'):
        var_name = var_name[1:-1]

    # Check if it's a valid identifier
    if var_name and (var_name[0].isalpha() or var_name[0] == '_'):
        if all(c.isalnum() or c == '_' for c in var_name):
            return ArithVariableNode(name=var_name)

    # Try as number anyway
    try:
        return ArithNumberNode(value=int(expr))
    except ValueError:
        pass

    # Fallback: treat as variable
    return ArithVariableNode(name=expr)
2122
+
2123
+ def _parse_base_n_value(self, value_str: str, base: int) -> int:
2124
+ """Parse a value in base N (2-64).
2125
+
2126
+ Digits:
2127
+ - 0-9 = values 0-9
2128
+ - a-z = values 10-35
2129
+ - A-Z = values 36-61 (or 10-35 if base <= 36)
2130
+ - @ = 62, _ = 63
2131
+ """
2132
+ result = 0
2133
+ for char in value_str:
2134
+ if char.isdigit():
2135
+ digit = int(char)
2136
+ elif 'a' <= char <= 'z':
2137
+ digit = ord(char) - ord('a') + 10
2138
+ elif 'A' <= char <= 'Z':
2139
+ if base <= 36:
2140
+ # Case insensitive for bases <= 36
2141
+ digit = ord(char.lower()) - ord('a') + 10
2142
+ else:
2143
+ # A-Z are 36-61 for bases > 36
2144
+ digit = ord(char) - ord('A') + 36
2145
+ elif char == '@':
2146
+ digit = 62
2147
+ elif char == '_':
2148
+ digit = 63
2149
+ else:
2150
+ raise ValueError(f"Invalid digit {char} for base {base}")
2151
+
2152
+ if digit >= base:
2153
+ raise ValueError(f"Digit {char} out of range for base {base}")
2154
+
2155
+ result = result * base + digit
2156
+ return result
2157
+
2158
+
2159
def parse(input_text: str) -> ScriptNode:
    """Parse ``input_text`` with a fresh ``Parser`` and return its result."""
    return Parser().parse(input_text)