just-bash 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- just_bash/__init__.py +55 -0
- just_bash/ast/__init__.py +213 -0
- just_bash/ast/factory.py +320 -0
- just_bash/ast/types.py +953 -0
- just_bash/bash.py +220 -0
- just_bash/commands/__init__.py +23 -0
- just_bash/commands/argv/__init__.py +5 -0
- just_bash/commands/argv/argv.py +21 -0
- just_bash/commands/awk/__init__.py +5 -0
- just_bash/commands/awk/awk.py +1168 -0
- just_bash/commands/base64/__init__.py +5 -0
- just_bash/commands/base64/base64.py +138 -0
- just_bash/commands/basename/__init__.py +5 -0
- just_bash/commands/basename/basename.py +72 -0
- just_bash/commands/bash/__init__.py +5 -0
- just_bash/commands/bash/bash.py +188 -0
- just_bash/commands/cat/__init__.py +5 -0
- just_bash/commands/cat/cat.py +173 -0
- just_bash/commands/checksum/__init__.py +5 -0
- just_bash/commands/checksum/checksum.py +179 -0
- just_bash/commands/chmod/__init__.py +5 -0
- just_bash/commands/chmod/chmod.py +216 -0
- just_bash/commands/column/__init__.py +5 -0
- just_bash/commands/column/column.py +180 -0
- just_bash/commands/comm/__init__.py +5 -0
- just_bash/commands/comm/comm.py +150 -0
- just_bash/commands/compression/__init__.py +5 -0
- just_bash/commands/compression/compression.py +298 -0
- just_bash/commands/cp/__init__.py +5 -0
- just_bash/commands/cp/cp.py +149 -0
- just_bash/commands/curl/__init__.py +5 -0
- just_bash/commands/curl/curl.py +801 -0
- just_bash/commands/cut/__init__.py +5 -0
- just_bash/commands/cut/cut.py +327 -0
- just_bash/commands/date/__init__.py +5 -0
- just_bash/commands/date/date.py +258 -0
- just_bash/commands/diff/__init__.py +5 -0
- just_bash/commands/diff/diff.py +118 -0
- just_bash/commands/dirname/__init__.py +5 -0
- just_bash/commands/dirname/dirname.py +56 -0
- just_bash/commands/du/__init__.py +5 -0
- just_bash/commands/du/du.py +150 -0
- just_bash/commands/echo/__init__.py +5 -0
- just_bash/commands/echo/echo.py +125 -0
- just_bash/commands/env/__init__.py +5 -0
- just_bash/commands/env/env.py +163 -0
- just_bash/commands/expand/__init__.py +5 -0
- just_bash/commands/expand/expand.py +299 -0
- just_bash/commands/expr/__init__.py +5 -0
- just_bash/commands/expr/expr.py +273 -0
- just_bash/commands/file/__init__.py +5 -0
- just_bash/commands/file/file.py +274 -0
- just_bash/commands/find/__init__.py +5 -0
- just_bash/commands/find/find.py +623 -0
- just_bash/commands/fold/__init__.py +5 -0
- just_bash/commands/fold/fold.py +160 -0
- just_bash/commands/grep/__init__.py +5 -0
- just_bash/commands/grep/grep.py +418 -0
- just_bash/commands/head/__init__.py +5 -0
- just_bash/commands/head/head.py +167 -0
- just_bash/commands/help/__init__.py +5 -0
- just_bash/commands/help/help.py +67 -0
- just_bash/commands/hostname/__init__.py +5 -0
- just_bash/commands/hostname/hostname.py +21 -0
- just_bash/commands/html_to_markdown/__init__.py +5 -0
- just_bash/commands/html_to_markdown/html_to_markdown.py +191 -0
- just_bash/commands/join/__init__.py +5 -0
- just_bash/commands/join/join.py +252 -0
- just_bash/commands/jq/__init__.py +5 -0
- just_bash/commands/jq/jq.py +280 -0
- just_bash/commands/ln/__init__.py +5 -0
- just_bash/commands/ln/ln.py +127 -0
- just_bash/commands/ls/__init__.py +5 -0
- just_bash/commands/ls/ls.py +280 -0
- just_bash/commands/mkdir/__init__.py +5 -0
- just_bash/commands/mkdir/mkdir.py +92 -0
- just_bash/commands/mv/__init__.py +5 -0
- just_bash/commands/mv/mv.py +142 -0
- just_bash/commands/nl/__init__.py +5 -0
- just_bash/commands/nl/nl.py +180 -0
- just_bash/commands/od/__init__.py +5 -0
- just_bash/commands/od/od.py +157 -0
- just_bash/commands/paste/__init__.py +5 -0
- just_bash/commands/paste/paste.py +100 -0
- just_bash/commands/printf/__init__.py +5 -0
- just_bash/commands/printf/printf.py +157 -0
- just_bash/commands/pwd/__init__.py +5 -0
- just_bash/commands/pwd/pwd.py +23 -0
- just_bash/commands/read/__init__.py +5 -0
- just_bash/commands/read/read.py +185 -0
- just_bash/commands/readlink/__init__.py +5 -0
- just_bash/commands/readlink/readlink.py +86 -0
- just_bash/commands/registry.py +844 -0
- just_bash/commands/rev/__init__.py +5 -0
- just_bash/commands/rev/rev.py +74 -0
- just_bash/commands/rg/__init__.py +5 -0
- just_bash/commands/rg/rg.py +1048 -0
- just_bash/commands/rm/__init__.py +5 -0
- just_bash/commands/rm/rm.py +106 -0
- just_bash/commands/search_engine/__init__.py +13 -0
- just_bash/commands/search_engine/matcher.py +170 -0
- just_bash/commands/search_engine/regex.py +159 -0
- just_bash/commands/sed/__init__.py +5 -0
- just_bash/commands/sed/sed.py +863 -0
- just_bash/commands/seq/__init__.py +5 -0
- just_bash/commands/seq/seq.py +190 -0
- just_bash/commands/shell/__init__.py +5 -0
- just_bash/commands/shell/shell.py +206 -0
- just_bash/commands/sleep/__init__.py +5 -0
- just_bash/commands/sleep/sleep.py +62 -0
- just_bash/commands/sort/__init__.py +5 -0
- just_bash/commands/sort/sort.py +411 -0
- just_bash/commands/split/__init__.py +5 -0
- just_bash/commands/split/split.py +237 -0
- just_bash/commands/sqlite3/__init__.py +5 -0
- just_bash/commands/sqlite3/sqlite3_cmd.py +505 -0
- just_bash/commands/stat/__init__.py +5 -0
- just_bash/commands/stat/stat.py +150 -0
- just_bash/commands/strings/__init__.py +5 -0
- just_bash/commands/strings/strings.py +150 -0
- just_bash/commands/tac/__init__.py +5 -0
- just_bash/commands/tac/tac.py +158 -0
- just_bash/commands/tail/__init__.py +5 -0
- just_bash/commands/tail/tail.py +180 -0
- just_bash/commands/tar/__init__.py +5 -0
- just_bash/commands/tar/tar.py +1067 -0
- just_bash/commands/tee/__init__.py +5 -0
- just_bash/commands/tee/tee.py +63 -0
- just_bash/commands/timeout/__init__.py +5 -0
- just_bash/commands/timeout/timeout.py +188 -0
- just_bash/commands/touch/__init__.py +5 -0
- just_bash/commands/touch/touch.py +91 -0
- just_bash/commands/tr/__init__.py +5 -0
- just_bash/commands/tr/tr.py +297 -0
- just_bash/commands/tree/__init__.py +5 -0
- just_bash/commands/tree/tree.py +139 -0
- just_bash/commands/true/__init__.py +5 -0
- just_bash/commands/true/true.py +32 -0
- just_bash/commands/uniq/__init__.py +5 -0
- just_bash/commands/uniq/uniq.py +323 -0
- just_bash/commands/wc/__init__.py +5 -0
- just_bash/commands/wc/wc.py +169 -0
- just_bash/commands/which/__init__.py +5 -0
- just_bash/commands/which/which.py +52 -0
- just_bash/commands/xan/__init__.py +5 -0
- just_bash/commands/xan/xan.py +1663 -0
- just_bash/commands/xargs/__init__.py +5 -0
- just_bash/commands/xargs/xargs.py +136 -0
- just_bash/commands/yq/__init__.py +5 -0
- just_bash/commands/yq/yq.py +848 -0
- just_bash/fs/__init__.py +29 -0
- just_bash/fs/in_memory_fs.py +621 -0
- just_bash/fs/mountable_fs.py +504 -0
- just_bash/fs/overlay_fs.py +894 -0
- just_bash/fs/read_write_fs.py +455 -0
- just_bash/interpreter/__init__.py +37 -0
- just_bash/interpreter/builtins/__init__.py +92 -0
- just_bash/interpreter/builtins/alias.py +154 -0
- just_bash/interpreter/builtins/cd.py +76 -0
- just_bash/interpreter/builtins/control.py +127 -0
- just_bash/interpreter/builtins/declare.py +336 -0
- just_bash/interpreter/builtins/export.py +56 -0
- just_bash/interpreter/builtins/let.py +44 -0
- just_bash/interpreter/builtins/local.py +57 -0
- just_bash/interpreter/builtins/mapfile.py +152 -0
- just_bash/interpreter/builtins/misc.py +378 -0
- just_bash/interpreter/builtins/readonly.py +80 -0
- just_bash/interpreter/builtins/set.py +234 -0
- just_bash/interpreter/builtins/shopt.py +201 -0
- just_bash/interpreter/builtins/source.py +136 -0
- just_bash/interpreter/builtins/test.py +290 -0
- just_bash/interpreter/builtins/unset.py +53 -0
- just_bash/interpreter/conditionals.py +387 -0
- just_bash/interpreter/control_flow.py +381 -0
- just_bash/interpreter/errors.py +116 -0
- just_bash/interpreter/expansion.py +1156 -0
- just_bash/interpreter/interpreter.py +813 -0
- just_bash/interpreter/types.py +134 -0
- just_bash/network/__init__.py +1 -0
- just_bash/parser/__init__.py +39 -0
- just_bash/parser/lexer.py +948 -0
- just_bash/parser/parser.py +2162 -0
- just_bash/py.typed +0 -0
- just_bash/query_engine/__init__.py +83 -0
- just_bash/query_engine/builtins/__init__.py +1283 -0
- just_bash/query_engine/evaluator.py +578 -0
- just_bash/query_engine/parser.py +525 -0
- just_bash/query_engine/tokenizer.py +329 -0
- just_bash/query_engine/types.py +373 -0
- just_bash/types.py +180 -0
- just_bash-0.1.5.dist-info/METADATA +410 -0
- just_bash-0.1.5.dist-info/RECORD +193 -0
- just_bash-0.1.5.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,2162 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Recursive Descent Parser for Bash Scripts
|
|
3
|
+
|
|
4
|
+
This parser consumes tokens from the lexer and produces an AST.
|
|
5
|
+
It follows the bash grammar structure for correctness.
|
|
6
|
+
|
|
7
|
+
Grammar (simplified):
|
|
8
|
+
script ::= statement*
|
|
9
|
+
statement ::= pipeline ((&&|'||') pipeline)* [&]
|
|
10
|
+
pipeline ::= [!] command (| command)*
|
|
11
|
+
command ::= simple_command | compound_command | function_def
|
|
12
|
+
simple_cmd ::= (assignment)* [word] (word)* (redirection)*
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import re
|
|
18
|
+
from typing import Optional, Sequence
|
|
19
|
+
|
|
20
|
+
from ..ast import (
|
|
21
|
+
AST,
|
|
22
|
+
ScriptNode,
|
|
23
|
+
StatementNode,
|
|
24
|
+
PipelineNode,
|
|
25
|
+
SimpleCommandNode,
|
|
26
|
+
CommandNode,
|
|
27
|
+
WordNode,
|
|
28
|
+
WordPart,
|
|
29
|
+
LiteralPart,
|
|
30
|
+
SingleQuotedPart,
|
|
31
|
+
DoubleQuotedPart,
|
|
32
|
+
EscapedPart,
|
|
33
|
+
ParameterExpansionPart,
|
|
34
|
+
CommandSubstitutionPart,
|
|
35
|
+
GlobPart,
|
|
36
|
+
TildeExpansionPart,
|
|
37
|
+
AssignmentNode,
|
|
38
|
+
RedirectionNode,
|
|
39
|
+
RedirectionOperator,
|
|
40
|
+
HereDocNode,
|
|
41
|
+
# Compound command nodes
|
|
42
|
+
IfNode,
|
|
43
|
+
IfClause,
|
|
44
|
+
ForNode,
|
|
45
|
+
WhileNode,
|
|
46
|
+
UntilNode,
|
|
47
|
+
CaseNode,
|
|
48
|
+
CaseItemNode,
|
|
49
|
+
SubshellNode,
|
|
50
|
+
GroupNode,
|
|
51
|
+
FunctionDefNode,
|
|
52
|
+
CompoundCommandNode,
|
|
53
|
+
# Conditional command nodes
|
|
54
|
+
ConditionalCommandNode,
|
|
55
|
+
ArithmeticCommandNode,
|
|
56
|
+
CondBinaryNode,
|
|
57
|
+
CondUnaryNode,
|
|
58
|
+
CondNotNode,
|
|
59
|
+
CondAndNode,
|
|
60
|
+
CondOrNode,
|
|
61
|
+
CondGroupNode,
|
|
62
|
+
CondWordNode,
|
|
63
|
+
# Arithmetic nodes
|
|
64
|
+
ArithmeticExpansionPart,
|
|
65
|
+
ArithmeticExpressionNode,
|
|
66
|
+
ArithNumberNode,
|
|
67
|
+
ArithVariableNode,
|
|
68
|
+
ArithBinaryNode,
|
|
69
|
+
ArithUnaryNode,
|
|
70
|
+
ArithGroupNode,
|
|
71
|
+
ArithTernaryNode,
|
|
72
|
+
ArithAssignmentNode,
|
|
73
|
+
ArithExpr,
|
|
74
|
+
# Parameter expansion operations
|
|
75
|
+
DefaultValueOp,
|
|
76
|
+
AssignDefaultOp,
|
|
77
|
+
ErrorIfUnsetOp,
|
|
78
|
+
UseAlternativeOp,
|
|
79
|
+
LengthOp,
|
|
80
|
+
SubstringOp,
|
|
81
|
+
PatternRemovalOp,
|
|
82
|
+
PatternReplacementOp,
|
|
83
|
+
CaseModificationOp,
|
|
84
|
+
TransformOp,
|
|
85
|
+
ParameterOperation,
|
|
86
|
+
)
|
|
87
|
+
from .lexer import Lexer, Token, TokenType
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# Limits to prevent runaway parsing
|
|
91
|
+
MAX_INPUT_SIZE = 1_000_000 # 1MB
|
|
92
|
+
MAX_TOKENS = 100_000
|
|
93
|
+
MAX_PARSE_ITERATIONS = 1_000_000
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class ParseException(Exception):
    """Raised when the parser encounters invalid or unparseable input.

    Carries the 1-based source position and, when available, the
    offending token, so callers can build their own diagnostics.
    """

    def __init__(
        self,
        message: str,
        line: int = 1,
        column: int = 1,
        token: Optional[Token] = None,
    ) -> None:
        # Keep the raw pieces available alongside the formatted message.
        self.message = message
        self.line = line
        self.column = column
        self.token = token
        location = f" at line {line}, column {column}"
        super().__init__(message + location)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class Parser:
|
|
114
|
+
"""Parser class - transforms tokens into AST."""
|
|
115
|
+
|
|
116
|
+
def __init__(self) -> None:
|
|
117
|
+
self.tokens: list[Token] = []
|
|
118
|
+
self.pos = 0
|
|
119
|
+
self.pending_heredocs: list[dict] = []
|
|
120
|
+
self.parse_iterations = 0
|
|
121
|
+
|
|
122
|
+
def _check_iteration_limit(self) -> None:
|
|
123
|
+
"""Check parse iteration limit to prevent infinite loops."""
|
|
124
|
+
self.parse_iterations += 1
|
|
125
|
+
if self.parse_iterations > MAX_PARSE_ITERATIONS:
|
|
126
|
+
raise ParseException(
|
|
127
|
+
"Maximum parse iterations exceeded (possible infinite loop)",
|
|
128
|
+
self._current().line,
|
|
129
|
+
self._current().column,
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
def parse(self, input_text: str) -> ScriptNode:
|
|
133
|
+
"""Parse a bash script string."""
|
|
134
|
+
# Check input size limit
|
|
135
|
+
if len(input_text) > MAX_INPUT_SIZE:
|
|
136
|
+
raise ParseException(
|
|
137
|
+
f"Input too large: {len(input_text)} bytes exceeds limit of {MAX_INPUT_SIZE}",
|
|
138
|
+
1,
|
|
139
|
+
1,
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
lexer = Lexer(input_text)
|
|
143
|
+
self.tokens = lexer.tokenize()
|
|
144
|
+
|
|
145
|
+
# Check token count limit
|
|
146
|
+
if len(self.tokens) > MAX_TOKENS:
|
|
147
|
+
raise ParseException(
|
|
148
|
+
f"Too many tokens: {len(self.tokens)} exceeds limit of {MAX_TOKENS}",
|
|
149
|
+
1,
|
|
150
|
+
1,
|
|
151
|
+
)
|
|
152
|
+
|
|
153
|
+
self.pos = 0
|
|
154
|
+
self.pending_heredocs = []
|
|
155
|
+
self.parse_iterations = 0
|
|
156
|
+
return self._parse_script()
|
|
157
|
+
|
|
158
|
+
def parse_tokens(self, tokens: list[Token]) -> ScriptNode:
|
|
159
|
+
"""Parse from pre-tokenized input."""
|
|
160
|
+
self.tokens = tokens
|
|
161
|
+
self.pos = 0
|
|
162
|
+
self.pending_heredocs = []
|
|
163
|
+
self.parse_iterations = 0
|
|
164
|
+
return self._parse_script()
|
|
165
|
+
|
|
166
|
+
# =========================================================================
|
|
167
|
+
# Helper methods
|
|
168
|
+
# =========================================================================
|
|
169
|
+
|
|
170
|
+
def _current(self) -> Token:
|
|
171
|
+
"""Get current token."""
|
|
172
|
+
if self.pos < len(self.tokens):
|
|
173
|
+
return self.tokens[self.pos]
|
|
174
|
+
return self.tokens[-1]
|
|
175
|
+
|
|
176
|
+
def _peek(self, offset: int = 0) -> Token:
|
|
177
|
+
"""Peek at token at offset from current position."""
|
|
178
|
+
idx = self.pos + offset
|
|
179
|
+
if idx < len(self.tokens):
|
|
180
|
+
return self.tokens[idx]
|
|
181
|
+
return self.tokens[-1]
|
|
182
|
+
|
|
183
|
+
def _advance(self) -> Token:
|
|
184
|
+
"""Advance to next token and return current."""
|
|
185
|
+
token = self._current()
|
|
186
|
+
if self.pos < len(self.tokens) - 1:
|
|
187
|
+
self.pos += 1
|
|
188
|
+
return token
|
|
189
|
+
|
|
190
|
+
def _check(self, *types: TokenType) -> bool:
|
|
191
|
+
"""Check if current token matches any of the given types."""
|
|
192
|
+
current_type = self._current().type
|
|
193
|
+
return current_type in types
|
|
194
|
+
|
|
195
|
+
def _expect(self, type_: TokenType, message: Optional[str] = None) -> Token:
|
|
196
|
+
"""Expect current token to be of given type, advance if so."""
|
|
197
|
+
if self._check(type_):
|
|
198
|
+
return self._advance()
|
|
199
|
+
token = self._current()
|
|
200
|
+
msg = message or f"Expected {type_.name}, got {token.type.name}"
|
|
201
|
+
raise ParseException(msg, token.line, token.column, token)
|
|
202
|
+
|
|
203
|
+
def _error(self, message: str) -> ParseException:
|
|
204
|
+
"""Create a parse error at current position."""
|
|
205
|
+
token = self._current()
|
|
206
|
+
return ParseException(message, token.line, token.column, token)
|
|
207
|
+
|
|
208
|
+
def _skip_newlines(self) -> None:
|
|
209
|
+
"""Skip newlines and comments."""
|
|
210
|
+
while self._check(TokenType.NEWLINE, TokenType.COMMENT):
|
|
211
|
+
if self._check(TokenType.NEWLINE):
|
|
212
|
+
self._advance()
|
|
213
|
+
self._process_heredocs()
|
|
214
|
+
else:
|
|
215
|
+
self._advance()
|
|
216
|
+
|
|
217
|
+
def _skip_separators(self) -> None:
|
|
218
|
+
"""Skip statement separators (newlines, semicolons, comments)."""
|
|
219
|
+
while True:
|
|
220
|
+
if self._check(TokenType.NEWLINE):
|
|
221
|
+
self._advance()
|
|
222
|
+
self._process_heredocs()
|
|
223
|
+
continue
|
|
224
|
+
if self._check(TokenType.SEMICOLON, TokenType.COMMENT):
|
|
225
|
+
self._advance()
|
|
226
|
+
continue
|
|
227
|
+
break
|
|
228
|
+
|
|
229
|
+
def _is_statement_end(self) -> bool:
|
|
230
|
+
"""Check if we're at a statement boundary."""
|
|
231
|
+
return self._check(
|
|
232
|
+
TokenType.EOF,
|
|
233
|
+
TokenType.NEWLINE,
|
|
234
|
+
TokenType.SEMICOLON,
|
|
235
|
+
TokenType.AMP,
|
|
236
|
+
TokenType.AND_AND,
|
|
237
|
+
TokenType.OR_OR,
|
|
238
|
+
TokenType.RPAREN,
|
|
239
|
+
TokenType.RBRACE,
|
|
240
|
+
TokenType.DSEMI,
|
|
241
|
+
TokenType.SEMI_AND,
|
|
242
|
+
TokenType.SEMI_SEMI_AND,
|
|
243
|
+
)
|
|
244
|
+
|
|
245
|
+
def _is_command_start(self) -> bool:
|
|
246
|
+
"""Check if current token can start a command."""
|
|
247
|
+
t = self._current().type
|
|
248
|
+
return t in (
|
|
249
|
+
TokenType.WORD,
|
|
250
|
+
TokenType.NAME,
|
|
251
|
+
TokenType.NUMBER,
|
|
252
|
+
TokenType.ASSIGNMENT_WORD,
|
|
253
|
+
TokenType.IF,
|
|
254
|
+
TokenType.FOR,
|
|
255
|
+
TokenType.WHILE,
|
|
256
|
+
TokenType.UNTIL,
|
|
257
|
+
TokenType.CASE,
|
|
258
|
+
TokenType.LPAREN,
|
|
259
|
+
TokenType.LBRACE,
|
|
260
|
+
TokenType.DPAREN_START,
|
|
261
|
+
TokenType.DBRACK_START,
|
|
262
|
+
TokenType.FUNCTION,
|
|
263
|
+
TokenType.BANG,
|
|
264
|
+
TokenType.IN,
|
|
265
|
+
# Redirections can appear before command name
|
|
266
|
+
TokenType.LESS,
|
|
267
|
+
TokenType.GREAT,
|
|
268
|
+
TokenType.DLESS,
|
|
269
|
+
TokenType.DGREAT,
|
|
270
|
+
TokenType.LESSAND,
|
|
271
|
+
TokenType.GREATAND,
|
|
272
|
+
TokenType.LESSGREAT,
|
|
273
|
+
TokenType.DLESSDASH,
|
|
274
|
+
TokenType.CLOBBER,
|
|
275
|
+
TokenType.TLESS,
|
|
276
|
+
TokenType.AND_GREAT,
|
|
277
|
+
TokenType.AND_DGREAT,
|
|
278
|
+
)
|
|
279
|
+
|
|
280
|
+
    def _process_heredocs(self) -> None:
        """Attach content to pending here-documents after a newline.

        Invoked from _skip_newlines/_skip_separators right after a NEWLINE
        token is consumed, which is where the lexer places HEREDOC_CONTENT
        tokens.  Builds a HereDocNode for each pending heredoc record,
        stores it under the record's "redirect_target" key, then clears the
        pending list.  (Compare _resolve_pending_heredocs, which patches
        heredoc content directly into a redirection list.)
        """
        for heredoc in self.pending_heredocs:
            if self._check(TokenType.HEREDOC_CONTENT):
                content_token = self._advance()
                # If delimiter was quoted, treat content as literal (no expansion)
                content_word = self._parse_word_from_string(
                    content_token.value,
                    quoted=False,
                    single_quoted=heredoc["quoted"]
                )
                heredoc["redirect_target"] = AST.here_doc(
                    heredoc["delimiter"],
                    content_word,
                    heredoc["strip_tabs"],
                    heredoc["quoted"],
                )
        self.pending_heredocs = []
|
298
|
+
|
|
299
|
+
def _resolve_pending_heredocs(
|
|
300
|
+
self, redirections: list[RedirectionNode]
|
|
301
|
+
) -> list[RedirectionNode]:
|
|
302
|
+
"""Resolve pending heredocs by reading their content and updating redirections."""
|
|
303
|
+
if not self.pending_heredocs:
|
|
304
|
+
return redirections
|
|
305
|
+
|
|
306
|
+
# We need to skip past the current line to find heredoc content
|
|
307
|
+
# Save position and scan for heredoc content
|
|
308
|
+
saved_pos = self.pos
|
|
309
|
+
|
|
310
|
+
# Skip to find HEREDOC_CONTENT tokens (they come after newline)
|
|
311
|
+
while self.pos < len(self.tokens):
|
|
312
|
+
token = self.tokens[self.pos]
|
|
313
|
+
if token.type == TokenType.HEREDOC_CONTENT:
|
|
314
|
+
break
|
|
315
|
+
elif token.type == TokenType.NEWLINE:
|
|
316
|
+
self.pos += 1
|
|
317
|
+
else:
|
|
318
|
+
break
|
|
319
|
+
|
|
320
|
+
# Process each pending heredoc
|
|
321
|
+
new_redirections = list(redirections)
|
|
322
|
+
heredoc_idx = 0
|
|
323
|
+
for heredoc_info in self.pending_heredocs:
|
|
324
|
+
if self._check(TokenType.HEREDOC_CONTENT):
|
|
325
|
+
content_token = self._advance()
|
|
326
|
+
# If delimiter was quoted, treat content as literal (no expansion)
|
|
327
|
+
content_word = self._parse_word_from_string(
|
|
328
|
+
content_token.value,
|
|
329
|
+
quoted=False,
|
|
330
|
+
single_quoted=heredoc_info["quoted"]
|
|
331
|
+
)
|
|
332
|
+
heredoc_node = AST.here_doc(
|
|
333
|
+
heredoc_info["delimiter"],
|
|
334
|
+
content_word,
|
|
335
|
+
heredoc_info["strip_tabs"],
|
|
336
|
+
heredoc_info["quoted"],
|
|
337
|
+
)
|
|
338
|
+
# Find the corresponding placeholder redirection and replace it
|
|
339
|
+
for i, redir in enumerate(new_redirections):
|
|
340
|
+
if redir.operator in ("<<", "<<-"):
|
|
341
|
+
# Check if this looks like our placeholder
|
|
342
|
+
if (redir.target and redir.target.parts and
|
|
343
|
+
len(redir.target.parts) == 1 and
|
|
344
|
+
hasattr(redir.target.parts[0], 'value') and
|
|
345
|
+
redir.target.parts[0].value == ""):
|
|
346
|
+
new_redirections[i] = AST.redirection(
|
|
347
|
+
redir.operator, heredoc_node, redir.fd
|
|
348
|
+
)
|
|
349
|
+
break
|
|
350
|
+
heredoc_idx += 1
|
|
351
|
+
|
|
352
|
+
self.pending_heredocs = []
|
|
353
|
+
return new_redirections
|
|
354
|
+
|
|
355
|
+
# =========================================================================
|
|
356
|
+
# Main parsing methods
|
|
357
|
+
# =========================================================================
|
|
358
|
+
|
|
359
|
+
def _parse_script(self) -> ScriptNode:
|
|
360
|
+
"""Parse a complete script."""
|
|
361
|
+
statements: list[StatementNode] = []
|
|
362
|
+
self._skip_newlines()
|
|
363
|
+
|
|
364
|
+
while not self._check(TokenType.EOF):
|
|
365
|
+
self._check_iteration_limit()
|
|
366
|
+
stmt = self._parse_statement()
|
|
367
|
+
if stmt:
|
|
368
|
+
statements.append(stmt)
|
|
369
|
+
self._skip_separators()
|
|
370
|
+
|
|
371
|
+
return AST.script(statements)
|
|
372
|
+
|
|
373
|
+
    def _parse_statement(self) -> Optional[StatementNode]:
        """Parse a statement (pipeline list with && / || operators).

        Grammar: statement ::= pipeline ((&& | '||') pipeline)* [&]

        Returns None when the current token cannot begin a command.
        Raises ParseException when an operator is not followed by a command.
        """
        if not self._is_command_start():
            return None

        pipelines: list[PipelineNode] = []
        operators: list[str] = []  # one operator between each adjacent pair

        # Parse first pipeline
        pipeline = self._parse_pipeline()
        if not pipeline:
            return None
        pipelines.append(pipeline)

        # Parse additional pipelines with operators
        while self._check(TokenType.AND_AND, TokenType.OR_OR):
            self._check_iteration_limit()
            op_token = self._advance()
            operators.append(op_token.value)
            # The right-hand command may continue on a following line.
            self._skip_newlines()

            next_pipeline = self._parse_pipeline()
            if not next_pipeline:
                raise self._error("Expected command after operator")
            pipelines.append(next_pipeline)

        # Check for background execution (trailing &)
        background = False
        if self._check(TokenType.AMP):
            self._advance()
            background = True

        return AST.statement(pipelines, operators, background)
|
406
|
+
|
|
407
|
+
def _parse_pipeline(self) -> Optional[PipelineNode]:
|
|
408
|
+
"""Parse a pipeline (commands connected by |)."""
|
|
409
|
+
# Check for negation
|
|
410
|
+
negated = False
|
|
411
|
+
if self._check(TokenType.BANG):
|
|
412
|
+
self._advance()
|
|
413
|
+
negated = True
|
|
414
|
+
self._skip_newlines()
|
|
415
|
+
|
|
416
|
+
# Parse first command
|
|
417
|
+
command = self._parse_command()
|
|
418
|
+
if not command:
|
|
419
|
+
if negated:
|
|
420
|
+
raise self._error("Expected command after !")
|
|
421
|
+
return None
|
|
422
|
+
|
|
423
|
+
commands: list[CommandNode] = [command]
|
|
424
|
+
|
|
425
|
+
# Parse additional commands with pipe
|
|
426
|
+
while self._check(TokenType.PIPE, TokenType.PIPE_AMP):
|
|
427
|
+
self._check_iteration_limit()
|
|
428
|
+
self._advance()
|
|
429
|
+
self._skip_newlines()
|
|
430
|
+
|
|
431
|
+
next_command = self._parse_command()
|
|
432
|
+
if not next_command:
|
|
433
|
+
raise self._error("Expected command after pipe")
|
|
434
|
+
commands.append(next_command)
|
|
435
|
+
|
|
436
|
+
return AST.pipeline(commands, negated)
|
|
437
|
+
|
|
438
|
+
def _parse_command(self) -> Optional[CommandNode]:
|
|
439
|
+
"""Parse a command (simple, compound, or function definition)."""
|
|
440
|
+
# Check for compound commands
|
|
441
|
+
if self._check(TokenType.IF):
|
|
442
|
+
return self._parse_if()
|
|
443
|
+
if self._check(TokenType.FOR):
|
|
444
|
+
return self._parse_for()
|
|
445
|
+
if self._check(TokenType.WHILE):
|
|
446
|
+
return self._parse_while()
|
|
447
|
+
if self._check(TokenType.UNTIL):
|
|
448
|
+
return self._parse_until()
|
|
449
|
+
if self._check(TokenType.CASE):
|
|
450
|
+
return self._parse_case()
|
|
451
|
+
if self._check(TokenType.LPAREN):
|
|
452
|
+
return self._parse_subshell()
|
|
453
|
+
if self._check(TokenType.LBRACE):
|
|
454
|
+
return self._parse_group()
|
|
455
|
+
if self._check(TokenType.DBRACK_START):
|
|
456
|
+
return self._parse_conditional_command()
|
|
457
|
+
if self._check(TokenType.DPAREN_START):
|
|
458
|
+
return self._parse_arithmetic_command()
|
|
459
|
+
if self._check(TokenType.FUNCTION):
|
|
460
|
+
return self._parse_function_def()
|
|
461
|
+
|
|
462
|
+
# Check for function definition: name() { ... }
|
|
463
|
+
if self._check(TokenType.NAME, TokenType.WORD):
|
|
464
|
+
if self._peek(1).type == TokenType.LPAREN:
|
|
465
|
+
return self._parse_function_def()
|
|
466
|
+
|
|
467
|
+
# Default to simple command
|
|
468
|
+
return self._parse_simple_command()
|
|
469
|
+
|
|
470
|
+
def _parse_simple_command(self) -> Optional[SimpleCommandNode]:
|
|
471
|
+
"""Parse a simple command with assignments, name, args, redirections."""
|
|
472
|
+
assignments: list[AssignmentNode] = []
|
|
473
|
+
name: Optional[WordNode] = None
|
|
474
|
+
args: list[WordNode] = []
|
|
475
|
+
redirections: list[RedirectionNode] = []
|
|
476
|
+
|
|
477
|
+
# Parse leading redirections and assignments
|
|
478
|
+
while True:
|
|
479
|
+
self._check_iteration_limit()
|
|
480
|
+
|
|
481
|
+
# Check for assignment (including array assignment VAR=(...))
|
|
482
|
+
if self._check(TokenType.ASSIGNMENT_WORD):
|
|
483
|
+
assign_tok = self._current()
|
|
484
|
+
# Check if next token is LPAREN for array assignment
|
|
485
|
+
if self._peek(1).type == TokenType.LPAREN:
|
|
486
|
+
assignments.append(self._parse_array_assignment())
|
|
487
|
+
else:
|
|
488
|
+
assignments.append(self._parse_assignment())
|
|
489
|
+
continue
|
|
490
|
+
|
|
491
|
+
# Check for redirection before command name
|
|
492
|
+
redir = self._try_parse_redirection()
|
|
493
|
+
if redir:
|
|
494
|
+
redirections.append(redir)
|
|
495
|
+
continue
|
|
496
|
+
|
|
497
|
+
break
|
|
498
|
+
|
|
499
|
+
# Parse command name
|
|
500
|
+
if self._check(
|
|
501
|
+
TokenType.WORD, TokenType.NAME, TokenType.NUMBER, TokenType.IN
|
|
502
|
+
):
|
|
503
|
+
name = self._parse_word()
|
|
504
|
+
|
|
505
|
+
# Parse arguments and trailing redirections
|
|
506
|
+
while not self._is_statement_end():
|
|
507
|
+
self._check_iteration_limit()
|
|
508
|
+
|
|
509
|
+
# Check for redirection
|
|
510
|
+
redir = self._try_parse_redirection()
|
|
511
|
+
if redir:
|
|
512
|
+
redirections.append(redir)
|
|
513
|
+
continue
|
|
514
|
+
|
|
515
|
+
# Check for array assignment: VAR=(...) - combine into single argument
|
|
516
|
+
# This handles cases like: declare -a arr=(a b c)
|
|
517
|
+
if self._check(TokenType.ASSIGNMENT_WORD):
|
|
518
|
+
assign_tok = self._current()
|
|
519
|
+
if self._peek(1).type == TokenType.LPAREN:
|
|
520
|
+
# Collect the entire array assignment
|
|
521
|
+
array_str = assign_tok.value
|
|
522
|
+
self._advance() # consume ASSIGNMENT_WORD
|
|
523
|
+
self._advance() # consume LPAREN
|
|
524
|
+
array_str += "("
|
|
525
|
+
|
|
526
|
+
# Collect elements until RPAREN
|
|
527
|
+
first = True
|
|
528
|
+
while not self._check(TokenType.RPAREN, TokenType.EOF):
|
|
529
|
+
if not first:
|
|
530
|
+
array_str += " "
|
|
531
|
+
first = False
|
|
532
|
+
elem_tok = self._current()
|
|
533
|
+
array_str += elem_tok.value
|
|
534
|
+
self._advance()
|
|
535
|
+
|
|
536
|
+
if self._check(TokenType.RPAREN):
|
|
537
|
+
self._advance()
|
|
538
|
+
array_str += ")"
|
|
539
|
+
|
|
540
|
+
# Create word node with the full array assignment
|
|
541
|
+
args.append(AST.word([LiteralPart(value=array_str)]))
|
|
542
|
+
continue
|
|
543
|
+
|
|
544
|
+
# Check for word argument - include reserved words that can be arguments
|
|
545
|
+
# Reserved words are only special at command position, not as arguments
|
|
546
|
+
# ASSIGNMENT_WORD is also valid as argument to builtins like declare, export, local
|
|
547
|
+
if self._check(
|
|
548
|
+
TokenType.WORD,
|
|
549
|
+
TokenType.NAME,
|
|
550
|
+
TokenType.NUMBER,
|
|
551
|
+
TokenType.ASSIGNMENT_WORD, # For declare, export, local, etc.
|
|
552
|
+
# Reserved words that can appear as arguments:
|
|
553
|
+
TokenType.IN,
|
|
554
|
+
TokenType.DO,
|
|
555
|
+
TokenType.DONE,
|
|
556
|
+
TokenType.IF,
|
|
557
|
+
TokenType.THEN,
|
|
558
|
+
TokenType.ELSE,
|
|
559
|
+
TokenType.ELIF,
|
|
560
|
+
TokenType.FI,
|
|
561
|
+
TokenType.FOR,
|
|
562
|
+
TokenType.WHILE,
|
|
563
|
+
TokenType.UNTIL,
|
|
564
|
+
TokenType.CASE,
|
|
565
|
+
TokenType.ESAC,
|
|
566
|
+
TokenType.FUNCTION,
|
|
567
|
+
):
|
|
568
|
+
args.append(self._parse_word())
|
|
569
|
+
continue
|
|
570
|
+
|
|
571
|
+
break
|
|
572
|
+
|
|
573
|
+
# Must have at least an assignment or a command name
|
|
574
|
+
if not assignments and name is None and not redirections:
|
|
575
|
+
return None
|
|
576
|
+
|
|
577
|
+
# If we have pending heredocs, we need to resolve them before creating the command
|
|
578
|
+
if self.pending_heredocs:
|
|
579
|
+
redirections = self._resolve_pending_heredocs(redirections)
|
|
580
|
+
|
|
581
|
+
return AST.simple_command(name, args, assignments, redirections)
|
|
582
|
+
|
|
583
|
+
def _parse_assignment(self) -> AssignmentNode:
|
|
584
|
+
"""Parse a variable assignment."""
|
|
585
|
+
token = self._expect(TokenType.ASSIGNMENT_WORD)
|
|
586
|
+
value = token.value
|
|
587
|
+
|
|
588
|
+
# Find the = sign
|
|
589
|
+
eq_idx = value.find("=")
|
|
590
|
+
if eq_idx == -1:
|
|
591
|
+
raise self._error(f"Invalid assignment: {value}")
|
|
592
|
+
|
|
593
|
+
# Check for +=
|
|
594
|
+
append = False
|
|
595
|
+
if eq_idx > 0 and value[eq_idx - 1] == "+":
|
|
596
|
+
name = value[: eq_idx - 1]
|
|
597
|
+
append = True
|
|
598
|
+
else:
|
|
599
|
+
name = value[:eq_idx]
|
|
600
|
+
|
|
601
|
+
# Get value part
|
|
602
|
+
value_str = value[eq_idx + 1 :]
|
|
603
|
+
|
|
604
|
+
# Check for array assignment: VAR=(a b c)
|
|
605
|
+
if value_str.startswith("("):
|
|
606
|
+
# TODO: Parse array assignment
|
|
607
|
+
# For now, treat as simple value
|
|
608
|
+
value_word = self._parse_word_from_string(value_str, quoted=False)
|
|
609
|
+
return AST.assignment(name, value_word, append)
|
|
610
|
+
|
|
611
|
+
# Simple value
|
|
612
|
+
if value_str:
|
|
613
|
+
value_word = self._parse_word_from_string(value_str, quoted=False)
|
|
614
|
+
else:
|
|
615
|
+
value_word = None
|
|
616
|
+
|
|
617
|
+
return AST.assignment(name, value_word, append)
|
|
618
|
+
|
|
619
|
+
def _parse_array_assignment(self) -> AssignmentNode:
    """Parse an array assignment: VAR=(elem1 elem2 ...)."""
    value = self._expect(TokenType.ASSIGNMENT_WORD).value

    # The assignment word must contain '='.
    lhs, sep, _rest = value.partition("=")
    if not sep:
        raise self._error(f"Invalid assignment: {value}")

    # A trailing '+' on the name side means append (VAR+=(...)).
    append = lhs.endswith("+")
    name = lhs[:-1] if append else lhs

    # The element list is parenthesized; consume the opening '('.
    self._expect(TokenType.LPAREN)

    # Gather word-like tokens as elements until ')' (or EOF); anything
    # unexpected is skipped rather than treated as an error.
    word_types = (
        TokenType.WORD,
        TokenType.NAME,
        TokenType.NUMBER,
        TokenType.ASSIGNMENT_WORD,
    )
    elements: list[WordNode] = []
    while not self._check(TokenType.RPAREN, TokenType.EOF):
        if self._check(*word_types):
            elements.append(self._parse_word())
        else:
            self._advance()

    # Consume the closing ')' when present (EOF is tolerated).
    if self._check(TokenType.RPAREN):
        self._advance()

    return AST.assignment(name, None, append, array=elements)
|
|
655
|
+
|
|
656
|
+
def _try_parse_redirection(self) -> Optional[RedirectionNode]:
    """Try to parse a redirection, return None if not a redirection.

    Consumes an optional adjacent fd-number prefix, the operator token,
    and the target word.  When the current token does not start a
    redirection, the token stream is left untouched and None is returned.
    """
    # Check for file descriptor number prefix
    # Only treat NUMBER as fd if it's immediately adjacent to the redirect operator
    # (no whitespace between them). E.g., "3>file" but not "3 >file"
    fd: Optional[int] = None
    if self._check(TokenType.NUMBER):
        num_token = self._current()
        next_token = self._peek(1)
        # NOTE(review): heredoc operators (<<, <<-, <<<) are absent from this
        # adjacency list, so "3<<EOF" will not attach fd 3 — confirm intent.
        if next_token.type in (
            TokenType.LESS,
            TokenType.GREAT,
            TokenType.DGREAT,
            TokenType.LESSAND,
            TokenType.GREATAND,
            TokenType.LESSGREAT,
            TokenType.CLOBBER,
        ):
            # Check if immediately adjacent (no whitespace)
            # Number ends at column + len(value), redirect should start there
            num_end_col = num_token.column + len(num_token.value)
            if next_token.column == num_end_col:
                fd = int(self._advance().value)

    # Check for redirection operator
    # Maps lexer token types to the operator strings stored in the AST node.
    op_map: dict[TokenType, RedirectionOperator] = {
        TokenType.LESS: "<",
        TokenType.GREAT: ">",
        TokenType.DGREAT: ">>",
        TokenType.LESSAND: "<&",
        TokenType.GREATAND: ">&",
        TokenType.LESSGREAT: "<>",
        TokenType.CLOBBER: ">|",
        TokenType.TLESS: "<<<",
        TokenType.DLESS: "<<",
        TokenType.DLESSDASH: "<<-",
        TokenType.AND_GREAT: "&>",
        TokenType.AND_DGREAT: "&>>",
    }

    for token_type, op in op_map.items():
        if self._check(token_type):
            self._advance()

            # Handle here-document: only the delimiter is parsed now; the
            # body is filled in later via the pending-heredoc mechanism.
            if op in ("<<", "<<-"):
                return self._parse_heredoc_start(op, fd)

            # Parse target
            if not self._check(TokenType.WORD, TokenType.NAME, TokenType.NUMBER):
                raise self._error(f"Expected target for redirection {op}")

            target = self._parse_word()
            return AST.redirection(op, target, fd)

    return None
|
|
712
|
+
|
|
713
|
+
def _parse_heredoc_start(
    self, op: RedirectionOperator, fd: Optional[int]
) -> RedirectionNode:
    """Parse the start of a here-document."""
    strip_tabs = op == "<<-"

    # The token right after << / <<- is the delimiter word.
    if not self._check(TokenType.WORD, TokenType.NAME):
        raise self._error("Expected here-document delimiter")

    delim_tok = self._advance()
    delimiter = delim_tok.value
    quoted = delim_tok.quoted or delim_tok.single_quoted

    # Quote characters still surrounding the delimiter mark it quoted
    # and must be stripped off.
    for quote in ("'", '"'):
        if delimiter.startswith(quote) and delimiter.endswith(quote):
            delimiter = delimiter[1:-1]
            quoted = True
            break

    # The real body is attached later; use an empty-word placeholder.
    placeholder = AST.word([AST.literal("")])

    # Register pending heredoc so the body can be resolved afterwards.
    self.pending_heredocs.append(
        {
            "delimiter": delimiter,
            "strip_tabs": strip_tabs,
            "quoted": quoted,
            "redirect_target": None,
        }
    )

    return AST.redirection(op, placeholder, fd)
|
|
748
|
+
|
|
749
|
+
# =========================================================================
|
|
750
|
+
# Compound command parsing
|
|
751
|
+
# =========================================================================
|
|
752
|
+
|
|
753
|
+
def _parse_compound_list(self) -> list[StatementNode]:
    """Parse a compound list (body of if/for/while/etc.)."""
    # Tokens that close the enclosing construct and end the list.
    terminators = (
        TokenType.EOF,
        TokenType.THEN,
        TokenType.ELSE,
        TokenType.ELIF,
        TokenType.FI,
        TokenType.DO,
        TokenType.DONE,
        TokenType.ESAC,
        TokenType.RBRACE,
        TokenType.RPAREN,
    )

    result: list[StatementNode] = []
    self._skip_newlines()
    while not self._check(*terminators):
        self._check_iteration_limit()
        if not self._is_command_start():
            break
        node = self._parse_statement()
        if node:
            result.append(node)
        self._skip_separators()

    return result
|
|
779
|
+
|
|
780
|
+
def _parse_if(self) -> IfNode:
    """Parse an if statement: if/elif/else/fi."""
    self._expect(TokenType.IF)
    self._skip_newlines()

    branches: list[IfClause] = []
    keyword = "if"

    # The initial 'if' branch and every 'elif' branch share one shape:
    # condition list, 'then', body list.
    while True:
        test = self._parse_compound_list()
        if not test:
            raise self._error(f"Expected condition after '{keyword}'")

        self._skip_newlines()
        self._expect(TokenType.THEN, "Expected 'then' after condition")
        self._skip_newlines()

        branches.append(AST.if_clause(test, self._parse_compound_list()))

        if not self._check(TokenType.ELIF):
            break
        self._advance()
        self._skip_newlines()
        keyword = "elif"

    # Optional else branch.
    else_branch: Optional[list[StatementNode]] = None
    if self._check(TokenType.ELSE):
        self._advance()
        self._skip_newlines()
        else_branch = self._parse_compound_list()

    self._skip_newlines()
    self._expect(TokenType.FI, "Expected 'fi' to close if statement")

    # Optional trailing redirections.
    redirs: list[RedirectionNode] = []
    while r := self._try_parse_redirection():
        redirs.append(r)

    return AST.if_node(branches, else_branch, redirs)
|
|
836
|
+
|
|
837
|
+
def _parse_for(self) -> ForNode:
    """Parse a for loop: for NAME [in word ...]; do body; done."""
    self._expect(TokenType.FOR)
    self._skip_newlines()

    # Loop variable name.
    if not self._check(TokenType.NAME, TokenType.WORD):
        raise self._error("Expected variable name after 'for'")
    var_name = self._advance().value

    self._skip_newlines()

    # Words remain None when no 'in' clause is given.
    word_list: Optional[list[WordNode]] = None
    if self._check(TokenType.IN):
        self._advance()
        word_list = []
        stoppers = (
            TokenType.SEMICOLON,
            TokenType.NEWLINE,
            TokenType.DO,
            TokenType.EOF,
        )
        while not self._check(*stoppers):
            self._check_iteration_limit()
            if not self._check(TokenType.WORD, TokenType.NAME, TokenType.NUMBER):
                break
            word_list.append(self._parse_word())

    # Skip separators up to 'do'.
    self._skip_separators()
    self._expect(TokenType.DO, "Expected 'do' in for loop")
    self._skip_newlines()

    loop_body = self._parse_compound_list()

    self._skip_newlines()
    self._expect(TokenType.DONE, "Expected 'done' to close for loop")

    # Optional trailing redirections.
    redirs: list[RedirectionNode] = []
    while r := self._try_parse_redirection():
        redirs.append(r)

    return AST.for_node(var_name, word_list, loop_body, redirs)
|
|
886
|
+
|
|
887
|
+
def _parse_while(self) -> WhileNode:
    """Parse a while loop: while cond; do body; done."""
    self._expect(TokenType.WHILE)
    self._skip_newlines()

    # Condition list.
    test = self._parse_compound_list()
    if not test:
        raise self._error("Expected condition after 'while'")

    self._skip_newlines()
    self._expect(TokenType.DO, "Expected 'do' after condition")
    self._skip_newlines()

    # Loop body.
    loop_body = self._parse_compound_list()

    self._skip_newlines()
    self._expect(TokenType.DONE, "Expected 'done' to close while loop")

    # Optional trailing redirections.
    redirs: list[RedirectionNode] = []
    while r := self._try_parse_redirection():
        redirs.append(r)

    return AST.while_node(test, loop_body, redirs)
|
|
916
|
+
|
|
917
|
+
def _parse_until(self) -> UntilNode:
    """Parse an until loop: until cond; do body; done."""
    self._expect(TokenType.UNTIL)
    self._skip_newlines()

    # Condition list.
    test = self._parse_compound_list()
    if not test:
        raise self._error("Expected condition after 'until'")

    self._skip_newlines()
    self._expect(TokenType.DO, "Expected 'do' after condition")
    self._skip_newlines()

    # Loop body.
    loop_body = self._parse_compound_list()

    self._skip_newlines()
    self._expect(TokenType.DONE, "Expected 'done' to close until loop")

    # Optional trailing redirections.
    redirs: list[RedirectionNode] = []
    while r := self._try_parse_redirection():
        redirs.append(r)

    return AST.until_node(test, loop_body, redirs)
|
|
946
|
+
|
|
947
|
+
def _parse_case(self) -> CaseNode:
    """Parse a case statement.

    Shape handled: ``case WORD in [(]pat[|pat...]) body ;;|;&|;;& ... esac``
    followed by optional redirections.
    """
    self._expect(TokenType.CASE)
    self._skip_newlines()

    # Parse word to match
    if not self._check(TokenType.WORD, TokenType.NAME, TokenType.NUMBER):
        raise self._error("Expected word after 'case'")
    word = self._parse_word()

    self._skip_newlines()
    self._expect(TokenType.IN, "Expected 'in' after case word")
    self._skip_newlines()

    # Parse case items
    items: list[CaseItemNode] = []
    while not self._check(TokenType.ESAC, TokenType.EOF):
        self._check_iteration_limit()
        self._skip_newlines()

        # Re-check after skipping newlines: blank lines before 'esac'.
        if self._check(TokenType.ESAC):
            break

        # Skip optional leading (
        if self._check(TokenType.LPAREN):
            self._advance()

        # Parse patterns — one or more words separated by '|'.
        patterns: list[WordNode] = []
        while True:
            self._check_iteration_limit()
            if self._check(TokenType.WORD, TokenType.NAME, TokenType.NUMBER):
                patterns.append(self._parse_word())
            elif not patterns:
                # First iteration with no word at all: malformed item.
                raise self._error("Expected pattern in case item")
            else:
                break

            # Check for pattern separator |
            if self._check(TokenType.PIPE):
                self._advance()
            else:
                break

        # Expect )
        self._expect(TokenType.RPAREN, "Expected ')' after patterns")
        self._skip_newlines()

        # Parse body
        item_body = self._parse_compound_list()

        # Parse terminator (;;, ;&, ;;&) — ';;' is the default when the
        # last item omits it.
        terminator = ";;"
        if self._check(TokenType.DSEMI):
            self._advance()
        elif self._check(TokenType.SEMI_AND):
            self._advance()
            terminator = ";&"
        elif self._check(TokenType.SEMI_SEMI_AND):
            self._advance()
            terminator = ";;&"

        items.append(AST.case_item(patterns, item_body, terminator))
        self._skip_newlines()

    self._expect(TokenType.ESAC, "Expected 'esac' to close case statement")

    # Parse optional redirections
    redirections: list[RedirectionNode] = []
    while True:
        redir = self._try_parse_redirection()
        if not redir:
            break
        redirections.append(redir)

    return AST.case_node(word, items, redirections)
|
|
1023
|
+
|
|
1024
|
+
def _parse_subshell(self) -> SubshellNode:
    """Parse a subshell: ( ... )."""
    self._expect(TokenType.LPAREN)
    self._skip_newlines()

    # Statements executed inside the subshell.
    statements = self._parse_compound_list()

    self._skip_newlines()
    self._expect(TokenType.RPAREN, "Expected ')' to close subshell")

    # Optional trailing redirections.
    redirs: list[RedirectionNode] = []
    while r := self._try_parse_redirection():
        redirs.append(r)

    return AST.subshell(statements, redirs)
|
|
1044
|
+
|
|
1045
|
+
def _parse_group(self) -> GroupNode:
    """Parse a command group: { ...; }."""
    self._expect(TokenType.LBRACE)
    self._skip_newlines()

    # Statements executed in the current shell environment.
    statements = self._parse_compound_list()

    # Separators (';' / newlines) may precede the closing brace.
    self._skip_separators()
    self._expect(TokenType.RBRACE, "Expected '}' to close command group")

    # Optional trailing redirections.
    redirs: list[RedirectionNode] = []
    while r := self._try_parse_redirection():
        redirs.append(r)

    return AST.group(statements, redirs)
|
|
1065
|
+
|
|
1066
|
+
# Unary operators for conditional expressions ([[ ... ]]).
# Consulted by _parse_cond_primary against bare (word/name) tokens;
# covers file tests (-f, -d, ...), string tests (-z, -n), and
# variable/option tests (-v, -o, -R).
_COND_UNARY_OPS = {
    "-a", "-b", "-c", "-d", "-e", "-f", "-g", "-h", "-k", "-p",
    "-r", "-s", "-t", "-u", "-w", "-x", "-G", "-L", "-N", "-O",
    "-S", "-z", "-n", "-o", "-v", "-R",
}

# Binary operators for conditional expressions ([[ a OP b ]]).
# String comparisons (==, !=, =~, <, >, = — '=' is normalized to '=='
# by _parse_cond_primary), arithmetic comparisons (-eq ... -ge), and
# file comparisons (-nt, -ot, -ef).
_COND_BINARY_OPS = {
    "==", "!=", "=~", "<", ">", "=",
    "-eq", "-ne", "-lt", "-le", "-gt", "-ge",
    "-nt", "-ot", "-ef",
}
|
|
1079
|
+
|
|
1080
|
+
def _parse_conditional_command(self) -> ConditionalCommandNode:
    """Parse a conditional command: [[ expr ]]."""
    start_line = self._current().line
    self._expect(TokenType.DBRACK_START)
    self._skip_newlines()

    # The expression grammar starts at the OR level.
    expression = self._parse_cond_or()

    self._skip_newlines()
    self._expect(TokenType.DBRACK_END, "Expected ']]' to close conditional")

    # Optional redirections after the closing ]].
    redirs: list[RedirectionNode] = []
    while r := self._try_parse_redirection():
        redirs.append(r)

    return ConditionalCommandNode(
        expression=expression,
        redirections=tuple(redirs),
        line=start_line,
    )
|
|
1105
|
+
|
|
1106
|
+
def _parse_cond_or(self):
    """Parse conditional OR: expr || expr (left-associative)."""
    node = self._parse_cond_and()

    self._skip_newlines()
    while self._check(TokenType.OR_OR):
        self._advance()
        self._skip_newlines()
        node = CondOrNode(left=node, right=self._parse_cond_and())
        self._skip_newlines()

    return node
|
|
1119
|
+
|
|
1120
|
+
def _parse_cond_and(self):
    """Parse conditional AND: expr && expr (left-associative)."""
    node = self._parse_cond_not()

    self._skip_newlines()
    while self._check(TokenType.AND_AND):
        self._advance()
        self._skip_newlines()
        node = CondAndNode(left=node, right=self._parse_cond_not())
        self._skip_newlines()

    return node
|
|
1133
|
+
|
|
1134
|
+
def _parse_cond_not(self):
    """Parse conditional NOT: ! expr (stacked '!' nest)."""
    self._skip_newlines()

    # Count leading '!' tokens instead of recursing per negation.
    negations = 0
    while self._check(TokenType.BANG):
        self._advance()
        self._skip_newlines()
        negations += 1

    node = self._parse_cond_primary()
    for _ in range(negations):
        node = CondNotNode(operand=node)
    return node
|
|
1144
|
+
|
|
1145
|
+
def _parse_cond_primary(self):
    """Parse conditional primary: unary/binary/grouping/word.

    Tried in order: parenthesized sub-expression, unary test
    (-f file, -z str, ...), word followed by a binary operator,
    then a lone word (non-empty-string test).
    """
    # Handle grouping: ( expr )
    if self._check(TokenType.LPAREN):
        self._advance()
        expr = self._parse_cond_or()
        self._expect(TokenType.RPAREN)
        return CondGroupNode(expression=expr)

    # Check for end of conditional
    if self._check(TokenType.DBRACK_END):
        raise self._error("Expected conditional expression")

    # Handle unary operators: -f file, -z string, etc.
    if self._check(TokenType.WORD, TokenType.NAME, TokenType.NUMBER):
        first_token = self._current()
        first = first_token.value

        # Check for unary operators (not quoted)
        # NOTE(review): despite the comment, first_token.quoted is never
        # consulted here, so a quoted "-f" would still be treated as an
        # operator — confirm against the lexer's quoting behavior.
        if first_token.type in (TokenType.WORD, TokenType.NAME) and first in self._COND_UNARY_OPS:
            self._advance()
            # Unary operators require an operand
            if self._check(TokenType.DBRACK_END):
                raise self._error(f"Expected operand after {first}")
            # Parse the operand - could be any word including quoted empty strings
            operand = self._parse_word()
            return CondUnaryNode(operator=first, operand=operand)

        # Parse as word, then check for binary operator
        left = self._parse_word()

        # Check for binary operators as words
        if self._check(TokenType.WORD, TokenType.NAME):
            op_token = self._current()
            if op_token.value in self._COND_BINARY_OPS:
                self._advance()
                # For =~ operator, parse RHS as regex pattern (includes parens)
                if op_token.value == "=~":
                    right = self._parse_cond_regex_pattern()
                else:
                    right = self._parse_word()
                # Normalize = to == (both spellings test string equality)
                op = "==" if op_token.value == "=" else op_token.value
                return CondBinaryNode(operator=op, left=left, right=right)

        # Check for < and > which are tokenized as LESS and GREAT
        # (inside [[ ]] these are comparisons, not redirections)
        if self._check(TokenType.LESS):
            self._advance()
            right = self._parse_word()
            return CondBinaryNode(operator="<", left=left, right=right)
        if self._check(TokenType.GREAT):
            self._advance()
            right = self._parse_word()
            return CondBinaryNode(operator=">", left=left, right=right)

        # Just a word (non-empty string test)
        return CondWordNode(word=left)

    raise self._error("Expected conditional expression")
|
|
1204
|
+
|
|
1205
|
+
def _parse_cond_regex_pattern(self) -> Optional[WordNode]:
    """Parse a regex pattern for =~ operator.

    In bash, the RHS of =~ can include unquoted parentheses, pipes, etc.
    which are literal regex characters, not shell operators.
    """
    collected: list = []
    line = self._current().line

    # Tokens that end the pattern.
    stop_tokens = (
        TokenType.DBRACK_END,
        TokenType.AND_AND,
        TokenType.OR_OR,
        TokenType.NEWLINE,
    )
    # Operator tokens that become literal regex characters here.
    literal_tokens = {
        TokenType.LPAREN: "(",
        TokenType.RPAREN: ")",
        TokenType.PIPE: "|",
    }
    word_tokens = (
        TokenType.WORD,
        TokenType.NAME,
        TokenType.NUMBER,
        TokenType.ASSIGNMENT_WORD,
    )

    while not self._check(TokenType.EOF):
        tok = self._current()

        # Conditional terminators end the pattern.
        if tok.type in stop_tokens:
            break

        # Shell punctuation folds into the regex as literal text.
        literal = literal_tokens.get(tok.type)
        if literal is not None:
            collected.append(LiteralPart(value=literal))
            self._advance()
            continue

        # Word-like tokens contribute their parsed parts.
        if tok.type in word_tokens:
            word = self._parse_word()
            if word and word.parts:
                collected.extend(word.parts)
            continue

        # Anything else ends the pattern.
        break

    if not collected:
        return None

    return WordNode(parts=tuple(collected), line=line)
|
|
1255
|
+
|
|
1256
|
+
def _is_cond_word_token(self) -> bool:
    """Check if current token can be a word in conditional context."""
    word_like = (
        TokenType.WORD,
        TokenType.NAME,
        TokenType.NUMBER,
        TokenType.ASSIGNMENT_WORD,  # may show up inside conditionals
    )
    return self._check(*word_like)
|
|
1262
|
+
|
|
1263
|
+
def _parse_arithmetic_command(self) -> ArithmeticCommandNode:
    """Parse an arithmetic command: (( expr )).

    The expression text is reassembled from raw token values and handed
    to the arithmetic sub-parser.
    """
    line = self._current().line
    self._expect(TokenType.DPAREN_START)

    # Collect everything until ))
    expr_text = ""
    depth = 1  # We've consumed one ((

    # Track paren nesting so inner (( )) and ( ) pairs don't end the command.
    while depth > 0 and not self._check(TokenType.EOF):
        if self._check(TokenType.DPAREN_START):
            depth += 1
            expr_text += "(("
            self._advance()
        elif self._check(TokenType.DPAREN_END):
            depth -= 1
            # The outermost )) is consumed but not appended.
            if depth > 0:
                expr_text += "))"
            self._advance()
        elif self._check(TokenType.LPAREN):
            expr_text += "("
            self._advance()
        elif self._check(TokenType.RPAREN):
            expr_text += ")"
            self._advance()
        else:
            # NOTE(review): token values are concatenated without separators,
            # which assumes whitespace never matters inside (( )) — confirm
            # the lexer keeps operators and operands as distinct tokens.
            expr_text += self._current().value
            self._advance()

    # Parse the arithmetic expression
    expr_text = expr_text.strip()
    if expr_text:
        try:
            arith_expr = self._parse_arithmetic_expression(expr_text)
            expr_node = ArithmeticExpressionNode(expression=arith_expr)
        except Exception:
            # If parsing fails, create a simple expression
            # (downstream sees expression=None instead of a parse error here)
            expr_node = ArithmeticExpressionNode(expression=None)
    else:
        expr_node = None

    return ArithmeticCommandNode(
        expression=expr_node,
        line=line,
    )
|
|
1308
|
+
|
|
1309
|
+
def _parse_function_def(self) -> FunctionDefNode:
    """Parse a function definition ('function name { ... }' or 'name() { ... }')."""
    # Optional 'function' keyword form.
    if self._check(TokenType.FUNCTION):
        self._advance()
        self._skip_newlines()

    # Function name follows.
    if not self._check(TokenType.NAME, TokenType.WORD):
        raise self._error("Expected function name")
    func_name = self._advance().value

    # Optional '()' after the name.
    if self._check(TokenType.LPAREN):
        self._advance()
        self._expect(TokenType.RPAREN, "Expected ')' after '(' in function definition")

    self._skip_newlines()

    # The body must be a compound command; dispatch on its opening token.
    body_dispatch = {
        TokenType.LBRACE: self._parse_group,
        TokenType.LPAREN: self._parse_subshell,
        TokenType.IF: self._parse_if,
        TokenType.FOR: self._parse_for,
        TokenType.WHILE: self._parse_while,
        TokenType.UNTIL: self._parse_until,
        TokenType.CASE: self._parse_case,
    }
    for opener, parse_body in body_dispatch.items():
        if self._check(opener):
            body = parse_body()
            break
    else:
        raise self._error("Expected compound command as function body")

    # Optional redirections after the function body.
    redirs: list[RedirectionNode] = []
    while r := self._try_parse_redirection():
        redirs.append(r)

    return AST.function_def(func_name, body, redirs)
|
|
1357
|
+
|
|
1358
|
+
def _parse_word(self) -> WordNode:
    """Consume the current token and parse it into a WordNode."""
    tok = self._advance()
    # Quote flags from the lexer decide how the text is interpreted.
    return self._parse_word_from_string(
        tok.value, quoted=tok.quoted, single_quoted=tok.single_quoted
    )
|
|
1366
|
+
|
|
1367
|
+
def _parse_word_from_string(
    self, value: str, quoted: bool = False, single_quoted: bool = False
) -> WordNode:
    """Parse a string into a WordNode with appropriate parts."""
    parts = self._parse_word_parts(value, quoted, single_quoted)

    if single_quoted:
        # A single-quoted string is one literal chunk; preserve it verbatim.
        if len(parts) == 1 and isinstance(parts[0], LiteralPart):
            return AST.word([SingleQuotedPart(value=parts[0].value)])
        return AST.word(parts)

    if quoted:
        # Double-quoted: wrap the parts to preserve the quote context.
        return AST.word([DoubleQuotedPart(parts=tuple(parts))])

    return AST.word(parts)
|
|
1377
|
+
|
|
1378
|
+
def _parse_word_parts(self, value: str, quoted: bool = False, single_quoted: bool = False) -> list[WordPart]:
    """Parse word parts from a string value.

    Scans *value* left to right and splits it into literal runs and
    expansion parts: $((...)) arithmetic, $(...) and `...` command
    substitution, ${...} and $VAR parameter expansion, quoted segments,
    glob characters, a leading tilde, and backslash escapes.

    Args:
        value: Raw word text to scan.
        quoted: True when *value* is already inside double quotes —
            suppresses quote, glob, tilde and escape handling (expansions
            still apply).
        single_quoted: True when *value* came from single quotes — the
            whole string is returned as one literal, no expansions.

    Returns:
        A non-empty list of WordPart nodes; an empty result is represented
        as [AST.literal("")] so callers always receive at least one part.
    """
    # Single-quoted strings are completely literal - no expansions
    if single_quoted:
        return [AST.literal(value)] if value else []

    parts: list[WordPart] = []
    i = 0
    literal_buffer = ""

    # Accumulated plain characters are flushed into a LiteralPart whenever
    # a special construct begins, and once more at the end of the scan.
    def flush_literal() -> None:
        nonlocal literal_buffer
        if literal_buffer:
            parts.append(AST.literal(literal_buffer))
            literal_buffer = ""

    while i < len(value):
        c = value[i]

        # Handle $((...)) arithmetic expansion - MUST come before $(...) check
        if c == "$" and i + 2 < len(value) and value[i + 1] == "(" and value[i + 2] == "(":
            flush_literal()
            # Find matching closing ))
            # Paired "((" / "))" count as 2 so a lone ")" inside the
            # expression does not terminate the expansion early.
            depth = 2  # We need to find ))
            start = i + 3
            j = start
            while j < len(value):
                if value[j] == "(" and j + 1 < len(value) and value[j + 1] == "(":
                    depth += 2
                    j += 2
                elif value[j] == ")" and j + 1 < len(value) and value[j + 1] == ")":
                    depth -= 2
                    j += 2
                    if depth <= 0:
                        break
                elif value[j] == "(":
                    depth += 1
                    j += 1
                elif value[j] == ")":
                    depth -= 1
                    j += 1
                else:
                    j += 1
            # NOTE(review): assumes a closing "))" was found; for an
            # unterminated "$((" the slice end j - 2 drops the last two
            # characters of *value* — confirm intended for malformed input.
            arith_expr = value[start : j - 2]
            # Parse the arithmetic expression
            arith_node = self._parse_arithmetic_expression(arith_expr)
            parts.append(
                ArithmeticExpansionPart(
                    expression=ArithmeticExpressionNode(expression=arith_node),
                )
            )
            i = j
            continue

        # Handle $(...) command substitution
        if c == "$" and i + 1 < len(value) and value[i + 1] == "(":
            flush_literal()
            # Find matching closing paren
            depth = 1
            start = i + 2
            j = start
            while j < len(value) and depth > 0:
                if value[j] == "(":
                    depth += 1
                elif value[j] == ")":
                    depth -= 1
                j += 1
            cmd_body = value[start : j - 1]
            # Recursively parse the command body
            try:
                parsed_body = Parser().parse(cmd_body)
                parts.append(
                    CommandSubstitutionPart(
                        body=parsed_body,
                        legacy=False,
                    )
                )
            except Exception:
                # If parsing fails, treat as literal
                parts.append(AST.literal(f"$({cmd_body})"))
            i = j
            continue

        # Handle ${...} parameter expansion
        if c == "$" and i + 1 < len(value) and value[i + 1] == "{":
            flush_literal()
            # Find matching closing brace
            depth = 1
            start = i + 2
            j = start
            while j < len(value) and depth > 0:
                if value[j] == "{":
                    depth += 1
                elif value[j] == "}":
                    depth -= 1
                j += 1
            param_content = value[start : j - 1]
            # Parse the parameter expansion content
            parts.append(self._parse_parameter_expansion(param_content))
            i = j
            continue

        # Handle simple $VAR expansion
        if c == "$" and i + 1 < len(value):
            next_c = value[i + 1]
            # Special parameters
            if next_c in "?$#@*!_-0123456789":
                flush_literal()
                parts.append(ParameterExpansionPart(parameter=next_c))
                i += 2
                continue
            # Variable name
            if next_c.isalpha() or next_c == "_":
                flush_literal()
                j = i + 1
                while j < len(value) and (value[j].isalnum() or value[j] == "_"):
                    j += 1
                var_name = value[i + 1 : j]
                parts.append(ParameterExpansionPart(parameter=var_name))
                i = j
                continue

        # Handle backtick command substitution
        if c == "`":
            flush_literal()
            j = i + 1
            while j < len(value) and value[j] != "`":
                if value[j] == "\\" and j + 1 < len(value):
                    j += 2
                else:
                    j += 1
            cmd_raw = value[i + 1 : j]
            # Process backslash escapes in backtick substitution
            # Only \`, \\, and \$ are special inside backticks
            cmd = []
            k = 0
            while k < len(cmd_raw):
                if cmd_raw[k] == "\\" and k + 1 < len(cmd_raw):
                    next_c = cmd_raw[k + 1]
                    if next_c in "`\\$":
                        cmd.append(next_c)
                        k += 2
                    else:
                        cmd.append(cmd_raw[k])
                        k += 1
                else:
                    cmd.append(cmd_raw[k])
                    k += 1
            cmd_body = "".join(cmd)
            try:
                parsed_body = Parser().parse(cmd_body)
                parts.append(
                    CommandSubstitutionPart(
                        body=parsed_body,
                        legacy=True,  # Mark as backtick style
                    )
                )
            except Exception:
                # If parsing fails, treat as literal
                parts.append(AST.literal(f"`{cmd_body}`"))
            i = j + 1
            continue

        # Handle single-quoted strings - completely literal, no expansions
        if c == "'" and not quoted:
            flush_literal()
            j = i + 1
            while j < len(value) and value[j] != "'":
                j += 1
            content = value[i + 1 : j]
            parts.append(SingleQuotedPart(value=content))
            i = j + 1 if j < len(value) else j
            continue

        # Handle double-quoted strings - expansions occur but no word splitting
        if c == '"' and not quoted:
            flush_literal()
            j = i + 1
            # Find matching close quote, respecting escapes
            while j < len(value) and value[j] != '"':
                if value[j] == "\\" and j + 1 < len(value):
                    j += 2
                else:
                    j += 1
            content = value[i + 1 : j]
            # Recursively parse the content with quoted=True
            inner_parts = self._parse_word_parts(content, quoted=True)
            parts.append(DoubleQuotedPart(parts=tuple(inner_parts)))
            i = j + 1 if j < len(value) else j
            continue

        # Handle glob patterns (only if unquoted)
        if not quoted and c in "*?[":
            flush_literal()
            parts.append(GlobPart(pattern=c))
            i += 1
            continue

        # Handle tilde expansion at start
        if c == "~" and i == 0 and not quoted:
            flush_literal()
            # Check for ~user
            j = 1
            while j < len(value) and (value[j].isalnum() or value[j] == "_"):
                j += 1
            if j > 1:
                user = value[1:j]
                parts.append(TildeExpansionPart(user=user))
            else:
                parts.append(TildeExpansionPart(user=None))
            i = j
            continue

        # Handle escape sequences (only in unquoted context - lexer already handled quoted escapes)
        if c == "\\" and i + 1 < len(value) and not quoted:
            flush_literal()
            parts.append(EscapedPart(value=value[i + 1]))
            i += 2
            continue

        # Regular character
        literal_buffer += c
        i += 1

    flush_literal()
    return parts if parts else [AST.literal("")]
|
|
1604
|
+
|
|
1605
|
+
def _parse_parameter_expansion(self, content: str) -> ParameterExpansionPart:
    """Parse the content inside ${...} into a ParameterExpansionPart.

    Handles:
    - ${VAR} - simple expansion
    - ${VAR:-default} - use default if unset
    - ${VAR:=default} - assign default if unset
    - ${VAR:?error} - error if unset
    - ${VAR:+alt} - use alternative if set
    - ${#VAR} - string length
    - ${VAR:offset:length} - substring
    - ${VAR#pattern} - remove shortest prefix
    - ${VAR##pattern} - remove longest prefix
    - ${VAR%pattern} - remove shortest suffix
    - ${VAR%%pattern} - remove longest suffix
    - ${VAR/pattern/replacement} - replace first match
    - ${VAR//pattern/replacement} - replace all matches
    - ${VAR^} - uppercase first char
    - ${VAR^^} - uppercase all
    - ${VAR,} - lowercase first char
    - ${VAR,,} - lowercase all
    """
    if not content:
        return ParameterExpansionPart(parameter="")

    # Handle length operator ${#VAR}
    if content.startswith("#"):
        param = content[1:]
        return ParameterExpansionPart(parameter=param, operation=LengthOp())

    # Find the parameter name (alphanumeric, _, or special chars)
    i = 0
    # Handle special parameters like @, *, ?, $, #, !, -, 0-9
    # NOTE(review): only the first character is taken, so ${10} would be
    # parsed as parameter "1" followed by operation text "0" — confirm
    # positional parameters >= 10 are handled upstream.
    if content and content[0] in "@*?$#!-0123456789":
        param = content[0]
        i = 1
    else:
        # Regular variable name
        while i < len(content) and (content[i].isalnum() or content[i] == "_"):
            i += 1
        param = content[:i]

    # If no operation follows, return simple expansion
    if i >= len(content):
        return ParameterExpansionPart(parameter=param)

    rest = content[i:]

    # Handle :- := :? :+ (with colon = check empty too)
    # Ordering is significant: the two-character ':'-prefixed forms must be
    # matched before the bare one-character forms, and all of them before
    # the plain ':' substring form further down.
    if rest.startswith(":-"):
        word = self._parse_word_from_string(rest[2:])
        return ParameterExpansionPart(
            parameter=param,
            operation=DefaultValueOp(word=word, check_empty=True),
        )
    if rest.startswith("-"):
        word = self._parse_word_from_string(rest[1:])
        return ParameterExpansionPart(
            parameter=param,
            operation=DefaultValueOp(word=word, check_empty=False),
        )
    if rest.startswith(":="):
        word = self._parse_word_from_string(rest[2:])
        return ParameterExpansionPart(
            parameter=param,
            operation=AssignDefaultOp(word=word, check_empty=True),
        )
    if rest.startswith("="):
        word = self._parse_word_from_string(rest[1:])
        return ParameterExpansionPart(
            parameter=param,
            operation=AssignDefaultOp(word=word, check_empty=False),
        )
    if rest.startswith(":?"):
        word = self._parse_word_from_string(rest[2:])
        return ParameterExpansionPart(
            parameter=param,
            operation=ErrorIfUnsetOp(word=word, check_empty=True),
        )
    if rest.startswith("?"):
        word = self._parse_word_from_string(rest[1:])
        return ParameterExpansionPart(
            parameter=param,
            operation=ErrorIfUnsetOp(word=word, check_empty=False),
        )
    if rest.startswith(":+"):
        word = self._parse_word_from_string(rest[2:])
        return ParameterExpansionPart(
            parameter=param,
            operation=UseAlternativeOp(word=word, check_empty=True),
        )
    if rest.startswith("+"):
        word = self._parse_word_from_string(rest[1:])
        return ParameterExpansionPart(
            parameter=param,
            operation=UseAlternativeOp(word=word, check_empty=False),
        )

    # Handle substring ${VAR:offset} or ${VAR:offset:length}
    # Only literal integer offsets/lengths are accepted here; anything
    # non-numeric falls through to the default case at the bottom.
    if rest.startswith(":"):
        # Find offset and length
        parts_str = rest[1:]
        colon_pos = parts_str.find(":")
        if colon_pos >= 0:
            offset_str = parts_str[:colon_pos]
            length_str = parts_str[colon_pos + 1:]
            try:
                offset = int(offset_str) if offset_str else 0
                length = int(length_str) if length_str else None
                return ParameterExpansionPart(
                    parameter=param,
                    operation=SubstringOp(offset=offset, length=length),
                )
            except ValueError:
                pass  # Not a valid substring, fall through
        else:
            try:
                offset = int(parts_str) if parts_str else 0
                return ParameterExpansionPart(
                    parameter=param,
                    operation=SubstringOp(offset=offset, length=None),
                )
            except ValueError:
                pass

    # Handle pattern removal ${VAR#pattern} ${VAR##pattern} ${VAR%pattern} ${VAR%%pattern}
    if rest.startswith("##"):
        pattern = self._parse_word_from_string(rest[2:])
        return ParameterExpansionPart(
            parameter=param,
            operation=PatternRemovalOp(pattern=pattern, greedy=True, side="prefix"),
        )
    if rest.startswith("#"):
        pattern = self._parse_word_from_string(rest[1:])
        return ParameterExpansionPart(
            parameter=param,
            operation=PatternRemovalOp(pattern=pattern, greedy=False, side="prefix"),
        )
    if rest.startswith("%%"):
        pattern = self._parse_word_from_string(rest[2:])
        return ParameterExpansionPart(
            parameter=param,
            operation=PatternRemovalOp(pattern=pattern, greedy=True, side="suffix"),
        )
    if rest.startswith("%"):
        pattern = self._parse_word_from_string(rest[1:])
        return ParameterExpansionPart(
            parameter=param,
            operation=PatternRemovalOp(pattern=pattern, greedy=False, side="suffix"),
        )

    # Handle pattern replacement ${VAR/pattern/replacement} ${VAR//pattern/replacement}
    # The first unescaped '/' after the operator splits pattern from
    # replacement; with no separator the replacement is empty (deletion).
    if rest.startswith("//"):
        slash_pos = rest.find("/", 2)
        if slash_pos >= 0:
            pattern = self._parse_word_from_string(rest[2:slash_pos])
            replacement = self._parse_word_from_string(rest[slash_pos + 1:])
        else:
            pattern = self._parse_word_from_string(rest[2:])
            replacement = self._parse_word_from_string("")
        return ParameterExpansionPart(
            parameter=param,
            operation=PatternReplacementOp(
                pattern=pattern, replacement=replacement, replace_all=True
            ),
        )
    if rest.startswith("/"):
        slash_pos = rest.find("/", 1)
        if slash_pos >= 0:
            pattern = self._parse_word_from_string(rest[1:slash_pos])
            replacement = self._parse_word_from_string(rest[slash_pos + 1:])
        else:
            pattern = self._parse_word_from_string(rest[1:])
            replacement = self._parse_word_from_string("")
        return ParameterExpansionPart(
            parameter=param,
            operation=PatternReplacementOp(
                pattern=pattern, replacement=replacement, replace_all=False
            ),
        )

    # Handle case modification ${VAR^} ${VAR^^} ${VAR,} ${VAR,,}
    if rest.startswith("^^"):
        return ParameterExpansionPart(
            parameter=param,
            operation=CaseModificationOp(direction="upper", all=True),
        )
    if rest.startswith("^"):
        return ParameterExpansionPart(
            parameter=param,
            operation=CaseModificationOp(direction="upper", all=False),
        )
    if rest.startswith(",,"):
        return ParameterExpansionPart(
            parameter=param,
            operation=CaseModificationOp(direction="lower", all=True),
        )
    if rest.startswith(","):
        return ParameterExpansionPart(
            parameter=param,
            operation=CaseModificationOp(direction="lower", all=False),
        )

    # Handle transforms ${VAR@Q} ${VAR@a} ${VAR@A} ${VAR@E} ${VAR@P} ${VAR@K}
    if rest.startswith("@") and len(rest) >= 2 and rest[1] in "QaAEPK":
        op_char = rest[1]
        return ParameterExpansionPart(
            parameter=param,
            operation=TransformOp(operator=op_char),
        )

    # Default: treat the whole thing as parameter name (for compatibility)
    return ParameterExpansionPart(parameter=content)
|
|
1818
|
+
|
|
1819
|
+
def _parse_arithmetic_expression(self, expr: str) -> ArithExpr:
    """Parse an arithmetic expression string into an ArithExpr AST.

    A simple recursive-descent parser supporting integer literals,
    variables, the usual binary operators (+ - * / % ** comparisons,
    logical and bitwise ops, comma), unary operators (- + ! ~ ++ --),
    parentheses, and the ternary `cond ? a : b`.

    An empty (or all-whitespace) expression parses to the literal 0.
    """
    trimmed = expr.strip()
    if not trimmed:
        return ArithNumberNode(value=0)
    # Start at the lowest-precedence level (the comma operator).
    return self._parse_arith_comma(trimmed)
|
|
1835
|
+
|
|
1836
|
+
def _parse_arith_comma(self, expr: str) -> ArithExpr:
    """Parse the comma operator — lowest precedence, left-to-right."""
    level_ops = [',']
    return self._parse_arith_binary(expr, level_ops, self._parse_arith_assignment)
|
|
1839
|
+
|
|
1840
|
+
def _parse_arith_assignment(self, expr: str) -> ArithExpr:
    """Parse assignment operators: = += -= *= /= %= <<= >>= &= |= ^=

    Scans right-to-left (assignment is right-associative) at paren depth 0
    for an assignment operator whose left-hand side is a plain variable
    name or array subscript; otherwise falls through to the ternary level.
    """
    expr = expr.strip()
    # Assignment operators (right-to-left, check longest first)
    assign_ops = ['<<=', '>>=', '+=', '-=', '*=', '/=', '%=', '&=', '|=', '^=', '=']

    # Scan right-to-left for assignment operator (right-associative)
    # Depth counts parentheses while walking backwards, so ')' opens and
    # '(' closes; operators inside parens are skipped.
    depth = 0
    for i in range(len(expr) - 1, -1, -1):
        c = expr[i]
        if c == ')':
            depth += 1
        elif c == '(':
            depth -= 1
        elif depth == 0:
            for op in assign_ops:
                op_start = i - len(op) + 1
                if op_start >= 0 and expr[op_start:i + 1] == op:
                    # Make sure it's not == or != or <= or >=
                    if op == '=' and op_start > 0 and expr[op_start - 1] in '=!<>':
                        continue
                    left = expr[:op_start].strip()
                    right = expr[i + 1:].strip()
                    if left and right:
                        # Left must be a variable name (or array access)
                        var_match = re.match(r'^([a-zA-Z_][a-zA-Z0-9_]*)(\[.+\])?$', left)
                        if var_match:
                            var_name = var_match.group(1)
                            subscript = var_match.group(2)
                            subscript_expr = None
                            if subscript:
                                # Subscript text without the brackets is a
                                # full arithmetic expression of its own.
                                subscript_expr = self._parse_arith_ternary(subscript[1:-1])
                            # Right side may itself contain assignments
                            # (right-associative chaining: a = b = c).
                            value_expr = self._parse_arith_assignment(right)
                            return ArithAssignmentNode(
                                operator=op,
                                variable=var_name,
                                subscript=subscript_expr,
                                value=value_expr
                            )
    return self._parse_arith_ternary(expr)
|
|
1880
|
+
|
|
1881
|
+
def _parse_arith_ternary(self, expr: str) -> ArithExpr:
    """Parse ternary: cond ? a : b

    Finds the first top-level '?', then locates its matching ':' by
    tracking nested ternaries; when no complete ternary is present the
    expression is handed to the next precedence level (logical OR).
    """
    # Find unquoted ? and : for ternary
    depth = 0
    question_pos = -1
    for i, c in enumerate(expr):
        if c == '(':
            depth += 1
        elif c == ')':
            depth -= 1
        elif c == '?' and depth == 0:
            question_pos = i
            break

    # question_pos > 0 also rejects a '?' at position 0 (empty condition).
    if question_pos > 0:
        # Find the matching : (must track nested ternary depth)
        colon_pos = -1
        ternary_depth = 0
        paren_depth = 0
        for i in range(question_pos + 1, len(expr)):
            c = expr[i]
            if c == '(':
                paren_depth += 1
            elif c == ')':
                paren_depth -= 1
            elif c == '?' and paren_depth == 0:
                ternary_depth += 1  # Nested ternary
            elif c == ':' and paren_depth == 0:
                if ternary_depth > 0:
                    ternary_depth -= 1  # Close nested ternary
                else:
                    colon_pos = i
                    break

        if colon_pos > 0:
            # Condition sits above '?': parse it at the || level; both
            # branches may themselves be ternaries (right-nesting).
            condition = self._parse_arith_or(expr[:question_pos].strip())
            consequent = self._parse_arith_ternary(expr[question_pos + 1:colon_pos].strip())
            alternate = self._parse_arith_ternary(expr[colon_pos + 1:].strip())
            return ArithTernaryNode(condition=condition, consequent=consequent, alternate=alternate)

    return self._parse_arith_or(expr)
|
|
1922
|
+
|
|
1923
|
+
def _parse_arith_or(self, expr: str) -> ArithExpr:
    """Parse logical OR (`||`); operands come from the `&&` level."""
    return self._parse_arith_binary(
        expr, ['||'], self._parse_arith_and
    )
|
|
1926
|
+
|
|
1927
|
+
def _parse_arith_and(self, expr: str) -> ArithExpr:
    """Parse logical AND (`&&`); operands come from the bitwise-OR level."""
    return self._parse_arith_binary(
        expr, ['&&'], self._parse_arith_bitor
    )
|
|
1930
|
+
|
|
1931
|
+
def _parse_arith_bitor(self, expr: str) -> ArithExpr:
    """Parse bitwise OR (`|`), skipping over logical `||` tokens."""
    return self._parse_arith_binary(
        expr, ['|'], self._parse_arith_bitxor, exclude=['||']
    )
|
|
1934
|
+
|
|
1935
|
+
def _parse_arith_bitxor(self, expr: str) -> ArithExpr:
    """Parse bitwise XOR (`^`); operands come from the bitwise-AND level."""
    return self._parse_arith_binary(
        expr, ['^'], self._parse_arith_bitand
    )
|
|
1938
|
+
|
|
1939
|
+
def _parse_arith_bitand(self, expr: str) -> ArithExpr:
    """Parse bitwise AND (`&`), skipping over logical `&&` tokens."""
    return self._parse_arith_binary(
        expr, ['&'], self._parse_arith_equality, exclude=['&&']
    )
|
|
1942
|
+
|
|
1943
|
+
def _parse_arith_equality(self, expr: str) -> ArithExpr:
    """Parse equality operators (`==`, `!=`)."""
    return self._parse_arith_binary(
        expr, ['==', '!='], self._parse_arith_comparison
    )
|
|
1946
|
+
|
|
1947
|
+
def _parse_arith_comparison(self, expr: str) -> ArithExpr:
    """Parse relational operators (`<`, `>`, `<=`, `>=`), not shifts."""
    return self._parse_arith_binary(
        expr, ['<=', '>=', '<', '>'], self._parse_arith_shift, exclude=['<<', '>>']
    )
|
|
1950
|
+
|
|
1951
|
+
def _parse_arith_shift(self, expr: str) -> ArithExpr:
    """Parse bit-shift operators (`<<`, `>>`)."""
    return self._parse_arith_binary(
        expr, ['<<', '>>'], self._parse_arith_additive
    )
|
|
1954
|
+
|
|
1955
|
+
def _parse_arith_additive(self, expr: str) -> ArithExpr:
    """Parse `+` and `-`, skipping increment/decrement tokens."""
    return self._parse_arith_binary(
        expr, ['+', '-'], self._parse_arith_multiplicative, exclude=['++', '--']
    )
|
|
1958
|
+
|
|
1959
|
+
def _parse_arith_multiplicative(self, expr: str) -> ArithExpr:
    """Parse `*`, `/` and `%`, skipping the `**` power token."""
    return self._parse_arith_binary(
        expr, ['*', '/', '%'], self._parse_arith_power, exclude=['**']
    )
|
|
1962
|
+
|
|
1963
|
+
def _parse_arith_power(self, expr: str) -> ArithExpr:
    """Parse exponentiation (`**`); right-associative, as in bash."""
    return self._parse_arith_binary(
        expr, ['**'], self._parse_arith_unary, right_assoc=True
    )
|
|
1966
|
+
|
|
1967
|
+
def _parse_arith_binary(self, expr: str, operators: list[str], next_level,
                        exclude: list[str] | None = None, right_assoc: bool = False) -> ArithExpr:
    """Parse binary operators at a given precedence level.

    Args:
        expr: Expression text for this level.
        operators: Operator tokens handled at this level.
        next_level: Parser callable for the next-higher precedence level.
        exclude: Longer tokens that must be skipped so they are not
            mistaken for one of *operators* (e.g. '||' when parsing '|').
        right_assoc: True for right-associative levels such as '**'.

    Returns:
        An ArithBinaryNode when a top-level operator with non-empty
        operands is found, otherwise the result of *next_level*.
    """
    expr = expr.strip()
    depth = 0

    # Sort operators by length (longest first) to match ** before *
    ops = sorted(operators, key=len, reverse=True)
    exclude = exclude or []

    # Scan for operator (right-to-left for left-assoc, left-to-right for right-assoc)
    # All candidate positions at paren depth 0 are collected in one pass.
    positions = []
    i = 0
    while i < len(expr):
        c = expr[i]
        if c == '(':
            depth += 1
            i += 1
        elif c == ')':
            depth -= 1
            i += 1
        elif depth == 0:
            # First check exclusions - skip past them entirely
            skip_len = 0
            for ex in exclude:
                if expr[i:i+len(ex)] == ex:
                    skip_len = len(ex)
                    break
            if skip_len:
                i += skip_len
                continue
            # Check for operators
            matched = False
            for op in ops:
                if expr[i:i+len(op)] == op:
                    positions.append((i, op))
                    matched = True
                    i += len(op)
                    break
            if not matched:
                i += 1
        else:
            i += 1

    if positions:
        # For left-associative, take rightmost; for right-associative, take leftmost
        pos, op = positions[-1] if not right_assoc else positions[0]
        left = expr[:pos].strip()
        right = expr[pos + len(op):].strip()
        # Both operands must be non-empty; a match with an empty side (e.g.
        # a unary '-' picked up here) falls through to next_level instead.
        if left and right:
            left_node = self._parse_arith_binary(left, operators, next_level, exclude, right_assoc) if not right_assoc else next_level(left)
            right_node = next_level(right) if not right_assoc else self._parse_arith_binary(right, operators, next_level, exclude, right_assoc)
            return ArithBinaryNode(operator=op, left=left_node, right=right_node)

    return next_level(expr)
|
|
2022
|
+
|
|
2023
|
+
def _parse_arith_unary(self, expr: str) -> ArithExpr:
    """Parse unary operators: - + ! ~ ++ --

    Prefix '++'/'--' are matched before single '+'/'-' so the longer
    token wins; anything without a prefix operator is passed on to the
    postfix level.
    """
    expr = expr.strip()
    # Pre-increment/decrement (must check before single +/-)
    if expr.startswith('++'):
        operand = self._parse_arith_unary(expr[2:].strip())
        return ArithUnaryNode(operator='++', operand=operand, prefix=True)
    if expr.startswith('--'):
        operand = self._parse_arith_unary(expr[2:].strip())
        return ArithUnaryNode(operator='--', operand=operand, prefix=True)
    # Reject '- -x' style so a spaced double negative is not treated as
    # a single unary minus over another minus token.
    if expr.startswith('-') and not expr[1:].lstrip().startswith('-'):
        operand = self._parse_arith_unary(expr[1:].strip())
        return ArithUnaryNode(operator='-', operand=operand, prefix=True)
    if expr.startswith('+') and len(expr) > 1:
        operand = self._parse_arith_unary(expr[1:].strip())
        return ArithUnaryNode(operator='+', operand=operand, prefix=True)
    if expr.startswith('!'):
        operand = self._parse_arith_unary(expr[1:].strip())
        return ArithUnaryNode(operator='!', operand=operand, prefix=True)
    if expr.startswith('~'):
        operand = self._parse_arith_unary(expr[1:].strip())
        return ArithUnaryNode(operator='~', operand=operand, prefix=True)
    return self._parse_arith_postfix(expr)
|
|
2046
|
+
|
|
2047
|
+
def _parse_arith_postfix(self, expr: str) -> ArithExpr:
    """Parse postfix increment/decrement (`x++`, `x--`).

    When neither suffix is present the text is parsed as a primary.
    """
    text = expr.strip()
    for suffix in ('++', '--'):
        if text.endswith(suffix):
            target = self._parse_arith_primary(text[:-2].strip())
            return ArithUnaryNode(operator=suffix, operand=target, prefix=False)
    return self._parse_arith_primary(text)
|
|
2058
|
+
|
|
2059
|
+
def _parse_arith_primary(self, expr: str) -> ArithExpr:
    """Parse a primary arithmetic operand.

    Handles, in order: the empty expression (evaluates to 0), a fully
    parenthesized sub-expression, decimal/hex/octal literals, bash
    `base#value` constants, and finally variable references (with an
    optional leading `$` and optional `{braces}`). Unrecognized text
    falls back to a variable node so evaluation can report it.
    """
    expr = expr.strip()

    # Empty expression
    if not expr:
        return ArithNumberNode(value=0)

    # Parenthesized expression
    if expr.startswith('(') and expr.endswith(')'):
        # Fix: only strip the outer parens when the leading '(' actually
        # matches the trailing ')'. Previously an input shaped like
        # "(a)+(b)" would have been mis-grouped with inner text "a)+(b".
        depth = 0
        is_group = True
        for pos, ch in enumerate(expr):
            if ch == '(':
                depth += 1
            elif ch == ')':
                depth -= 1
                if depth == 0 and pos != len(expr) - 1:
                    is_group = False
                    break
        if is_group:
            inner = expr[1:-1].strip()
            return ArithGroupNode(expression=self._parse_arith_ternary(inner))

    # Decimal number (optionally negative)
    if expr.isdigit() or (expr.startswith('-') and expr[1:].isdigit()):
        return ArithNumberNode(value=int(expr))

    # Hex number
    if expr.startswith('0x') or expr.startswith('0X'):
        try:
            return ArithNumberNode(value=int(expr, 16))
        except ValueError:
            pass

    # Octal number (leading zero); "09"-style invalid octals fall through
    # to the generic int() attempt below.
    if expr.startswith('0') and len(expr) > 1 and expr[1:].isdigit():
        try:
            return ArithNumberNode(value=int(expr, 8))
        except ValueError:
            pass

    # Base N constant: base#value (e.g., 2#101, 16#ff, 36#z)
    base_match = re.match(r'^(\d+)#([a-zA-Z0-9@_]+)$', expr)
    if base_match:
        base = int(base_match.group(1))
        value_str = base_match.group(2)  # Keep case for bases > 36
        if 2 <= base <= 64:
            try:
                result = self._parse_base_n_value(value_str, base)
                return ArithNumberNode(value=result)
            except ValueError:
                pass

    # Variable (possibly with $ and optional braces)
    var_name = expr
    if var_name.startswith('$'):
        var_name = var_name[1:]
        if var_name.startswith('{') and var_name.endswith('}'):
            var_name = var_name[1:-1]

    # Check if it's a valid identifier
    if var_name and (var_name[0].isalpha() or var_name[0] == '_'):
        if all(c.isalnum() or c == '_' for c in var_name):
            return ArithVariableNode(name=var_name)

    # Try as number anyway (handles stray whitespace/sign forms int accepts)
    try:
        return ArithNumberNode(value=int(expr))
    except ValueError:
        pass

    # Fallback: treat as variable
    return ArithVariableNode(name=expr)
|
|
2122
|
+
|
|
2123
|
+
def _parse_base_n_value(self, value_str: str, base: int) -> int:
|
|
2124
|
+
"""Parse a value in base N (2-64).
|
|
2125
|
+
|
|
2126
|
+
Digits:
|
|
2127
|
+
- 0-9 = values 0-9
|
|
2128
|
+
- a-z = values 10-35
|
|
2129
|
+
- A-Z = values 36-61 (or 10-35 if base <= 36)
|
|
2130
|
+
- @ = 62, _ = 63
|
|
2131
|
+
"""
|
|
2132
|
+
result = 0
|
|
2133
|
+
for char in value_str:
|
|
2134
|
+
if char.isdigit():
|
|
2135
|
+
digit = int(char)
|
|
2136
|
+
elif 'a' <= char <= 'z':
|
|
2137
|
+
digit = ord(char) - ord('a') + 10
|
|
2138
|
+
elif 'A' <= char <= 'Z':
|
|
2139
|
+
if base <= 36:
|
|
2140
|
+
# Case insensitive for bases <= 36
|
|
2141
|
+
digit = ord(char.lower()) - ord('a') + 10
|
|
2142
|
+
else:
|
|
2143
|
+
# A-Z are 36-61 for bases > 36
|
|
2144
|
+
digit = ord(char) - ord('A') + 36
|
|
2145
|
+
elif char == '@':
|
|
2146
|
+
digit = 62
|
|
2147
|
+
elif char == '_':
|
|
2148
|
+
digit = 63
|
|
2149
|
+
else:
|
|
2150
|
+
raise ValueError(f"Invalid digit {char} for base {base}")
|
|
2151
|
+
|
|
2152
|
+
if digit >= base:
|
|
2153
|
+
raise ValueError(f"Digit {char} out of range for base {base}")
|
|
2154
|
+
|
|
2155
|
+
result = result * base + digit
|
|
2156
|
+
return result
|
|
2157
|
+
|
|
2158
|
+
|
|
2159
|
+
def parse(input_text: str) -> ScriptNode:
    """Convenience function to parse input."""
    return Parser().parse(input_text)