rubish-gem 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.dockerignore +23 -0
- data/Dockerfile +54 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +12 -0
- data/lib/rubish/arithmetic.rb +140 -0
- data/lib/rubish/ast.rb +168 -0
- data/lib/rubish/builtins/arithmetic.rb +129 -0
- data/lib/rubish/builtins/bind_readline.rb +834 -0
- data/lib/rubish/builtins/directory_stack.rb +182 -0
- data/lib/rubish/builtins/echo_printf.rb +510 -0
- data/lib/rubish/builtins/hash_directories.rb +260 -0
- data/lib/rubish/builtins/read.rb +299 -0
- data/lib/rubish/builtins/trap.rb +324 -0
- data/lib/rubish/codegen.rb +1273 -0
- data/lib/rubish/completion.rb +840 -0
- data/lib/rubish/completions/bash_helpers.rb +530 -0
- data/lib/rubish/completions/git.rb +431 -0
- data/lib/rubish/completions/help_parser.rb +453 -0
- data/lib/rubish/completions/ssh.rb +114 -0
- data/lib/rubish/config.rb +267 -0
- data/lib/rubish/data/builtin_help.rb +716 -0
- data/lib/rubish/data/completion_data.rb +53 -0
- data/lib/rubish/data/readline_config.rb +47 -0
- data/lib/rubish/data/shell_options.rb +251 -0
- data/lib/rubish/data_define.rb +65 -0
- data/lib/rubish/execution_context.rb +1124 -0
- data/lib/rubish/expansion.rb +988 -0
- data/lib/rubish/history.rb +663 -0
- data/lib/rubish/lazy_loader.rb +127 -0
- data/lib/rubish/lexer.rb +1194 -0
- data/lib/rubish/parser.rb +1167 -0
- data/lib/rubish/prompt.rb +766 -0
- data/lib/rubish/repl.rb +2267 -0
- data/lib/rubish/runtime/builtins.rb +7222 -0
- data/lib/rubish/runtime/command.rb +1153 -0
- data/lib/rubish/runtime/job.rb +153 -0
- data/lib/rubish/runtime.rb +1169 -0
- data/lib/rubish/shell_state.rb +241 -0
- data/lib/rubish/startup_profiler.rb +67 -0
- data/lib/rubish/version.rb +5 -0
- data/lib/rubish.rb +60 -0
- data/sig/rubish.rbs +4 -0
- metadata +85 -0
data/lib/rubish/lexer.rb
ADDED
|
@@ -0,0 +1,1194 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Rubish
|
|
4
|
+
class Lexer
|
|
5
|
+
# A lexed token. +type+ is a Symbol tag (e.g. :WORD, :PIPE); +value+ is the
# matched source text, or a Hash for structured tokens such as :ARRAY_ASSIGN
# and :FUNC_CALL.
Token = Data.define(:type, :value)
|
|
6
|
+
|
|
7
|
+
# Maps operator source text to its token type.
# Single-character keys double as word terminators while scanning words.
OPERATORS = {
  # Pipelines
  '|'   => :PIPE,
  '|&'  => :PIPE_BOTH,        # cmd1 |& cmd2 is cmd1 2>&1 | cmd2

  # Command separators and case-clause terminators
  ';'   => :SEMICOLON,
  ';;'  => :DOUBLE_SEMI,      # ends a case pattern body
  ';&'  => :CASE_FALL,        # fall through into the next pattern body
  ';;&' => :CASE_CONT,        # keep testing the following patterns
  '&'   => :AMPERSAND,

  # Redirections
  '>'   => :REDIRECT_OUT,
  '>|'  => :REDIRECT_CLOBBER, # overwrite even when noclobber is set
  '>>'  => :REDIRECT_APPEND,
  '<'   => :REDIRECT_IN,
  '<<'  => :HEREDOC,          # here document
  '<<-' => :HEREDOC_INDENT,   # here document with indented delimiter
  '<<<' => :HERESTRING,       # here string
  '2>'  => :REDIRECT_ERR,
  '>&'  => :DUP_OUT,          # duplicate an output file descriptor
  '<&'  => :DUP_IN,           # duplicate an input file descriptor

  # Logical connectives
  '&&'  => :AND,
  '||'  => :OR,

  # Grouping
  '('   => :LPAREN,
  ')'   => :RPAREN,
  '()'  => :PARENS,           # function definition marker: name() { }
  '{'   => :LBRACE,
  '}'   => :RBRACE
}.freeze
|
|
33
|
+
|
|
34
|
+
# Reserved words mapped to their token types; each type is simply the
# upper-cased word as a Symbol ('if' => :IF, 'lazy_load' => :LAZY_LOAD).
#
# 'do', 'done', 'in' and 'end' are deliberately absent: they are emitted as
# WORD tokens and recognised by the parser, so they remain usable as ordinary
# command arguments (e.g. "echo done").
KEYWORDS = %w[
  if unless then else elif elsif fi
  while until for select
  function def
  case when esac
  coproc time lazy_load
].to_h { |word| [word, word.upcase.to_sym] }.freeze
|
|
57
|
+
|
|
58
|
+
# Sets up a lexer over +input+ (the text to tokenize), positioned at its
# first character and with no previous-token context recorded yet.
def initialize(input)
  @input = input
  @pos = 0
  # Context remembered between tokens; both start out unset.
  @last_token_type = @last_word_value = nil
end
|
|
64
|
+
|
|
65
|
+
# Scans the input from the current position to its end and returns the
# collected tokens as an Array.
#
# After every emitted token the lexer records its type in @last_token_type,
# and, for WORD and SELECT tokens, its value in @last_word_value (used later
# for block detection). A nil result from read_token is skipped.
def tokenize
  collected = []
  until @pos >= @input.length
    skip_whitespace
    break if @pos >= @input.length

    token = read_token
    next unless token

    collected << token
    @last_token_type = token.type
    @last_word_value = token.value if %i[WORD SELECT].include?(token.type)
  end
  collected
end
|
|
81
|
+
|
|
82
|
+
private
|
|
83
|
+
|
|
84
|
+
# Advances @pos past any run of spaces and tabs.
# Newlines are left in place because they act as command separators.
def skip_whitespace
  @pos += 1 while @pos < @input.length && [' ', "\t"].include?(@input[@pos])
end
|
|
89
|
+
|
|
90
|
+
# Advances @pos past a run of consecutive "\n" characters, so that several
# blank lines collapse into a single separator.
def skip_newlines
  @pos += 1 until @pos >= @input.length || @input[@pos] != "\n"
end
|
|
94
|
+
|
|
95
|
+
# Reads one token starting at @pos and returns it (or nil when nothing should
# be emitted, e.g. a trailing or duplicate newline separator).
#
# Checks run from most to least specific: newline separators, here-strings
# and here-docs, {varname} redirections, arithmetic (( )), [[ ]], process
# substitution, multi-character operators, single-character operators, and
# finally Ruby-style literals/blocks or plain words.
def read_token
  # A run of newlines acts as one command separator (like ';').
  if @input[@pos] == "\n"
    skip_newlines
    skip_whitespace
    # No separator at end of input or directly after another separator.
    return nil if @pos >= @input.length || @last_token_type == :SEMICOLON

    return Token.new(:SEMICOLON, "\n")
  end

  # Three-character here-string / indented here-doc operators come first.
  case @input[@pos, 3]
  when '<<<'
    @pos += 3
    return read_herestring
  when '<<-'
    @pos += 3
    return read_heredoc_delimiter(:HEREDOC_INDENT)
  end

  # {varname} redirection: {fd}>file, {fd}<file, ...
  return read_varname_redirect if @input[@pos] == '{' && looks_like_varname_redirect?

  pair = @input[@pos, 2]
  case pair
  when '<<'
    @pos += 2
    return read_heredoc_delimiter(:HEREDOC)
  when '(('
    # Arithmetic command (( expr )) versus nested subshell ((cmd)): inspect
    # what follows the parens after optional blanks.
    probe = @pos + 2
    probe += 1 while probe < @input.length && @input[probe] =~ /[ \t]/
    head = @input[probe, 30] || ''
    arithmetic_openers = [
      /\A-?\d/,                                                         # number
      /\A\$/,                                                           # variable reference
      /\A[!~]/,                                                         # unary operators
      /\A(\+\+|--)[a-zA-Z_]/,                                           # pre-increment/decrement
      /\A[a-zA-Z_][a-zA-Z0-9_]*\s*(\+\+|--|[=+\-*\/%<>&|^]=?|\[)/,      # identifier with operator
      /\A\(\s*[\d$!~(+-]/,                                              # grouped arithmetic expression
      /\A;/                                                             # empty init in for ((; cond; update))
    ]
    if arithmetic_openers.any? { |pattern| pattern.match?(head) }
      @pos += 2
      return read_arithmetic_command
    end
    # Otherwise fall through: the parens are lexed as nested subshells.
  when '[['
    # [[ followed by whitespace, '-' or '!' (or end of input) is the
    # extended test; anything else is left to the array/glob handling below.
    follower = @input[@pos + 2]
    if follower.nil? || follower =~ /[\s\-!]/
      @pos += 2
      return Token.new(:DOUBLE_LBRACKET, '[[')
    end
  when ']]'
    @pos += 2
    return Token.new(:DOUBLE_RBRACKET, ']]')
  when '<('
    return read_process_substitution(:PROC_SUB_IN)
  when '>('
    return read_process_substitution(:PROC_SUB_OUT)
  end

  # ;;& must be tried before the two-character ;; and ;&.
  if @input[@pos, 3] == ';;&'
    @pos += 3
    return Token.new(:CASE_CONT, ';;&')
  end

  if %w[>> >| 2> >& <& && || () ;; ;& |&].include?(pair)
    @pos += 2
    return Token.new(OPERATORS[pair], pair)
  end

  # Single-character operators. "()" was consumed above for function
  # definitions, so a lone "(" here opens a subshell; a lone "<" is a plain
  # input redirect because here-docs were handled earlier.
  lead = @input[@pos]
  if %w[| ; & > ) ( <].include?(lead)
    @pos += 1
    return Token.new(OPERATORS[lead], lead)
  end

  # Was the previous word an iterator that accepts an implicit-'it' block?
  after_iterator = %w[each .each map .map select .select detect .detect].include?(@last_word_value)

  # Ruby literals, blocks and plain words.
  case lead
  when '['
    # "[" followed by whitespace is the test command.
    if @input[@pos + 1] =~ /[\s]/
      @pos += 1
      return Token.new(:WORD, '[')
    end
    # Otherwise: glob pattern such as [abc]file, or an array literal.
    looks_like_glob_bracket? ? read_word : read_array
  when '/'
    read_regexp_or_word
  when '{'
    if looks_like_brace_expansion?
      # {a,b,c} or {1..5}
      read_word
    else
      # A Ruby block has '|' after optional whitespace.
      peek = @pos + 1
      peek += 1 while peek < @input.length && @input[peek] =~ /\s/
      if @input[peek] == '|' || after_iterator
        # Explicit block parameters, or a block after each/map/select/detect
        # that uses the implicit 'it' variable (accessed as $it).
        read_block
      elsif %i[IF WHILE UNTIL ELIF ELSIF UNLESS CASE].include?(@last_token_type)
        # Ruby expression after a conditional keyword: if { condition }
        read_ruby_condition
      else
        # Shell function body or standalone brace.
        @pos += 1
        Token.new(:LBRACE, '{')
      end
    end
  when '}'
    @pos += 1
    Token.new(:RBRACE, '}')
  when '.'
    # .identifier(...) starts a method chain; .hidden and ./path are words.
    if looks_like_method_chain_start?
      @pos += 1
      Token.new(:DOT, '.')
    else
      read_word
    end
  when 'd'
    # Ruby "do" block: only when "do" is followed by whitespace or '|'
    # (so "done" and other words are untouched).
    if @input[@pos, 2] == 'do' && @input[@pos + 2] =~ /[\s|]/
      peek = @pos + 2
      peek += 1 while peek < @input.length && @input[peek] =~ /\s/
      # Block args (|...|) or a preceding iterator distinguish it from the
      # shell's own "do".
      if @input[peek] == '|' || after_iterator
        read_do_block
      else
        read_word
      end
    else
      read_word
    end
  else
    read_word
  end
end
|
|
294
|
+
|
|
295
|
+
# Decides whether the '[' at @pos opens a glob pattern ([abc]file, [a-z])
# rather than an array literal ([1, 2, 3]). Returns true for a glob.
#
# Scans forward without moving @pos:
# - whitespace before the closing ']' means array;
# - a closing ']' directly followed by a word character means glob;
# - otherwise a comma seen inside the brackets means array;
# - an unclosed bracket is treated as an array.
def looks_like_glob_bracket?
  comma_seen = false
  ((@pos + 1)...@input.length).each do |idx|
    case @input[idx]
    when ']'
      follower = @input[idx + 1]
      return true if follower && follower =~ /[a-zA-Z0-9_.\-]/

      # Followed by space/operator/end: could be either, so let commas decide.
      return !comma_seen
    when ','
      comma_seen = true
    when /[\s]/
      # Glob patterns are compact; whitespace suggests an array.
      return false
    end
  end
  false
end
|
|
320
|
+
|
|
321
|
+
# Decides whether the '{' at @pos starts a brace expansion such as {a,b,c}
# or {1..5}, as opposed to a function body / block like { cmd; }.
#
# Scans forward without moving @pos. Returns true only when the braces
# balance, no whitespace occurs before the matching '}', and a ',' or '..'
# appears at the outermost nesting level.
def looks_like_brace_expansion?
  cursor = @pos + 1
  nesting = 1
  separator_seen = false # a top-level ',' or '..' has been found

  until cursor >= @input.length || nesting.zero?
    current = @input[cursor]
    # Whitespace inside the braces suggests a function body.
    return false if [' ', "\t", "\n"].include?(current)

    if current == '{'
      nesting += 1
    elsif current == '}'
      nesting -= 1
    elsif current == ','
      separator_seen = true if nesting == 1
    elsif current == '.' && @input[cursor + 1] == '.'
      separator_seen = true if nesting == 1
      cursor += 1 # step over the second dot
    end
    cursor += 1
  end

  nesting.zero? && separator_seen
end
|
|
354
|
+
|
|
355
|
+
# Consumes a bracketed array literal starting at the '[' under @pos and
# returns it as an :ARRAY token holding the raw source text.
#
# Nested brackets are balanced; quoted strings are skipped via the string
# readers so brackets inside them are ignored. An unclosed literal runs to
# the end of the input.
def read_array
  origin = @pos
  nesting = 0
  while @pos < @input.length
    case @input[@pos]
    when '"'
      read_double_quoted_string
      next
    when "'"
      read_single_quoted_string
      next
    when '['
      nesting += 1
    when ']'
      nesting -= 1
      if nesting.zero?
        @pos += 1 # include the closing bracket
        break
      end
    end
    @pos += 1
  end
  Token.new(:ARRAY, @input[origin...@pos])
end
|
|
379
|
+
|
|
380
|
+
# Called with @pos on a '/': decides whether the text is a regexp literal
# (/pattern/flags) or a path-like word, and delegates to read_regexp or
# read_word accordingly.
#
# Regexp: a closing '/' (plus optional i/m/x/o flags) followed by whitespace,
#   an operator other than '{', or end of input, whose content is not purely
#   path characters.
# Path:   /foo/bar (text continues after the slash) or /bin/ (content is only
#   alphanumerics, '_', '.', '-', '/').
#
# The lookahead is confined to the current line: a newline ends the scan and
# also counts as a terminator after the closing '/'. Without this, "/fo+/"
# immediately followed by a newline was lexed as a WORD, and a path on one
# line could be merged with text on the next into a single regexp.
def read_regexp_or_word
  lookahead = @pos + 1
  while lookahead < @input.length
    char = @input[lookahead]
    break if char.match?(/[ \t\n]/)

    if char == '/' && lookahead > @pos + 1
      # Inspect what follows the candidate closing slash, past any flags.
      after_slash = lookahead + 1
      after_slash += 1 while after_slash < @input.length && @input[after_slash].match?(/[imxo]/)
      next_char = @input[after_slash]
      # '{' is excluded because it may be brace expansion in a path: /tmp/{a,b}
      terminated = next_char.nil? || next_char.match?(/[ \t\n]/) ||
                   (OPERATORS.key?(next_char) && next_char != '{')
      if terminated
        content = @input[@pos + 1...lookahead]
        # Only path characters (e.g. /bin/, /opt/homebrew/): treat as a word.
        break if content.match?(/\A[a-zA-Z0-9_.\-\/]+\z/)

        return read_regexp
      end
      # Otherwise keep scanning: it is a path like /tmp/file.
    end

    # A backslash escapes the following character.
    if char == '\\'
      lookahead += 2
      next
    end
    lookahead += 1
  end
  # Not a regexp.
  read_word
end
|
|
419
|
+
|
|
420
|
+
# Consumes a regexp literal starting at the '/' under @pos, including any
# trailing i/m/x/o flags, and returns it as a :REGEXP token with the raw
# source text. Backslash-escaped characters (such as \/) do not end the
# literal. An unterminated literal runs to the end of the input.
def read_regexp
  origin = @pos
  @pos += 1 # step over the opening slash
  while @pos < @input.length
    case @input[@pos]
    when '\\'
      @pos += 2 # escape: skip the backslash and the escaped character
    when '/'
      @pos += 1
      # Trailing flags
      @pos += 1 while @pos < @input.length && @input[@pos] =~ /[imxo]/
      break
    else
      @pos += 1
    end
  end
  Token.new(:REGEXP, @input[origin...@pos])
end
|
|
439
|
+
|
|
440
|
+
# Consumes a brace-delimited Ruby block starting at the '{' under @pos and
# returns it as a :BLOCK token whose value is the raw text including both
# braces. Nested braces are balanced; quoted strings are skipped via the
# string readers. An unclosed block runs to the end of the input.
def read_block
  origin = @pos
  nesting = 0
  while @pos < @input.length
    case @input[@pos]
    when '"'
      read_double_quoted_string
      next
    when "'"
      read_single_quoted_string
      next
    when '{'
      nesting += 1
    when '}'
      nesting -= 1
      if nesting.zero?
        @pos += 1 # include the closing brace
        break
      end
    end
    @pos += 1
  end
  Token.new(:BLOCK, @input[origin...@pos])
end
|
|
464
|
+
|
|
465
|
+
# Reads a Ruby condition block, { expression }, starting at the '{' under
# @pos. Returns a :RUBY_CONDITION token whose value is the expression text
# with the outer braces removed and surrounding whitespace stripped.
# Nested braces are balanced and quoted strings are skipped via the string
# readers; @pos ends up just past the closing brace.
def read_ruby_condition
  @pos += 1 # step over the opening brace
  body_start = @pos
  nesting = 1

  while @pos < @input.length && nesting > 0
    case @input[@pos]
    when '"'
      read_double_quoted_string
      next
    when "'"
      read_single_quoted_string
      next
    when '{'
      nesting += 1
    when '}'
      nesting -= 1
      break if nesting.zero? # leave @pos on the closing brace
    end
    @pos += 1
  end

  expression = @input[body_start...@pos].strip
  @pos += 1 # step over the closing brace
  Token.new(:RUBY_CONDITION, expression)
end
|
|
493
|
+
|
|
494
|
+
# Consumes a Ruby "do ... end" block starting at the "do" under @pos and
# returns it as a :BLOCK token with the raw text from "do" through the
# matching "end".
#
# Nested "do"/"end" pairs are balanced. A keyword only counts when it is
# preceded by whitespace and followed by whitespace, '|' (and additionally
# ';' for "end") or the end of input. Quoted strings are skipped via the
# string readers. An unterminated block runs to the end of the input.
def read_do_block
  origin = @pos
  nesting = 1
  @pos += 2 # step over the opening "do"
  while @pos < @input.length
    at_word_start = @pos == 0 || @input[@pos - 1] =~ /\s/

    # Nested "do" opens another level.
    if at_word_start && @input[@pos, 2] == 'do' &&
       (@input[@pos + 2].nil? || @input[@pos + 2] =~ /[\s|]/)
      nesting += 1
      @pos += 2
      next
    end

    # "end" closes a level; the outermost one finishes the block.
    if at_word_start && @input[@pos, 3] == 'end' &&
       (@input[@pos + 3].nil? || @input[@pos + 3] =~ /[\s|;]/)
      nesting -= 1
      if nesting.zero?
        @pos += 3
        break
      end
    end

    case @input[@pos]
    when '"'
      read_double_quoted_string
      next
    when "'"
      read_single_quoted_string
      next
    end
    @pos += 1
  end
  Token.new(:BLOCK, @input[origin...@pos])
end
|
|
526
|
+
|
|
527
|
+
# Reads a shell word starting at @pos and returns the resulting token, or nil
# when no characters could be consumed.
#
# The word ends at whitespace, at a single-character operator, at a '{' that
# is not brace expansion, at a leading '[' that is not a glob, or at a '.'
# that starts a method chain. Quoted strings, backslash escapes, backticks,
# $(...) and ${...} are consumed as part of the word by their readers.
#
# Result, in priority order:
# - :ARRAY_ASSIGN for VAR=(...) / VAR+=(...);
# - :FUNC_CALL for cmd(arg, ...) call syntax;
# - the keyword's own token type when the text is in KEYWORDS;
# - :WORD otherwise.
#
# Note: "2>" only ends the scan at the very start of a word. Mid-word, a '2'
# followed by '>' is an ordinary character, so "foo2>bar" lexes as the word
# "foo2" followed by '>' (matching shell semantics) instead of "foo" + "2>".
def read_word
  start = @pos
  while @pos < @input.length
    char = @input[@pos]

    # '{' is examined before the generic operator check: it may belong to the
    # word (brace expansion) or be an operator (function body).
    if char == '{'
      if @pos > start && @input[@pos - 1] == '$'
        # Preceded by '$': fall through and treat as part of the word.
      elsif looks_like_brace_expansion?
        read_brace_expansion
        next
      else
        break
      end
    end

    # Generic terminators ('{' was dealt with above).
    break if char =~ /[ \t\n]/ || (OPERATORS.key?(char) && char != '{')
    # A stderr redirect can only begin a word, never split one.
    break if @pos == start && @input[@pos, 2] == '2>'
    # A leading '[' that is not a glob belongs to an array literal;
    # mid-word '[' is a glob such as file[12].txt.
    break if char == '[' && @pos == start && !looks_like_glob_bracket?
    # '.' ends the word only for method chains (ls.grep(/foo/)), not for
    # file.txt or ./script.
    break if char == '.' && looks_like_method_chain_start?

    if char == '\\'
      @pos += 2 # backslash escape: skip the escaped character too
    elsif char == '"'
      read_double_quoted_string
    elsif char == '$' && @input[@pos + 1] == "'"
      read_ansi_c_quoted_string # $'...' ANSI-C quoting
    elsif char == "'"
      read_single_quoted_string
    elsif char == '`'
      read_backtick_substitution # `...`
    elsif char == '$' && @input[@pos + 1] == '('
      read_command_substitution # $(...)
    elsif char == '$' && @input[@pos + 1] == '{'
      read_braced_variable # ${VAR}
    else
      @pos += 1
    end
  end

  value = @input[start...@pos]
  return nil if value.empty?

  # Array assignment: VAR=(...) or VAR+=(...)
  if (value.end_with?('=') || value.end_with?('+=')) && @input[@pos] == '('
    return read_array_assignment(value)
  end

  # Function-call syntax cmd(arg1, arg2), but not:
  # - cmd() (function definition);
  # - extglob patterns such as word?(pat), @(pat), !(pat);
  # - the name following def/function;
  # - names that do not look like commands, or Ruby-like call code.
  if @input[@pos] == '(' && @input[@pos + 1] != ')' &&
     !extglob_prefix?(value) && ![:DEF, :FUNCTION].include?(@last_token_type) &&
     valid_func_call_name?(value) && !looks_like_ruby_call?
    return read_func_call(value)
  end

  Token.new(KEYWORDS.fetch(value, :WORD), value)
end
|
|
609
|
+
|
|
610
|
+
# Reads the parenthesised element list of an array assignment,
# VAR=(elem1 elem2 ...) or VAR+=(...), with @pos on the opening '('.
#
# var_part - the text already consumed before the '(' (e.g. "arr=" or
#            "arr+=").
#
# Returns an :ARRAY_ASSIGN token whose value is
# {var: var_part, elements: [raw element strings]}. @pos ends just past the
# closing ')' when there is one.
#
# Elements may be separated by spaces, tabs and newlines, so multi-line
# assignments are supported. Newlines must be skipped here: read_array_element
# stops at a newline without consuming it, so leaving one under @pos would
# make this loop spin forever without progress.
def read_array_assignment(var_part)
  @pos += 1 # skip opening (
  elements = []

  while @pos < @input.length
    # Skip separators between elements, including line breaks.
    @pos += 1 while @pos < @input.length && @input[@pos].match?(/[ \t\n]/)
    break if @pos >= @input.length || @input[@pos] == ')'

    elem = read_array_element
    elements << elem if elem && !elem.empty?
  end

  @pos += 1 if @input[@pos] == ')' # skip closing )

  Token.new(:ARRAY_ASSIGN, {var: var_part, elements: elements})
end
|
|
627
|
+
|
|
628
|
+
# Reads one element of an array assignment starting at @pos and returns its
# raw source text (quotes and escapes are kept as written). Returns "" when
# @pos is already on whitespace or ')'.
#
# The element ends at unquoted whitespace or ')'. Quoted strings, $'...',
# `...`, $(...) and ${...} are consumed whole by their readers, and a
# backslash escapes the next character, so "a\ b" and "x\)y" stay a single
# element, matching how read_word treats the same constructs.
def read_array_element
  start = @pos

  while @pos < @input.length
    char = @input[@pos]

    # Stop at whitespace or closing paren
    break if char.match?(/[ \t\n]/) || char == ')'

    if char == '\\'
      # Skip the escaped character too, without running past the input.
      @pos = [@pos + 2, @input.length].min
    elsif char == '"'
      read_double_quoted_string
    elsif char == '$' && @input[@pos + 1] == "'"
      read_ansi_c_quoted_string
    elsif char == "'"
      read_single_quoted_string
    elsif char == '`'
      read_backtick_substitution
    elsif char == '$' && @input[@pos + 1] == '('
      read_command_substitution
    elsif char == '$' && @input[@pos + 1] == '{'
      read_braced_variable
    else
      @pos += 1
    end
  end

  @input[start...@pos]
end
|
|
654
|
+
|
|
655
|
+
# Reads function-call syntax, cmd(arg1, arg2, ...), with @pos on the opening
# '(' that follows +name+.
#
# name - the command name already consumed before the '('.
#
# Returns a :FUNC_CALL token whose value is {name: name, args: [raw strings]}.
# Arguments are comma-separated with optional spaces/tabs around them; empty
# arguments are dropped. Reading stops at ')' or at the first character that
# is neither ',' nor ')' after an argument; the ')' is consumed when present.
def read_func_call(name)
  @pos += 1 # step over the opening paren
  args = []

  while @pos < @input.length
    # Blanks before the argument
    @pos += 1 while @pos < @input.length && @input[@pos] =~ /[ \t]/
    break if @input[@pos] == ')'

    arg = read_func_call_arg
    args << arg unless arg.nil? || arg.empty?

    # Blanks after the argument
    @pos += 1 while @pos < @input.length && @input[@pos] =~ /[ \t]/

    # Only a comma continues the list; ')' or anything unexpected ends it.
    break unless @input[@pos] == ','

    @pos += 1 # step over the comma
  end

  @pos += 1 if @input[@pos] == ')' # step over the closing paren

  Token.new(:FUNC_CALL, {name: name, args: args})
end
|
|
687
|
+
|
|
688
|
+
# Reads a single argument inside cmd(...) starting at @pos and returns its
# raw source text.
#
# An argument that begins with a quote, $'...' or '[' is consumed entirely
# by the matching reader; one that begins with '/' is delegated to
# read_func_call_slash_arg (path or regexp). Anything else is read as a word
# up to a space, tab, ',' or ')', with backslash escapes, quoted strings,
# $(...) and ${...} kept inside the word.
def read_func_call_arg
  origin = @pos
  lead = @input[@pos]

  # Arguments made up of exactly one literal.
  literal_reader =
    case lead
    when '"' then :read_double_quoted_string
    when "'" then :read_single_quoted_string
    when '[' then :read_array
    when '$'
      :read_ansi_c_quoted_string if @input[@pos + 1] == "'"
    end
  if literal_reader
    send(literal_reader)
    return @input[origin...@pos]
  end

  # Regexp or path starting with a slash.
  return read_func_call_slash_arg if lead == '/'

  # Regular word argument.
  while @pos < @input.length
    current = @input[@pos]
    break if current =~ /[ \t]/ || current == ',' || current == ')'

    case current
    when '\\'
      @pos += 2
    when '"'
      read_double_quoted_string
    when "'"
      read_single_quoted_string
    else
      if current == '$' && @input[@pos + 1] == '('
        read_command_substitution
      elsif current == '$' && @input[@pos + 1] == '{'
        read_braced_variable
      else
        @pos += 1
      end
    end
  end

  @input[origin...@pos]
end
|
|
741
|
+
|
|
742
|
+
# Reads a function-call argument that begins with '/' and returns its raw
# source text. The argument is either a path or a regexp literal:
#
# Path:   contains only alphanumerics, '_', '.', '-' and '/'. Reading
#         continues through every slash until a space, tab, ',' or ')'.
# Regexp: contains a metacharacter (* + ? ^ $ [ ] ( | or a backslash escape).
#         Reading stops right after the closing '/' and any i/m/x/o flags.
#
# A backslash always escapes the following character, so an escaped slash
# (as in /\/usr/) does not close the regexp and an escaped ')' or ',' does
# not end the argument. '.' is a path character, not a metacharacter, so a
# dotted directory such as /usr/lib/ruby.d/x is read as one path.
def read_func_call_slash_arg
  start = @pos
  @pos += 1 # skip opening /

  has_metachar = false

  while @pos < @input.length
    char = @input[@pos]

    # Stop at comma, closing paren (without closing /), or whitespace
    break if char.match?(/[ \t]/) || char == ',' || char == ')'

    case char
    when '/'
      content = @input[start + 1...@pos]
      if has_metachar || !content.match?(/\A[a-zA-Z0-9_.\-\/]*\z/)
        # Regexp: consume the closing slash and optional flags.
        @pos += 1
        @pos += 1 while @pos < @input.length && @input[@pos].match?(/[imxo]/)
        break
      end
      # Path-like so far: the slash is a separator, keep reading.
      @pos += 1
    when '\\'
      # Escape: marks the text as a regexp and skips the escaped character.
      has_metachar = true
      @pos = [@pos + 2, @input.length].min
    else
      has_metachar = true if char.match?(/[*+?^$\[\]()|]/)
      @pos += 1
    end
  end

  @input[start...@pos]
end
|
|
788
|
+
|
|
789
|
+
# Tells whether +word+ (the text read so far, immediately before a `(`)
# can be the prefix of an extended glob such as `foo?(bar)`, `*(pat)`,
# `@(a|b)` or `!(neg)`.
#
# Returns true when +word+ is empty (a standalone `@(`, `!(` ...) or when
# its last character is one of `?`, `*`, `+`, `@`, `!`; false otherwise.
# A word made up solely of glob characters is covered by the
# last-character rule, so no separate check is needed for it.
def extglob_prefix?(word)
  word.empty? || word.end_with?('?', '*', '+', '@', '!')
end
|
|
798
|
+
|
|
799
|
+
# Looks ahead from @pos (without moving it) and reports whether the `.` at
# the current position begins a method chain:
#   .identifier(args)      - call with parenthesised arguments
#   .identifier { |x| .. } - call with a block that declares parameters
#   .each/.map/.select/.detect { ... } - block without explicit parameters
#
# The identifier must start with a letter or underscore, which rules out
# `./path` and decimals such as `.5`. For the parenthesised form the answer
# is the negation of looks_like_ruby_method_chain? for that `(`.
def looks_like_method_chain_start?
  head = /\G\.([a-zA-Z_][a-zA-Z0-9_]*)/.match(@input, @pos)
  return false unless head

  method_name = head[1]
  after_name = head.end(0)

  # Optional blanks, `{`, optional whitespace, then possibly a `|`.
  block = /\G[ \t]*\{\s*(\|)?/.match(@input, after_name)
  if block
    return true if block[1]
    # These iterators are accepted even without |params| (implicit `it`).
    return true if %w[each map select detect].include?(method_name)
  end

  # Otherwise `(` has to follow the identifier directly.
  return false unless @input[after_name] == '('

  !looks_like_ruby_method_chain?(after_name)
end
|
|
835
|
+
|
|
836
|
+
# Scans the parenthesised text that starts at +paren_pos+ (the index of the
# opening `(` in @input) and reports whether it contains Ruby keyword
# argument syntax.
#
# Returns true as soon as an unquoted `:` is found that follows a word
# character and is followed by whitespace, a word character, or the end of
# input (e.g. `foo: bar`, `foo:bar`). Returns false when the matching `)`
# or the end of input is reached first. @pos is not modified.
#
# Text inside '...' or "..." is ignored. Within a quoted section a backslash
# escapes the next character, so a string ending in an escaped backslash
# ("a\\") closes correctly instead of swallowing the rest of the arguments.
def looks_like_ruby_method_chain?(paren_pos)
  idx = paren_pos + 1
  depth = 1
  quote = nil # the quote character of the string being skipped, if any

  while idx < @input.length && depth > 0
    char = @input[idx]

    if quote
      if char == '\\'
        idx += 1 # skip the escaped character as well
      elsif char == quote
        quote = nil
      end
    else
      case char
      when '"', "'"
        quote = char
      when '('
        depth += 1
      when ')'
        depth -= 1
      when ':'
        prev_char = @input[idx - 1]
        next_char = @input[idx + 1]
        return true if prev_char =~ /[a-zA-Z0-9_]/ && (next_char.nil? || next_char =~ /[\s\w]/)
      end
    end

    idx += 1
  end

  false
end
|
|
872
|
+
|
|
873
|
+
# Tells whether +name+ may be used as the callee of a function-call style
# invocation.
#
# A valid name starts with a letter, digit, underscore, `.` or `/` (the last
# two allow paths such as ./script or /bin/ls). Empty names and names that
# start with anything else (for example the regexp metacharacters `^` and
# `$`) are rejected by that single leading-character test.
#
# Returns true or false.
def valid_func_call_name?(name)
  name.match?(/\A[a-zA-Z_0-9.\/]/)
end
|
|
883
|
+
|
|
884
|
+
# Reports whether the parenthesised text starting at the current position
# (@pos must be at the opening `(`) looks like Ruby code rather than shell
# arguments. This is the same scan as looks_like_ruby_method_chain?, started
# at @pos.
#
# The signal is Ruby keyword-argument syntax: an unquoted `:` that follows a
# word character and is followed by whitespace, a word character, or the end
# of input ("foo: bar", "foo:bar" - but not ":/path" or "$:"). Returns true
# on the first such `:`, false when the matching `)` or the end of input is
# reached first. @pos is not modified.
#
# Text inside '...' or "..." is ignored. Within a quoted section a backslash
# escapes the next character, so a string ending in an escaped backslash
# ("a\\") closes correctly instead of swallowing the rest of the arguments.
def looks_like_ruby_call?
  idx = @pos + 1
  depth = 1
  quote = nil # the quote character of the string being skipped, if any

  while idx < @input.length && depth > 0
    char = @input[idx]

    if quote
      if char == '\\'
        idx += 1 # skip the escaped character as well
      elsif char == quote
        quote = nil
      end
    else
      case char
      when '"', "'"
        quote = char
      when '('
        depth += 1
      when ')'
        depth -= 1
      when ':'
        prev_char = @input[idx - 1]
        next_char = @input[idx + 1]
        return true if prev_char =~ /[a-zA-Z0-9_]/ && (next_char.nil? || next_char =~ /[\s\w]/)
      end
    end

    idx += 1
  end

  false
end
|
|
924
|
+
|
|
925
|
+
# Advances @pos over a double-quoted string. @pos must be at the opening `"`;
# afterwards it is just past the closing `"`.
#
# A backslash skips the character that follows it, so `\"` does not end the
# string. If the string is unterminated, @pos stops at the end of @input
# rather than beyond it, matching the other readers in this file that guard
# their final increment.
#
# Returns the new @pos.
def read_double_quoted_string
  limit = @input.length
  @pos += 1 # skip opening "

  while @pos < limit && @input[@pos] != '"'
    @pos += (@input[@pos] == '\\' ? 2 : 1)
  end

  # Step over the closing quote when there is one; otherwise clamp to the end.
  @pos = [@pos + 1, limit].min
end
|
|
936
|
+
|
|
937
|
+
# Advances @pos over a single-quoted string. @pos must be at the opening `'`;
# afterwards it is just past the next `'` (no escape processing is done
# inside single quotes).
#
# If there is no closing quote, @pos stops at the end of @input rather than
# one past it.
#
# Returns the new @pos.
def read_single_quoted_string
  @pos += 1 # skip opening '
  closing = @input.index("'", @pos)
  @pos = closing ? closing + 1 : @input.length
end
|
|
942
|
+
|
|
943
|
+
# Advances @pos over an ANSI-C quoted string ($'...'). @pos must be at the
# `$`; afterwards it is just past the closing `'`, or at the end of @input
# if the string is unterminated.
#
# Unlike plain single quotes, a backslash here escapes the next character,
# so `\'` does not end the string. A backslash that is the very last
# character of the input does not move @pos past the end.
def read_ansi_c_quoted_string
  limit = @input.length
  @pos += 2 # skip $'

  while @pos < limit
    case @input[@pos]
    when '\\'
      # Skip the backslash and the character it escapes (including \').
      @pos = [@pos + 2, limit].min
    when "'"
      @pos += 1 # skip closing '
      break
    else
      @pos += 1
    end
  end
end
|
|
959
|
+
|
|
960
|
+
# Advances @pos over a command substitution `$(...)`. @pos must be at the
# `$`; afterwards it is just past the matching `)`, or at/after the end of
# @input when the substitution is unterminated.
#
# Nested parentheses are balanced. Quoted sections are skipped with
# read_double_quoted_string / read_single_quoted_string so parentheses
# inside them are not counted. Outside quotes a backslash escapes the next
# character, so `\)`, `\(`, `\"` and `\'` are treated as ordinary text
# (consistent with read_backtick_substitution, which also honours escapes).
def read_command_substitution
  limit = @input.length
  @pos += 2 # skip $(
  depth = 1

  while @pos < limit && depth > 0
    case @input[@pos]
    when '\\'
      @pos = [@pos + 2, limit].min
      next
    when '"'
      read_double_quoted_string
      next
    when "'"
      read_single_quoted_string
      next
    when '('
      depth += 1
    when ')'
      depth -= 1
    end

    @pos += 1
  end
end
|
|
980
|
+
|
|
981
|
+
# Moves @pos across a backtick command substitution (`...`). On entry @pos
# is at the opening backtick; on exit it sits just after the closing one.
# A backslash hides the character after it, so an escaped backtick does not
# terminate the substitution. Scanning simply runs to the end of the input
# when no closing backtick exists.
def read_backtick_substitution
  @pos += 1 # step over the opening `

  until @pos >= @input.length
    current = @input[@pos]

    if current == '`'
      @pos += 1 # step over the closing `
      return
    end

    # Escaped characters (including \`) are consumed together with the backslash.
    @pos += (current == '\\' ? 2 : 1)
  end
end
|
|
997
|
+
|
|
998
|
+
# Advances @pos over a braced parameter expansion `${...}`. @pos must be at
# the `$`; afterwards it is just past the matching `}`, or at the end of
# @input when the expansion is unterminated.
#
# Nested expansions are balanced, so `${A:-${B}}` is consumed as a whole
# instead of stopping at the first `}`. Only `${` opens a new level; a bare
# `{` is ordinary text. A backslash escapes the next character, so `\}`
# does not close the expansion.
#
# Returns the new @pos.
def read_braced_variable
  limit = @input.length
  @pos += 2 # skip ${
  depth = 1

  while @pos < limit
    char = @input[@pos]

    if char == '\\'
      @pos = [@pos + 2, limit].min
    elsif char == '$' && @input[@pos + 1] == '{'
      depth += 1
      @pos += 2
    elsif char == '}'
      depth -= 1
      @pos += 1 # skip closing }
      break if depth.zero?
    else
      @pos += 1
    end
  end

  @pos
end
|
|
1004
|
+
|
|
1005
|
+
# Moves @pos across a brace expansion such as {a,b,c} or {1..5}, starting
# at the current position. Every `{` opens a level and every `}` closes
# one; scanning stops right after the `}` that brings the nesting back to
# zero, or at the end of the input if that never happens.
def read_brace_expansion
  nesting = 0

  while @pos < @input.length
    current = @input[@pos]
    @pos += 1 # every character is consumed, whatever it is

    case current
    when '{'
      nesting += 1
    when '}'
      nesting -= 1
      break if nesting == 0
    end
  end
end
|
|
1022
|
+
|
|
1023
|
+
# Reads a process substitution `<(...)` or `>(...)`. @pos must be at the
# `<` or `>`; afterwards it is just past the matching `)`.
#
# Returns Token.new(type, command), where +type+ is passed through unchanged
# and command is the raw text between the outer parentheses.
#
# Nested parentheses are balanced. Quoted sections are skipped with
# read_double_quoted_string / read_single_quoted_string. Outside quotes a
# backslash escapes the next character, so `\)` does not close the
# substitution and `\"` does not start a string.
def read_process_substitution(type)
  limit = @input.length
  @pos += 2 # skip <( or >(
  start = @pos
  depth = 1

  while @pos < limit && depth > 0
    case @input[@pos]
    when '\\'
      @pos = [@pos + 2, limit].min
      next
    when '"'
      read_double_quoted_string
      next
    when "'"
      read_single_quoted_string
      next
    when '('
      depth += 1
    when ')'
      depth -= 1
      # Leave @pos on the closing ) so it is excluded from the command text.
      break if depth.zero?
    end

    @pos += 1
  end

  command = @input[start...@pos]
  @pos += 1 if @pos < limit # skip closing )
  Token.new(type, command)
end
|
|
1048
|
+
|
|
1049
|
+
# Reads the delimiter word that follows a heredoc operator and wraps it in a
# token of the given +type+.
#
# After skip_whitespace, two forms are recognised:
# - quoted ('EOF' or "EOF"): the text up to the matching quote is the
#   delimiter and the token value is "<delimiter>:quoted", which marks the
#   heredoc body as not subject to variable expansion;
# - unquoted: the longest run of letters, digits and underscores; the token
#   value is the delimiter itself.
def read_heredoc_delimiter(type)
  skip_whitespace

  opener = @input[@pos]

  unless opener == "'" || opener == '"'
    # Unquoted delimiter: consume word characters only.
    first = @pos
    @pos += 1 while @pos < @input.length && @input[@pos] =~ /[a-zA-Z0-9_]/
    return Token.new(type, @input[first...@pos])
  end

  # Quoted delimiter: take everything up to the same quote character.
  first = @pos + 1
  closing = @input.index(opener, first)
  @pos = closing || @input.length
  delimiter = @input[first...@pos]
  @pos += 1 if closing # step over the closing quote

  Token.new(type, "#{delimiter}:quoted")
end
|
|
1074
|
+
|
|
1075
|
+
# Reads the word that follows a here-string operator and returns it as a
# :HERESTRING token.
#
# After skip_whitespace the word is one of:
# - a double-quoted string (consumed by read_double_quoted_string),
# - a single-quoted string (consumed by read_single_quoted_string),
# - an unquoted run that ends at a space, a tab, or any character that is
#   a key of OPERATORS.
# The token value is the raw source text of the word; for the quoted forms
# that includes the surrounding quotes.
def read_herestring
  skip_whitespace

  from = @pos

  case @input[@pos]
  when '"'
    read_double_quoted_string
  when "'"
    read_single_quoted_string
  else
    until @pos >= @input.length
      current = @input[@pos]
      break if current =~ /[ \t]/ || OPERATORS.key?(current)

      @pos += 1
    end
  end

  Token.new(:HERESTRING, @input[from...@pos])
end
|
|
1100
|
+
|
|
1101
|
+
# Reads the body of an arithmetic command. The opening `((` has already
# been consumed by the caller; this reads up to the closing `))`, leaves
# @pos just past it, and returns Token.new(:ARITH_CMD, expression) with the
# expression stripped of surrounding whitespace.
#
# Parentheses inside the expression are balanced one at a time, and `))`
# only closes the command when no inner parenthesis is still open. That way
# `x = (1+2)))` yields "x = (1+2)" rather than ending at the inner `)`.
# A `)` with nothing open that is not followed by another `)` is kept as
# part of the expression text.
#
# Raises RuntimeError when the end of input is reached before the closing
# `))`.
def read_arithmetic_command
  expression = +''
  open_parens = 0 # inner ( ... ) groups currently open
  closed = false

  while @pos < @input.length
    char = @input[@pos]

    if char == ')' && open_parens.zero? && @input[@pos + 1] == ')'
      @pos += 2 # consume the terminating ))
      closed = true
      break
    end

    if char == '('
      open_parens += 1
    elsif char == ')' && open_parens > 0
      open_parens -= 1
    end

    expression << char
    @pos += 1
  end

  # NOTE(review): the message says ")))" although the terminator is "))";
  # left byte-identical in case callers match on the text.
  raise 'Expected ")))" to close arithmetic command' unless closed

  Token.new(:ARITH_CMD, expression.strip)
end
|
|
1140
|
+
|
|
1141
|
+
# Checks, without moving @pos, whether the text at the current position is
# a {varname} redirection: `{identifier}` immediately followed by a
# redirection operator (>, >>, >|, >&, <, <&).
#
# The identifier must start with a letter or underscore and continue with
# letters, digits or underscores. Every accepted operator begins with `<`
# or `>`, so testing the single character after `}` is sufficient.
#
# Returns true or false.
def looks_like_varname_redirect?
  /\G\{[a-zA-Z_][a-zA-Z0-9_]*\}[<>]/.match?(@input, @pos)
end
|
|
1168
|
+
|
|
1169
|
+
# Consumes a {varname} redirection prefix such as `{fd}>` or `{fd}<&` and
# returns a :VARNAME_REDIRECT token whose value is a hash with the keys
# :varname (the identifier between the braces) and :operator (the
# redirection operator that follows).
#
# @pos must be at the opening `{`; afterwards it is just past the operator.
# The two-character operators >>, >|, >& and <& take precedence; otherwise
# a single character is taken as the operator.
def read_varname_redirect
  name_start = @pos + 1 # first character after the opening {
  @pos = name_start
  @pos += 1 while @pos < @input.length && @input[@pos] =~ /[a-zA-Z0-9_]/
  varname = @input[name_start...@pos]

  @pos += 1 # step over the closing }

  candidate = @input[@pos, 2]
  two_char_op = %w[>> >| >& <&].include?(candidate)
  operator = two_char_op ? candidate : @input[@pos] # single char: > or <
  @pos += two_char_op ? 2 : 1

  Token.new(:VARNAME_REDIRECT, {varname: varname, operator: operator})
end
|
|
1193
|
+
end
|
|
1194
|
+
end
|