yap-shell-parser 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +2 -0
- data/.travis.yml +10 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +33 -0
- data/Rakefile +3 -0
- data/bin/compile_debug_parser +3 -0
- data/bin/compile_parser +3 -0
- data/lib/tasks/gem.rake +60 -0
- data/lib/yap/shell.rb +2 -0
- data/lib/yap/shell/parser.rb +381 -0
- data/lib/yap/shell/parser/grammar.y +151 -0
- data/lib/yap/shell/parser/lexer.rb +311 -0
- data/lib/yap/shell/parser/nodes.rb +205 -0
- data/lib/yap/shell/parser/version.rb +5 -0
- data/spec/spec_helper.rb +91 -0
- data/spec/yap/shell/lexer_spec.rb +697 -0
- data/yap-shell-parser.gemspec +25 -0
- metadata +109 -0
@@ -0,0 +1,151 @@
|
|
1
|
+
# $Id$
|
2
|
+
#
|
3
|
+
# Racc grammar for Yap::Shell::Parser: turns lexer tokens into a tree of *Node objects.
|
4
|
+
|
5
|
+
class Yap::Shell::Parser
  # Terminal symbols; each one is a tag emitted by Yap::Shell::Parser::Lexer.
  token Command LiteralCommand Argument Heredoc InternalEval Separator Conditional Pipe Redirection LValue RValue
  #
  # prechigh
  # #  left '**' '*' '/' '%'
  # #  left '+' '-'
  # #  left '&&' '||'
  # #  left '|' '^' '&'
  # # #  right Not
  #   left Separator
  #   left Conditional
  #   right Pipe
  # preclow

rule

  # Start symbol: a whole input line.
  program : stmts

  # Statements joined by Separator tokens (";").
  stmts : stmts Separator stmt
          { result = StatementsNode.new(val[0], val[2]) }
        | stmt
          { result = StatementsNode.new(val[0]) }

  # Pipelines chained by Conditional tokens ("&&" / "||"); the operator text
  # is handed to the node as its first argument.
  stmt : stmt Conditional pipeline
         { result = ConditionalNode.new(val[1].value, val[0], val[2]) }
       | pipeline

  # Units connected by Pipe tokens ("|").
  pipeline : pipeline Pipe stmts2
             { result = PipelineNode.new(val[0], val[2]) }
           | stmts2

  # A parenthesised group, a command, or an internal eval.
  stmts2 : '(' stmts ')'
           { result = val[1] }
         | command_w_heredoc
         | internal_eval

  # A command optionally followed by a heredoc body.
  command_w_heredoc : command_w_redirects Heredoc
                      {
                        result = val[0]
                        result.heredoc = val[1]
                      }
                    | command_w_redirects

  # A command followed by any number of redirections; each redirection is
  # appended to the command node's redirects list.
  command_w_redirects : command_w_redirects Redirection
                        {
                          result = val[0]
                          result.redirects << RedirectionNode.new(val[1].value, val[1].attrs[:target])
                        }
                      | command_w_vars
                      | command
                      | vars

  # Variable assignments that prefix a command.
  command_w_vars : vars command
                   { result = EnvWrapperNode.new(val[0], val[1]) }

  # One or more LValue/RValue pairs collected into a single EnvNode.
  vars : vars LValue RValue
         {
           result = val[0]
           result.add_var(val[1].value, val[2].value)
         }
       | LValue RValue
         { result = EnvNode.new(val[0].value, val[1].value) }

  command : command2

  # A command name (regular or literal) with optional arguments.
  command2: Command
            { result = CommandNode.new(val[0].value) }
          | Command args
            { result = CommandNode.new(val[0].value, val[1].flatten) }
          | LiteralCommand
            { result = CommandNode.new(val[0].value, literal:true) }
          | LiteralCommand args
            { result = CommandNode.new(val[0].value, val[1].flatten, literal:true) }

  # Arguments accumulate as a nested array; command2 flattens it.
  args : Argument
         { result = [val[0].value] }
       | args Argument
         { result = [val[0], val[1].value] }

  internal_eval : InternalEval
                  { result = InternalEvalNode.new(val[0].value) }
|
77
|
+
|
78
|
+
|
79
|
+
---- inner
|
80
|
+
$LOAD_PATH.unshift File.dirname(__FILE__) + "/../../"
|
81
|
+
require 'yap/shell/parser/lexer'
|
82
|
+
require 'yap/shell/parser/nodes'
|
83
|
+
|
84
|
+
include Yap::Shell::Parser::Nodes
|
85
|
+
|
86
|
+
# Tokenizes +str+ with the lexer and runs the generated Racc parser over the
# resulting token queue.
#
# str - the source text to parse.
#
# Returns whatever the Racc driver returns for the start rule.
def parse(str)
  # @yydebug = true   # uncomment for Racc debug tracing

  # next_token drains this queue one [tag, token] pair at a time.
  @q = Yap::Shell::Parser::Lexer.new.tokenize(str)
  # @q.push [false, '$']   # is optional from Racc 1.3.7

  # do_parse is Racc's public entry point; it wraps the
  # __send__(Racc_Main_Parsing_Routine, _racc_setup(), false) call that was
  # previously spelled out here.
  do_parse
end
|
96
|
+
|
97
|
+
# Callback used by the Racc driver: removes and returns the entry at the
# front of the token queue built by #parse (nil once the queue is empty).
def next_token
  upcoming = @q.shift
  upcoming
end
|
100
|
+
|
101
|
+
---- footer
|
102
|
+
|
103
|
+
# Manual smoke test: when this file is executed directly, parse the sample
# command lines below and pretty-print the resulting tree for each one.
if $0 == __FILE__
  $LOAD_PATH.unshift File.dirname(__FILE__) + "/../../"
  require 'yap/shell/parser/lexer'
  require 'yap/shell/parser/nodes'

  # Uncomment entries to exercise other constructs.
  samples = [
    # "echo foo",
    # "echo foo ; echo bar baz yep",
    # "echo foo && echo bar baz yep",
    # "echo foo && echo bar && ls foo && ls bar",
    # "echo foo ; echo bar baz yep ; ls foo",
    # "echo foo && echo bar ; ls baz",
    # "echo foo && echo bar ; ls baz ; echo zach || echo gretchen",
    # "echo foo | bar",
    # "echo foo | bar && foo | bar",
    # "foo && bar ; word || baz ; yep | grep -v foo",
    # "( foo )",
    # "( foo a b && bar c d )",
    # "( foo a b && (bar c d | baz e f))",
    # "((((foo))))",
    # "foo -b -c ; (this ;that ;the; other ;thing) && yep",
    # "foo -b -c ; (this ;that && other ;thing) && yep",
    # "4 + 5",
    # "!'hello' ; 4 - 4 && 10 + 3",
    # "\\foo <<-EOT\nbar\nEOT",
    # "ls | grep md | grep WISH",
    # "(!upcase)",
    # "echo foo > bar.txt",
    # "ls -l > a.txt ; echo f 2> b.txt ; cat b &> c.txt ; du -sh 1>&2 1>hey.txt",
    # "!Dir.chdir('..')",
    # "FOO=123",
    # "FOO=123 BAR=345",
    # "FOO=abc bar=2314 car=14ab ls -l",
    "FOO=abc BAR='hello world' ls -l ; CAR=f echo foo && say hi"
  ]

  samples.each do |sample|
    puts 'parsing:'
    print sample
    puts
    puts 'result:'
    require 'pp'
    tree = Yap::Shell::Parser.new.parse(sample)
    pp tree
  end

  # puts "---- Evaluating"
  # require 'pry'
  # binding.pry
  # Evaluator.new.evaltree(ast)
end
|
@@ -0,0 +1,311 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
|
3
|
+
module Yap::Shell
|
4
|
+
class Parser::Lexer
|
5
|
+
# A single lexeme produced by the lexer.
#
# tag    - the token type (e.g. :Command, :Argument).
# value  - the matched text.
# lineno - the lexer's line counter at the time the token was created.
# attrs  - extra data attached to the token (e.g. a redirection target).
class Token
  include Comparable

  attr_reader :tag, :value, :lineno, :attrs

  def initialize(tag, value, lineno:, attrs: {})
    @tag = tag
    @value = value
    @lineno = lineno
    @attrs = attrs
  end

  # Tokens only have a notion of equality, not of ordering.
  #
  # Returns 0 when +other+ is a token of the same class with identical tag,
  # value, lineno and attrs. Returns nil otherwise, so that Comparable#==
  # yields false and ordering operators raise instead of reporting that
  # each of two different tokens is "less than" the other.
  def <=>(other)
    return nil if self.class != other.class

    mine = [tag, value, lineno, attrs]
    theirs = [other.tag, other.value, other.lineno, other.attrs]
    mine == theirs ? 0 : nil
  end

  # Compact form: tag, quoted value and attrs.
  def inspect
    "#{tag.inspect} '#{value}' #{attrs.inspect}"
  end

  # Verbose form that also includes the line number.
  def to_s
    "Token(#{tag.inspect} #{value.inspect} on #{lineno} with #{attrs.inspect})"
  end

  # Length of the #to_s rendering (not of the matched text).
  def length
    to_s.length
  end
end
|
35
|
+
|
36
|
+
# Lexer patterns. Apart from ARG, each is anchored with \A so it only
# matches at the start of the remaining input.

# A bare word: one leading character followed by any number of trailing
# characters, each drawn from a class that excludes whitespace and the
# shell metacharacters listed.
ARG = /[^\s;\|\(\)\{\}\[\]\&\!\\\<][^\s;\|\(\)\{\}\[\]\&\!\>\<]*/
COMMAND = /\A(#{ARG})/
# A command name prefixed with a backslash.
LITERAL_COMMAND = /\A\\(#{ARG})/
# Whitespace other than newlines.
WHITESPACE = /\A[^\n\S]+/
ARGUMENT = /\A(#{ARG}+)/
# NAME= ; group 1 includes the "=", group 2 is the bare name.
# [A-Za-z_] rather than [A-z_]: the A-z range also spans "[", "\", "]",
# "^" and "`", which are not valid at the start of a variable name.
LH_ASSIGNMENT = /\A(([A-Za-z_]\w*)=)/
RH_VALUE = /\A(\S+)/
STATEMENT_TERMINATOR = /\A(;)/
PIPE_TERMINATOR = /\A(\|)/
CONDITIONAL_TERMINATOR = /\A(&&|\|\|)/
# <<MARKER or <<-MARKER, the body (group 2) and the closing marker line.
# The marker class is spelled out for the same reason as in LH_ASSIGNMENT.
HEREDOC = /\A<<-?([A-Za-z0-9_]+)\s*^(.*)?(^\s*\1\s*$)/m
# Input starting with "!" (group 1) or with digits (group 2).
INTERNAL_EVAL = /\A(?:(\!)|([0-9]+))/
SUBGROUP = /\A(\(|\))/
# Group 2 is the operator (e.g. ">", "2>", "1>&2"), group 3 the optional target.
REDIRECTION = /\A(([12]?>&?[12]?)\s*(?![12]>)(#{ARG})?)/
# "&>" and "<" with a mandatory target.
REDIRECTION2 = /\A((&>|<)\s*(#{ARG}))/
|
51
|
+
|
52
|
+
# Splits +str+ into tokens.
#
# The remaining input is repeatedly offered to each token rule in a fixed
# priority order; the first rule that matches records its token(s) and
# reports how many characters it consumed.
#
# str - the source text to tokenize.
#
# Returns the array of [tag, Token] pairs.
# Raises RuntimeError when no rule can consume the remaining input. The
# previous guard was a fixed cap of 100 iterations, which also rejected any
# valid input made of 100 or more tokens and whitespace runs.
def tokenize(str)
  @str = str
  @tokens = []
  @lineno = 0
  @looking_for_args = false
  @current_position = 0

  loop do
    @chunk = str.slice(@current_position..-1)
    break if @chunk.nil? || @chunk.empty?

    consumed = (
      subgroup_token ||
      assignment_token ||
      literal_command_token ||
      command_token ||
      whitespace_token ||
      terminator_token ||
      redirection_token ||
      heredoc_token ||
      string_argument_token ||
      argument_token ||
      internal_eval_token
    ).to_i

    # Without progress the same chunk would be offered again forever.
    raise "Infinite loop detected on #{@chunk.inspect}" unless consumed > 0

    @current_position += consumed
  end

  @tokens
end
|
84
|
+
|
85
|
+
private
|
86
|
+
|
87
|
+
# Appends a [tag, Token] pair to the token list, stamping the Token with
# the lexer's current line number.
#
# Returns the token list.
def token(tag, value, attrs:{})
  entry = [tag, Token.new(tag, value, lineno: @lineno, attrs: attrs)]
  @tokens.push(entry)
end
|
90
|
+
|
91
|
+
# Matches a command name, but only while the lexer is not collecting
# arguments for a previous command. On a match the lexer switches to
# argument-collecting mode.
#
# Returns the number of characters consumed, or nil when nothing matched.
def command_token
  return if @looking_for_args

  match = @chunk.match(COMMAND)
  return unless match

  @looking_for_args = true
  token :Command, match[1]
  match[0].length
end
|
98
|
+
|
99
|
+
# Matches a backslash-prefixed command name, but only while the lexer is
# not collecting arguments. The backslash is consumed but is not part of
# the token value.
#
# Returns the number of characters consumed, or nil when nothing matched.
def literal_command_token
  return if @looking_for_args

  match = @chunk.match(LITERAL_COMMAND)
  return unless match

  @looking_for_args = true
  token :LiteralCommand, match[1]
  match[0].length
end
|
106
|
+
|
107
|
+
# Matches a numeric expression at the start of the remaining input.
#
# This rule depends on a NUMERIC_EXPR pattern. When that constant is not
# defined the rule simply never matches, instead of raising NameError.
#
# Returns the number of characters consumed, or nil when nothing matched.
def numeric_expr_token
  return unless defined?(NUMERIC_EXPR)
  return if @looking_for_args

  match = @chunk.match(NUMERIC_EXPR)
  return unless match

  @looking_for_args = true
  token :NumericExpr, match[1]
  match[0].length
end
|
114
|
+
|
115
|
+
# Matches a complete heredoc (opening marker, body, closing marker) and
# records the body as a :Heredoc token.
#
# Returns the number of characters consumed, or nil when nothing matched.
def heredoc_token
  match = @chunk.match(HEREDOC)
  return unless match

  token :Heredoc, match[2]
  match[0].length
end
|
121
|
+
|
122
|
+
# Matches input that starts with "!" or with digits and records the
# expression found by process_internal_eval as an :InternalEval token.
#
# A leading "!" is left out of the expression text but still counts toward
# the consumed length; leading digits stay part of the expression.
#
# Returns the number of characters consumed, or nil when nothing matched.
def internal_eval_token
  match = @chunk.match(INTERNAL_EVAL)
  return unless match

  skipped = match[1] ? match[1].length : 0
  expression = @chunk[skipped..-1]

  evaluated = process_internal_eval(expression, consumed: skipped)
  token :InternalEval, evaluated.str
  evaluated.consumed_length
end
|
136
|
+
|
137
|
+
# Matches a redirection operator and records a :Redirection token whose
# value is the operator and whose attrs carry the :target.
#
# For the REDIRECTION form the target is optional; a missing or empty
# target is stored as nil. The REDIRECTION2 form always has a target.
#
# Returns the number of characters consumed, or nil when nothing matched.
def redirection_token
  if (match = @chunk.match(REDIRECTION))
    destination = match[3]
    destination = nil if destination && destination.empty?
    token :Redirection, match[2], attrs: { target: destination }
    match[0].length
  elsif (match = @chunk.match(REDIRECTION2))
    token :Redirection, match[2], attrs: { target: match[3] }
    match[0].length
  end
end
|
148
|
+
|
149
|
+
# Matches an opening or closing parenthesis. The parenthesis itself is
# used both as the token tag and as the token value.
#
# Returns the number of characters consumed, or nil when nothing matched.
def subgroup_token
  match = @chunk.match(SUBGROUP)
  return unless match

  paren = match[0]
  token paren, paren
  paren.length
end
|
155
|
+
|
156
|
+
# Skips a run of non-newline whitespace at the start of the remaining
# input. No token is recorded.
#
# Returns the length of the run, or nil when the input does not start
# with such whitespace.
def whitespace_token
  match = WHITESPACE.match(@chunk)
  match ? match[0].length : nil
end
|
162
|
+
|
163
|
+
# Matches one argument while the lexer is collecting arguments. Quoted
# sections embedded in the word are unwrapped by process_string and joined
# with the surrounding bare characters into a single :Argument token.
#
# Returns the number of characters consumed, or nil when the lexer is not
# collecting arguments or the input does not start with an argument.
def argument_token
  return unless @looking_for_args && @chunk.match(ARGUMENT)

  word = ''
  offset = 0
  until offset >= @chunk.length
    char = @chunk[offset]
    case char
    when "'", '"'
      quoted = process_string(@chunk[offset..-1], char)
      word << quoted.str
      offset += quoted.consumed_length
    else
      # Stop at the first character that cannot start an argument.
      break unless char =~ ARGUMENT
      word << char
      offset += 1
    end
  end

  token :Argument, word
  offset
end
|
188
|
+
|
189
|
+
# Matches a NAME=value assignment while the lexer is not collecting
# arguments. Records an :LValue token for the name and, when a value
# follows, an :RValue token for it. Quoted values are unwrapped by
# process_string; unquoted values run up to the next whitespace.
#
# Note: @chunk is advanced past the "=" as a side effect.
#
# Returns the number of characters consumed, or nil when nothing matched.
def assignment_token
  return if @looking_for_args

  lhs = @chunk.match(LH_ASSIGNMENT)
  return unless lhs

  token :LValue, lhs[2]
  total = lhs[1].length
  @chunk = @chunk[total..-1]

  opener = @chunk[0]
  if opener == "'" || opener == '"'
    quoted = process_string(@chunk[0..-1], opener)
    token :RValue, quoted.str
    total += quoted.consumed_length
  elsif (rhs = @chunk.match(RH_VALUE))
    token :RValue, rhs[1]
    total += rhs[0].length
  end

  total
end
|
207
|
+
|
208
|
+
# Matches an operator that ends the current command: a conditional
# ("&&" / "||"), a statement separator (";") or a pipe ("|"), tried in
# that order so "||" is not read as a pipe. A match also takes the lexer
# out of argument-collecting mode.
#
# Returns the number of characters consumed, or nil when nothing matched.
def terminator_token
  rules = [
    [CONDITIONAL_TERMINATOR, :Conditional],
    [STATEMENT_TERMINATOR, :Separator],
    [PIPE_TERMINATOR, :Pipe]
  ]

  rules.each do |pattern, tag|
    match = @chunk.match(pattern)
    next unless match

    @looking_for_args = false
    token tag, match[0]
    return match[0].length
  end

  nil
end
|
223
|
+
|
224
|
+
# Matches a single- or double-quoted string at the start of the remaining
# input and records its unwrapped contents as an :Argument token.
#
# Returns the number of characters consumed, or nil when the input does
# not start with a quote.
def string_argument_token
  quote = @chunk[0]
  return unless quote == "'" || quote == '"'

  parsed = process_string(@chunk[0..-1], quote)
  token :Argument, parsed.str
  parsed.consumed_length
end
|
232
|
+
|
233
|
+
# Scans +input_str+ for the end of an internal-eval expression.
#
# The expression ends at the first ";", "|", "&&" or ")" that is not
# nested inside brackets or a string literal, or at the end of the input.
#
# Nesting rules:
# * "(", "[" and "{" open a scope that is closed by the matching bracket.
# * "'" and '"' open a string. Inside a string, brackets and the other
#   quote character are plain text (previously they were treated as
#   opening new scopes, so input such as `puts '(' ; ls` never terminated
#   at the ";").
# * Inside a string a backslash keeps the following character literal, so
#   an escaped quote does not close the string.
# * Inside a double-quoted string "#{" opens an interpolation scope that
#   is closed by "}" and may itself contain brackets and strings.
#
# input_str - the text to scan.
# consumed  - number of characters the caller has already consumed; it is
#             added to the reported length.
#
# Returns an OpenStruct with +str+ (the stripped expression text) and
# +consumed_length+ (offset of the terminator plus +consumed+).
def process_internal_eval(input_str, consumed:0)
  closers = { "{" => "}", "[" => "]", "(" => ")" }
  scope = []
  str = ''
  i = 0

  loop do
    at_end = (i == input_str.length)
    at_terminator = scope.empty? && input_str[i..-1] =~ /\A(;|\||&&|\))/
    if at_terminator || at_end
      return OpenStruct.new(str:str.strip, consumed_length:i+consumed)
    end

    ch = input_str[i]
    innermost = scope.last

    if innermost == "'" || innermost == '"'
      if ch == "\\" && i + 1 < input_str.length
        # Escaped character: copy the pair verbatim.
        str << ch << input_str[i + 1]
        i += 2
        next
      elsif innermost == '"' && ch == "#" && input_str[i + 1] == "{"
        # Interpolation: handled as a pair so an escaped "#" cannot open one.
        scope << "}"
        str << ch << input_str[i + 1]
        i += 2
        next
      elsif ch == innermost
        scope.pop
      end
    elsif ch == innermost
      scope.pop
    elsif ch == "'" || ch == '"'
      scope << ch
    elsif closers.key?(ch)
      scope << closers[ch]
    end

    str << ch
    i += 1
  end
end
|
271
|
+
|
272
|
+
# Extracts the contents of a quoted string that starts at the beginning
# of +input_str+.
#
# Scanning starts right after the opening delimiter and stops at the
# first unescaped occurrence of +delimiter+. A backslash-escaped delimiter
# opens a nested string which is processed recursively and re-emitted
# wrapped in the plain delimiter. If the closing delimiter is never found,
# everything up to the end of the input is returned.
#
# input_str - text beginning with +delimiter+.
# delimiter - the opening/closing delimiter.
# indent    - indentation for the trace output printed when ENV["DEBUG"]
#             is set; grows with recursion depth.
#
# Returns an OpenStruct with +str+ (the unwrapped contents) and
# +consumed_length+ (characters consumed, delimiters included). Empty
# input yields an empty result with a consumed length of 0; it used to
# return the bare input String, which has neither accessor.
def process_string(input_str, delimiter, indent=0)
  return OpenStruct.new(str: '', consumed_length: 0) if input_str.length == 0

  nested_delimiter = "\\#{delimiter}"

  i = delimiter.length # start string matching after our delimiter
  result_str = ''

  loop do
    chunk = input_str[i..-1]

    puts "#{' '*indent}I: #{i}" if ENV["DEBUG"]

    # Ran off the end without a closing delimiter: take what we have.
    if i >= input_str.length
      puts "#{' '*indent}C-yah: result:#{result_str.inspect} length: #{input_str.length}" if ENV["DEBUG"]
      return OpenStruct.new(str:result_str, consumed_length: input_str.length)
    end

    if chunk.start_with?(nested_delimiter) # we found a nested escaped string
      puts "#{' '*indent}A-pre: chunk:#{chunk.inspect} nested_delimiter:#{nested_delimiter.inspect}" if ENV["DEBUG"]
      result = process_string(chunk, nested_delimiter, indent+2)
      result_str << [delimiter, result.str, delimiter].join
      puts "#{' '*indent}A-pos: result:#{result.inspect} result_str:#{result_str.inspect} #{nested_delimiter.length} + #{result.consumed_length} + #{nested_delimiter.length}" if ENV["DEBUG"]

      i += result.consumed_length

    elsif chunk.start_with?(delimiter) # we found the end of our current nested escaped string
      puts "#{' '*indent}B-yah: result:#{result_str.inspect} length: #{i}" if ENV["DEBUG"]
      return OpenStruct.new(str:result_str, consumed_length: i+delimiter.length)

    else
      char = input_str[i]
      result_str << char
      puts "#{' '*indent}D-yah: i:#{i} char: #{char} result_str:#{result_str.inspect}" if ENV["DEBUG"]
      i += 1
    end
  end
end
|
309
|
+
|
310
|
+
end
|
311
|
+
end
|