yap-shell-parser 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,151 @@
1
+ # $Id$
2
+ #
3
+ # Racc grammar for the Yap shell command language; the rule actions build Yap::Shell::Parser::Nodes objects.
4
+
5
+ class Yap::Shell::Parser
6
+ token Command LiteralCommand Argument Heredoc InternalEval Separator Conditional Pipe Redirection LValue RValue
7
+ #
8
+ # prechigh
9
+ # # left '**' '*' '/' '%'
10
+ # # left '+' '-'
11
+ # # left '&&' '||'
12
+ # # left '|' '^' '&'
13
+ # # # right Not
14
+ # left Separator
15
+ # left Conditional
16
+ # right Pipe
17
+ # preclow
18
+
19
+ rule
20
+
21
+ program : stmts
22
+
23
+ stmts : stmts Separator stmt
24
+ { result = StatementsNode.new(val[0], val[2]) }
25
+ | stmt
26
+ { result = StatementsNode.new(val[0]) }
27
+
28
+ stmt : stmt Conditional pipeline
29
+ { result = ConditionalNode.new(val[1].value, val[0], val[2]) }
30
+ | pipeline
31
+
32
+ pipeline : pipeline Pipe stmts2
33
+ { result = PipelineNode.new(val[0], val[2]) }
34
+ | stmts2
35
+
36
+ stmts2 : '(' stmts ')'
37
+ { result = val[1] }
38
+ | command_w_heredoc
39
+ | internal_eval
40
+
41
+ command_w_heredoc : command_w_redirects Heredoc
42
+ { val[0].heredoc = val[1] ; result = val[0] }
43
+ | command_w_redirects
44
+
45
+ command_w_redirects : command_w_redirects Redirection
46
+ { val[0].redirects << RedirectionNode.new(val[1].value, val[1].attrs[:target]) ; result = val[0] }
47
+ | command_w_vars
48
+ | command
49
+ | vars
50
+
51
+ command_w_vars : vars command
52
+ { result = EnvWrapperNode.new(val[0], val[1]) }
53
+
54
+ vars : vars LValue RValue
55
+ { val[0].add_var(val[1].value, val[2].value) ; result = val[0] }
56
+ | LValue RValue
57
+ { result = EnvNode.new(val[0].value, val[1].value) }
58
+
59
+ command : command2
60
+
61
+ command2: Command
62
+ { result = CommandNode.new(val[0].value) }
63
+ | Command args
64
+ { result = CommandNode.new(val[0].value, val[1].flatten) }
65
+ | LiteralCommand
66
+ { result = CommandNode.new(val[0].value, literal:true) }
67
+ | LiteralCommand args
68
+ { result = CommandNode.new(val[0].value, val[1].flatten, literal:true) }
69
+
70
+ args : Argument
71
+ { result = [val[0].value] }
72
+ | args Argument
73
+ { result = [val[0], val[1].value] }
74
+
75
+ internal_eval : InternalEval
76
+ { result = InternalEvalNode.new(val[0].value) }
77
+
78
+
79
+ ---- inner
80
+ $LOAD_PATH.unshift File.dirname(__FILE__) + "/../../"
81
+ require 'yap/shell/parser/lexer'
82
+ require 'yap/shell/parser/nodes'
83
+
84
+ include Yap::Shell::Parser::Nodes
85
+
86
# Lexes +str+ with Yap::Shell::Parser::Lexer, stores the token queue in @q
# (consumed by #next_token) and runs the Racc-generated parser over it.
#
# @param str [String] the shell input to parse
# @return the value produced by the grammar's start rule
def parse(str)
  # Set @yydebug = true here to trace the parser (requires a debug-enabled grammar).
  @q = Yap::Shell::Parser::Lexer.new.tokenize(str)

  # Racc's #do_parse is defined as
  # __send__(Racc_Main_Parsing_Routine, _racc_setup(), false).
  do_parse
end
96
+
97
# Called by the Racc runtime to fetch the next token: removes and returns the
# head of the queue built by #parse, or nil once the queue is empty.
def next_token
  @q.shift
end
100
+
101
+ ---- footer
102
+
103
# Demo driver: when this file is run directly, parse each sample input and
# pretty-print the resulting AST. Uncomment samples to try other inputs.
if __FILE__ == $0
  $LOAD_PATH.unshift File.dirname(__FILE__) + "/../../"
  require 'yap/shell/parser/lexer'
  require 'yap/shell/parser/nodes'
  require 'pp'

  samples = [
    # "echo foo",
    # "echo foo ; echo bar baz yep",
    # "echo foo && echo bar baz yep",
    # "echo foo && echo bar && ls foo && ls bar",
    # "echo foo ; echo bar baz yep ; ls foo",
    # "echo foo && echo bar ; ls baz",
    # "echo foo && echo bar ; ls baz ; echo zach || echo gretchen",
    # "echo foo | bar",
    # "echo foo | bar && foo | bar",
    # "foo && bar ; word || baz ; yep | grep -v foo",
    # "( foo )",
    # "( foo a b && bar c d )",
    # "( foo a b && (bar c d | baz e f))",
    # "((((foo))))",
    # "foo -b -c ; (this ;that ;the; other ;thing) && yep",
    # "foo -b -c ; (this ;that && other ;thing) && yep",
    # "4 + 5",
    # "!'hello' ; 4 - 4 && 10 + 3",
    # "\\foo <<-EOT\nbar\nEOT",
    # "ls | grep md | grep WISH",
    # "(!upcase)",
    # "echo foo > bar.txt",
    # "ls -l > a.txt ; echo f 2> b.txt ; cat b &> c.txt ; du -sh 1>&2 1>hey.txt",
    # "!Dir.chdir('..')",
    # "FOO=123",
    # "FOO=123 BAR=345",
    # "FOO=abc bar=2314 car=14ab ls -l",
    "FOO=abc BAR='hello world' ls -l ; CAR=f echo foo && say hi"
  ]

  samples.each do |src|
    puts 'parsing:'
    print src
    puts
    puts 'result:'
    ast = Yap::Shell::Parser.new.parse(src)
    pp ast
  end

  # puts "---- Evaluating"
  # require 'pry'
  # binding.pry
  # Evaluator.new.evaltree(ast)
end
@@ -0,0 +1,311 @@
1
+ require 'ostruct'
2
+
3
+ module Yap::Shell
4
+ class Parser::Lexer
5
# A single lexical token: a tag, its text value, the line number recorded by
# the lexer, and a hash of extra attributes.
class Token
  include Comparable

  attr_reader :tag, :value, :lineno, :attrs

  # @param tag the token type (a Symbol, or a String such as "(")
  # @param value the matched text
  # @param lineno the lexer's line counter when the token was created
  # @param attrs [Hash] extra data attached to the token
  def initialize(tag, value, lineno:, attrs: {})
    @tag = tag
    @value = value
    @lineno = lineno
    @attrs = attrs
  end

  # Equality-oriented comparison backing Comparable#==.
  #
  # Returns 0 when tag, value, lineno and attrs all match, and -1 for any other
  # Token (there is no meaningful ordering between distinct tokens).
  # Returns nil when +other+ is not of the same class, following Ruby's
  # convention for incomparable operands, so `token == 5` is false and
  # `token < 5` raises instead of silently answering true.
  def <=>(other)
    return nil unless other.class == self.class

    mine = [tag, value, lineno, attrs]
    theirs = [other.tag, other.value, other.lineno, other.attrs]
    mine == theirs ? 0 : -1
  end

  # Compact form: tag, quoted value and attrs.
  def inspect
    "#{tag.inspect} '#{value}' #{attrs.inspect}"
  end

  # Verbose form that also includes the line number.
  def to_s
    "Token(#{tag.inspect} #{value.inspect} on #{lineno} with #{attrs.inspect})"
  end

  # Length of the #to_s representation.
  def length
    to_s.length
  end
end
35
+
36
# One shell "word": a first character outside the first excluded set, followed
# by any run of characters outside the second excluded set.
ARG = /[^\s;\|\(\)\{\}\[\]\&\!\\\<][^\s;\|\(\)\{\}\[\]\&\!\>\<]*/
COMMAND = /\A(#{ARG})/
# A command prefixed with a backslash.
LITERAL_COMMAND = /\A\\(#{ARG})/
# Horizontal whitespace only; newlines are not consumed here.
WHITESPACE = /\A[^\n\S]+/
ARGUMENT = /\A(#{ARG}+)/
# NAME= at the start of the chunk. The range is spelled A-Za-z because [A-z]
# also spans the punctuation between "Z" and "a" ([ \ ] ^ _ `).
LH_ASSIGNMENT = /\A(([A-Za-z_][\w]*)=)/
RH_VALUE = /\A(\S+)/
STATEMENT_TERMINATOR = /\A(;)/
PIPE_TERMINATOR = /\A(\|)/
CONDITIONAL_TERMINATOR = /\A(&&|\|\|)/
# <<MARKER or <<-MARKER, the body (capture 2), then a line holding only MARKER.
# The marker is restricted to ASCII letters and digits (see LH_ASSIGNMENT note).
HEREDOC = /\A<<-?([A-Za-z0-9]+)\s*^(.*)?(^\s*\1\s*$)/m
# Input starting with "!" (capture 1) or with digits (capture 2).
INTERNAL_EVAL = /\A(?:(\!)|([0-9]+))/
SUBGROUP = /\A(\(|\))/
# Operators such as >, 2>, 1>&2 with an optional target word (capture 3).
REDIRECTION = /\A(([12]?>&?[12]?)\s*(?![12]>)(#{ARG})?)/
# &> and < operators, which require a target word.
REDIRECTION2 = /\A((&>|<)\s*(#{ARG}))/
51
+
52
# Splits +str+ into the token list consumed by the parser.
#
# At each position the remaining input (@chunk) is offered to the token
# matchers in priority order; the first one that matches returns how many
# characters it consumed and the position advances by that amount.
#
# @param str [String] raw shell input
# @return [Array] the accumulated @tokens, as [tag, Token] pairs
# @raise [RuntimeError] when no matcher can consume the remaining input
def tokenize(str)
  @str = str
  @tokens = []
  @lineno = 0
  @looking_for_args = false
  @current_position = 0

  while @current_position < str.length
    @chunk = str[@current_position..-1]

    consumed = (subgroup_token ||
                assignment_token ||
                literal_command_token ||
                command_token ||
                whitespace_token ||
                terminator_token ||
                redirection_token ||
                heredoc_token ||
                string_argument_token ||
                argument_token ||
                internal_eval_token).to_i

    # A step that consumes nothing would repeat forever on the same chunk.
    # Checking for progress (rather than capping the iteration count) keeps
    # long but valid inputs working.
    raise "Infinite loop detected on #{@chunk.inspect}" if consumed.zero?

    @current_position += consumed
  end

  @tokens
end
84
+
85
+ private
86
+
87
# Appends a [tag, Token] pair to @tokens, stamping the Token with the current
# @lineno. Returns the @tokens array.
def token(tag, value, attrs: {})
  entry = Token.new(tag, value, lineno: @lineno, attrs: attrs)
  @tokens << [tag, entry]
end
90
+
91
# Emits a :Command token when the lexer is not collecting arguments and the
# chunk starts with a word; switches the lexer into argument mode.
# Returns the number of characters consumed, or nil when nothing matched.
def command_token
  return if @looking_for_args

  match = @chunk.match(COMMAND)
  return unless match

  @looking_for_args = true
  token :Command, match[1]
  match[0].length
end
98
+
99
# Emits a :LiteralCommand token for a backslash-prefixed command when the lexer
# is not collecting arguments; switches the lexer into argument mode.
# Returns the number of characters consumed (backslash included), or nil.
def literal_command_token
  return if @looking_for_args

  match = @chunk.match(LITERAL_COMMAND)
  return unless match

  @looking_for_args = true
  token :LiteralCommand, match[1]
  match[0].length
end
106
+
107
# Emits a :NumericExpr token when the lexer is not collecting arguments and the
# chunk matches NUMERIC_EXPR; switches the lexer into argument mode.
# Returns the number of characters consumed, or nil.
#
# NOTE(review): NUMERIC_EXPR is not defined among the constants visible in this
# file and #tokenize never calls this method, so it looks vestigial. Calling it
# outside argument mode would raise NameError unless the constant is defined
# elsewhere - confirm before relying on it.
def numeric_expr_token
  return if @looking_for_args

  match = @chunk.match(NUMERIC_EXPR)
  return unless match

  @looking_for_args = true
  token :NumericExpr, match[1]
  match[0].length
end
114
+
115
# Emits a :Heredoc token holding the heredoc body when the chunk starts with a
# heredoc. Returns the length of the whole heredoc text, or nil.
def heredoc_token
  match = @chunk.match(HEREDOC)
  return unless match

  token :Heredoc, match[2]
  match[0].length
end
121
+
122
# Emits an :InternalEval token for input that starts with "!" or with digits.
# A leading "!" is dropped from the expression text but still counted as
# consumed; a numeric start is kept as part of the expression.
# Returns the number of characters consumed, or nil when nothing matched.
def internal_eval_token
  match = @chunk.match(INTERNAL_EVAL)
  return unless match

  skipped = match[1] ? match[1].length : 0
  evaluated = process_internal_eval(@chunk[skipped..-1], consumed: skipped)
  token :InternalEval, evaluated.str
  evaluated.consumed_length
end
136
+
137
# Emits a :Redirection token whose value is the operator and whose
# attrs[:target] is the word that follows it (nil when REDIRECTION matched
# without a target). REDIRECTION is tried first, then REDIRECTION2.
# Returns the number of characters consumed, or nil when neither matched.
def redirection_token
  match = @chunk.match(REDIRECTION)
  if match
    target = match[3].to_s.empty? ? nil : match[3]
  else
    match = @chunk.match(REDIRECTION2)
    return unless match

    target = match[3]
  end

  token :Redirection, match[2], attrs: { target: target }
  match[0].length
end
148
+
149
# Emits a token for an opening or closing parenthesis; the parenthesis itself
# is used as both tag and value. Returns the number of characters consumed,
# or nil when the chunk does not start with a parenthesis.
def subgroup_token
  match = @chunk.match(SUBGROUP)
  return unless match

  paren = match[0]
  token paren, paren
  paren.length
end
155
+
156
# Skips leading non-newline whitespace without emitting a token.
# Returns the number of whitespace characters consumed, or nil when the chunk
# does not start with such whitespace.
def whitespace_token
  match = WHITESPACE.match(@chunk)
  match && match[0].length
end
162
+
163
# Emits an :Argument token while the lexer is in argument mode and the chunk
# starts with an argument word.
#
# The argument is assembled character by character so that quoted sections
# embedded in a word (e.g. foo'bar baz') are joined into a single value with
# the quotes removed. Scanning stops at the first character that does not
# itself match ARGUMENT.
#
# Returns the number of characters consumed, or nil when nothing matched.
def argument_token
  return unless @looking_for_args && @chunk.match(ARGUMENT)

  text = ''
  pos = 0
  while pos < @chunk.length
    char = @chunk[pos]
    if char == "'" || char == '"'
      quoted = process_string(@chunk[pos..-1], char)
      text << quoted.str
      pos += quoted.consumed_length
    elsif char =~ ARGUMENT
      text << char
      pos += 1
    else
      break
    end
  end

  token :Argument, text
  pos
end
188
+
189
# Emits an :LValue / :RValue token pair for a NAME=value assignment found
# before any command word (i.e. while the lexer is not in argument mode).
#
# The value may be a quoted string (quotes removed via #process_string) or a
# run of non-whitespace characters. When nothing follows the "=" an empty
# :RValue is emitted, so every :LValue is always paired with an :RValue as the
# grammar's `LValue RValue` rules require.
#
# Returns the number of characters consumed, or nil when the chunk is not an
# assignment. @chunk is left untouched.
def assignment_token
  return if @looking_for_args

  assignment = @chunk.match(LH_ASSIGNMENT)
  return unless assignment

  token :LValue, assignment[2]
  consumed_length = assignment[1].length
  rest = @chunk[consumed_length..-1]

  if %w(' ").include?(rest[0])
    quoted = process_string(rest, rest[0])
    token :RValue, quoted.str
    consumed_length += quoted.consumed_length
  elsif (bare = rest.match(RH_VALUE))
    token :RValue, bare[1]
    consumed_length += bare[0].length
  else
    # "FOO=" followed by whitespace or end of input: an empty value.
    token :RValue, ''
  end

  consumed_length
end
207
+
208
# Emits a :Conditional (&& or ||), :Separator (;) or :Pipe (|) token and
# leaves argument mode so the next word is lexed as a command.
# The conditional pattern is tried before the pipe pattern so that "||" is not
# read as a pipe. Returns the number of characters consumed, or nil.
def terminator_token
  candidates = [
    [CONDITIONAL_TERMINATOR, :Conditional],
    [STATEMENT_TERMINATOR, :Separator],
    [PIPE_TERMINATOR, :Pipe]
  ]

  candidates.each do |pattern, tag|
    match = @chunk.match(pattern)
    next unless match

    @looking_for_args = false
    token tag, match[0]
    return match[0].length
  end

  nil
end
223
+
224
# Emits an :Argument token for a chunk that begins with a single- or
# double-quoted string; the token value is the string with its quotes removed.
# Returns the number of characters consumed, or nil when the chunk does not
# start with a quote.
def string_argument_token
  quote = @chunk[0]
  return unless quote == "'" || quote == '"'

  parsed = process_string(@chunk[0..-1], quote)
  token :Argument, parsed.str
  parsed.consumed_length
end
232
+
233
# Scans +input_str+ for the end of an internal-eval expression.
#
# The expression ends at the first ";", "|", "&&" or ")" that is not nested
# inside quotes or brackets, or at the end of the input. Nesting is tracked
# with a stack of expected closing characters. While inside a quoted string
# only the matching closing quote is significant: brackets and the other quote
# character are plain text, so `puts "(" ; ls` still ends at the ";".
#
# @param input_str [String] text to scan (any leading "!" already removed)
# @param consumed [Integer] characters consumed by the caller before
#   +input_str+; added to the reported length
# @return [OpenStruct] with +str+ (the stripped expression text) and
#   +consumed_length+ (scanned characters plus +consumed+)
def process_internal_eval(input_str, consumed: 0)
  quotes = %w(' ")
  closers = { "{" => "}", "[" => "]", "(" => ")" }
  scope = []
  str = ''

  i = 0
  loop do
    at_terminator = scope.empty? && input_str[i..-1] =~ /\A(;|\||&&|\))/
    if at_terminator || i == input_str.length
      return OpenStruct.new(str: str.strip, consumed_length: i + consumed)
    end

    ch = input_str[i]
    if scope.last == ch
      scope.pop # closes the innermost quote or bracket
    elsif quotes.include?(scope.last)
      # Inside a string literal: nothing but its closing quote affects nesting.
    elsif quotes.include?(ch)
      scope << ch
    elsif closers.key?(ch)
      scope << closers[ch]
    end

    str << ch
    i += 1
  end
end
271
+
272
# Reads a quoted string from the start of +input_str+.
#
# +input_str+ is expected to begin with +delimiter+. Characters are collected
# until the next unescaped +delimiter+. A backslash-escaped delimiter opens a
# nested string that is read recursively (with the escaped form as its own
# delimiter) and copied into the result wrapped in the unescaped delimiter.
# If the closing delimiter is never found, the rest of the input is consumed.
#
# Set the DEBUG environment variable to trace the scan on stdout.
#
# @param input_str [String] text starting at the opening delimiter
# @param delimiter [String] the quote sequence that opens and closes the string
# @param indent [Integer] indentation of debug output, grows with nesting
# @return [OpenStruct] with +str+ (contents without the outer delimiters) and
#   +consumed_length+ (characters read, delimiters included); for empty input
#   both are empty/zero so callers can always read the same two fields
def process_string(input_str, delimiter, indent = 0)
  return OpenStruct.new(str: '', consumed_length: 0) if input_str.empty?

  nested_delimiter = "\\#{delimiter}"
  debug = ENV["DEBUG"]
  pad = ' ' * indent

  i = delimiter.length # start string matching after our delimiter
  result_str = ''

  loop do
    chunk = input_str[i..-1]

    puts "#{pad}I: #{i}" if debug

    if i >= input_str.length
      # Ran off the end without seeing the closing delimiter.
      puts "#{pad}C-yah: result:#{result_str.inspect} length: #{input_str.length}" if debug
      return OpenStruct.new(str: result_str, consumed_length: input_str.length)
    end

    if chunk.start_with?(nested_delimiter) # we found a nested escaped string
      puts "#{pad}A-pre: chunk:#{chunk.inspect} nested_delimiter:#{nested_delimiter.inspect}" if debug
      result = process_string(chunk, nested_delimiter, indent + 2)
      result_str << [delimiter, result.str, delimiter].join
      puts "#{pad}A-pos: result:#{result.inspect} result_str:#{result_str.inspect} #{nested_delimiter.length} + #{result.consumed_length} + #{nested_delimiter.length}" if debug

      i += result.consumed_length

    elsif chunk.start_with?(delimiter) # we found the end of our current nested escaped string
      puts "#{pad}B-yah: result:#{result_str.inspect} length: #{i}" if debug
      return OpenStruct.new(str: result_str, consumed_length: i + delimiter.length)

    else
      char = input_str[i]
      result_str << char
      puts "#{pad}D-yah: i:#{i} char: #{char} result_str:#{result_str.inspect}" if debug
      i += 1
    end
  end
end
309
+
310
+ end
311
+ end