yap-shell-parser 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,151 @@
1
+ # $Id$
2
+ #
3
+ # Racc grammar for the Yap shell command parser (Yap::Shell::Parser).
4
+
5
class Yap::Shell::Parser
  # Terminal symbols; the lexer in yap/shell/parser/lexer emits tokens with these tags.
  token Command LiteralCommand Argument Heredoc InternalEval Separator Conditional Pipe Redirection LValue RValue
  #
  # prechigh
  # #  left    '**' '*' '/' '%'
  # #  left    '+' '-'
  # #  left    '&&' '||'
  # #  left    '|' '^' '&'
  # #  # right Not
  #   left Separator
  #   left Conditional
  #   right Pipe
  # preclow

rule

  program : stmts

  # Statements joined by a Separator token.
  stmts : stmts Separator stmt
      { result = StatementsNode.new(val[0], val[2]) }
    | stmt
      { result = StatementsNode.new(val[0]) }

  # Statements chained with a Conditional token; the operator text is the token's value.
  stmt : stmt Conditional pipeline
      { result = ConditionalNode.new(val[1].value, val[0], val[2]) }
    | pipeline

  pipeline : pipeline Pipe stmts2
      { result = PipelineNode.new(val[0], val[2]) }
    | stmts2

  # A parenthesised group, a command, or an internal evaluation.
  stmts2 : '(' stmts ')'
      { result = val[1] }
    | command_w_heredoc
    | internal_eval

  command_w_heredoc : command_w_redirects Heredoc
      { val[0].heredoc = val[1] ; result = val[0] }
    | command_w_redirects

  # Each Redirection token is appended to the node's redirects list.
  command_w_redirects : command_w_redirects Redirection
      { val[0].redirects << RedirectionNode.new(val[1].value, val[1].attrs[:target]) ; result = val[0] }
    | command_w_vars
    | command
    | vars

  # Variable assignments followed by a command.
  command_w_vars : vars command
      { result = EnvWrapperNode.new(val[0], val[1]) }

  vars : vars LValue RValue
      { val[0].add_var(val[1].value, val[2].value) ; result = val[0] }
    | LValue RValue
      { result = EnvNode.new(val[0].value, val[1].value) }

  command : command2

  command2: Command
      { result = CommandNode.new(val[0].value) }
    | Command args
      { result = CommandNode.new(val[0].value, val[1].flatten) }
    | LiteralCommand
      { result = CommandNode.new(val[0].value, literal:true) }
    | LiteralCommand args
      { result = CommandNode.new(val[0].value, val[1].flatten, literal:true) }

  # Accumulate argument values in one flat array instead of nesting a new
  # array per argument.
  args : Argument
      { result = [val[0].value] }
    | args Argument
      { result = val[0] << val[1].value }

  internal_eval : InternalEval
      { result = InternalEvalNode.new(val[0].value) }
77
+
78
+
79
+ ---- inner
80
+ $LOAD_PATH.unshift File.dirname(__FILE__) + "/../../"
81
+ require 'yap/shell/parser/lexer'
82
+ require 'yap/shell/parser/nodes'
83
+
84
+ include Yap::Shell::Parser::Nodes
85
+
86
+ def parse(str)
87
+ # @yydebug = true
88
+
89
+ @q = Yap::Shell::Parser::Lexer.new.tokenize(str)
90
+ # @q.push [false, '$'] # is optional from Racc 1.3.7
91
+ # puts @q.inspect
92
+ # puts "---- parse tree follows ----"
93
+ __send__(Racc_Main_Parsing_Routine, _racc_setup(), false)
94
+ #do_parse
95
+ end
96
+
97
  # Removes and returns the first entry of the token queue @q filled by
  # #parse; Array#shift yields nil once the queue is empty.
  def next_token
    @q.shift
  end
100
+
101
+ ---- footer
102
+
103
# Manual smoke test: when this file is run directly, parse each sample
# command line below and pretty-print the resulting tree.
if __FILE__ == $0
  $LOAD_PATH.unshift File.dirname(__FILE__) + "/../../"
  require 'yap/shell/parser/lexer'
  require 'yap/shell/parser/nodes'
  require 'pp'

  samples = [
    # "echo foo",
    # "echo foo ; echo bar baz yep",
    # "echo foo && echo bar baz yep",
    # "echo foo && echo bar && ls foo && ls bar",
    # "echo foo ; echo bar baz yep ; ls foo",
    # "echo foo && echo bar ; ls baz",
    # "echo foo && echo bar ; ls baz ; echo zach || echo gretchen",
    # "echo foo | bar",
    # "echo foo | bar && foo | bar",
    # "foo && bar ; word || baz ; yep | grep -v foo",
    # "( foo )",
    # "( foo a b && bar c d )",
    # "( foo a b && (bar c d | baz e f))",
    # "((((foo))))",
    # "foo -b -c ; (this ;that ;the; other  ;thing) && yep",
    # "foo -b -c ; (this ;that && other  ;thing) && yep",
    # "4 + 5",
    # "!'hello' ; 4 - 4 && 10 + 3",
    # "\\foo <<-EOT\nbar\nEOT",
    # "ls | grep md | grep WISH",
    # "(!upcase)",
    # "echo foo > bar.txt",
    # "ls -l > a.txt ; echo f 2> b.txt ; cat b &> c.txt ; du -sh 1>&2 1>hey.txt",
    # "!Dir.chdir('..')",
    # "FOO=123",
    # "FOO=123 BAR=345",
    # "FOO=abc bar=2314 car=14ab ls -l",
    "FOO=abc BAR='hello world' ls -l ; CAR=f echo foo && say hi"
  ]

  samples.each do |src|
    puts 'parsing:'
    print src
    puts
    puts 'result:'
    ast = Yap::Shell::Parser.new.parse(src)
    pp ast
  end


  # puts "---- Evaluating"
  # require 'pry'
  # binding.pry
  # Evaluator.new.evaltree(ast)
end
@@ -0,0 +1,311 @@
1
+ require 'ostruct'
2
+
3
+ module Yap::Shell
4
+ class Parser::Lexer
5
+ class Token
6
+ include Comparable
7
+
8
+ attr_reader :tag, :value, :lineno, :attrs
9
+
10
+ def initialize(tag, value, lineno:,attrs:{})
11
+ @tag = tag
12
+ @value = value
13
+ @lineno = lineno
14
+ @attrs = attrs
15
+ end
16
+
17
+ def <=>(other)
18
+ return -1 if self.class != other.class
19
+ return 0 if [tag, value, lineno, attrs] == [other.tag, other.value, other.lineno, other.attrs]
20
+ -1
21
+ end
22
+
23
+ def inspect
24
+ "#{tag.inspect} '#{value}' #{attrs.inspect}"
25
+ end
26
+
27
+ def to_s
28
+ "Token(#{tag.inspect} #{value.inspect} on #{lineno} with #{attrs.inspect})"
29
+ end
30
+
31
+ def length
32
+ to_s.length
33
+ end
34
+ end
35
+
36
+ ARG = /[^\s;\|\(\)\{\}\[\]\&\!\\\<][^\s;\|\(\)\{\}\[\]\&\!\>\<]*/
37
+ COMMAND = /\A(#{ARG})/
38
+ LITERAL_COMMAND = /\A\\(#{ARG})/
39
+ WHITESPACE = /\A[^\n\S]+/
40
+ ARGUMENT = /\A(#{ARG}+)/
41
+ LH_ASSIGNMENT = /\A(([A-z_][\w]*)=)/
42
+ RH_VALUE = /\A(\S+)/
43
+ STATEMENT_TERMINATOR = /\A(;)/
44
+ PIPE_TERMINATOR = /\A(\|)/
45
+ CONDITIONAL_TERMINATOR = /\A(&&|\|\|)/
46
+ HEREDOC = /\A<<-?([A-z0-9]+)\s*^(.*)?(^\s*\1\s*$)/m
47
+ INTERNAL_EVAL = /\A(?:(\!)|([0-9]+))/
48
+ SUBGROUP = /\A(\(|\))/
49
+ REDIRECTION = /\A(([12]?>&?[12]?)\s*(?![12]>)(#{ARG})?)/
50
+ REDIRECTION2 = /\A((&>|<)\s*(#{ARG}))/
51
+
52
+ def tokenize(str)
53
+ @str = str
54
+ @tokens = []
55
+ @lineno = 0
56
+ @looking_for_args = false
57
+
58
+ max = 100
59
+ count = 0
60
+ @current_position = 0
61
+ process_next_chunk = -> { @chunk = str.slice(@current_position..-1) ; @chunk != "" }
62
+
63
+ while process_next_chunk.call
64
+ result = subgroup_token ||
65
+ assignment_token ||
66
+ literal_command_token ||
67
+ command_token ||
68
+ whitespace_token ||
69
+ terminator_token ||
70
+ redirection_token ||
71
+ heredoc_token ||
72
+ string_argument_token ||
73
+ argument_token ||
74
+ internal_eval_token
75
+
76
+ count += 1
77
+ raise "Infinite loop detected on #{@chunk.inspect}" if count == max
78
+
79
+ @current_position += result.to_i
80
+ end
81
+
82
+ @tokens
83
+ end
84
+
85
+ private
86
+
87
+ def token(tag, value, attrs:{})
88
+ @tokens.push [tag, Token.new(tag, value, lineno:@lineno, attrs:attrs)]
89
+ end
90
+
91
+ def command_token
92
+ if !@looking_for_args && md=@chunk.match(COMMAND)
93
+ @looking_for_args = true
94
+ token :Command, md[1]
95
+ md[0].length
96
+ end
97
+ end
98
+
99
+ def literal_command_token
100
+ if !@looking_for_args && md=@chunk.match(LITERAL_COMMAND)
101
+ @looking_for_args = true
102
+ token :LiteralCommand, md[1]
103
+ md[0].length
104
+ end
105
+ end
106
+
107
+ def numeric_expr_token
108
+ if !@looking_for_args && md=@chunk.match(NUMERIC_EXPR)
109
+ @looking_for_args = true
110
+ token :NumericExpr, md[1]
111
+ md[0].length
112
+ end
113
+ end
114
+
115
+ def heredoc_token
116
+ if md=@chunk.match(HEREDOC)
117
+ token :Heredoc, md[2]
118
+ md[0].length
119
+ end
120
+ end
121
+
122
+ def internal_eval_token
123
+ if md=@chunk.match(INTERNAL_EVAL)
124
+ consumed = 0
125
+ substr = if md[1] # begins with !
126
+ consumed = md[1].length
127
+ @chunk[consumed..-1]
128
+ elsif md[2] # begins with a number
129
+ @chunk[consumed..-1]
130
+ end
131
+ result = process_internal_eval substr, consumed: consumed
132
+ token :InternalEval, result.str
133
+ return result.consumed_length
134
+ end
135
+ end
136
+
137
+ def redirection_token
138
+ if md=@chunk.match(REDIRECTION)
139
+ target = nil
140
+ target = md[3] if md[3] && md[3].length > 0
141
+ token :Redirection, md[2], attrs: { target: target }
142
+ md[0].length
143
+ elsif md=@chunk.match(REDIRECTION2)
144
+ token :Redirection, md[2], attrs: { target: md[3] }
145
+ md[0].length
146
+ end
147
+ end
148
+
149
+ def subgroup_token
150
+ if md=@chunk.match(SUBGROUP)
151
+ token md[0], md[0]
152
+ return md[0].length
153
+ end
154
+ end
155
+
156
+ # Matches and consumes non-meaningful whitespace.
157
+ def whitespace_token
158
+ return nil unless md=WHITESPACE.match(@chunk)
159
+ input = md.to_a[0]
160
+ input.length
161
+ end
162
+
163
+ def argument_token
164
+ if @looking_for_args && md=@chunk.match(ARGUMENT)
165
+ str = ''
166
+ i = 0
167
+ loop do
168
+ ch = @chunk[i]
169
+ if %w(' ").include?(ch)
170
+ result = process_string @chunk[i..-1], ch
171
+ str << result.str
172
+ i += result.consumed_length
173
+
174
+ elsif ch !~ ARGUMENT
175
+ break
176
+ else
177
+ str << ch
178
+ i += 1
179
+ end
180
+
181
+ break if i >= @chunk.length
182
+ end
183
+
184
+ token :Argument, str
185
+ i
186
+ end
187
+ end
188
+
189
+ def assignment_token
190
+ if !@looking_for_args && md=@chunk.match(LH_ASSIGNMENT)
191
+ token :LValue, md[2]
192
+ consumed_length = md[1].length
193
+ i = consumed_length
194
+
195
+ @chunk = @chunk[i..-1]
196
+ if %w(' ").include?(@chunk[0])
197
+ result = process_string @chunk[0..-1], @chunk[0]
198
+ token :RValue, result.str
199
+ consumed_length += result.consumed_length
200
+ elsif md=@chunk.match(RH_VALUE)
201
+ token :RValue, md[1]
202
+ consumed_length += md[0].length
203
+ end
204
+ consumed_length
205
+ end
206
+ end
207
+
208
+ def terminator_token
209
+ if md=@chunk.match(CONDITIONAL_TERMINATOR)
210
+ @looking_for_args = false
211
+ token :Conditional, md[0]
212
+ md[0].length
213
+ elsif md=@chunk.match(STATEMENT_TERMINATOR)
214
+ @looking_for_args = false
215
+ token :Separator, md[0]
216
+ md[0].length
217
+ elsif md=@chunk.match(PIPE_TERMINATOR)
218
+ @looking_for_args = false
219
+ token :Pipe, md[0]
220
+ md[0].length
221
+ end
222
+ end
223
+
224
+ # Matches single and double quoted strings
225
+ def string_argument_token
226
+ if %w(' ").include?(@chunk[0])
227
+ result = process_string @chunk[0..-1], @chunk[0]
228
+ token :Argument, result.str
229
+ return result.consumed_length
230
+ end
231
+ end
232
+
233
+ def process_internal_eval(input_str, consumed:0)
234
+ scope = []
235
+ words = []
236
+ str = ''
237
+
238
+ i = 0
239
+ loop do
240
+ ch = input_str[i]
241
+ popped = false
242
+
243
+ if scope.empty? && md=input_str[i..-1].match(/\A(;|\||&&|\))/)
244
+ return OpenStruct.new(str:str.strip, consumed_length:i+consumed)
245
+
246
+ elsif (i == input_str.length)
247
+ return OpenStruct.new(str:str.strip, consumed_length:i+consumed)
248
+
249
+ else
250
+ if scope.last == ch
251
+ scope.pop
252
+ popped = true
253
+ end
254
+
255
+ if !popped
256
+ if %w(' ").include?(ch)
257
+ scope << ch
258
+ elsif ch == "{"
259
+ scope << "}"
260
+ elsif ch == "["
261
+ scope << "]"
262
+ elsif ch == "("
263
+ scope << ")"
264
+ end
265
+ end
266
+ str << ch
267
+ end
268
+ i += 1
269
+ end
270
+ end
271
+
272
+ def process_string(input_str, delimiter, indent=0)
273
+ return input_str if input_str.length == 0
274
+ nested_delimiter = "\\#{delimiter}"
275
+
276
+ i = delimiter.length # start string matching after our delimiter
277
+ result_str = ''
278
+
279
+ loop do
280
+ chunk = input_str[i..-1]
281
+
282
+ puts "#{' '*indent}I: #{i}" if ENV["DEBUG"]
283
+
284
+ if i >= input_str.length
285
+ puts "#{' '*indent}C-yah: result:#{result_str.inspect} length: #{input_str.length}" if ENV["DEBUG"]
286
+ return OpenStruct.new(str:result_str, consumed_length: input_str.length)
287
+ end
288
+
289
+ if chunk.start_with?(nested_delimiter) # we found a nested escaped string
290
+ puts "#{' '*indent}A-pre: chunk:#{chunk.inspect} nested_delimiter:#{nested_delimiter.inspect}" if ENV["DEBUG"]
291
+ result = process_string(chunk[0..-1], nested_delimiter, indent+2)
292
+ result_str << [delimiter, result.str, delimiter].join
293
+ puts "#{' '*indent}A-pos: result:#{result.inspect} result_str:#{result_str.inspect} #{nested_delimiter.length} + #{result.consumed_length} + #{nested_delimiter.length}" if ENV["DEBUG"]
294
+
295
+ i += result.consumed_length
296
+
297
+ elsif chunk.start_with?(delimiter) # we found the end of our current nested escaped string
298
+ puts "#{' '*indent}B-yah: result:#{result_str.inspect} length: #{i}" if ENV["DEBUG"]
299
+ return OpenStruct.new(str:result_str, consumed_length: i+delimiter.length)
300
+
301
+ else
302
+ char = input_str[i]
303
+ result_str << char
304
+ puts "#{' '*indent}D-yah: i:#{i} char: #{char} result_str:#{result_str.inspect}" if ENV["DEBUG"]
305
+ i += 1
306
+ end
307
+ end
308
+ end
309
+
310
+ end
311
+ end