voodoo 0.7.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/voodoo/parser.rb CHANGED
@@ -1,3 +1,5 @@
1
+ require 'voodoo/validator'
2
+
1
3
  module Voodoo
2
4
  # Voodoo parser.
3
5
  # The parser reads Voodoo[http://inglorion.net/documents/designs/voodoo/]
@@ -24,156 +26,118 @@ module Voodoo
24
26
  # the end of the input has been reached.
25
27
  def initialize input
26
28
  @input = input
27
- @line = 1
28
- @char = 0
29
+ @input_name = input.respond_to?(:path) ? input.path : nil
30
+ @start_line = @line = 1
31
+ @start_column = @column = 0
29
32
  @lookahead = nil
33
+ @text = ''
30
34
  end
31
35
 
32
- # Consumes the current lookahead character
33
- def consume
34
- old = @lookahead
35
- if old == 10
36
- @line = @line.succ
37
- @char = 0
36
+ class ParseError < StandardError
37
+ def initialize message, input_name, start_line, start_column, text
38
+ @message = message
39
+ @input_name = input_name
40
+ @start_line = start_line
41
+ @start_column = start_column
42
+ @text = text
38
43
  end
39
- @lookahead = @input.getc
40
- @lookahead = :eof if @lookahead == nil
41
- @char = @char.succ unless @lookahead == :eof
42
- old
43
- end
44
44
 
45
- # Returns the current lookahead character,
46
- # or +nil+ when the end of the input has been reached.
47
- def lookahead
48
- if @lookahead == nil
49
- @lookahead = @input.getc
50
- @char = @char.succ
51
- end
52
- case @lookahead
53
- when :eof
54
- :eof
55
- else
56
- @lookahead.chr
57
- end
45
+ attr_reader :message, :input_name, :start_line, :start_column, :text
58
46
  end
59
47
 
60
48
  # Parses a top-level element.
61
49
  # Returns an array containing the parts of the element.
62
- # Eeach element of the array is a Symbol, a String, or an
63
- # Integer.
64
50
  #
65
- # For a label, returns:
66
- # [:label, label_name]
51
+ # Some examples (Voodoo code, Ruby return values in comments):
67
52
  #
68
- # For a function definition, returns:
69
- # [:function, [formala, formalb, ...], [statementa, statementb, ...]]
70
- #
71
- # For a conditional, returns:
72
- # [condition, expression, [truea, trueb, ...], [falsea, falseb, ...]]
53
+ # section functions
54
+ # # [:section, :functions]
55
+ #
56
+ # call foo x 12
57
+ # # [:call, :foo, :x, 12]
58
+ #
59
+ # set x add x 42
60
+ # # [:set, :x, :add, :x, 42]
61
+ #
62
+ # set-byte @x 1 10
63
+ # # [:"set-byte", [:"@", :x], 1, 10]
64
+ #
65
+ # ifeq x y
66
+ # set z equal
67
+ # else
68
+ # set z not-equal
69
+ # end if
70
+ # # [:ifeq, [:x, :y], [[:set, :z, :equal]], [[:set, :z, :"not-equal"]]]
71
+ #
72
+ # foo:
73
+ # # [:label, :foo]
74
+ #
75
+ # function x y
76
+ # let z add x y
77
+ # return z
78
+ # end function
79
+ # # [:function, [:x, :y], [:let, :z, :add, :x, :y], [:return, :z]]
73
80
  #
74
81
  def parse_top_level
75
- words = []
76
- word = ""
82
+ # Skip whitespace, comments, and empty lines
83
+ skip_to_next_top_level
77
84
 
78
- while true
79
- case lookahead
80
- when :eof
81
- # End of input
82
- break
83
- when "\n"
84
- # Newline
85
- consume
86
- # Exit the loop, but only if the line wasn't empty
87
- break unless words.empty?
88
- when "#"
89
- # Skip comment
90
- while lookahead != :eof && lookahead != "\n"
91
- word << lookahead
92
- consume
93
- end
94
- when /\d|-/
95
- # Digit; parse number
96
- words << parse_number
97
- when /\w|\\/
98
- # Letter, underscore, or backslash; parse symbol
99
- # Note: \w matches digites, too, so keep this case after \d
100
- words << parse_symbol
101
- if words.length == 1 && is_label?(words[-1])
102
- # We have a label; return it
103
- return [:label, words[-1].to_s[0..-2].to_sym]
104
- end
105
- when "\""
106
- # Double quote; parse string
107
- words << parse_string
108
- when /\s/
109
- # Skip whitespace
110
- consume
111
- when '@'
112
- # Parse at-expression.
113
- # '@' must be followed by a number or symbol.
114
- consume
115
- case lookahead
116
- when /\d|-/
117
- expr = parse_number
118
- when /\w|\\/
119
- expr = parse_symbol
120
- else
121
- raise "Invalid character (#{@lookahead.chr.inspect})" +
122
- " at #{@line}:#{@char}; expecting number or symbol"
123
- end
124
- words << [:'@', expr]
125
- else
126
- raise "Invalid character (#{@lookahead.chr.inspect})" +
127
- " at #{@line}:#{@char}"
128
- end
129
- end
130
-
131
- # We have a line of input. Conditionals and function declarations
132
- # must be handled specially, because they consist of more than one
133
- # line.
134
- if words.empty?
135
- # Nothing to parse; return nil
136
- nil
137
- elsif words[0] == :function
138
- # Function declaration. Parse function body
139
- body = parse_body :function
140
- [:function, words[1..-1], body]
141
- elsif is_conditional?(words[0])
142
- parse_conditional1 words[0], words[1..-1]
143
- elsif words[0] == :block
144
- body = parse_body :block
145
- [:block] + body
146
- else
147
- # Statement or data declaration; simply return it
148
- words
85
+ validate_top_level do
86
+ parse_top_level_nonvalidating
149
87
  end
150
88
  end
151
89
 
152
90
  # Parses the body of a function, a block, or a conditional
153
91
  def parse_body kind
154
92
  body = []
93
+ error = nil
155
94
  case kind
156
95
  when :function
157
96
  kind_text = 'function definition'
158
97
  else
159
98
  kind_text = kind.to_s
160
99
  end
161
- while true
162
- statement = parse_top_level
163
- if statement == nil
164
- raise "End of input while inside #{kind_text}"
165
- elsif statement[0] == :end
166
- # Done parsing body
167
- break
168
- elsif kind == :conditional && statement[0] == :else
169
- # Done parsing body, but there is another one coming up
170
- body << statement
171
- break
172
- else
173
- # Parsed a statement. Add it to body.
174
- body << statement
100
+ done = false
101
+ until done
102
+ begin
103
+ with_position do
104
+ statement = parse_top_level_nonvalidating
105
+ if statement == nil
106
+ done = true
107
+ parse_error "End of input while inside #{kind_text}", nil
108
+ elsif statement[0] == :end
109
+ # Done parsing body
110
+ done = true
111
+ elsif kind == :conditional && statement[0] == :else
112
+ # Done parsing body, but there is another one coming up
113
+ body << statement
114
+ done = true
115
+ else
116
+ # Should be a normal statement. Validate it, then add it to body
117
+ if statement[0] == :function
118
+ parse_error "Function definitions are only allowed at top-level"
119
+ end
120
+ begin
121
+ Validator.validate_statement statement
122
+ body << statement
123
+ rescue Validator::ValidationError => e
124
+ parse_error e.message
125
+ end
126
+ end
127
+ end
128
+ rescue => e
129
+ # Got some kind of error. Still try to parse the rest of the body.
130
+ # Save the error if it was the first one.
131
+ if error == nil
132
+ error = e
133
+ end
175
134
  end
176
135
  end
136
+
137
+ if error != nil
138
+ raise error
139
+ end
140
+
177
141
  body
178
142
  end
179
143
 
@@ -186,7 +150,7 @@ module Voodoo
186
150
  consume
187
151
  case lookahead
188
152
  when :eof
189
- raise "Unexpected end of input in escape sequence"
153
+ parse_error "Unexpected end of input in escape sequence", nil
190
154
  when "\\", "\"", " "
191
155
  result = lookahead
192
156
  consume
@@ -201,8 +165,9 @@ module Voodoo
201
165
  when "x"
202
166
  # \xXX is byte with hex value XX
203
167
  code = @input.read 2
204
- @char = @char + 2
168
+ @column = @column + 2
205
169
  consume
170
+ @text << code
206
171
  result = [code].pack('H2')
207
172
  when "\n"
208
173
  # \<newline> is line continuation character
@@ -278,6 +243,26 @@ module Voodoo
278
243
  name.to_sym
279
244
  end
280
245
 
246
+ #
247
+ # Private methods
248
+ #
249
+ private
250
+
251
+ # Consumes the current lookahead character.
252
+ # The character is appended to @text.
253
+ def consume
254
+ old = @lookahead
255
+ if old == 10
256
+ @line = @line.succ
257
+ @column = 0
258
+ end
259
+ @lookahead = @input.getc
260
+ @lookahead = :eof if @lookahead == nil
261
+ @column = @column.succ unless @lookahead == :eof
262
+ @text << old
263
+ old
264
+ end
265
+
281
266
  # Tests if a symbol is a label
282
267
  def is_label? symbol
283
268
  symbol.to_s[-1] == ?:
@@ -288,10 +273,20 @@ module Voodoo
288
273
  [:ifeq, :ifge, :ifgt, :ifle, :iflt, :ifne].member? symbol
289
274
  end
290
275
 
291
- #
292
- # Private methods
293
- #
294
- private
276
+ # Returns the current lookahead character,
277
+ # or +:eof+ when the end of the input has been reached.
278
+ def lookahead
279
+ if @lookahead == nil
280
+ @lookahead = @input.getc
281
+ @column = @column.succ
282
+ end
283
+ case @lookahead
284
+ when :eof
285
+ :eof
286
+ else
287
+ @lookahead.chr
288
+ end
289
+ end
295
290
 
296
291
  # Parses a conditional statement
297
292
  def parse_conditional1 condition, operands
@@ -311,6 +306,116 @@ module Voodoo
311
306
  [condition, operands, consequent, alternative]
312
307
  end
313
308
 
309
+ # Raises a ParseError reporting @start_line and @start_column, after skipping any remaining input up to the start of the next line
310
+ def parse_error message, text = @text
311
+ # Create the error object
312
+ error = ParseError.new(message, @input_name, @start_line,
313
+ @start_column, text)
314
+
315
+ # Set a backtrace to the calling method
316
+ error.set_backtrace caller
317
+
318
+ # If we are not at a new line, skip until the next line
319
+ while @column != 1 && lookahead != :eof
320
+ consume
321
+ end
322
+
323
+ # Raise the error
324
+ raise error
325
+ end
326
+
327
+ # Parses a top-level directive without validating it
328
+ def parse_top_level_nonvalidating
329
+ # Skip whitespace, comments, and empty lines
330
+ skip_to_next_top_level
331
+
332
+ words = []
333
+ while true
334
+ # Parse next token
335
+ skip_whitespace
336
+ word = try_parse_token
337
+ if word == nil
338
+ # Word is nil; that means we did not get a token
339
+ case lookahead
340
+ when :eof
341
+ # End of input
342
+ break
343
+ when "\n"
344
+ # Newline
345
+ consume
346
+ # Exit the loop, but only if the line wasn't empty
347
+ break unless words.empty?
348
+ when "#"
349
+ # Skip comment
350
+ while lookahead != :eof && lookahead != "\n"
351
+ word << lookahead
352
+ consume
353
+ end
354
+ else
355
+ parse_error "Unexpected character (#{lookahead}) in input"
356
+ end
357
+ else
358
+ # Word is not nil - we got a token
359
+ if words.empty? && word.kind_of?(::Symbol) && word.to_s[-1] == ?:
360
+ # First word is a label
361
+ words = [:label, word.to_s[0..-2].to_sym]
362
+ break
363
+ end
364
+ # Add word to statement
365
+ words << word
366
+ end
367
+ end
368
+
369
+ # We have a line of input. Conditionals and function declarations
370
+ # must be handled specially, because they consist of more than one
371
+ # line.
372
+ if words.empty?
373
+ # Nothing to parse; return nil
374
+ nil
375
+ elsif words[0] == :function
376
+ # Function declaration. Parse function body
377
+ body = parse_body :function
378
+ [:function, words[1..-1]] + body
379
+ elsif is_conditional?(words[0])
380
+ parse_conditional1 words[0], words[1..-1]
381
+ elsif words[0] == :block
382
+ body = parse_body :block
383
+ [:block] + body
384
+ else
385
+ # Statement or data declaration; simply return it
386
+ words
387
+ end
388
+ end
389
+
390
+ # Skips whitespace, newlines, and comments before a top-level directive
391
+ def skip_to_next_top_level
392
+ while true
393
+ case lookahead
394
+ when /\s/
395
+ # Skip whitespace
396
+ consume
397
+ when "\n"
398
+ # Newline
399
+ consume
400
+ when "#"
401
+ # Skip comment
402
+ while lookahead != :eof && lookahead != "\n"
403
+ consume
404
+ end
405
+ else
406
+ break
407
+ end
408
+ end
409
+ end
410
+
411
+ # Consumes characters until a character other than space or tab is
412
+ # encountered.
413
+ def skip_whitespace
414
+ while lookahead == " " || lookahead == "\t"
415
+ consume
416
+ end
417
+ end
418
+
314
419
  # Splits a parsed if-clause into two parts:
315
420
  # 1. The list of statements making up the clause proper
316
421
  # 2. The condition for the next clause:
@@ -335,5 +440,79 @@ module Voodoo
335
440
  end
336
441
  end
337
442
 
443
+ # Tries to parse a symbol, number, string, or at-expression. If
444
+ # such a token starts at the current position, it is parsed and returned.
445
+ # Else, nil is returned.
446
+ def try_parse_token
447
+ case lookahead
448
+ when /\d|-/
449
+ # Digit; parse number
450
+ parse_number
451
+ when /\w|\\/
452
+ # Letter, underscore, or backslash; parse symbol
453
+ # Note: \w matches digits, too, so keep this case after \d
454
+ parse_symbol
455
+ when "\""
456
+ # Double quote; parse string
457
+ parse_string
458
+ when '@'
459
+ # Parse at-expression.
460
+ # '@' must be followed by a number or symbol.
461
+ consume
462
+ case lookahead
463
+ when /\d|-/
464
+ expr = parse_number
465
+ when /\w|\\/
466
+ expr = parse_symbol
467
+ else
468
+ parse_error "Invalid character (#{lookahead}) " +
469
+ "in at-expression; expecting number or symbol"
470
+ end
471
+ [:'@', expr]
472
+ else
473
+ # No valid starter for a token, return nil
474
+ nil
475
+ end
476
+ end
477
+
478
+ # Evaluate block and check that the result is a valid top-level
479
+ # directive.
480
+ def validate_top_level &block
481
+ with_position do
482
+ result = yield
483
+ begin
484
+ if result != nil
485
+ Validator.validate_top_level result
486
+ end
487
+ result
488
+ rescue Validator::ValidationError => e
489
+ parse_error e.message
490
+ end
491
+ end
492
+ end
493
+
494
+ # Evaluate block, keeping track of @start_line, @start_column
495
+ # at the beginning of the block, and @text during the evaluation
496
+ # of block.
497
+ def with_position &block
498
+ # Save old values
499
+ old_line = @start_line
500
+ old_column = @start_column
501
+ old_text = @text
502
+
503
+ # Evaluate block with new values
504
+ begin
505
+ @start_line = @line
506
+ @start_column = @column
507
+ @text = ''
508
+ yield
509
+ ensure
510
+ # Restore old values
511
+ @start_line = old_line
512
+ @start_column = old_column
513
+ @text = old_text + @text
514
+ end
515
+ end
516
+
338
517
  end
339
518
  end