voodoo 0.7.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/voodoo/parser.rb CHANGED
@@ -1,3 +1,5 @@
1
+ require 'voodoo/validator'
2
+
1
3
  module Voodoo
2
4
  # Voodoo parser.
3
5
  # The parser reads Voodoo[http://inglorion.net/documents/designs/voodoo/]
@@ -24,156 +26,118 @@ module Voodoo
24
26
  # the end of the input has been reached.
25
27
  def initialize input
26
28
  @input = input
27
- @line = 1
28
- @char = 0
29
+ @input_name = input.respond_to?(:path) ? input.path : nil
30
+ @start_line = @line = 1
31
+ @start_column = @column = 0
29
32
  @lookahead = nil
33
+ @text = ''
30
34
  end
31
35
 
32
- # Consumes the current lookahead character
33
- def consume
34
- old = @lookahead
35
- if old == 10
36
- @line = @line.succ
37
- @char = 0
36
+ class ParseError < StandardError
37
+ def initialize message, input_name, start_line, start_column, text
38
+ @message = message
39
+ @input_name = input_name
40
+ @start_line = start_line
41
+ @start_column = start_column
42
+ @text = text
38
43
  end
39
- @lookahead = @input.getc
40
- @lookahead = :eof if @lookahead == nil
41
- @char = @char.succ unless @lookahead == :eof
42
- old
43
- end
44
44
 
45
- # Returns the current lookahead character,
46
- # or +nil+ when the end of the input has been reached.
47
- def lookahead
48
- if @lookahead == nil
49
- @lookahead = @input.getc
50
- @char = @char.succ
51
- end
52
- case @lookahead
53
- when :eof
54
- :eof
55
- else
56
- @lookahead.chr
57
- end
45
+ attr_reader :message, :input_name, :start_line, :start_column, :text
58
46
  end
59
47
 
60
48
  # Parses a top-level element.
61
49
  # Returns an array containing the parts of the element.
62
- # Eeach element of the array is a Symbol, a String, or an
63
- # Integer.
64
50
  #
65
- # For a label, returns:
66
- # [:label, label_name]
51
+ # Some examples (Voodoo code, Ruby return values in comments):
67
52
  #
68
- # For a function definition, returns:
69
- # [:function, [formala, formalb, ...], [statementa, statementb, ...]]
70
- #
71
- # For a conditional, returns:
72
- # [condition, expression, [truea, trueb, ...], [falsea, falseb, ...]]
53
+ # section functions
54
+ # # [:section, :functions]
55
+ #
56
+ # call foo x 12
57
+ # # [:call, :foo, :x, 12]
58
+ #
59
+ # set x add x 42
60
+ # # [:set, :x, :add, :x, 42]
61
+ #
62
+ # set-byte @x 1 10
63
+ # # [:"set-byte", [:"@", :x], 1, 10]
64
+ #
65
+ # ifeq x y
66
+ # set z equal
67
+ # else
68
+ # set z not-equal
69
+ # end if
70
+ # # [:ifeq, [:x, :y], [[:set, :z, :equal]], [[:set, :z, :"not-equal"]]]
71
+ #
72
+ # foo:
73
+ # # [:label, :foo]
74
+ #
75
+ # function x y
76
+ # let z add x y
77
+ # return z
78
+ # end function
79
+ # # [:function, [:x, :y], [:let, :z, :add, :x, :y], [:return, :z]]
73
80
  #
74
81
  def parse_top_level
75
- words = []
76
- word = ""
82
+ # Skip whitespace, comments, and empty lines
83
+ skip_to_next_top_level
77
84
 
78
- while true
79
- case lookahead
80
- when :eof
81
- # End of input
82
- break
83
- when "\n"
84
- # Newline
85
- consume
86
- # Exit the loop, but only if the line wasn't empty
87
- break unless words.empty?
88
- when "#"
89
- # Skip comment
90
- while lookahead != :eof && lookahead != "\n"
91
- word << lookahead
92
- consume
93
- end
94
- when /\d|-/
95
- # Digit; parse number
96
- words << parse_number
97
- when /\w|\\/
98
- # Letter, underscore, or backslash; parse symbol
99
- # Note: \w matches digites, too, so keep this case after \d
100
- words << parse_symbol
101
- if words.length == 1 && is_label?(words[-1])
102
- # We have a label; return it
103
- return [:label, words[-1].to_s[0..-2].to_sym]
104
- end
105
- when "\""
106
- # Double quote; parse string
107
- words << parse_string
108
- when /\s/
109
- # Skip whitespace
110
- consume
111
- when '@'
112
- # Parse at-expression.
113
- # '@' must be followed by a number or symbol.
114
- consume
115
- case lookahead
116
- when /\d|-/
117
- expr = parse_number
118
- when /\w|\\/
119
- expr = parse_symbol
120
- else
121
- raise "Invalid character (#{@lookahead.chr.inspect})" +
122
- " at #{@line}:#{@char}; expecting number or symbol"
123
- end
124
- words << [:'@', expr]
125
- else
126
- raise "Invalid character (#{@lookahead.chr.inspect})" +
127
- " at #{@line}:#{@char}"
128
- end
129
- end
130
-
131
- # We have a line of input. Conditionals and function declarations
132
- # must be handled specially, because they consist of more than one
133
- # line.
134
- if words.empty?
135
- # Nothing to parse; return nil
136
- nil
137
- elsif words[0] == :function
138
- # Function declaration. Parse function body
139
- body = parse_body :function
140
- [:function, words[1..-1], body]
141
- elsif is_conditional?(words[0])
142
- parse_conditional1 words[0], words[1..-1]
143
- elsif words[0] == :block
144
- body = parse_body :block
145
- [:block] + body
146
- else
147
- # Statement or data declaration; simply return it
148
- words
85
+ validate_top_level do
86
+ parse_top_level_nonvalidating
149
87
  end
150
88
  end
151
89
 
152
90
  # Parses a body for a function or a conditional
153
91
  def parse_body kind
154
92
  body = []
93
+ error = nil
155
94
  case kind
156
95
  when :function
157
96
  kind_text = 'function definition'
158
97
  else
159
98
  kind_text = kind.to_s
160
99
  end
161
- while true
162
- statement = parse_top_level
163
- if statement == nil
164
- raise "End of input while inside #{kind_text}"
165
- elsif statement[0] == :end
166
- # Done parsing body
167
- break
168
- elsif kind == :conditional && statement[0] == :else
169
- # Done parsing body, but there is another one coming up
170
- body << statement
171
- break
172
- else
173
- # Parsed a statement. Add it to body.
174
- body << statement
100
+ done = false
101
+ until done
102
+ begin
103
+ with_position do
104
+ statement = parse_top_level_nonvalidating
105
+ if statement == nil
106
+ done = true
107
+ parse_error "End of input while inside #{kind_text}", nil
108
+ elsif statement[0] == :end
109
+ # Done parsing body
110
+ done = true
111
+ elsif kind == :conditional && statement[0] == :else
112
+ # Done parsing body, but there is another one coming up
113
+ body << statement
114
+ done = true
115
+ else
116
+ # Should be a normal statement. Validate it, then add it to body
117
+ if statement[0] == :function
118
+ parse_error "Function definitions are only allowed at top-level"
119
+ end
120
+ begin
121
+ Validator.validate_statement statement
122
+ body << statement
123
+ rescue Validator::ValidationError => e
124
+ parse_error e.message
125
+ end
126
+ end
127
+ end
128
+ rescue => e
129
+ # Got some kind of error. Still try to parse the rest of the body.
130
+ # Save the error if it was the first one.
131
+ if error == nil
132
+ error = e
133
+ end
175
134
  end
176
135
  end
136
+
137
+ if error != nil
138
+ raise error
139
+ end
140
+
177
141
  body
178
142
  end
179
143
 
@@ -186,7 +150,7 @@ module Voodoo
186
150
  consume
187
151
  case lookahead
188
152
  when :eof
189
- raise "Unexpected end of input in escape sequence"
153
+ parse_error "Unexpected end of input in escape sequence", nil
190
154
  when "\\", "\"", " "
191
155
  result = lookahead
192
156
  consume
@@ -201,8 +165,9 @@ module Voodoo
201
165
  when "x"
202
166
  # \xXX is byte with hex value XX
203
167
  code = @input.read 2
204
- @char = @char + 2
168
+ @column = @column + 2
205
169
  consume
170
+ @text << code
206
171
  result = [code].pack('H2')
207
172
  when "\n"
208
173
  # \<newline> is line continuation character
@@ -278,6 +243,26 @@ module Voodoo
278
243
  name.to_sym
279
244
  end
280
245
 
246
+ #
247
+ # Private methods
248
+ #
249
+ private
250
+
251
# Consumes the current lookahead character and reads the next character
# from @input into @lookahead (+:eof+ once +getc+ returns +nil+).
# The consumed character is appended to @text, and @line / @column are
# updated to reflect the new lookahead position.
#
# Returns the character that was consumed.
def consume
  old = @lookahead
  # IO#getc yields an Integer on Ruby 1.8 and a one-character String on
  # Ruby 1.9 and later, so a newline has to be recognized in both forms;
  # otherwise the line counter never advances on newer interpreters.
  if old == 10 || old == "\n"
    @line = @line.succ
    @column = 0
  end
  @lookahead = @input.getc
  @lookahead = :eof if @lookahead == nil
  # The column counts characters read on the current line, so it only
  # advances when a real character became the new lookahead.
  @column = @column.succ unless @lookahead == :eof
  @text << old
  old
end
265
+
281
266
  # Tests if a symbol is a label
282
267
  def is_label? symbol
283
268
  symbol.to_s[-1] == ?:
@@ -288,10 +273,20 @@ module Voodoo
288
273
  [:ifeq, :ifge, :ifgt, :ifle, :iflt, :ifne].member? symbol
289
274
  end
290
275
 
291
- #
292
- # Private methods
293
- #
294
- private
276
# Returns the current lookahead character as a one-character String,
# or +:eof+ when the end of the input has been reached.
#
# The first call reads a character from @input; later calls return the
# same character until it is consumed.
def lookahead
  if @lookahead == nil
    @lookahead = @input.getc
    if @lookahead == nil
      # getc returns nil at end of input (e.g. for empty input). Record
      # that as :eof, the same marker consume uses, instead of leaving
      # nil behind and failing on nil.chr below.
      @lookahead = :eof
    else
      @column = @column.succ
    end
  end
  case @lookahead
  when :eof
    :eof
  else
    @lookahead.chr
  end
end
295
290
 
296
291
  # Parses a conditional statement
297
292
  def parse_conditional1 condition, operands
@@ -311,6 +306,116 @@ module Voodoo
311
306
  [condition, operands, consequent, alternative]
312
307
  end
313
308
 
309
# Raises a ParseError describing +message+.
#
# The error carries @input_name, @start_line, @start_column and +text+
# (by default the current @text). Its backtrace is set to that of the
# method that called parse_error. Before raising, input is consumed
# until @column is 1 or the end of the input is reached, so that
# parsing can resume at the start of a line.
def parse_error message, text = @text
  failure = ParseError.new(message, @input_name, @start_line,
                           @start_column, text)
  failure.set_backtrace caller

  # Move to the beginning of the next line, unless we are already there
  consume until @column == 1 || lookahead == :eof

  raise failure
end
326
+
327
# Parses a top-level directive without validating it.
#
# Leading whitespace, comments, and empty lines are skipped, after which
# tokens are collected until the end of the line. Returns +nil+ when the
# end of the input is reached before any token was found.
#
# A leading label is returned as [:label, name]. For +function+,
# +block+, and conditional directives, the body is parsed as well.
# Any other directive is returned as the array of its tokens.
def parse_top_level_nonvalidating
  # Skip whitespace, comments, and empty lines
  skip_to_next_top_level

  words = []
  while true
    # Parse next token
    skip_whitespace
    word = try_parse_token
    if word == nil
      # Word is nil; that means we did not get a token
      case lookahead
      when :eof
        # End of input
        break
      when "\n"
        # Newline
        consume
        # Exit the loop, but only if the line wasn't empty
        break unless words.empty?
      when "#"
        # Skip comment, up to but not including the end of the line.
        # There is no token to append to here (word is nil), so the
        # characters are only consumed; consume records them in @text.
        while lookahead != :eof && lookahead != "\n"
          consume
        end
      else
        parse_error "Unexpected character (#{lookahead}) in input"
      end
    else
      # Word is not nil - we got a token
      if words.empty? && word.kind_of?(::Symbol) && word.to_s[-1] == ?:
        # First word is a label
        words = [:label, word.to_s[0..-2].to_sym]
        break
      end
      # Add word to statement
      words << word
    end
  end

  # We have a line of input. Conditionals and function declarations
  # must be handled specially, because they consist of more than one
  # line.
  if words.empty?
    # Nothing to parse; return nil
    nil
  elsif words[0] == :function
    # Function declaration. Parse function body
    body = parse_body :function
    [:function, words[1..-1]] + body
  elsif is_conditional?(words[0])
    parse_conditional1 words[0], words[1..-1]
  elsif words[0] == :block
    body = parse_body :block
    [:block] + body
  else
    # Statement or data declaration; simply return it
    words
  end
end
389
+
390
# Skips whitespace, newlines, and comments before a top-level directive.
#
# Stops at the first character that is neither whitespace nor part of a
# comment, or at the end of the input.
def skip_to_next_top_level
  while true
    case lookahead
    when /\s/
      # Skip whitespace. \s matches newlines too, so empty lines are
      # handled here and need no branch of their own.
      consume
    when "#"
      # Skip comment, up to but not including the end of the line
      while lookahead != :eof && lookahead != "\n"
        consume
      end
    else
      break
    end
  end
end
410
+
411
# Consumes spaces and tabs, stopping at the first lookahead character
# that is neither. Newlines are not skipped.
def skip_whitespace
  loop do
    case lookahead
    when " ", "\t" then consume
    else break
    end
  end
end
418
+
314
419
  # Splits a parsed if-clause into two parts:
315
420
  # 1. The list of statements making up the clause proper
316
421
  # 2. The condition for the next clause:
@@ -335,5 +440,79 @@ module Voodoo
335
440
  end
336
441
  end
337
442
 
443
# Tries to parse a symbol, number, string, or at-expression. If such a
# token starts at the current position, it is parsed and returned;
# otherwise nothing is consumed and nil is returned.
def try_parse_token
  case lookahead
  when /\d|-/ then parse_number         # digit or minus sign
  # \w matches digits, too, so this case has to stay after the \d one
  when /\w|\\/ then parse_symbol        # letter, underscore, or backslash
  when "\"" then parse_string           # double quote
  when '@' then parse_at_expression
  end
end

# Parses an at-expression: '@' followed by a number or a symbol.
# Returns [:'@', operand]; reports a parse error if anything else
# follows the '@'.
def parse_at_expression
  consume
  operand =
    case lookahead
    when /\d|-/ then parse_number
    when /\w|\\/ then parse_symbol
    else
      parse_error "Invalid character (#{lookahead}) " +
                  "in at-expression; expecting number or symbol"
    end
  [:'@', operand]
end
477
+
478
# Runs the given block inside with_position and checks that what it
# returns is a valid top-level directive.
#
# A nil result is passed through without validation. If the validator
# raises Validator::ValidationError, its message is reported through
# parse_error. Otherwise the block's result is returned.
def validate_top_level &block
  with_position do
    directive = yield
    begin
      Validator.validate_top_level directive unless directive.nil?
      directive
    rescue Validator::ValidationError => invalid
      parse_error invalid.message
    end
  end
end
493
+
494
# Runs the given block with @start_line and @start_column set to the
# current @line and @column, and with @text starting out empty.
#
# When the block finishes (normally or by raising), the previous
# @start_line and @start_column are restored, and the text gathered
# during the block is appended to the previous @text.
# Returns the value of the block.
def with_position &block
  saved_line, saved_column, saved_text = @start_line, @start_column, @text

  @start_line = @line
  @start_column = @column
  @text = ''
  yield
ensure
  @start_line = saved_line
  @start_column = saved_column
  @text = saved_text + @text
end
516
+
338
517
  end
339
518
  end