ebnf 0.2.3 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- NDg4MzMwZTA0Yzg3MzY5Y2UyZDMwMWI2ZWRiODNjZWZjZmU5NGRlZQ==
4
+ ZDI4YWE4ZjExOGI5N2NkZDVmYzA4ZDMwMzM0ZGRhZThhNWU0ZGI2MQ==
5
5
  data.tar.gz: !binary |-
6
- NzA2ODg3ZTI1NzIwNjVjNmE1YWJjMDYzNTEyNjI1NGIzZWI5OTg1ZQ==
6
+ MWZmMjVkYjMwYzY1NmNlMDc0ZDM5MzUxOWU1ZTUwNGU2NDQ2YzFiYQ==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- N2FjYTRhNzI5NDFkZWI3OTNiOWFkNTNmYWY5NjUwMjA4YTM0ZmNiZTQ5NDYy
10
- MjFjODYxODc1MzRjMTZjNGM0N2U4NDk2NTM2ZjM0MmY0ZGI3ZDE3OTg3OWFi
11
- NjQwYTdjNzcxYTc3ZWJmYzcyMzlmOWJiZDlmZjc3YTJiOWVkODA=
9
+ M2E2MTcyM2MyZWYyNTg5Y2E3ZDVkNWI2ODgwYjdhZjhlYmQ1YTUzZGE5Nzc3
10
+ M2M3MDFiYzllMzE5YjNmMDA2Zjc3NjQ5YzViMzhmMTNmZTBkYWZhMDk0M2Rl
11
+ MjlkNWMxNzY3ODM4ZDYxZjBiZWI2NzA4ZGRjYzQ2MGY2MTBiZDI=
12
12
  data.tar.gz: !binary |-
13
- ODVmZDZlMjVkMjY0ZDkwYWFiZGY4OGE3ZTRhNDQzZWRiZDlkZTQzODQ4ZWMw
14
- MTZkZGMzNWYzZTMxNDc0MDc3YWFkNmU2NjExNWM2ZTkzOTJlZWE0MDQ1Yzdm
15
- N2RmOTU5NGFhZTVlZDEwYzhlMjJjMDNhYmIwNDkyOTc3ZGVkNDQ=
13
+ OGIxZGZjNzNhZTlkZDAwZTM0ZmExYzMzMzUxMjQ4ZmNmYjBmNTM4OGJlMmM2
14
+ NmFmODM4MjMwODYyY2E1YjhjNjNlY2E1YjI0MTg1NjFlYWI0MzI4ZThhMjA2
15
+ MmMyMjBiZjA5NGQ2MjlhYzA0Yzg1YzYzNzhjNjNkZmQ2ZTg5N2I=
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.3
1
+ 0.3.0
@@ -103,12 +103,12 @@ module EBNF
103
103
  include Parser
104
104
 
105
105
  # Abstract syntax tree from parse
106
- # @!attribute [r] ast
106
+ #
107
107
  # @return [Array<Rule>]
108
108
  attr_reader :ast
109
109
 
110
110
  # Grammar errors, or errors found generating parse tables
111
- # @!attribute [r] errors
111
+ #
112
112
  # @return [Array<String>]
113
113
  attr_accessor :errors
114
114
 
@@ -6,29 +6,29 @@ module EBNF
6
6
 
7
7
  # Branch table, represented as a recursive hash.
8
8
  # The table is indexed by rule symbol, which in turn references a hash of terminals (which are the first terminals of the production), which in turn reference the sequence of rules that follow, given that terminal as input
9
- # @!attribute [r] branch
9
+ #
10
10
  # @return [Hash{Symbol => Hash{String, Symbol => Array<Symbol>}}]
11
11
  attr_reader :branch
12
12
 
13
13
  # First table
14
- # @!attribute [r] first
14
+ #
15
15
  # @return [Hash{Symbol, String => Symbol}]
16
16
  attr_reader :first
17
17
 
18
18
  # Follow table
19
- # @!attribute [r] first
19
+ #
20
20
  # @return [Hash{Symbol, String => Symbol}]
21
21
  attr_reader :follow
22
22
 
23
23
  # Terminal table
24
24
  # The list of terminals used in the grammar.
25
- # @!attribute [r] terminals
25
+ #
26
26
  # @return [Array<String, Symbol>]
27
27
  attr_reader :terminals
28
28
 
29
29
  # Start symbol
30
30
  # The rule which starts the grammar
31
- # @!attribute[r] start
31
+ #
32
32
  # @return [Symbol]
33
33
  attr_reader :start
34
34
 
@@ -53,12 +53,10 @@ module EBNF::LL1
53
53
  ML_START = /\'\'\'|\"\"\"/.freeze # Beginning of terminals that may span lines
54
54
 
55
55
  ##
56
- # @!attribute whitespace
57
56
  # @return [Regexp] defines whitespace, defaults to WS
58
57
  attr_reader :whitespace
59
58
 
60
59
  ##
61
- # @!attribute comment
62
60
  # @return [Regexp] defines single-line comment, defaults to COMMENT
63
61
  attr_reader :comment
64
62
 
@@ -233,15 +231,16 @@ module EBNF::LL1
233
231
  #
234
232
  # @return [Token]
235
233
  def recover
236
- until scanner.eos? do
237
- begin
238
- shift
239
- return first
240
- rescue Error, ArgumentError
241
- # Ignore errors until something scans, or EOS.
242
- scanner.pos = scanner.pos + 1
234
+ until scanner.eos? || tok = match_token
235
+ if scanner.skip_until(@whitespace).nil? # Skip past current "token"
236
+ # No whitespace at the end, must be at the end of the string
237
+ scanner.terminate
238
+ else
239
+ skip_whitespace
243
240
  end
244
241
  end
242
+ scanner.unscan if tok
243
+ first
245
244
  end
246
245
  protected
247
246
 
@@ -253,9 +252,10 @@ module EBNF::LL1
253
252
  def skip_whitespace
254
253
  # skip all white space, but keep track of the current line number
255
254
  while !scanner.eos?
256
- if matched = scanner.scan(@whitespace)
255
+ if matched = scanner.scan(@whitespace)
257
256
  @lineno += matched.count("\n")
258
- elsif (com = scanner.scan(@comment))
257
+ elsif (scanner.scan(@comment))
258
+ #
259
259
  else
260
260
  return
261
261
  end
@@ -472,7 +472,7 @@ module EBNF::LL1
472
472
  #
473
473
  # @return [String]
474
474
  def inspect
475
- to_hash.inspect
475
+ "#{@value.inspect}#{'(' + @type.to_s + ')' if @type}"
476
476
  end
477
477
  end # class Token
478
478
 
@@ -10,7 +10,6 @@ module EBNF::LL1
10
10
  DEBUG_LEVEL = 10
11
11
 
12
12
  ##
13
- # @!attribute [r] lineno
14
13
  # @return [Integer] line number of current token
15
14
  attr_reader :lineno
16
15
 
@@ -20,10 +19,10 @@ module EBNF::LL1
20
19
 
21
20
  # DSL for creating terminals and productions
22
21
  module ClassMethods
23
- def start_handlers; @@start_handlers || {}; end
24
- def production_handlers; @@production_handlers || {}; end
25
- def terminal_handlers; @@terminal_handlers || {}; end
26
- def patterns; @@patterns || []; end
22
+ def start_handlers; @start_handlers || {}; end
23
+ def production_handlers; @production_handlers || {}; end
24
+ def terminal_handlers; @terminal_handlers || {}; end
25
+ def patterns; @patterns || []; end
27
26
 
28
27
  ##
29
28
  # Defines the pattern for a terminal node and a block to be invoked
@@ -53,11 +52,11 @@ module EBNF::LL1
53
52
  # Block passed to initialization for yielding to calling parser.
54
53
  # Should conform to the yield specs for #initialize
55
54
  def terminal(term, regexp, options = {}, &block)
56
- @@patterns ||= []
55
+ @patterns ||= []
57
56
  # Passed in order to define evaluation sequence
58
- @@patterns << EBNF::LL1::Lexer::Terminal.new(term, regexp, options)
59
- @@terminal_handlers ||= {}
60
- @@terminal_handlers[term] = block if block_given?
57
+ @patterns << EBNF::LL1::Lexer::Terminal.new(term, regexp, options)
58
+ @terminal_handlers ||= {}
59
+ @terminal_handlers[term] = block if block_given?
61
60
  end
62
61
 
63
62
  ##
@@ -80,8 +79,8 @@ module EBNF::LL1
80
79
  # Should conform to the yield specs for #initialize
81
80
  # Yield to generate a triple
82
81
  def start_production(term, &block)
83
- @@start_handlers ||= {}
84
- @@start_handlers[term] = block
82
+ @start_handlers ||= {}
83
+ @start_handlers[term] = block
85
84
  end
86
85
 
87
86
  ##
@@ -105,8 +104,8 @@ module EBNF::LL1
105
104
  # Should conform to the yield specs for #initialize
106
105
  # Yield to generate a triple
107
106
  def production(term, &block)
108
- @@production_handlers ||= {}
109
- @@production_handlers[term] = block
107
+ @production_handlers ||= {}
108
+ @production_handlers[term] = block
110
109
  end
111
110
 
112
111
  # Evaluate a handler, delegating to the specified object.
@@ -115,15 +114,15 @@ module EBNF::LL1
115
114
  # @param [Object] object
116
115
  # @return [Object]
117
116
  def eval_with_binding(object)
118
- @@delegate = object
117
+ @delegate = object
119
118
  object.instance_eval {yield}
120
119
  end
121
120
 
122
121
  private
123
122
 
124
123
  def method_missing(method, *args, &block)
125
- if @@delegate ||= nil
126
- @@delegate.send method, *args, &block
124
+ if @delegate ||= nil
125
+ @delegate.send method, *args, &block
127
126
  else
128
127
  super
129
128
  end
@@ -137,29 +136,40 @@ module EBNF::LL1
137
136
  #
138
137
  # @example
139
138
  # require 'rdf/ll1/parser'
140
- #
139
+ #
141
140
  # class MyParser
142
141
  # include EBNF::LL1::Parser
143
- #
142
+ #
144
143
  # branch MyParser::BRANCH
145
- #
144
+ #
145
+ # ##
146
+ # # Defines a production called before parsing a non-terminal
147
+ # # with data from previous production along with data defined for the
148
+ # # current production
149
+ # #
150
+ # start_production :object do |input, current, callback|
151
+ # # Note production as triples for blankNodePropertyList
152
+ # # to set :subject instead of :resource
153
+ # current[:triples] = true
154
+ # end
155
+ #
146
156
  # ##
147
- # # Defines a production called during different phases of parsing
157
+ # # Defines a production called after parsing a non-terminal
148
158
  # # with data from previous production along with data defined for the
149
159
  # # current production
150
160
  # #
151
- # # Yield to generate a triple
152
- # production :object do |parser, phase, input, current|
161
+ # # callback to processor block
162
+ # production :object do |input, current, callback|
153
163
  # object = current[:resource]
154
- # yield :statement, RDF::Statement.new(input[:subject], input[:predicate], object)
164
+ # callback.call :statement, RDF::Statement.new(input[:subject], input[:predicate], object)
155
165
  # end
156
- #
166
+ #
157
167
  # ##
158
168
  # # Defines the pattern for a terminal node
159
- # terminal :BLANK_NODE_LABEL, %r(_:(#{PN_LOCAL})) do |parser, production, token, input|
169
+ # terminal :BLANK_NODE_LABEL, %r(_:(#{PN_LOCAL})) do |production, token, input|
160
170
  # input[:BLANK_NODE_LABEL] = RDF::Node.new(token)
161
171
  # end
162
- #
172
+ #
163
173
  # ##
164
174
  # # Iterates the given block for each RDF statement in the input.
165
175
  # #
@@ -168,7 +178,7 @@ module EBNF::LL1
168
178
  # # @return [void]
169
179
  # def each_statement(&block)
170
180
  # @callback = block
171
- #
181
+ #
172
182
  # parse(START.to_sym) do |context, *data|
173
183
  # case context
174
184
  # when :statement
@@ -176,11 +186,12 @@ module EBNF::LL1
176
186
  # end
177
187
  # end
178
188
  # end
179
- #
189
+ #
180
190
  # end
181
191
  #
182
192
  # @param [String, #to_s] input
183
- # @param [Symbol, #to_s] prod The starting production for the parser. It may be a URI from the grammar, or a symbol representing the local_name portion of the grammar URI.
193
+ # @param [Symbol, #to_s] start
194
+ # The starting production for the parser. It may be a URI from the grammar, or a symbol representing the local_name portion of the grammar URI.
184
195
  # @param [Hash{Symbol => Object}] options
185
196
  # @option options [Hash{Symbol,String => Hash{Symbol,String => Array<Symbol,String>}}] :branch LL1 branch table.
186
197
  # @option options [Hash{Symbol,String => Array<Symbol,String>}] :first ({})
@@ -202,8 +213,11 @@ module EBNF::LL1
202
213
  # @yieldparam [Symbol] *data
203
214
  # Data specific to the call
204
215
  # @return [EBNF::LL1::Parser]
216
+ # @raise [Exception] Raises exceptions for parsing errors
217
+ # or errors raised during processing callbacks. Internal
218
+ # errors are raised using {Error}.
205
219
  # @see http://cs.adelaide.edu.au/~charles/lt/Lectures/07-ErrorRecovery.pdf
206
- def parse(input = nil, prod = nil, options = {}, &block)
220
+ def parse(input = nil, start = nil, options = {}, &block)
207
221
  @options = options.dup
208
222
  @branch = options[:branch]
209
223
  @first = options[:first] ||= {}
@@ -217,110 +231,171 @@ module EBNF::LL1
217
231
 
218
232
  # Unrecoverable errors
219
233
  raise Error, "Branch table not defined" unless @branch && @branch.length > 0
220
- raise Error, "Starting production not defined" unless prod
234
+ raise Error, "Starting production not defined" unless start
221
235
 
222
236
  @prod_data = [{}]
223
- prod = prod.split('#').last.to_sym unless prod.is_a?(Symbol)
224
- todo_stack = [{:prod => prod, :terms => nil}]
237
+ start = start.split('#').last.to_sym unless start.is_a?(Symbol)
238
+ todo_stack = [{:prod => start, :terms => nil}]
225
239
 
226
240
  while !todo_stack.empty?
227
- pushed = false
228
- if todo_stack.last[:terms].nil?
229
- todo_stack.last[:terms] = []
230
- cur_prod = todo_stack.last[:prod]
231
-
232
- # If cur_prod is the starting production, we can reset the stack
233
- # to the beginning to avoid excessive growth in the production
234
- # stack
235
- if options[:reset_on_start] && cur_prod == prod
236
- todo_stack = [{:prod => prod, :terms => []}]
237
- @productions = []
238
- @prod_data = [{}]
239
- end
240
-
241
- # Get this first valid token appropriate for the stacked productions,
242
- # skipping invalid tokens until either a valid token is found (from @first),
243
- # or a token appearing in @follow appears.
244
- token = skip_until_valid(todo_stack)
245
-
246
- # At this point, token is either nil, in the first set of the production,
247
- # or in the follow set of this production or any previous production
248
- debug("parse(production)") do
249
- "token #{token ? token.representation.inspect : 'nil'}, " +
250
- "prod #{cur_prod.inspect}, " +
251
- "depth #{depth}"
252
- end
241
+ begin
242
+ @recovering = false
243
+ pushed = false
244
+ if todo_stack.last[:terms].nil?
245
+ todo_stack.last[:terms] = []
246
+ cur_prod = todo_stack.last[:prod]
247
+
248
+ # If cur_prod is the starting production, we can reset the stack
249
+ # to the beginning to avoid excessive growth in the production
250
+ # stack
251
+ if options[:reset_on_start] && cur_prod == start
252
+ todo_stack = [{:prod => start, :terms => []}]
253
+ @productions = []
254
+ @prod_data = [{}]
255
+ end
253
256
 
254
- # Got an opened production
255
- onStart(cur_prod)
256
- break if token.nil?
257
+ # Fetch the current token
258
+ token = get_token(:recover)
257
259
 
258
- if prod_branch = @branch[cur_prod]
259
- @recovering = false
260
- sequence = prod_branch[token.representation]
261
- debug("parse(production)", :level => 2) do
262
- "token #{token.representation.inspect} " +
263
- "prod #{cur_prod.inspect}, " +
264
- "prod_branch #{prod_branch.keys.inspect}, " +
265
- "sequence #{sequence.inspect}"
260
+ # At this point, token is either nil, in the first set of the production,
261
+ # or in the follow set of this production or any previous production
262
+ debug("parse(production)") do
263
+ "token #{token ? token.representation.inspect : 'nil'}, " +
264
+ "prod #{cur_prod.inspect}, " +
265
+ "depth #{depth}"
266
266
  end
267
267
 
268
- if sequence.nil?
269
- if prod_branch.has_key?(:_empty)
270
- debug("parse(production)", :level => 2) {"empty sequence for _empty"}
268
+ # Got an opened production
269
+ onStart(cur_prod)
270
+
271
+ if token.nil?
272
+ if !(first_include?(cur_prod, :_eps) && follow_include?(cur_prod, :_eof))
273
+ # End of file, and production does not contain eps, or it does, but follow does not contain eof
274
+ raise Error.new("Unexpected end of input", :production => cur_prod)
271
275
  else
272
- # If there is no sequence for this production, we're
273
- # in error recovery, and _token_ has been advanced to
274
- # the point where it can reasonably follow this production
276
+ debug("parse(production)") {"End of input prod #{cur_prod.inspect}"}
277
+ end
278
+ elsif prod_branch = @branch[cur_prod]
279
+ sequence = prod_branch.fetch(token.representation) do
280
+ raise Error.new("#{token.inspect} does not match production #{cur_prod.inspect}",
281
+ :production => cur_prod)
282
+ end
283
+ debug("parse(production)") do
284
+ "token #{token.representation.inspect} " +
285
+ "prod #{cur_prod.inspect}, " +
286
+ "prod_branch #{prod_branch.keys.inspect}, " +
287
+ "sequence #{sequence.inspect}"
275
288
  end
289
+ todo_stack.last[:terms] += sequence
290
+ else
291
+ raise Error.new("No branches found for #{cur_prod.inspect}",
292
+ :production => cur_prod, :token => token)
276
293
  end
277
- todo_stack.last[:terms] += sequence if sequence
278
- else
279
- # Is this a fatal error?
280
- error("parse(fatal?)", "No branches found for #{cur_prod.inspect}",
281
- :production => cur_prod, :token => token)
282
294
  end
283
- end
284
-
285
- debug("parse(terms)", :level => 2) {"todo #{todo_stack.last.inspect}, depth #{depth}"}
286
- while !todo_stack.last[:terms].to_a.empty?
287
- begin
295
+
296
+ debug("parse(terms)") {"todo #{todo_stack.last.inspect}, depth #{depth}"}
297
+ while !todo_stack.last[:terms].to_a.empty?
288
298
  # Get the next term in this sequence
289
299
  term = todo_stack.last[:terms].shift
290
300
  debug("parse(token)") {"accept #{term.inspect}"}
301
+
291
302
  if token = accept(term)
292
- @recovering = false
293
303
  debug("parse(token)") {"token #{token.inspect}, term #{term.inspect}"}
294
- onToken(term, token)
295
- elsif terminals.include?(term)
304
+ onTerminal(term, token)
305
+ elsif terminals.include?(term)
296
306
  # If term is a terminal, then it is an error if token does not
297
307
  # match it
298
- skip_until_valid(todo_stack)
308
+ raise Error.new("#{get_token.inspect} does not match terminal #{term.inspect}",
309
+ :production => cur_prod)
299
310
  else
300
- # If it's not a string (a symbol), it is a non-terminal and we push the new state
301
- todo_stack << {:prod => term, :terms => nil}
302
- debug("parse(push)", :level => 2) {"term #{term.inspect}, depth #{depth}"}
303
- pushed = true
304
- break
311
+ token = get_token
312
+
313
+ # If token is not in firsts of term, but eps is, skip to next
314
+ # term
315
+ if first_include?(term, :_eps) && !first_include?(term, token)
316
+ debug("parse(token)") {"skip optional term #{term.inspect} on #{token.inspect}"}
317
+ break
318
+ else
319
+ # Push term onto stack
320
+ todo_stack << {:prod => term, :terms => nil}
321
+ debug("parse(push)") {"term #{term.inspect}, depth #{depth}"}
322
+ pushed = true
323
+ break
324
+ end
305
325
  end
306
326
  end
307
- end
308
-
309
- # After completing the last production in a sequence, pop down until we find a production
310
- #
311
- # If in recovery mode, continue popping until we find a term with a follow list
312
- while !pushed &&
313
- !todo_stack.empty? &&
314
- ( (terms = todo_stack.last.fetch(:terms, [])).empty? ||
315
- (@recovering && @follow.fetch(terms.last, []).none? {|t| (token || :_eps) == t}))
316
- debug("parse(pop)", :level => 2) {"todo #{todo_stack.last.inspect}, depth #{depth}"}
317
- if terms.empty?
327
+ rescue Lexer::Error, Error => e
328
+ # Lexer encountered an illegal token or the parser encountered
329
+ # a terminal which is inappropriate for the current production.
330
+ # Perform error recovery to find a reasonable terminal based
331
+ # on the follow sets of the relevant productions. This includes
332
+ # remaining terms from the current production and the stacked
333
+ # productions
334
+ @lineno = e.lineno
335
+ if e.is_a?(Lexer::Error)
336
+ # Skip to the next valid terminal
337
+ @lexer.recover
338
+ error("parse(#{e.class})", "With input '#{e.input}': #{e.message}, skipped to #{(get_token(:recover) || :eof).inspect}",
339
+ :production => @productions.last)
340
+ else
341
+ # Otherwise, the terminal is fine, just not for this production.
342
+ @lexer.shift
343
+ error("parse(#{e.class})", "#{e.message}, skipped to #{(get_token(:recover) || :eof).inspect}",
344
+ :production => @productions.last, :token => e.token)
345
+ end
346
+
347
+ # Get the list of follows for this sequence, this production and the stacked productions.
348
+ debug("recovery", "stack follows:", :level => 4)
349
+ todo_stack.reverse.each do |todo|
350
+ debug("recovery", :level => 4) {" #{todo[:prod]}: #{@follow[todo[:prod]].inspect}"}
351
+ end
352
+
353
+ # Find all follows to the top of the stack
354
+ follows = todo_stack.inject([]) do |follow, todo|
355
+ prod = todo[:prod]
356
+ follow += @follow[prod] || []
357
+ end.uniq
358
+ debug("recovery") {"follows: #{follows.inspect}"}
359
+
360
+ # Skip tokens until one is found in follows
361
+ while (token = get_token(:recover)) && follows.none? {|t| token === t}
362
+ skipped = @lexer.shift
363
+ progress("recovery") {"skip #{skipped.inspect}"}
364
+ end
365
+ debug("recovery") {"found #{token.inspect} in follows"}
366
+
367
+ # Pop stack elements until token is in follows
368
+ while !todo_stack.empty? &&
369
+ !follow_include?(todo_stack.last[:prod], token || :_eof)
370
+ debug("recovery(pop)") {"todo #{todo_stack.last.inspect}, depth #{depth}"}
318
371
  todo_stack.pop
319
372
  onFinish
373
+ end
374
+
375
+ # Token is now in the first of the top production
376
+ unless todo_stack.empty?
377
+ todo_stack.pop
378
+ onFinish
379
+ end
380
+
381
+ if todo_stack.empty?
382
+ # Recovered to end of last production
383
+ warn("recover", "recovered to end of productions")
320
384
  else
321
- # Stop recovering when we a production which starts with the term
322
- debug("parse(pop)", :level => 2) {"recovery complete"}
323
- @recovering = false
385
+ warn("recover", "recovered to #{todo_stack.last[:prod].inspect} with #{token.inspect}")
386
+ end
387
+
388
+ @recovering = false
389
+ ensure
390
+ # After completing the last production in a sequence, pop down until we find a production
391
+ #
392
+ # If in recovery mode, continue popping until we find a term with a follow list
393
+ while !pushed &&
394
+ !todo_stack.empty? &&
395
+ todo_stack.last.fetch(:terms, []).empty?
396
+ debug("parse(pop)") {"todo #{todo_stack.last.inspect}, depth #{depth}"}
397
+ todo_stack.pop
398
+ onFinish
324
399
  end
325
400
  end
326
401
  end
@@ -329,12 +404,10 @@ module EBNF::LL1
329
404
 
330
405
  # Continue popping contexts off of the stack
331
406
  while !todo_stack.empty?
332
- debug("parse(eof)", :level => 2) {"stack #{todo_stack.last.inspect}, depth #{depth}"}
407
+ debug("parse(eof)") {"stack #{todo_stack.last.inspect}, depth #{depth}"}
333
408
  # There can't be anything left to do, or if there is, it must be optional
334
409
  last_terms = todo_stack.last[:terms]
335
- if last_terms.length > 0 && last_terms.none? {|t|
336
- @first.has_key?(t) && @first[t].include?(:_eps)
337
- }
410
+ if last_terms.length > 0 && last_terms.none? {|t|first_include?(t, :_eps)}
338
411
  error("parse(eof)",
339
412
  "End of input before end of production: stack #{todo_stack.last.inspect}, depth #{depth}"
340
413
  )
@@ -342,10 +415,10 @@ module EBNF::LL1
342
415
  todo_stack.pop
343
416
  onFinish
344
417
  end
345
-
418
+
346
419
  # When all is said and done, raise the error log
347
420
  unless @error_log.empty?
348
- raise Error, @error_log.join("\n\t")
421
+ raise Error, @error_log.join("\n\t")
349
422
  end
350
423
  end
351
424
 
@@ -369,16 +442,108 @@ module EBNF::LL1
369
442
  prod_data[sym] << values
370
443
  end
371
444
  end
372
-
445
+
373
446
  # Add values to production data, values arranged as an array
374
447
  def add_prod_data(sym, *values)
375
448
  return if values.compact.empty?
376
-
449
+
377
450
  prod_data[sym] ||= []
378
451
  prod_data[sym] += values
379
452
  debug("add_prod_data(#{sym})") {"#{prod_data[sym].inspect} += #{values.inspect}"}
380
453
  end
381
-
454
+
455
+ protected
456
+
457
+ ##
458
+ # Error information, used as level `0` debug messages.
459
+ #
460
+ # @param [String] node Relevant location associated with message
461
+ # @param [String] message Error string
462
+ # @param [Hash] options
463
+ # @option options [URI, #to_s] :production
464
+ # @option options [Token] :token
465
+ # @see {#debug}
466
+ def error(node, message, options = {})
467
+ message += ", found #{options[:token].inspect}" if options[:token]
468
+ message += " at line #{@lineno}" if @lineno
469
+ message += ", production = #{options[:production].inspect}" if options[:production]
470
+ @error_log << message unless @recovering
471
+ @recovering = true
472
+ debug(node, message, options.merge(:level => 0))
473
+ end
474
+
475
+ ##
476
+ # Warning information, used as level `1` debug messages.
477
+ #
478
+ # @param [String] node Relevant location associated with message
479
+ # @param [String] message Error string
480
+ # @param [Hash] options
481
+ # @option options [URI, #to_s] :production
482
+ # @option options [Token] :token
483
+ # @see {#debug}
484
+ def warn(node, message, options = {})
485
+ message += ", with token #{options[:token].inspect}" if options[:token]
486
+ message += " at line #{@lineno}" if @lineno
487
+ message += ", production = #{options[:production].inspect}" if options[:production]
488
+ @error_log << message unless @recovering
489
+ debug(node, message, options.merge(:level => 1))
490
+ end
491
+
492
+ ##
493
+ # Progress output when parsing. Passed as level `2` debug messages.
494
+ #
495
+ # @overload progress(node, message, options)
496
+ # @param [String] node Relevant location associated with message
497
+ # @param [String] message ("")
498
+ # @param [Hash] options
499
+ # @option options [Integer] :depth
500
+ # Recursion depth for indenting output
501
+ # @see {#debug}
502
+ def progress(node, *args)
503
+ return unless @options[:progress] || @options[:debug]
504
+ options = args.last.is_a?(Hash) ? args.pop : {}
505
+ message = args.join(",")
506
+ message += yield.to_s if block_given?
507
+ debug(node, message, options.merge(:level => 2))
508
+ end
509
+
510
+ ##
511
+ # Progress output when debugging.
512
+ #
513
+ # The call is ignored, unless `@options[:debug]` is set, in which
514
+ # case it yields tracing information as indicated. Additionally,
515
+ # if `@options[:debug]` is an Integer, the call is aborted if the
516
+ # `:level` option is greater than `@options[:debug]`.
517
+ #
518
+ # @overload debug(node, message, options)
519
+ # @param [Array<String>] args Relevant location associated with message
520
+ # @param [Hash] options
521
+ # @option options [Integer] :depth
522
+ # Recursion depth for indenting output
523
+ # @option options [Integer] :level
524
+ # Level assigned to message, by convention, level `0` is for
525
+ # errors, level `1` is for warnings, level `2` is for parser
526
+ # progress information, and anything higher is for various levels
527
+ # of debug information.
528
+ #
529
+ # @yield trace, level, lineno, depth, args
530
+ # @yieldparam [:trace] trace
531
+ # @yieldparam [Integer] level
532
+ # @yieldparam [Integer] lineno
533
+ # @yieldparam [Integer] depth Recursive depth of productions
534
+ # @yieldparam [Array<String>] args
535
+ # @yieldreturn [String] added to message
536
+ def debug(*args)
537
+ return unless @options[:debug] && @parse_callback
538
+ options = args.last.is_a?(Hash) ? args.pop : {}
539
+ debug_level = options.fetch(:level, 3)
540
+ return if @options[:debug].is_a?(Integer) && debug_level > @options[:debug]
541
+
542
+ depth = options[:depth] || self.depth
543
+ args << yield if block_given?
544
+ @parse_callback.call(:trace, debug_level, @lineno, depth, *args)
545
+ end
546
+
382
547
  private
383
548
  # Start for production
384
549
  def onStart(prod)
@@ -389,15 +554,20 @@ module EBNF::LL1
389
554
  # to customize before pushing on the @prod_data stack
390
555
  progress("#{prod}(:start):#{@prod_data.length}") {@prod_data.last}
391
556
  data = {}
392
- self.class.eval_with_binding(self) {
393
- handler.call(@prod_data.last, data, @parse_callback)
394
- }
557
+ begin
558
+ self.class.eval_with_binding(self) {
559
+ handler.call(@prod_data.last, data, @parse_callback)
560
+ }
561
+ rescue Exception => e
562
+ error("start", "#{e.class}: #{e.message}", :production => prod)
563
+ @recovering = false
564
+ end
395
565
  @prod_data << data
396
566
  else
397
567
  # Make sure we push as many as we pop, even if there is no
398
568
  # explicit start handler
399
569
  @prod_data << {} if self.class.production_handlers[prod]
400
- progress("#{prod}(:start)") { get_token.inspect}
570
+ progress("#{prod}(:start)") { get_token.inspect + (@recovering ? ' recovering' : '')}
401
571
  end
402
572
  #puts "prod_data(s): " + @prod_data.inspect
403
573
  end
@@ -410,201 +580,105 @@ module EBNF::LL1
410
580
  if handler && !@recovering
411
581
  # Pop production data element from stack, potentially allowing handler to use it
412
582
  data = @prod_data.pop
413
- self.class.eval_with_binding(self) {
414
- handler.call(@prod_data.last, data, @parse_callback)
415
- }
583
+ begin
584
+ self.class.eval_with_binding(self) {
585
+ handler.call(@prod_data.last, data, @parse_callback)
586
+ }
587
+ rescue Exception => e
588
+ error("finish", "#{e.class}: #{e.message}", :production => prod)
589
+ @recovering = false
590
+ end
416
591
  progress("#{prod}(:finish):#{@prod_data.length}") {@prod_data.last}
417
592
  else
418
- progress("#{prod}(:finish)", "recovering: #{@recovering.inspect}")
593
+ progress("#{prod}(:finish)") { "recovering" if @recovering }
419
594
  end
420
595
  @productions.pop
421
596
  end
422
597
 
423
- # A token
424
- def onToken(prod, token)
598
+ # A terminal
599
+ def onTerminal(prod, token)
425
600
  unless @productions.empty?
426
601
  parentProd = @productions.last
427
602
  handler = self.class.terminal_handlers[prod]
428
603
  # Allows catch-all for simple string terminals
429
604
  handler ||= self.class.terminal_handlers[nil] if prod.is_a?(String)
430
605
  if handler
431
- self.class.eval_with_binding(self) {
432
- handler.call(parentProd, token, @prod_data.last)
433
- }
434
- progress("#{prod}(:token)", "", :depth => (depth + 1)) {"#{token}: #{@prod_data.last}"}
606
+ begin
607
+ self.class.eval_with_binding(self) {
608
+ handler.call(parentProd, token, @prod_data.last, @parse_callback)
609
+ }
610
+ rescue Exception => e
611
+ error("terminal", "#{e.class}: #{e.message}", :production => prod)
612
+ @recovering = false
613
+ end
614
+ progress("#{prod}(:terminal)", "", :depth => (depth + 1)) {"#{token}: #{@prod_data.last}"}
435
615
  else
436
- progress("#{prod}(:token)", "", :depth => (depth + 1)) {token.to_s}
616
+ progress("#{prod}(:terminal)", "", :depth => (depth + 1)) {token.to_s}
437
617
  end
438
618
  else
439
- error("#{parentProd}(:token)", "Token has no parent production", :production => prod)
619
+ error("#{parentProd}(:terminal)", "Terminal has no parent production", :production => prod)
440
620
  end
441
621
  end
442
-
443
- # Skip through the input stream until something is found that
444
- # is either valid based on the content of the production stack,
445
- # or can follow a production in the stack.
622
+
623
+ ##
624
+ # Does first include the specified token
446
625
  #
447
- # @return [Token]
448
- def skip_until_valid(todo_stack)
449
- cur_prod = todo_stack.last[:prod]
450
- token = get_token
451
- first = @first[cur_prod] || []
452
- expected = @branch.fetch(cur_prod, {}).keys
453
- expected << :_eps if first.include?(:_eps) # Helps when testing
454
-
455
- # If we've reached EOF, token is nil. This is fine, if _eof is in @follow
456
- return if token.nil? && @follow.fetch(cur_prod, []).include?(:_eof)
457
-
458
- # If this token can be used by the top production, return it
459
- # Otherwise, if the banch table allows empty, also return the token
460
- return token if !@recovering && (expected.any? {|t| (token || :_eps) === t})
461
-
462
- # Otherwise, it's an error condition, and skip either until
463
- # we find a valid token for this production, or until we find
464
- # something that can follow this production
465
- error("skip_until_valid", "expected one of #{expected.map(&:inspect).join(", ")}, found #{token.inspect}",
466
- :production => cur_prod, :token => token)
467
-
468
- debug("recovery", "stack follows:")
469
- todo_stack.reverse.each do |todo|
470
- debug("recovery") {" #{todo[:prod]}: #{@follow[todo[:prod]].inspect}"}
626
+ # @param [Symbol] production
627
+ # @param [Symbol, Lexer::Token] token
628
+ # A terminal, or symbol or string
629
+ # @return [Boolean]
630
+ def first_include?(production, token)
631
+ if token.is_a?(Lexer::Token)
632
+ @first.fetch(production, []).any? {|t| token === t}
633
+ else
634
+ @first.fetch(production, []).include?(token)
471
635
  end
636
+ end
472
637
 
473
- # Find all follows to the top of the stack
474
- follows = todo_stack.inject([]) do |follow, todo|
475
- prod = todo[:prod]
476
- follow += @follow[prod] || []
477
- end.uniq
478
- debug("recovery") {"follows: #{follows.inspect}"}
479
-
480
- # Skip tokens until one is found in first or follows
481
- while (token = get_token) && (first + follows).none? {|t| token === t}
482
- skipped = @lexer.shift
483
- progress("recovery") {"skip #{skipped.inspect}"}
484
- end
485
- debug("recovery") {"found #{token.inspect} in #{first.include?(token) ? 'first' : 'follows'}"}
486
-
487
- # If the token is a first, just return it. Otherwise, it is a follow
488
- # and we need to skip to the end of the production
489
- unless first.any? {|t| token == t} || todo_stack.last[:terms].empty?
490
- debug("recovery") {"token in follows, skip past #{todo_stack.last[:terms].inspect}"}
491
- todo_stack.last[:terms] = []
638
+ ##
639
+ # Does follow include the specified terminal
640
+ #
641
+ # @param [Symbol] production
642
+ # @param [Symbol, Lexer::Token] token
643
+ # A terminal, or symbol or string
644
+ # @return [Boolean]
645
+ def follow_include?(production, token)
646
+ if token.is_a?(Lexer::Token)
647
+ @follow.fetch(production, []).any? {|t| token === t}
648
+ else
649
+ @follow.fetch(production, []).include?(token)
492
650
  end
493
- token
494
651
  end
495
652
 
496
653
  ##
497
- # Return the next token, entering error recovery if the token is invalid
654
+ # Return the next token, raising an error if the token is invalid
498
655
  #
656
+ # @param [:recover] recover
657
+ # Recover from errors and go until next valid token or end of file
499
658
  # @return [Token]
500
- def get_token
501
- token = begin
502
- @lexer.first
503
- rescue EBNF::LL1::Lexer::Error => e
504
- # Recover from lexer error
505
- @lineno = e.lineno
506
- error("get_token", "With input '#{e.input}': #{e.message}",
507
- :production => @productions.last)
508
-
509
- # Retrieve next valid token
510
- t = @lexer.recover
511
- debug("get_token", :level => 2) {"skipped to #{t.inspect}"}
512
- t
513
- end
659
+ # @raise [Lexer::Error]
660
+ def get_token(recover = nil)
661
+ token = @lexer.first
514
662
  #progress("token") {token.inspect}
515
663
  @lineno = token.lineno if token
516
664
  token
517
- end
518
-
519
- ##
520
- # @param [String] node Relevant location associated with message
521
- # @param [String] message Error string
522
- # @param [Hash] options
523
- # @option options [URI, #to_s] :production
524
- # @option options [Token] :token
525
- def error(node, message, options = {})
526
- message += ", found #{options[:token].representation.inspect}" if options[:token]
527
- message += " at line #{@lineno}" if @lineno
528
- message += ", production = #{options[:production].inspect}" if options[:production]
529
- @error_log << message unless @recovering
530
- @recovering = true
531
- debug(node, message, options.merge(:level => 0))
532
- end
533
-
534
- ##
535
- # Progress output when parsing
536
- # param [String] node Relevant location associated with message
537
- # param [String] message ("")
538
- # param [Hash] options
539
- # option options [Integer] :depth
540
- # Recursion depth for indenting output
541
- # yieldreturn [String] added to message
542
- def progress(node, *args)
543
- return unless @options[:progress] || @options[:debug]
544
- options = args.last.is_a?(Hash) ? args.pop : {}
545
- message = args.join(",")
546
- depth = options[:depth] || self.depth
547
- message += yield.to_s if block_given?
548
- debug(node, message, options.merge(:level => 1))
549
- end
550
-
551
- ##
552
- # Progress output when debugging.
553
- # Captures output to `@options[:debug]` if it is an array.
554
- # Otherwise, if `@options[:debug]` is set, or
555
- # `@options[:progress]` is set and `:level` <= 1, or
556
- # `@options[:validate]` is set and `:level` == 0 output
557
- # to standard error.
558
- #
559
- # @overload debug(node, message)
560
- # @param [String] node Relevant location associated with message
561
- # @param [String] message ("")
562
- # @param [Hash] options
563
- # @option options [Integer] :depth
564
- # Recursion depth for indenting output
565
- # @option options [Integer] :level
566
- # Debug level, `0` for errors, `1` for progress, anything else
567
- # for debug output.
568
- #
569
- # @overload debug(message)
570
- # @param [String] node Relevant location associated with message
571
- # @param [Hash] options
572
- # @option options [Integer] :depth
573
- # Recursion depth for indenting output
574
- # @option options [Integer] :level
575
- # Debug level, `0` for errors, `1` for progress, anything else
576
- # for debug output.
577
- # @yieldreturn [String] added to message
578
- def debug(*args)
579
- options = args.last.is_a?(Hash) ? args.pop : {}
580
- debug_level = options.fetch(:level, 2)
581
- return unless @options[:debug] && debug_level <= DEBUG_LEVEL ||
582
- @options[:progress] && debug_level <= 1 ||
583
- @options[:validate] && debug_level == 0
584
- depth = options[:depth] || self.depth
585
- d_str = depth > 20 ? ' ' * 20 + '+' : ' ' * depth
586
- args << yield if block_given?
587
- message = "#{args.join(': ')}"
588
- str = "[#{@lineno}](#{debug_level})#{d_str}#{message}"
589
- @options[:debug] << str if @options[:debug].is_a?(Array)
590
- case
591
- when @options[:yield]
592
- @parse_callback.call(:trace, node, message, options)
593
- when @options[:debug] == true
594
- $stderr.puts str
595
- when @options[:progress] && debug_level <= 1
596
- $stderr.puts str
597
- when @options[:validate] && debug_level == 0
598
- $stderr.puts str
665
+ rescue Lexer::Error => e
666
+ if recover
667
+ # Recover from lexer error so that we can not bail out too early
668
+ @lexer.recover
669
+ error("get_token", "With input '#{e.input}': #{e.message}}")
670
+ retry
599
671
  end
672
+ raise
600
673
  end
601
674
 
602
675
  ##
603
676
  # Accept the first token in the input stream if it matches
604
- # _type\_or\_value_. Return nil otherwise.
677
+ # `type\_or\_value`. Raise Error, otherwise.
605
678
  #
606
679
  # @param [Symbol, String] type_or_value
607
680
  # @return [Token]
681
+ # @raise [Error, Lexer::Error]
608
682
  def accept(type_or_value)
609
683
  if (token = get_token) && token === type_or_value
610
684
  debug("accept") {"#{token.inspect} === #{type_or_value.inspect}"}
@@ -13,7 +13,6 @@ module EBNF::LL1
13
13
  LOW_WATER = 2048 # Hopefully large enough to deal with long multi-line comments
14
14
 
15
15
  ##
16
- # @!attribute [r] input
17
16
  # @return [IO, StringIO]
18
17
  attr_reader :input
19
18
 
@@ -10,40 +10,47 @@ module EBNF
10
10
  diff hex range
11
11
  }.map(&:to_sym).freeze
12
12
 
13
- # @!attribute [rw] sym for rule
13
+ # Symbol of rule
14
+ #
14
15
  # @return [Symbol]
15
16
  attr_accessor :sym
16
17
 
17
- # @!attribute [rw] id of rule
18
+ # ID of rule
18
19
  # @return [String]
19
20
  attr_accessor :id
20
21
 
21
22
  # A comprehension is a sequence which contains all elements but the first of the original rule.
22
- # @!attribute [rw] comprehension of this rule
23
+ #
23
24
  # @return [Rule]
24
25
  attr_accessor :comp
25
26
 
26
- # @!attribute [rw] kind of rule
27
+ # Kind of rule
28
+ #
27
29
  # @return [:rule, :terminal, or :pass]
28
30
  attr_accessor :kind
29
31
 
30
- # @!attribute [rw] expr rule expression
32
+ # Rule expression
33
+ #
31
34
  # @return [Array]
32
35
  attr_accessor :expr
33
36
 
34
- # @!attribute [rw] orig original rule
37
+ # Original EBNF
38
+ #
35
39
  # @return [String]
36
40
  attr_accessor :orig
37
41
 
38
- # @!attribute [r] first terminals that immediately procede this rule
42
+ # Terminals that immediately precede this rule
43
+ #
39
44
  # @return [Array<Rule>]
40
45
  attr_reader :first
41
46
 
42
- # @!attribute [r] follow terminals that immediately follow this rule
47
+ # Terminals that immediately follow this rule
48
+ #
43
49
  # @return [Array<Rule>]
44
50
  attr_reader :follow
45
51
 
46
- # @!attribute [rw] start indicates that this is a starting rule
52
+ # Indicates that this is a starting rule
53
+ #
47
54
  # @return [Boolean]
48
55
  attr_accessor :start
49
56
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ebnf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gregg Kellogg
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-03-28 00:00:00.000000000 Z
11
+ date: 2013-03-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sxp