ebnf 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- NDg4MzMwZTA0Yzg3MzY5Y2UyZDMwMWI2ZWRiODNjZWZjZmU5NGRlZQ==
4
+ ZDI4YWE4ZjExOGI5N2NkZDVmYzA4ZDMwMzM0ZGRhZThhNWU0ZGI2MQ==
5
5
  data.tar.gz: !binary |-
6
- NzA2ODg3ZTI1NzIwNjVjNmE1YWJjMDYzNTEyNjI1NGIzZWI5OTg1ZQ==
6
+ MWZmMjVkYjMwYzY1NmNlMDc0ZDM5MzUxOWU1ZTUwNGU2NDQ2YzFiYQ==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- N2FjYTRhNzI5NDFkZWI3OTNiOWFkNTNmYWY5NjUwMjA4YTM0ZmNiZTQ5NDYy
10
- MjFjODYxODc1MzRjMTZjNGM0N2U4NDk2NTM2ZjM0MmY0ZGI3ZDE3OTg3OWFi
11
- NjQwYTdjNzcxYTc3ZWJmYzcyMzlmOWJiZDlmZjc3YTJiOWVkODA=
9
+ M2E2MTcyM2MyZWYyNTg5Y2E3ZDVkNWI2ODgwYjdhZjhlYmQ1YTUzZGE5Nzc3
10
+ M2M3MDFiYzllMzE5YjNmMDA2Zjc3NjQ5YzViMzhmMTNmZTBkYWZhMDk0M2Rl
11
+ MjlkNWMxNzY3ODM4ZDYxZjBiZWI2NzA4ZGRjYzQ2MGY2MTBiZDI=
12
12
  data.tar.gz: !binary |-
13
- ODVmZDZlMjVkMjY0ZDkwYWFiZGY4OGE3ZTRhNDQzZWRiZDlkZTQzODQ4ZWMw
14
- MTZkZGMzNWYzZTMxNDc0MDc3YWFkNmU2NjExNWM2ZTkzOTJlZWE0MDQ1Yzdm
15
- N2RmOTU5NGFhZTVlZDEwYzhlMjJjMDNhYmIwNDkyOTc3ZGVkNDQ=
13
+ OGIxZGZjNzNhZTlkZDAwZTM0ZmExYzMzMzUxMjQ4ZmNmYjBmNTM4OGJlMmM2
14
+ NmFmODM4MjMwODYyY2E1YjhjNjNlY2E1YjI0MTg1NjFlYWI0MzI4ZThhMjA2
15
+ MmMyMjBiZjA5NGQ2MjlhYzA0Yzg1YzYzNzhjNjNkZmQ2ZTg5N2I=
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.3
1
+ 0.3.0
@@ -103,12 +103,12 @@ module EBNF
103
103
  include Parser
104
104
 
105
105
  # Abstract syntax tree from parse
106
- # @!attribute [r] ast
106
+ #
107
107
  # @return [Array<Rule>]
108
108
  attr_reader :ast
109
109
 
110
110
  # Grammar errors, or errors found generating parse tables
111
- # @!attribute [r] errors
111
+ #
112
112
  # @return [Array<String>]
113
113
  attr_accessor :errors
114
114
 
@@ -6,29 +6,29 @@ module EBNF
6
6
 
7
7
  # Branch table, represented as a recursive hash.
8
8
  # The table is indexed by rule symbol, which in-turn references a hash of terminals (which are the first terminals of the production), which in turn reference the sequence of rules that follow, given that terminal as input
9
- # @!attribute [r] branch
9
+ #
10
10
  # @return [Hash{Symbol => Hash{String, Symbol => Array<Symbol>}}]
11
11
  attr_reader :branch
12
12
 
13
13
  # First table
14
- # @!attribute [r] first
14
+ #
15
15
  # @return [Hash{Symbol, String => Symbol}]
16
16
  attr_reader :first
17
17
 
18
18
  # Follow table
19
- # @!attribute [r] first
19
+ #
20
20
  # @return [Hash{Symbol, String => Symbol}]
21
21
  attr_reader :follow
22
22
 
23
23
  # Terminal table
24
24
  # The list of terminals used in the grammar.
25
- # @!attribute [r] terminals
25
+ #
26
26
  # @return [Array<String, Symbol>]
27
27
  attr_reader :terminals
28
28
 
29
29
  # Start symbol
30
30
  # The rule which starts the grammar
31
- # @!attribute[r] start
31
+ #
32
32
  # @return [Symbol]
33
33
  attr_reader :start
34
34
 
@@ -53,12 +53,10 @@ module EBNF::LL1
53
53
  ML_START = /\'\'\'|\"\"\"/.freeze # Beginning of terminals that may span lines
54
54
 
55
55
  ##
56
- # @!attribute whitespace
57
56
  # @return [Regexp] defines whitespace, defaults to WS
58
57
  attr_reader :whitespace
59
58
 
60
59
  ##
61
- # @!attribute comment
62
60
  # @return [Regexp] defines single-line comment, defaults to COMMENT
63
61
  attr_reader :comment
64
62
 
@@ -233,15 +231,16 @@ module EBNF::LL1
233
231
  #
234
232
  # @return [Token]
235
233
  def recover
236
- until scanner.eos? do
237
- begin
238
- shift
239
- return first
240
- rescue Error, ArgumentError
241
- # Ignore errors until something scans, or EOS.
242
- scanner.pos = scanner.pos + 1
234
+ until scanner.eos? || tok = match_token
235
+ if scanner.skip_until(@whitespace).nil? # Skip past current "token"
236
+ # No whitespace at the end, must be at the end of the string
237
+ scanner.terminate
238
+ else
239
+ skip_whitespace
243
240
  end
244
241
  end
242
+ scanner.unscan if tok
243
+ first
245
244
  end
246
245
  protected
247
246
 
@@ -253,9 +252,10 @@ module EBNF::LL1
253
252
  def skip_whitespace
254
253
  # skip all white space, but keep track of the current line number
255
254
  while !scanner.eos?
256
- if matched = scanner.scan(@whitespace)
255
+ if matched = scanner.scan(@whitespace)
257
256
  @lineno += matched.count("\n")
258
- elsif (com = scanner.scan(@comment))
257
+ elsif (scanner.scan(@comment))
258
+ #
259
259
  else
260
260
  return
261
261
  end
@@ -472,7 +472,7 @@ module EBNF::LL1
472
472
  #
473
473
  # @return [String]
474
474
  def inspect
475
- to_hash.inspect
475
+ "#{@value.inspect}#{'(' + @type.to_s + ')' if @type}"
476
476
  end
477
477
  end # class Token
478
478
 
@@ -10,7 +10,6 @@ module EBNF::LL1
10
10
  DEBUG_LEVEL = 10
11
11
 
12
12
  ##
13
- # @!attribute [r] lineno
14
13
  # @return [Integer] line number of current token
15
14
  attr_reader :lineno
16
15
 
@@ -20,10 +19,10 @@ module EBNF::LL1
20
19
 
21
20
  # DSL for creating terminals and productions
22
21
  module ClassMethods
23
- def start_handlers; @@start_handlers || {}; end
24
- def production_handlers; @@production_handlers || {}; end
25
- def terminal_handlers; @@terminal_handlers || {}; end
26
- def patterns; @@patterns || []; end
22
+ def start_handlers; @start_handlers || {}; end
23
+ def production_handlers; @production_handlers || {}; end
24
+ def terminal_handlers; @terminal_handlers || {}; end
25
+ def patterns; @patterns || []; end
27
26
 
28
27
  ##
29
28
  # Defines the pattern for a terminal node and a block to be invoked
@@ -53,11 +52,11 @@ module EBNF::LL1
53
52
  # Block passed to initialization for yielding to calling parser.
54
53
  # Should conform to the yield specs for #initialize
55
54
  def terminal(term, regexp, options = {}, &block)
56
- @@patterns ||= []
55
+ @patterns ||= []
57
56
  # Passed in order to define evaulation sequence
58
- @@patterns << EBNF::LL1::Lexer::Terminal.new(term, regexp, options)
59
- @@terminal_handlers ||= {}
60
- @@terminal_handlers[term] = block if block_given?
57
+ @patterns << EBNF::LL1::Lexer::Terminal.new(term, regexp, options)
58
+ @terminal_handlers ||= {}
59
+ @terminal_handlers[term] = block if block_given?
61
60
  end
62
61
 
63
62
  ##
@@ -80,8 +79,8 @@ module EBNF::LL1
80
79
  # Should conform to the yield specs for #initialize
81
80
  # Yield to generate a triple
82
81
  def start_production(term, &block)
83
- @@start_handlers ||= {}
84
- @@start_handlers[term] = block
82
+ @start_handlers ||= {}
83
+ @start_handlers[term] = block
85
84
  end
86
85
 
87
86
  ##
@@ -105,8 +104,8 @@ module EBNF::LL1
105
104
  # Should conform to the yield specs for #initialize
106
105
  # Yield to generate a triple
107
106
  def production(term, &block)
108
- @@production_handlers ||= {}
109
- @@production_handlers[term] = block
107
+ @production_handlers ||= {}
108
+ @production_handlers[term] = block
110
109
  end
111
110
 
112
111
  # Evaluate a handler, delegating to the specified object.
@@ -115,15 +114,15 @@ module EBNF::LL1
115
114
  # @param [Object] object
116
115
  # @return [Object]
117
116
  def eval_with_binding(object)
118
- @@delegate = object
117
+ @delegate = object
119
118
  object.instance_eval {yield}
120
119
  end
121
120
 
122
121
  private
123
122
 
124
123
  def method_missing(method, *args, &block)
125
- if @@delegate ||= nil
126
- @@delegate.send method, *args, &block
124
+ if @delegate ||= nil
125
+ @delegate.send method, *args, &block
127
126
  else
128
127
  super
129
128
  end
@@ -137,29 +136,40 @@ module EBNF::LL1
137
136
  #
138
137
  # @example
139
138
  # require 'rdf/ll1/parser'
140
- #
139
+ #
141
140
  # class MyParser
142
141
  # include EBNF::LL1::Parser
143
- #
142
+ #
144
143
  # branch MyParser::BRANCH
145
- #
144
+ #
145
+ # ##
146
+ # # Defines a production called before parsing a non-terminal
147
+ # # with data from previous production along with data defined for the
148
+ # # current production
149
+ # #
150
+ # start_production :object do |input, current, callback|
151
+ # # Note production as triples for blankNodePropertyList
152
+ # # to set :subject instead of :resource
153
+ # current[:triples] = true
154
+ # end
155
+ #
146
156
  # ##
147
- # # Defines a production called during different phases of parsing
157
+ # # Defines a production called after parsing a non-terminal
148
158
  # # with data from previous production along with data defined for the
149
159
  # # current production
150
160
  # #
151
- # # Yield to generate a triple
152
- # production :object do |parser, phase, input, current|
161
+ # # callback to processor block
162
+ # production :object do |input, current, callback|
153
163
  # object = current[:resource]
154
- # yield :statement, RDF::Statement.new(input[:subject], input[:predicate], object)
164
+ # callback.call :statement, RDF::Statement.new(input[:subject], input[:predicate], object)
155
165
  # end
156
- #
166
+ #
157
167
  # ##
158
168
  # # Defines the pattern for a terminal node
159
- # terminal :BLANK_NODE_LABEL, %r(_:(#{PN_LOCAL})) do |parser, production, token, input|
169
+ # terminal :BLANK_NODE_LABEL, %r(_:(#{PN_LOCAL})) do |production, token, input|
160
170
  # input[:BLANK_NODE_LABEL] = RDF::Node.new(token)
161
171
  # end
162
- #
172
+ #
163
173
  # ##
164
174
  # # Iterates the given block for each RDF statement in the input.
165
175
  # #
@@ -168,7 +178,7 @@ module EBNF::LL1
168
178
  # # @return [void]
169
179
  # def each_statement(&block)
170
180
  # @callback = block
171
- #
181
+ #
172
182
  # parse(START.to_sym) do |context, *data|
173
183
  # case context
174
184
  # when :statement
@@ -176,11 +186,12 @@ module EBNF::LL1
176
186
  # end
177
187
  # end
178
188
  # end
179
- #
189
+ #
180
190
  # end
181
191
  #
182
192
  # @param [String, #to_s] input
183
- # @param [Symbol, #to_s] prod The starting production for the parser. It may be a URI from the grammar, or a symbol representing the local_name portion of the grammar URI.
193
+ # @param [Symbol, #to_s] start
194
+ # The starting production for the parser. It may be a URI from the grammar, or a symbol representing the local_name portion of the grammar URI.
184
195
  # @param [Hash{Symbol => Object}] options
185
196
  # @option options [Hash{Symbol,String => Hash{Symbol,String => Array<Symbol,String>}}] :branch LL1 branch table.
186
197
  # @option options [Hash{Symbol,String => Array<Symbol,String>}] :first ({})
@@ -202,8 +213,11 @@ module EBNF::LL1
202
213
  # @yieldparam [Symbol] *data
203
214
  # Data specific to the call
204
215
  # @return [EBNF::LL1::Parser]
216
+ # @raise [Exception] Raises exceptions for parsing errors
217
+ # or errors raised during processing callbacks. Internal
218
+ # errors are raised using {Error}.
205
219
  # @see http://cs.adelaide.edu.au/~charles/lt/Lectures/07-ErrorRecovery.pdf
206
- def parse(input = nil, prod = nil, options = {}, &block)
220
+ def parse(input = nil, start = nil, options = {}, &block)
207
221
  @options = options.dup
208
222
  @branch = options[:branch]
209
223
  @first = options[:first] ||= {}
@@ -217,110 +231,171 @@ module EBNF::LL1
217
231
 
218
232
  # Unrecoverable errors
219
233
  raise Error, "Branch table not defined" unless @branch && @branch.length > 0
220
- raise Error, "Starting production not defined" unless prod
234
+ raise Error, "Starting production not defined" unless start
221
235
 
222
236
  @prod_data = [{}]
223
- prod = prod.split('#').last.to_sym unless prod.is_a?(Symbol)
224
- todo_stack = [{:prod => prod, :terms => nil}]
237
+ start = start.split('#').last.to_sym unless start.is_a?(Symbol)
238
+ todo_stack = [{:prod => start, :terms => nil}]
225
239
 
226
240
  while !todo_stack.empty?
227
- pushed = false
228
- if todo_stack.last[:terms].nil?
229
- todo_stack.last[:terms] = []
230
- cur_prod = todo_stack.last[:prod]
231
-
232
- # If cur_prod is the starting production, we can reset the stack
233
- # to the beginning to avoid excessive growth in the production
234
- # stack
235
- if options[:reset_on_start] && cur_prod == prod
236
- todo_stack = [{:prod => prod, :terms => []}]
237
- @productions = []
238
- @prod_data = [{}]
239
- end
240
-
241
- # Get this first valid token appropriate for the stacked productions,
242
- # skipping invalid tokens until either a valid token is found (from @first),
243
- # or a token appearing in @follow appears.
244
- token = skip_until_valid(todo_stack)
245
-
246
- # At this point, token is either nil, in the first set of the production,
247
- # or in the follow set of this production or any previous production
248
- debug("parse(production)") do
249
- "token #{token ? token.representation.inspect : 'nil'}, " +
250
- "prod #{cur_prod.inspect}, " +
251
- "depth #{depth}"
252
- end
241
+ begin
242
+ @recovering = false
243
+ pushed = false
244
+ if todo_stack.last[:terms].nil?
245
+ todo_stack.last[:terms] = []
246
+ cur_prod = todo_stack.last[:prod]
247
+
248
+ # If cur_prod is the starting production, we can reset the stack
249
+ # to the beginning to avoid excessive growth in the production
250
+ # stack
251
+ if options[:reset_on_start] && cur_prod == start
252
+ todo_stack = [{:prod => start, :terms => []}]
253
+ @productions = []
254
+ @prod_data = [{}]
255
+ end
253
256
 
254
- # Got an opened production
255
- onStart(cur_prod)
256
- break if token.nil?
257
+ # Fetch the current token
258
+ token = get_token(:recover)
257
259
 
258
- if prod_branch = @branch[cur_prod]
259
- @recovering = false
260
- sequence = prod_branch[token.representation]
261
- debug("parse(production)", :level => 2) do
262
- "token #{token.representation.inspect} " +
263
- "prod #{cur_prod.inspect}, " +
264
- "prod_branch #{prod_branch.keys.inspect}, " +
265
- "sequence #{sequence.inspect}"
260
+ # At this point, token is either nil, in the first set of the production,
261
+ # or in the follow set of this production or any previous production
262
+ debug("parse(production)") do
263
+ "token #{token ? token.representation.inspect : 'nil'}, " +
264
+ "prod #{cur_prod.inspect}, " +
265
+ "depth #{depth}"
266
266
  end
267
267
 
268
- if sequence.nil?
269
- if prod_branch.has_key?(:_empty)
270
- debug("parse(production)", :level => 2) {"empty sequence for _empty"}
268
+ # Got an opened production
269
+ onStart(cur_prod)
270
+
271
+ if token.nil?
272
+ if !(first_include?(cur_prod, :_eps) && follow_include?(cur_prod, :_eof))
273
+ # End of file, and production does not contain eps, or it does, but follow does not contain eof
274
+ raise Error.new("Unexpected end of input", :production => cur_prod)
271
275
  else
272
- # If there is no sequence for this production, we're
273
- # in error recovery, and _token_ has been advanced to
274
- # the point where it can reasonably follow this production
276
+ debug("parse(production)") {"End of input prod #{cur_prod.inspect}"}
277
+ end
278
+ elsif prod_branch = @branch[cur_prod]
279
+ sequence = prod_branch.fetch(token.representation) do
280
+ raise Error.new("#{token.inspect} does not match production #{cur_prod.inspect}",
281
+ :production => cur_prod)
282
+ end
283
+ debug("parse(production)") do
284
+ "token #{token.representation.inspect} " +
285
+ "prod #{cur_prod.inspect}, " +
286
+ "prod_branch #{prod_branch.keys.inspect}, " +
287
+ "sequence #{sequence.inspect}"
275
288
  end
289
+ todo_stack.last[:terms] += sequence
290
+ else
291
+ raise Error.new("No branches found for #{cur_prod.inspect}",
292
+ :production => cur_prod, :token => token)
276
293
  end
277
- todo_stack.last[:terms] += sequence if sequence
278
- else
279
- # Is this a fatal error?
280
- error("parse(fatal?)", "No branches found for #{cur_prod.inspect}",
281
- :production => cur_prod, :token => token)
282
294
  end
283
- end
284
-
285
- debug("parse(terms)", :level => 2) {"todo #{todo_stack.last.inspect}, depth #{depth}"}
286
- while !todo_stack.last[:terms].to_a.empty?
287
- begin
295
+
296
+ debug("parse(terms)") {"todo #{todo_stack.last.inspect}, depth #{depth}"}
297
+ while !todo_stack.last[:terms].to_a.empty?
288
298
  # Get the next term in this sequence
289
299
  term = todo_stack.last[:terms].shift
290
300
  debug("parse(token)") {"accept #{term.inspect}"}
301
+
291
302
  if token = accept(term)
292
- @recovering = false
293
303
  debug("parse(token)") {"token #{token.inspect}, term #{term.inspect}"}
294
- onToken(term, token)
295
- elsif terminals.include?(term)
304
+ onTerminal(term, token)
305
+ elsif terminals.include?(term)
296
306
  # If term is a terminal, then it is an error if token does not
297
307
  # match it
298
- skip_until_valid(todo_stack)
308
+ raise Error.new("#{get_token.inspect} does not match terminal #{term.inspect}",
309
+ :production => cur_prod)
299
310
  else
300
- # If it's not a string (a symbol), it is a non-terminal and we push the new state
301
- todo_stack << {:prod => term, :terms => nil}
302
- debug("parse(push)", :level => 2) {"term #{term.inspect}, depth #{depth}"}
303
- pushed = true
304
- break
311
+ token = get_token
312
+
313
+ # If token is not in firsts of term, but eps is, skip to next
314
+ # term
315
+ if first_include?(term, :_eps) && !first_include?(term, token)
316
+ debug("parse(token)") {"skip optional term #{term.inspect} on #{token.inspect}"}
317
+ break
318
+ else
319
+ # Push term onto stack
320
+ todo_stack << {:prod => term, :terms => nil}
321
+ debug("parse(push)") {"term #{term.inspect}, depth #{depth}"}
322
+ pushed = true
323
+ break
324
+ end
305
325
  end
306
326
  end
307
- end
308
-
309
- # After completing the last production in a sequence, pop down until we find a production
310
- #
311
- # If in recovery mode, continue popping until we find a term with a follow list
312
- while !pushed &&
313
- !todo_stack.empty? &&
314
- ( (terms = todo_stack.last.fetch(:terms, [])).empty? ||
315
- (@recovering && @follow.fetch(terms.last, []).none? {|t| (token || :_eps) == t}))
316
- debug("parse(pop)", :level => 2) {"todo #{todo_stack.last.inspect}, depth #{depth}"}
317
- if terms.empty?
327
+ rescue Lexer::Error, Error => e
328
+ # Lexer encountered an illegal token or the parser encountered
329
+ # a terminal which is inappropriate for the current production.
330
+ # Perform error recovery to find a reasonable terminal based
331
+ # on the follow sets of the relevant productions. This includes
332
+ # remaining terms from the current production and the stacked
333
+ # productions
334
+ @lineno = e.lineno
335
+ if e.is_a?(Lexer::Error)
336
+ # Skip to the next valid terminal
337
+ @lexer.recover
338
+ error("parse(#{e.class})", "With input '#{e.input}': #{e.message}, skipped to #{(get_token(:recover) || :eof).inspect}",
339
+ :production => @productions.last)
340
+ else
341
+ # Otherwise, the terminal is fine, just not for this production.
342
+ @lexer.shift
343
+ error("parse(#{e.class})", "#{e.message}, skipped to #{(get_token(:recover) || :eof).inspect}",
344
+ :production => @productions.last, :token => e.token)
345
+ end
346
+
347
+ # Get the list of follows for this sequence, this production and the stacked productions.
348
+ debug("recovery", "stack follows:", :level => 4)
349
+ todo_stack.reverse.each do |todo|
350
+ debug("recovery", :level => 4) {" #{todo[:prod]}: #{@follow[todo[:prod]].inspect}"}
351
+ end
352
+
353
+ # Find all follows to the top of the stack
354
+ follows = todo_stack.inject([]) do |follow, todo|
355
+ prod = todo[:prod]
356
+ follow += @follow[prod] || []
357
+ end.uniq
358
+ debug("recovery") {"follows: #{follows.inspect}"}
359
+
360
+ # Skip tokens until one is found in follows
361
+ while (token = get_token(:recover)) && follows.none? {|t| token === t}
362
+ skipped = @lexer.shift
363
+ progress("recovery") {"skip #{skipped.inspect}"}
364
+ end
365
+ debug("recovery") {"found #{token.inspect} in follows"}
366
+
367
+ # Pop stack elements until token is in follows
368
+ while !todo_stack.empty? &&
369
+ !follow_include?(todo_stack.last[:prod], token || :_eof)
370
+ debug("recovery(pop)") {"todo #{todo_stack.last.inspect}, depth #{depth}"}
318
371
  todo_stack.pop
319
372
  onFinish
373
+ end
374
+
375
+ # Token is now in the first of the top production
376
+ unless todo_stack.empty?
377
+ todo_stack.pop
378
+ onFinish
379
+ end
380
+
381
+ if todo_stack.empty?
382
+ # Recovered to end of last production
383
+ warn("recover", "recovered to end of productions")
320
384
  else
321
- # Stop recovering when we a production which starts with the term
322
- debug("parse(pop)", :level => 2) {"recovery complete"}
323
- @recovering = false
385
+ warn("recover", "recovered to #{todo_stack.last[:prod].inspect} with #{token.inspect}")
386
+ end
387
+
388
+ @recovering = false
389
+ ensure
390
+ # After completing the last production in a sequence, pop down until we find a production
391
+ #
392
+ # If in recovery mode, continue popping until we find a term with a follow list
393
+ while !pushed &&
394
+ !todo_stack.empty? &&
395
+ todo_stack.last.fetch(:terms, []).empty?
396
+ debug("parse(pop)") {"todo #{todo_stack.last.inspect}, depth #{depth}"}
397
+ todo_stack.pop
398
+ onFinish
324
399
  end
325
400
  end
326
401
  end
@@ -329,12 +404,10 @@ module EBNF::LL1
329
404
 
330
405
  # Continue popping contexts off of the stack
331
406
  while !todo_stack.empty?
332
- debug("parse(eof)", :level => 2) {"stack #{todo_stack.last.inspect}, depth #{depth}"}
407
+ debug("parse(eof)") {"stack #{todo_stack.last.inspect}, depth #{depth}"}
333
408
  # There can't be anything left to do, or if there is, it must be optional
334
409
  last_terms = todo_stack.last[:terms]
335
- if last_terms.length > 0 && last_terms.none? {|t|
336
- @first.has_key?(t) && @first[t].include?(:_eps)
337
- }
410
+ if last_terms.length > 0 && last_terms.none? {|t|first_include?(t, :_eps)}
338
411
  error("parse(eof)",
339
412
  "End of input before end of production: stack #{todo_stack.last.inspect}, depth #{depth}"
340
413
  )
@@ -342,10 +415,10 @@ module EBNF::LL1
342
415
  todo_stack.pop
343
416
  onFinish
344
417
  end
345
-
418
+
346
419
  # When all is said and done, raise the error log
347
420
  unless @error_log.empty?
348
- raise Error, @error_log.join("\n\t")
421
+ raise Error, @error_log.join("\n\t")
349
422
  end
350
423
  end
351
424
 
@@ -369,16 +442,108 @@ module EBNF::LL1
369
442
  prod_data[sym] << values
370
443
  end
371
444
  end
372
-
445
+
373
446
  # Add values to production data, values arranged as an array
374
447
  def add_prod_data(sym, *values)
375
448
  return if values.compact.empty?
376
-
449
+
377
450
  prod_data[sym] ||= []
378
451
  prod_data[sym] += values
379
452
  debug("add_prod_data(#{sym})") {"#{prod_data[sym].inspect} += #{values.inspect}"}
380
453
  end
381
-
454
+
455
+ protected
456
+
457
+ ##
458
+ # Error information, used as level `0` debug messages.
459
+ #
460
+ # @param [String] node Relevant location associated with message
461
+ # @param [String] message Error string
462
+ # @param [Hash] options
463
+ # @option options [URI, #to_s] :production
464
+ # @option options [Token] :token
465
+ # @see {#debug}
466
+ def error(node, message, options = {})
467
+ message += ", found #{options[:token].inspect}" if options[:token]
468
+ message += " at line #{@lineno}" if @lineno
469
+ message += ", production = #{options[:production].inspect}" if options[:production]
470
+ @error_log << message unless @recovering
471
+ @recovering = true
472
+ debug(node, message, options.merge(:level => 0))
473
+ end
474
+
475
+ ##
476
+ # Warning information, used as level `1` debug messages.
477
+ #
478
+ # @param [String] node Relevant location associated with message
479
+ # @param [String] message Error string
480
+ # @param [Hash] options
481
+ # @option options [URI, #to_s] :production
482
+ # @option options [Token] :token
483
+ # @see {#debug}
484
+ def warn(node, message, options = {})
485
+ message += ", with token #{options[:token].inspect}" if options[:token]
486
+ message += " at line #{@lineno}" if @lineno
487
+ message += ", production = #{options[:production].inspect}" if options[:production]
488
+ @error_log << message unless @recovering
489
+ debug(node, message, options.merge(:level => 1))
490
+ end
491
+
492
+ ##
493
+ # Progress output when parsing. Passed as level `2` debug messages.
494
+ #
495
+ # @overload progress(node, message, options)
496
+ # @param [String] node Relevant location associated with message
497
+ # @param [String] message ("")
498
+ # @param [Hash] options
499
+ # @option options [Integer] :depth
500
+ # Recursion depth for indenting output
501
+ # @see {#debug}
502
+ def progress(node, *args)
503
+ return unless @options[:progress] || @options[:debug]
504
+ options = args.last.is_a?(Hash) ? args.pop : {}
505
+ message = args.join(",")
506
+ message += yield.to_s if block_given?
507
+ debug(node, message, options.merge(:level => 2))
508
+ end
509
+
510
+ ##
511
+ # Progress output when debugging.
512
+ #
513
+ # The call is ignored, unless `@options[:debug]` is set, in which
514
+ # case it yields tracing information as indicated. Additionally,
515
+ # if `@options[:debug]` is an Integer, the call is aborted if the
516
+ # `:level` option is greater than `@options[:debug]`.
517
+ #
518
+ # @overload debug(node, message, options)
519
+ # @param [Array<String>] args Relevant location associated with message
520
+ # @param [Hash] options
521
+ # @option options [Integer] :depth
522
+ # Recursion depth for indenting output
523
+ # @option options [Integer] :level
524
+ # Level assigned to message, by convention, level `0` is for
525
+ # errors, level `1` is for warnings, level `2` is for parser
526
+ # progress information, and anything higher is for various levels
527
+ # of debug information.
528
+ #
529
+ # @yield trace, level, lineno, depth, args
530
+ # @yieldparam [:trace] trace
531
+ # @yieldparam [Integer] level
532
+ # @yieldparam [Integer] lineno
533
+ # @yieldparam [Integer] depth Recursive depth of productions
534
+ # @yieldparam [Array<String>] args
535
+ # @yieldreturn [String] added to message
536
+ def debug(*args)
537
+ return unless @options[:debug] && @parse_callback
538
+ options = args.last.is_a?(Hash) ? args.pop : {}
539
+ debug_level = options.fetch(:level, 3)
540
+ return if @options[:debug].is_a?(Integer) && debug_level > @options[:debug]
541
+
542
+ depth = options[:depth] || self.depth
543
+ args << yield if block_given?
544
+ @parse_callback.call(:trace, debug_level, @lineno, depth, *args)
545
+ end
546
+
382
547
  private
383
548
  # Start for production
384
549
  def onStart(prod)
@@ -389,15 +554,20 @@ module EBNF::LL1
389
554
  # to customize before pushing on the @prod_data stack
390
555
  progress("#{prod}(:start):#{@prod_data.length}") {@prod_data.last}
391
556
  data = {}
392
- self.class.eval_with_binding(self) {
393
- handler.call(@prod_data.last, data, @parse_callback)
394
- }
557
+ begin
558
+ self.class.eval_with_binding(self) {
559
+ handler.call(@prod_data.last, data, @parse_callback)
560
+ }
561
+ rescue Exception => e
562
+ error("start", "#{e.class}: #{e.message}", :production => prod)
563
+ @recovering = false
564
+ end
395
565
  @prod_data << data
396
566
  else
397
567
  # Make sure we push as many as we pop, even if there is no
398
568
  # explicit start handler
399
569
  @prod_data << {} if self.class.production_handlers[prod]
400
- progress("#{prod}(:start)") { get_token.inspect}
570
+ progress("#{prod}(:start)") { get_token.inspect + (@recovering ? ' recovering' : '')}
401
571
  end
402
572
  #puts "prod_data(s): " + @prod_data.inspect
403
573
  end
@@ -410,201 +580,105 @@ module EBNF::LL1
410
580
  if handler && !@recovering
411
581
  # Pop production data element from stack, potentially allowing handler to use it
412
582
  data = @prod_data.pop
413
- self.class.eval_with_binding(self) {
414
- handler.call(@prod_data.last, data, @parse_callback)
415
- }
583
+ begin
584
+ self.class.eval_with_binding(self) {
585
+ handler.call(@prod_data.last, data, @parse_callback)
586
+ }
587
+ rescue Exception => e
588
+ error("finish", "#{e.class}: #{e.message}", :production => prod)
589
+ @recovering = false
590
+ end
416
591
  progress("#{prod}(:finish):#{@prod_data.length}") {@prod_data.last}
417
592
  else
418
- progress("#{prod}(:finish)", "recovering: #{@recovering.inspect}")
593
+ progress("#{prod}(:finish)") { "recovering" if @recovering }
419
594
  end
420
595
  @productions.pop
421
596
  end
422
597
 
423
- # A token
424
- def onToken(prod, token)
598
+ # A terminal
599
+ def onTerminal(prod, token)
425
600
  unless @productions.empty?
426
601
  parentProd = @productions.last
427
602
  handler = self.class.terminal_handlers[prod]
428
603
  # Allows catch-all for simple string terminals
429
604
  handler ||= self.class.terminal_handlers[nil] if prod.is_a?(String)
430
605
  if handler
431
- self.class.eval_with_binding(self) {
432
- handler.call(parentProd, token, @prod_data.last)
433
- }
434
- progress("#{prod}(:token)", "", :depth => (depth + 1)) {"#{token}: #{@prod_data.last}"}
606
+ begin
607
+ self.class.eval_with_binding(self) {
608
+ handler.call(parentProd, token, @prod_data.last, @parse_callback)
609
+ }
610
+ rescue Exception => e
611
+ error("terminal", "#{e.class}: #{e.message}", :production => prod)
612
+ @recovering = false
613
+ end
614
+ progress("#{prod}(:terminal)", "", :depth => (depth + 1)) {"#{token}: #{@prod_data.last}"}
435
615
  else
436
- progress("#{prod}(:token)", "", :depth => (depth + 1)) {token.to_s}
616
+ progress("#{prod}(:terminal)", "", :depth => (depth + 1)) {token.to_s}
437
617
  end
438
618
  else
439
- error("#{parentProd}(:token)", "Token has no parent production", :production => prod)
619
+ error("#{parentProd}(:terminal)", "Terminal has no parent production", :production => prod)
440
620
  end
441
621
  end
442
-
443
- # Skip through the input stream until something is found that
444
- # is either valid based on the content of the production stack,
445
- # or can follow a production in the stack.
622
+
623
+ ##
624
+ # Does first include the specified token
446
625
  #
447
- # @return [Token]
448
- def skip_until_valid(todo_stack)
449
- cur_prod = todo_stack.last[:prod]
450
- token = get_token
451
- first = @first[cur_prod] || []
452
- expected = @branch.fetch(cur_prod, {}).keys
453
- expected << :_eps if first.include?(:_eps) # Helps when testing
454
-
455
- # If we've reached EOF, token is nil. This is fine, if _eof is in @follow
456
- return if token.nil? && @follow.fetch(cur_prod, []).include?(:_eof)
457
-
458
- # If this token can be used by the top production, return it
459
- # Otherwise, if the banch table allows empty, also return the token
460
- return token if !@recovering && (expected.any? {|t| (token || :_eps) === t})
461
-
462
- # Otherwise, it's an error condition, and skip either until
463
- # we find a valid token for this production, or until we find
464
- # something that can follow this production
465
- error("skip_until_valid", "expected one of #{expected.map(&:inspect).join(", ")}, found #{token.inspect}",
466
- :production => cur_prod, :token => token)
467
-
468
- debug("recovery", "stack follows:")
469
- todo_stack.reverse.each do |todo|
470
- debug("recovery") {" #{todo[:prod]}: #{@follow[todo[:prod]].inspect}"}
626
+ # @param [Symbol] production
627
+ # @param [Symbol, Lexer::Token] token
628
+ # A terminal, or symbol or string
629
+ # @return [Boolean]
630
+ def first_include?(production, token)
631
+ if token.is_a?(Lexer::Token)
632
+ @first.fetch(production, []).any? {|t| token === t}
633
+ else
634
+ @first.fetch(production, []).include?(token)
471
635
  end
636
+ end
472
637
 
473
- # Find all follows to the top of the stack
474
- follows = todo_stack.inject([]) do |follow, todo|
475
- prod = todo[:prod]
476
- follow += @follow[prod] || []
477
- end.uniq
478
- debug("recovery") {"follows: #{follows.inspect}"}
479
-
480
- # Skip tokens until one is found in first or follows
481
- while (token = get_token) && (first + follows).none? {|t| token === t}
482
- skipped = @lexer.shift
483
- progress("recovery") {"skip #{skipped.inspect}"}
484
- end
485
- debug("recovery") {"found #{token.inspect} in #{first.include?(token) ? 'first' : 'follows'}"}
486
-
487
- # If the token is a first, just return it. Otherwise, it is a follow
488
- # and we need to skip to the end of the production
489
- unless first.any? {|t| token == t} || todo_stack.last[:terms].empty?
490
- debug("recovery") {"token in follows, skip past #{todo_stack.last[:terms].inspect}"}
491
- todo_stack.last[:terms] = []
638
+ ##
639
+ # Does follow include the specified terminal
640
+ #
641
+ # @param [Symbol] production
642
+ # @param [Symbol, Lexer::Token] token
643
+ # A terminal, or symbol or string
644
+ # @return [Boolean]
645
+ def follow_include?(production, token)
646
+ if token.is_a?(Lexer::Token)
647
+ @follow.fetch(production, []).any? {|t| token === t}
648
+ else
649
+ @follow.fetch(production, []).include?(token)
492
650
  end
493
- token
494
651
  end
495
652
 
496
653
  ##
497
- # Return the next token, entering error recovery if the token is invalid
654
+ # Return the next token, raising an error if the token is invalid
498
655
  #
656
+ # @param [:recover] recover
657
+ # Recover from errors and go until next valid token or end of file
499
658
  # @return [Token]
500
- def get_token
501
- token = begin
502
- @lexer.first
503
- rescue EBNF::LL1::Lexer::Error => e
504
- # Recover from lexer error
505
- @lineno = e.lineno
506
- error("get_token", "With input '#{e.input}': #{e.message}",
507
- :production => @productions.last)
508
-
509
- # Retrieve next valid token
510
- t = @lexer.recover
511
- debug("get_token", :level => 2) {"skipped to #{t.inspect}"}
512
- t
513
- end
659
+ # @raise [Lexer::Error]
660
+ def get_token(recover = nil)
661
+ token = @lexer.first
514
662
  #progress("token") {token.inspect}
515
663
  @lineno = token.lineno if token
516
664
  token
517
- end
518
-
519
- ##
520
- # @param [String] node Relevant location associated with message
521
- # @param [String] message Error string
522
- # @param [Hash] options
523
- # @option options [URI, #to_s] :production
524
- # @option options [Token] :token
525
- def error(node, message, options = {})
526
- message += ", found #{options[:token].representation.inspect}" if options[:token]
527
- message += " at line #{@lineno}" if @lineno
528
- message += ", production = #{options[:production].inspect}" if options[:production]
529
- @error_log << message unless @recovering
530
- @recovering = true
531
- debug(node, message, options.merge(:level => 0))
532
- end
533
-
534
- ##
535
- # Progress output when parsing
536
- # param [String] node Relevant location associated with message
537
- # param [String] message ("")
538
- # param [Hash] options
539
- # option options [Integer] :depth
540
- # Recursion depth for indenting output
541
- # yieldreturn [String] added to message
542
- def progress(node, *args)
543
- return unless @options[:progress] || @options[:debug]
544
- options = args.last.is_a?(Hash) ? args.pop : {}
545
- message = args.join(",")
546
- depth = options[:depth] || self.depth
547
- message += yield.to_s if block_given?
548
- debug(node, message, options.merge(:level => 1))
549
- end
550
-
551
- ##
552
- # Progress output when debugging.
553
- # Captures output to `@options[:debug]` if it is an array.
554
- # Otherwise, if `@options[:debug]` is set, or
555
- # `@options[:progress]` is set and `:level` <= 1, or
556
- # `@options[:validate]` is set and `:level` == 0 output
557
- # to standard error.
558
- #
559
- # @overload debug(node, message)
560
- # @param [String] node Relevant location associated with message
561
- # @param [String] message ("")
562
- # @param [Hash] options
563
- # @option options [Integer] :depth
564
- # Recursion depth for indenting output
565
- # @option options [Integer] :level
566
- # Debug level, `0` for errors, `1` for progress, anything else
567
- # for debug output.
568
- #
569
- # @overload debug(message)
570
- # @param [String] node Relevant location associated with message
571
- # @param [Hash] options
572
- # @option options [Integer] :depth
573
- # Recursion depth for indenting output
574
- # @option options [Integer] :level
575
- # Debug level, `0` for errors, `1` for progress, anything else
576
- # for debug output.
577
- # @yieldreturn [String] added to message
578
- def debug(*args)
579
- options = args.last.is_a?(Hash) ? args.pop : {}
580
- debug_level = options.fetch(:level, 2)
581
- return unless @options[:debug] && debug_level <= DEBUG_LEVEL ||
582
- @options[:progress] && debug_level <= 1 ||
583
- @options[:validate] && debug_level == 0
584
- depth = options[:depth] || self.depth
585
- d_str = depth > 20 ? ' ' * 20 + '+' : ' ' * depth
586
- args << yield if block_given?
587
- message = "#{args.join(': ')}"
588
- str = "[#{@lineno}](#{debug_level})#{d_str}#{message}"
589
- @options[:debug] << str if @options[:debug].is_a?(Array)
590
- case
591
- when @options[:yield]
592
- @parse_callback.call(:trace, node, message, options)
593
- when @options[:debug] == true
594
- $stderr.puts str
595
- when @options[:progress] && debug_level <= 1
596
- $stderr.puts str
597
- when @options[:validate] && debug_level == 0
598
- $stderr.puts str
665
+ rescue Lexer::Error => e
666
+ if recover
667
+ # Recover from lexer error so that we do not bail out too early
668
+ @lexer.recover
669
+ error("get_token", "With input '#{e.input}': #{e.message}}")
670
+ retry
599
671
  end
672
+ raise
600
673
  end
601
674
 
602
675
  ##
603
676
  # Accept the first token in the input stream if it matches
604
- # _type\_or\_value_. Return nil otherwise.
677
+ # `type\_or\_value`. Raise Error, otherwise.
605
678
  #
606
679
  # @param [Symbol, String] type_or_value
607
680
  # @return [Token]
681
+ # @raise [Error, Lexer::Error]
608
682
  def accept(type_or_value)
609
683
  if (token = get_token) && token === type_or_value
610
684
  debug("accept") {"#{token.inspect} === #{type_or_value.inspect}"}
@@ -13,7 +13,6 @@ module EBNF::LL1
13
13
  LOW_WATER = 2048 # Hopefully large enough to deal with long multi-line comments
14
14
 
15
15
  ##
16
- # @!attribute [r] input
17
16
  # @return [IO, StringIO]
18
17
  attr_reader :input
19
18
 
@@ -10,40 +10,47 @@ module EBNF
10
10
  diff hex range
11
11
  }.map(&:to_sym).freeze
12
12
 
13
- # @!attribute [rw] sym for rule
13
+ # Symbol of rule
14
+ #
14
15
  # @return [Symbol]
15
16
  attr_accessor :sym
16
17
 
17
- # @!attribute [rw] id of rule
18
+ # ID of rule
18
19
  # @return [String]
19
20
  attr_accessor :id
20
21
 
21
22
  # A comprehension is a sequence which contains all elements but the first of the original rule.
22
- # @!attribute [rw] comprehension of this rule
23
+ #
23
24
  # @return [Rule]
24
25
  attr_accessor :comp
25
26
 
26
- # @!attribute [rw] kind of rule
27
+ # Kind of rule
28
+ #
27
29
  # @return [:rule, :terminal, or :pass]
28
30
  attr_accessor :kind
29
31
 
30
- # @!attribute [rw] expr rule expression
32
+ # Rule expression
33
+ #
31
34
  # @return [Array]
32
35
  attr_accessor :expr
33
36
 
34
- # @!attribute [rw] orig original rule
37
+ # Original EBNF
38
+ #
35
39
  # @return [String]
36
40
  attr_accessor :orig
37
41
 
38
- # @!attribute [r] first terminals that immediately procede this rule
42
+ # Terminals that immediately precede this rule
43
+ #
39
44
  # @return [Array<Rule>]
40
45
  attr_reader :first
41
46
 
42
- # @!attribute [r] follow terminals that immediately follow this rule
47
+ # Terminals that immediately follow this rule
48
+ #
43
49
  # @return [Array<Rule>]
44
50
  attr_reader :follow
45
51
 
46
- # @!attribute [rw] start indicates that this is a starting rule
52
+ # Indicates that this is a starting rule
53
+ #
47
54
  # @return [Boolean]
48
55
  attr_accessor :start
49
56
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ebnf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gregg Kellogg
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-03-28 00:00:00.000000000 Z
11
+ date: 2013-03-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sxp