ebnf 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,70 +20,113 @@ module EBNF::LL1
20
20
 
21
21
  # DSL for creating terminals and productions
22
22
  module ClassMethods
23
+ def start_handlers; @@start_handlers || {}; end
23
24
  def production_handlers; @@production_handlers || {}; end
24
25
  def terminal_handlers; @@terminal_handlers || {}; end
25
26
  def patterns; @@patterns || []; end
26
- def unescape_terms; @@unescape_terms || []; end
27
-
28
- ##
29
- # Defines a production called during different phases of parsing
30
- # with data from previous production along with data defined for the
31
- # current production
32
- #
33
- # @param [Symbol] term
34
- # Term which is a key in the branch table
35
- # @yield [parse, phase, input, current]
36
- # @yieldparam [Object] parse
37
- # Parser instance
38
- # @yieldparam [Symbol] phase
39
- # Phase of parsing, one of :start, or :finish
40
- # @yieldparam [Hash] input
41
- # A Hash containing input from the parent production
42
- # @yieldparam [Hash] current
43
- # A Hash defined for the current production, during :start
44
- # may be initialized with data to pass to further productions,
45
- # during :finish, it contains data placed by earlier productions
46
- # @yieldparam [Prod] block
47
- # Block passed to initialization for yielding to calling parser.
48
- # Should conform to the yield specs for #initialize
49
- # Yield to generate a triple
50
- def production(term, &block)
51
- @@production_handlers ||= {}
52
- @@production_handlers[term] = block
53
- end
54
27
 
55
28
  ##
56
29
  # Defines the pattern for a terminal node and a block to be invoked
57
30
  # when the terminal is encountered. If the block is missing, the
58
31
  # value of the terminal will be placed on the input hash to be returned
59
- # to a previous production.
32
+ # to a previous production. Block is called in an evaluation block from
33
+ # the enclosing parser.
60
34
  #
61
35
  # @param [Symbol, String] term
62
36
  # Defines a terminal production, which appears within a sequence in the branch table
63
37
  # @param [Regexp] regexp
64
38
  # Pattern used to scan for this terminal
65
39
  # @param [Hash] options
40
+ # @option options [Hash{String => String}] :map ({})
41
+ # A mapping from terminals, in lower-case form, to
42
+ # their canonical value
66
43
  # @option options [Boolean] :unescape
67
44
  # Cause strings and codepoints to be unescaped.
68
- # @yield [parser, term, token, input]
69
- # @yieldparam [Object] parser
70
- # Parser instance
45
+ # @yield [term, token, input, block]
71
46
  # @yieldparam [Symbol] term
72
47
  # A symbol indicating the production which referenced this terminal
73
48
  # @yieldparam [String] token
74
49
  # The scanned token
75
50
  # @yieldparam [Hash] input
76
51
  # A Hash containing input from the parent production
77
- # @yieldparam [Prod] block
52
+ # @yieldparam [Proc] block
78
53
  # Block passed to initialization for yielding to calling parser.
79
54
  # Should conform to the yield specs for #initialize
80
55
  def terminal(term, regexp, options = {}, &block)
81
56
  @@patterns ||= []
82
- @@patterns << [term, regexp] # Passed in order to define evaulation sequence
57
+ # Passed in order to define evaluation sequence
58
+ @@patterns << EBNF::LL1::Lexer::Terminal.new(term, regexp, options)
83
59
  @@terminal_handlers ||= {}
84
60
  @@terminal_handlers[term] = block if block_given?
85
- @@unescape_terms ||= []
86
- @@unescape_terms << term if options[:unescape]
61
+ end
62
+
63
+ ##
64
+ # Defines a production called at the beginning of a particular production
65
+ # with data from previous production along with data defined for the
66
+ # current production. Block is called in an evaluation block from
67
+ # the enclosing parser.
68
+ #
69
+ # @param [Symbol] term
70
+ # Term which is a key in the branch table
71
+ # @yield [input, current, block]
72
+ # @yieldparam [Hash] input
73
+ # A Hash containing input from the parent production
74
+ # @yieldparam [Hash] current
75
+ # A Hash defined for the current production, during :start
76
+ # may be initialized with data to pass to further productions,
77
+ # during :finish, it contains data placed by earlier productions
78
+ # @yieldparam [Proc] block
79
+ # Block passed to initialization for yielding to calling parser.
80
+ # Should conform to the yield specs for #initialize
81
+ # Yield to generate a triple
82
+ def start_production(term, &block)
83
+ @@start_handlers ||= {}
84
+ @@start_handlers[term] = block
85
+ end
86
+
87
+ ##
88
+ # Defines a production called when production of associated
89
+ # terminals and non-terminals has completed
90
+ # with data from previous production along with data defined for the
91
+ # current production. Block is called in an evaluation block from
92
+ # the enclosing parser.
93
+ #
94
+ # @param [Symbol] term
95
+ # Term which is a key in the branch table
96
+ # @yield [input, current, block]
97
+ # @yieldparam [Hash] input
98
+ # A Hash containing input from the parent production
99
+ # @yieldparam [Hash] current
100
+ # A Hash defined for the current production, during :start
101
+ # may be initialized with data to pass to further productions,
102
+ # during :finish, it contains data placed by earlier productions
103
+ # @yieldparam [Proc] block
104
+ # Block passed to initialization for yielding to calling parser.
105
+ # Should conform to the yield specs for #initialize
106
+ # Yield to generate a triple
107
+ def production(term, &block)
108
+ @@production_handlers ||= {}
109
+ @@production_handlers[term] = block
110
+ end
111
+
112
+ # Evaluate a handler, delegating to the specified object.
113
+ # This is necessary so that handlers can operate within the
114
+ # binding context of the parser in which they're invoked.
115
+ # @param [Object] object
116
+ # @return [Object]
117
+ def eval_with_binding(object)
118
+ @@delegate = object
119
+ object.instance_eval {yield}
120
+ end
121
+
122
+ private
123
+
124
+ def method_missing(method, *args, &block)
125
+ if @@delegate ||= nil
126
+ @@delegate.send method, *args, &block
127
+ else
128
+ super
129
+ end
87
130
  end
88
131
  end
89
132
 
@@ -163,12 +206,12 @@ module EBNF::LL1
163
206
  @branch = options[:branch]
164
207
  @first = options[:first] ||= {}
165
208
  @follow = options[:follow] ||= {}
166
- @lexer = input.is_a?(Lexer) ? input : Lexer.new(input, self.class.patterns, @options.merge(:unescape_terms => self.class.unescape_terms))
209
+ @lexer = input.is_a?(Lexer) ? input : Lexer.new(input, self.class.patterns, @options)
167
210
  @productions = []
168
211
  @parse_callback = block
169
212
  @recovering = false
170
213
  @error_log = []
171
- terminals = self.class.patterns.map(&:first) # Get defined terminals to help with branching
214
+ terminals = self.class.patterns.map(&:type) # Get defined terminals to help with branching
172
215
 
173
216
  # Unrecoverable errors
174
217
  raise Error, "Branch table not defined" unless @branch && @branch.length > 0
@@ -257,13 +300,18 @@ module EBNF::LL1
257
300
  # If in recovery mode, continue popping until we find a term with a follow list
258
301
  while !pushed &&
259
302
  !todo_stack.empty? &&
260
- ( todo_stack.last[:terms].to_a.empty? ||
261
- (@recovering && @follow[todo_stack.last[:term]].nil?))
262
- debug("parse(pop)", :level => 2) {"todo #{todo_stack.last.inspect}, depth #{depth}, recovering? #{@recovering.inspect}"}
263
- prod = todo_stack.last[:prod]
264
- @recovering = false if @follow[prod] # Stop recovering when we might have a match
265
- todo_stack.pop
266
- onFinish
303
+ ( (terms = todo_stack.last.fetch(:terms, [])).empty? ||
304
+ (@recovering && @follow.fetch(terms.last, []).none? {|t| token == t}))
305
+ debug("parse(pop)", :level => 2) {"todo #{todo_stack.last.inspect}, depth #{depth}"}
306
+ if terms.empty?
307
+ prod = todo_stack.last[:prod]
308
+ todo_stack.pop
309
+ onFinish
310
+ else
311
+ # Stop recovering when we find a production which starts with the term
312
+ debug("parse(pop)", :level => 2) {"recovery complete"}
313
+ @recovering = false
314
+ end
267
315
  end
268
316
  end
269
317
 
@@ -272,7 +320,11 @@ module EBNF::LL1
272
320
  # Continue popping contexts off of the stack
273
321
  while !todo_stack.empty?
274
322
  debug("parse(eof)", :level => 2) {"stack #{todo_stack.last.inspect}, depth #{depth}"}
275
- if todo_stack.last[:terms].length > 0
323
+ # There can't be anything left to do, or if there is, it must be optional
324
+ last_terms = todo_stack.last[:terms]
325
+ if last_terms.length > 0 && last_terms.none? {|t|
326
+ @first.has_key?(t) && @first[t].include?(:_eps)
327
+ }
276
328
  error("parse(eof)",
277
329
  "End of input before end of production: stack #{todo_stack.last.inspect}, depth #{depth}"
278
330
  )
@@ -289,35 +341,72 @@ module EBNF::LL1
289
341
 
290
342
  def depth; (@productions || []).length; end
291
343
 
344
+ # Current ProdData element
345
+ def prod_data; @prod_data.last; end
346
+
347
+ # Add a single value to prod_data, allows for values to be an array
348
+ def add_prod_datum(sym, values)
349
+ case values
350
+ when Array
351
+ prod_data[sym] ||= []
352
+ debug("add_prod_datum(#{sym})") {"#{prod_data[sym].inspect} += #{values.inspect}"}
353
+ prod_data[sym] += values
354
+ when nil
355
+ return
356
+ else
357
+ prod_data[sym] ||= []
358
+ debug("add_prod_datum(#{sym})") {"#{prod_data[sym].inspect} << #{values.inspect}"}
359
+ prod_data[sym] << values
360
+ end
361
+ end
362
+
363
+ # Add values to production data, values arranged as an array
364
+ def add_prod_data(sym, *values)
365
+ return if values.compact.empty?
366
+
367
+ prod_data[sym] ||= []
368
+ prod_data[sym] += values
369
+ debug("add_prod_data(#{sym})") {"#{prod_data[sym].inspect} += #{values.inspect}"}
370
+ end
371
+
292
372
  private
293
373
  # Start for production
294
374
  def onStart(prod)
295
- handler = self.class.production_handlers[prod]
375
+ handler = self.class.start_handlers[prod]
296
376
  @productions << prod
297
377
  if handler
298
378
  # Create a new production data element, potentially allowing handler
299
379
  # to customize before pushing on the @prod_data stack
300
380
  progress("#{prod}(:start):#{@prod_data.length}") {@prod_data.last}
301
381
  data = {}
302
- handler.call(self, :start, @prod_data.last, data, @parse_callback)
382
+ self.class.eval_with_binding(self) {
383
+ handler.call(@prod_data.last, data, @parse_callback)
384
+ }
303
385
  @prod_data << data
304
386
  else
387
+ # Make sure we push as many as we pop, even if there is no
388
+ # explicit start handler
389
+ @prod_data << {} if self.class.production_handlers[prod]
305
390
  progress("#{prod}(:start)") { get_token.inspect}
306
391
  end
307
- #puts @prod_data.inspect
392
+ #puts "prod_data(s): " + @prod_data.inspect
308
393
  end
309
394
 
310
395
  # Finish of production
311
396
  def onFinish
397
+ #puts "prod_data(f): " + @prod_data.inspect
312
398
  prod = @productions.last
313
399
  handler = self.class.production_handlers[prod]
314
- if handler
400
+ if handler && !@recovering
315
401
  # Pop production data element from stack, potentially allowing handler to use it
316
402
  data = @prod_data.pop
317
- handler.call(self, :finish, @prod_data.last, data, @parse_callback)
403
+ self.class.eval_with_binding(self) {
404
+ handler.call(@prod_data.last, data, @parse_callback)
405
+ }
406
+ #require 'debugger'; breakpoint
318
407
  progress("#{prod}(:finish):#{@prod_data.length}") {@prod_data.last}
319
408
  else
320
- progress("#{prod}(:finish)", '')
409
+ progress("#{prod}(:finish)", "recovering: #{@recovering.inspect}")
321
410
  end
322
411
  @productions.pop
323
412
  end
@@ -330,7 +419,9 @@ module EBNF::LL1
330
419
  # Allows catch-all for simple string terminals
331
420
  handler ||= self.class.terminal_handlers[nil] if prod.is_a?(String)
332
421
  if handler
333
- handler.call(self, parentProd, token, @prod_data.last)
422
+ self.class.eval_with_binding(self) {
423
+ handler.call(parentProd, token, @prod_data.last)
424
+ }
334
425
  progress("#{prod}(:token)", "", :depth => (depth + 1)) {"#{token}: #{@prod_data.last}"}
335
426
  else
336
427
  progress("#{prod}(:token)", "", :depth => (depth + 1)) {token.to_s}
@@ -380,7 +471,7 @@ module EBNF::LL1
380
471
  skipped = @lexer.shift
381
472
  progress("recovery") {"skip #{skipped.inspect}"}
382
473
  end
383
- debug("recovery") {"found #{token.inspect}"}
474
+ debug("recovery") {"found #{token.inspect} in #{first.include?(token) ? 'first' : 'follows'}"}
384
475
 
385
476
  # If the token is a first, just return it. Otherwise, it is a follow
386
477
  # and we need to skip to the end of the production
@@ -400,7 +491,7 @@ module EBNF::LL1
400
491
  def error(node, message, options = {})
401
492
  message += ", found #{options[:token].representation.inspect}" if options[:token]
402
493
  message += " at line #{@lineno}" if @lineno
403
- message += ", production = #{options[:production].inspect}" if options[:production] && @options[:debug]
494
+ message += ", production = #{options[:production].inspect}" if options[:production]
404
495
  @error_log << message unless @recovering
405
496
  @recovering = true
406
497
  debug(node, message, options.merge(:level => 0))
@@ -446,7 +537,6 @@ module EBNF::LL1
446
537
  if @options[:debug]
447
538
  return debug(node, message, {:level => 0}.merge(options))
448
539
  else
449
- str = "[#{@lineno}]#{' ' * depth}#{node}: #{message}"
450
540
  $stderr.puts("[#{@lineno}]#{' ' * depth}#{node}: #{message}")
451
541
  end
452
542
  end
data/lib/ebnf/rule.rb CHANGED
@@ -186,7 +186,7 @@ module EBNF
186
186
  # @param [Symbol, class] sym
187
187
  # Symbol matching any start element, or if it is String, any start element which is a String
188
188
  # @return [Array<Symbol, String>] list of symbol (singular), or strings which are start symbol, or nil if there are none
189
- def starts_with(sym)
189
+ def starts_with?(sym)
190
190
  if seq? && sym === (v = expr.fetch(1, nil))
191
191
  [v]
192
192
  elsif alt? && expr.any? {|e| sym === e}
@@ -196,6 +196,12 @@ module EBNF
196
196
  end
197
197
  end
198
198
 
199
+ # Do the firsts of this rule include the empty string?
200
+ # @return [Boolean]
201
+ def first_includes_eps?
202
+ @first && @first.include?(:_eps)
203
+ end
204
+
199
205
  # Add terminal as proceding this rule
200
206
  # @param [Array<Rule>] terminals
201
207
  # @return [Integer] if number of terminals added
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ebnf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-26 00:00:00.000000000 Z
12
+ date: 2013-03-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: sxp
@@ -27,6 +27,22 @@ dependencies:
27
27
  - - ! '>='
28
28
  - !ruby/object:Gem::Version
29
29
  version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rdf
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
30
46
  - !ruby/object:Gem::Dependency
31
47
  name: rspec
32
48
  requirement: !ruby/object:Gem::Requirement
@@ -101,6 +117,7 @@ files:
101
117
  - etc/doap.ttl
102
118
  - etc/ebnf.ebnf
103
119
  - etc/ebnf.ll1
120
+ - etc/ebnf.rb
104
121
  - etc/turtle.ebnf
105
122
  - etc/turtle.ll1
106
123
  - etc/turtle.rb
@@ -117,13 +134,16 @@ required_ruby_version: !ruby/object:Gem::Requirement
117
134
  requirements:
118
135
  - - ! '>='
119
136
  - !ruby/object:Gem::Version
120
- version: 1.9.3
137
+ version: 1.8.7
121
138
  required_rubygems_version: !ruby/object:Gem::Requirement
122
139
  none: false
123
140
  requirements:
124
141
  - - ! '>='
125
142
  - !ruby/object:Gem::Version
126
143
  version: '0'
144
+ segments:
145
+ - 0
146
+ hash: 1950185345296250945
127
147
  requirements: []
128
148
  rubyforge_project:
129
149
  rubygems_version: 1.8.25