ebnf 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -20,70 +20,113 @@ module EBNF::LL1
20
20
 
21
21
  # DSL for creating terminals and productions
22
22
  module ClassMethods
23
+ def start_handlers; @@start_handlers || {}; end
23
24
  def production_handlers; @@production_handlers || {}; end
24
25
  def terminal_handlers; @@terminal_handlers || {}; end
25
26
  def patterns; @@patterns || []; end
26
- def unescape_terms; @@unescape_terms || []; end
27
-
28
- ##
29
- # Defines a production called during different phases of parsing
30
- # with data from previous production along with data defined for the
31
- # current production
32
- #
33
- # @param [Symbol] term
34
- # Term which is a key in the branch table
35
- # @yield [parse, phase, input, current]
36
- # @yieldparam [Object] parse
37
- # Parser instance
38
- # @yieldparam [Symbol] phase
39
- # Phase of parsing, one of :start, or :finish
40
- # @yieldparam [Hash] input
41
- # A Hash containing input from the parent production
42
- # @yieldparam [Hash] current
43
- # A Hash defined for the current production, during :start
44
- # may be initialized with data to pass to further productions,
45
- # during :finish, it contains data placed by earlier productions
46
- # @yieldparam [Prod] block
47
- # Block passed to initialization for yielding to calling parser.
48
- # Should conform to the yield specs for #initialize
49
- # Yield to generate a triple
50
- def production(term, &block)
51
- @@production_handlers ||= {}
52
- @@production_handlers[term] = block
53
- end
54
27
 
55
28
  ##
56
29
  # Defines the pattern for a terminal node and a block to be invoked
57
30
  # when the terminal is encountered. If the block is missing, the
58
31
  # value of the terminal will be placed on the input hash to be returned
59
- # to a previous production.
32
+ # to a previous production. Block is called in an evaluation block from
33
+ # the enclosing parser.
60
34
  #
61
35
  # @param [Symbol, String] term
62
36
  # Defines a terminal production, which appears within a sequence in the branch table
63
37
  # @param [Regexp] regexp
64
38
  # Pattern used to scan for this terminal
65
39
  # @param [Hash] options
40
+ # @option options [Hash{String => String}] :map ({})
41
+ # A mapping from terminals, in lower-case form, to
42
+ # their canonical value
66
43
  # @option options [Boolean] :unescape
67
44
  # Cause strings and codepoints to be unescaped.
68
- # @yield [parser, term, token, input]
69
- # @yieldparam [Object] parser
70
- # Parser instance
45
+ # @yield [term, token, input, block]
71
46
  # @yieldparam [Symbol] term
72
47
  # A symbol indicating the production which referenced this terminal
73
48
  # @yieldparam [String] token
74
49
  # The scanned token
75
50
  # @yieldparam [Hash] input
76
51
  # A Hash containing input from the parent production
77
- # @yieldparam [Prod] block
52
+ # @yieldparam [Proc] block
78
53
  # Block passed to initialization for yielding to calling parser.
79
54
  # Should conform to the yield specs for #initialize
80
55
  def terminal(term, regexp, options = {}, &block)
81
56
  @@patterns ||= []
82
- @@patterns << [term, regexp] # Passed in order to define evaulation sequence
57
+ # Passed in order to define evaluation sequence
58
+ @@patterns << EBNF::LL1::Lexer::Terminal.new(term, regexp, options)
83
59
  @@terminal_handlers ||= {}
84
60
  @@terminal_handlers[term] = block if block_given?
85
- @@unescape_terms ||= []
86
- @@unescape_terms << term if options[:unescape]
61
+ end
62
+
63
+ ##
64
+ # Defines a production called at the beginning of a particular production
65
+ # with data from previous production along with data defined for the
66
+ # current production. Block is called in an evaluation block from
67
+ # the enclosing parser.
68
+ #
69
+ # @param [Symbol] term
70
+ # Term which is a key in the branch table
71
+ # @yield [input, current, block]
72
+ # @yieldparam [Hash] input
73
+ # A Hash containing input from the parent production
74
+ # @yieldparam [Hash] current
75
+ # A Hash defined for the current production, during :start
76
+ # may be initialized with data to pass to further productions,
77
+ # during :finish, it contains data placed by earlier productions
78
+ # @yieldparam [Proc] block
79
+ # Block passed to initialization for yielding to calling parser.
80
+ # Should conform to the yield specs for #initialize
81
+ # Yield to generate a triple
82
+ def start_production(term, &block)
83
+ @@start_handlers ||= {}
84
+ @@start_handlers[term] = block
85
+ end
86
+
87
+ ##
88
+ # Defines a production called when production of associated
89
+ # terminals and non-terminals has completed
90
+ # with data from previous production along with data defined for the
91
+ # current production. Block is called in an evaluation block from
92
+ # the enclosing parser.
93
+ #
94
+ # @param [Symbol] term
95
+ # Term which is a key in the branch table
96
+ # @yield [input, current, block]
97
+ # @yieldparam [Hash] input
98
+ # A Hash containing input from the parent production
99
+ # @yieldparam [Hash] current
100
+ # A Hash defined for the current production, during :start
101
+ # may be initialized with data to pass to further productions,
102
+ # during :finish, it contains data placed by earlier productions
103
+ # @yieldparam [Proc] block
104
+ # Block passed to initialization for yielding to calling parser.
105
+ # Should conform to the yield specs for #initialize
106
+ # Yield to generate a triple
107
+ def production(term, &block)
108
+ @@production_handlers ||= {}
109
+ @@production_handlers[term] = block
110
+ end
111
+
112
+ # Evaluate a handler, delegating to the specified object.
113
+ # This is necessary so that handlers can operate within the
114
+ # binding context of the parser in which they're invoked.
115
+ # @param [Object] object
116
+ # @return [Object]
117
+ def eval_with_binding(object)
118
+ @@delegate = object
119
+ object.instance_eval {yield}
120
+ end
121
+
122
+ private
123
+
124
+ def method_missing(method, *args, &block)
125
+ if @@delegate ||= nil
126
+ @@delegate.send method, *args, &block
127
+ else
128
+ super
129
+ end
87
130
  end
88
131
  end
89
132
 
@@ -163,12 +206,12 @@ module EBNF::LL1
163
206
  @branch = options[:branch]
164
207
  @first = options[:first] ||= {}
165
208
  @follow = options[:follow] ||= {}
166
- @lexer = input.is_a?(Lexer) ? input : Lexer.new(input, self.class.patterns, @options.merge(:unescape_terms => self.class.unescape_terms))
209
+ @lexer = input.is_a?(Lexer) ? input : Lexer.new(input, self.class.patterns, @options)
167
210
  @productions = []
168
211
  @parse_callback = block
169
212
  @recovering = false
170
213
  @error_log = []
171
- terminals = self.class.patterns.map(&:first) # Get defined terminals to help with branching
214
+ terminals = self.class.patterns.map(&:type) # Get defined terminals to help with branching
172
215
 
173
216
  # Unrecoverable errors
174
217
  raise Error, "Branch table not defined" unless @branch && @branch.length > 0
@@ -257,13 +300,18 @@ module EBNF::LL1
257
300
  # If in recovery mode, continue popping until we find a term with a follow list
258
301
  while !pushed &&
259
302
  !todo_stack.empty? &&
260
- ( todo_stack.last[:terms].to_a.empty? ||
261
- (@recovering && @follow[todo_stack.last[:term]].nil?))
262
- debug("parse(pop)", :level => 2) {"todo #{todo_stack.last.inspect}, depth #{depth}, recovering? #{@recovering.inspect}"}
263
- prod = todo_stack.last[:prod]
264
- @recovering = false if @follow[prod] # Stop recovering when we might have a match
265
- todo_stack.pop
266
- onFinish
303
+ ( (terms = todo_stack.last.fetch(:terms, [])).empty? ||
304
+ (@recovering && @follow.fetch(terms.last, []).none? {|t| token == t}))
305
+ debug("parse(pop)", :level => 2) {"todo #{todo_stack.last.inspect}, depth #{depth}"}
306
+ if terms.empty?
307
+ prod = todo_stack.last[:prod]
308
+ todo_stack.pop
309
+ onFinish
310
+ else
311
+ # Stop recovering when we find a production which starts with the term
312
+ debug("parse(pop)", :level => 2) {"recovery complete"}
313
+ @recovering = false
314
+ end
267
315
  end
268
316
  end
269
317
 
@@ -272,7 +320,11 @@ module EBNF::LL1
272
320
  # Continue popping contexts off of the stack
273
321
  while !todo_stack.empty?
274
322
  debug("parse(eof)", :level => 2) {"stack #{todo_stack.last.inspect}, depth #{depth}"}
275
- if todo_stack.last[:terms].length > 0
323
+ # There can't be anything left to do, or if there is, it must be optional
324
+ last_terms = todo_stack.last[:terms]
325
+ if last_terms.length > 0 && last_terms.none? {|t|
326
+ @first.has_key?(t) && @first[t].include?(:_eps)
327
+ }
276
328
  error("parse(eof)",
277
329
  "End of input before end of production: stack #{todo_stack.last.inspect}, depth #{depth}"
278
330
  )
@@ -289,35 +341,72 @@ module EBNF::LL1
289
341
 
290
342
  def depth; (@productions || []).length; end
291
343
 
344
+ # Current ProdData element
345
+ def prod_data; @prod_data.last; end
346
+
347
+ # Add a single value to prod_data, allows for values to be an array
348
+ def add_prod_datum(sym, values)
349
+ case values
350
+ when Array
351
+ prod_data[sym] ||= []
352
+ debug("add_prod_datum(#{sym})") {"#{prod_data[sym].inspect} += #{values.inspect}"}
353
+ prod_data[sym] += values
354
+ when nil
355
+ return
356
+ else
357
+ prod_data[sym] ||= []
358
+ debug("add_prod_datum(#{sym})") {"#{prod_data[sym].inspect} << #{values.inspect}"}
359
+ prod_data[sym] << values
360
+ end
361
+ end
362
+
363
+ # Add values to production data, values arranged as an array
364
+ def add_prod_data(sym, *values)
365
+ return if values.compact.empty?
366
+
367
+ prod_data[sym] ||= []
368
+ prod_data[sym] += values
369
+ debug("add_prod_data(#{sym})") {"#{prod_data[sym].inspect} += #{values.inspect}"}
370
+ end
371
+
292
372
  private
293
373
  # Start for production
294
374
  def onStart(prod)
295
- handler = self.class.production_handlers[prod]
375
+ handler = self.class.start_handlers[prod]
296
376
  @productions << prod
297
377
  if handler
298
378
  # Create a new production data element, potentially allowing handler
299
379
  # to customize before pushing on the @prod_data stack
300
380
  progress("#{prod}(:start):#{@prod_data.length}") {@prod_data.last}
301
381
  data = {}
302
- handler.call(self, :start, @prod_data.last, data, @parse_callback)
382
+ self.class.eval_with_binding(self) {
383
+ handler.call(@prod_data.last, data, @parse_callback)
384
+ }
303
385
  @prod_data << data
304
386
  else
387
+ # Make sure we push as many as we pop, even if there is no
388
+ # explicit start handler
389
+ @prod_data << {} if self.class.production_handlers[prod]
305
390
  progress("#{prod}(:start)") { get_token.inspect}
306
391
  end
307
- #puts @prod_data.inspect
392
+ #puts "prod_data(s): " + @prod_data.inspect
308
393
  end
309
394
 
310
395
  # Finish of production
311
396
  def onFinish
397
+ #puts "prod_data(f): " + @prod_data.inspect
312
398
  prod = @productions.last
313
399
  handler = self.class.production_handlers[prod]
314
- if handler
400
+ if handler && !@recovering
315
401
  # Pop production data element from stack, potentially allowing handler to use it
316
402
  data = @prod_data.pop
317
- handler.call(self, :finish, @prod_data.last, data, @parse_callback)
403
+ self.class.eval_with_binding(self) {
404
+ handler.call(@prod_data.last, data, @parse_callback)
405
+ }
406
+ #require 'debugger'; breakpoint
318
407
  progress("#{prod}(:finish):#{@prod_data.length}") {@prod_data.last}
319
408
  else
320
- progress("#{prod}(:finish)", '')
409
+ progress("#{prod}(:finish)", "recovering: #{@recovering.inspect}")
321
410
  end
322
411
  @productions.pop
323
412
  end
@@ -330,7 +419,9 @@ module EBNF::LL1
330
419
  # Allows catch-all for simple string terminals
331
420
  handler ||= self.class.terminal_handlers[nil] if prod.is_a?(String)
332
421
  if handler
333
- handler.call(self, parentProd, token, @prod_data.last)
422
+ self.class.eval_with_binding(self) {
423
+ handler.call(parentProd, token, @prod_data.last)
424
+ }
334
425
  progress("#{prod}(:token)", "", :depth => (depth + 1)) {"#{token}: #{@prod_data.last}"}
335
426
  else
336
427
  progress("#{prod}(:token)", "", :depth => (depth + 1)) {token.to_s}
@@ -380,7 +471,7 @@ module EBNF::LL1
380
471
  skipped = @lexer.shift
381
472
  progress("recovery") {"skip #{skipped.inspect}"}
382
473
  end
383
- debug("recovery") {"found #{token.inspect}"}
474
+ debug("recovery") {"found #{token.inspect} in #{first.include?(token) ? 'first' : 'follows'}"}
384
475
 
385
476
  # If the token is a first, just return it. Otherwise, it is a follow
386
477
  # and we need to skip to the end of the production
@@ -400,7 +491,7 @@ module EBNF::LL1
400
491
  def error(node, message, options = {})
401
492
  message += ", found #{options[:token].representation.inspect}" if options[:token]
402
493
  message += " at line #{@lineno}" if @lineno
403
- message += ", production = #{options[:production].inspect}" if options[:production] && @options[:debug]
494
+ message += ", production = #{options[:production].inspect}" if options[:production]
404
495
  @error_log << message unless @recovering
405
496
  @recovering = true
406
497
  debug(node, message, options.merge(:level => 0))
@@ -446,7 +537,6 @@ module EBNF::LL1
446
537
  if @options[:debug]
447
538
  return debug(node, message, {:level => 0}.merge(options))
448
539
  else
449
- str = "[#{@lineno}]#{' ' * depth}#{node}: #{message}"
450
540
  $stderr.puts("[#{@lineno}]#{' ' * depth}#{node}: #{message}")
451
541
  end
452
542
  end
data/lib/ebnf/rule.rb CHANGED
@@ -186,7 +186,7 @@ module EBNF
186
186
  # @param [Symbol, class] sym
187
187
  # Symbol matching any start element, or if it is String, any start element which is a String
188
188
  # @return [Array<Symbol, String>] list of symbol (singular), or strings which are start symbol, or nil if there are none
189
- def starts_with(sym)
189
+ def starts_with?(sym)
190
190
  if seq? && sym === (v = expr.fetch(1, nil))
191
191
  [v]
192
192
  elsif alt? && expr.any? {|e| sym === e}
@@ -196,6 +196,12 @@ module EBNF
196
196
  end
197
197
  end
198
198
 
199
+ # Do the firsts of this rule include the empty string?
200
+ # @return [Boolean]
201
+ def first_includes_eps?
202
+ @first && @first.include?(:_eps)
203
+ end
204
+
199
205
  # Add terminal as proceding this rule
200
206
  # @param [Array<Rule>] terminals
201
207
  # @return [Integer] if number of terminals added
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ebnf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-26 00:00:00.000000000 Z
12
+ date: 2013-03-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: sxp
@@ -27,6 +27,22 @@ dependencies:
27
27
  - - ! '>='
28
28
  - !ruby/object:Gem::Version
29
29
  version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rdf
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
30
46
  - !ruby/object:Gem::Dependency
31
47
  name: rspec
32
48
  requirement: !ruby/object:Gem::Requirement
@@ -101,6 +117,7 @@ files:
101
117
  - etc/doap.ttl
102
118
  - etc/ebnf.ebnf
103
119
  - etc/ebnf.ll1
120
+ - etc/ebnf.rb
104
121
  - etc/turtle.ebnf
105
122
  - etc/turtle.ll1
106
123
  - etc/turtle.rb
@@ -117,13 +134,16 @@ required_ruby_version: !ruby/object:Gem::Requirement
117
134
  requirements:
118
135
  - - ! '>='
119
136
  - !ruby/object:Gem::Version
120
- version: 1.9.3
137
+ version: 1.8.7
121
138
  required_rubygems_version: !ruby/object:Gem::Requirement
122
139
  none: false
123
140
  requirements:
124
141
  - - ! '>='
125
142
  - !ruby/object:Gem::Version
126
143
  version: '0'
144
+ segments:
145
+ - 0
146
+ hash: 1950185345296250945
127
147
  requirements: []
128
148
  rubyforge_project:
129
149
  rubygems_version: 1.8.25