ebnf 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +11 -6
- data/VERSION +1 -1
- data/etc/ebnf.ll1 +122 -268
- data/etc/ebnf.rb +899 -0
- data/etc/turtle.ll1 +61 -733
- data/etc/turtle.rb +56 -559
- data/lib/ebnf/base.rb +2 -2
- data/lib/ebnf/ll1.rb +126 -104
- data/lib/ebnf/ll1/lexer.rb +98 -40
- data/lib/ebnf/ll1/parser.rb +146 -56
- data/lib/ebnf/rule.rb +7 -1
- metadata +23 -3
data/lib/ebnf/ll1/parser.rb
CHANGED
@@ -20,70 +20,113 @@ module EBNF::LL1
|
|
20
20
|
|
21
21
|
# DSL for creating terminals and productions
|
22
22
|
module ClassMethods
|
23
|
+
def start_handlers; @@start_handlers || {}; end
|
23
24
|
def production_handlers; @@production_handlers || {}; end
|
24
25
|
def terminal_handlers; @@terminal_handlers || {}; end
|
25
26
|
def patterns; @@patterns || []; end
|
26
|
-
def unescape_terms; @@unescape_terms || []; end
|
27
|
-
|
28
|
-
##
|
29
|
-
# Defines a production called during different phases of parsing
|
30
|
-
# with data from previous production along with data defined for the
|
31
|
-
# current production
|
32
|
-
#
|
33
|
-
# @param [Symbol] term
|
34
|
-
# Term which is a key in the branch table
|
35
|
-
# @yield [parse, phase, input, current]
|
36
|
-
# @yieldparam [Object] parse
|
37
|
-
# Parser instance
|
38
|
-
# @yieldparam [Symbol] phase
|
39
|
-
# Phase of parsing, one of :start, or :finish
|
40
|
-
# @yieldparam [Hash] input
|
41
|
-
# A Hash containing input from the parent production
|
42
|
-
# @yieldparam [Hash] current
|
43
|
-
# A Hash defined for the current production, during :start
|
44
|
-
# may be initialized with data to pass to further productions,
|
45
|
-
# during :finish, it contains data placed by earlier productions
|
46
|
-
# @yieldparam [Prod] block
|
47
|
-
# Block passed to initialization for yielding to calling parser.
|
48
|
-
# Should conform to the yield specs for #initialize
|
49
|
-
# Yield to generate a triple
|
50
|
-
def production(term, &block)
|
51
|
-
@@production_handlers ||= {}
|
52
|
-
@@production_handlers[term] = block
|
53
|
-
end
|
54
27
|
|
55
28
|
##
|
56
29
|
# Defines the pattern for a terminal node and a block to be invoked
|
57
30
|
# when the terminal is encountered. If the block is missing, the
|
58
31
|
# value of the terminal will be placed on the input hash to be returned
|
59
|
-
# to a previous production.
|
32
|
+
# to a previous production. Block is called in an evaluation block from
|
33
|
+
# the enclosing parser.
|
60
34
|
#
|
61
35
|
# @param [Symbol, String] term
|
62
36
|
# Defines a terminal production, which appears within a sequence in the branch table
|
63
37
|
# @param [Regexp] regexp
|
64
38
|
# Pattern used to scan for this terminal
|
65
39
|
# @param [Hash] options
|
40
|
+
# @option options [Hash{String => String}] :map ({})
|
41
|
+
# A mapping from terminals, in lower-case form, to
|
42
|
+
# their canonical value
|
66
43
|
# @option options [Boolean] :unescape
|
67
44
|
# Cause strings and codepoints to be unescaped.
|
68
|
-
# @yield [
|
69
|
-
# @yieldparam [Object] parser
|
70
|
-
# Parser instance
|
45
|
+
# @yield [term, token, input, block]
|
71
46
|
# @yieldparam [Symbol] term
|
72
47
|
# A symbol indicating the production which referenced this terminal
|
73
48
|
# @yieldparam [String] token
|
74
49
|
# The scanned token
|
75
50
|
# @yieldparam [Hash] input
|
76
51
|
# A Hash containing input from the parent production
|
77
|
-
# @yieldparam [
|
52
|
+
# @yieldparam [Proc] block
|
78
53
|
# Block passed to initialization for yielding to calling parser.
|
79
54
|
# Should conform to the yield specs for #initialize
|
80
55
|
def terminal(term, regexp, options = {}, &block)
|
81
56
|
@@patterns ||= []
|
82
|
-
|
57
|
+
# Passed in order to define evaluation sequence
|
58
|
+
@@patterns << EBNF::LL1::Lexer::Terminal.new(term, regexp, options)
|
83
59
|
@@terminal_handlers ||= {}
|
84
60
|
@@terminal_handlers[term] = block if block_given?
|
85
|
-
|
86
|
-
|
61
|
+
end
|
62
|
+
|
63
|
+
##
|
64
|
+
# Defines a production called at the beginning of a particular production
|
65
|
+
# with data from previous production along with data defined for the
|
66
|
+
# current production. Block is called in an evaluation block from
|
67
|
+
# the enclosing parser.
|
68
|
+
#
|
69
|
+
# @param [Symbol] term
|
70
|
+
# Term which is a key in the branch table
|
71
|
+
# @yield [input, current, block]
|
72
|
+
# @yieldparam [Hash] input
|
73
|
+
# A Hash containing input from the parent production
|
74
|
+
# @yieldparam [Hash] current
|
75
|
+
# A Hash defined for the current production, during :start
|
76
|
+
# may be initialized with data to pass to further productions,
|
77
|
+
# during :finish, it contains data placed by earlier productions
|
78
|
+
# @yieldparam [Proc] block
|
79
|
+
# Block passed to initialization for yielding to calling parser.
|
80
|
+
# Should conform to the yield specs for #initialize
|
81
|
+
# Yield to generate a triple
|
82
|
+
def start_production(term, &block)
|
83
|
+
@@start_handlers ||= {}
|
84
|
+
@@start_handlers[term] = block
|
85
|
+
end
|
86
|
+
|
87
|
+
##
|
88
|
+
# Defines a production called when production of associated
|
89
|
+
# terminals and non-terminals has completed
|
90
|
+
# with data from previous production along with data defined for the
|
91
|
+
# current production. Block is called in an evaluation block from
|
92
|
+
# the enclosing parser.
|
93
|
+
#
|
94
|
+
# @param [Symbol] term
|
95
|
+
# Term which is a key in the branch table
|
96
|
+
# @yield [input, current, block]
|
97
|
+
# @yieldparam [Hash] input
|
98
|
+
# A Hash containing input from the parent production
|
99
|
+
# @yieldparam [Hash] current
|
100
|
+
# A Hash defined for the current production, during :start
|
101
|
+
# may be initialized with data to pass to further productions,
|
102
|
+
# during :finish, it contains data placed by earlier productions
|
103
|
+
# @yieldparam [Proc] block
|
104
|
+
# Block passed to initialization for yielding to calling parser.
|
105
|
+
# Should conform to the yield specs for #initialize
|
106
|
+
# Yield to generate a triple
|
107
|
+
def production(term, &block)
|
108
|
+
@@production_handlers ||= {}
|
109
|
+
@@production_handlers[term] = block
|
110
|
+
end
|
111
|
+
|
112
|
+
# Evaluate a handler, delegating to the specified object.
|
113
|
+
# This is necessary so that handlers can operate within the
|
114
|
+
# binding context of the parser in which they're invoked.
|
115
|
+
# @param [Object] object
|
116
|
+
# @return [Object]
|
117
|
+
def eval_with_binding(object)
|
118
|
+
@@delegate = object
|
119
|
+
object.instance_eval {yield}
|
120
|
+
end
|
121
|
+
|
122
|
+
private
|
123
|
+
|
124
|
+
def method_missing(method, *args, &block)
|
125
|
+
if @@delegate ||= nil
|
126
|
+
@@delegate.send method, *args, &block
|
127
|
+
else
|
128
|
+
super
|
129
|
+
end
|
87
130
|
end
|
88
131
|
end
|
89
132
|
|
@@ -163,12 +206,12 @@ module EBNF::LL1
|
|
163
206
|
@branch = options[:branch]
|
164
207
|
@first = options[:first] ||= {}
|
165
208
|
@follow = options[:follow] ||= {}
|
166
|
-
@lexer = input.is_a?(Lexer) ? input : Lexer.new(input, self.class.patterns, @options
|
209
|
+
@lexer = input.is_a?(Lexer) ? input : Lexer.new(input, self.class.patterns, @options)
|
167
210
|
@productions = []
|
168
211
|
@parse_callback = block
|
169
212
|
@recovering = false
|
170
213
|
@error_log = []
|
171
|
-
terminals = self.class.patterns.map(&:
|
214
|
+
terminals = self.class.patterns.map(&:type) # Get defined terminals to help with branching
|
172
215
|
|
173
216
|
# Unrecoverable errors
|
174
217
|
raise Error, "Branch table not defined" unless @branch && @branch.length > 0
|
@@ -257,13 +300,18 @@ module EBNF::LL1
|
|
257
300
|
# If in recovery mode, continue popping until we find a term with a follow list
|
258
301
|
while !pushed &&
|
259
302
|
!todo_stack.empty? &&
|
260
|
-
( todo_stack.last
|
261
|
-
(@recovering && @follow
|
262
|
-
debug("parse(pop)", :level => 2) {"todo #{todo_stack.last.inspect}, depth #{depth}
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
303
|
+
( (terms = todo_stack.last.fetch(:terms, [])).empty? ||
|
304
|
+
(@recovering && @follow.fetch(terms.last, []).none? {|t| token == t}))
|
305
|
+
debug("parse(pop)", :level => 2) {"todo #{todo_stack.last.inspect}, depth #{depth}"}
|
306
|
+
if terms.empty?
|
307
|
+
prod = todo_stack.last[:prod]
|
308
|
+
todo_stack.pop
|
309
|
+
onFinish
|
310
|
+
else
|
311
|
+
# Stop recovering when we reach a production which starts with the term
|
312
|
+
debug("parse(pop)", :level => 2) {"recovery complete"}
|
313
|
+
@recovering = false
|
314
|
+
end
|
267
315
|
end
|
268
316
|
end
|
269
317
|
|
@@ -272,7 +320,11 @@ module EBNF::LL1
|
|
272
320
|
# Continue popping contexts off of the stack
|
273
321
|
while !todo_stack.empty?
|
274
322
|
debug("parse(eof)", :level => 2) {"stack #{todo_stack.last.inspect}, depth #{depth}"}
|
275
|
-
if
|
323
|
+
# There can't be anything left to do, or if there is, it must be optional
|
324
|
+
last_terms = todo_stack.last[:terms]
|
325
|
+
if last_terms.length > 0 && last_terms.none? {|t|
|
326
|
+
@first.has_key?(t) && @first[t].include?(:_eps)
|
327
|
+
}
|
276
328
|
error("parse(eof)",
|
277
329
|
"End of input before end of production: stack #{todo_stack.last.inspect}, depth #{depth}"
|
278
330
|
)
|
@@ -289,35 +341,72 @@ module EBNF::LL1
|
|
289
341
|
|
290
342
|
def depth; (@productions || []).length; end
|
291
343
|
|
344
|
+
# Current ProdData element
|
345
|
+
def prod_data; @prod_data.last; end
|
346
|
+
|
347
|
+
# Add a single value to prod_data, allows for values to be an array
|
348
|
+
def add_prod_datum(sym, values)
|
349
|
+
case values
|
350
|
+
when Array
|
351
|
+
prod_data[sym] ||= []
|
352
|
+
debug("add_prod_datum(#{sym})") {"#{prod_data[sym].inspect} += #{values.inspect}"}
|
353
|
+
prod_data[sym] += values
|
354
|
+
when nil
|
355
|
+
return
|
356
|
+
else
|
357
|
+
prod_data[sym] ||= []
|
358
|
+
debug("add_prod_datum(#{sym})") {"#{prod_data[sym].inspect} << #{values.inspect}"}
|
359
|
+
prod_data[sym] << values
|
360
|
+
end
|
361
|
+
end
|
362
|
+
|
363
|
+
# Add values to production data, values arranged as an array
|
364
|
+
def add_prod_data(sym, *values)
|
365
|
+
return if values.compact.empty?
|
366
|
+
|
367
|
+
prod_data[sym] ||= []
|
368
|
+
prod_data[sym] += values
|
369
|
+
debug("add_prod_data(#{sym})") {"#{prod_data[sym].inspect} += #{values.inspect}"}
|
370
|
+
end
|
371
|
+
|
292
372
|
private
|
293
373
|
# Start for production
|
294
374
|
def onStart(prod)
|
295
|
-
handler = self.class.
|
375
|
+
handler = self.class.start_handlers[prod]
|
296
376
|
@productions << prod
|
297
377
|
if handler
|
298
378
|
# Create a new production data element, potentially allowing handler
|
299
379
|
# to customize before pushing on the @prod_data stack
|
300
380
|
progress("#{prod}(:start):#{@prod_data.length}") {@prod_data.last}
|
301
381
|
data = {}
|
302
|
-
|
382
|
+
self.class.eval_with_binding(self) {
|
383
|
+
handler.call(@prod_data.last, data, @parse_callback)
|
384
|
+
}
|
303
385
|
@prod_data << data
|
304
386
|
else
|
387
|
+
# Make sure we push as many as we pop, even if there is no
|
388
|
+
# explicit start handler
|
389
|
+
@prod_data << {} if self.class.production_handlers[prod]
|
305
390
|
progress("#{prod}(:start)") { get_token.inspect}
|
306
391
|
end
|
307
|
-
#puts @prod_data.inspect
|
392
|
+
#puts "prod_data(s): " + @prod_data.inspect
|
308
393
|
end
|
309
394
|
|
310
395
|
# Finish of production
|
311
396
|
def onFinish
|
397
|
+
#puts "prod_data(f): " + @prod_data.inspect
|
312
398
|
prod = @productions.last
|
313
399
|
handler = self.class.production_handlers[prod]
|
314
|
-
if handler
|
400
|
+
if handler && !@recovering
|
315
401
|
# Pop production data element from stack, potentially allowing handler to use it
|
316
402
|
data = @prod_data.pop
|
317
|
-
|
403
|
+
self.class.eval_with_binding(self) {
|
404
|
+
handler.call(@prod_data.last, data, @parse_callback)
|
405
|
+
}
|
406
|
+
#require 'debugger'; breakpoint
|
318
407
|
progress("#{prod}(:finish):#{@prod_data.length}") {@prod_data.last}
|
319
408
|
else
|
320
|
-
progress("#{prod}(:finish)",
|
409
|
+
progress("#{prod}(:finish)", "recovering: #{@recovering.inspect}")
|
321
410
|
end
|
322
411
|
@productions.pop
|
323
412
|
end
|
@@ -330,7 +419,9 @@ module EBNF::LL1
|
|
330
419
|
# Allows catch-all for simple string terminals
|
331
420
|
handler ||= self.class.terminal_handlers[nil] if prod.is_a?(String)
|
332
421
|
if handler
|
333
|
-
|
422
|
+
self.class.eval_with_binding(self) {
|
423
|
+
handler.call(parentProd, token, @prod_data.last)
|
424
|
+
}
|
334
425
|
progress("#{prod}(:token)", "", :depth => (depth + 1)) {"#{token}: #{@prod_data.last}"}
|
335
426
|
else
|
336
427
|
progress("#{prod}(:token)", "", :depth => (depth + 1)) {token.to_s}
|
@@ -380,7 +471,7 @@ module EBNF::LL1
|
|
380
471
|
skipped = @lexer.shift
|
381
472
|
progress("recovery") {"skip #{skipped.inspect}"}
|
382
473
|
end
|
383
|
-
debug("recovery") {"found #{token.inspect}"}
|
474
|
+
debug("recovery") {"found #{token.inspect} in #{first.include?(token) ? 'first' : 'follows'}"}
|
384
475
|
|
385
476
|
# If the token is a first, just return it. Otherwise, it is a follow
|
386
477
|
# and we need to skip to the end of the production
|
@@ -400,7 +491,7 @@ module EBNF::LL1
|
|
400
491
|
def error(node, message, options = {})
|
401
492
|
message += ", found #{options[:token].representation.inspect}" if options[:token]
|
402
493
|
message += " at line #{@lineno}" if @lineno
|
403
|
-
message += ", production = #{options[:production].inspect}" if options[:production]
|
494
|
+
message += ", production = #{options[:production].inspect}" if options[:production]
|
404
495
|
@error_log << message unless @recovering
|
405
496
|
@recovering = true
|
406
497
|
debug(node, message, options.merge(:level => 0))
|
@@ -446,7 +537,6 @@ module EBNF::LL1
|
|
446
537
|
if @options[:debug]
|
447
538
|
return debug(node, message, {:level => 0}.merge(options))
|
448
539
|
else
|
449
|
-
str = "[#{@lineno}]#{' ' * depth}#{node}: #{message}"
|
450
540
|
$stderr.puts("[#{@lineno}]#{' ' * depth}#{node}: #{message}")
|
451
541
|
end
|
452
542
|
end
|
data/lib/ebnf/rule.rb
CHANGED
@@ -186,7 +186,7 @@ module EBNF
|
|
186
186
|
# @param [Symbol, class] sym
|
187
187
|
# Symbol matching any start element, or if it is String, any start element which is a String
|
188
188
|
# @return [Array<Symbol, String>] list of symbol (singular), or strings which are start symbol, or nil if there are none
|
189
|
-
def starts_with(sym)
|
189
|
+
def starts_with?(sym)
|
190
190
|
if seq? && sym === (v = expr.fetch(1, nil))
|
191
191
|
[v]
|
192
192
|
elsif alt? && expr.any? {|e| sym === e}
|
@@ -196,6 +196,12 @@ module EBNF
|
|
196
196
|
end
|
197
197
|
end
|
198
198
|
|
199
|
+
# Do the firsts of this rule include the empty string?
|
200
|
+
# @return [Boolean]
|
201
|
+
def first_includes_eps?
|
202
|
+
@first && @first.include?(:_eps)
|
203
|
+
end
|
204
|
+
|
199
205
|
# Add terminal as proceding this rule
|
200
206
|
# @param [Array<Rule>] terminals
|
201
207
|
# @return [Integer] the number of terminals added
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ebnf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-02
|
12
|
+
date: 2013-03-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: sxp
|
@@ -27,6 +27,22 @@ dependencies:
|
|
27
27
|
- - ! '>='
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rdf
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
30
46
|
- !ruby/object:Gem::Dependency
|
31
47
|
name: rspec
|
32
48
|
requirement: !ruby/object:Gem::Requirement
|
@@ -101,6 +117,7 @@ files:
|
|
101
117
|
- etc/doap.ttl
|
102
118
|
- etc/ebnf.ebnf
|
103
119
|
- etc/ebnf.ll1
|
120
|
+
- etc/ebnf.rb
|
104
121
|
- etc/turtle.ebnf
|
105
122
|
- etc/turtle.ll1
|
106
123
|
- etc/turtle.rb
|
@@ -117,13 +134,16 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
117
134
|
requirements:
|
118
135
|
- - ! '>='
|
119
136
|
- !ruby/object:Gem::Version
|
120
|
-
version: 1.
|
137
|
+
version: 1.8.7
|
121
138
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
122
139
|
none: false
|
123
140
|
requirements:
|
124
141
|
- - ! '>='
|
125
142
|
- !ruby/object:Gem::Version
|
126
143
|
version: '0'
|
144
|
+
segments:
|
145
|
+
- 0
|
146
|
+
hash: 1950185345296250945
|
127
147
|
requirements: []
|
128
148
|
rubyforge_project:
|
129
149
|
rubygems_version: 1.8.25
|