ebnf 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +11 -6
- data/VERSION +1 -1
- data/etc/ebnf.ll1 +122 -268
- data/etc/ebnf.rb +899 -0
- data/etc/turtle.ll1 +61 -733
- data/etc/turtle.rb +56 -559
- data/lib/ebnf/base.rb +2 -2
- data/lib/ebnf/ll1.rb +126 -104
- data/lib/ebnf/ll1/lexer.rb +98 -40
- data/lib/ebnf/ll1/parser.rb +146 -56
- data/lib/ebnf/rule.rb +7 -1
- metadata +23 -3
data/lib/ebnf/ll1/parser.rb
CHANGED
@@ -20,70 +20,113 @@ module EBNF::LL1
|
|
20
20
|
|
21
21
|
# DSL for creating terminals and productions
|
22
22
|
module ClassMethods
|
23
|
+
def start_handlers; @@start_handlers || {}; end
|
23
24
|
def production_handlers; @@production_handlers || {}; end
|
24
25
|
def terminal_handlers; @@terminal_handlers || {}; end
|
25
26
|
def patterns; @@patterns || []; end
|
26
|
-
def unescape_terms; @@unescape_terms || []; end
|
27
|
-
|
28
|
-
##
|
29
|
-
# Defines a production called during different phases of parsing
|
30
|
-
# with data from previous production along with data defined for the
|
31
|
-
# current production
|
32
|
-
#
|
33
|
-
# @param [Symbol] term
|
34
|
-
# Term which is a key in the branch table
|
35
|
-
# @yield [parse, phase, input, current]
|
36
|
-
# @yieldparam [Object] parse
|
37
|
-
# Parser instance
|
38
|
-
# @yieldparam [Symbol] phase
|
39
|
-
# Phase of parsing, one of :start, or :finish
|
40
|
-
# @yieldparam [Hash] input
|
41
|
-
# A Hash containing input from the parent production
|
42
|
-
# @yieldparam [Hash] current
|
43
|
-
# A Hash defined for the current production, during :start
|
44
|
-
# may be initialized with data to pass to further productions,
|
45
|
-
# during :finish, it contains data placed by earlier productions
|
46
|
-
# @yieldparam [Prod] block
|
47
|
-
# Block passed to initialization for yielding to calling parser.
|
48
|
-
# Should conform to the yield specs for #initialize
|
49
|
-
# Yield to generate a triple
|
50
|
-
def production(term, &block)
|
51
|
-
@@production_handlers ||= {}
|
52
|
-
@@production_handlers[term] = block
|
53
|
-
end
|
54
27
|
|
55
28
|
##
|
56
29
|
# Defines the pattern for a terminal node and a block to be invoked
|
57
30
|
# when the terminal is encountered. If the block is missing, the
|
58
31
|
# value of the terminal will be placed on the input hash to be returned
|
59
|
-
# to a previous production.
|
32
|
+
# to a previous production. Block is called in an evaluation block from
|
33
|
+
# the enclosing parser.
|
60
34
|
#
|
61
35
|
# @param [Symbol, String] term
|
62
36
|
# Defines a terminal production, which appears within a sequence in the branch table
|
63
37
|
# @param [Regexp] regexp
|
64
38
|
# Pattern used to scan for this terminal
|
65
39
|
# @param [Hash] options
|
40
|
+
# @option options [Hash{String => String}] :map ({})
|
41
|
+
# A mapping from terminals, in lower-case form, to
|
42
|
+
# their canonical value
|
66
43
|
# @option options [Boolean] :unescape
|
67
44
|
# Cause strings and codepoints to be unescaped.
|
68
|
-
# @yield [
|
69
|
-
# @yieldparam [Object] parser
|
70
|
-
# Parser instance
|
45
|
+
# @yield [term, token, input, block]
|
71
46
|
# @yieldparam [Symbol] term
|
72
47
|
# A symbol indicating the production which referenced this terminal
|
73
48
|
# @yieldparam [String] token
|
74
49
|
# The scanned token
|
75
50
|
# @yieldparam [Hash] input
|
76
51
|
# A Hash containing input from the parent production
|
77
|
-
# @yieldparam [
|
52
|
+
# @yieldparam [Proc] block
|
78
53
|
# Block passed to initialization for yielding to calling parser.
|
79
54
|
# Should conform to the yield specs for #initialize
|
80
55
|
def terminal(term, regexp, options = {}, &block)
|
81
56
|
@@patterns ||= []
|
82
|
-
|
57
|
+
# Passed in order to define evaluation sequence
|
58
|
+
@@patterns << EBNF::LL1::Lexer::Terminal.new(term, regexp, options)
|
83
59
|
@@terminal_handlers ||= {}
|
84
60
|
@@terminal_handlers[term] = block if block_given?
|
85
|
-
|
86
|
-
|
61
|
+
end
|
62
|
+
|
63
|
+
##
|
64
|
+
# Defines a production called at the beginning of a particular production
|
65
|
+
# with data from previous production along with data defined for the
|
66
|
+
# current production. Block is called in an evaluation block from
|
67
|
+
# the enclosing parser.
|
68
|
+
#
|
69
|
+
# @param [Symbol] term
|
70
|
+
# Term which is a key in the branch table
|
71
|
+
# @yield [input, current, block]
|
72
|
+
# @yieldparam [Hash] input
|
73
|
+
# A Hash containing input from the parent production
|
74
|
+
# @yieldparam [Hash] current
|
75
|
+
# A Hash defined for the current production, during :start
|
76
|
+
# may be initialized with data to pass to further productions,
|
77
|
+
# during :finish, it contains data placed by earlier productions
|
78
|
+
# @yieldparam [Proc] block
|
79
|
+
# Block passed to initialization for yielding to calling parser.
|
80
|
+
# Should conform to the yield specs for #initialize
|
81
|
+
# Yield to generate a triple
|
82
|
+
def start_production(term, &block)
|
83
|
+
@@start_handlers ||= {}
|
84
|
+
@@start_handlers[term] = block
|
85
|
+
end
|
86
|
+
|
87
|
+
##
|
88
|
+
# Defines a production called when production of associated
|
89
|
+
# terminals and non-terminals has completed
|
90
|
+
# with data from previous production along with data defined for the
|
91
|
+
# current production. Block is called in an evaluation block from
|
92
|
+
# the enclosing parser.
|
93
|
+
#
|
94
|
+
# @param [Symbol] term
|
95
|
+
# Term which is a key in the branch table
|
96
|
+
# @yield [input, current, block]
|
97
|
+
# @yieldparam [Hash] input
|
98
|
+
# A Hash containing input from the parent production
|
99
|
+
# @yieldparam [Hash] current
|
100
|
+
# A Hash defined for the current production, during :start
|
101
|
+
# may be initialized with data to pass to further productions,
|
102
|
+
# during :finish, it contains data placed by earlier productions
|
103
|
+
# @yieldparam [Proc] block
|
104
|
+
# Block passed to initialization for yielding to calling parser.
|
105
|
+
# Should conform to the yield specs for #initialize
|
106
|
+
# Yield to generate a triple
|
107
|
+
def production(term, &block)
|
108
|
+
@@production_handlers ||= {}
|
109
|
+
@@production_handlers[term] = block
|
110
|
+
end
|
111
|
+
|
112
|
+
# Evaluate a handler, delegating to the specified object.
|
113
|
+
# This is necessary so that handlers can operate within the
|
114
|
+
# binding context of the parser in which they're invoked.
|
115
|
+
# @param [Object] object
|
116
|
+
# @return [Object]
|
117
|
+
def eval_with_binding(object)
|
118
|
+
@@delegate = object
|
119
|
+
object.instance_eval {yield}
|
120
|
+
end
|
121
|
+
|
122
|
+
private
|
123
|
+
|
124
|
+
def method_missing(method, *args, &block)
|
125
|
+
if @@delegate ||= nil
|
126
|
+
@@delegate.send method, *args, &block
|
127
|
+
else
|
128
|
+
super
|
129
|
+
end
|
87
130
|
end
|
88
131
|
end
|
89
132
|
|
@@ -163,12 +206,12 @@ module EBNF::LL1
|
|
163
206
|
@branch = options[:branch]
|
164
207
|
@first = options[:first] ||= {}
|
165
208
|
@follow = options[:follow] ||= {}
|
166
|
-
@lexer = input.is_a?(Lexer) ? input : Lexer.new(input, self.class.patterns, @options
|
209
|
+
@lexer = input.is_a?(Lexer) ? input : Lexer.new(input, self.class.patterns, @options)
|
167
210
|
@productions = []
|
168
211
|
@parse_callback = block
|
169
212
|
@recovering = false
|
170
213
|
@error_log = []
|
171
|
-
terminals = self.class.patterns.map(&:
|
214
|
+
terminals = self.class.patterns.map(&:type) # Get defined terminals to help with branching
|
172
215
|
|
173
216
|
# Unrecoverable errors
|
174
217
|
raise Error, "Branch table not defined" unless @branch && @branch.length > 0
|
@@ -257,13 +300,18 @@ module EBNF::LL1
|
|
257
300
|
# If in recovery mode, continue popping until we find a term with a follow list
|
258
301
|
while !pushed &&
|
259
302
|
!todo_stack.empty? &&
|
260
|
-
( todo_stack.last
|
261
|
-
(@recovering && @follow
|
262
|
-
debug("parse(pop)", :level => 2) {"todo #{todo_stack.last.inspect}, depth #{depth}
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
303
|
+
( (terms = todo_stack.last.fetch(:terms, [])).empty? ||
|
304
|
+
(@recovering && @follow.fetch(terms.last, []).none? {|t| token == t}))
|
305
|
+
debug("parse(pop)", :level => 2) {"todo #{todo_stack.last.inspect}, depth #{depth}"}
|
306
|
+
if terms.empty?
|
307
|
+
prod = todo_stack.last[:prod]
|
308
|
+
todo_stack.pop
|
309
|
+
onFinish
|
310
|
+
else
|
311
|
+
# Stop recovering when we reach a production which starts with the term
|
312
|
+
debug("parse(pop)", :level => 2) {"recovery complete"}
|
313
|
+
@recovering = false
|
314
|
+
end
|
267
315
|
end
|
268
316
|
end
|
269
317
|
|
@@ -272,7 +320,11 @@ module EBNF::LL1
|
|
272
320
|
# Continue popping contexts off of the stack
|
273
321
|
while !todo_stack.empty?
|
274
322
|
debug("parse(eof)", :level => 2) {"stack #{todo_stack.last.inspect}, depth #{depth}"}
|
275
|
-
if
|
323
|
+
# There can't be anything left to do, or if there is, it must be optional
|
324
|
+
last_terms = todo_stack.last[:terms]
|
325
|
+
if last_terms.length > 0 && last_terms.none? {|t|
|
326
|
+
@first.has_key?(t) && @first[t].include?(:_eps)
|
327
|
+
}
|
276
328
|
error("parse(eof)",
|
277
329
|
"End of input before end of production: stack #{todo_stack.last.inspect}, depth #{depth}"
|
278
330
|
)
|
@@ -289,35 +341,72 @@ module EBNF::LL1
|
|
289
341
|
|
290
342
|
def depth; (@productions || []).length; end
|
291
343
|
|
344
|
+
# Current ProdData element
|
345
|
+
def prod_data; @prod_data.last; end
|
346
|
+
|
347
|
+
# Add a single value to prod_data, allows for values to be an array
|
348
|
+
def add_prod_datum(sym, values)
|
349
|
+
case values
|
350
|
+
when Array
|
351
|
+
prod_data[sym] ||= []
|
352
|
+
debug("add_prod_datum(#{sym})") {"#{prod_data[sym].inspect} += #{values.inspect}"}
|
353
|
+
prod_data[sym] += values
|
354
|
+
when nil
|
355
|
+
return
|
356
|
+
else
|
357
|
+
prod_data[sym] ||= []
|
358
|
+
debug("add_prod_datum(#{sym})") {"#{prod_data[sym].inspect} << #{values.inspect}"}
|
359
|
+
prod_data[sym] << values
|
360
|
+
end
|
361
|
+
end
|
362
|
+
|
363
|
+
# Add values to production data, values arranged as an array
|
364
|
+
def add_prod_data(sym, *values)
|
365
|
+
return if values.compact.empty?
|
366
|
+
|
367
|
+
prod_data[sym] ||= []
|
368
|
+
prod_data[sym] += values
|
369
|
+
debug("add_prod_data(#{sym})") {"#{prod_data[sym].inspect} += #{values.inspect}"}
|
370
|
+
end
|
371
|
+
|
292
372
|
private
|
293
373
|
# Start for production
|
294
374
|
def onStart(prod)
|
295
|
-
handler = self.class.
|
375
|
+
handler = self.class.start_handlers[prod]
|
296
376
|
@productions << prod
|
297
377
|
if handler
|
298
378
|
# Create a new production data element, potentially allowing handler
|
299
379
|
# to customize before pushing on the @prod_data stack
|
300
380
|
progress("#{prod}(:start):#{@prod_data.length}") {@prod_data.last}
|
301
381
|
data = {}
|
302
|
-
|
382
|
+
self.class.eval_with_binding(self) {
|
383
|
+
handler.call(@prod_data.last, data, @parse_callback)
|
384
|
+
}
|
303
385
|
@prod_data << data
|
304
386
|
else
|
387
|
+
# Make sure we push as many as we pop, even if there is no
|
388
|
+
# explicit start handler
|
389
|
+
@prod_data << {} if self.class.production_handlers[prod]
|
305
390
|
progress("#{prod}(:start)") { get_token.inspect}
|
306
391
|
end
|
307
|
-
#puts @prod_data.inspect
|
392
|
+
#puts "prod_data(s): " + @prod_data.inspect
|
308
393
|
end
|
309
394
|
|
310
395
|
# Finish of production
|
311
396
|
def onFinish
|
397
|
+
#puts "prod_data(f): " + @prod_data.inspect
|
312
398
|
prod = @productions.last
|
313
399
|
handler = self.class.production_handlers[prod]
|
314
|
-
if handler
|
400
|
+
if handler && !@recovering
|
315
401
|
# Pop production data element from stack, potentially allowing handler to use it
|
316
402
|
data = @prod_data.pop
|
317
|
-
|
403
|
+
self.class.eval_with_binding(self) {
|
404
|
+
handler.call(@prod_data.last, data, @parse_callback)
|
405
|
+
}
|
406
|
+
#require 'debugger'; breakpoint
|
318
407
|
progress("#{prod}(:finish):#{@prod_data.length}") {@prod_data.last}
|
319
408
|
else
|
320
|
-
progress("#{prod}(:finish)",
|
409
|
+
progress("#{prod}(:finish)", "recovering: #{@recovering.inspect}")
|
321
410
|
end
|
322
411
|
@productions.pop
|
323
412
|
end
|
@@ -330,7 +419,9 @@ module EBNF::LL1
|
|
330
419
|
# Allows catch-all for simple string terminals
|
331
420
|
handler ||= self.class.terminal_handlers[nil] if prod.is_a?(String)
|
332
421
|
if handler
|
333
|
-
|
422
|
+
self.class.eval_with_binding(self) {
|
423
|
+
handler.call(parentProd, token, @prod_data.last)
|
424
|
+
}
|
334
425
|
progress("#{prod}(:token)", "", :depth => (depth + 1)) {"#{token}: #{@prod_data.last}"}
|
335
426
|
else
|
336
427
|
progress("#{prod}(:token)", "", :depth => (depth + 1)) {token.to_s}
|
@@ -380,7 +471,7 @@ module EBNF::LL1
|
|
380
471
|
skipped = @lexer.shift
|
381
472
|
progress("recovery") {"skip #{skipped.inspect}"}
|
382
473
|
end
|
383
|
-
debug("recovery") {"found #{token.inspect}"}
|
474
|
+
debug("recovery") {"found #{token.inspect} in #{first.include?(token) ? 'first' : 'follows'}"}
|
384
475
|
|
385
476
|
# If the token is a first, just return it. Otherwise, it is a follow
|
386
477
|
# and we need to skip to the end of the production
|
@@ -400,7 +491,7 @@ module EBNF::LL1
|
|
400
491
|
def error(node, message, options = {})
|
401
492
|
message += ", found #{options[:token].representation.inspect}" if options[:token]
|
402
493
|
message += " at line #{@lineno}" if @lineno
|
403
|
-
message += ", production = #{options[:production].inspect}" if options[:production]
|
494
|
+
message += ", production = #{options[:production].inspect}" if options[:production]
|
404
495
|
@error_log << message unless @recovering
|
405
496
|
@recovering = true
|
406
497
|
debug(node, message, options.merge(:level => 0))
|
@@ -446,7 +537,6 @@ module EBNF::LL1
|
|
446
537
|
if @options[:debug]
|
447
538
|
return debug(node, message, {:level => 0}.merge(options))
|
448
539
|
else
|
449
|
-
str = "[#{@lineno}]#{' ' * depth}#{node}: #{message}"
|
450
540
|
$stderr.puts("[#{@lineno}]#{' ' * depth}#{node}: #{message}")
|
451
541
|
end
|
452
542
|
end
|
data/lib/ebnf/rule.rb
CHANGED
@@ -186,7 +186,7 @@ module EBNF
|
|
186
186
|
# @param [Symbol, class] sym
|
187
187
|
# Symbol matching any start element, or if it is String, any start element which is a String
|
188
188
|
# @return [Array<Symbol, String>] list of symbol (singular), or strings which are start symbol, or nil if there are none
|
189
|
-
def starts_with(sym)
|
189
|
+
def starts_with?(sym)
|
190
190
|
if seq? && sym === (v = expr.fetch(1, nil))
|
191
191
|
[v]
|
192
192
|
elsif alt? && expr.any? {|e| sym === e}
|
@@ -196,6 +196,12 @@ module EBNF
|
|
196
196
|
end
|
197
197
|
end
|
198
198
|
|
199
|
+
# Do the firsts of this rule include the empty string?
|
200
|
+
# @return [Boolean]
|
201
|
+
def first_includes_eps?
|
202
|
+
@first && @first.include?(:_eps)
|
203
|
+
end
|
204
|
+
|
199
205
|
# Add terminal as proceding this rule
|
200
206
|
# @param [Array<Rule>] terminals
|
201
207
|
# @return [Integer] number of terminals added
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ebnf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-02
|
12
|
+
date: 2013-03-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: sxp
|
@@ -27,6 +27,22 @@ dependencies:
|
|
27
27
|
- - ! '>='
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rdf
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
30
46
|
- !ruby/object:Gem::Dependency
|
31
47
|
name: rspec
|
32
48
|
requirement: !ruby/object:Gem::Requirement
|
@@ -101,6 +117,7 @@ files:
|
|
101
117
|
- etc/doap.ttl
|
102
118
|
- etc/ebnf.ebnf
|
103
119
|
- etc/ebnf.ll1
|
120
|
+
- etc/ebnf.rb
|
104
121
|
- etc/turtle.ebnf
|
105
122
|
- etc/turtle.ll1
|
106
123
|
- etc/turtle.rb
|
@@ -117,13 +134,16 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
117
134
|
requirements:
|
118
135
|
- - ! '>='
|
119
136
|
- !ruby/object:Gem::Version
|
120
|
-
version: 1.
|
137
|
+
version: 1.8.7
|
121
138
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
122
139
|
none: false
|
123
140
|
requirements:
|
124
141
|
- - ! '>='
|
125
142
|
- !ruby/object:Gem::Version
|
126
143
|
version: '0'
|
144
|
+
segments:
|
145
|
+
- 0
|
146
|
+
hash: 1950185345296250945
|
127
147
|
requirements: []
|
128
148
|
rubyforge_project:
|
129
149
|
rubygems_version: 1.8.25
|