ebnf 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/VERSION +1 -1
- data/lib/ebnf/base.rb +2 -2
- data/lib/ebnf/ll1.rb +5 -5
- data/lib/ebnf/ll1/lexer.rb +12 -12
- data/lib/ebnf/ll1/parser.rb +354 -280
- data/lib/ebnf/ll1/scanner.rb +0 -1
- data/lib/ebnf/rule.rb +16 -9
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZDI4YWE4ZjExOGI5N2NkZDVmYzA4ZDMwMzM0ZGRhZThhNWU0ZGI2MQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MWZmMjVkYjMwYzY1NmNlMDc0ZDM5MzUxOWU1ZTUwNGU2NDQ2YzFiYQ==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
M2E2MTcyM2MyZWYyNTg5Y2E3ZDVkNWI2ODgwYjdhZjhlYmQ1YTUzZGE5Nzc3
|
10
|
+
M2M3MDFiYzllMzE5YjNmMDA2Zjc3NjQ5YzViMzhmMTNmZTBkYWZhMDk0M2Rl
|
11
|
+
MjlkNWMxNzY3ODM4ZDYxZjBiZWI2NzA4ZGRjYzQ2MGY2MTBiZDI=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
OGIxZGZjNzNhZTlkZDAwZTM0ZmExYzMzMzUxMjQ4ZmNmYjBmNTM4OGJlMmM2
|
14
|
+
NmFmODM4MjMwODYyY2E1YjhjNjNlY2E1YjI0MTg1NjFlYWI0MzI4ZThhMjA2
|
15
|
+
MmMyMjBiZjA5NGQ2MjlhYzA0Yzg1YzYzNzhjNjNkZmQ2ZTg5N2I=
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.3.0
|
data/lib/ebnf/base.rb
CHANGED
@@ -103,12 +103,12 @@ module EBNF
|
|
103
103
|
include Parser
|
104
104
|
|
105
105
|
# Abstract syntax tree from parse
|
106
|
-
#
|
106
|
+
#
|
107
107
|
# @return [Array<Rule>]
|
108
108
|
attr_reader :ast
|
109
109
|
|
110
110
|
# Grammar errors, or errors found generating parse tables
|
111
|
-
#
|
111
|
+
#
|
112
112
|
# @return [Array<String>]
|
113
113
|
attr_accessor :errors
|
114
114
|
|
data/lib/ebnf/ll1.rb
CHANGED
@@ -6,29 +6,29 @@ module EBNF
|
|
6
6
|
|
7
7
|
# Branch table, represented as a recursive hash.
|
8
8
|
# The table is indexed by rule symbol, which in-turn references a hash of terminals (which are the first terminals of the production), which in turn reference the sequence of rules that follow, given that terminal as input
|
9
|
-
#
|
9
|
+
#
|
10
10
|
# @return [Hash{Symbol => Hash{String, Symbol => Array<Symbol>}}]
|
11
11
|
attr_reader :branch
|
12
12
|
|
13
13
|
# First table
|
14
|
-
#
|
14
|
+
#
|
15
15
|
# @return [Hash{Symbol, String => Symbol}]
|
16
16
|
attr_reader :first
|
17
17
|
|
18
18
|
# Follow table
|
19
|
-
#
|
19
|
+
#
|
20
20
|
# @return [Hash{Symbol, String => Symbol}]
|
21
21
|
attr_reader :follow
|
22
22
|
|
23
23
|
# Terminal table
|
24
24
|
# The list of terminals used in the grammar.
|
25
|
-
#
|
25
|
+
#
|
26
26
|
# @return [Array<String, Symbol>]
|
27
27
|
attr_reader :terminals
|
28
28
|
|
29
29
|
# Start symbol
|
30
30
|
# The rule which starts the grammar
|
31
|
-
#
|
31
|
+
#
|
32
32
|
# @return [Symbol]
|
33
33
|
attr_reader :start
|
34
34
|
|
data/lib/ebnf/ll1/lexer.rb
CHANGED
@@ -53,12 +53,10 @@ module EBNF::LL1
|
|
53
53
|
ML_START = /\'\'\'|\"\"\"/.freeze # Beginning of terminals that may span lines
|
54
54
|
|
55
55
|
##
|
56
|
-
# @!attribute whitespace
|
57
56
|
# @return [Regexp] defines whitespace, defaults to WS
|
58
57
|
attr_reader :whitespace
|
59
58
|
|
60
59
|
##
|
61
|
-
# @!attribute comment
|
62
60
|
# @return [Regexp] defines single-line comment, defaults to COMMENT
|
63
61
|
attr_reader :comment
|
64
62
|
|
@@ -233,15 +231,16 @@ module EBNF::LL1
|
|
233
231
|
#
|
234
232
|
# @return [Token]
|
235
233
|
def recover
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
scanner.pos = scanner.pos + 1
|
234
|
+
until scanner.eos? || tok = match_token
|
235
|
+
if scanner.skip_until(@whitespace).nil? # Skip past current "token"
|
236
|
+
# No whitespace at the end, must be at end of string
|
237
|
+
scanner.terminate
|
238
|
+
else
|
239
|
+
skip_whitespace
|
243
240
|
end
|
244
241
|
end
|
242
|
+
scanner.unscan if tok
|
243
|
+
first
|
245
244
|
end
|
246
245
|
protected
|
247
246
|
|
@@ -253,9 +252,10 @@ module EBNF::LL1
|
|
253
252
|
def skip_whitespace
|
254
253
|
# skip all white space, but keep track of the current line number
|
255
254
|
while !scanner.eos?
|
256
|
-
|
255
|
+
if matched = scanner.scan(@whitespace)
|
257
256
|
@lineno += matched.count("\n")
|
258
|
-
elsif (
|
257
|
+
elsif (scanner.scan(@comment))
|
258
|
+
#
|
259
259
|
else
|
260
260
|
return
|
261
261
|
end
|
@@ -472,7 +472,7 @@ module EBNF::LL1
|
|
472
472
|
#
|
473
473
|
# @return [String]
|
474
474
|
def inspect
|
475
|
-
|
475
|
+
"#{@value.inspect}#{'(' + @type.to_s + ')' if @type}"
|
476
476
|
end
|
477
477
|
end # class Token
|
478
478
|
|
data/lib/ebnf/ll1/parser.rb
CHANGED
@@ -10,7 +10,6 @@ module EBNF::LL1
|
|
10
10
|
DEBUG_LEVEL = 10
|
11
11
|
|
12
12
|
##
|
13
|
-
# @!attribute [r] lineno
|
14
13
|
# @return [Integer] line number of current token
|
15
14
|
attr_reader :lineno
|
16
15
|
|
@@ -20,10 +19,10 @@ module EBNF::LL1
|
|
20
19
|
|
21
20
|
# DSL for creating terminals and productions
|
22
21
|
module ClassMethods
|
23
|
-
def start_handlers;
|
24
|
-
def production_handlers;
|
25
|
-
def terminal_handlers;
|
26
|
-
def patterns;
|
22
|
+
def start_handlers; @start_handlers || {}; end
|
23
|
+
def production_handlers; @production_handlers || {}; end
|
24
|
+
def terminal_handlers; @terminal_handlers || {}; end
|
25
|
+
def patterns; @patterns || []; end
|
27
26
|
|
28
27
|
##
|
29
28
|
# Defines the pattern for a terminal node and a block to be invoked
|
@@ -53,11 +52,11 @@ module EBNF::LL1
|
|
53
52
|
# Block passed to initialization for yielding to calling parser.
|
54
53
|
# Should conform to the yield specs for #initialize
|
55
54
|
def terminal(term, regexp, options = {}, &block)
|
56
|
-
|
55
|
+
@patterns ||= []
|
57
56
|
# Passed in order to define evaluation sequence
|
58
|
-
|
59
|
-
|
60
|
-
|
57
|
+
@patterns << EBNF::LL1::Lexer::Terminal.new(term, regexp, options)
|
58
|
+
@terminal_handlers ||= {}
|
59
|
+
@terminal_handlers[term] = block if block_given?
|
61
60
|
end
|
62
61
|
|
63
62
|
##
|
@@ -80,8 +79,8 @@ module EBNF::LL1
|
|
80
79
|
# Should conform to the yield specs for #initialize
|
81
80
|
# Yield to generate a triple
|
82
81
|
def start_production(term, &block)
|
83
|
-
|
84
|
-
|
82
|
+
@start_handlers ||= {}
|
83
|
+
@start_handlers[term] = block
|
85
84
|
end
|
86
85
|
|
87
86
|
##
|
@@ -105,8 +104,8 @@ module EBNF::LL1
|
|
105
104
|
# Should conform to the yield specs for #initialize
|
106
105
|
# Yield to generate a triple
|
107
106
|
def production(term, &block)
|
108
|
-
|
109
|
-
|
107
|
+
@production_handlers ||= {}
|
108
|
+
@production_handlers[term] = block
|
110
109
|
end
|
111
110
|
|
112
111
|
# Evaluate a handler, delegating to the specified object.
|
@@ -115,15 +114,15 @@ module EBNF::LL1
|
|
115
114
|
# @param [Object] object
|
116
115
|
# @return [Object]
|
117
116
|
def eval_with_binding(object)
|
118
|
-
|
117
|
+
@delegate = object
|
119
118
|
object.instance_eval {yield}
|
120
119
|
end
|
121
120
|
|
122
121
|
private
|
123
122
|
|
124
123
|
def method_missing(method, *args, &block)
|
125
|
-
if
|
126
|
-
|
124
|
+
if @delegate ||= nil
|
125
|
+
@delegate.send method, *args, &block
|
127
126
|
else
|
128
127
|
super
|
129
128
|
end
|
@@ -137,29 +136,40 @@ module EBNF::LL1
|
|
137
136
|
#
|
138
137
|
# @example
|
139
138
|
# require 'rdf/ll1/parser'
|
140
|
-
#
|
139
|
+
#
|
141
140
|
# class MyParser
|
142
141
|
# include EBNF::LL1::Parser
|
143
|
-
#
|
142
|
+
#
|
144
143
|
# branch MyParser::BRANCH
|
145
|
-
#
|
144
|
+
#
|
145
|
+
# ##
|
146
|
+
# # Defines a production called before parsing a non-terminal
|
147
|
+
# # with data from previous production along with data defined for the
|
148
|
+
# # current production
|
149
|
+
# #
|
150
|
+
# start_production :object do |input, current, callback|
|
151
|
+
# # Note production as triples for blankNodePropertyList
|
152
|
+
# # to set :subject instead of :resource
|
153
|
+
# current[:triples] = true
|
154
|
+
# end
|
155
|
+
#
|
146
156
|
# ##
|
147
|
-
# # Defines a production called during
|
157
|
+
# # Defines a production called after parsing a non-terminal
|
148
158
|
# # with data from previous production along with data defined for the
|
149
159
|
# # current production
|
150
160
|
# #
|
151
|
-
# #
|
152
|
-
# production :object do |
|
161
|
+
# # callback to processor block
|
162
|
+
# production :object do |input, current, callback|
|
153
163
|
# object = current[:resource]
|
154
|
-
#
|
164
|
+
# callback.call :statement, RDF::Statement.new(input[:subject], input[:predicate], object)
|
155
165
|
# end
|
156
|
-
#
|
166
|
+
#
|
157
167
|
# ##
|
158
168
|
# # Defines the pattern for a terminal node
|
159
|
-
# terminal :BLANK_NODE_LABEL, %r(_:(#{PN_LOCAL})) do |
|
169
|
+
# terminal :BLANK_NODE_LABEL, %r(_:(#{PN_LOCAL})) do |production, token, input|
|
160
170
|
# input[:BLANK_NODE_LABEL] = RDF::Node.new(token)
|
161
171
|
# end
|
162
|
-
#
|
172
|
+
#
|
163
173
|
# ##
|
164
174
|
# # Iterates the given block for each RDF statement in the input.
|
165
175
|
# #
|
@@ -168,7 +178,7 @@ module EBNF::LL1
|
|
168
178
|
# # @return [void]
|
169
179
|
# def each_statement(&block)
|
170
180
|
# @callback = block
|
171
|
-
#
|
181
|
+
#
|
172
182
|
# parse(START.to_sym) do |context, *data|
|
173
183
|
# case context
|
174
184
|
# when :statement
|
@@ -176,11 +186,12 @@ module EBNF::LL1
|
|
176
186
|
# end
|
177
187
|
# end
|
178
188
|
# end
|
179
|
-
#
|
189
|
+
#
|
180
190
|
# end
|
181
191
|
#
|
182
192
|
# @param [String, #to_s] input
|
183
|
-
# @param [Symbol, #to_s]
|
193
|
+
# @param [Symbol, #to_s] start
|
194
|
+
# The starting production for the parser. It may be a URI from the grammar, or a symbol representing the local_name portion of the grammar URI.
|
184
195
|
# @param [Hash{Symbol => Object}] options
|
185
196
|
# @option options [Hash{Symbol,String => Hash{Symbol,String => Array<Symbol,String>}}] :branch LL1 branch table.
|
186
197
|
# @option options [Hash{Symbol,String => Array<Symbol,String>}] :first ({})
|
@@ -202,8 +213,11 @@ module EBNF::LL1
|
|
202
213
|
# @yieldparam [Symbol] *data
|
203
214
|
# Data specific to the call
|
204
215
|
# @return [EBNF::LL1::Parser]
|
216
|
+
# @raise [Exception] Raises exceptions for parsing errors
|
217
|
+
# or errors raised during processing callbacks. Internal
|
218
|
+
# errors are raised using {Error}.
|
205
219
|
# @see http://cs.adelaide.edu.au/~charles/lt/Lectures/07-ErrorRecovery.pdf
|
206
|
-
def parse(input = nil,
|
220
|
+
def parse(input = nil, start = nil, options = {}, &block)
|
207
221
|
@options = options.dup
|
208
222
|
@branch = options[:branch]
|
209
223
|
@first = options[:first] ||= {}
|
@@ -217,110 +231,171 @@ module EBNF::LL1
|
|
217
231
|
|
218
232
|
# Unrecoverable errors
|
219
233
|
raise Error, "Branch table not defined" unless @branch && @branch.length > 0
|
220
|
-
raise Error, "Starting production not defined" unless
|
234
|
+
raise Error, "Starting production not defined" unless start
|
221
235
|
|
222
236
|
@prod_data = [{}]
|
223
|
-
|
224
|
-
todo_stack = [{:prod =>
|
237
|
+
start = start.split('#').last.to_sym unless start.is_a?(Symbol)
|
238
|
+
todo_stack = [{:prod => start, :terms => nil}]
|
225
239
|
|
226
240
|
while !todo_stack.empty?
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
# skipping invalid tokens until either a valid token is found (from @first),
|
243
|
-
# or a token appearing in @follow appears.
|
244
|
-
token = skip_until_valid(todo_stack)
|
245
|
-
|
246
|
-
# At this point, token is either nil, in the first set of the production,
|
247
|
-
# or in the follow set of this production or any previous production
|
248
|
-
debug("parse(production)") do
|
249
|
-
"token #{token ? token.representation.inspect : 'nil'}, " +
|
250
|
-
"prod #{cur_prod.inspect}, " +
|
251
|
-
"depth #{depth}"
|
252
|
-
end
|
241
|
+
begin
|
242
|
+
@recovering = false
|
243
|
+
pushed = false
|
244
|
+
if todo_stack.last[:terms].nil?
|
245
|
+
todo_stack.last[:terms] = []
|
246
|
+
cur_prod = todo_stack.last[:prod]
|
247
|
+
|
248
|
+
# If cur_prod is the starting production, we can reset the stack
|
249
|
+
# to the beginning to avoid excessive growth in the production
|
250
|
+
# stack
|
251
|
+
if options[:reset_on_start] && cur_prod == start
|
252
|
+
todo_stack = [{:prod => start, :terms => []}]
|
253
|
+
@productions = []
|
254
|
+
@prod_data = [{}]
|
255
|
+
end
|
253
256
|
|
254
|
-
|
255
|
-
|
256
|
-
break if token.nil?
|
257
|
+
# Fetch the current token
|
258
|
+
token = get_token(:recover)
|
257
259
|
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
"
|
263
|
-
"
|
264
|
-
"prod_branch #{prod_branch.keys.inspect}, " +
|
265
|
-
"sequence #{sequence.inspect}"
|
260
|
+
# At this point, token is either nil, in the first set of the production,
|
261
|
+
# or in the follow set of this production or any previous production
|
262
|
+
debug("parse(production)") do
|
263
|
+
"token #{token ? token.representation.inspect : 'nil'}, " +
|
264
|
+
"prod #{cur_prod.inspect}, " +
|
265
|
+
"depth #{depth}"
|
266
266
|
end
|
267
267
|
|
268
|
-
|
269
|
-
|
270
|
-
|
268
|
+
# Got an opened production
|
269
|
+
onStart(cur_prod)
|
270
|
+
|
271
|
+
if token.nil?
|
272
|
+
if !(first_include?(cur_prod, :_eps) && follow_include?(cur_prod, :_eof))
|
273
|
+
# End of file, and production does not contain eps, or it does, but follow does not contain eof
|
274
|
+
raise Error.new("Unexpected end of input", :production => cur_prod)
|
271
275
|
else
|
272
|
-
|
273
|
-
|
274
|
-
|
276
|
+
debug("parse(production)") {"End of input prod #{cur_prod.inspect}"}
|
277
|
+
end
|
278
|
+
elsif prod_branch = @branch[cur_prod]
|
279
|
+
sequence = prod_branch.fetch(token.representation) do
|
280
|
+
raise Error.new("#{token.inspect} does not match production #{cur_prod.inspect}",
|
281
|
+
:production => cur_prod)
|
282
|
+
end
|
283
|
+
debug("parse(production)") do
|
284
|
+
"token #{token.representation.inspect} " +
|
285
|
+
"prod #{cur_prod.inspect}, " +
|
286
|
+
"prod_branch #{prod_branch.keys.inspect}, " +
|
287
|
+
"sequence #{sequence.inspect}"
|
275
288
|
end
|
289
|
+
todo_stack.last[:terms] += sequence
|
290
|
+
else
|
291
|
+
raise Error.new("No branches found for #{cur_prod.inspect}",
|
292
|
+
:production => cur_prod, :token => token)
|
276
293
|
end
|
277
|
-
todo_stack.last[:terms] += sequence if sequence
|
278
|
-
else
|
279
|
-
# Is this a fatal error?
|
280
|
-
error("parse(fatal?)", "No branches found for #{cur_prod.inspect}",
|
281
|
-
:production => cur_prod, :token => token)
|
282
294
|
end
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
while !todo_stack.last[:terms].to_a.empty?
|
287
|
-
begin
|
295
|
+
|
296
|
+
debug("parse(terms)") {"todo #{todo_stack.last.inspect}, depth #{depth}"}
|
297
|
+
while !todo_stack.last[:terms].to_a.empty?
|
288
298
|
# Get the next term in this sequence
|
289
299
|
term = todo_stack.last[:terms].shift
|
290
300
|
debug("parse(token)") {"accept #{term.inspect}"}
|
301
|
+
|
291
302
|
if token = accept(term)
|
292
|
-
@recovering = false
|
293
303
|
debug("parse(token)") {"token #{token.inspect}, term #{term.inspect}"}
|
294
|
-
|
295
|
-
elsif terminals.include?(term)
|
304
|
+
onTerminal(term, token)
|
305
|
+
elsif terminals.include?(term)
|
296
306
|
# If term is a terminal, then it is an error if token does not
|
297
307
|
# match it
|
298
|
-
|
308
|
+
raise Error.new("#{get_token.inspect} does not match terminal #{term.inspect}",
|
309
|
+
:production => cur_prod)
|
299
310
|
else
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
311
|
+
token = get_token
|
312
|
+
|
313
|
+
# If token is not in firsts of term, but eps is, skip to next
|
314
|
+
# term
|
315
|
+
if first_include?(term, :_eps) && !first_include?(term, token)
|
316
|
+
debug("parse(token)") {"skip optional term #{term.inspect} on #{token.inspect}"}
|
317
|
+
break
|
318
|
+
else
|
319
|
+
# Push term onto stack
|
320
|
+
todo_stack << {:prod => term, :terms => nil}
|
321
|
+
debug("parse(push)") {"term #{term.inspect}, depth #{depth}"}
|
322
|
+
pushed = true
|
323
|
+
break
|
324
|
+
end
|
305
325
|
end
|
306
326
|
end
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
327
|
+
rescue Lexer::Error, Error => e
|
328
|
+
# Lexer encountered an illegal token or the parser encountered
|
329
|
+
# a terminal which is inappropriate for the current production.
|
330
|
+
# Perform error recovery to find a reasonable terminal based
|
331
|
+
# on the follow sets of the relevant productions. This includes
|
332
|
+
# remaining terms from the current production and the stacked
|
333
|
+
# productions
|
334
|
+
@lineno = e.lineno
|
335
|
+
if e.is_a?(Lexer::Error)
|
336
|
+
# Skip to the next valid terminal
|
337
|
+
@lexer.recover
|
338
|
+
error("parse(#{e.class})", "With input '#{e.input}': #{e.message}, skipped to #{(get_token(:recover) || :eof).inspect}",
|
339
|
+
:production => @productions.last)
|
340
|
+
else
|
341
|
+
# Otherwise, the terminal is fine, just not for this production.
|
342
|
+
@lexer.shift
|
343
|
+
error("parse(#{e.class})", "#{e.message}, skipped to #{(get_token(:recover) || :eof).inspect}",
|
344
|
+
:production => @productions.last, :token => e.token)
|
345
|
+
end
|
346
|
+
|
347
|
+
# Get the list of follows for this sequence, this production and the stacked productions.
|
348
|
+
debug("recovery", "stack follows:", :level => 4)
|
349
|
+
todo_stack.reverse.each do |todo|
|
350
|
+
debug("recovery", :level => 4) {" #{todo[:prod]}: #{@follow[todo[:prod]].inspect}"}
|
351
|
+
end
|
352
|
+
|
353
|
+
# Find all follows to the top of the stack
|
354
|
+
follows = todo_stack.inject([]) do |follow, todo|
|
355
|
+
prod = todo[:prod]
|
356
|
+
follow += @follow[prod] || []
|
357
|
+
end.uniq
|
358
|
+
debug("recovery") {"follows: #{follows.inspect}"}
|
359
|
+
|
360
|
+
# Skip tokens until one is found in follows
|
361
|
+
while (token = get_token(:recover)) && follows.none? {|t| token === t}
|
362
|
+
skipped = @lexer.shift
|
363
|
+
progress("recovery") {"skip #{skipped.inspect}"}
|
364
|
+
end
|
365
|
+
debug("recovery") {"found #{token.inspect} in follows"}
|
366
|
+
|
367
|
+
# Pop stack elements until token is in follows
|
368
|
+
while !todo_stack.empty? &&
|
369
|
+
!follow_include?(todo_stack.last[:prod], token || :_eof)
|
370
|
+
debug("recovery(pop)") {"todo #{todo_stack.last.inspect}, depth #{depth}"}
|
318
371
|
todo_stack.pop
|
319
372
|
onFinish
|
373
|
+
end
|
374
|
+
|
375
|
+
# Token is now in the first of the top production
|
376
|
+
unless todo_stack.empty?
|
377
|
+
todo_stack.pop
|
378
|
+
onFinish
|
379
|
+
end
|
380
|
+
|
381
|
+
if todo_stack.empty?
|
382
|
+
# Recovered to end of last production
|
383
|
+
warn("recover", "recovered to end of productions")
|
320
384
|
else
|
321
|
-
|
322
|
-
|
323
|
-
|
385
|
+
warn("recover", "recovered to #{todo_stack.last[:prod].inspect} with #{token.inspect}")
|
386
|
+
end
|
387
|
+
|
388
|
+
@recovering = false
|
389
|
+
ensure
|
390
|
+
# After completing the last production in a sequence, pop down until we find a production
|
391
|
+
#
|
392
|
+
# If in recovery mode, continue popping until we find a term with a follow list
|
393
|
+
while !pushed &&
|
394
|
+
!todo_stack.empty? &&
|
395
|
+
todo_stack.last.fetch(:terms, []).empty?
|
396
|
+
debug("parse(pop)") {"todo #{todo_stack.last.inspect}, depth #{depth}"}
|
397
|
+
todo_stack.pop
|
398
|
+
onFinish
|
324
399
|
end
|
325
400
|
end
|
326
401
|
end
|
@@ -329,12 +404,10 @@ module EBNF::LL1
|
|
329
404
|
|
330
405
|
# Continue popping contexts off of the stack
|
331
406
|
while !todo_stack.empty?
|
332
|
-
debug("parse(eof)"
|
407
|
+
debug("parse(eof)") {"stack #{todo_stack.last.inspect}, depth #{depth}"}
|
333
408
|
# There can't be anything left to do, or if there is, it must be optional
|
334
409
|
last_terms = todo_stack.last[:terms]
|
335
|
-
if last_terms.length > 0 && last_terms.none? {|t|
|
336
|
-
@first.has_key?(t) && @first[t].include?(:_eps)
|
337
|
-
}
|
410
|
+
if last_terms.length > 0 && last_terms.none? {|t|first_include?(t, :_eps)}
|
338
411
|
error("parse(eof)",
|
339
412
|
"End of input before end of production: stack #{todo_stack.last.inspect}, depth #{depth}"
|
340
413
|
)
|
@@ -342,10 +415,10 @@ module EBNF::LL1
|
|
342
415
|
todo_stack.pop
|
343
416
|
onFinish
|
344
417
|
end
|
345
|
-
|
418
|
+
|
346
419
|
# When all is said and done, raise the error log
|
347
420
|
unless @error_log.empty?
|
348
|
-
raise Error, @error_log.join("\n\t")
|
421
|
+
raise Error, @error_log.join("\n\t")
|
349
422
|
end
|
350
423
|
end
|
351
424
|
|
@@ -369,16 +442,108 @@ module EBNF::LL1
|
|
369
442
|
prod_data[sym] << values
|
370
443
|
end
|
371
444
|
end
|
372
|
-
|
445
|
+
|
373
446
|
# Add values to production data, values arranged as an array
|
374
447
|
def add_prod_data(sym, *values)
|
375
448
|
return if values.compact.empty?
|
376
|
-
|
449
|
+
|
377
450
|
prod_data[sym] ||= []
|
378
451
|
prod_data[sym] += values
|
379
452
|
debug("add_prod_data(#{sym})") {"#{prod_data[sym].inspect} += #{values.inspect}"}
|
380
453
|
end
|
381
|
-
|
454
|
+
|
455
|
+
protected
|
456
|
+
|
457
|
+
##
|
458
|
+
# Error information, used as level `0` debug messages.
|
459
|
+
#
|
460
|
+
# @param [String] node Relevant location associated with message
|
461
|
+
# @param [String] message Error string
|
462
|
+
# @param [Hash] options
|
463
|
+
# @option options [URI, #to_s] :production
|
464
|
+
# @option options [Token] :token
|
465
|
+
# @see {#debug}
|
466
|
+
def error(node, message, options = {})
|
467
|
+
message += ", found #{options[:token].inspect}" if options[:token]
|
468
|
+
message += " at line #{@lineno}" if @lineno
|
469
|
+
message += ", production = #{options[:production].inspect}" if options[:production]
|
470
|
+
@error_log << message unless @recovering
|
471
|
+
@recovering = true
|
472
|
+
debug(node, message, options.merge(:level => 0))
|
473
|
+
end
|
474
|
+
|
475
|
+
##
|
476
|
+
# Warning information, used as level `1` debug messages.
|
477
|
+
#
|
478
|
+
# @param [String] node Relevant location associated with message
|
479
|
+
# @param [String] message Error string
|
480
|
+
# @param [Hash] options
|
481
|
+
# @option options [URI, #to_s] :production
|
482
|
+
# @option options [Token] :token
|
483
|
+
# @see {#debug}
|
484
|
+
def warn(node, message, options = {})
|
485
|
+
message += ", with token #{options[:token].inspect}" if options[:token]
|
486
|
+
message += " at line #{@lineno}" if @lineno
|
487
|
+
message += ", production = #{options[:production].inspect}" if options[:production]
|
488
|
+
@error_log << message unless @recovering
|
489
|
+
debug(node, message, options.merge(:level => 1))
|
490
|
+
end
|
491
|
+
|
492
|
+
##
|
493
|
+
# Progress output when parsing. Passed as level `2` debug messages.
|
494
|
+
#
|
495
|
+
# @overload progress(node, message, options)
|
496
|
+
# @param [String] node Relevant location associated with message
|
497
|
+
# @param [String] message ("")
|
498
|
+
# @param [Hash] options
|
499
|
+
# @option options [Integer] :depth
|
500
|
+
# Recursion depth for indenting output
|
501
|
+
# @see {#debug}
|
502
|
+
def progress(node, *args)
|
503
|
+
return unless @options[:progress] || @options[:debug]
|
504
|
+
options = args.last.is_a?(Hash) ? args.pop : {}
|
505
|
+
message = args.join(",")
|
506
|
+
message += yield.to_s if block_given?
|
507
|
+
debug(node, message, options.merge(:level => 2))
|
508
|
+
end
|
509
|
+
|
510
|
+
##
|
511
|
+
# Progress output when debugging.
|
512
|
+
#
|
513
|
+
# The call is ignored, unless `@options[:debug]` is set, in which
|
514
|
+
# case it yields tracing information as indicated. Additionally,
|
515
|
+
# if `@options[:debug]` is an Integer, the call is aborted if the
|
516
|
+
# `:level` option is greater than `@options[:debug]`.
|
517
|
+
#
|
518
|
+
# @overload debug(node, message, options)
|
519
|
+
# @param [Array<String>] args Relevant location associated with message
|
520
|
+
# @param [Hash] options
|
521
|
+
# @option options [Integer] :depth
|
522
|
+
# Recursion depth for indenting output
|
523
|
+
# @option options [Integer] :level
|
524
|
+
# Level assigned to message, by convention, level `0` is for
|
525
|
+
# errors, level `1` is for warnings, level `2` is for parser
|
526
|
+
# progress information, and anything higher is for various levels
|
527
|
+
# of debug information.
|
528
|
+
#
|
529
|
+
# @yield trace, level, lineno, depth, args
|
530
|
+
# @yieldparam [:trace] trace
|
531
|
+
# @yieldparam [Integer] level
|
532
|
+
# @yieldparam [Integer] lineno
|
533
|
+
# @yieldparam [Integer] depth Recursive depth of productions
|
534
|
+
# @yieldparam [Array<String>] args
|
535
|
+
# @yieldreturn [String] added to message
|
536
|
+
def debug(*args)
|
537
|
+
return unless @options[:debug] && @parse_callback
|
538
|
+
options = args.last.is_a?(Hash) ? args.pop : {}
|
539
|
+
debug_level = options.fetch(:level, 3)
|
540
|
+
return if @options[:debug].is_a?(Integer) && debug_level > @options[:debug]
|
541
|
+
|
542
|
+
depth = options[:depth] || self.depth
|
543
|
+
args << yield if block_given?
|
544
|
+
@parse_callback.call(:trace, debug_level, @lineno, depth, *args)
|
545
|
+
end
|
546
|
+
|
382
547
|
private
|
383
548
|
# Start for production
|
384
549
|
def onStart(prod)
|
@@ -389,15 +554,20 @@ module EBNF::LL1
|
|
389
554
|
# to customize before pushing on the @prod_data stack
|
390
555
|
progress("#{prod}(:start):#{@prod_data.length}") {@prod_data.last}
|
391
556
|
data = {}
|
392
|
-
|
393
|
-
|
394
|
-
|
557
|
+
begin
|
558
|
+
self.class.eval_with_binding(self) {
|
559
|
+
handler.call(@prod_data.last, data, @parse_callback)
|
560
|
+
}
|
561
|
+
rescue Exception => e
|
562
|
+
error("start", "#{e.class}: #{e.message}", :production => prod)
|
563
|
+
@recovering = false
|
564
|
+
end
|
395
565
|
@prod_data << data
|
396
566
|
else
|
397
567
|
# Make sure we push as many as we pop, even if there is no
|
398
568
|
# explicit start handler
|
399
569
|
@prod_data << {} if self.class.production_handlers[prod]
|
400
|
-
progress("#{prod}(:start)") { get_token.inspect}
|
570
|
+
progress("#{prod}(:start)") { get_token.inspect + (@recovering ? ' recovering' : '')}
|
401
571
|
end
|
402
572
|
#puts "prod_data(s): " + @prod_data.inspect
|
403
573
|
end
|
@@ -410,201 +580,105 @@ module EBNF::LL1
|
|
410
580
|
if handler && !@recovering
|
411
581
|
# Pop production data element from stack, potentially allowing handler to use it
|
412
582
|
data = @prod_data.pop
|
413
|
-
|
414
|
-
|
415
|
-
|
583
|
+
begin
|
584
|
+
self.class.eval_with_binding(self) {
|
585
|
+
handler.call(@prod_data.last, data, @parse_callback)
|
586
|
+
}
|
587
|
+
rescue Exception => e
|
588
|
+
error("finish", "#{e.class}: #{e.message}", :production => prod)
|
589
|
+
@recovering = false
|
590
|
+
end
|
416
591
|
progress("#{prod}(:finish):#{@prod_data.length}") {@prod_data.last}
|
417
592
|
else
|
418
|
-
progress("#{prod}(:finish)"
|
593
|
+
progress("#{prod}(:finish)") { "recovering" if @recovering }
|
419
594
|
end
|
420
595
|
@productions.pop
|
421
596
|
end
|
422
597
|
|
423
|
-
# A
|
424
|
-
def
|
598
|
+
# A terminal
|
599
|
+
def onTerminal(prod, token)
|
425
600
|
unless @productions.empty?
|
426
601
|
parentProd = @productions.last
|
427
602
|
handler = self.class.terminal_handlers[prod]
|
428
603
|
# Allows catch-all for simple string terminals
|
429
604
|
handler ||= self.class.terminal_handlers[nil] if prod.is_a?(String)
|
430
605
|
if handler
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
606
|
+
begin
|
607
|
+
self.class.eval_with_binding(self) {
|
608
|
+
handler.call(parentProd, token, @prod_data.last, @parse_callback)
|
609
|
+
}
|
610
|
+
rescue Exception => e
|
611
|
+
error("terminal", "#{e.class}: #{e.message}", :production => prod)
|
612
|
+
@recovering = false
|
613
|
+
end
|
614
|
+
progress("#{prod}(:terminal)", "", :depth => (depth + 1)) {"#{token}: #{@prod_data.last}"}
|
435
615
|
else
|
436
|
-
progress("#{prod}(:
|
616
|
+
progress("#{prod}(:terminal)", "", :depth => (depth + 1)) {token.to_s}
|
437
617
|
end
|
438
618
|
else
|
439
|
-
error("#{parentProd}(:
|
619
|
+
error("#{parentProd}(:terminal)", "Terminal has no parent production", :production => prod)
|
440
620
|
end
|
441
621
|
end
|
442
|
-
|
443
|
-
|
444
|
-
#
|
445
|
-
# or can follow a production in the stack.
|
622
|
+
|
623
|
+
##
|
624
|
+
# Does first include the specified token
|
446
625
|
#
|
447
|
-
# @
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
return if token.nil? && @follow.fetch(cur_prod, []).include?(:_eof)
|
457
|
-
|
458
|
-
# If this token can be used by the top production, return it
|
459
|
-
# Otherwise, if the banch table allows empty, also return the token
|
460
|
-
return token if !@recovering && (expected.any? {|t| (token || :_eps) === t})
|
461
|
-
|
462
|
-
# Otherwise, it's an error condition, and skip either until
|
463
|
-
# we find a valid token for this production, or until we find
|
464
|
-
# something that can follow this production
|
465
|
-
error("skip_until_valid", "expected one of #{expected.map(&:inspect).join(", ")}, found #{token.inspect}",
|
466
|
-
:production => cur_prod, :token => token)
|
467
|
-
|
468
|
-
debug("recovery", "stack follows:")
|
469
|
-
todo_stack.reverse.each do |todo|
|
470
|
-
debug("recovery") {" #{todo[:prod]}: #{@follow[todo[:prod]].inspect}"}
|
626
|
+
# @param [Symbol] production
|
627
|
+
# @param [Symbol, Lexer::Token] token
|
628
|
+
# A terminal, or symbol or string
|
629
|
+
# @return [Boolean]
|
630
|
+
def first_include?(production, token)
|
631
|
+
if token.is_a?(Lexer::Token)
|
632
|
+
@first.fetch(production, []).any? {|t| token === t}
|
633
|
+
else
|
634
|
+
@first.fetch(production, []).include?(token)
|
471
635
|
end
|
636
|
+
end
|
472
637
|
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
debug("recovery") {"found #{token.inspect} in #{first.include?(token) ? 'first' : 'follows'}"}
|
486
|
-
|
487
|
-
# If the token is a first, just return it. Otherwise, it is a follow
|
488
|
-
# and we need to skip to the end of the production
|
489
|
-
unless first.any? {|t| token == t} || todo_stack.last[:terms].empty?
|
490
|
-
debug("recovery") {"token in follows, skip past #{todo_stack.last[:terms].inspect}"}
|
491
|
-
todo_stack.last[:terms] = []
|
638
|
+
##
|
639
|
+
# Does follow include the specified terminal
|
640
|
+
#
|
641
|
+
# @param [Symbol] production
|
642
|
+
# @param [Symbol, Lexer::Token] token
|
643
|
+
# A terminal, or symbol or string
|
644
|
+
# @return [Boolean]
|
645
|
+
def follow_include?(production, token)
|
646
|
+
if token.is_a?(Lexer::Token)
|
647
|
+
@follow.fetch(production, []).any? {|t| token === t}
|
648
|
+
else
|
649
|
+
@follow.fetch(production, []).include?(token)
|
492
650
|
end
|
493
|
-
token
|
494
651
|
end
|
495
652
|
|
496
653
|
##
|
497
|
-
# Return the next token,
|
654
|
+
# Return the next token, raising an error if the token is invalid
|
498
655
|
#
|
656
|
+
# @param [:recover] recover
|
657
|
+
# Recover from errors and go until next valid token or end of file
|
499
658
|
# @return [Token]
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
rescue EBNF::LL1::Lexer::Error => e
|
504
|
-
# Recover from lexer error
|
505
|
-
@lineno = e.lineno
|
506
|
-
error("get_token", "With input '#{e.input}': #{e.message}",
|
507
|
-
:production => @productions.last)
|
508
|
-
|
509
|
-
# Retrieve next valid token
|
510
|
-
t = @lexer.recover
|
511
|
-
debug("get_token", :level => 2) {"skipped to #{t.inspect}"}
|
512
|
-
t
|
513
|
-
end
|
659
|
+
# @raise [Lexer::Error]
|
660
|
+
def get_token(recover = nil)
|
661
|
+
token = @lexer.first
|
514
662
|
#progress("token") {token.inspect}
|
515
663
|
@lineno = token.lineno if token
|
516
664
|
token
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
# @option options [URI, #to_s] :production
|
524
|
-
# @option options [Token] :token
|
525
|
-
def error(node, message, options = {})
|
526
|
-
message += ", found #{options[:token].representation.inspect}" if options[:token]
|
527
|
-
message += " at line #{@lineno}" if @lineno
|
528
|
-
message += ", production = #{options[:production].inspect}" if options[:production]
|
529
|
-
@error_log << message unless @recovering
|
530
|
-
@recovering = true
|
531
|
-
debug(node, message, options.merge(:level => 0))
|
532
|
-
end
|
533
|
-
|
534
|
-
##
|
535
|
-
# Progress output when parsing
|
536
|
-
# param [String] node Relevant location associated with message
|
537
|
-
# param [String] message ("")
|
538
|
-
# param [Hash] options
|
539
|
-
# option options [Integer] :depth
|
540
|
-
# Recursion depth for indenting output
|
541
|
-
# yieldreturn [String] added to message
|
542
|
-
def progress(node, *args)
|
543
|
-
return unless @options[:progress] || @options[:debug]
|
544
|
-
options = args.last.is_a?(Hash) ? args.pop : {}
|
545
|
-
message = args.join(",")
|
546
|
-
depth = options[:depth] || self.depth
|
547
|
-
message += yield.to_s if block_given?
|
548
|
-
debug(node, message, options.merge(:level => 1))
|
549
|
-
end
|
550
|
-
|
551
|
-
##
|
552
|
-
# Progress output when debugging.
|
553
|
-
# Captures output to `@options[:debug]` if it is an array.
|
554
|
-
# Otherwise, if `@options[:debug]` is set, or
|
555
|
-
# `@options[:progress]` is set and `:level` <= 1, or
|
556
|
-
# `@options[:validate]` is set and `:level` == 0 output
|
557
|
-
# to standard error.
|
558
|
-
#
|
559
|
-
# @overload debug(node, message)
|
560
|
-
# @param [String] node Relevant location associated with message
|
561
|
-
# @param [String] message ("")
|
562
|
-
# @param [Hash] options
|
563
|
-
# @option options [Integer] :depth
|
564
|
-
# Recursion depth for indenting output
|
565
|
-
# @option options [Integer] :level
|
566
|
-
# Debug level, `0` for errors, `1` for progress, anything else
|
567
|
-
# for debug output.
|
568
|
-
#
|
569
|
-
# @overload debug(message)
|
570
|
-
# @param [String] node Relevant location associated with message
|
571
|
-
# @param [Hash] options
|
572
|
-
# @option options [Integer] :depth
|
573
|
-
# Recursion depth for indenting output
|
574
|
-
# @option options [Integer] :level
|
575
|
-
# Debug level, `0` for errors, `1` for progress, anything else
|
576
|
-
# for debug output.
|
577
|
-
# @yieldreturn [String] added to message
|
578
|
-
def debug(*args)
|
579
|
-
options = args.last.is_a?(Hash) ? args.pop : {}
|
580
|
-
debug_level = options.fetch(:level, 2)
|
581
|
-
return unless @options[:debug] && debug_level <= DEBUG_LEVEL ||
|
582
|
-
@options[:progress] && debug_level <= 1 ||
|
583
|
-
@options[:validate] && debug_level == 0
|
584
|
-
depth = options[:depth] || self.depth
|
585
|
-
d_str = depth > 20 ? ' ' * 20 + '+' : ' ' * depth
|
586
|
-
args << yield if block_given?
|
587
|
-
message = "#{args.join(': ')}"
|
588
|
-
str = "[#{@lineno}](#{debug_level})#{d_str}#{message}"
|
589
|
-
@options[:debug] << str if @options[:debug].is_a?(Array)
|
590
|
-
case
|
591
|
-
when @options[:yield]
|
592
|
-
@parse_callback.call(:trace, node, message, options)
|
593
|
-
when @options[:debug] == true
|
594
|
-
$stderr.puts str
|
595
|
-
when @options[:progress] && debug_level <= 1
|
596
|
-
$stderr.puts str
|
597
|
-
when @options[:validate] && debug_level == 0
|
598
|
-
$stderr.puts str
|
665
|
+
rescue Lexer::Error => e
|
666
|
+
if recover
|
667
|
+
# Recover from lexer error so that we can not bail out too early
|
668
|
+
@lexer.recover
|
669
|
+
error("get_token", "With input '#{e.input}': #{e.message}}")
|
670
|
+
retry
|
599
671
|
end
|
672
|
+
raise
|
600
673
|
end
|
601
674
|
|
602
675
|
##
|
603
676
|
# Accept the first token in the input stream if it matches
|
604
|
-
#
|
677
|
+
# `type\_or\_value`. Raise Error, otherwise.
|
605
678
|
#
|
606
679
|
# @param [Symbol, String] type_or_value
|
607
680
|
# @return [Token]
|
681
|
+
# @raise [Error, Lexer::Error]
|
608
682
|
def accept(type_or_value)
|
609
683
|
if (token = get_token) && token === type_or_value
|
610
684
|
debug("accept") {"#{token.inspect} === #{type_or_value.inspect}"}
|
data/lib/ebnf/ll1/scanner.rb
CHANGED
data/lib/ebnf/rule.rb
CHANGED
@@ -10,40 +10,47 @@ module EBNF
|
|
10
10
|
diff hex range
|
11
11
|
}.map(&:to_sym).freeze
|
12
12
|
|
13
|
-
#
|
13
|
+
# Symbol of rule
|
14
|
+
#
|
14
15
|
# @return [Symbol]
|
15
16
|
attr_accessor :sym
|
16
17
|
|
17
|
-
#
|
18
|
+
# ID of rule
|
18
19
|
# @return [String]
|
19
20
|
attr_accessor :id
|
20
21
|
|
21
22
|
# A comprehension is a sequence which contains all elements but the first of the original rule.
|
22
|
-
#
|
23
|
+
#
|
23
24
|
# @return [Rule]
|
24
25
|
attr_accessor :comp
|
25
26
|
|
26
|
-
#
|
27
|
+
# Kind of rule
|
28
|
+
#
|
27
29
|
# @return [:rule, :terminal, or :pass]
|
28
30
|
attr_accessor :kind
|
29
31
|
|
30
|
-
#
|
32
|
+
# Rule expression
|
33
|
+
#
|
31
34
|
# @return [Array]
|
32
35
|
attr_accessor :expr
|
33
36
|
|
34
|
-
#
|
37
|
+
# Original EBNF
|
38
|
+
#
|
35
39
|
# @return [String]
|
36
40
|
attr_accessor :orig
|
37
41
|
|
38
|
-
#
|
42
|
+
# Terminals that immediately procede this rule
|
43
|
+
#
|
39
44
|
# @return [Array<Rule>]
|
40
45
|
attr_reader :first
|
41
46
|
|
42
|
-
#
|
47
|
+
# Terminals that immediately follow this rule
|
48
|
+
#
|
43
49
|
# @return [Array<Rule>]
|
44
50
|
attr_reader :follow
|
45
51
|
|
46
|
-
#
|
52
|
+
# Indicates that this is a starting rule
|
53
|
+
#
|
47
54
|
# @return [Boolean]
|
48
55
|
attr_accessor :start
|
49
56
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ebnf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gregg Kellogg
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-03-
|
11
|
+
date: 2013-03-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sxp
|