ebnf 0.2.3 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/VERSION +1 -1
- data/lib/ebnf/base.rb +2 -2
- data/lib/ebnf/ll1.rb +5 -5
- data/lib/ebnf/ll1/lexer.rb +12 -12
- data/lib/ebnf/ll1/parser.rb +354 -280
- data/lib/ebnf/ll1/scanner.rb +0 -1
- data/lib/ebnf/rule.rb +16 -9
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZDI4YWE4ZjExOGI5N2NkZDVmYzA4ZDMwMzM0ZGRhZThhNWU0ZGI2MQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MWZmMjVkYjMwYzY1NmNlMDc0ZDM5MzUxOWU1ZTUwNGU2NDQ2YzFiYQ==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
M2E2MTcyM2MyZWYyNTg5Y2E3ZDVkNWI2ODgwYjdhZjhlYmQ1YTUzZGE5Nzc3
|
10
|
+
M2M3MDFiYzllMzE5YjNmMDA2Zjc3NjQ5YzViMzhmMTNmZTBkYWZhMDk0M2Rl
|
11
|
+
MjlkNWMxNzY3ODM4ZDYxZjBiZWI2NzA4ZGRjYzQ2MGY2MTBiZDI=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
OGIxZGZjNzNhZTlkZDAwZTM0ZmExYzMzMzUxMjQ4ZmNmYjBmNTM4OGJlMmM2
|
14
|
+
NmFmODM4MjMwODYyY2E1YjhjNjNlY2E1YjI0MTg1NjFlYWI0MzI4ZThhMjA2
|
15
|
+
MmMyMjBiZjA5NGQ2MjlhYzA0Yzg1YzYzNzhjNjNkZmQ2ZTg5N2I=
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.3.0
|
data/lib/ebnf/base.rb
CHANGED
@@ -103,12 +103,12 @@ module EBNF
|
|
103
103
|
include Parser
|
104
104
|
|
105
105
|
# Abstract syntax tree from parse
|
106
|
-
#
|
106
|
+
#
|
107
107
|
# @return [Array<Rule>]
|
108
108
|
attr_reader :ast
|
109
109
|
|
110
110
|
# Grammar errors, or errors found generating parse tables
|
111
|
-
#
|
111
|
+
#
|
112
112
|
# @return [Array<String>]
|
113
113
|
attr_accessor :errors
|
114
114
|
|
data/lib/ebnf/ll1.rb
CHANGED
@@ -6,29 +6,29 @@ module EBNF
|
|
6
6
|
|
7
7
|
# Branch table, represented as a recursive hash.
|
8
8
|
# The table is indexed by rule symbol, which in-turn references a hash of terminals (which are the first terminals of the production), which in turn reference the sequence of rules that follow, given that terminal as input
|
9
|
-
#
|
9
|
+
#
|
10
10
|
# @return [Hash{Symbol => Hash{String, Symbol => Array<Symbol>}}]
|
11
11
|
attr_reader :branch
|
12
12
|
|
13
13
|
# First table
|
14
|
-
#
|
14
|
+
#
|
15
15
|
# @return [Hash{Symbol, String => Symbol}]
|
16
16
|
attr_reader :first
|
17
17
|
|
18
18
|
# Follow table
|
19
|
-
#
|
19
|
+
#
|
20
20
|
# @return [Hash{Symbol, String => Symbol}]
|
21
21
|
attr_reader :follow
|
22
22
|
|
23
23
|
# Terminal table
|
24
24
|
# The list of terminals used in the grammar.
|
25
|
-
#
|
25
|
+
#
|
26
26
|
# @return [Array<String, Symbol>]
|
27
27
|
attr_reader :terminals
|
28
28
|
|
29
29
|
# Start symbol
|
30
30
|
# The rule which starts the grammar
|
31
|
-
#
|
31
|
+
#
|
32
32
|
# @return [Symbol]
|
33
33
|
attr_reader :start
|
34
34
|
|
data/lib/ebnf/ll1/lexer.rb
CHANGED
@@ -53,12 +53,10 @@ module EBNF::LL1
|
|
53
53
|
ML_START = /\'\'\'|\"\"\"/.freeze # Beginning of terminals that may span lines
|
54
54
|
|
55
55
|
##
|
56
|
-
# @!attribute whitespace
|
57
56
|
# @return [Regexp] defines whitespace, defaults to WS
|
58
57
|
attr_reader :whitespace
|
59
58
|
|
60
59
|
##
|
61
|
-
# @!attribute comment
|
62
60
|
# @return [Regexp] defines single-line comment, defaults to COMMENT
|
63
61
|
attr_reader :comment
|
64
62
|
|
@@ -233,15 +231,16 @@ module EBNF::LL1
|
|
233
231
|
#
|
234
232
|
# @return [Token]
|
235
233
|
def recover
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
scanner.pos = scanner.pos + 1
|
234
|
+
until scanner.eos? || tok = match_token
|
235
|
+
if scanner.skip_until(@whitespace).nil? # Skip past current "token"
|
236
|
+
# No whitespace at the end, must be the end of string
|
237
|
+
scanner.terminate
|
238
|
+
else
|
239
|
+
skip_whitespace
|
243
240
|
end
|
244
241
|
end
|
242
|
+
scanner.unscan if tok
|
243
|
+
first
|
245
244
|
end
|
246
245
|
protected
|
247
246
|
|
@@ -253,9 +252,10 @@ module EBNF::LL1
|
|
253
252
|
def skip_whitespace
|
254
253
|
# skip all white space, but keep track of the current line number
|
255
254
|
while !scanner.eos?
|
256
|
-
|
255
|
+
if matched = scanner.scan(@whitespace)
|
257
256
|
@lineno += matched.count("\n")
|
258
|
-
elsif (
|
257
|
+
elsif (scanner.scan(@comment))
|
258
|
+
#
|
259
259
|
else
|
260
260
|
return
|
261
261
|
end
|
@@ -472,7 +472,7 @@ module EBNF::LL1
|
|
472
472
|
#
|
473
473
|
# @return [String]
|
474
474
|
def inspect
|
475
|
-
|
475
|
+
"#{@value.inspect}#{'(' + @type.to_s + ')' if @type}"
|
476
476
|
end
|
477
477
|
end # class Token
|
478
478
|
|
data/lib/ebnf/ll1/parser.rb
CHANGED
@@ -10,7 +10,6 @@ module EBNF::LL1
|
|
10
10
|
DEBUG_LEVEL = 10
|
11
11
|
|
12
12
|
##
|
13
|
-
# @!attribute [r] lineno
|
14
13
|
# @return [Integer] line number of current token
|
15
14
|
attr_reader :lineno
|
16
15
|
|
@@ -20,10 +19,10 @@ module EBNF::LL1
|
|
20
19
|
|
21
20
|
# DSL for creating terminals and productions
|
22
21
|
module ClassMethods
|
23
|
-
def start_handlers;
|
24
|
-
def production_handlers;
|
25
|
-
def terminal_handlers;
|
26
|
-
def patterns;
|
22
|
+
def start_handlers; @start_handlers || {}; end
|
23
|
+
def production_handlers; @production_handlers || {}; end
|
24
|
+
def terminal_handlers; @terminal_handlers || {}; end
|
25
|
+
def patterns; @patterns || []; end
|
27
26
|
|
28
27
|
##
|
29
28
|
# Defines the pattern for a terminal node and a block to be invoked
|
@@ -53,11 +52,11 @@ module EBNF::LL1
|
|
53
52
|
# Block passed to initialization for yielding to calling parser.
|
54
53
|
# Should conform to the yield specs for #initialize
|
55
54
|
def terminal(term, regexp, options = {}, &block)
|
56
|
-
|
55
|
+
@patterns ||= []
|
57
56
|
# Passed in order to define evaluation sequence
|
58
|
-
|
59
|
-
|
60
|
-
|
57
|
+
@patterns << EBNF::LL1::Lexer::Terminal.new(term, regexp, options)
|
58
|
+
@terminal_handlers ||= {}
|
59
|
+
@terminal_handlers[term] = block if block_given?
|
61
60
|
end
|
62
61
|
|
63
62
|
##
|
@@ -80,8 +79,8 @@ module EBNF::LL1
|
|
80
79
|
# Should conform to the yield specs for #initialize
|
81
80
|
# Yield to generate a triple
|
82
81
|
def start_production(term, &block)
|
83
|
-
|
84
|
-
|
82
|
+
@start_handlers ||= {}
|
83
|
+
@start_handlers[term] = block
|
85
84
|
end
|
86
85
|
|
87
86
|
##
|
@@ -105,8 +104,8 @@ module EBNF::LL1
|
|
105
104
|
# Should conform to the yield specs for #initialize
|
106
105
|
# Yield to generate a triple
|
107
106
|
def production(term, &block)
|
108
|
-
|
109
|
-
|
107
|
+
@production_handlers ||= {}
|
108
|
+
@production_handlers[term] = block
|
110
109
|
end
|
111
110
|
|
112
111
|
# Evaluate a handler, delegating to the specified object.
|
@@ -115,15 +114,15 @@ module EBNF::LL1
|
|
115
114
|
# @param [Object] object
|
116
115
|
# @return [Object]
|
117
116
|
def eval_with_binding(object)
|
118
|
-
|
117
|
+
@delegate = object
|
119
118
|
object.instance_eval {yield}
|
120
119
|
end
|
121
120
|
|
122
121
|
private
|
123
122
|
|
124
123
|
def method_missing(method, *args, &block)
|
125
|
-
if
|
126
|
-
|
124
|
+
if @delegate ||= nil
|
125
|
+
@delegate.send method, *args, &block
|
127
126
|
else
|
128
127
|
super
|
129
128
|
end
|
@@ -137,29 +136,40 @@ module EBNF::LL1
|
|
137
136
|
#
|
138
137
|
# @example
|
139
138
|
# require 'rdf/ll1/parser'
|
140
|
-
#
|
139
|
+
#
|
141
140
|
# class MyParser
|
142
141
|
# include EBNF::LL1::Parser
|
143
|
-
#
|
142
|
+
#
|
144
143
|
# branch MyParser::BRANCH
|
145
|
-
#
|
144
|
+
#
|
145
|
+
# ##
|
146
|
+
# # Defines a production called before parsing a non-terminal
|
147
|
+
# # with data from previous production along with data defined for the
|
148
|
+
# # current production
|
149
|
+
# #
|
150
|
+
# start_production :object do |input, current, callback|
|
151
|
+
# # Note production as triples for blankNodePropertyList
|
152
|
+
# # to set :subject instead of :resource
|
153
|
+
# current[:triples] = true
|
154
|
+
# end
|
155
|
+
#
|
146
156
|
# ##
|
147
|
-
# # Defines a production called during
|
157
|
+
# # Defines a production called after parsing a non-terminal
|
148
158
|
# # with data from previous production along with data defined for the
|
149
159
|
# # current production
|
150
160
|
# #
|
151
|
-
# #
|
152
|
-
# production :object do |
|
161
|
+
# # callback to processor block
|
162
|
+
# production :object do |input, current, callback|
|
153
163
|
# object = current[:resource]
|
154
|
-
#
|
164
|
+
# callback.call :statement, RDF::Statement.new(input[:subject], input[:predicate], object)
|
155
165
|
# end
|
156
|
-
#
|
166
|
+
#
|
157
167
|
# ##
|
158
168
|
# # Defines the pattern for a terminal node
|
159
|
-
# terminal :BLANK_NODE_LABEL, %r(_:(#{PN_LOCAL})) do |
|
169
|
+
# terminal :BLANK_NODE_LABEL, %r(_:(#{PN_LOCAL})) do |production, token, input|
|
160
170
|
# input[:BLANK_NODE_LABEL] = RDF::Node.new(token)
|
161
171
|
# end
|
162
|
-
#
|
172
|
+
#
|
163
173
|
# ##
|
164
174
|
# # Iterates the given block for each RDF statement in the input.
|
165
175
|
# #
|
@@ -168,7 +178,7 @@ module EBNF::LL1
|
|
168
178
|
# # @return [void]
|
169
179
|
# def each_statement(&block)
|
170
180
|
# @callback = block
|
171
|
-
#
|
181
|
+
#
|
172
182
|
# parse(START.to_sym) do |context, *data|
|
173
183
|
# case context
|
174
184
|
# when :statement
|
@@ -176,11 +186,12 @@ module EBNF::LL1
|
|
176
186
|
# end
|
177
187
|
# end
|
178
188
|
# end
|
179
|
-
#
|
189
|
+
#
|
180
190
|
# end
|
181
191
|
#
|
182
192
|
# @param [String, #to_s] input
|
183
|
-
# @param [Symbol, #to_s]
|
193
|
+
# @param [Symbol, #to_s] start
|
194
|
+
# The starting production for the parser. It may be a URI from the grammar, or a symbol representing the local_name portion of the grammar URI.
|
184
195
|
# @param [Hash{Symbol => Object}] options
|
185
196
|
# @option options [Hash{Symbol,String => Hash{Symbol,String => Array<Symbol,String>}}] :branch LL1 branch table.
|
186
197
|
# @option options [Hash{Symbol,String => Array<Symbol,String>}] :first ({})
|
@@ -202,8 +213,11 @@ module EBNF::LL1
|
|
202
213
|
# @yieldparam [Symbol] *data
|
203
214
|
# Data specific to the call
|
204
215
|
# @return [EBNF::LL1::Parser]
|
216
|
+
# @raise [Exception] Raises exceptions for parsing errors
|
217
|
+
# or errors raised during processing callbacks. Internal
|
218
|
+
# errors are raised using {Error}.
|
205
219
|
# @see http://cs.adelaide.edu.au/~charles/lt/Lectures/07-ErrorRecovery.pdf
|
206
|
-
def parse(input = nil,
|
220
|
+
def parse(input = nil, start = nil, options = {}, &block)
|
207
221
|
@options = options.dup
|
208
222
|
@branch = options[:branch]
|
209
223
|
@first = options[:first] ||= {}
|
@@ -217,110 +231,171 @@ module EBNF::LL1
|
|
217
231
|
|
218
232
|
# Unrecoverable errors
|
219
233
|
raise Error, "Branch table not defined" unless @branch && @branch.length > 0
|
220
|
-
raise Error, "Starting production not defined" unless
|
234
|
+
raise Error, "Starting production not defined" unless start
|
221
235
|
|
222
236
|
@prod_data = [{}]
|
223
|
-
|
224
|
-
todo_stack = [{:prod =>
|
237
|
+
start = start.split('#').last.to_sym unless start.is_a?(Symbol)
|
238
|
+
todo_stack = [{:prod => start, :terms => nil}]
|
225
239
|
|
226
240
|
while !todo_stack.empty?
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
# skipping invalid tokens until either a valid token is found (from @first),
|
243
|
-
# or a token appearing in @follow appears.
|
244
|
-
token = skip_until_valid(todo_stack)
|
245
|
-
|
246
|
-
# At this point, token is either nil, in the first set of the production,
|
247
|
-
# or in the follow set of this production or any previous production
|
248
|
-
debug("parse(production)") do
|
249
|
-
"token #{token ? token.representation.inspect : 'nil'}, " +
|
250
|
-
"prod #{cur_prod.inspect}, " +
|
251
|
-
"depth #{depth}"
|
252
|
-
end
|
241
|
+
begin
|
242
|
+
@recovering = false
|
243
|
+
pushed = false
|
244
|
+
if todo_stack.last[:terms].nil?
|
245
|
+
todo_stack.last[:terms] = []
|
246
|
+
cur_prod = todo_stack.last[:prod]
|
247
|
+
|
248
|
+
# If cur_prod is the starting production, we can reset the stack
|
249
|
+
# to the beginning to avoid excessive growth in the production
|
250
|
+
# stack
|
251
|
+
if options[:reset_on_start] && cur_prod == start
|
252
|
+
todo_stack = [{:prod => start, :terms => []}]
|
253
|
+
@productions = []
|
254
|
+
@prod_data = [{}]
|
255
|
+
end
|
253
256
|
|
254
|
-
|
255
|
-
|
256
|
-
break if token.nil?
|
257
|
+
# Fetch the current token
|
258
|
+
token = get_token(:recover)
|
257
259
|
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
"
|
263
|
-
"
|
264
|
-
"prod_branch #{prod_branch.keys.inspect}, " +
|
265
|
-
"sequence #{sequence.inspect}"
|
260
|
+
# At this point, token is either nil, in the first set of the production,
|
261
|
+
# or in the follow set of this production or any previous production
|
262
|
+
debug("parse(production)") do
|
263
|
+
"token #{token ? token.representation.inspect : 'nil'}, " +
|
264
|
+
"prod #{cur_prod.inspect}, " +
|
265
|
+
"depth #{depth}"
|
266
266
|
end
|
267
267
|
|
268
|
-
|
269
|
-
|
270
|
-
|
268
|
+
# Got an opened production
|
269
|
+
onStart(cur_prod)
|
270
|
+
|
271
|
+
if token.nil?
|
272
|
+
if !(first_include?(cur_prod, :_eps) && follow_include?(cur_prod, :_eof))
|
273
|
+
# End of file, and production does not contain eps, or it does, but follow does not contain eof
|
274
|
+
raise Error.new("Unexpected end of input", :production => cur_prod)
|
271
275
|
else
|
272
|
-
|
273
|
-
|
274
|
-
|
276
|
+
debug("parse(production)") {"End of input prod #{cur_prod.inspect}"}
|
277
|
+
end
|
278
|
+
elsif prod_branch = @branch[cur_prod]
|
279
|
+
sequence = prod_branch.fetch(token.representation) do
|
280
|
+
raise Error.new("#{token.inspect} does not match production #{cur_prod.inspect}",
|
281
|
+
:production => cur_prod)
|
282
|
+
end
|
283
|
+
debug("parse(production)") do
|
284
|
+
"token #{token.representation.inspect} " +
|
285
|
+
"prod #{cur_prod.inspect}, " +
|
286
|
+
"prod_branch #{prod_branch.keys.inspect}, " +
|
287
|
+
"sequence #{sequence.inspect}"
|
275
288
|
end
|
289
|
+
todo_stack.last[:terms] += sequence
|
290
|
+
else
|
291
|
+
raise Error.new("No branches found for #{cur_prod.inspect}",
|
292
|
+
:production => cur_prod, :token => token)
|
276
293
|
end
|
277
|
-
todo_stack.last[:terms] += sequence if sequence
|
278
|
-
else
|
279
|
-
# Is this a fatal error?
|
280
|
-
error("parse(fatal?)", "No branches found for #{cur_prod.inspect}",
|
281
|
-
:production => cur_prod, :token => token)
|
282
294
|
end
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
while !todo_stack.last[:terms].to_a.empty?
|
287
|
-
begin
|
295
|
+
|
296
|
+
debug("parse(terms)") {"todo #{todo_stack.last.inspect}, depth #{depth}"}
|
297
|
+
while !todo_stack.last[:terms].to_a.empty?
|
288
298
|
# Get the next term in this sequence
|
289
299
|
term = todo_stack.last[:terms].shift
|
290
300
|
debug("parse(token)") {"accept #{term.inspect}"}
|
301
|
+
|
291
302
|
if token = accept(term)
|
292
|
-
@recovering = false
|
293
303
|
debug("parse(token)") {"token #{token.inspect}, term #{term.inspect}"}
|
294
|
-
|
295
|
-
elsif terminals.include?(term)
|
304
|
+
onTerminal(term, token)
|
305
|
+
elsif terminals.include?(term)
|
296
306
|
# If term is a terminal, then it is an error if token does not
|
297
307
|
# match it
|
298
|
-
|
308
|
+
raise Error.new("#{get_token.inspect} does not match terminal #{term.inspect}",
|
309
|
+
:production => cur_prod)
|
299
310
|
else
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
311
|
+
token = get_token
|
312
|
+
|
313
|
+
# If token is not in firsts of term, but eps is, skip to next
|
314
|
+
# term
|
315
|
+
if first_include?(term, :_eps) && !first_include?(term, token)
|
316
|
+
debug("parse(token)") {"skip optional term #{term.inspect} on #{token.inspect}"}
|
317
|
+
break
|
318
|
+
else
|
319
|
+
# Push term onto stack
|
320
|
+
todo_stack << {:prod => term, :terms => nil}
|
321
|
+
debug("parse(push)") {"term #{term.inspect}, depth #{depth}"}
|
322
|
+
pushed = true
|
323
|
+
break
|
324
|
+
end
|
305
325
|
end
|
306
326
|
end
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
327
|
+
rescue Lexer::Error, Error => e
|
328
|
+
# Lexer encountered an illegal token or the parser encountered
|
329
|
+
# a terminal which is inappropriate for the current production.
|
330
|
+
# Perform error recovery to find a reasonable terminal based
|
331
|
+
# on the follow sets of the relevant productions. This includes
|
332
|
+
# remaining terms from the current production and the stacked
|
333
|
+
# productions
|
334
|
+
@lineno = e.lineno
|
335
|
+
if e.is_a?(Lexer::Error)
|
336
|
+
# Skip to the next valid terminal
|
337
|
+
@lexer.recover
|
338
|
+
error("parse(#{e.class})", "With input '#{e.input}': #{e.message}, skipped to #{(get_token(:recover) || :eof).inspect}",
|
339
|
+
:production => @productions.last)
|
340
|
+
else
|
341
|
+
# Otherwise, the terminal is fine, just not for this production.
|
342
|
+
@lexer.shift
|
343
|
+
error("parse(#{e.class})", "#{e.message}, skipped to #{(get_token(:recover) || :eof).inspect}",
|
344
|
+
:production => @productions.last, :token => e.token)
|
345
|
+
end
|
346
|
+
|
347
|
+
# Get the list of follows for this sequence, this production and the stacked productions.
|
348
|
+
debug("recovery", "stack follows:", :level => 4)
|
349
|
+
todo_stack.reverse.each do |todo|
|
350
|
+
debug("recovery", :level => 4) {" #{todo[:prod]}: #{@follow[todo[:prod]].inspect}"}
|
351
|
+
end
|
352
|
+
|
353
|
+
# Find all follows to the top of the stack
|
354
|
+
follows = todo_stack.inject([]) do |follow, todo|
|
355
|
+
prod = todo[:prod]
|
356
|
+
follow += @follow[prod] || []
|
357
|
+
end.uniq
|
358
|
+
debug("recovery") {"follows: #{follows.inspect}"}
|
359
|
+
|
360
|
+
# Skip tokens until one is found in follows
|
361
|
+
while (token = get_token(:recover)) && follows.none? {|t| token === t}
|
362
|
+
skipped = @lexer.shift
|
363
|
+
progress("recovery") {"skip #{skipped.inspect}"}
|
364
|
+
end
|
365
|
+
debug("recovery") {"found #{token.inspect} in follows"}
|
366
|
+
|
367
|
+
# Pop stack elements until token is in follows
|
368
|
+
while !todo_stack.empty? &&
|
369
|
+
!follow_include?(todo_stack.last[:prod], token || :_eof)
|
370
|
+
debug("recovery(pop)") {"todo #{todo_stack.last.inspect}, depth #{depth}"}
|
318
371
|
todo_stack.pop
|
319
372
|
onFinish
|
373
|
+
end
|
374
|
+
|
375
|
+
# Token is now in the first of the top production
|
376
|
+
unless todo_stack.empty?
|
377
|
+
todo_stack.pop
|
378
|
+
onFinish
|
379
|
+
end
|
380
|
+
|
381
|
+
if todo_stack.empty?
|
382
|
+
# Recovered to end of last production
|
383
|
+
warn("recover", "recovered to end of productions")
|
320
384
|
else
|
321
|
-
|
322
|
-
|
323
|
-
|
385
|
+
warn("recover", "recovered to #{todo_stack.last[:prod].inspect} with #{token.inspect}")
|
386
|
+
end
|
387
|
+
|
388
|
+
@recovering = false
|
389
|
+
ensure
|
390
|
+
# After completing the last production in a sequence, pop down until we find a production
|
391
|
+
#
|
392
|
+
# If in recovery mode, continue popping until we find a term with a follow list
|
393
|
+
while !pushed &&
|
394
|
+
!todo_stack.empty? &&
|
395
|
+
todo_stack.last.fetch(:terms, []).empty?
|
396
|
+
debug("parse(pop)") {"todo #{todo_stack.last.inspect}, depth #{depth}"}
|
397
|
+
todo_stack.pop
|
398
|
+
onFinish
|
324
399
|
end
|
325
400
|
end
|
326
401
|
end
|
@@ -329,12 +404,10 @@ module EBNF::LL1
|
|
329
404
|
|
330
405
|
# Continue popping contexts off of the stack
|
331
406
|
while !todo_stack.empty?
|
332
|
-
debug("parse(eof)"
|
407
|
+
debug("parse(eof)") {"stack #{todo_stack.last.inspect}, depth #{depth}"}
|
333
408
|
# There can't be anything left to do, or if there is, it must be optional
|
334
409
|
last_terms = todo_stack.last[:terms]
|
335
|
-
if last_terms.length > 0 && last_terms.none? {|t|
|
336
|
-
@first.has_key?(t) && @first[t].include?(:_eps)
|
337
|
-
}
|
410
|
+
if last_terms.length > 0 && last_terms.none? {|t|first_include?(t, :_eps)}
|
338
411
|
error("parse(eof)",
|
339
412
|
"End of input before end of production: stack #{todo_stack.last.inspect}, depth #{depth}"
|
340
413
|
)
|
@@ -342,10 +415,10 @@ module EBNF::LL1
|
|
342
415
|
todo_stack.pop
|
343
416
|
onFinish
|
344
417
|
end
|
345
|
-
|
418
|
+
|
346
419
|
# When all is said and done, raise the error log
|
347
420
|
unless @error_log.empty?
|
348
|
-
raise Error, @error_log.join("\n\t")
|
421
|
+
raise Error, @error_log.join("\n\t")
|
349
422
|
end
|
350
423
|
end
|
351
424
|
|
@@ -369,16 +442,108 @@ module EBNF::LL1
|
|
369
442
|
prod_data[sym] << values
|
370
443
|
end
|
371
444
|
end
|
372
|
-
|
445
|
+
|
373
446
|
# Add values to production data, values arranged as an array
|
374
447
|
def add_prod_data(sym, *values)
|
375
448
|
return if values.compact.empty?
|
376
|
-
|
449
|
+
|
377
450
|
prod_data[sym] ||= []
|
378
451
|
prod_data[sym] += values
|
379
452
|
debug("add_prod_data(#{sym})") {"#{prod_data[sym].inspect} += #{values.inspect}"}
|
380
453
|
end
|
381
|
-
|
454
|
+
|
455
|
+
protected
|
456
|
+
|
457
|
+
##
|
458
|
+
# Error information, used as level `0` debug messages.
|
459
|
+
#
|
460
|
+
# @param [String] node Relevant location associated with message
|
461
|
+
# @param [String] message Error string
|
462
|
+
# @param [Hash] options
|
463
|
+
# @option options [URI, #to_s] :production
|
464
|
+
# @option options [Token] :token
|
465
|
+
# @see {#debug}
|
466
|
+
def error(node, message, options = {})
|
467
|
+
message += ", found #{options[:token].inspect}" if options[:token]
|
468
|
+
message += " at line #{@lineno}" if @lineno
|
469
|
+
message += ", production = #{options[:production].inspect}" if options[:production]
|
470
|
+
@error_log << message unless @recovering
|
471
|
+
@recovering = true
|
472
|
+
debug(node, message, options.merge(:level => 0))
|
473
|
+
end
|
474
|
+
|
475
|
+
##
|
476
|
+
# Warning information, used as level `1` debug messages.
|
477
|
+
#
|
478
|
+
# @param [String] node Relevant location associated with message
|
479
|
+
# @param [String] message Error string
|
480
|
+
# @param [Hash] options
|
481
|
+
# @option options [URI, #to_s] :production
|
482
|
+
# @option options [Token] :token
|
483
|
+
# @see {#debug}
|
484
|
+
def warn(node, message, options = {})
|
485
|
+
message += ", with token #{options[:token].inspect}" if options[:token]
|
486
|
+
message += " at line #{@lineno}" if @lineno
|
487
|
+
message += ", production = #{options[:production].inspect}" if options[:production]
|
488
|
+
@error_log << message unless @recovering
|
489
|
+
debug(node, message, options.merge(:level => 1))
|
490
|
+
end
|
491
|
+
|
492
|
+
##
|
493
|
+
# Progress output when parsing. Passed as level `2` debug messages.
|
494
|
+
#
|
495
|
+
# @overload progress(node, message, options)
|
496
|
+
# @param [String] node Relevant location associated with message
|
497
|
+
# @param [String] message ("")
|
498
|
+
# @param [Hash] options
|
499
|
+
# @option options [Integer] :depth
|
500
|
+
# Recursion depth for indenting output
|
501
|
+
# @see {#debug}
|
502
|
+
def progress(node, *args)
|
503
|
+
return unless @options[:progress] || @options[:debug]
|
504
|
+
options = args.last.is_a?(Hash) ? args.pop : {}
|
505
|
+
message = args.join(",")
|
506
|
+
message += yield.to_s if block_given?
|
507
|
+
debug(node, message, options.merge(:level => 2))
|
508
|
+
end
|
509
|
+
|
510
|
+
##
|
511
|
+
# Progress output when debugging.
|
512
|
+
#
|
513
|
+
# The call is ignored, unless `@options[:debug]` is set, in which
|
514
|
+
# case it yields tracing information as indicated. Additionally,
|
515
|
+
# if `@options[:debug]` is an Integer, the call is aborted if the
|
516
|
+
# `:level` option is greater than `@options[:debug]`.
|
517
|
+
#
|
518
|
+
# @overload debug(node, message, options)
|
519
|
+
# @param [Array<String>] args Relevant location associated with message
|
520
|
+
# @param [Hash] options
|
521
|
+
# @option options [Integer] :depth
|
522
|
+
# Recursion depth for indenting output
|
523
|
+
# @option options [Integer] :level
|
524
|
+
# Level assigned to message, by convention, level `0` is for
|
525
|
+
# errors, level `1` is for warnings, level `2` is for parser
|
526
|
+
# progress information, and anything higher is for various levels
|
527
|
+
# of debug information.
|
528
|
+
#
|
529
|
+
# @yield trace, level, lineno, depth, args
|
530
|
+
# @yieldparam [:trace] trace
|
531
|
+
# @yieldparam [Integer] level
|
532
|
+
# @yieldparam [Integer] lineno
|
533
|
+
# @yieldparam [Integer] depth Recursive depth of productions
|
534
|
+
# @yieldparam [Array<String>] args
|
535
|
+
# @yieldreturn [String] added to message
|
536
|
+
def debug(*args)
|
537
|
+
return unless @options[:debug] && @parse_callback
|
538
|
+
options = args.last.is_a?(Hash) ? args.pop : {}
|
539
|
+
debug_level = options.fetch(:level, 3)
|
540
|
+
return if @options[:debug].is_a?(Integer) && debug_level > @options[:debug]
|
541
|
+
|
542
|
+
depth = options[:depth] || self.depth
|
543
|
+
args << yield if block_given?
|
544
|
+
@parse_callback.call(:trace, debug_level, @lineno, depth, *args)
|
545
|
+
end
|
546
|
+
|
382
547
|
private
|
383
548
|
# Start for production
|
384
549
|
def onStart(prod)
|
@@ -389,15 +554,20 @@ module EBNF::LL1
|
|
389
554
|
# to customize before pushing on the @prod_data stack
|
390
555
|
progress("#{prod}(:start):#{@prod_data.length}") {@prod_data.last}
|
391
556
|
data = {}
|
392
|
-
|
393
|
-
|
394
|
-
|
557
|
+
begin
|
558
|
+
self.class.eval_with_binding(self) {
|
559
|
+
handler.call(@prod_data.last, data, @parse_callback)
|
560
|
+
}
|
561
|
+
rescue Exception => e
|
562
|
+
error("start", "#{e.class}: #{e.message}", :production => prod)
|
563
|
+
@recovering = false
|
564
|
+
end
|
395
565
|
@prod_data << data
|
396
566
|
else
|
397
567
|
# Make sure we push as many as we pop, even if there is no
|
398
568
|
# explicit start handler
|
399
569
|
@prod_data << {} if self.class.production_handlers[prod]
|
400
|
-
progress("#{prod}(:start)") { get_token.inspect}
|
570
|
+
progress("#{prod}(:start)") { get_token.inspect + (@recovering ? ' recovering' : '')}
|
401
571
|
end
|
402
572
|
#puts "prod_data(s): " + @prod_data.inspect
|
403
573
|
end
|
@@ -410,201 +580,105 @@ module EBNF::LL1
|
|
410
580
|
if handler && !@recovering
|
411
581
|
# Pop production data element from stack, potentially allowing handler to use it
|
412
582
|
data = @prod_data.pop
|
413
|
-
|
414
|
-
|
415
|
-
|
583
|
+
begin
|
584
|
+
self.class.eval_with_binding(self) {
|
585
|
+
handler.call(@prod_data.last, data, @parse_callback)
|
586
|
+
}
|
587
|
+
rescue Exception => e
|
588
|
+
error("finish", "#{e.class}: #{e.message}", :production => prod)
|
589
|
+
@recovering = false
|
590
|
+
end
|
416
591
|
progress("#{prod}(:finish):#{@prod_data.length}") {@prod_data.last}
|
417
592
|
else
|
418
|
-
progress("#{prod}(:finish)"
|
593
|
+
progress("#{prod}(:finish)") { "recovering" if @recovering }
|
419
594
|
end
|
420
595
|
@productions.pop
|
421
596
|
end
|
422
597
|
|
423
|
-
# A
|
424
|
-
def
|
598
|
+
# A terminal
|
599
|
+
def onTerminal(prod, token)
|
425
600
|
unless @productions.empty?
|
426
601
|
parentProd = @productions.last
|
427
602
|
handler = self.class.terminal_handlers[prod]
|
428
603
|
# Allows catch-all for simple string terminals
|
429
604
|
handler ||= self.class.terminal_handlers[nil] if prod.is_a?(String)
|
430
605
|
if handler
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
606
|
+
begin
|
607
|
+
self.class.eval_with_binding(self) {
|
608
|
+
handler.call(parentProd, token, @prod_data.last, @parse_callback)
|
609
|
+
}
|
610
|
+
rescue Exception => e
|
611
|
+
error("terminal", "#{e.class}: #{e.message}", :production => prod)
|
612
|
+
@recovering = false
|
613
|
+
end
|
614
|
+
progress("#{prod}(:terminal)", "", :depth => (depth + 1)) {"#{token}: #{@prod_data.last}"}
|
435
615
|
else
|
436
|
-
progress("#{prod}(:
|
616
|
+
progress("#{prod}(:terminal)", "", :depth => (depth + 1)) {token.to_s}
|
437
617
|
end
|
438
618
|
else
|
439
|
-
error("#{parentProd}(:
|
619
|
+
error("#{parentProd}(:terminal)", "Terminal has no parent production", :production => prod)
|
440
620
|
end
|
441
621
|
end
|
442
|
-
|
443
|
-
|
444
|
-
#
|
445
|
-
# or can follow a production in the stack.
|
622
|
+
|
623
|
+
##
|
624
|
+
# Does first include the specified token
|
446
625
|
#
|
447
|
-
# @
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
return if token.nil? && @follow.fetch(cur_prod, []).include?(:_eof)
|
457
|
-
|
458
|
-
# If this token can be used by the top production, return it
|
459
|
-
# Otherwise, if the branch table allows empty, also return the token
|
460
|
-
return token if !@recovering && (expected.any? {|t| (token || :_eps) === t})
|
461
|
-
|
462
|
-
# Otherwise, it's an error condition, and skip either until
|
463
|
-
# we find a valid token for this production, or until we find
|
464
|
-
# something that can follow this production
|
465
|
-
error("skip_until_valid", "expected one of #{expected.map(&:inspect).join(", ")}, found #{token.inspect}",
|
466
|
-
:production => cur_prod, :token => token)
|
467
|
-
|
468
|
-
debug("recovery", "stack follows:")
|
469
|
-
todo_stack.reverse.each do |todo|
|
470
|
-
debug("recovery") {" #{todo[:prod]}: #{@follow[todo[:prod]].inspect}"}
|
626
|
+
# @param [Symbol] production
|
627
|
+
# @param [Symbol, Lexer::Token] token
|
628
|
+
# A terminal, or symbol or string
|
629
|
+
# @return [Boolean]
|
630
|
+
def first_include?(production, token)
|
631
|
+
if token.is_a?(Lexer::Token)
|
632
|
+
@first.fetch(production, []).any? {|t| token === t}
|
633
|
+
else
|
634
|
+
@first.fetch(production, []).include?(token)
|
471
635
|
end
|
636
|
+
end
|
472
637
|
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
debug("recovery") {"found #{token.inspect} in #{first.include?(token) ? 'first' : 'follows'}"}
|
486
|
-
|
487
|
-
# If the token is a first, just return it. Otherwise, it is a follow
|
488
|
-
# and we need to skip to the end of the production
|
489
|
-
unless first.any? {|t| token == t} || todo_stack.last[:terms].empty?
|
490
|
-
debug("recovery") {"token in follows, skip past #{todo_stack.last[:terms].inspect}"}
|
491
|
-
todo_stack.last[:terms] = []
|
638
|
+
##
|
639
|
+
# Does follow include the specified terminal
|
640
|
+
#
|
641
|
+
# @param [Symbol] production
|
642
|
+
# @param [Symbol, Lexer::Token] token
|
643
|
+
# A terminal, or symbol or string
|
644
|
+
# @return [Boolean]
|
645
|
+
def follow_include?(production, token)
|
646
|
+
if token.is_a?(Lexer::Token)
|
647
|
+
@follow.fetch(production, []).any? {|t| token === t}
|
648
|
+
else
|
649
|
+
@follow.fetch(production, []).include?(token)
|
492
650
|
end
|
493
|
-
token
|
494
651
|
end
|
495
652
|
|
496
653
|
##
|
497
|
-
# Return the next token,
|
654
|
+
# Return the next token, raising an error if the token is invalid
|
498
655
|
#
|
656
|
+
# @param [:recover] recover
|
657
|
+
# Recover from errors and go until next valid token or end of file
|
499
658
|
# @return [Token]
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
rescue EBNF::LL1::Lexer::Error => e
|
504
|
-
# Recover from lexer error
|
505
|
-
@lineno = e.lineno
|
506
|
-
error("get_token", "With input '#{e.input}': #{e.message}",
|
507
|
-
:production => @productions.last)
|
508
|
-
|
509
|
-
# Retrieve next valid token
|
510
|
-
t = @lexer.recover
|
511
|
-
debug("get_token", :level => 2) {"skipped to #{t.inspect}"}
|
512
|
-
t
|
513
|
-
end
|
659
|
+
# @raise [Lexer::Error]
|
660
|
+
def get_token(recover = nil)
|
661
|
+
token = @lexer.first
|
514
662
|
#progress("token") {token.inspect}
|
515
663
|
@lineno = token.lineno if token
|
516
664
|
token
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
# @option options [URI, #to_s] :production
|
524
|
-
# @option options [Token] :token
|
525
|
-
def error(node, message, options = {})
|
526
|
-
message += ", found #{options[:token].representation.inspect}" if options[:token]
|
527
|
-
message += " at line #{@lineno}" if @lineno
|
528
|
-
message += ", production = #{options[:production].inspect}" if options[:production]
|
529
|
-
@error_log << message unless @recovering
|
530
|
-
@recovering = true
|
531
|
-
debug(node, message, options.merge(:level => 0))
|
532
|
-
end
|
533
|
-
|
534
|
-
##
|
535
|
-
# Progress output when parsing
|
536
|
-
# param [String] node Relevant location associated with message
|
537
|
-
# param [String] message ("")
|
538
|
-
# param [Hash] options
|
539
|
-
# option options [Integer] :depth
|
540
|
-
# Recursion depth for indenting output
|
541
|
-
# yieldreturn [String] added to message
|
542
|
-
def progress(node, *args)
|
543
|
-
return unless @options[:progress] || @options[:debug]
|
544
|
-
options = args.last.is_a?(Hash) ? args.pop : {}
|
545
|
-
message = args.join(",")
|
546
|
-
depth = options[:depth] || self.depth
|
547
|
-
message += yield.to_s if block_given?
|
548
|
-
debug(node, message, options.merge(:level => 1))
|
549
|
-
end
|
550
|
-
|
551
|
-
##
|
552
|
-
# Progress output when debugging.
|
553
|
-
# Captures output to `@options[:debug]` if it is an array.
|
554
|
-
# Otherwise, if `@options[:debug]` is set, or
|
555
|
-
# `@options[:progress]` is set and `:level` <= 1, or
|
556
|
-
# `@options[:validate]` is set and `:level` == 0 output
|
557
|
-
# to standard error.
|
558
|
-
#
|
559
|
-
# @overload debug(node, message)
|
560
|
-
# @param [String] node Relevant location associated with message
|
561
|
-
# @param [String] message ("")
|
562
|
-
# @param [Hash] options
|
563
|
-
# @option options [Integer] :depth
|
564
|
-
# Recursion depth for indenting output
|
565
|
-
# @option options [Integer] :level
|
566
|
-
# Debug level, `0` for errors, `1` for progress, anything else
|
567
|
-
# for debug output.
|
568
|
-
#
|
569
|
-
# @overload debug(message)
|
570
|
-
# @param [String] node Relevant location associated with message
|
571
|
-
# @param [Hash] options
|
572
|
-
# @option options [Integer] :depth
|
573
|
-
# Recursion depth for indenting output
|
574
|
-
# @option options [Integer] :level
|
575
|
-
# Debug level, `0` for errors, `1` for progress, anything else
|
576
|
-
# for debug output.
|
577
|
-
# @yieldreturn [String] added to message
|
578
|
-
def debug(*args)
|
579
|
-
options = args.last.is_a?(Hash) ? args.pop : {}
|
580
|
-
debug_level = options.fetch(:level, 2)
|
581
|
-
return unless @options[:debug] && debug_level <= DEBUG_LEVEL ||
|
582
|
-
@options[:progress] && debug_level <= 1 ||
|
583
|
-
@options[:validate] && debug_level == 0
|
584
|
-
depth = options[:depth] || self.depth
|
585
|
-
d_str = depth > 20 ? ' ' * 20 + '+' : ' ' * depth
|
586
|
-
args << yield if block_given?
|
587
|
-
message = "#{args.join(': ')}"
|
588
|
-
str = "[#{@lineno}](#{debug_level})#{d_str}#{message}"
|
589
|
-
@options[:debug] << str if @options[:debug].is_a?(Array)
|
590
|
-
case
|
591
|
-
when @options[:yield]
|
592
|
-
@parse_callback.call(:trace, node, message, options)
|
593
|
-
when @options[:debug] == true
|
594
|
-
$stderr.puts str
|
595
|
-
when @options[:progress] && debug_level <= 1
|
596
|
-
$stderr.puts str
|
597
|
-
when @options[:validate] && debug_level == 0
|
598
|
-
$stderr.puts str
|
665
|
+
rescue Lexer::Error => e
|
666
|
+
if recover
|
667
|
+
# Recover from the lexer error so that we do not bail out too early
|
668
|
+
@lexer.recover
|
669
|
+
error("get_token", "With input '#{e.input}': #{e.message}}")
|
670
|
+
retry
|
599
671
|
end
|
672
|
+
raise
|
600
673
|
end
|
601
674
|
|
602
675
|
##
|
603
676
|
# Accept the first token in the input stream if it matches
|
604
|
-
#
|
677
|
+
# `type_or_value`. Raises Error otherwise.
|
605
678
|
#
|
606
679
|
# @param [Symbol, String] type_or_value
|
607
680
|
# @return [Token]
|
681
|
+
# @raise [Error, Lexer::Error]
|
608
682
|
def accept(type_or_value)
|
609
683
|
if (token = get_token) && token === type_or_value
|
610
684
|
debug("accept") {"#{token.inspect} === #{type_or_value.inspect}"}
|
data/lib/ebnf/ll1/scanner.rb
CHANGED
data/lib/ebnf/rule.rb
CHANGED
@@ -10,40 +10,47 @@ module EBNF
|
|
10
10
|
diff hex range
|
11
11
|
}.map(&:to_sym).freeze
|
12
12
|
|
13
|
-
#
|
13
|
+
# Symbol of rule
|
14
|
+
#
|
14
15
|
# @return [Symbol]
|
15
16
|
attr_accessor :sym
|
16
17
|
|
17
|
-
#
|
18
|
+
# ID of rule
|
18
19
|
# @return [String]
|
19
20
|
attr_accessor :id
|
20
21
|
|
21
22
|
# A comprehension is a sequence which contains all elements but the first of the original rule.
|
22
|
-
#
|
23
|
+
#
|
23
24
|
# @return [Rule]
|
24
25
|
attr_accessor :comp
|
25
26
|
|
26
|
-
#
|
27
|
+
# Kind of rule
|
28
|
+
#
|
27
29
|
# @return [:rule, :terminal, or :pass]
|
28
30
|
attr_accessor :kind
|
29
31
|
|
30
|
-
#
|
32
|
+
# Rule expression
|
33
|
+
#
|
31
34
|
# @return [Array]
|
32
35
|
attr_accessor :expr
|
33
36
|
|
34
|
-
#
|
37
|
+
# Original EBNF
|
38
|
+
#
|
35
39
|
# @return [String]
|
36
40
|
attr_accessor :orig
|
37
41
|
|
38
|
-
#
|
42
|
+
# Terminals that can begin this rule (its first terminals)
|
43
|
+
#
|
39
44
|
# @return [Array<Rule>]
|
40
45
|
attr_reader :first
|
41
46
|
|
42
|
-
#
|
47
|
+
# Terminals that immediately follow this rule
|
48
|
+
#
|
43
49
|
# @return [Array<Rule>]
|
44
50
|
attr_reader :follow
|
45
51
|
|
46
|
-
#
|
52
|
+
# Indicates that this is a starting rule
|
53
|
+
#
|
47
54
|
# @return [Boolean]
|
48
55
|
attr_accessor :start
|
49
56
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ebnf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gregg Kellogg
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-03-
|
11
|
+
date: 2013-03-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sxp
|