ebnf 1.1.2 → 2.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +218 -196
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +40 -21
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/doap.ttl +23 -15
- data/etc/ebnf.ebnf +21 -33
- data/etc/ebnf.html +171 -160
- data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
- data/etc/ebnf.ll1.sxp +182 -183
- data/etc/ebnf.peg.rb +90 -0
- data/etc/ebnf.peg.sxp +84 -0
- data/etc/ebnf.sxp +40 -41
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +363 -362
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +103 -101
- data/lib/ebnf.rb +7 -2
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +128 -87
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +140 -8
- data/lib/ebnf/ll1/lexer.rb +37 -32
- data/lib/ebnf/ll1/parser.rb +113 -73
- data/lib/ebnf/ll1/scanner.rb +83 -51
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +561 -0
- data/lib/ebnf/peg/rule.rb +241 -0
- data/lib/ebnf/rule.rb +453 -163
- data/lib/ebnf/terminals.rb +21 -0
- data/lib/ebnf/writer.rb +561 -88
- metadata +114 -28
- data/etc/sparql.rb +0 -45773
data/lib/ebnf/peg.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
module EBNF
  ##
  # PEG support. The methods here call `ast`, `progress` and `debug` on the
  # receiver, so the including object must provide them.
  module PEG
    autoload :Parser, 'ebnf/peg/parser'
    autoload :Rule, 'ebnf/peg/rule'

    ##
    # Transform EBNF Rule set for PEG parsing:
    #
    # * Transform each rule into a set of sub-rules extracting unnamed sequences into new rules, using {Rule#to_peg}.
    # @return [EBNF] self
    def make_peg
      progress("make_peg") {"Start: #{@ast.length} rules"}
      new_ast = []

      ast.each do |rule|
        debug("make_peg") {"expand from: #{rule.inspect}"}
        new_rules = rule.to_peg
        debug(" => ") {new_rules.map(&:sym).join(', ')}
        # Append in place: `+=` would copy the whole accumulated array on every iteration.
        new_ast.concat(new_rules)
      end

      @ast = new_ast
      progress("make_peg") {"End: #{@ast.length} rules"}
      self
    end

    ##
    # Output Ruby parser files for PEG parsing
    #
    # Writes a `RULES = [...]` array literal with one line per rule in `ast`.
    # Rules that are PEG rules get `.extend(EBNF::PEG::Rule)` appended.
    #
    # @param [IO, StringIO] output
    # @param [Hash{Symbol => Object}] options accepted but not used by this method
    def to_ruby_peg(output, **options)
      output.puts " RULES = ["
      ast.each do |rule|
        output.puts " " + rule.to_ruby + (rule.is_a?(EBNF::PEG::Rule) ? '.extend(EBNF::PEG::Rule)' : '') + ','
      end
      output.puts " ]"
    end
  end
end
|
@@ -0,0 +1,561 @@
|
|
1
|
+
module EBNF::PEG
|
2
|
+
##
|
3
|
+
# A Generic PEG parser using the parsed rules modified for PEG parsing.
|
4
|
+
module Parser
|
5
|
+
##
|
6
|
+
# @return [Regexp, Rule] how to remove inter-rule whitespace
|
7
|
+
attr_reader :whitespace
|
8
|
+
|
9
|
+
##
|
10
|
+
# @return [Scanner] used for scanning input.
|
11
|
+
attr_reader :scanner
|
12
|
+
|
13
|
+
##
|
14
|
+
# A Hash structure used for memoizing rule results for a given input location.
|
15
|
+
#
|
16
|
+
# @example Partial structure for memoizing results for a particular rule
|
17
|
+
#
|
18
|
+
# {
|
19
|
+
# rule: {
|
20
|
+
# 86: {
|
21
|
+
# pos:
|
22
|
+
# result: [<EBNF::Rule:80 {
|
23
|
+
# sym: :ebnf,
|
24
|
+
# id: "1",
|
25
|
+
# kind: :rule,
|
26
|
+
# expr: [:star, [:alt, :declaration, :rule]]}>],
|
27
|
+
# }
|
28
|
+
# 131: [<EBNF::Rule:80 {sym: :ebnf,
|
29
|
+
# id: "1",
|
30
|
+
# kind: :rule,
|
31
|
+
# expr: [:star, [:alt, :declaration, :rule]]}>,
|
32
|
+
# <EBNF::Rule:100 {
|
33
|
+
# sym: :declaration,
|
34
|
+
# id: "2",
|
35
|
+
# kind: :rule,
|
36
|
+
# expr: [:alt, "@terminals", :pass]}>]
|
37
|
+
# },
|
38
|
+
# POSTFIX: {
|
39
|
+
# 80: "*",
|
40
|
+
# 368: "*",
|
41
|
+
# 399: "+"
|
42
|
+
# }
|
43
|
+
# }
|
44
|
+
# @return [Hash{Integer => Hash{Symbol => Object}}]
|
45
|
+
attr_reader :packrat
|
46
|
+
|
47
|
+
# Ruby hook run when this module is included somewhere: extends the
# including class/module with the ClassMethods DSL defined below.
def self.included(base)
|
48
|
+
base.extend(ClassMethods)
|
49
|
+
end
|
50
|
+
|
51
|
+
# DSL for creating terminals and productions
|
52
|
+
module ClassMethods
|
53
|
+
# Lazily-created registry of start blocks, keyed by production symbol.
def start_handlers
  @start_handlers ||= {}
end
|
54
|
+
# Lazily-created registry of per-production start options, keyed by production symbol.
# (The backing ivar was previously misspelled `@start_hoptions`.)
def start_options; (@start_options ||= {}); end
|
55
|
+
# Lazily-created registry of `[block, clear_packrat]` pairs, keyed by production symbol.
def production_handlers
  @production_handlers ||= {}
end
|
56
|
+
# Lazily-created registry of terminal blocks, keyed by terminal symbol.
def terminal_handlers
  @terminal_handlers ||= {}
end
|
57
|
+
# Lazily-created registry of terminal patterns, keyed by terminal symbol.
def terminal_regexps
  @terminal_regexps ||= {}
end
|
58
|
+
|
59
|
+
##
|
60
|
+
# Defines the pattern for a terminal node and a block to be invoked
|
61
|
+
# when the terminal is encountered. If the block is missing, the
|
62
|
+
# value of the terminal will be placed on the input hash to be returned
|
63
|
+
# to a previous production. Block is called in an evaluation block from
|
64
|
+
# the enclosing parser.
|
65
|
+
#
|
66
|
+
# If no block is provided, then the value which would have been passed to the block is used as the result directly.
|
67
|
+
#
|
68
|
+
# @param [Symbol] term
|
69
|
+
# The terminal name.
|
70
|
+
# @param [Regexp] regexp (nil)
|
71
|
+
# Pattern used to scan for this terminal,
|
72
|
+
# defaults to the expression defined in the associated rule.
|
73
|
+
# If unset, the terminal rule is used for matching.
|
74
|
+
# @param [Hash] options
|
75
|
+
# @option options [Hash{String => String}] :map ({})
|
76
|
+
# A mapping from terminals, in lower-case form, to
|
77
|
+
# their canonical value
|
78
|
+
# @yield [value, prod]
|
79
|
+
# @yieldparam [String] value
|
80
|
+
# The scanned terminal value.
|
81
|
+
# @yieldparam [Symbol] prod
|
82
|
+
# A symbol indicating the production which referenced this terminal
|
83
|
+
# @yieldparam [Proc] block
|
84
|
+
# Block passed to initialization for yielding to calling parser.
|
85
|
+
# Should conform to the yield specs for #initialize
|
86
|
+
def terminal(term, regexp = nil, **options, &block)
  # Record only what was actually supplied; `options` is accepted but not stored here.
  terminal_regexps.store(term, regexp) if regexp
  terminal_handlers.store(term, block) if block
end
|
90
|
+
|
91
|
+
##
|
92
|
+
# Defines a production called at the beginning of a particular production
|
93
|
+
# with data from previous production along with data defined for the
|
94
|
+
# current production. Block is called in an evaluation block from
|
95
|
+
# the enclosing parser.
|
96
|
+
#
|
97
|
+
# @param [Symbol] term
|
98
|
+
# The rule name
|
99
|
+
# @param [Hash{Symbol => Object}] options
|
100
|
+
# Options which are returned from {Parser#onStart}.
|
101
|
+
# @option options [Boolean] :as_hash (false)
|
102
|
+
# If the production is a `seq`, causes the value to be represented as a single hash, rather than an array of individual hashes for each sub-production. Note that this is not always advisable due to the possibility of repeated productions within the sequence.
|
103
|
+
# @yield [data, block]
|
104
|
+
# @yieldparam [Hash] data
|
105
|
+
# A Hash defined for the current production, during :start
|
106
|
+
# may be initialized with data to pass to further productions,
|
107
|
+
# during :finish, it contains data placed by earlier productions
|
108
|
+
# @yieldparam [Proc] block
|
109
|
+
# Block passed to initialization for yielding to calling parser.
|
110
|
+
# Should conform to the yield specs for #initialize
|
111
|
+
# Yield to generate a triple
|
112
|
+
def start_production(term, **options, &block)
  # The handler is stored even when no block was given (nil).
  start_handlers.store(term, block)
  # Options are frozen before being stored; #onStart returns them as-is.
  start_options.store(term, options.freeze)
end
|
116
|
+
|
117
|
+
##
|
118
|
+
# Defines a production called when production of associated
|
119
|
+
# non-terminals has completed
|
120
|
+
# with data from previous production along with data defined for the
|
121
|
+
# current production. Block is called in an evaluation block from
|
122
|
+
# the enclosing parser.
|
123
|
+
#
|
124
|
+
# @param [Symbol] term
|
125
|
+
# Term which is a key in the branch table
|
126
|
+
# @param [Boolean] clear_packrat (false)
|
127
|
+
# Clears the packrat state on completion to reduce memory requirements of parser. Use only on a top-level rule when it is determined that no further backtracking is necessary.
|
128
|
+
# @yield [result, data, block]
|
129
|
+
# @yieldparam [Object] result
|
130
|
+
# The result from successfully parsing the production.
|
131
|
+
# @yieldparam [Hash] data
|
132
|
+
# A Hash defined for the current production, during :start
|
133
|
+
# may be initialized with data to pass to further productions,
|
134
|
+
# during :finish, it contains data placed by earlier productions
|
135
|
+
# @yieldparam [Proc] block
|
136
|
+
# Block passed to initialization for yielding to calling parser.
|
137
|
+
# Should conform to the yield specs for #initialize
|
138
|
+
# @yieldreturn [Object] the result of this production.
|
139
|
+
# Yield to generate a triple
|
140
|
+
def production(term, clear_packrat: false, &block)
  # Stored as a pair so #onFinish can destructure handler and flag together.
  production_handlers.store(term, [block, clear_packrat])
end
|
143
|
+
|
144
|
+
# Evaluate a handler, delegating to the specified object.
|
145
|
+
# This is necessary so that handlers can operate within the
|
146
|
+
# binding context of the parser in which they're invoked.
|
147
|
+
# @param [Object] object
|
148
|
+
# @return [Object]
|
149
|
+
def eval_with_binding(object)
|
150
|
+
# Remember the target so the class-level method_missing below can forward
# otherwise-undefined calls to it. Note it is not reset after the block runs.
@delegate = object
|
151
|
+
# Yield to the caller's block from inside instance_eval on the target;
# the block's value is the return value of this method.
object.instance_eval {yield}
|
152
|
+
end
|
153
|
+
|
154
|
+
private
|
155
|
+
|
156
|
+
def method_missing(method, *args, &block)
  # `||=` keeps the ivar initialized, so reading it never triggers an "uninitialized" warning.
  target = (@delegate ||= nil)
  return super unless target

  # special handling when last arg is **options
  takes_keyrest = target.method(method).parameters.any? {|kind, _| kind == :keyrest}
  if takes_keyrest && args.last.is_a?(Hash)
    *positional, opts = args
    target.send(method, *positional, **opts, &block)
  else
    target.send(method, *args, &block)
  end
end
|
170
|
+
end
|
171
|
+
|
172
|
+
##
|
173
|
+
# Initializes a new parser instance.
|
174
|
+
#
|
175
|
+
# @param [String, #to_s] input
|
176
|
+
# @param [Symbol, #to_s] start
|
177
|
+
# The starting production for the parser. It may be a URI from the grammar, or a symbol representing the local_name portion of the grammar URI.
|
178
|
+
# @param [Array<EBNF::PEG::Rule>] rules
|
179
|
+
# The parsed rules, which control parsing sequence.
|
180
|
+
# Identify the symbol of the starting rule with `start`.
|
181
|
+
# @param [Hash{Symbol => Object}] options
|
182
|
+
# @option options [Integer] :high_water passed to lexer
|
183
|
+
# @option options [Logger] :logger for errors/progress/debug.
|
184
|
+
# @option options [Integer] :low_water passed to lexer
|
185
|
+
# @option options [Symbol, Regexp] :whitespace
|
186
|
+
# Symbol of whitespace rule (defaults to `@pass`), or a regular expression
|
187
|
+
# for eating whitespace between non-terminal rules (strongly encouraged).
|
188
|
+
# @yield [context, *data]
|
189
|
+
# Yields to return data to parser
|
190
|
+
# @yieldparam [:statement, :trace] context
|
191
|
+
# Context for block
|
192
|
+
# @yieldparam [Symbol] *data
|
193
|
+
# Data specific to the call
|
194
|
+
# @return [Object] AST resulting from parse
|
195
|
+
# @raise [Exception] Raises exceptions for parsing errors
|
196
|
+
# or errors raised during processing callbacks. Internal
|
197
|
+
# errors are raised using {Error}.
|
198
|
+
def parse(input = nil, start = nil, rules = nil, **options, &block)
  start ||= options[:start]
  rules ||= options[:rules] || []

  # Index the rules by symbol in one pass; a later rule with the same symbol replaces an earlier one.
  @rules = rules.each_with_object({}) {|rule, index| index[rule.sym] = rule}
  @packrat = {}

  # Add parser reference to each rule
  @rules.each_value {|rule| rule.parser = self}

  # Take whitespace from options (a Regexp, or a Symbol naming a rule),
  # otherwise the first `pass` rule, otherwise a default pattern.
  @whitespace = case options[:whitespace]
  when Symbol then @rules[options[:whitespace]]
  else options[:whitespace]
  end ||
    @rules.values.detect(&:pass?) ||
    /(?:\s|(?:#[^x][^\n\r]*))+/m.freeze

  @options = options.dup
  @productions = []
  @parse_callback = block
  @error_log = []
  @prod_data = []
  # Forget failure state left by an earlier call on this instance: a stale
  # @recovering flag would suppress error logging (so a failed parse would not
  # raise), and a stale furthest failure would be reported against the wrong input.
  @recovering = false
  @furthest_failure = nil

  @scanner = EBNF::LL1::Scanner.new(input)

  # A non-Symbol start may be a grammar URI; keep only the part after the last '#'.
  unless start.is_a?(Symbol)
    local_name = start.to_s.split('#').last
    start = local_name && local_name.to_sym
  end
  start_rule = @rules[start]
  raise Error, "Starting production #{start.inspect} not defined" unless start_rule

  # Reports the furthest recorded failure, falling back to the current scanner
  # position when no failure was recorded (nothing to dereference in that case).
  report_failure = lambda do
    failure = @furthest_failure
    pos = failure ? failure.pos : scanner.pos
    error("--top--", (failure || "syntax error").to_s,
      pos: pos,
      lineno: (failure ? failure.lineno : scanner.lineno),
      rest: scanner.string[pos, 20])
  end

  result = start_rule.parse(scanner)
  # Start rule wasn't matched, which is about the only error condition
  report_failure.call if result == :unmatched

  # Eat any remaining whitespace; anything still left over is unparsed input.
  start_rule.eat_whitespace(scanner)
  report_failure.call unless scanner.eos?

  # When all is said and done, raise the error log
  raise Error, @error_log.join("\n") unless @error_log.empty?

  result
end
|
252
|
+
|
253
|
+
# Depth of parsing, for log output.
|
254
|
+
# Number of productions currently open; 0 before any parse has set up the stack.
def depth
  @productions ? @productions.length : 0
end
|
255
|
+
|
256
|
+
# Current ProdData element
|
257
|
+
# Top of the production-data stack, or a fresh empty Hash when the stack is empty.
def prod_data
  current = @prod_data.last
  current || {}
end
|
258
|
+
|
259
|
+
# Clear out packrat memoizer. This is appropriate when completing a top-level rule when there is no possibility of backtracking.
|
260
|
+
# Empties the memo table in place and returns it.
def clear_packrat
  @packrat.clear
end
|
261
|
+
|
262
|
+
##
|
263
|
+
# Error information, used as level `3` logger messages.
|
264
|
+
# Messages may be logged and are saved for reporting at end of parsing.
|
265
|
+
#
|
266
|
+
# @param [String] node Relevant location associated with message
|
267
|
+
# @param [String] message Error string
|
268
|
+
# @param [Hash{Symbol => Object}] options
|
269
|
+
# @option options [URI, #to_s] :production
|
270
|
+
# @option options [Boolean] :raise abort further processing
|
271
|
+
# @option options [Array] :backtrace state where error occurred
|
272
|
+
# @see #debug
|
273
|
+
def error(node, message, **options)
  rest, production = options.values_at(:rest, :production)
  lineno = options[:lineno] || (scanner.lineno if scanner)

  text = "ERROR "
  text += "[line: #{lineno}] " if lineno
  text += message
  text += " (found #{rest.inspect})" if rest
  text += ", production = #{production.inspect}" if production

  # Only the first error of a recovery episode is kept for the final report.
  @error_log << text unless @recovering
  @recovering = true
  debug(node, text, level: 3, **options)
  return unless options[:raise] || @options[:validate]

  raise Error.new(text,
    lineno: lineno,
    rest: rest,
    production: production,
    backtrace: options[:backtrace])
end
|
291
|
+
|
292
|
+
##
|
293
|
+
# Warning information, used as level `2` logger messages.
|
294
|
+
# Messages may be logged and are saved for reporting at end of parsing.
|
295
|
+
#
|
296
|
+
# @param [String] node Relevant location associated with message
|
297
|
+
# @param [String] message Error string
|
298
|
+
# @param [Hash] options
|
299
|
+
# @option options [URI, #to_s] :production
|
300
|
+
# @option options [Token] :token
|
301
|
+
# @see #debug
|
302
|
+
def warn(node, message, **options)
  rest, production = options.values_at(:rest, :production)
  lineno = options[:lineno] || (scanner.lineno if scanner)

  text = "WARNING "
  text += "[line: #{lineno}] " if lineno
  text += message
  text += " (found #{rest.inspect})" if rest
  text += ", production = #{production.inspect}" if production

  # Warnings are only forwarded to the logger; they are not added to @error_log.
  debug(node, text, level: 2, **options)
end
|
311
|
+
|
312
|
+
##
|
313
|
+
# Progress logged when parsing. Passed as level `1` logger messages.
|
314
|
+
#
|
315
|
+
# The call is ignored, unless `@options[:logger]` is set.
|
316
|
+
#
|
317
|
+
# @overload progress(node, message, **options, &block)
|
318
|
+
# @param [String] node Relevant location associated with message
|
319
|
+
# @param [String] message ("")
|
320
|
+
# @param [Hash] options
|
321
|
+
# @option options [Integer] :depth
|
322
|
+
# Recursion depth for indenting output
|
323
|
+
# @see #debug
|
324
|
+
def progress(node, *args, &block)
  return unless @options[:logger]

  # Work on a copy of the trailing options Hash: writing the default level into
  # the caller's own Hash would leak `:level` back to them and fail on a frozen Hash.
  opts = args.last.is_a?(Hash) ? args.pop : {}
  opts = opts.merge(level: 1) unless opts[:level]
  debug(node, *args, opts, &block)
end
|
330
|
+
|
331
|
+
##
|
332
|
+
# Debug logging.
|
333
|
+
#
|
334
|
+
# The call is ignored, unless `@options[:logger]` is set.
|
335
|
+
#
|
336
|
+
# @overload debug(node, message, **options)
|
337
|
+
# @param [Array<String>] args Relevant location associated with message
|
338
|
+
# @param [Hash] options
|
339
|
+
# @option options [Integer] :depth
|
340
|
+
# Recursion depth for indenting output
|
341
|
+
# @yieldreturn [String] additional string appended to `message`.
|
342
|
+
def debug(*args, &block)
  logger = @options[:logger]
  return unless logger

  opts = args.last.is_a?(Hash) ? args.pop : {}
  lineno = opts[:lineno] || (scanner.lineno if scanner)
  level = opts.fetch(:level, 0)
  indent = opts[:depth] || depth

  # Prefer a `log_debug` method on the parser itself; it gets a symbolic level.
  if respond_to?(:log_debug)
    severity = [:debug, :info, :warn, :error, :fatal][level]
    return log_debug(*args, **opts.merge(level: severity, lineno: lineno, depth: indent), &block)
  end

  # Otherwise write a formatted line via `add` (with the numeric level) or `<<`.
  sink = [:add, :<<].find {|name| logger.respond_to?(name)}
  return unless sink

  args << yield if block_given?
  line = "[#{lineno}]" + (" " * indent) + args.join(" ")
  sink == :add ? logger.add(level, line) : logger << line
end
|
360
|
+
|
361
|
+
# Start for production
|
362
|
+
# Adds data available during the processing of the production
|
363
|
+
#
|
364
|
+
# @return [Hash] composed of production options. Currently only `as_hash` is supported.
|
365
|
+
# @see ClassMethods#start_production
|
366
|
+
def onStart(prod)
  parser_class = self.class
  start_handler = parser_class.start_handlers[prod]
  @productions << prod

  debug("#{prod}(:start)", "", lineno: (scanner.lineno if scanner), pos: (scanner.pos if scanner)) do
    position = scanner ? scanner.pos : '?'
    remaining = scanner ? scanner.rest[0..20].inspect : '?'
    "#{prod}, pos: #{position}, rest: #{remaining}"
  end

  if start_handler
    # Build the production data first so the handler can customize it
    # before it is pushed on the @prod_data stack.
    frame = {_production: prod}
    begin
      parser_class.eval_with_binding(self) { start_handler.call(frame, @parse_callback) }
    rescue ArgumentError, Error => e
      error("start", "#{e.class}: #{e.message}", production: prod, backtrace: e.backtrace)
      @recovering = false
    end
    @prod_data << frame
  elsif parser_class.production_handlers[prod]
    # Make sure we push as many as we pop, even without an explicit start handler.
    @prod_data << {_production: prod}
  end

  # Any options registered for this production (empty Hash when none).
  parser_class.start_options.fetch(prod, {})
end
|
395
|
+
|
396
|
+
# Finish of production
|
397
|
+
#
|
398
|
+
# @param [Object] result parse result
|
399
|
+
# @return [Object] parse result, or the value returned from the handler
|
400
|
+
def onFinish(result)
  prod = @productions.last
  finisher, wants_clear = self.class.production_handlers[prod]

  # A data frame was pushed in #onStart only if a start or finish handler exists.
  frame = (finisher || self.class.start_handlers[prod]) ? @prod_data.pop : nil
  if frame && prod != frame[:_production]
    error("finish",
      "prod_data production mismatch: expected #{prod.inspect}, got #{frame[:_production].inspect}",
      production: prod, prod_data: @prod_data)
  end

  if finisher && !@recovering && result != :unmatched
    # NOTE(review): when the handler raises, `result` becomes the value of the
    # rescue clause (`false`) — confirm this is intended.
    result = begin
      self.class.eval_with_binding(self) { finisher.call(result, frame, @parse_callback) }
    rescue ArgumentError, Error => e
      error("finish", "#{e.class}: #{e.message}", production: prod, backtrace: e.backtrace)
      @recovering = false
    end
  end

  debug("#{prod}(:finish)", "",
    lineno: (scanner.lineno if scanner),
    level: result == :unmatched ? 0 : 1) do
    position = scanner ? scanner.pos : '?'
    remaining = scanner ? scanner.rest[0..20].inspect : '?'
    "#{result.inspect}@(#{position}), rest: #{remaining}"
  end

  clear_packrat if wants_clear
  @productions.pop
  result
end
|
428
|
+
|
429
|
+
# A terminal with a defined handler
|
430
|
+
#
|
431
|
+
# @param [Symbol] prod from the symbol of the associated rule
|
432
|
+
# @param [String] value the scanned string
|
433
|
+
# @return [String, Object] either the result from the handler, or the token
|
434
|
+
def onTerminal(prod, value)
  enclosing = @productions.last
  term_handler = self.class.terminal_handlers[prod]

  if term_handler && value != :unmatched
    # NOTE(review): when the handler raises, `value` becomes the value of the
    # rescue clause (`false`) — confirm this is intended.
    value = begin
      self.class.eval_with_binding(self) { term_handler.call(value, enclosing, @parse_callback) }
    rescue ArgumentError, Error => e
      error("terminal", "#{e.class}: #{e.message}", value: value, production: prod, backtrace: e.backtrace)
      @recovering = false
    end
  end

  progress("#{prod}(:terminal)", "",
    depth: (depth + 1),
    lineno: (scanner.lineno if scanner),
    level: value == :unmatched ? 0 : 1) do
    position = scanner ? scanner.pos : '?'
    "#{value.inspect}@(#{position})"
  end

  value
end
|
455
|
+
|
456
|
+
##
|
457
|
+
# Find a rule for a symbol
|
458
|
+
#
|
459
|
+
# @param [Symbol] sym
|
460
|
+
# @return [Rule]
|
461
|
+
def find_rule(sym)
|
462
|
+
# @rules is the symbol => rule index built in #parse; an unknown symbol yields nil.
@rules[sym]
|
463
|
+
end
|
464
|
+
|
465
|
+
##
|
466
|
+
# Find a regular expression defined for a terminal
|
467
|
+
#
|
468
|
+
# @param [Symbol] sym
|
469
|
+
# @return [Regexp]
|
470
|
+
def find_terminal_regexp(sym)
|
471
|
+
# Patterns are registered per class through ClassMethods#terminal; nil when none was given for sym.
self.class.terminal_regexps[sym]
|
472
|
+
end
|
473
|
+
|
474
|
+
##
|
475
|
+
# Record furthest failure.
|
476
|
+
#
|
477
|
+
# @param [Integer] pos
|
478
|
+
# The position in the input stream where the failure occurred.
|
479
|
+
# @param [Integer] lineno
|
480
|
+
# Line where the failure occurred.
|
481
|
+
# @param [Symbol, String] token
|
482
|
+
# The terminal token or string which attempted to match.
|
483
|
+
# @see https://arxiv.org/pdf/1405.6646.pdf
|
484
|
+
def update_furthest_failure(pos, lineno, token)
  # Skip generated productions (symbols starting with '_')
  generated = token.is_a?(Symbol) && token.to_s.start_with?('_')
  return if generated

  best = @furthest_failure
  if best.nil? || pos > best.pos
    # First failure, or one further into the input: start a new record.
    @furthest_failure = Unmatched.new(pos, lineno, [token])
  elsif pos == best.pos && !best.expecting.include?(token)
    # Same position: add this token to the alternatives that were expected there.
    best.expecting << token
  end
end
|
493
|
+
|
494
|
+
public
|
495
|
+
|
496
|
+
##
|
497
|
+
# @!parse
|
498
|
+
# # Record details about an unmatched rule, including the following:
|
499
|
+
# #
|
500
|
+
# # * Input location and line number at time of failure.
|
501
|
+
# # * The rule at which this was found (non-terminal, and not starting with '_').
|
502
|
+
# class Unmatched
|
503
|
+
# # @return [Integer] The position within the scanner which did not match.
|
504
|
+
# attr_reader :pos
|
505
|
+
# # @return [Integer] The line number which did not match.
|
506
|
+
# attr_reader :lineno
|
507
|
+
# # @return [Array<Symbol,String>]
|
508
|
+
# # Strings or production rules that attempted to match at this position.
|
509
|
+
# attr_reader :expecting
|
510
|
+
# end
|
511
|
+
class Unmatched < Struct.new(:pos, :lineno, :expecting)
  # Human-readable message listing every token that was expected at this position.
  # @return [String]
  def to_s
    expected = expecting.map(&:inspect).join(', ')
    "syntax error, expecting #{expected}"
  end
end
|
516
|
+
|
517
|
+
##
|
518
|
+
# Raised for errors during parsing.
|
519
|
+
#
|
520
|
+
# @example Raising a parser error
|
521
|
+
# raise Error.new(
|
522
|
+
# "invalid token '%' on line 10",
|
523
|
+
# rest: '%', lineno: 9, production: :turtleDoc)
|
524
|
+
#
|
525
|
+
# @see https://ruby-doc.org/core/classes/StandardError.html
|
526
|
+
class Error < StandardError
  ##
  # The current production.
  #
  # @return [Symbol]
  attr_reader :production

  ##
  # The read head when scanning failed
  #
  # @return [String]
  attr_reader :rest

  ##
  # The line number where the error occurred.
  #
  # @return [Integer]
  attr_reader :lineno

  ##
  # Initializes a new parser error instance.
  #
  # @param [String, #to_s] message
  # @param [Hash{Symbol => Object}] options
  # @option options [Symbol] :production (nil)
  # @option options [String] :rest (nil)
  # @option options [Integer] :lineno (nil)
  # @option options [Array<String>] :backtrace (nil)
  #   Backtrace to attach to the error, e.g. that of a handler exception
  #   being re-reported. Previously accepted but silently dropped.
  def initialize(message, **options)
    @production = options[:production]
    @rest = options[:rest]
    @lineno = options[:lineno]
    super(message.to_s)
    # Attach the supplied backtrace so the original failure site is not lost.
    set_backtrace(options[:backtrace]) if options[:backtrace]
  end
end # class Error
|
560
|
+
end # module Parser
|
561
|
+
end # module EBNF::PEG
|