ebnf 1.2.0 → 2.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +223 -199
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +38 -19
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/doap.ttl +23 -18
- data/etc/ebnf.ebnf +21 -33
- data/etc/ebnf.html +76 -160
- data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
- data/etc/ebnf.ll1.sxp +182 -183
- data/etc/ebnf.peg.rb +90 -0
- data/etc/ebnf.peg.sxp +84 -0
- data/etc/ebnf.sxp +40 -41
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +363 -362
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +103 -101
- data/lib/ebnf.rb +6 -1
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +114 -69
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +131 -3
- data/lib/ebnf/ll1/lexer.rb +20 -22
- data/lib/ebnf/ll1/parser.rb +97 -64
- data/lib/ebnf/ll1/scanner.rb +82 -50
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +561 -0
- data/lib/ebnf/peg/rule.rb +250 -0
- data/lib/ebnf/rule.rb +442 -148
- data/lib/ebnf/terminals.rb +21 -0
- data/lib/ebnf/writer.rb +587 -82
- metadata +125 -18
- data/etc/sparql.rb +0 -45773
data/lib/ebnf/peg.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
module EBNF
  module PEG
    autoload :Parser, 'ebnf/peg/parser'
    autoload :Rule, 'ebnf/peg/rule'

    ##
    # Transform EBNF Rule set for PEG parsing:
    #
    # * Transform each rule into a set of sub-rules extracting unnamed sequences into new rules, using {Rule#to_peg}.
    #
    # Replaces `@ast` with the flattened list of rules returned by calling
    # `to_peg` on every rule of the current AST, preserving order.
    #
    # @return [self]
    def make_peg
      progress("make_peg") {"Start: #{@ast.length} rules"}

      # Append into a single accumulator; `new_ast += new_rules` would copy
      # the whole array on every iteration.
      expanded = ast.each_with_object([]) do |rule, acc|
        debug("make_peg") {"expand from: #{rule.inspect}"}
        new_rules = rule.to_peg
        debug(" => ") {new_rules.map(&:sym).join(', ')}
        acc.concat(new_rules)
      end

      # Only swap in the new AST once every rule has been expanded.
      @ast = expanded
      progress("make_peg") {"End: #{@ast.length} rules"}
      self
    end

    ##
    # Output Ruby parser files for PEG parsing
    #
    # Writes a `RULES = [...]` array literal to `output`, one entry per rule
    # in the AST. Entries for rules that are already `EBNF::PEG::Rule`s get a
    # trailing `.extend(EBNF::PEG::Rule)` so the generated code restores that.
    #
    # @param [IO, StringIO] output
    # @param [Hash] options accepted for interface compatibility; not used here
    def to_ruby_peg(output, **options)
      output.puts " RULES = ["
      ast.each do |rule|
        output.puts " " + rule.to_ruby + (rule.is_a?(EBNF::PEG::Rule) ? '.extend(EBNF::PEG::Rule)' : '') + ','
      end
      output.puts " ]"
    end
  end
end
|
@@ -0,0 +1,561 @@
|
|
1
|
+
module EBNF::PEG
|
2
|
+
##
|
3
|
+
# A Generic PEG parser using the parsed rules modified for PEG parsing.
|
4
|
+
module Parser
|
5
|
+
##
|
6
|
+
# @return [Regexp, Rule] how to remove inter-rule whitespace
|
7
|
+
attr_reader :whitespace
|
8
|
+
|
9
|
+
##
|
10
|
+
# @return [Scanner] used for scanning input.
|
11
|
+
attr_reader :scanner
|
12
|
+
|
13
|
+
##
|
14
|
+
# A Hash structure used for memoizing rule results for a given input location.
|
15
|
+
#
|
16
|
+
# @example Partial structure for memoizing results for a particular rule
|
17
|
+
#
|
18
|
+
# {
|
19
|
+
# rule: {
|
20
|
+
# 86: {
|
21
|
+
# pos:
|
22
|
+
# result: [<EBNF::Rule:80 {
|
23
|
+
# sym: :ebnf,
|
24
|
+
# id: "1",
|
25
|
+
# kind: :rule,
|
26
|
+
# expr: [:star, [:alt, :declaration, :rule]]}>],
|
27
|
+
# }
|
28
|
+
# 131: [<EBNF::Rule:80 {sym: :ebnf,
|
29
|
+
# id: "1",
|
30
|
+
# kind: :rule,
|
31
|
+
# expr: [:star, [:alt, :declaration, :rule]]}>,
|
32
|
+
# <EBNF::Rule:100 {
|
33
|
+
# sym: :declaration,
|
34
|
+
# id: "2",
|
35
|
+
# kind: :rule,
|
36
|
+
# expr: [:alt, "@terminals", :pass]}>]
|
37
|
+
# },
|
38
|
+
# POSTFIX: {
|
39
|
+
# 80: "*",
|
40
|
+
# 368: "*",
|
41
|
+
# 399: "+"
|
42
|
+
# }
|
43
|
+
# }
|
44
|
+
# @return [Hash{Integer => Hash{Symbol => Object}}]
|
45
|
+
attr_reader :packrat
|
46
|
+
|
47
|
+
# Mixin hook: whenever this module is included, the including class is
# also extended with the ClassMethods DSL.
def self.included(base)
  base.extend ClassMethods
end
|
50
|
+
|
51
|
+
# DSL for creating terminals and productions
|
52
|
+
module ClassMethods
  # Lazily-created registries, stored as instance variables on the
  # extending class and filled in by the DSL methods below.

  # @return [Hash{Symbol => Proc}] blocks registered by {#start_production}
  def start_handlers; (@start_handlers ||= {}); end
  # @return [Hash{Symbol => Hash}] frozen option hashes registered by {#start_production}
  def start_options; (@start_options ||= {}); end
  # @return [Hash{Symbol => Array(Proc, Boolean)}] `[block, clear_packrat]` pairs registered by {#production}
  def production_handlers; (@production_handlers ||= {}); end
  # @return [Hash{Symbol => Proc}] blocks registered by {#terminal}
  def terminal_handlers; (@terminal_handlers ||= {}); end
  # @return [Hash{Symbol => Regexp}] patterns registered by {#terminal}
  def terminal_regexps; (@terminal_regexps ||= {}); end

  ##
  # Defines the pattern for a terminal node and a block to be invoked
  # when the terminal is encountered. If the block is missing, the
  # value of the terminal will be placed on the input hash to be returned
  # to a previous production. Block is called in an evaluation block from
  # the enclosing parser.
  #
  # If no block is provided, then the value which would have been passed to the block is used as the result directly.
  #
  # @param [Symbol] term
  #   The terminal name.
  # @param [Regexp] regexp (nil)
  #   Pattern used to scan for this terminal,
  #   defaults to the expression defined in the associated rule.
  #   If unset, the terminal rule is used for matching.
  # @param [Hash] options
  #   Accepted but not read by this method.
  # @option options [Hash{String => String}] :map ({})
  #   A mapping from terminals, in lower-case form, to
  #   their canonical value
  # @yield [value, prod]
  # @yieldparam [String] value
  #   The scanned terminal value.
  # @yieldparam [Symbol] prod
  #   A symbol indicating the production which referenced this terminal
  # @yieldparam [Proc] block
  #   Block passed to initialization for yielding to calling parser.
  #   Should conform to the yield specs for #initialize
  def terminal(term, regexp = nil, **options, &block)
    terminal_regexps[term] = regexp if regexp
    terminal_handlers[term] = block if block_given?
  end

  ##
  # Defines a production called at the beginning of a particular production
  # with data from previous production along with data defined for the
  # current production. Block is called in an evaluation block from
  # the enclosing parser.
  #
  # @param [Symbol] term
  #   The rule name
  # @param [Hash{Symbol => Object}] options
  #   Options which are returned from {Parser#onStart}. Stored frozen.
  # @option options [Boolean] :as_hash (false)
  #   If the production is a `seq`, causes the value to be represented as a single hash, rather than an array of individual hashes for each sub-production. Note that this is not always advisable due to the possibility of repeated productions within the sequence.
  # @yield [data, block]
  # @yieldparam [Hash] data
  #   A Hash defined for the current production, during :start
  #   may be initialized with data to pass to further productions,
  #   during :finish, it contains data placed by earlier productions
  # @yieldparam [Proc] block
  #   Block passed to initialization for yielding to calling parser.
  #   Should conform to the yield specs for #initialize
  # Yield to generate a triple
  def start_production(term, **options, &block)
    start_handlers[term] = block
    start_options[term] = options.freeze
  end

  ##
  # Defines a production called when production of associated
  # non-terminals has completed
  # with data from previous production along with data defined for the
  # current production. Block is called in an evaluation block from
  # the enclosing parser.
  #
  # @param [Symbol] term
  #   Term which is a key in the branch table
  # @param [Boolean] clear_packrat (false)
  #   Clears the packrat state on completion to reduce memory requirements of parser. Use only on a top-level rule when it is determined that no further backtracking is necessary.
  # @yield [result, data, block]
  # @yieldparam [Object] result
  #   The result from successfully parsing the production.
  # @yieldparam [Hash] data
  #   A Hash defined for the current production, during :start
  #   may be initialized with data to pass to further productions,
  #   during :finish, it contains data placed by earlier productions
  # @yieldparam [Proc] block
  #   Block passed to initialization for yielding to calling parser.
  #   Should conform to the yield specs for #initialize
  # @yieldreturn [Object] the result of this production.
  # Yield to generate a triple
  def production(term, clear_packrat: false, &block)
    production_handlers[term] = [block, clear_packrat]
  end

  # Evaluate a handler, delegating to the specified object.
  # This is necessary so that handlers can operate within the
  # binding context of the parser in which they're invoked.
  #
  # The object is remembered in `@delegate` (and stays there after the
  # call returns); {#method_missing} forwards unknown calls to it.
  #
  # @param [Object] object
  # @return [Object] the value of the given block
  def eval_with_binding(object)
    @delegate = object
    object.instance_eval {yield}
  end

  private

  # Forwards calls this class does not define to the current delegate
  # (the object last passed to {#eval_with_binding}), including its
  # private methods. Falls through to `super` (NoMethodError) when there
  # is no delegate or the delegate does not provide the method.
  def method_missing(method, *args, &block)
    delegate = (@delegate ||= nil)
    return super unless delegate && delegate.respond_to?(method, true)

    # special handling when last arg is **options: re-splat a trailing
    # Hash so it reaches the delegate as keywords rather than a positional.
    params = delegate.method(method).parameters
    if params.any? {|t, _| t == :keyrest} && args.last.is_a?(Hash)
      opts = args.pop
      delegate.send(method, *args, **opts, &block)
    else
      delegate.send(method, *args, &block)
    end
  end

  # Keeps `respond_to?` consistent with {#method_missing}: anything the
  # current delegate can handle (public or private) is reported as handled.
  def respond_to_missing?(method, include_private = false)
    delegate = (@delegate ||= nil)
    return true if delegate && delegate.respond_to?(method, true)
    super
  end
end
|
171
|
+
|
172
|
+
##
# Sets up parser state for `input` and parses it, starting from the rule
# identified by `start`.
#
# @param [String, #to_s] input
# @param [Symbol, #to_s] start
#   The starting production for the parser. It may be a URI from the grammar, or a symbol representing the local_name portion of the grammar URI.
#   Falls back to `options[:start]` when not given.
# @param [Array<EBNF::PEG::Rule>] rules
#   The parsed rules, which control parsing sequence.
#   Identify the symbol of the starting rule with `start`.
#   Falls back to `options[:rules]` when not given.
# @param [Hash{Symbol => Object}] options
# @option options [Integer] :high_water passed to lexer
# @option options [Logger] :logger for errors/progress/debug.
# @option options [Integer] :low_water passed to lexer
# @option options [Symbol, Regexp] :whitespace
#   Symbol of whitespace rule (defaults to `@pass`), or a regular expression
#   for eating whitespace between non-terminal rules (strongly encouraged).
# @yield [context, *data]
#   Yields to return data to parser
# @yieldparam [:statement, :trace] context
#   Context for block
# @yieldparam [Symbol] *data
#   Data specific to the call
# @return [Object] AST resulting from parse
# @raise [Exception] Raises exceptions for parsing errors
#   or errors raised during processing callbacks. Internal
#   errors are raised using {Error}.
def parse(input = nil, start = nil, rules = nil, **options, &block)
  start ||= options[:start]
  rules ||= options[:rules] || []
  # Index rules by symbol; a later rule with the same symbol replaces an earlier one.
  @rules = rules.each_with_object({}) {|rule, memo| memo[rule.sym] = rule}
  @packrat = {}

  # Add parser reference to each rule
  @rules.each_value {|rule| rule.parser = self}

  # Take whitespace from options, a named rule, a `pass` rule, a rule named :WS, or a default
  @whitespace = case options[:whitespace]
  when Regexp then options[:whitespace]
  when Symbol then @rules[options[:whitespace]]
  else options[:whitespace]
  end ||
    @rules.values.detect(&:pass?) ||
    /(?:\s|(?:#[^x][^\n\r]*))+/m.freeze

  @options = options.dup
  @productions = []
  @parse_callback = block
  @error_log = []
  @prod_data = []
  # Start from a clean slate: state left by an earlier #parse call on this
  # object would otherwise misreport the failure position or suppress
  # error logging for this run.
  @furthest_failure = nil
  @recovering = false

  @scanner = EBNF::LL1::Scanner.new(input)
  unless start.is_a?(Symbol)
    # Keep only the fragment of a grammar URI (the text after the last '#').
    local_name = start.to_s.split('#').last
    raise Error, "Starting production #{start.inspect} not defined" if local_name.nil?
    start = local_name.to_sym
  end
  start_rule = @rules[start]
  raise Error, "Starting production #{start.inspect} not defined" unless start_rule

  # Logs a top-level syntax error at the furthest recorded failure, or at
  # the current scanner position when no failure was recorded.
  report_failure = lambda do
    failure = @furthest_failure
    if failure
      error("--top--", failure.to_s,
        pos: failure.pos,
        lineno: failure.lineno,
        rest: scanner.string[failure.pos, 20])
    else
      error("--top--", "syntax error",
        pos: scanner.pos,
        lineno: scanner.lineno,
        rest: scanner.string[scanner.pos, 20])
    end
  end

  result = start_rule.parse(scanner)
  # Start rule wasn't matched, which is about the only error condition
  report_failure.call if result == :unmatched

  # Eat any remaining whitespace; anything left after that is unparsed input
  start_rule.eat_whitespace(scanner)
  report_failure.call unless scanner.eos?

  # When all is said and done, raise the error log
  unless @error_log.empty?
    raise Error, @error_log.join("\n")
  end

  result
end
|
252
|
+
|
253
|
+
# Depth of parsing, for log output.
|
254
|
+
def depth
  # No production stack exists until parsing starts; treat that as depth 0.
  stack = @productions || []
  stack.length
end
|
255
|
+
|
256
|
+
# Current ProdData element: the top of the `@prod_data` stack.
#
# @return [Hash] the most recently pushed production data, or a new empty
#   Hash when the stack is empty or has not been created yet.
def prod_data
  (@prod_data || []).last || {}
end
|
258
|
+
|
259
|
+
# Clear out packrat memoizer. This is appropriate when completing a top-level rule when there is no possibility of backtracking.
|
260
|
+
def clear_packrat
  # Empties the memo table in place, so existing references see it cleared.
  @packrat.clear
end
|
261
|
+
|
262
|
+
##
# Error information, used as level `3` logger messages.
# Messages may be logged and are saved for reporting at end of parsing.
#
# The first error seen while not already recovering is appended to
# `@error_log`; the parser is then marked as recovering. The message is
# always passed on to {#debug}.
#
# @param [String] node Relevant location associated with message
# @param [String] message Error string
# @param [Hash{Symbol => Object}] options
# @option options [URI, #to_s] :production
# @option options [Boolean] :raise abort further processing
# @option options [Array] :backtrace state where error occurred
# @option options [Integer] :lineno overrides the scanner's line number
# @option options [String] :rest input text at the point of failure
# @raise [Error] if `:raise` is given or the parser option `:validate` is set
# @see #debug
def error(node, message, **options)
  line = options[:lineno] || (scanner.lineno if scanner)
  rest, production = options.values_at(:rest, :production)

  text = "ERROR " + (line ? "[line: #{line}] " : "") + message
  text += " (found #{rest.inspect})" if rest
  text += ", production = #{production.inspect}" if production

  # Only the first error of a recovery sequence is recorded.
  @error_log << text unless @recovering
  @recovering = true
  debug(node, text, level: 3, **options)

  return unless options[:raise] || @options[:validate]

  raise Error.new(text,
    lineno: line,
    rest: rest,
    production: production,
    backtrace: options[:backtrace])
end
|
291
|
+
|
292
|
+
##
# Warning information, used as level `2` logger messages.
# The formatted message is handed to {#debug}; nothing is stored here.
#
# @param [String] node Relevant location associated with message
# @param [String] message Warning string
# @param [Hash] options
# @option options [URI, #to_s] :production
# @option options [Token] :token
# @option options [Integer] :lineno overrides the scanner's line number
# @option options [String] :rest input text at the point of the warning
# @see #debug
def warn(node, message, **options)
  line = options[:lineno] || (scanner.lineno if scanner)
  rest, production = options.values_at(:rest, :production)

  text = "WARNING " + (line ? "[line: #{line}] " : "") + message
  text += " (found #{rest.inspect})" if rest
  text += ", production = #{production.inspect}" if production

  debug(node, text, level: 2, **options)
end
|
311
|
+
|
312
|
+
##
# Progress logged when parsing. Passed as level `1` logger messages.
#
# The call is ignored, unless `@options[:logger]` is set.
#
# A trailing Hash in `args` is treated as options; `:level` defaults to 1
# when absent (or nil/false). The caller's Hash is never modified.
#
# @overload progress(node, message, **options, &block)
#   @param [String] node Relevant location associated with message
#   @param [String] message ("")
#   @param [Hash] options
#   @option options [Integer] :depth
#     Recursion depth for indenting output
# @see #debug
def progress(node, *args, &block)
  return unless @options[:logger]
  opts = args.last.is_a?(Hash) ? args.pop : {}
  # Build a new Hash rather than writing into the caller's (possibly frozen) one.
  opts = opts.merge(level: 1) unless opts[:level]
  debug(node, *args, opts, &block)
end
|
330
|
+
|
331
|
+
##
# Debug logging.
#
# The call is ignored, unless `@options[:logger]` is set.
#
# Dispatch order: if the receiver responds to `log_debug`, the call is
# forwarded there with `:level` translated to a symbol; otherwise the
# message is sent to the logger's `add`, or failing that its `<<`.
#
# @overload debug(node, message, **options)
#   @param [Array<String>] args Relevant location associated with message
#   @param [Hash] options
#   @option options [Integer] :depth
#     Recursion depth for indenting output
#   @option options [Integer] :level (0)
#   @option options [Integer] :lineno overrides the scanner's line number
#   @yieldreturn [String] additional string appended to `message`.
def debug(*args, &block)
  logger = @options[:logger]
  return unless logger

  opts = args.last.is_a?(Hash) ? args.pop : {}
  line = opts[:lineno] || (scanner.lineno if scanner)
  severity = opts.fetch(:level, 0)
  indent = opts[:depth] || self.depth

  if respond_to?(:log_debug)
    level_name = [:debug, :info, :warn, :error, :fatal][severity]
    return log_debug(*args, **opts.merge(level: level_name, lineno: line, depth: indent), &block)
  end

  # Prefer Logger-style #add, then anything that accepts <<.
  sink = [:add, :<<].find {|name| logger.respond_to?(name)}
  return unless sink

  args << yield if block_given?
  text = "[#{line}]" + (" " * indent) + args.join(" ")
  if sink == :add
    logger.add(severity, text)
  else
    logger << text
  end
end
|
360
|
+
|
361
|
+
# Start for production
# Adds data available during the processing of the production
#
# Pushes `prod` on the production stack and, when a start or finish
# handler is registered for it, pushes a matching data Hash on the
# `@prod_data` stack (after the start handler, if any, has seen it).
#
# @param [Symbol] prod
# @return [Hash] composed of production options. Currently only `as_hash` is supported.
# @see ClassMethods#start_production
def onStart(prod)
  klass = self.class
  start_handler = klass.start_handlers[prod]
  @productions << prod

  debug("#{prod}(:start)", "",
    lineno: (scanner.lineno if scanner),
    pos: (scanner.pos if scanner)
  ) do
    pos, rest = scanner ? [scanner.pos, scanner.rest[0..20].inspect] : ['?', '?']
    "#{prod}, pos: #{pos}, rest: #{rest}"
  end

  if start_handler
    # Create a new production data element, potentially allowing handler
    # to customize before pushing on the @prod_data stack
    frame = {_production: prod}
    begin
      klass.eval_with_binding(self) {
        start_handler.call(frame, @parse_callback)
      }
    rescue ArgumentError, Error => e
      error("start", "#{e.class}: #{e.message}", production: prod, backtrace: e.backtrace)
      @recovering = false
    end
    @prod_data << frame
  elsif klass.production_handlers[prod]
    # Make sure we push as many as we pop, even if there is no
    # explicit start handler
    @prod_data << {_production: prod}
  end

  klass.start_options.fetch(prod, {}) # any options on this production
end
|
395
|
+
|
396
|
+
# Finish of production
#
# Pops the current production and, when a start or finish handler is
# registered for it, the matching `@prod_data` element. A registered
# finish handler is invoked unless the parser is recovering or the
# production was unmatched.
#
# @param [Object] result parse result
# @return [Object] parse result, or the value returned from the handler
#   (`false` if the handler raised ArgumentError or Error)
def onFinish(result)
  #puts "prod_data(f): " + @prod_data.inspect
  klass = self.class
  prod = @productions.last
  finish_handler, purge_memo = klass.production_handlers[prod]

  # A data element was pushed in onStart exactly when either handler exists.
  frame = nil
  frame = @prod_data.pop if finish_handler || klass.start_handlers[prod]
  if frame && prod != frame[:_production]
    error("finish",
      "prod_data production mismatch: expected #{prod.inspect}, got #{frame[:_production].inspect}",
      production: prod, prod_data: @prod_data)
  end

  if finish_handler && !@recovering && result != :unmatched
    begin
      # Hand the popped production data element to the handler
      result = klass.eval_with_binding(self) {
        finish_handler.call(result, frame, @parse_callback)
      }
    rescue ArgumentError, Error => e
      error("finish", "#{e.class}: #{e.message}", production: prod, backtrace: e.backtrace)
      # The result of a failed handler is the value of this assignment.
      result = @recovering = false
    end
  end

  debug("#{prod}(:finish)", "",
    lineno: (scanner.lineno if scanner),
    level: result == :unmatched ? 0 : 1) do
    pos, rest = scanner ? [scanner.pos, scanner.rest[0..20].inspect] : ['?', '?']
    "#{result.inspect}@(#{pos}), rest: #{rest}"
  end

  self.clear_packrat if purge_memo
  @productions.pop
  result
end
|
428
|
+
|
429
|
+
# A terminal with a defined handler
#
# Runs the handler registered for `prod`, if there is one and the terminal
# matched, then reports the outcome through {#progress}.
#
# @param [Symbol] prod from the symbol of the associated rule
# @param [String] value the scanned string
# @return [String, Object] either the result from the handler, or the token
#   (`false` if the handler raised ArgumentError or Error)
def onTerminal(prod, value)
  klass = self.class
  parent = @productions.last
  terminal_handler = klass.terminal_handlers[prod]

  if terminal_handler && value != :unmatched
    begin
      value = klass.eval_with_binding(self) {
        terminal_handler.call(value, parent, @parse_callback)
      }
    rescue ArgumentError, Error => e
      error("terminal", "#{e.class}: #{e.message}", value: value, production: prod, backtrace: e.backtrace)
      # The value of a failed handler is the value of this assignment.
      value = @recovering = false
    end
  end

  progress("#{prod}(:terminal)", "",
    depth: (depth + 1),
    lineno: (scanner.lineno if scanner),
    level: value == :unmatched ? 0 : 1) do
    pos = scanner ? scanner.pos : '?'
    "#{value.inspect}@(#{pos})"
  end

  value
end
|
455
|
+
|
456
|
+
##
# Look up a rule by its symbol in the table built by #parse.
#
# @param [Symbol] sym
# @return [Rule, nil] the rule, or nil if no rule has that symbol
def find_rule(sym)
  table = @rules
  table[sym]
end
|
464
|
+
|
465
|
+
##
# Look up the regular expression registered for a terminal on the
# parser's class (see ClassMethods#terminal).
#
# @param [Symbol] sym
# @return [Regexp, nil] the pattern, or nil if none was registered
def find_terminal_regexp(sym)
  patterns = self.class.terminal_regexps
  patterns[sym]
end
|
473
|
+
|
474
|
+
##
# Record furthest failure.
#
# A failure further into the input replaces the recorded one; a failure at
# the same position adds its token to the list of expected tokens.
#
# @param [Integer] pos
#   The position in the input stream where the failure occurred.
# @param [Integer] lineno
#   Line where the failure occurred.
# @param [Symbol, String] token
#   The terminal token or string which attempted to match.
# @see https://arxiv.org/pdf/1405.6646.pdf
def update_furthest_failure(pos, lineno, token)
  # Skip generated productions
  return if token.is_a?(Symbol) && token.to_s.start_with?('_')

  current = @furthest_failure
  if current.nil? || pos > current.pos
    @furthest_failure = Unmatched.new(pos, lineno, [token])
  elsif pos == current.pos
    expected = current.expecting
    expected << token unless expected.include?(token)
  end
end
|
493
|
+
|
494
|
+
public
|
495
|
+
|
496
|
+
##
# @!parse
#   # Record details about an unmatched rule, including the following:
#   #
#   # * Input location and line number at time of failure.
#   # * The rule at which this was found (non-terminal, and not starting with '_').
#   class Unmatched
#     # @return [Integer] The position within the scanner which did not match.
#     attr_reader :pos
#     # @return [Integer] The line number which did not match.
#     attr_reader :lineno
#     # @return [Array<Symbol,String>]
#     #   Strings or production rules that attempted to match at this position.
#     attr_reader :expecting
#   end
class Unmatched < Struct.new(:pos, :lineno, :expecting)
  # @return [String] message listing the inspected form of each expected token
  def to_s
    expected = expecting.map {|token| token.inspect}
    "syntax error, expecting " + expected.join(', ')
  end
end
|
516
|
+
|
517
|
+
##
# Raised for errors during parsing.
#
# @example Raising a parser error
#   raise Error.new(
#     "invalid token '%' on line 10",
#     rest: '%', lineno: 9, production: :turtleDoc)
#
# @see https://ruby-doc.org/core/classes/StandardError.html
class Error < StandardError
  ##
  # The current production.
  #
  # @return [Symbol]
  attr_reader :production

  ##
  # The read head when scanning failed
  #
  # @return [String]
  attr_reader :rest

  ##
  # The line number where the error occurred.
  #
  # @return [Integer]
  attr_reader :lineno

  ##
  # Initializes a new parser error instance.
  #
  # @param [String, #to_s] message
  # @param [Hash{Symbol => Object}] options
  # @option options [Symbol] :production (nil)
  # @option options [String] :rest (nil)
  # @option options [Integer] :lineno (nil)
  # @option options [Array<String>] :backtrace (nil)
  #   Backtrace to report instead of the location where this error is raised.
  def initialize(message, **options)
    @production = options[:production]
    @rest = options[:rest]
    @lineno = options[:lineno]
    super(message.to_s)
    # Parser#error passes the backtrace of the exception it is wrapping;
    # keep it so the report points at the original failure.
    set_backtrace(options[:backtrace]) if options[:backtrace]
  end
end # class Error
|
560
|
+
end # module Parser
|
561
|
+
end # module EBNF::PEG
|