ebnf 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +21 -7
- data/VERSION +1 -1
- data/bin/ebnf +73 -16
- data/etc/{ebnf.bnf → ebnf.ebnf} +2 -2
- data/etc/ebnf.ll1 +1010 -0
- data/etc/turtle.ebnf +70 -0
- data/etc/turtle.ll1 +1565 -0
- data/etc/turtle.rb +1375 -0
- data/lib/ebnf.rb +16 -1023
- data/lib/ebnf/base.rb +266 -0
- data/lib/ebnf/bnf.rb +50 -0
- data/lib/ebnf/ll1.rb +321 -0
- data/lib/ebnf/ll1/lexer.rb +11 -11
- data/lib/ebnf/ll1/parser.rb +28 -32
- data/lib/ebnf/ll1/scanner.rb +1 -1
- data/lib/ebnf/parser.rb +297 -0
- data/lib/ebnf/rule.rb +362 -0
- metadata +12 -3
data/lib/ebnf.rb
CHANGED
@@ -1,1029 +1,22 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
# to allow for colloquial variations (such as in the Turtle syntax).
|
9
|
-
#
|
10
|
-
# A rule takes the following form:
|
11
|
-
# \[1\] symbol ::= expression
|
12
|
-
#
|
13
|
-
# Comments include the content between '/*' and '*/'
|
14
|
-
#
|
15
|
-
# @see http://www.w3.org/2000/10/swap/grammar/ebnf2turtle.py
|
16
|
-
# @see http://www.w3.org/2000/10/swap/grammar/ebnf2bnf.n3
|
17
|
-
#
|
18
|
-
# Based on bnf2turtle by Dan Connolly.
|
19
|
-
#
|
20
|
-
# Motivation
|
21
|
-
# ----------
|
22
|
-
#
|
23
|
-
# Many specifications include grammars that look formal but are not
|
24
|
-
# actually checked, by machine, against test data sets. Debugging the
|
25
|
-
# grammar in the XML specification has been a long, tedious manual
|
26
|
-
# process. Only when the loop is closed between a fully formal grammar
|
27
|
-
# and a large test data set can we be confident that we have an accurate
|
28
|
-
# specification of a language (and even then, only the syntax of the language).
|
29
|
-
#
|
30
|
-
#
|
31
|
-
# The grammar in the [N3 design note][] has evolved based on the original
|
32
|
-
# manual transcription into a python recursive-descent parser and
|
33
|
-
# subsequent development of test cases. Rather than maintain the grammar
|
34
|
-
# and the parser independently, our [goal] is to formalize the language
|
35
|
-
# syntax sufficiently to replace the manual implementation with one
|
36
|
-
# derived mechanically from the specification.
|
37
|
-
#
|
38
|
-
#
|
39
|
-
# [N3 design note]: http://www.w3.org/DesignIssues/Notation3
|
40
|
-
#
|
41
|
-
# Related Work
|
42
|
-
# ------------
|
43
|
-
#
|
44
|
-
# Sean Palmer's [n3p announcement][] demonstrated the feasibility of the
|
45
|
-
# approach, though that work did not cover some aspects of N3.
|
46
|
-
#
|
47
|
-
# In development of the [SPARQL specification][], Eric Prud'hommeaux
|
48
|
-
# developed [Yacker][], which converts EBNF syntax to perl and C and C++
|
49
|
-
# yacc grammars. It includes an interactive facility for checking
|
50
|
-
# strings against the resulting grammars.
|
51
|
-
# Yosi Scharf used it in [cwm Release 1.1.0rc1][], which includes
|
52
|
-
# a SPARQL parser that is *almost* completely mechanically generated.
|
53
|
-
#
|
54
|
-
# The N3/turtle output from yacker is lower level than the EBNF notation
|
55
|
-
# from the XML specification; it has the ?, +, and * operators compiled
|
56
|
-
# down to pure context-free rules, obscuring the grammar
|
57
|
-
# structure. Since that transformation is straightforwardly expressed in
|
58
|
-
# semantic web rules (see [bnf-rules.n3][]), it seems best to keep the RDF
|
59
|
-
# expression of the grammar in terms of the higher level EBNF
|
60
|
-
# constructs.
|
61
|
-
#
|
62
|
-
# [goal]: http://www.w3.org/2002/02/mid/1086902566.21030.1479.camel@dirk;list=public-cwm-bugs
|
63
|
-
# [n3p announcement]: http://lists.w3.org/Archives/Public/public-cwm-talk/2004OctDec/0029.html
|
64
|
-
# [Yacker]: http://www.w3.org/1999/02/26-modules/User/Yacker
|
65
|
-
# [SPARQL specification]: http://www.w3.org/TR/rdf-sparql-query/
|
66
|
-
# [Cwm Release 1.1.0rc1]: http://lists.w3.org/Archives/Public/public-cwm-announce/2005JulSep/0000.html
|
67
|
-
# [bnf-rules.n3]: http://www.w3.org/2000/10/swap/grammar/bnf-rules.n3
|
68
|
-
#
|
69
|
-
# Open Issues and Future Work
|
70
|
-
# ---------------------------
|
71
|
-
#
|
72
|
-
# The yacker output also has the terminals compiled to elaborate regular
|
73
|
-
# expressions. The best strategy for dealing with lexical tokens is not
|
74
|
-
# yet clear. Many tokens in SPARQL are case insensitive; this is not yet
|
75
|
-
# captured formally.
|
76
|
-
#
|
77
|
-
# The schema for the EBNF vocabulary used here (``g:seq``, ``g:alt``, ...)
|
78
|
-
# is not yet published; it should be aligned with [swap/grammar/bnf][]
|
79
|
-
# and the [bnf2html.n3][] rules (and/or the style of linked XHTML grammar
|
80
|
-
# in the SPARQL and XML specifications).
|
81
|
-
#
|
82
|
-
# It would be interesting to corroborate the claim in the SPARQL spec
|
83
|
-
# that the grammar is LL(1) with a mechanical proof based on N3 rules.
|
84
|
-
#
|
85
|
-
# [swap/grammar/bnf]: http://www.w3.org/2000/10/swap/grammar/bnf
|
86
|
-
# [bnf2html.n3]: http://www.w3.org/2000/10/swap/grammar/bnf2html.n3
|
87
|
-
#
|
88
|
-
# Background
|
89
|
-
# ----------
|
90
|
-
#
|
91
|
-
# The [N3 Primer] by Tim Berners-Lee introduces RDF and the Semantic
|
92
|
-
# web using N3, a teaching and scribbling language. Turtle is a subset
|
93
|
-
# of N3 that maps directly to (and from) the standard XML syntax for
|
94
|
-
# RDF.
|
95
|
-
#
|
96
|
-
# [N3 Primer]: http://www.w3.org/2000/10/swap/Primer.html
|
97
|
-
#
|
98
|
-
# @author Gregg Kellogg
|
99
|
-
class EBNF
|
100
|
-
class Rule
|
101
|
-
# Operations which are flattened to separate rules in to_bnf
|
102
|
-
BNF_OPS = %w{
|
103
|
-
seq alt diff opt star plus
|
104
|
-
}.map(&:to_sym).freeze
|
105
|
-
|
106
|
-
# @!attribute [rw] sym for rule
|
107
|
-
# @return [Symbol]
|
108
|
-
attr_accessor :sym
|
109
|
-
|
110
|
-
# @!attribute [rw] id of rule
|
111
|
-
# @return [String]
|
112
|
-
attr_accessor :id
|
113
|
-
|
114
|
-
# @!attribute [rw] kind of rule
|
115
|
-
# @return [:rule, :terminal, or :pass]
|
116
|
-
attr_accessor :kind
|
117
|
-
|
118
|
-
# @!attribute [rw] expr rule expression
|
119
|
-
# @return [Array]
|
120
|
-
attr_accessor :expr
|
121
|
-
|
122
|
-
# @!attribute [rw] orig original rule
|
123
|
-
# @return [String]
|
124
|
-
attr_accessor :orig
|
125
|
-
|
126
|
-
# @!attribute [r] first terminals that immediately precede this rule
|
127
|
-
# @return [Array<Rule>]
|
128
|
-
attr_reader :first
|
129
|
-
|
130
|
-
# @!attribute [r] follow terminals that immediately follow this rule
|
131
|
-
# @return [Array<Rule>]
|
132
|
-
attr_reader :follow
|
133
|
-
|
134
|
-
# @!attribute [rw] start indicates that this is a starting rule
|
135
|
-
# @return [Boolean]
|
136
|
-
attr_accessor :start
|
137
|
-
|
138
|
-
# @param [Integer] id
|
139
|
-
# @param [Symbol] sym
|
140
|
-
# @param [Array] expr
|
141
|
-
# @param [EBNF] ebnf
|
142
|
-
# @param [Hash{Symbol => Object}] options
|
143
|
-
# @option options [Symbol] :kind
|
144
|
-
# @option options [String] :ebnf
|
145
|
-
def initialize(sym, id, expr, options = {})
|
146
|
-
@sym, @id = sym, id
|
147
|
-
@expr = expr.is_a?(Array) ? expr : [:seq, expr]
|
148
|
-
@ebnf = options[:ebnf]
|
149
|
-
@kind = case
|
150
|
-
when options[:kind] then options[:kind]
|
151
|
-
when sym.to_s == sym.to_s.upcase then :terminal
|
152
|
-
when !BNF_OPS.include?(@expr.first) then :terminal
|
153
|
-
else :rule
|
154
|
-
end
|
155
|
-
end
|
156
|
-
|
157
|
-
# Serializes this rule to an S-Expression
|
158
|
-
# @return [String]
|
159
|
-
def to_sxp
|
160
|
-
elements = [sym, id, [:kind, kind]]
|
161
|
-
elements << [:start, true] if start
|
162
|
-
elements << first.sort_by(&:to_s).unshift(:first) if first
|
163
|
-
elements << follow.sort_by(&:to_s).unshift(:follow) if follow
|
164
|
-
elements << expr
|
165
|
-
elements.to_sxp
|
166
|
-
end
|
167
|
-
def to_s; to_sxp; end
|
168
|
-
|
169
|
-
# Serializes this rule to Turtle
|
170
|
-
# @return [String]
|
171
|
-
def to_ttl
|
172
|
-
@ebnf.debug("to_ttl") {inspect}
|
173
|
-
comment = orig.strip.
|
174
|
-
gsub(/"""/, '\"\"\"').
|
175
|
-
gsub("\\", "\\\\").
|
176
|
-
sub(/^\"/, '\"').
|
177
|
-
sub(/\"$/m, '\"')
|
178
|
-
statements = [
|
179
|
-
%{:#{id} rdfs:label "#{id}"; rdf:value "#{sym}";},
|
180
|
-
%{ rdfs:comment #{comment.inspect};},
|
181
|
-
]
|
182
|
-
|
183
|
-
statements += ttl_expr(expr, kind == :terminal ? "re" : "g", 1, false)
|
184
|
-
"\n" + statements.join("\n")
|
185
|
-
end
|
186
|
-
|
187
|
-
##
|
188
|
-
# Transform EBNF rule to BNF rules:
|
189
|
-
#
|
190
|
-
# * Transform (a [n] rule (op1 (op2))) into two rules:
|
191
|
-
# (a [n] rule (op1 a.2))
|
192
|
-
# (_a_1 [n.1] rule (op2))
|
193
|
-
# * Transform (a rule (opt b)) into (a rule (alt _empty b))
|
194
|
-
# * Transform (a rule (star b)) into (a rule (alt _empty (seq b a)))
|
195
|
-
# * Transform (a rule (plus b)) into (a rule (seq b (star b)))
|
196
|
-
# @return [Array<Rule>]
|
197
|
-
def to_bnf
|
198
|
-
new_rules = []
|
199
|
-
return [self] unless kind == :rule
|
200
|
-
|
201
|
-
# Look for rules containing recursive definition and rewrite to multiple rules. If `expr` contains elements which are in array form, where the first element of that array is a symbol, create a new rule for it.
|
202
|
-
if expr.any? {|e| e.is_a?(Array) && BNF_OPS.include?(e.first)}
|
203
|
-
# * Transform (a [n] rule (op1 (op2))) into two rules:
|
204
|
-
# (a.1 [n.1] rule (op1 a.2))
|
205
|
-
# (a.2 [n.2] rule (op2))
|
206
|
-
# duplicate ourselves for rewriting
|
207
|
-
this = dup
|
208
|
-
rule_seq = 1
|
209
|
-
new_rules << this
|
210
|
-
|
211
|
-
expr.each_with_index do |e, index|
|
212
|
-
next unless e.is_a?(Array) && e.first.is_a?(Symbol)
|
213
|
-
new_sym, new_id = "_#{sym}_#{rule_seq}".to_sym, "#{id}.#{rule_seq}"
|
214
|
-
rule_seq += 1
|
215
|
-
this.expr[index] = new_sym
|
216
|
-
new_rule = Rule.new(new_sym, new_id, e, :ebnf => @ebnf)
|
217
|
-
new_rules << new_rule
|
218
|
-
end
|
219
|
-
|
220
|
-
# Return new rules after recursively applying #to_bnf
|
221
|
-
new_rules = new_rules.map {|r| r.to_bnf}.flatten
|
222
|
-
elsif expr.first == :opt
|
223
|
-
# * Transform (a rule (opt b)) into (a rule (alt _empty "foo"))
|
224
|
-
new_rules = Rule.new(sym, id, [:alt, :_empty, expr.last], :ebnf => @ebnf).to_bnf
|
225
|
-
elsif expr.first == :star
|
226
|
-
# * Transform (a rule (star b)) into (a rule (alt _empty (seq b a)))
|
227
|
-
new_rules = [Rule.new(sym, id, [:alt, :_empty, "_#{sym}_star".to_sym], :ebnf => @ebnf)] +
|
228
|
-
Rule.new("_#{sym}_star".to_sym, "#{id}*", [:seq, expr.last, sym], :ebnf => @ebnf).to_bnf
|
229
|
-
elsif expr.first == :plus
|
230
|
-
# * Transform (a rule (plus b)) into (a rule (seq b (star b)
|
231
|
-
new_rules = Rule.new(sym, id, [:seq, expr.last, [:star, expr.last]], :ebnf => @ebnf).to_bnf
|
232
|
-
else
|
233
|
-
# Otherwise, no further transformation necessary
|
234
|
-
new_rules << self
|
235
|
-
end
|
236
|
-
|
237
|
-
return new_rules
|
238
|
-
end
|
239
|
-
|
240
|
-
# Does this rule start with a sym? It does if expr is that sym,
|
241
|
-
# expr starts with alt and contains that sym, or
|
242
|
-
# expr starts with seq and the next element is that sym
|
243
|
-
# @param [Symbol, class] sym
|
244
|
-
# Symbol matching any start element, or if it is String, any start element which is a String
|
245
|
-
# @return [Array<Symbol, String>] list of symbol (singular), or strings which are start symbol, or nil if there are none
|
246
|
-
def starts_with(sym)
|
247
|
-
if seq? && sym === (v = expr.fetch(1, nil))
|
248
|
-
[v]
|
249
|
-
elsif alt? && expr.any? {|e| sym === e}
|
250
|
-
expr.select {|e| sym === e}
|
251
|
-
else
|
252
|
-
nil
|
253
|
-
end
|
254
|
-
end
|
255
|
-
|
256
|
-
# Add terminal as preceding this rule
|
257
|
-
# @param [Array<Rule>] terminals
|
258
|
-
# @return [Integer] number of terminals added
|
259
|
-
def add_first(terminals)
|
260
|
-
@first ||= []
|
261
|
-
terminals -= @first # Remove those already in first
|
262
|
-
@first += terminals
|
263
|
-
terminals.length
|
264
|
-
end
|
265
|
-
|
266
|
-
# Add terminal as following this rule. Don't add _eps as a follow
|
267
|
-
#
|
268
|
-
# @param [Array<Rule>] terminals
|
269
|
-
# @return [Integer] number of terminals added
|
270
|
-
def add_follow(terminals)
|
271
|
-
terminals -= @follow || [] # Remove those already in first
|
272
|
-
terminals -= [:_eps] # Special case, don't add empty string as a follow terminal
|
273
|
-
unless terminals.empty?
|
274
|
-
@follow ||= []
|
275
|
-
@follow += terminals
|
276
|
-
end
|
277
|
-
terminals.length
|
278
|
-
end
|
279
|
-
|
280
|
-
# Is this rule of the form (seq ...)?
|
281
|
-
def seq?
|
282
|
-
expr.is_a?(Array) && expr.first == :seq
|
283
|
-
end
|
284
|
-
|
285
|
-
# Is this rule of the form (alt ...)?
|
286
|
-
def alt?
|
287
|
-
expr.is_a?(Array) && expr.first == :alt
|
288
|
-
end
|
289
|
-
|
290
|
-
def inspect
|
291
|
-
"#<EBNF::Rule:#{object_id} " +
|
292
|
-
{:sym => sym, :id => id, :kind => kind, :expr => expr}.inspect +
|
293
|
-
">"
|
294
|
-
end
|
295
|
-
|
296
|
-
# Two rules are equal if they have the same {#sym}, {#kind} and {#expr}
|
297
|
-
# @param [Rule] other
|
298
|
-
# @return [Boolean]
|
299
|
-
def ==(other)
|
300
|
-
sym == other.sym &&
|
301
|
-
kind == other.kind &&
|
302
|
-
expr == other.expr
|
303
|
-
end
|
304
|
-
|
305
|
-
# Two rules are equivalent if they have the same {#expr}
|
306
|
-
# @param [Rule] other
|
307
|
-
# @return [Boolean]
|
308
|
-
def equivalent?(other)
|
309
|
-
expr == other.expr
|
310
|
-
end
|
311
|
-
|
312
|
-
# Rewrite the rule substituting src_rule for dst_rule wherever
|
313
|
-
# it is used in the production (first level only).
|
314
|
-
# @param [Rule] src_rule
|
315
|
-
# @param [Rule] dst_rule
|
316
|
-
# @return [Rule]
|
317
|
-
def rewrite(src_rule, dst_rule)
|
318
|
-
case @expr
|
319
|
-
when Array
|
320
|
-
@expr = @expr.map {|e| e == src_rule.sym ? dst_rule.sym : e}
|
321
|
-
else
|
322
|
-
@expr = dst_rule.sym if @expr == src_rule.sym
|
323
|
-
end
|
324
|
-
self
|
325
|
-
end
|
326
|
-
|
327
|
-
# Rules compare using their ids
|
328
|
-
def <=>(other)
|
329
|
-
if id.to_i == other.id.to_i
|
330
|
-
id <=> other.id
|
331
|
-
else
|
332
|
-
id.to_i <=> other.id.to_i
|
333
|
-
end
|
334
|
-
end
|
335
|
-
|
336
|
-
private
|
337
|
-
def ttl_expr(expr, pfx, depth, is_obj = true)
|
338
|
-
indent = ' ' * depth
|
339
|
-
@ebnf.debug("ttl_expr", :depth => depth) {expr.inspect}
|
340
|
-
op = expr.shift if expr.is_a?(Array)
|
341
|
-
statements = []
|
342
|
-
|
343
|
-
if is_obj
|
344
|
-
bra, ket = "[ ", " ]"
|
345
|
-
else
|
346
|
-
bra = ket = ''
|
347
|
-
end
|
348
|
-
|
349
|
-
case op
|
350
|
-
when :seq, :alt, :diff
|
351
|
-
statements << %{#{indent}#{bra}#{pfx}:#{op} (}
|
352
|
-
expr.each {|a| statements += ttl_expr(a, pfx, depth + 1)}
|
353
|
-
statements << %{#{indent} )#{ket}}
|
354
|
-
when :opt, :plus, :star
|
355
|
-
statements << %{#{indent}#{bra}#{pfx}:#{op} }
|
356
|
-
statements += ttl_expr(expr.first, pfx, depth + 1)
|
357
|
-
statements << %{#{indent} #{ket}} unless ket.empty?
|
358
|
-
when :_empty, :_eps, :_empty
|
359
|
-
statements << %{#{indent}"g:#{op.to_s[1..-1]}"}
|
360
|
-
when :"'"
|
361
|
-
statements << %{#{indent}"#{esc(expr)}"}
|
362
|
-
when :range
|
363
|
-
statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
|
364
|
-
when :hex
|
365
|
-
raise "didn't expect \" in expr" if expr.include?(:'"')
|
366
|
-
statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
|
367
|
-
else
|
368
|
-
if is_obj
|
369
|
-
statements << %{#{indent}#{expr.inspect}}
|
370
|
-
else
|
371
|
-
statements << %{#{indent}g:seq ( #{expr.inspect} )}
|
372
|
-
end
|
373
|
-
end
|
374
|
-
|
375
|
-
statements.last << " ." unless is_obj
|
376
|
-
@ebnf.debug("statements", :depth => depth) {statements.join("\n")}
|
377
|
-
statements
|
378
|
-
end
|
379
|
-
|
380
|
-
##
|
381
|
-
# turn an XML BNF character class into an N3 literal for that
|
382
|
-
# character class (less the outer quote marks)
|
383
|
-
#
|
384
|
-
# >>> cclass("^<>'{}|^`")
|
385
|
-
# "[^<>'{}|^`]"
|
386
|
-
# >>> cclass("#x0300-#x036F")
|
387
|
-
# "[\\u0300-\\u036F]"
|
388
|
-
# >>> cclass("#xC0-#xD6")
|
389
|
-
# "[\\u00C0-\\u00D6]"
|
390
|
-
# >>> cclass("#x370-#x37D")
|
391
|
-
# "[\\u0370-\\u037D]"
|
392
|
-
#
|
393
|
-
# as in: ECHAR ::= '\' [tbnrf\"']
|
394
|
-
# >>> cclass("tbnrf\\\"'")
|
395
|
-
# 'tbnrf\\\\\\"\''
|
396
|
-
#
|
397
|
-
# >>> cclass("^#x22#x5C#x0A#x0D")
|
398
|
-
# '^\\u0022\\\\\\u005C\\u000A\\u000D'
|
399
|
-
def cclass(txt)
|
400
|
-
'[' +
|
401
|
-
txt.gsub(/\#x[0-9a-fA-F]+/) do |hx|
|
402
|
-
hx = hx[2..-1]
|
403
|
-
if hx.length <= 4
|
404
|
-
"\\u#{'0' * (4 - hx.length)}#{hx}"
|
405
|
-
elsif hx.length <= 8
|
406
|
-
"\\U#{'0' * (8 - hx.length)}#{hx}"
|
407
|
-
end
|
408
|
-
end +
|
409
|
-
']'
|
410
|
-
end
|
411
|
-
end
|
412
|
-
|
413
|
-
# Abstract syntax tree from parse
|
414
|
-
attr_reader :ast
|
415
|
-
|
416
|
-
# Parse the string or file input generating an abstract syntax tree
|
417
|
-
# in S-Expressions (similar to SPARQL SSE)
|
418
|
-
#
|
419
|
-
# @param [#read, #to_s] input
|
420
|
-
# @param [Hash{Symbol => Object}] options
|
421
|
-
# @option options [Boolean, Array] :debug
|
422
|
-
# Output debug information to an array or STDOUT.
|
423
|
-
def initialize(input, options = {})
|
424
|
-
@options = options
|
425
|
-
@lineno, @depth = 1, 0
|
426
|
-
terminal = false
|
427
|
-
@ast = []
|
428
|
-
|
429
|
-
input = input.respond_to?(:read) ? input.read : input.to_s
|
430
|
-
scanner = StringScanner.new(input)
|
431
|
-
|
432
|
-
eachRule(scanner) do |r|
|
433
|
-
debug("rule string") {r.inspect}
|
434
|
-
case r
|
435
|
-
when /^@terminals/
|
436
|
-
# Switch mode to parsing terminals
|
437
|
-
terminal = true
|
438
|
-
when /^@pass\s*(.*)$/m
|
439
|
-
rule = depth {ruleParts("[0] " + r)}
|
440
|
-
rule.kind = :pass
|
441
|
-
rule.orig = r
|
442
|
-
@ast << rule
|
443
|
-
else
|
444
|
-
rule = depth {ruleParts(r)}
|
445
|
-
|
446
|
-
rule.kind = :terminal if terminal # Override after we've parsed @terminals
|
447
|
-
rule.orig = r
|
448
|
-
@ast << rule
|
449
|
-
end
|
450
|
-
end
|
451
|
-
end
|
1
|
+
module EBNF
|
2
|
+
autoload :Base, "ebnf/base"
|
3
|
+
autoload :BNF, "ebnf/bnf"
|
4
|
+
autoload :LL1, "ebnf/ll1"
|
5
|
+
autoload :Parser, "ebnf/parser"
|
6
|
+
autoload :Rule, "ebnf/rule"
|
7
|
+
autoload :VERSION, "ebnf/version"
|
452
8
|
|
453
9
|
##
|
454
|
-
#
|
455
|
-
#
|
456
|
-
# * Add rule [0] (_empty rule (seq))
|
457
|
-
# * Transform each rule into a set of rules that are just BNF, using {Rule#to_bnf}.
|
458
|
-
# @return [EBNF] self
|
459
|
-
def make_bnf
|
460
|
-
new_ast = [Rule.new(:_empty, "0", [:seq], :kind => :rule)]
|
461
|
-
|
462
|
-
ast.each do |rule|
|
463
|
-
debug("make_bnf") {"expand from: #{rule.inspect}"}
|
464
|
-
new_rules = rule.to_bnf
|
465
|
-
debug(" => ") {new_rules.map(&:sym).join(', ')}
|
466
|
-
new_ast += new_rules
|
467
|
-
end
|
468
|
-
|
469
|
-
# Consolidate equivalent terminal rules
|
470
|
-
to_rewrite = {}
|
471
|
-
new_ast.select {|r| r.kind == :terminal}.each do |src_rule|
|
472
|
-
new_ast.select {|r| r.kind == :terminal}.each do |dst_rule|
|
473
|
-
if src_rule.equivalent?(dst_rule) && src_rule != dst_rule
|
474
|
-
debug("make_bnf") {"equivalent rules: #{src_rule.inspect} and #{dst_rule.inspect}"}
|
475
|
-
(to_rewrite[src_rule] ||= []) << dst_rule
|
476
|
-
end
|
477
|
-
end
|
478
|
-
end
|
479
|
-
|
480
|
-
# Replace references to equivalent rules with canonical rule
|
481
|
-
to_rewrite.each do |src_rule, dst_rules|
|
482
|
-
dst_rules.each do |dst_rule|
|
483
|
-
new_ast.each do |mod_rule|
|
484
|
-
debug("make_bnf") {"rewrite #{mod_rule.inspect} from #{dst_rule.sym} to #{src_rule.sym}"}
|
485
|
-
mod_rule.rewrite(dst_rule, src_rule)
|
486
|
-
end
|
487
|
-
end
|
488
|
-
end
|
489
|
-
|
490
|
-
# AST now has just rewritten rules
|
491
|
-
compacted_ast = new_ast - to_rewrite.values.flatten.compact
|
492
|
-
|
493
|
-
# Sort AST by number
|
494
|
-
@ast = compacted_ast
|
495
|
-
|
496
|
-
self
|
497
|
-
end
|
498
|
-
|
499
|
-
# Iterate over each rule or terminal
|
500
|
-
# @param [:terminal, :rule] kind
|
501
|
-
# @yield rule
|
502
|
-
# @yieldparam [Rule] rule
|
503
|
-
def each(kind, &block)
|
504
|
-
ast.each {|r| block.call(r) if r.kind == kind}
|
505
|
-
end
|
506
|
-
|
507
|
-
##
|
508
|
-
# Create first/follow for each rule using techniques defined for LL(1) parsers.
|
509
|
-
#
|
510
|
-
# @return [EBNF] self
|
511
|
-
# @see http://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
|
512
|
-
# @param [Array<String>] starts
|
513
|
-
# Set of symbols which are start rules
|
514
|
-
def first_follow(starts)
|
515
|
-
# Add _eof to follow all start rules
|
516
|
-
starts.map(&:to_sym).each do |sym|
|
517
|
-
rule = ast.detect {|r| r.sym == sym}
|
518
|
-
raise "No rule found for start symbol #{sym}" unless rule
|
519
|
-
rule.add_follow([:_eof])
|
520
|
-
rule.start = true
|
521
|
-
end
|
522
|
-
|
523
|
-
# Comprehension rule, create shorter versions of all non-terminal sequences
|
524
|
-
comprehensions = []
|
525
|
-
begin
|
526
|
-
comprehensions = []
|
527
|
-
ast.select {|r| r.seq? && r.kind == :rule && r.expr.length > 2}.each do |rule|
|
528
|
-
new_expr = rule.expr[2..-1].unshift(:seq)
|
529
|
-
unless ast.any? {|r| r.expr == new_expr}
|
530
|
-
debug("first_follow") {"add comprehension rule for #{rule.sym} => #{new_expr.inspect}"}
|
531
|
-
new_rule = Rule.new("_#{rule.sym}_comp".to_sym, "#{rule.id}.comp", new_expr)
|
532
|
-
comprehensions << new_rule
|
533
|
-
end
|
534
|
-
end
|
535
|
-
|
536
|
-
@ast += comprehensions
|
537
|
-
debug("first_follow") {"comprehensions #{comprehensions.length}"}
|
538
|
-
end while !comprehensions.empty?
|
539
|
-
|
540
|
-
# Fi(a w' ) = { a } for every terminal a
|
541
|
-
# For each rule whose expr's first element of a seq is a terminal, or having any element of alt a terminal, add that terminal to the first set for this rule
|
542
|
-
each(:rule) do |rule|
|
543
|
-
each(:terminal) do |terminal|
|
544
|
-
rule.add_first([terminal.sym]) if rule.starts_with(terminal.sym)
|
545
|
-
end
|
546
|
-
|
547
|
-
# Add strings to first for strings which are start elements
|
548
|
-
start_strs = rule.starts_with(String)
|
549
|
-
rule.add_first(start_strs) if start_strs
|
550
|
-
end
|
551
|
-
|
552
|
-
# # Fi(ε) = { ε }
|
553
|
-
# Add _eps as a first of _empty
|
554
|
-
empty = ast.detect {|r| r.sym == :_empty}
|
555
|
-
empty.add_first([:_eps])
|
556
|
-
|
557
|
-
# Loop until no more first elements are added
|
558
|
-
firsts, follows = 0, 0
|
559
|
-
begin
|
560
|
-
firsts, follows = 0, 0
|
561
|
-
each(:rule) do |rule|
|
562
|
-
each(:rule) do |first_rule|
|
563
|
-
next if first_rule == rule || first_rule.first.nil?
|
564
|
-
|
565
|
-
# Fi(A w' ) = Fi(A) for every nonterminal A with ε not in Fi(A)
|
566
|
-
# For each rule that starts with another rule having firsts, add the firsts of that rule to this rule, unless it already has those terminals in its first
|
567
|
-
if rule.starts_with(first_rule.sym)
|
568
|
-
depth {debug("FF.1") {"add first #{first_rule.first.inspect} to #{rule.sym}"}}
|
569
|
-
firsts += rule.add_first(first_rule.first)
|
570
|
-
end
|
571
|
-
|
572
|
-
# Fi(A w' ) = Fi(A) \ { ε } ∪ Fi(w' ) for every nonterminal A with ε in Fi(A)
|
573
|
-
# For each rule starting with eps, add the terminals for the comprehension of this rule
|
574
|
-
if rule.seq? &&
|
575
|
-
rule.expr.fetch(1, nil) == first_rule &&
|
576
|
-
first_rule.first.include?(:_eps) &&
|
577
|
-
(comp = find_comp(rule))
|
578
|
-
|
579
|
-
depth {debug("FF.2") {"add first #{first_rule.first.inspect} to #{comp.sym}"}}
|
580
|
-
firsts += comp.add_first(first_rule.first)
|
581
|
-
end
|
582
|
-
end
|
583
|
-
|
584
|
-
# Only run these rules if the rule is a sequence having two or more elements, whose first element is also a sequence and first_rule is the comprehension of rule
|
585
|
-
if rule.seq? && (comp = find_comp(rule))
|
586
|
-
#if there is a rule of the form Aj → wAiw' , then
|
587
|
-
#
|
588
|
-
if (ai = find_rule(rule.expr[1])) && ai.kind == :rule && comp.first
|
589
|
-
# * if the terminal a is in Fi(w' ), then add a to Fo(Ai)
|
590
|
-
#
|
591
|
-
# Add follow terminals based on the first terminals
|
592
|
-
# of a comprehension of this rule (having the same
|
593
|
-
# sequence other than the first rule in the sequence)
|
594
|
-
#
|
595
|
-
# @example
|
596
|
-
# rule: (seq a b c)
|
597
|
-
# first_rule: (seq b c)
|
598
|
-
# if first_rule.first == [T]
|
599
|
-
# => a.follow += [T]
|
600
|
-
depth {debug("FF.3") {"add follow #{comp.first.inspect} to #{ai.sym}"}}
|
601
|
-
follows += ai.add_follow(comp.first)
|
602
|
-
end
|
603
|
-
|
604
|
-
# Follows of a rule are also follows of the comprehension of the rule.
|
605
|
-
if rule.follow
|
606
|
-
depth {debug("FF.4") {"add follow #{rule.follow.inspect} to #{comp.sym}"}}
|
607
|
-
follows += comp.add_follow(rule.follow)
|
608
|
-
end
|
609
|
-
|
610
|
-
# * if ε is in Fi(w' ), then add Fo(Aj) to Fo(Ai)
|
611
|
-
#
|
612
|
-
# If the comprehension of a sequence has an _eps first, then the follows of the rule also become the follows of the first member of the rule
|
613
|
-
if comp.first && comp.first.include?(:_eps) && rule.first
|
614
|
-
member = find_rule(rule.expr.fetch(1, nil))
|
615
|
-
depth {debug("FF.4") {"add follow #{rule.follow.inspect} to #{member.sym}"}}
|
616
|
-
follows += member.add_follow(rule.first) if member.kind == :rule
|
617
|
-
end
|
618
|
-
end
|
619
|
-
|
620
|
-
# Follows of a rule are also follows of the last production in the rule
|
621
|
-
if rule.seq? && rule.follow &&
|
622
|
-
(member = find_rule(rule.expr.last)) &&
|
623
|
-
member.kind == :rule
|
624
|
-
|
625
|
-
depth {debug("FF.5") {"add follow #{rule.follow.inspect} to #{member.sym}"}}
|
626
|
-
follows += member.add_follow(rule.follow)
|
627
|
-
end
|
628
|
-
|
629
|
-
# For alts, anything that follows the rule follows each member of the rule
|
630
|
-
if rule.alt? && rule.follow
|
631
|
-
rule.expr[1..-1].map {|s| find_rule(s)}.each do |mem|
|
632
|
-
if mem && mem.kind == :rule
|
633
|
-
depth {debug("FF.6") {"add follow #{rule.first.inspect} to #{mem.sym}"}}
|
634
|
-
follows += mem.add_follow(rule.follow)
|
635
|
-
end
|
636
|
-
end
|
637
|
-
end
|
638
|
-
end
|
639
|
-
|
640
|
-
debug("first_follow") {"firsts #{firsts}, follows #{follows}"}
|
641
|
-
end while (firsts + follows) > 0
|
642
|
-
end
|
643
|
-
|
644
|
-
##
|
645
|
-
# Write out parsed syntax string as an S-Expression
|
646
|
-
# @return [String]
|
647
|
-
def to_sxp
|
648
|
-
begin
|
649
|
-
require 'sxp'
|
650
|
-
SXP::Generator.string(ast.sort)
|
651
|
-
rescue LoadError
|
652
|
-
ast.to_sxp
|
653
|
-
end
|
654
|
-
end
|
655
|
-
def to_s; to_sxp; end
|
656
|
-
|
657
|
-
def dup
|
658
|
-
new_obj = super
|
659
|
-
new_obj.instance_variable_set(:@ast, @ast.dup)
|
660
|
-
new_obj
|
661
|
-
end
|
662
|
-
|
663
|
-
##
|
664
|
-
# Find a rule given a symbol
|
665
|
-
# @param [Symbol] sym
|
666
|
-
# @return [Rule]
|
667
|
-
def find_rule(sym)
|
668
|
-
(@find ||= {})[sym] ||= ast.detect {|r| r.sym == sym}
|
669
|
-
end
|
670
|
-
|
671
|
-
##
|
672
|
-
# Find the comprehension of a rule
|
673
|
-
# Comprehensions are created in {#first_follow} when the rule is a sequence with more than 1 element. They are named automatically as "_sym_comp" where "sym" is the symbol of the source rule
|
674
|
-
# @param [Rule] source
|
675
|
-
# @return [Rule]
|
676
|
-
def find_comp(source)
|
677
|
-
(@comp ||= {})[source.sym] ||= source.seq? && source.expr.length > 2 && find_rule("_#{source.sym}_comp".to_sym)
|
678
|
-
end
|
679
|
-
|
680
|
-
##
|
681
|
-
# Write out syntax tree as Turtle
|
682
|
-
# @param [String] prefix for language
|
683
|
-
# @param [String] ns URI for language
|
684
|
-
# @return [String]
|
685
|
-
def to_ttl(prefix, ns)
|
686
|
-
unless ast.empty?
|
687
|
-
[
|
688
|
-
"@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.",
|
689
|
-
"@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.",
|
690
|
-
"@prefix #{prefix}: <#{ns}>.",
|
691
|
-
"@prefix : <#{ns}>.",
|
692
|
-
"@prefix re: <http://www.w3.org/2000/10/swap/grammar/regex#>.",
|
693
|
-
"@prefix g: <http://www.w3.org/2000/10/swap/grammar/ebnf#>.",
|
694
|
-
"",
|
695
|
-
":language rdfs:isDefinedBy <>; g:start :#{ast.first.id}.",
|
696
|
-
"",
|
697
|
-
]
|
698
|
-
end.join("\n") +
|
699
|
-
|
700
|
-
ast.sort.
|
701
|
-
select {|a| [:rule, :terminal].include?(a.kind)}.
|
702
|
-
map(&:to_ttl).
|
703
|
-
join("\n")
|
704
|
-
end
|
705
|
-
|
706
|
-
##
|
707
|
-
# Iterate over rule strings.
|
708
|
-
# a line that starts with '\[' or '@' starts a new rule
|
709
|
-
#
|
710
|
-
# @param [StringScanner] scanner
|
711
|
-
# @yield rule_string
|
712
|
-
# @yieldparam [String] rule_string
|
713
|
-
def eachRule(scanner)
|
714
|
-
cur_lineno = 1
|
715
|
-
r = ''
|
716
|
-
until scanner.eos?
|
717
|
-
case
|
718
|
-
when s = scanner.scan(%r(\s+)m)
|
719
|
-
# Eat whitespace
|
720
|
-
cur_lineno += s.count("\n")
|
721
|
-
#debug("eachRule(ws)") { "[#{cur_lineno}] #{s.inspect}" }
|
722
|
-
when s = scanner.scan(%r(/\*([^\*]|\*[^\/])*\*/)m)
|
723
|
-
# Eat comments
|
724
|
-
cur_lineno += s.count("\n")
|
725
|
-
debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
|
726
|
-
when s = scanner.scan(%r(^@terminals))
|
727
|
-
#debug("eachRule(@terminals)") { "[#{cur_lineno}] #{s.inspect}" }
|
728
|
-
yield(r) unless r.empty?
|
729
|
-
@lineno = cur_lineno
|
730
|
-
yield(s)
|
731
|
-
r = ''
|
732
|
-
when s = scanner.scan(/@pass/)
|
733
|
-
# Found rule start, if we've already collected a rule, yield it
|
734
|
-
#debug("eachRule(@pass)") { "[#{cur_lineno}] #{s.inspect}" }
|
735
|
-
yield r unless r.empty?
|
736
|
-
@lineno = cur_lineno
|
737
|
-
r = s
|
738
|
-
when s = scanner.scan(/\[(?=\w+\])/)
|
739
|
-
# Found rule start, if we've already collected a rule, yield it
|
740
|
-
yield r unless r.empty?
|
741
|
-
#debug("eachRule(rule)") { "[#{cur_lineno}] #{s.inspect}" }
|
742
|
-
@lineno = cur_lineno
|
743
|
-
r = s
|
744
|
-
else
|
745
|
-
# Collect until end of line, or start of comment
|
746
|
-
s = scanner.scan_until(%r((?:/\*)|$)m)
|
747
|
-
cur_lineno += s.count("\n")
|
748
|
-
#debug("eachRule(rest)") { "[#{cur_lineno}] #{s.inspect}" }
|
749
|
-
r += s
|
750
|
-
end
|
751
|
-
end
|
752
|
-
yield r unless r.empty?
|
753
|
-
end
|
754
|
-
|
755
|
-
##
|
756
|
-
# Parse a rule into a rule number, a symbol and an expression
|
757
|
-
#
|
758
|
-
# @param [String] rule
|
759
|
-
# @return [Rule]
|
760
|
-
def ruleParts(rule)
|
761
|
-
num_sym, expr = rule.split('::=', 2).map(&:strip)
|
762
|
-
num, sym = num_sym.split(']', 2).map(&:strip)
|
763
|
-
num = num[1..-1]
|
764
|
-
r = Rule.new(sym && sym.to_sym, num, ebnf(expr).first, :ebnf => self)
|
765
|
-
debug("ruleParts") { r.inspect }
|
766
|
-
r
|
767
|
-
end
|
768
|
-
|
769
|
-
##
|
770
|
-
# Parse a string into an expression tree and a remaining string
|
10
|
+
# Parse the given EBNF `query` input.
|
771
11
|
#
|
772
12
|
# @example
|
773
|
-
#
|
774
|
-
#
|
775
|
-
#
|
776
|
-
#
|
777
|
-
#
|
778
|
-
#
|
779
|
-
|
780
|
-
|
781
|
-
#
|
782
|
-
# >>> ebnf("a | (b - c)")
|
783
|
-
# ((alt, \[('id', 'a'), (diff, \[('id', 'b'), ('id', 'c')\])\]), '')
|
784
|
-
#
|
785
|
-
# >>> ebnf("a b | c d")
|
786
|
-
# ((alt, \[(seq, \[('id', 'a'), ('id', 'b')\]), (seq, \[('id', 'c'), ('id', 'd')\])\]), '')
|
787
|
-
#
|
788
|
-
# >>> ebnf("a | b | c")
|
789
|
-
# ((alt, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
|
790
|
-
#
|
791
|
-
# >>> ebnf("a) b c")
|
792
|
-
# (('id', 'a'), ' b c')
|
793
|
-
#
|
794
|
-
# >>> ebnf("BaseDecl? PrefixDecl*")
|
795
|
-
# ((seq, \[(opt, ('id', 'BaseDecl')), ('*', ('id', 'PrefixDecl'))\]), '')
|
796
|
-
#
|
797
|
-
# >>> ebnf("NCCHAR1 | diff | [0-9] | #x00B7 | [#x0300-#x036F] | \[#x203F-#x2040\]")
|
798
|
-
# ((alt, \[('id', 'NCCHAR1'), ("'", diff), (range, '0-9'), (hex, '#x00B7'), (range, '#x0300-#x036F'), (range, '#x203F-#x2040')\]), '')
|
799
|
-
#
|
800
|
-
# @param [String] s
|
801
|
-
# @return [Array]
|
802
|
-
def ebnf(s)
|
803
|
-
debug("ebnf") {"(#{s.inspect})"}
|
804
|
-
e, s = depth {alt(s)}
|
805
|
-
debug {"=> alt returned #{[e, s].inspect}"}
|
806
|
-
unless s.empty?
|
807
|
-
t, ss = depth {terminal(s)}
|
808
|
-
debug {"=> terminal returned #{[t, ss].inspect}"}
|
809
|
-
return [e, ss] if t.is_a?(Array) && t.first == :")"
|
810
|
-
end
|
811
|
-
[e, s]
|
812
|
-
end
|
813
|
-
|
814
|
-
##
|
815
|
-
# Parse alt
|
816
|
-
# >>> alt("a | b | c")
|
817
|
-
# ((alt, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
|
818
|
-
# @param [String] s
|
819
|
-
# @return [Array]
|
820
|
-
def alt(s)
|
821
|
-
debug("alt") {"(#{s.inspect})"}
|
822
|
-
args = []
|
823
|
-
while !s.empty?
|
824
|
-
e, s = depth {seq(s)}
|
825
|
-
debug {"=> seq returned #{[e, s].inspect}"}
|
826
|
-
if e.to_s.empty?
|
827
|
-
break unless args.empty?
|
828
|
-
e = [:seq, []] # empty sequence
|
829
|
-
end
|
830
|
-
args << e
|
831
|
-
unless s.empty?
|
832
|
-
t, ss = depth {terminal(s)}
|
833
|
-
break unless t[0] == :alt
|
834
|
-
s = ss
|
835
|
-
end
|
836
|
-
end
|
837
|
-
args.length > 1 ? [args.unshift(:alt), s] : [e, s]
|
838
|
-
end
|
839
|
-
|
840
|
-
##
|
841
|
-
# parse seq
|
842
|
-
#
|
843
|
-
# >>> seq("a b c")
|
844
|
-
# ((seq, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
|
845
|
-
#
|
846
|
-
# >>> seq("a b? c")
|
847
|
-
# ((seq, \[('id', 'a'), (opt, ('id', 'b')), ('id', 'c')\]), '')
|
848
|
-
def seq(s)
|
849
|
-
debug("seq") {"(#{s.inspect})"}
|
850
|
-
args = []
|
851
|
-
while !s.empty?
|
852
|
-
e, ss = depth {diff(s)}
|
853
|
-
debug {"=> diff returned #{[e, ss].inspect}"}
|
854
|
-
unless e.to_s.empty?
|
855
|
-
args << e
|
856
|
-
s = ss
|
857
|
-
else
|
858
|
-
break;
|
859
|
-
end
|
860
|
-
end
|
861
|
-
if args.length > 1
|
862
|
-
[args.unshift(:seq), s]
|
863
|
-
elsif args.length == 1
|
864
|
-
args + [s]
|
865
|
-
else
|
866
|
-
["", s]
|
867
|
-
end
|
868
|
-
end
|
869
|
-
|
870
|
-
##
|
871
|
-
# parse diff
|
872
|
-
#
|
873
|
-
# >>> diff("a - b")
|
874
|
-
# ((diff, \[('id', 'a'), ('id', 'b')\]), '')
|
875
|
-
def diff(s)
|
876
|
-
debug("diff") {"(#{s.inspect})"}
|
877
|
-
e1, s = depth {postfix(s)}
|
878
|
-
debug {"=> postfix returned #{[e1, s].inspect}"}
|
879
|
-
unless e1.to_s.empty?
|
880
|
-
unless s.empty?
|
881
|
-
t, ss = depth {terminal(s)}
|
882
|
-
debug {"diff #{[t, ss].inspect}"}
|
883
|
-
if t.is_a?(Array) && t.first == :diff
|
884
|
-
s = ss
|
885
|
-
e2, s = primary(s)
|
886
|
-
unless e2.to_s.empty?
|
887
|
-
return [[:diff, e1, e2], s]
|
888
|
-
else
|
889
|
-
raise "Syntax Error"
|
890
|
-
end
|
891
|
-
end
|
892
|
-
end
|
893
|
-
end
|
894
|
-
[e1, s]
|
895
|
-
end
|
896
|
-
|
897
|
-
##
|
898
|
-
# parse postfix
|
899
|
-
#
|
900
|
-
# >>> postfix("a b c")
|
901
|
-
# (('id', 'a'), ' b c')
|
902
|
-
#
|
903
|
-
# >>> postfix("a? b c")
|
904
|
-
# ((opt, ('id', 'a')), ' b c')
|
905
|
-
def postfix(s)
|
906
|
-
debug("postfix") {"(#{s.inspect})"}
|
907
|
-
e, s = depth {primary(s)}
|
908
|
-
debug {"=> primary returned #{[e, s].inspect}"}
|
909
|
-
return ["", s] if e.to_s.empty?
|
910
|
-
if !s.empty?
|
911
|
-
t, ss = depth {terminal(s)}
|
912
|
-
debug {"=> #{[t, ss].inspect}"}
|
913
|
-
if t.is_a?(Array) && [:opt, :star, :plus].include?(t.first)
|
914
|
-
return [[t.first, e], ss]
|
915
|
-
end
|
916
|
-
end
|
917
|
-
[e, s]
|
918
|
-
end
|
919
|
-
|
920
|
-
##
|
921
|
-
# parse primary
|
922
|
-
#
|
923
|
-
# >>> primary("a b c")
|
924
|
-
# (('id', 'a'), ' b c')
|
925
|
-
def primary(s)
|
926
|
-
debug("primary") {"(#{s.inspect})"}
|
927
|
-
t, s = depth {terminal(s)}
|
928
|
-
debug {"=> terminal returned #{[t, s].inspect}"}
|
929
|
-
if t.is_a?(Symbol) || t.is_a?(String)
|
930
|
-
[t, s]
|
931
|
-
elsif %w(range hex).map(&:to_sym).include?(t.first)
|
932
|
-
[t, s]
|
933
|
-
elsif t.first == :"("
|
934
|
-
e, s = depth {ebnf(s)}
|
935
|
-
debug {"=> ebnf returned #{[e, s].inspect}"}
|
936
|
-
[e, s]
|
937
|
-
else
|
938
|
-
["", s]
|
939
|
-
end
|
940
|
-
end
|
941
|
-
|
942
|
-
##
|
943
|
-
# parse one terminal; return the terminal and the remaining string
|
944
|
-
#
|
945
|
-
# A terminal is represented as a tuple whose 1st item gives the type;
|
946
|
-
# some types have additional info in the tuple.
|
947
|
-
#
|
948
|
-
# @example
|
949
|
-
# >>> terminal("'abc' def")
|
950
|
-
# (("'", 'abc'), ' def')
|
951
|
-
#
|
952
|
-
# >>> terminal("[0-9]")
|
953
|
-
# ((range, '0-9'), '')
|
954
|
-
# >>> terminal("#x00B7")
|
955
|
-
# ((hex, '#x00B7'), '')
|
956
|
-
# >>> terminal ("\[#x0300-#x036F\]")
|
957
|
-
# ((range, '#x0300-#x036F'), '')
|
958
|
-
# >>> terminal("\[^<>'{}|^`\]-\[#x00-#x20\]")
|
959
|
-
# ((range, "^<>'{}|^`"), '-\[#x00-#x20\]')
|
960
|
-
def terminal(s)
|
961
|
-
s = s.strip
|
962
|
-
case m = s[0,1]
|
963
|
-
when '"', "'"
|
964
|
-
l, s = s[1..-1].split(m, 2)
|
965
|
-
[l, s]
|
966
|
-
when '['
|
967
|
-
l, s = s[1..-1].split(']', 2)
|
968
|
-
[[:range, l], s]
|
969
|
-
when '#'
|
970
|
-
s.match(/(#\w+)(.*)$/)
|
971
|
-
l, s = $1, $2
|
972
|
-
[[:hex, l], s]
|
973
|
-
when /[[:alpha:]]/
|
974
|
-
s.match(/(\w+)(.*)$/)
|
975
|
-
l, s = $1, $2
|
976
|
-
[l.to_sym, s]
|
977
|
-
when '@'
|
978
|
-
s.match(/@(#\w+)(.*)$/)
|
979
|
-
l, s = $1, $2
|
980
|
-
[[:"@", l], s]
|
981
|
-
when '-'
|
982
|
-
[[:diff], s[1..-1]]
|
983
|
-
when '?'
|
984
|
-
[[:opt], s[1..-1]]
|
985
|
-
when '|'
|
986
|
-
[[:alt], s[1..-1]]
|
987
|
-
when '+'
|
988
|
-
[[:plus], s[1..-1]]
|
989
|
-
when '*'
|
990
|
-
[[:star], s[1..-1]]
|
991
|
-
when /[\(\)]/
|
992
|
-
[[m.to_sym], s[1..-1]]
|
993
|
-
else
|
994
|
-
raise "unrecognized terminal: #{s.inspect}"
|
995
|
-
end
|
996
|
-
end
|
997
|
-
|
998
|
-
def depth
|
999
|
-
@depth += 1
|
1000
|
-
ret = yield
|
1001
|
-
@depth -= 1
|
1002
|
-
ret
|
1003
|
-
end
|
1004
|
-
|
1005
|
-
##
|
1006
|
-
# Progress output when debugging
|
1007
|
-
#
|
1008
|
-
# @overload debug(node, message)
|
1009
|
-
# @param [String] node relative location in input
|
1010
|
-
# @param [String] message ("")
|
1011
|
-
#
|
1012
|
-
# @overload debug(message)
|
1013
|
-
# @param [String] message ("")
|
1014
|
-
#
|
1015
|
-
# @yieldreturn [String] added to message
|
1016
|
-
def debug(*args)
|
1017
|
-
return unless @options[:debug]
|
1018
|
-
options = args.last.is_a?(Hash) ? args.pop : {}
|
1019
|
-
depth = options[:depth] || @depth
|
1020
|
-
message = args.pop
|
1021
|
-
message = message.call if message.is_a?(Proc)
|
1022
|
-
args << message if message
|
1023
|
-
args << yield if block_given?
|
1024
|
-
message = "#{args.join(': ')}"
|
1025
|
-
str = "[#{@lineno}]#{' ' * depth}#{message}"
|
1026
|
-
@options[:debug] << str if @options[:debug].is_a?(Array)
|
1027
|
-
$stderr.puts(str) if @options[:debug] == true
|
13
|
+
# ebnf = EBNF.parse(input)
|
14
|
+
#
|
15
|
+
# @param [#read, String, #to_s] input
|
16
|
+
# @param [Hash{Symbol => Object}] options
|
17
|
+
# @return [EBNF::Base]
|
18
|
+
# @raise [Exception] on invalid input
|
19
|
+
def self.parse(input, options = {})
|
20
|
+
query = ::EBNF::Base.new(input, options)
|
1028
21
|
end
|
1029
22
|
end
|