ebnf 0.3.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +6 -14
- data/README.md +8 -58
- data/VERSION +1 -1
- data/bin/ebnf +7 -5
- data/etc/doap.ttl +11 -12
- data/etc/ebnf.ebnf +45 -41
- data/etc/ebnf.ll1.sxp +114 -197
- data/etc/ebnf.rb +149 -459
- data/etc/ebnf.sxp +27 -22
- data/etc/turtle.ll1.sxp +5 -5
- data/etc/turtle.rb +55 -50
- data/etc/turtle.sxp +20 -21
- data/lib/ebnf/base.rb +32 -17
- data/lib/ebnf/ll1.rb +39 -7
- data/lib/ebnf/ll1/lexer.rb +8 -20
- data/lib/ebnf/ll1/parser.rb +4 -0
- data/lib/ebnf/parser.rb +55 -51
- data/lib/ebnf/rule.rb +36 -0
- metadata +15 -15
data/etc/turtle.sxp
CHANGED
@@ -1,14 +1,13 @@
|
|
1
|
-
|
2
|
-
(
|
1
|
+
(
|
3
2
|
(rule turtleDoc "1" (star statement))
|
4
3
|
(rule statement "2" (alt directive (seq triples ".")))
|
5
4
|
(rule directive "3" (alt prefixID base sparqlPrefix sparqlBase))
|
6
5
|
(rule prefixID "4" (seq "@prefix" PNAME_NS IRIREF "."))
|
7
6
|
(rule base "5" (seq "@base" IRIREF "."))
|
8
7
|
(rule triples "6"
|
9
|
-
(alt
|
8
|
+
(alt
|
10
9
|
(seq subject predicateObjectList)
|
11
|
-
(seq blankNodePropertyList (opt predicateObjectList))))
|
10
|
+
(seq blankNodePropertyList (opt predicateObjectList))) )
|
12
11
|
(rule predicateObjectList "7"
|
13
12
|
(seq verb objectList (star (seq ";" (opt (seq verb objectList))))))
|
14
13
|
(rule objectList "8" (seq object (star (seq "," object))))
|
@@ -21,29 +20,29 @@
|
|
21
20
|
(rule collection "15" (seq "(" (star object) ")"))
|
22
21
|
(rule NumericLiteral "16" (alt INTEGER DECIMAL DOUBLE))
|
23
22
|
(rule String "17"
|
24
|
-
(alt
|
25
|
-
|
26
|
-
(terminal IRIREF "18" (seq "<" (
|
23
|
+
(alt STRING_LITERAL_QUOTE STRING_LITERAL_SINGLE_QUOTE
|
24
|
+
STRING_LITERAL_LONG_SINGLE_QUOTE STRING_LITERAL_LONG_QUOTE ))
|
25
|
+
(terminal IRIREF "18" (seq "<" (range "^#x00-#x20<>\"{}|^`] | UCHAR)* '>'")))
|
27
26
|
(terminal INTEGER "19" (seq (opt (range "+-")) (plus (range "0-9"))))
|
28
27
|
(terminal DECIMAL "20"
|
29
28
|
(seq (opt (range "+-")) (seq (star (range "0-9")) "." (plus (range "0-9")))))
|
30
29
|
(terminal DOUBLE "21"
|
31
|
-
(seq
|
30
|
+
(seq
|
32
31
|
(opt (range "+-"))
|
33
|
-
(alt
|
32
|
+
(alt
|
34
33
|
(seq (plus (range "0-9")) "." (star (range "0-9")) EXPONENT)
|
35
34
|
(seq "." (plus (range "0-9")) EXPONENT)
|
36
|
-
(seq (plus (range "0-9")) EXPONENT))))
|
35
|
+
(seq (plus (range "0-9")) EXPONENT)) ))
|
37
36
|
(terminal STRING_LITERAL_QUOTE "22"
|
38
37
|
(seq "\"" (star (alt (range "^#x22#x5C#xA#xD") ECHAR UCHAR)) "\""))
|
39
38
|
(terminal STRING_LITERAL_SINGLE_QUOTE "23"
|
40
39
|
(seq "'" (star (alt (range "^#x27#x5C#xA#xD") ECHAR UCHAR)) "'"))
|
41
40
|
(terminal STRING_LITERAL_LONG_SINGLE_QUOTE "24"
|
42
|
-
(seq "'''" (
|
41
|
+
(seq "'''" (seq (opt (alt "'" "''")) (range "^'] | ECHAR | UCHAR ))* \"'''\""))))
|
43
42
|
(terminal STRING_LITERAL_LONG_QUOTE "25"
|
44
|
-
(seq "\"\"\"" (
|
43
|
+
(seq "\"\"\"" (seq (opt (alt "\"" "\"\"")) (range "^\"] | ECHAR | UCHAR ))* '\"\"\"'"))))
|
45
44
|
(terminal UCHAR "26"
|
46
|
-
(alt (seq "
|
45
|
+
(alt (seq "u" HEX HEX HEX HEX) (seq "U" HEX HEX HEX HEX HEX HEX HEX HEX)))
|
47
46
|
(rule sparqlPrefix "28s" (seq SPARQL_PREFIX PNAME_NS IRIREF))
|
48
47
|
(terminal SPARQL_PREFIX "28t"
|
49
48
|
(seq (range "Pp") (range "Rr") (range "Ee") (range "Ff") (range "Ii") (range "Xx")))
|
@@ -61,11 +60,11 @@
|
|
61
60
|
(terminal LANGTAG "144s"
|
62
61
|
(seq "@" (plus (range "a-zA-Z")) (star (seq "-" (plus (range "a-zA-Z0-9"))))))
|
63
62
|
(terminal EXPONENT "154s" (seq (range "eE") (opt (range "+-")) (plus (range "0-9"))))
|
64
|
-
(terminal ECHAR "159s" (seq "\\" (range "tbnrf
|
63
|
+
(terminal ECHAR "159s" (seq "\\" (range "tbnrf\"'")))
|
65
64
|
(terminal WS "161s" (alt (hex "#x20") (hex "#x9") (hex "#xD") (hex "#xA")))
|
66
65
|
(terminal ANON "162s" (seq "[" (star WS) "]"))
|
67
66
|
(terminal PN_CHARS_BASE "163s"
|
68
|
-
(alt
|
67
|
+
(alt
|
69
68
|
(range "A-Z")
|
70
69
|
(range "a-z")
|
71
70
|
(range "#x00C0-#x00D6")
|
@@ -79,24 +78,24 @@
|
|
79
78
|
(range "#x3001-#xD7FF")
|
80
79
|
(range "#xF900-#xFDCF")
|
81
80
|
(range "#xFDF0-#xFFFD")
|
82
|
-
(range "#x10000-#xEFFFF")))
|
81
|
+
(range "#x10000-#xEFFFF")) )
|
83
82
|
(terminal PN_CHARS_U "164s" (alt PN_CHARS_BASE "_"))
|
84
83
|
(terminal PN_CHARS "166s"
|
85
84
|
(alt PN_CHARS_U "-"
|
86
85
|
(range "0-9")
|
87
86
|
(hex "#x00B7")
|
88
87
|
(range "#x0300-#x036F")
|
89
|
-
(range "#x203F-#x2040")))
|
88
|
+
(range "#x203F-#x2040")) )
|
90
89
|
(terminal PN_PREFIX "167s"
|
91
90
|
(seq PN_CHARS_BASE (opt (seq (star (alt PN_CHARS ".")) PN_CHARS))))
|
92
91
|
(terminal PN_LOCAL "168s"
|
93
|
-
(seq
|
92
|
+
(seq
|
94
93
|
(alt PN_CHARS_U ":" (range "0-9") PLX)
|
95
|
-
(opt (seq (star (alt PN_CHARS "." ":" PLX)) (alt PN_CHARS ":" PLX)))))
|
94
|
+
(opt (seq (star (alt PN_CHARS "." ":" PLX)) (alt PN_CHARS ":" PLX)))) )
|
96
95
|
(terminal PLX "169s" (alt PERCENT PN_LOCAL_ESC))
|
97
96
|
(terminal PERCENT "170s" (seq "%" HEX HEX))
|
98
97
|
(terminal HEX "171s" (alt (range "0-9") (range "A-F") (range "a-f")))
|
99
98
|
(terminal PN_LOCAL_ESC "172s"
|
100
99
|
(seq "\\"
|
101
|
-
(alt
|
102
|
-
|
100
|
+
(alt "_" "~" "." "-" "!" "$" "&" "'" "(" ")" "*" "+" "," ";" "=" "/" "?" "#"
|
101
|
+
"@" "%" )) ))
|
data/lib/ebnf/base.rb
CHANGED
@@ -119,30 +119,44 @@ module EBNF
|
|
119
119
|
# @param [Hash{Symbol => Object}] options
|
120
120
|
# @option options [Boolean, Array] :debug
|
121
121
|
# Output debug information to an array or STDOUT.
|
122
|
+
# @option options [Symbol] :format (:ebnf)
|
123
|
+
# Format of input, one of :ebnf or :sxp
|
122
124
|
def initialize(input, options = {})
|
123
|
-
@options = options
|
125
|
+
@options = {:format => :ebnf}.merge(options)
|
124
126
|
@lineno, @depth, @errors = 1, 0, []
|
125
127
|
terminal = false
|
126
128
|
@ast = []
|
127
129
|
|
128
130
|
input = input.respond_to?(:read) ? input.read : input.to_s
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
131
|
+
|
132
|
+
case @options[:format]
|
133
|
+
when :sxp
|
134
|
+
require 'sxp' unless defined?(SXP)
|
135
|
+
@ast = SXP::Reader::Basic.read(input).map {|e| Rule.from_sxp(e)}
|
136
|
+
when :ebnf
|
137
|
+
scanner = StringScanner.new(input)
|
138
|
+
|
139
|
+
eachRule(scanner) do |r|
|
140
|
+
debug("rule string") {r.inspect}
|
141
|
+
case r
|
142
|
+
when /^@terminals/
|
143
|
+
# Switch mode to parsing terminals
|
144
|
+
terminal = true
|
145
|
+
when /^@pass\s*(.*)$/m
|
146
|
+
expr = expression($1).first
|
147
|
+
rule = Rule.new(nil, nil, expr, :kind => :pass)
|
148
|
+
rule.orig = expr
|
149
|
+
@ast << rule
|
150
|
+
else
|
151
|
+
rule = depth {ruleParts(r)}
|
152
|
+
|
153
|
+
rule.kind = :terminal if terminal # Override after we've parsed @terminals
|
154
|
+
rule.orig = r
|
155
|
+
@ast << rule
|
156
|
+
end
|
145
157
|
end
|
158
|
+
else
|
159
|
+
raise "unknown input format #{options[:format].inspect}"
|
146
160
|
end
|
147
161
|
end
|
148
162
|
|
@@ -158,6 +172,7 @@ module EBNF
|
|
158
172
|
# Write out parsed syntax string as an S-Expression
|
159
173
|
# @return [String]
|
160
174
|
def to_sxp
|
175
|
+
require 'sxp' unless defined?(SXP)
|
161
176
|
SXP::Generator.string(ast.sort_by{|r| r.id.to_f}.map(&:for_sxp))
|
162
177
|
end
|
163
178
|
def to_s; to_sxp; end
|
data/lib/ebnf/ll1.rb
CHANGED
@@ -21,12 +21,21 @@ module EBNF
|
|
21
21
|
attr_reader :follow
|
22
22
|
|
23
23
|
# Terminal table
|
24
|
+
#
|
24
25
|
# The list of terminals used in the grammar.
|
25
26
|
#
|
26
27
|
# @return [Array<String, Symbol>]
|
27
28
|
attr_reader :terminals
|
28
29
|
|
30
|
+
# Pass expression
|
31
|
+
#
|
32
|
+
# A Terminal symbol used for skipping whitespace and comments
|
33
|
+
#
|
34
|
+
# @return [Symbol, String]
|
35
|
+
attr_reader :pass
|
36
|
+
|
29
37
|
# Start symbol
|
38
|
+
#
|
30
39
|
# The rule which starts the grammar
|
31
40
|
#
|
32
41
|
# @return [Symbol]
|
@@ -69,7 +78,6 @@ module EBNF
|
|
69
78
|
|
70
79
|
@ast += comprehensions
|
71
80
|
progress("FF.c") {"(#{ittr}) comprehensions #{comprehensions.length}"}
|
72
|
-
#require 'debugger'; breakpoint
|
73
81
|
ittr += 1
|
74
82
|
end while !comprehensions.empty?
|
75
83
|
|
@@ -128,7 +136,6 @@ module EBNF
|
|
128
136
|
end
|
129
137
|
|
130
138
|
# If there is no comprehension of this rule (meaning, it is a sequence of one non-terminal), then the follows of the non-terminal include the follows of the rule. This handles rules with multiple sequences because it will have a comprehension that includes the last element in the sequence
|
131
|
-
#require 'debugger'; breakpoint if ai.sym == :_predicateObjectList_1 && aj.sym == :_predicateObjectList_7
|
132
139
|
if !aj.comp && aj.follow
|
133
140
|
debug("Fo.2.1a") {"(#{ittr}) add follow #{aj.follow.inspect} from #{aj.sym} to #{ai.sym}"}
|
134
141
|
follows += ai.add_follow(aj.follow)
|
@@ -142,7 +149,6 @@ module EBNF
|
|
142
149
|
end
|
143
150
|
|
144
151
|
# Since the rules are of the form wAiw', and we've handled the case which is just Aiw', this leaves those cases that have rules prior to Ai. This basically says that the follows of a rule are added to the follows of the comprehension of the rule
|
145
|
-
#require 'debugger'; breakpoint if aj.sym == :_predicateObjectList_6 && aj.follow
|
146
152
|
if aj.comp && aj.follow
|
147
153
|
debug("Fo.2.3") {"(#{ittr}) add follow #{aj.follow.inspect} from #{aj.sym} to #{aj.comp.sym}"}
|
148
154
|
follows += aj.comp.add_follow(aj.follow)
|
@@ -175,11 +181,26 @@ module EBNF
|
|
175
181
|
memo[r.sym] = r.follow if r.follow
|
176
182
|
memo
|
177
183
|
}
|
178
|
-
@terminals = ast.map
|
179
|
-
(r.first || []) + (r.follow || [])
|
180
|
-
end.flatten.uniq
|
184
|
+
@terminals = ast.map {|r| Array(r.first) + Array(r.follow)}.flatten.uniq
|
181
185
|
@terminals = (@terminals - [:_eps, :_eof]).sort_by{|t| t.to_s.sub(/^_/, '')}
|
182
186
|
|
187
|
+
# FIXME: assumes that this is a (seq :PASS), or similar
|
188
|
+
if pass = ast.detect {|r| r.pass?}
|
189
|
+
@pass = pass.expr.last
|
190
|
+
end
|
191
|
+
|
192
|
+
# If a generated terminal is found, this indicates an error, as this version does not automatically generate regular expressions for automatic terminals
|
193
|
+
@terminals.
|
194
|
+
select {|t| t.to_s.start_with?("_")}.
|
195
|
+
reject {|t| t.to_s.start_with?("_pass_")}. # Concession to backwards compatibility
|
196
|
+
each do |term|
|
197
|
+
|
198
|
+
error("build_tables",
|
199
|
+
"terminal #{term} is automatically generated; " +
|
200
|
+
"regular expressions are not yet generated and parsing " +
|
201
|
+
"is not supported")
|
202
|
+
end
|
203
|
+
|
183
204
|
@branch = {}
|
184
205
|
@already = []
|
185
206
|
@agenda = []
|
@@ -256,10 +277,18 @@ module EBNF
|
|
256
277
|
end
|
257
278
|
|
258
279
|
if rule.alt?
|
280
|
+
# A First/Follow conflict appears when _eps is in the first
|
281
|
+
# of one rule and there is a token in the first and
|
282
|
+
# follow of the same rule
|
283
|
+
if rule.first.include?(:_eps) && !(overlap = ((rule.first & rule.follow) - [:eps])).empty?
|
284
|
+
error("First/Follow Conflict: #{overlap.first.inspect} is both first and follow of #{rule.sym}")
|
285
|
+
end
|
286
|
+
|
259
287
|
# Add entries for each alternative, based on the alternative's first/seq
|
260
288
|
rule.expr[1..-1].each do |prod|
|
261
289
|
prod_rule = find_rule(prod)
|
262
290
|
debug(" Alt", prod)
|
291
|
+
|
263
292
|
@agenda << prod unless @already.include?(prod) || @agenda.include?(prod)
|
264
293
|
if prod == :_empty
|
265
294
|
debug(" empty")
|
@@ -269,9 +298,12 @@ module EBNF
|
|
269
298
|
branchDict[prod] = [prod]
|
270
299
|
else
|
271
300
|
prod_rule.first.reject{|f| f == :_eps}.each do |f|
|
301
|
+
# A First/First conflict appears when there are two rules having
|
302
|
+
# the same first, so the parser can't know which one to choose.
|
272
303
|
if branchDict.has_key?(f)
|
273
|
-
error("First/First Conflict: #{f} is also the condition for #{branchDict[f]}")
|
304
|
+
error("First/First Conflict: #{f.inspect} is also the condition for #{branchDict[f].first}")
|
274
305
|
end
|
306
|
+
|
275
307
|
debug(" alt") {"[#{f}] => #{prod}"}
|
276
308
|
branchDict[f] = [prod]
|
277
309
|
end
|
data/lib/ebnf/ll1/lexer.rb
CHANGED
@@ -47,19 +47,11 @@ module EBNF::LL1
|
|
47
47
|
ESCAPE_CHAR8 = /\\U(?:[0-9A-Fa-f]{8,8})/.freeze # \UXXXXXXXX
|
48
48
|
ECHAR = /\\./ # More liberal unescaping
|
49
49
|
UCHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/.freeze
|
50
|
-
COMMENT = /#.*/.freeze
|
51
|
-
WS = / |\t|\r|\n/m.freeze
|
52
|
-
|
53
|
-
ML_START = /\'\'\'|\"\"\"/.freeze # Beginning of terminals that may span lines
|
54
50
|
|
55
51
|
##
|
56
|
-
# @return [Regexp] defines whitespace,
|
52
|
+
# @return [Regexp] defines whitespace, including comments, otherwise whitespace must be explicit in terminals
|
57
53
|
attr_reader :whitespace
|
58
54
|
|
59
|
-
##
|
60
|
-
# @return [Regexp] defines single-line comment, defaults to COMMENT
|
61
|
-
attr_reader :comment
|
62
|
-
|
63
55
|
##
|
64
56
|
# Returns a copy of the given `input` string with all `\uXXXX` and
|
65
57
|
# `\UXXXXXXXX` Unicode codepoint escape sequences replaced with their
|
@@ -119,13 +111,11 @@ module EBNF::LL1
|
|
119
111
|
# Array of symbol, regexp pairs used to match terminals.
|
120
112
|
# If the symbol is nil, it defines a Regexp to match string terminals.
|
121
113
|
# @param [Hash{Symbol => Object}] options
|
122
|
-
# @option options [Regexp] :whitespace
|
123
|
-
#
|
124
|
-
# @option options [Regexp] :comment (COMMENT)
|
114
|
+
# @option options [Regexp] :whitespace
|
115
|
+
# Whitespace between tokens, including comments
|
125
116
|
def initialize(input = nil, terminals = nil, options = {})
|
126
117
|
@options = options.dup
|
127
|
-
@whitespace = @options[:whitespace]
|
128
|
-
@comment = @options[:comment] || COMMENT
|
118
|
+
@whitespace = @options[:whitespace]
|
129
119
|
@terminals = terminals.map do |term|
|
130
120
|
term.is_a?(Array) ? Terminal.new(*term) : term
|
131
121
|
end
|
@@ -199,7 +189,7 @@ module EBNF::LL1
|
|
199
189
|
token = match_token
|
200
190
|
|
201
191
|
if token.nil?
|
202
|
-
lexme = (scanner.rest.split(
|
192
|
+
lexme = (scanner.rest.split(@whitespace || /\s/).first rescue nil) || scanner.rest
|
203
193
|
raise Error.new("Invalid token #{lexme[0..100].inspect}",
|
204
194
|
:input => scanner.rest[0..100], :token => lexme, :lineno => lineno)
|
205
195
|
end
|
@@ -232,7 +222,7 @@ module EBNF::LL1
|
|
232
222
|
# @return [Token]
|
233
223
|
def recover
|
234
224
|
until scanner.eos? || tok = match_token
|
235
|
-
if scanner.skip_until(@whitespace).nil? # Skip past current "token"
|
225
|
+
if scanner.skip_until(@whitespace || /\s/m).nil? # Skip past current "token"
|
236
226
|
# No whitespace at the end, must be at the end of the string
|
237
227
|
scanner.terminate
|
238
228
|
else
|
@@ -248,14 +238,12 @@ module EBNF::LL1
|
|
248
238
|
attr_reader :scanner
|
249
239
|
|
250
240
|
##
|
251
|
-
# Skip whitespace
|
241
|
+
# Skip whitespace, as defined through input options or defaults
|
252
242
|
def skip_whitespace
|
253
243
|
# skip all white space, but keep track of the current line number
|
254
|
-
while !scanner.eos?
|
244
|
+
while @whitespace && !scanner.eos?
|
255
245
|
if matched = scanner.scan(@whitespace)
|
256
246
|
@lineno += matched.count("\n")
|
257
|
-
elsif (scanner.scan(@comment))
|
258
|
-
#
|
259
247
|
else
|
260
248
|
return
|
261
249
|
end
|
data/lib/ebnf/ll1/parser.rb
CHANGED
@@ -219,6 +219,10 @@ module EBNF::LL1
|
|
219
219
|
# @see http://cs.adelaide.edu.au/~charles/lt/Lectures/07-ErrorRecovery.pdf
|
220
220
|
def parse(input = nil, start = nil, options = {}, &block)
|
221
221
|
@options = options.dup
|
222
|
+
@options[:debug] ||= case
|
223
|
+
when @options[:progress] then 2
|
224
|
+
when @options[:validate] then 1
|
225
|
+
end
|
222
226
|
@branch = options[:branch]
|
223
227
|
@first = options[:first] ||= {}
|
224
228
|
@follow = options[:follow] ||= {}
|
data/lib/ebnf/parser.rb
CHANGED
@@ -61,7 +61,7 @@ module EBNF
|
|
61
61
|
num_sym, expr = rule.split('::=', 2).map(&:strip)
|
62
62
|
num, sym = num_sym.split(']', 2).map(&:strip)
|
63
63
|
num = num[1..-1]
|
64
|
-
r = Rule.new(sym && sym.to_sym, num,
|
64
|
+
r = Rule.new(sym && sym.to_sym, num, expression(expr).first, :ebnf => self)
|
65
65
|
debug("ruleParts") { r.inspect }
|
66
66
|
r
|
67
67
|
end
|
@@ -70,40 +70,44 @@ module EBNF
|
|
70
70
|
# Parse a string into an expression tree and a remaining string
|
71
71
|
#
|
72
72
|
# @example
|
73
|
-
# >>>
|
74
|
-
# ((seq
|
73
|
+
# >>> expression("a b c")
|
74
|
+
# ((seq a b c) '')
|
75
75
|
#
|
76
|
-
# >>>
|
77
|
-
# ((seq
|
76
|
+
# >>> expression("a? b+ c*")
|
77
|
+
# ((seq (opt a) (plus b) (star c)) '')
|
78
78
|
#
|
79
|
-
# >>>
|
80
|
-
# ((alt
|
79
|
+
# >>> expression(" | x xlist")
|
80
|
+
# ((alt (seq) (seq x xlist)) '')
|
81
81
|
#
|
82
|
-
# >>>
|
83
|
-
# ((alt
|
82
|
+
# >>> expression("a | (b - c)")
|
83
|
+
# ((alt a (diff b c)) '')
|
84
84
|
#
|
85
|
-
# >>>
|
86
|
-
# ((alt
|
85
|
+
# >>> expression("a b | c d")
|
86
|
+
# ((alt (seq a b) (seq c d)) '')
|
87
87
|
#
|
88
|
-
# >>>
|
89
|
-
# ((alt
|
88
|
+
# >>> expression("a | b | c")
|
89
|
+
# ((alt a b c) '')
|
90
90
|
#
|
91
|
-
# >>>
|
92
|
-
# (
|
91
|
+
# >>> expression("a) b c")
|
92
|
+
# (a ' b c')
|
93
93
|
#
|
94
|
-
# >>>
|
95
|
-
# ((seq
|
94
|
+
# >>> expression("BaseDecl? PrefixDecl*")
|
95
|
+
# ((seq (opt BaseDecl) (star PrefixDecl)) '')
|
96
96
|
#
|
97
|
-
# >>>
|
98
|
-
# ((alt
|
97
|
+
# >>> expression("NCCHAR1 | diff | [0-9] | #x00B7 | [#x0300-#x036F] | \[#x203F-#x2040\]")
|
98
|
+
# ((alt NCCHAR1 diff
|
99
|
+
# (range '0-9')
|
100
|
+
# (hex '#x00B7')
|
101
|
+
# (range '#x0300-#x036F')
|
102
|
+
# (range '#x203F-#x2040')) '')
|
99
103
|
#
|
100
104
|
# @param [String] s
|
101
105
|
# @return [Array]
|
102
|
-
def
|
103
|
-
debug("
|
106
|
+
def expression(s)
|
107
|
+
debug("expression") {"(#{s.inspect})"}
|
104
108
|
e, s = depth {alt(s)}
|
105
109
|
debug {"=> alt returned #{[e, s].inspect}"}
|
106
|
-
unless s.empty?
|
110
|
+
unless s.to_s.empty?
|
107
111
|
t, ss = depth {terminal(s)}
|
108
112
|
debug {"=> terminal returned #{[t, ss].inspect}"}
|
109
113
|
return [e, ss] if t.is_a?(Array) && t.first == :")"
|
@@ -114,13 +118,13 @@ module EBNF
|
|
114
118
|
##
|
115
119
|
# Parse alt
|
116
120
|
# >>> alt("a | b | c")
|
117
|
-
# ((alt
|
121
|
+
# ((alt a b c) '')
|
118
122
|
# @param [String] s
|
119
123
|
# @return [Array]
|
120
124
|
def alt(s)
|
121
125
|
debug("alt") {"(#{s.inspect})"}
|
122
126
|
args = []
|
123
|
-
while !s.empty?
|
127
|
+
while !s.to_s.empty?
|
124
128
|
e, s = depth {seq(s)}
|
125
129
|
debug {"=> seq returned #{[e, s].inspect}"}
|
126
130
|
if e.to_s.empty?
|
@@ -128,7 +132,7 @@ module EBNF
|
|
128
132
|
e = [:seq, []] # empty sequence
|
129
133
|
end
|
130
134
|
args << e
|
131
|
-
unless s.empty?
|
135
|
+
unless s.to_s.empty?
|
132
136
|
t, ss = depth {terminal(s)}
|
133
137
|
break unless t[0] == :alt
|
134
138
|
s = ss
|
@@ -141,14 +145,14 @@ module EBNF
|
|
141
145
|
# parse seq
|
142
146
|
#
|
143
147
|
# >>> seq("a b c")
|
144
|
-
# ((seq
|
148
|
+
# ((seq a b c) '')
|
145
149
|
#
|
146
150
|
# >>> seq("a b? c")
|
147
|
-
# ((seq
|
151
|
+
# ((seq a (opt b) c) '')
|
148
152
|
def seq(s)
|
149
153
|
debug("seq") {"(#{s.inspect})"}
|
150
154
|
args = []
|
151
|
-
while !s.empty?
|
155
|
+
while !s.to_s.empty?
|
152
156
|
e, ss = depth {diff(s)}
|
153
157
|
debug {"=> diff returned #{[e, ss].inspect}"}
|
154
158
|
unless e.to_s.empty?
|
@@ -171,13 +175,13 @@ module EBNF
|
|
171
175
|
# parse diff
|
172
176
|
#
|
173
177
|
# >>> diff("a - b")
|
174
|
-
# ((diff
|
178
|
+
# ((diff a b) '')
|
175
179
|
def diff(s)
|
176
180
|
debug("diff") {"(#{s.inspect})"}
|
177
181
|
e1, s = depth {postfix(s)}
|
178
182
|
debug {"=> postfix returned #{[e1, s].inspect}"}
|
179
183
|
unless e1.to_s.empty?
|
180
|
-
unless s.empty?
|
184
|
+
unless s.to_s.empty?
|
181
185
|
t, ss = depth {terminal(s)}
|
182
186
|
debug {"diff #{[t, ss].inspect}"}
|
183
187
|
if t.is_a?(Array) && t.first == :diff
|
@@ -199,16 +203,16 @@ module EBNF
|
|
199
203
|
# parse postfix
|
200
204
|
#
|
201
205
|
# >>> postfix("a b c")
|
202
|
-
# (
|
206
|
+
# (a ' b c')
|
203
207
|
#
|
204
208
|
# >>> postfix("a? b c")
|
205
|
-
# ((opt,
|
209
|
+
# ((opt a) ' b c')
|
206
210
|
def postfix(s)
|
207
211
|
debug("postfix") {"(#{s.inspect})"}
|
208
212
|
e, s = depth {primary(s)}
|
209
213
|
debug {"=> primary returned #{[e, s].inspect}"}
|
210
214
|
return ["", s] if e.to_s.empty?
|
211
|
-
if !s.empty?
|
215
|
+
if !s.to_s.empty?
|
212
216
|
t, ss = depth {terminal(s)}
|
213
217
|
debug {"=> #{[t, ss].inspect}"}
|
214
218
|
if t.is_a?(Array) && [:opt, :star, :plus].include?(t.first)
|
@@ -222,7 +226,7 @@ module EBNF
|
|
222
226
|
# parse primary
|
223
227
|
#
|
224
228
|
# >>> primary("a b c")
|
225
|
-
# (
|
229
|
+
# (a ' b c')
|
226
230
|
def primary(s)
|
227
231
|
debug("primary") {"(#{s.inspect})"}
|
228
232
|
t, s = depth {terminal(s)}
|
@@ -232,8 +236,8 @@ module EBNF
|
|
232
236
|
elsif %w(range hex).map(&:to_sym).include?(t.first)
|
233
237
|
[t, s]
|
234
238
|
elsif t.first == :"("
|
235
|
-
e, s = depth {
|
236
|
-
debug {"=>
|
239
|
+
e, s = depth {expression(s)}
|
240
|
+
debug {"=> expression returned #{[e, s].inspect}"}
|
237
241
|
[e, s]
|
238
242
|
else
|
239
243
|
["", s]
|
@@ -248,34 +252,34 @@ module EBNF
|
|
248
252
|
#
|
249
253
|
# @example
|
250
254
|
# >>> terminal("'abc' def")
|
251
|
-
# (
|
255
|
+
# ('abc' ' def')
|
252
256
|
#
|
253
257
|
# >>> terminal("[0-9]")
|
254
|
-
# ((range
|
258
|
+
# ((range '0-9') '')
|
255
259
|
# >>> terminal("#x00B7")
|
256
|
-
# ((hex
|
260
|
+
# ((hex '#x00B7') '')
|
257
261
|
# >>> terminal ("\[#x0300-#x036F\]")
|
258
|
-
# ((range
|
262
|
+
# ((range '#x0300-#x036F') '')
|
259
263
|
# >>> terminal("\[^<>'{}|^`\]-\[#x00-#x20\]")
|
260
|
-
# ((range
|
264
|
+
# ((range "^<>'{}|^`") '-\[#x00-#x20\]')
|
261
265
|
def terminal(s)
|
262
266
|
s = s.strip
|
263
267
|
case m = s[0,1]
|
264
|
-
when '"', "'"
|
265
|
-
l, s = s[1..-1].split(m, 2)
|
266
|
-
[l, s]
|
267
|
-
when '['
|
268
|
-
l, s = s[1..-1].split(
|
269
|
-
[[:range, l], s]
|
270
|
-
when '#'
|
268
|
+
when '"', "'" # STRING1 or STRING2 Terminated by line-end or whitespace
|
269
|
+
l, s = s[1..-1].split(m.rstrip , 2)
|
270
|
+
[LL1::Lexer.unescape_string(l), s]
|
271
|
+
when '[' # ENUM, RANGE, O_ENUM, or O_RANGE
|
272
|
+
l, s = s[1..-1].split(/(?<=[^\\])\]/, 2)
|
273
|
+
[[:range, LL1::Lexer.unescape_string(l)], s]
|
274
|
+
when '#' # HEX
|
271
275
|
s.match(/(#\w+)(.*)$/)
|
272
276
|
l, s = $1, $2
|
273
277
|
[[:hex, l], s]
|
274
|
-
when /[\w\.]/
|
278
|
+
when /[\w\.]/ # SYMBOL
|
275
279
|
s.match(/(\w+)(.*)$/)
|
276
280
|
l, s = $1, $2
|
277
281
|
[l.to_sym, s]
|
278
|
-
when '@'
|
282
|
+
when '@' # @pass or @terminals
|
279
283
|
s.match(/@(#\w+)(.*)$/)
|
280
284
|
l, s = $1, $2
|
281
285
|
[[:"@", l], s]
|
@@ -289,7 +293,7 @@ module EBNF
|
|
289
293
|
[[:plus], s[1..-1]]
|
290
294
|
when '*'
|
291
295
|
[[:star], s[1..-1]]
|
292
|
-
when /[\(\)]/
|
296
|
+
when /[\(\)]/ # '(' or ')'
|
293
297
|
[[m.to_sym], s[1..-1]]
|
294
298
|
else
|
295
299
|
error("terminal", "unrecognized terminal: #{s.inspect}")
|