ebnf 0.3.1 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,14 +1,13 @@
1
-
2
- (
1
+ (
3
2
  (rule turtleDoc "1" (star statement))
4
3
  (rule statement "2" (alt directive (seq triples ".")))
5
4
  (rule directive "3" (alt prefixID base sparqlPrefix sparqlBase))
6
5
  (rule prefixID "4" (seq "@prefix" PNAME_NS IRIREF "."))
7
6
  (rule base "5" (seq "@base" IRIREF "."))
8
7
  (rule triples "6"
9
- (alt
8
+ (alt
10
9
  (seq subject predicateObjectList)
11
- (seq blankNodePropertyList (opt predicateObjectList))))
10
+ (seq blankNodePropertyList (opt predicateObjectList))) )
12
11
  (rule predicateObjectList "7"
13
12
  (seq verb objectList (star (seq ";" (opt (seq verb objectList))))))
14
13
  (rule objectList "8" (seq object (star (seq "," object))))
@@ -21,29 +20,29 @@
21
20
  (rule collection "15" (seq "(" (star object) ")"))
22
21
  (rule NumericLiteral "16" (alt INTEGER DECIMAL DOUBLE))
23
22
  (rule String "17"
24
- (alt
25
- STRING_LITERAL_QUOTE STRING_LITERAL_SINGLE_QUOTE STRING_LITERAL_LONG_SINGLE_QUOTE STRING_LITERAL_LONG_QUOTE ))
26
- (terminal IRIREF "18" (seq "<" (star (alt (range "^#x00-#x20<>\"{}|^`\\") UCHAR)) ">"))
23
+ (alt STRING_LITERAL_QUOTE STRING_LITERAL_SINGLE_QUOTE
24
+ STRING_LITERAL_LONG_SINGLE_QUOTE STRING_LITERAL_LONG_QUOTE ))
25
+ (terminal IRIREF "18" (seq "<" (range "^#x00-#x20<>\"{}|^`] | UCHAR)* '>'")))
27
26
  (terminal INTEGER "19" (seq (opt (range "+-")) (plus (range "0-9"))))
28
27
  (terminal DECIMAL "20"
29
28
  (seq (opt (range "+-")) (seq (star (range "0-9")) "." (plus (range "0-9")))))
30
29
  (terminal DOUBLE "21"
31
- (seq
30
+ (seq
32
31
  (opt (range "+-"))
33
- (alt
32
+ (alt
34
33
  (seq (plus (range "0-9")) "." (star (range "0-9")) EXPONENT)
35
34
  (seq "." (plus (range "0-9")) EXPONENT)
36
- (seq (plus (range "0-9")) EXPONENT))))
35
+ (seq (plus (range "0-9")) EXPONENT)) ))
37
36
  (terminal STRING_LITERAL_QUOTE "22"
38
37
  (seq "\"" (star (alt (range "^#x22#x5C#xA#xD") ECHAR UCHAR)) "\""))
39
38
  (terminal STRING_LITERAL_SINGLE_QUOTE "23"
40
39
  (seq "'" (star (alt (range "^#x27#x5C#xA#xD") ECHAR UCHAR)) "'"))
41
40
  (terminal STRING_LITERAL_LONG_SINGLE_QUOTE "24"
42
- (seq "'''" (star (seq (opt (alt "'" "''")) (alt (range "^'\\") ECHAR UCHAR))) "'''"))
41
+ (seq "'''" (seq (opt (alt "'" "''")) (range "^'] | ECHAR | UCHAR ))* \"'''\""))))
43
42
  (terminal STRING_LITERAL_LONG_QUOTE "25"
44
- (seq "\"\"\"" (star (seq (opt (alt "\"" "\"\"")) (alt (range "^\"\\") ECHAR UCHAR))) "\"\"\""))
43
+ (seq "\"\"\"" (seq (opt (alt "\"" "\"\"")) (range "^\"] | ECHAR | UCHAR ))* '\"\"\"'"))))
45
44
  (terminal UCHAR "26"
46
- (alt (seq "\\u" HEX HEX HEX HEX) (seq "\\U" HEX HEX HEX HEX HEX HEX HEX HEX)))
45
+ (alt (seq "u" HEX HEX HEX HEX) (seq "U" HEX HEX HEX HEX HEX HEX HEX HEX)))
47
46
  (rule sparqlPrefix "28s" (seq SPARQL_PREFIX PNAME_NS IRIREF))
48
47
  (terminal SPARQL_PREFIX "28t"
49
48
  (seq (range "Pp") (range "Rr") (range "Ee") (range "Ff") (range "Ii") (range "Xx")))
@@ -61,11 +60,11 @@
61
60
  (terminal LANGTAG "144s"
62
61
  (seq "@" (plus (range "a-zA-Z")) (star (seq "-" (plus (range "a-zA-Z0-9"))))))
63
62
  (terminal EXPONENT "154s" (seq (range "eE") (opt (range "+-")) (plus (range "0-9"))))
64
- (terminal ECHAR "159s" (seq "\\" (range "tbnrf\\\"'")))
63
+ (terminal ECHAR "159s" (seq "\\" (range "tbnrf\"'")))
65
64
  (terminal WS "161s" (alt (hex "#x20") (hex "#x9") (hex "#xD") (hex "#xA")))
66
65
  (terminal ANON "162s" (seq "[" (star WS) "]"))
67
66
  (terminal PN_CHARS_BASE "163s"
68
- (alt
67
+ (alt
69
68
  (range "A-Z")
70
69
  (range "a-z")
71
70
  (range "#x00C0-#x00D6")
@@ -79,24 +78,24 @@
79
78
  (range "#x3001-#xD7FF")
80
79
  (range "#xF900-#xFDCF")
81
80
  (range "#xFDF0-#xFFFD")
82
- (range "#x10000-#xEFFFF")))
81
+ (range "#x10000-#xEFFFF")) )
83
82
  (terminal PN_CHARS_U "164s" (alt PN_CHARS_BASE "_"))
84
83
  (terminal PN_CHARS "166s"
85
84
  (alt PN_CHARS_U "-"
86
85
  (range "0-9")
87
86
  (hex "#x00B7")
88
87
  (range "#x0300-#x036F")
89
- (range "#x203F-#x2040")))
88
+ (range "#x203F-#x2040")) )
90
89
  (terminal PN_PREFIX "167s"
91
90
  (seq PN_CHARS_BASE (opt (seq (star (alt PN_CHARS ".")) PN_CHARS))))
92
91
  (terminal PN_LOCAL "168s"
93
- (seq
92
+ (seq
94
93
  (alt PN_CHARS_U ":" (range "0-9") PLX)
95
- (opt (seq (star (alt PN_CHARS "." ":" PLX)) (alt PN_CHARS ":" PLX)))))
94
+ (opt (seq (star (alt PN_CHARS "." ":" PLX)) (alt PN_CHARS ":" PLX)))) )
96
95
  (terminal PLX "169s" (alt PERCENT PN_LOCAL_ESC))
97
96
  (terminal PERCENT "170s" (seq "%" HEX HEX))
98
97
  (terminal HEX "171s" (alt (range "0-9") (range "A-F") (range "a-f")))
99
98
  (terminal PN_LOCAL_ESC "172s"
100
99
  (seq "\\"
101
- (alt
102
- "_" "~" "." "-" "!" "$" "&" "'" "(" ")" "*" "+" "," ";" "=" "/" "?" "#" "@" "%" ))))
100
+ (alt "_" "~" "." "-" "!" "$" "&" "'" "(" ")" "*" "+" "," ";" "=" "/" "?" "#"
101
+ "@" "%" )) ))
@@ -119,30 +119,44 @@ module EBNF
119
119
  # @param [Hash{Symbol => Object}] options
120
120
  # @option options [Boolean, Array] :debug
121
121
  # Output debug information to an array or STDOUT.
122
+ # @option options [Symbol] :format (:ebnf)
123
+ # Format of input, either :ebnf or :sxp
122
124
  def initialize(input, options = {})
123
- @options = options
125
+ @options = {:format => :ebnf}.merge(options)
124
126
  @lineno, @depth, @errors = 1, 0, []
125
127
  terminal = false
126
128
  @ast = []
127
129
 
128
130
  input = input.respond_to?(:read) ? input.read : input.to_s
129
- scanner = StringScanner.new(input)
130
-
131
- eachRule(scanner) do |r|
132
- debug("rule string") {r.inspect}
133
- case r
134
- when /^@terminals/
135
- # Switch mode to parsing terminals
136
- terminal = true
137
- when /^@pass\s*(.*)$/m
138
- # Ignore, as we can't use this for processing the EBNF grammar itself
139
- else
140
- rule = depth {ruleParts(r)}
141
-
142
- rule.kind = :terminal if terminal # Override after we've parsed @terminals
143
- rule.orig = r
144
- @ast << rule
131
+
132
+ case @options[:format]
133
+ when :sxp
134
+ require 'sxp' unless defined?(SXP)
135
+ @ast = SXP::Reader::Basic.read(input).map {|e| Rule.from_sxp(e)}
136
+ when :ebnf
137
+ scanner = StringScanner.new(input)
138
+
139
+ eachRule(scanner) do |r|
140
+ debug("rule string") {r.inspect}
141
+ case r
142
+ when /^@terminals/
143
+ # Switch mode to parsing terminals
144
+ terminal = true
145
+ when /^@pass\s*(.*)$/m
146
+ expr = expression($1).first
147
+ rule = Rule.new(nil, nil, expr, :kind => :pass)
148
+ rule.orig = expr
149
+ @ast << rule
150
+ else
151
+ rule = depth {ruleParts(r)}
152
+
153
+ rule.kind = :terminal if terminal # Override after we've parsed @terminals
154
+ rule.orig = r
155
+ @ast << rule
156
+ end
145
157
  end
158
+ else
159
+ raise "unknown input format #{options[:format].inspect}"
146
160
  end
147
161
  end
148
162
 
@@ -158,6 +172,7 @@ module EBNF
158
172
  # Write out parsed syntax string as an S-Expression
159
173
  # @return [String]
160
174
  def to_sxp
175
+ require 'sxp' unless defined?(SXP)
161
176
  SXP::Generator.string(ast.sort_by{|r| r.id.to_f}.map(&:for_sxp))
162
177
  end
163
178
  def to_s; to_sxp; end
@@ -21,12 +21,21 @@ module EBNF
21
21
  attr_reader :follow
22
22
 
23
23
  # Terminal table
24
+ #
24
25
  # The list of terminals used in the grammar.
25
26
  #
26
27
  # @return [Array<String, Symbol>]
27
28
  attr_reader :terminals
28
29
 
30
+ # Pass expression
31
+ #
32
+ # A Terminal symbol used for skipping whitespace and comments
33
+ #
34
+ # @return [Symbol, String]
35
+ attr_reader :pass
36
+
29
37
  # Start symbol
38
+ #
30
39
  # The rule which starts the grammar
31
40
  #
32
41
  # @return [Symbol]
@@ -69,7 +78,6 @@ module EBNF
69
78
 
70
79
  @ast += comprehensions
71
80
  progress("FF.c") {"(#{ittr}) comprehensions #{comprehensions.length}"}
72
- #require 'debugger'; breakpoint
73
81
  ittr += 1
74
82
  end while !comprehensions.empty?
75
83
 
@@ -128,7 +136,6 @@ module EBNF
128
136
  end
129
137
 
130
138
  # If there is no comprehension of this rule (meaning, it is a sequence of one non-terminal), then the follows of the non-terminal include the follows of the rule. This handles rules with multiple sequences because it will have a comprehension that includes the last element in the sequence
131
- #require 'debugger'; breakpoint if ai.sym == :_predicateObjectList_1 && aj.sym == :_predicateObjectList_7
132
139
  if !aj.comp && aj.follow
133
140
  debug("Fo.2.1a") {"(#{ittr}) add follow #{aj.follow.inspect} from #{aj.sym} to #{ai.sym}"}
134
141
  follows += ai.add_follow(aj.follow)
@@ -142,7 +149,6 @@ module EBNF
142
149
  end
143
150
 
144
151
  # Since the rules are of the form wAiw', and we've handled the case which is just Aiw', this leaves those cases that have rules prior to Ai. This basically says that the follows of a rule are added to the follows of the comprehension of the rule
145
- #require 'debugger'; breakpoint if aj.sym == :_predicateObjectList_6 && aj.follow
146
152
  if aj.comp && aj.follow
147
153
  debug("Fo.2.3") {"(#{ittr}) add follow #{aj.follow.inspect} from #{aj.sym} to #{aj.comp.sym}"}
148
154
  follows += aj.comp.add_follow(aj.follow)
@@ -175,11 +181,26 @@ module EBNF
175
181
  memo[r.sym] = r.follow if r.follow
176
182
  memo
177
183
  }
178
- @terminals = ast.map do |r|
179
- (r.first || []) + (r.follow || [])
180
- end.flatten.uniq
184
+ @terminals = ast.map {|r| Array(r.first) + Array(r.follow)}.flatten.uniq
181
185
  @terminals = (@terminals - [:_eps, :_eof]).sort_by{|t| t.to_s.sub(/^_/, '')}
182
186
 
187
+ # FIXME: assumes that this is a (seq :PASS), or similar
188
+ if pass = ast.detect {|r| r.pass?}
189
+ @pass = pass.expr.last
190
+ end
191
+
192
+ # If a generated terminal is found, this indicates an error, as this version does not automatically generate regular expressions for automatic terminals
193
+ @terminals.
194
+ select {|t| t.to_s.start_with?("_")}.
195
+ reject {|t| t.to_s.start_with?("_pass_")}. # Concession to backwards compatibility
196
+ each do |term|
197
+
198
+ error("build_tables",
199
+ "terminal #{term} is automatically generated; " +
200
+ "regular expressions are not yet generated and parsing " +
201
+ "is not supported")
202
+ end
203
+
183
204
  @branch = {}
184
205
  @already = []
185
206
  @agenda = []
@@ -256,10 +277,18 @@ module EBNF
256
277
  end
257
278
 
258
279
  if rule.alt?
280
+ # A First/Follow conflict appears when _eps is in the first
281
+ # of one rule and there is a token in the first and
282
+ # follow of the same rule
283
+ if rule.first.include?(:_eps) && !(overlap = ((rule.first & rule.follow) - [:eps])).empty?
284
+ error("First/Follow Conflict: #{overlap.first.inspect} is both first and follow of #{rule.sym}")
285
+ end
286
+
259
287
  # Add entries for each alternative, based on the alternative's first/seq
260
288
  rule.expr[1..-1].each do |prod|
261
289
  prod_rule = find_rule(prod)
262
290
  debug(" Alt", prod)
291
+
263
292
  @agenda << prod unless @already.include?(prod) || @agenda.include?(prod)
264
293
  if prod == :_empty
265
294
  debug(" empty")
@@ -269,9 +298,12 @@ module EBNF
269
298
  branchDict[prod] = [prod]
270
299
  else
271
300
  prod_rule.first.reject{|f| f == :_eps}.each do |f|
301
+ # A First/First conflict appears when there are two rules having
302
+ # the same first, so the parser can't know which one to choose.
272
303
  if branchDict.has_key?(f)
273
- error("First/First Conflict: #{f} is also the condition for #{branchDict[f]}")
304
+ error("First/First Conflict: #{f.inspect} is also the condition for #{branchDict[f].first}")
274
305
  end
306
+
275
307
  debug(" alt") {"[#{f}] => #{prod}"}
276
308
  branchDict[f] = [prod]
277
309
  end
@@ -47,19 +47,11 @@ module EBNF::LL1
47
47
  ESCAPE_CHAR8 = /\\U(?:[0-9A-Fa-f]{8,8})/.freeze # \UXXXXXXXX
48
48
  ECHAR = /\\./ # More liberal unescaping
49
49
  UCHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/.freeze
50
- COMMENT = /#.*/.freeze
51
- WS = / |\t|\r|\n/m.freeze
52
-
53
- ML_START = /\'\'\'|\"\"\"/.freeze # Beginning of terminals that may span lines
54
50
 
55
51
  ##
56
- # @return [Regexp] defines whitespace, defaults to WS
52
+ # @return [Regexp] defines whitespace, including comments, otherwise whitespace must be explicit in terminals
57
53
  attr_reader :whitespace
58
54
 
59
- ##
60
- # @return [Regexp] defines single-line comment, defaults to COMMENT
61
- attr_reader :comment
62
-
63
55
  ##
64
56
  # Returns a copy of the given `input` string with all `\uXXXX` and
65
57
  # `\UXXXXXXXX` Unicode codepoint escape sequences replaced with their
@@ -119,13 +111,11 @@ module EBNF::LL1
119
111
  # Array of symbol, regexp pairs used to match terminals.
120
112
  # If the symbol is nil, it defines a Regexp to match string terminals.
121
113
  # @param [Hash{Symbol => Object}] options
122
- # @option options [Regexp] :whitespace (WS)
123
- # Regular expression matching the beginning of terminals that may cross newlines
124
- # @option options [Regexp] :comment (COMMENT)
114
+ # @option options [Regexp] :whitespace
115
+ # Whitespace between tokens, including comments
125
116
  def initialize(input = nil, terminals = nil, options = {})
126
117
  @options = options.dup
127
- @whitespace = @options[:whitespace] || WS
128
- @comment = @options[:comment] || COMMENT
118
+ @whitespace = @options[:whitespace]
129
119
  @terminals = terminals.map do |term|
130
120
  term.is_a?(Array) ? Terminal.new(*term) : term
131
121
  end
@@ -199,7 +189,7 @@ module EBNF::LL1
199
189
  token = match_token
200
190
 
201
191
  if token.nil?
202
- lexme = (scanner.rest.split(/#{@whitespace}|#{@comment}/).first rescue nil) || scanner.rest
192
+ lexme = (scanner.rest.split(@whitespace || /\s/).first rescue nil) || scanner.rest
203
193
  raise Error.new("Invalid token #{lexme[0..100].inspect}",
204
194
  :input => scanner.rest[0..100], :token => lexme, :lineno => lineno)
205
195
  end
@@ -232,7 +222,7 @@ module EBNF::LL1
232
222
  # @return [Token]
233
223
  def recover
234
224
  until scanner.eos? || tok = match_token
235
- if scanner.skip_until(@whitespace).nil? # Skip past current "token"
225
+ if scanner.skip_until(@whitespace || /\s/m).nil? # Skip past current "token"
236
226
  # No whitespace at the end, must be at end of string
237
227
  scanner.terminate
238
228
  else
@@ -248,14 +238,12 @@ module EBNF::LL1
248
238
  attr_reader :scanner
249
239
 
250
240
  ##
251
- # Skip whitespace or comments, as defined through input options or defaults
241
+ # Skip whitespace, as defined through input options or defaults
252
242
  def skip_whitespace
253
243
  # skip all white space, but keep track of the current line number
254
- while !scanner.eos?
244
+ while @whitespace && !scanner.eos?
255
245
  if matched = scanner.scan(@whitespace)
256
246
  @lineno += matched.count("\n")
257
- elsif (scanner.scan(@comment))
258
- #
259
247
  else
260
248
  return
261
249
  end
@@ -219,6 +219,10 @@ module EBNF::LL1
219
219
  # @see http://cs.adelaide.edu.au/~charles/lt/Lectures/07-ErrorRecovery.pdf
220
220
  def parse(input = nil, start = nil, options = {}, &block)
221
221
  @options = options.dup
222
+ @options[:debug] ||= case
223
+ when @options[:progress] then 2
224
+ when @options[:validate] then 1
225
+ end
222
226
  @branch = options[:branch]
223
227
  @first = options[:first] ||= {}
224
228
  @follow = options[:follow] ||= {}
@@ -61,7 +61,7 @@ module EBNF
61
61
  num_sym, expr = rule.split('::=', 2).map(&:strip)
62
62
  num, sym = num_sym.split(']', 2).map(&:strip)
63
63
  num = num[1..-1]
64
- r = Rule.new(sym && sym.to_sym, num, ebnf(expr).first, :ebnf => self)
64
+ r = Rule.new(sym && sym.to_sym, num, expression(expr).first, :ebnf => self)
65
65
  debug("ruleParts") { r.inspect }
66
66
  r
67
67
  end
@@ -70,40 +70,44 @@ module EBNF
70
70
  # Parse a string into an expression tree and a remaining string
71
71
  #
72
72
  # @example
73
- # >>> ebnf("a b c")
74
- # ((seq, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
73
+ # >>> expression("a b c")
74
+ # ((seq a b c) '')
75
75
  #
76
- # >>> ebnf("a? b+ c*")
77
- # ((seq, \[(opt, ('id', 'a')), (plus, ('id', 'b')), ('*', ('id', 'c'))\]), '')
76
+ # >>> expression("a? b+ c*")
77
+ # ((seq (opt a) (plus b) (star c)) '')
78
78
  #
79
- # >>> ebnf(" | x xlist")
80
- # ((alt, \[(seq, \[\]), (seq, \[('id', 'x'), ('id', 'xlist')\])\]), '')
79
+ # >>> expression(" | x xlist")
80
+ # ((alt (seq) (seq x xlist)) '')
81
81
  #
82
- # >>> ebnf("a | (b - c)")
83
- # ((alt, \[('id', 'a'), (diff, \[('id', 'b'), ('id', 'c')\])\]), '')
82
+ # >>> expression("a | (b - c)")
83
+ # ((alt a (diff b c)) '')
84
84
  #
85
- # >>> ebnf("a b | c d")
86
- # ((alt, \[(seq, \[('id', 'a'), ('id', 'b')\]), (seq, \[('id', 'c'), ('id', 'd')\])\]), '')
85
+ # >>> expression("a b | c d")
86
+ # ((alt (seq a b) (seq c d)) '')
87
87
  #
88
- # >>> ebnf("a | b | c")
89
- # ((alt, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
88
+ # >>> expression("a | b | c")
89
+ # ((alt a b c) '')
90
90
  #
91
- # >>> ebnf("a) b c")
92
- # (('id', 'a'), ' b c')
91
+ # >>> expression("a) b c")
92
+ # (a ' b c')
93
93
  #
94
- # >>> ebnf("BaseDecl? PrefixDecl*")
95
- # ((seq, \[(opt, ('id', 'BaseDecl')), ('*', ('id', 'PrefixDecl'))\]), '')
94
+ # >>> expression("BaseDecl? PrefixDecl*")
95
+ # ((seq (opt BaseDecl) (star PrefixDecl)) '')
96
96
  #
97
- # >>> ebnf("NCCHAR1 | diff | [0-9] | #x00B7 | [#x0300-#x036F] | \[#x203F-#x2040\]")
98
- # ((alt, \[('id', 'NCCHAR1'), ("'", diff), (range, '0-9'), (hex, '#x00B7'), (range, '#x0300-#x036F'), (range, '#x203F-#x2040')\]), '')
97
+ # >>> expression("NCCHAR1 | diff | [0-9] | #x00B7 | [#x0300-#x036F] | \[#x203F-#x2040\]")
98
+ # ((alt NCCHAR1 diff
99
+ # (range '0-9')
100
+ # (hex '#x00B7')
101
+ # (range '#x0300-#x036F')
102
+ # (range '#x203F-#x2040')) '')
99
103
  #
100
104
  # @param [String] s
101
105
  # @return [Array]
102
- def ebnf(s)
103
- debug("ebnf") {"(#{s.inspect})"}
106
+ def expression(s)
107
+ debug("expression") {"(#{s.inspect})"}
104
108
  e, s = depth {alt(s)}
105
109
  debug {"=> alt returned #{[e, s].inspect}"}
106
- unless s.empty?
110
+ unless s.to_s.empty?
107
111
  t, ss = depth {terminal(s)}
108
112
  debug {"=> terminal returned #{[t, ss].inspect}"}
109
113
  return [e, ss] if t.is_a?(Array) && t.first == :")"
@@ -114,13 +118,13 @@ module EBNF
114
118
  ##
115
119
  # Parse alt
116
120
  # >>> alt("a | b | c")
117
- # ((alt, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
121
+ # ((alt a b c) '')
118
122
  # @param [String] s
119
123
  # @return [Array]
120
124
  def alt(s)
121
125
  debug("alt") {"(#{s.inspect})"}
122
126
  args = []
123
- while !s.empty?
127
+ while !s.to_s.empty?
124
128
  e, s = depth {seq(s)}
125
129
  debug {"=> seq returned #{[e, s].inspect}"}
126
130
  if e.to_s.empty?
@@ -128,7 +132,7 @@ module EBNF
128
132
  e = [:seq, []] # empty sequence
129
133
  end
130
134
  args << e
131
- unless s.empty?
135
+ unless s.to_s.empty?
132
136
  t, ss = depth {terminal(s)}
133
137
  break unless t[0] == :alt
134
138
  s = ss
@@ -141,14 +145,14 @@ module EBNF
141
145
  # parse seq
142
146
  #
143
147
  # >>> seq("a b c")
144
- # ((seq, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
148
+ # ((seq a b c) '')
145
149
  #
146
150
  # >>> seq("a b? c")
147
- # ((seq, \[('id', 'a'), (opt, ('id', 'b')), ('id', 'c')\]), '')
151
+ # ((seq a (opt b) c) '')
148
152
  def seq(s)
149
153
  debug("seq") {"(#{s.inspect})"}
150
154
  args = []
151
- while !s.empty?
155
+ while !s.to_s.empty?
152
156
  e, ss = depth {diff(s)}
153
157
  debug {"=> diff returned #{[e, ss].inspect}"}
154
158
  unless e.to_s.empty?
@@ -171,13 +175,13 @@ module EBNF
171
175
  # parse diff
172
176
  #
173
177
  # >>> diff("a - b")
174
- # ((diff, \[('id', 'a'), ('id', 'b')\]), '')
178
+ # ((diff a b) '')
175
179
  def diff(s)
176
180
  debug("diff") {"(#{s.inspect})"}
177
181
  e1, s = depth {postfix(s)}
178
182
  debug {"=> postfix returned #{[e1, s].inspect}"}
179
183
  unless e1.to_s.empty?
180
- unless s.empty?
184
+ unless s.to_s.empty?
181
185
  t, ss = depth {terminal(s)}
182
186
  debug {"diff #{[t, ss].inspect}"}
183
187
  if t.is_a?(Array) && t.first == :diff
@@ -199,16 +203,16 @@ module EBNF
199
203
  # parse postfix
200
204
  #
201
205
  # >>> postfix("a b c")
202
- # (('id', 'a'), ' b c')
206
+ # (a ' b c')
203
207
  #
204
208
  # >>> postfix("a? b c")
205
- # ((opt, ('id', 'a')), ' b c')
209
+ # ((opt a) ' b c')
206
210
  def postfix(s)
207
211
  debug("postfix") {"(#{s.inspect})"}
208
212
  e, s = depth {primary(s)}
209
213
  debug {"=> primary returned #{[e, s].inspect}"}
210
214
  return ["", s] if e.to_s.empty?
211
- if !s.empty?
215
+ if !s.to_s.empty?
212
216
  t, ss = depth {terminal(s)}
213
217
  debug {"=> #{[t, ss].inspect}"}
214
218
  if t.is_a?(Array) && [:opt, :star, :plus].include?(t.first)
@@ -222,7 +226,7 @@ module EBNF
222
226
  # parse primary
223
227
  #
224
228
  # >>> primary("a b c")
225
- # (('id', 'a'), ' b c')
229
+ # (a ' b c')
226
230
  def primary(s)
227
231
  debug("primary") {"(#{s.inspect})"}
228
232
  t, s = depth {terminal(s)}
@@ -232,8 +236,8 @@ module EBNF
232
236
  elsif %w(range hex).map(&:to_sym).include?(t.first)
233
237
  [t, s]
234
238
  elsif t.first == :"("
235
- e, s = depth {ebnf(s)}
236
- debug {"=> ebnf returned #{[e, s].inspect}"}
239
+ e, s = depth {expression(s)}
240
+ debug {"=> expression returned #{[e, s].inspect}"}
237
241
  [e, s]
238
242
  else
239
243
  ["", s]
@@ -248,34 +252,34 @@ module EBNF
248
252
  #
249
253
  # @example
250
254
  # >>> terminal("'abc' def")
251
- # (("'", 'abc'), ' def')
255
+ # ('abc' ' def')
252
256
  #
253
257
  # >>> terminal("[0-9]")
254
- # ((range, '0-9'), '')
258
+ # ((range '0-9') '')
255
259
  # >>> terminal("#x00B7")
256
- # ((hex, '#x00B7'), '')
260
+ # ((hex '#x00B7') '')
257
261
  # >>> terminal ("\[#x0300-#x036F\]")
258
- # ((range, '#x0300-#x036F'), '')
262
+ # ((range '#x0300-#x036F') '')
259
263
  # >>> terminal("\[^<>'{}|^`\]-\[#x00-#x20\]")
260
- # ((range, "^<>'{}|^`"), '-\[#x00-#x20\]')
264
+ # ((range "^<>'{}|^`") '-\[#x00-#x20\]')
261
265
  def terminal(s)
262
266
  s = s.strip
263
267
  case m = s[0,1]
264
- when '"', "'"
265
- l, s = s[1..-1].split(m, 2)
266
- [l, s]
267
- when '['
268
- l, s = s[1..-1].split(']', 2)
269
- [[:range, l], s]
270
- when '#'
268
+ when '"', "'" # STRING1 or STRING2 Terminated by line-end or whitespace
269
+ l, s = s[1..-1].split(m.rstrip , 2)
270
+ [LL1::Lexer.unescape_string(l), s]
271
+ when '[' # ENUM, RANGE, O_ENUM, or O_RANGE
272
+ l, s = s[1..-1].split(/(?<=[^\\])\]/, 2)
273
+ [[:range, LL1::Lexer.unescape_string(l)], s]
274
+ when '#' # HEX
271
275
  s.match(/(#\w+)(.*)$/)
272
276
  l, s = $1, $2
273
277
  [[:hex, l], s]
274
- when /[\w\.]/
278
+ when /[\w\.]/ # SYMBOL
275
279
  s.match(/(\w+)(.*)$/)
276
280
  l, s = $1, $2
277
281
  [l.to_sym, s]
278
- when '@'
282
+ when '@' # @pass or @terminals
279
283
  s.match(/@(#\w+)(.*)$/)
280
284
  l, s = $1, $2
281
285
  [[:"@", l], s]
@@ -289,7 +293,7 @@ module EBNF
289
293
  [[:plus], s[1..-1]]
290
294
  when '*'
291
295
  [[:star], s[1..-1]]
292
- when /[\(\)]/
296
+ when /[\(\)]/ # '(' or ')'
293
297
  [[m.to_sym], s[1..-1]]
294
298
  else
295
299
  error("terminal", "unrecognized terminal: #{s.inspect}")