ebnf 0.0.1 → 0.1.0
- data/README.md +21 -7
- data/VERSION +1 -1
- data/bin/ebnf +73 -16
- data/etc/{ebnf.bnf → ebnf.ebnf} +2 -2
- data/etc/ebnf.ll1 +1010 -0
- data/etc/turtle.ebnf +70 -0
- data/etc/turtle.ll1 +1565 -0
- data/etc/turtle.rb +1375 -0
- data/lib/ebnf.rb +16 -1023
- data/lib/ebnf/base.rb +266 -0
- data/lib/ebnf/bnf.rb +50 -0
- data/lib/ebnf/ll1.rb +321 -0
- data/lib/ebnf/ll1/lexer.rb +11 -11
- data/lib/ebnf/ll1/parser.rb +28 -32
- data/lib/ebnf/ll1/scanner.rb +1 -1
- data/lib/ebnf/parser.rb +297 -0
- data/lib/ebnf/rule.rb +362 -0
- metadata +12 -3
data/lib/ebnf/ll1/lexer.rb
CHANGED
@@ -1,5 +1,5 @@
-module
-require '
+module EBNF::LL1
+require 'ebnf/ll1/scanner' unless defined?(Scanner)
 
 ##
 # A lexical analyzer
@@ -10,13 +10,13 @@ module RDF::LL1
 # ...
 # ]
 # ttl = "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ."
-# lexer =
+# lexer = EBNF::LL1::Lexer.tokenize(ttl, terminals)
 # lexer.each_token do |token|
 # puts token.inspect
 # end
 #
 # @example Tokenizing and returning a token stream
-# lexer =
+# lexer = EBNF::LL1::Lexer.tokenize(...)
 # while :some-condition
 # token = lexer.first # Get the current token
 # token = lexer.shift # Get the current token and shift to the next
@@ -24,8 +24,8 @@ module RDF::LL1
 #
 # @example Handling error conditions
 # begin
-#
-# rescue
+# EBNF::LL1::Lexer.tokenize(query)
+# rescue EBNF::LL1::Lexer::Error => error
 # warn error.inspect
 # end
 #
@@ -307,7 +307,7 @@ module RDF::LL1
 # Represents a lexer token.
 #
 # @example Creating a new token
-# token =
+# token = EBNF::LL1::Lexer::Token.new(:LANGTAG, "en")
 # token.type #=> :LANGTAG
 # token.value #=> "en"
 #
@@ -369,10 +369,10 @@ module RDF::LL1
 # of this token.
 #
 # @example Matching using the symbolic type
-#
+# EBNF::LL1::Lexer::Token.new(:NIL) === :NIL #=> true
 #
 # @example Matching using the string value
-#
+# EBNF::LL1::Lexer::Token.new(nil, "{") === "{" #=> true
 #
 # @param [Symbol, String] value
 # @return [Boolean]
@@ -425,7 +425,7 @@ module RDF::LL1
 # Raised for errors during lexical analysis.
 #
 # @example Raising a lexer error
-# raise
+# raise EBNF::LL1::Lexer::Error.new(
 # "invalid token '%' on line 10",
 # :input => query, :token => '%', :lineno => 9)
 #
@@ -472,4 +472,4 @@ module RDF::LL1
 end
 end
 end # class Lexer
-end # module
+end # module EBNF
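Apart from filling in the doc examples, the change to the lexer is purely the namespace move from RDF::LL1 to EBNF::LL1. A rough usage sketch assembled from those examples; the terminal table below is hypothetical, standing in for the entry format elided as "# ... # ]" in the class comment above:

    require 'ebnf/ll1/lexer'

    # Hypothetical terminal list; the real entry format is whatever the
    # (elided) class comment above prescribes.
    terminals = [
      [:LANGTAG, /@[a-zA-Z]+(?:-[a-zA-Z0-9]+)*/]
    ]

    ttl = "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ."

    begin
      lexer = EBNF::LL1::Lexer.tokenize(ttl, terminals)
      lexer.each_token do |token|
        # Each token carries a symbolic type and a string value,
        # as in the Token example above (:LANGTAG / "en").
        puts token.inspect
      end
    rescue EBNF::LL1::Lexer::Error => error
      warn error.inspect
    end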
data/lib/ebnf/ll1/parser.rb
CHANGED
@@ -1,7 +1,6 @@
-require '
-require 'rdf/ll1/lexer'
+require 'ebnf/ll1/lexer'
 
-module
+module EBNF::LL1
 ##
 # A Generic LL1 parser using a lexer and branch tables defined using the SWAP tool chain (modified).
 module Parser
@@ -33,9 +32,9 @@ module RDF::LL1
 #
 # @param [Symbol] term
 # Term which is a key in the branch table
-# @yield [
-# @yieldparam [
-#
+# @yield [parse, phase, input, current]
+# @yieldparam [Object] parse
+# Parser instance
 # @yieldparam [Symbol] phase
 # Phase of parsing, one of :start, or :finish
 # @yieldparam [Hash] input
@@ -45,7 +44,7 @@ module RDF::LL1
 # may be initialized with data to pass to further productions,
 # during :finish, it contains data placed by earlier productions
 # @yieldparam [Prod] block
-# Block passed to initialization for yielding to calling
+# Block passed to initialization for yielding to calling parser.
 # Should conform to the yield specs for #initialize
 # Yield to generate a triple
 def production(term, &block)
@@ -66,9 +65,9 @@ module RDF::LL1
 # @param [Hash] options
 # @option options [Boolean] :unescape
 # Cause strings and codepoints to be unescaped.
-# @yield [
-# @yieldparam [
-#
+# @yield [parser, term, token, input]
+# @yieldparam [Object] parser
+# Parser instance
 # @yieldparam [Symbol] term
 # A symbol indicating the production which referenced this terminal
 # @yieldparam [String] token
@@ -76,7 +75,7 @@ module RDF::LL1
 # @yieldparam [Hash] input
 # A Hash containing input from the parent production
 # @yieldparam [Prod] block
-# Block passed to initialization for yielding to calling
+# Block passed to initialization for yielding to calling parser.
 # Should conform to the yield specs for #initialize
 def terminal(term, regexp, options = {}, &block)
 @@patterns ||= []
@@ -96,10 +95,10 @@ module RDF::LL1
 # @example
 # require 'rdf/ll1/parser'
 #
-# class
-# include
+# class MyParser
+# include EBNF::LL1::Parser
 #
-# branch
+# branch MyParser::BRANCH
 #
 # ##
 # # Defines a production called during different phases of parsing
@@ -107,14 +106,14 @@ module RDF::LL1
 # # current production
 # #
 # # Yield to generate a triple
-# production :object do |
+# production :object do |parser, phase, input, current|
 # object = current[:resource]
 # yield :statement, RDF::Statement.new(input[:subject], input[:predicate], object)
 # end
 #
 # ##
 # # Defines the pattern for a terminal node
-# terminal :BLANK_NODE_LABEL, %r(_:(#{PN_LOCAL})) do |
+# terminal :BLANK_NODE_LABEL, %r(_:(#{PN_LOCAL})) do |parser, production, token, input|
 # input[:BLANK_NODE_LABEL] = RDF::Node.new(token)
 # end
 #
@@ -138,29 +137,26 @@ module RDF::LL1
 # end
 #
 # @param [String, #to_s] input
-# @param [Symbol, #to_s] prod The starting production for the parser.
-# It may be a URI from the grammar, or a symbol representing the local_name portion of the grammar URI.
+# @param [Symbol, #to_s] prod The starting production for the parser. It may be a URI from the grammar, or a symbol representing the local_name portion of the grammar URI.
 # @param [Hash{Symbol => Object}] options
-# @option options [Hash{Symbol,String => Hash{Symbol,String => Array<Symbol,String>}}] :branch
-# LL1 branch table.
+# @option options [Hash{Symbol,String => Hash{Symbol,String => Array<Symbol,String>}}] :branch LL1 branch table.
 # @option options [HHash{Symbol,String => Array<Symbol,String>}] :first ({})
 # Lists valid terminals that can precede each production (for error recovery).
-# @option options [
+# @option options [Hash{Symbol,String => Array<Symbol,String>}] :follow ({})
 # Lists valid terminals that can follow each production (for error recovery).
 # @option options [Boolean] :validate (false)
-# whether to validate the parsed statements and values. If not validating,
-# the parser will attempt to recover from errors.
+# whether to validate the parsed statements and values. If not validating, the parser will attempt to recover from errors.
 # @option options [Boolean] :progress
 # Show progress of parser productions
 # @option options [Boolean] :debug
 # Detailed debug output
 # @yield [context, *data]
-# Yields for to return data to
+# Yields for to return data to parser
 # @yieldparam [:statement, :trace] context
 # Context for block
 # @yieldparam [Symbol] *data
 # Data specific to the call
-# @return [
+# @return [EBNF::LL1::Parser]
 # @see http://cs.adelaide.edu.au/~charles/lt/Lectures/07-ErrorRecovery.pdf
 def parse(input = nil, prod = nil, options = {}, &block)
 @options = options.dup
@@ -179,7 +175,7 @@ module RDF::LL1
 raise Error, "Starting production not defined" unless prod
 
 @prod_data = [{}]
-prod =
+prod = prod.split('#').last.to_sym unless prod.is_a?(Symbol)
 todo_stack = [{:prod => prod, :terms => nil}]
 
 while !todo_stack.empty?
@@ -216,8 +212,8 @@ module RDF::LL1
 end
 
 if sequence.nil?
-if prod_branch.has_key?(:
-debug("parse(production)", :level => 2) {"empty sequence for
+if prod_branch.has_key?(:_empty)
+debug("parse(production)", :level => 2) {"empty sequence for _empty"}
 else
 # If there is no sequence for this production, we're
 # in error recovery, and _token_ has been advanced to
@@ -357,7 +353,7 @@ module RDF::LL1
 # If this token can be used by the top production, return it
 # Otherwise, if the banch table allows empty, also return the token
 return token if !@recovering && (
-(@branch[cur_prod] && @branch[cur_prod].has_key?(:
+(@branch[cur_prod] && @branch[cur_prod].has_key?(:_empty)) ||
 first.any? {|t| token === t})
 
 # Otherwise, it's an error condition, and skip either until
@@ -417,7 +413,7 @@ module RDF::LL1
 def get_token
 token = begin
 @lexer.first
-rescue
+rescue EBNF::LL1::Lexer::Error => e
 # Recover from lexer error
 @lineno = e.lineno
 error("get_token", "With input '#{e.input}': #{e.message}",
@@ -537,5 +533,5 @@ module RDF::LL1
 super(message.to_s)
 end
 end # class Error
-end # class
-end # module
+end # class Parser
+end # module EBNF::LL1
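Beyond the same namespace move, the rewritten comments document the DSL end to end: production and terminal register callbacks, and parse drives them from the :branch/:first/:follow tables, yielding (:statement, *data) or (:trace, *data) tuples to its block. A hedged sketch of that shape, following the documented example; the class name, production name, regexp and empty tables are placeholders (real tables are generated from a grammar, as with the new etc/turtle.ll1 and etc/turtle.rb), and calling parse as an instance method is assumed:

    require 'ebnf/ll1/parser'

    class MiniParser
      include EBNF::LL1::Parser

      # Placeholder LL(1) tables; in practice these are generated from a grammar.
      BRANCH = {}
      FIRST  = {}
      FOLLOW = {}

      # Production callback: phase is :start or :finish; input/current are the
      # prod_data hashes described in the documentation above.
      production :statementList do |parser, phase, input, current|
        # e.g. during :finish, copy results from current into input
      end

      # Terminal pattern plus handler; the regexp is only illustrative.
      terminal :SYMBOL, /\w+/ do |parser, production, token, input|
        input[:SYMBOL] = token
      end

      def each_statement(str)
        parse(str, :statementList,
              :branch => BRANCH, :first => FIRST, :follow => FOLLOW) do |context, *data|
          yield(*data) if context == :statement
        end
      end
    end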
data/lib/ebnf/ll1/scanner.rb
CHANGED
data/lib/ebnf/parser.rb
ADDED
@@ -0,0 +1,297 @@
+module EBNF
+  module Parser
+    ##
+    # Iterate over rule strings.
+    # a line that starts with '\[' or '@' starts a new rule
+    #
+    # @param [StringScanner] scanner
+    # @yield rule_string
+    # @yieldparam [String] rule_string
+    def eachRule(scanner)
+      cur_lineno = 1
+      r = ''
+      until scanner.eos?
+        case
+        when s = scanner.scan(%r(\s+)m)
+          # Eat whitespace
+          cur_lineno += s.count("\n")
+          #debug("eachRule(ws)") { "[#{cur_lineno}] #{s.inspect}" }
+        when s = scanner.scan(%r(/\*([^\*]|\*[^\/])*\*/)m)
+          # Eat comments
+          cur_lineno += s.count("\n")
+          debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
+        when s = scanner.scan(%r(^@terminals))
+          #debug("eachRule(@terminals)") { "[#{cur_lineno}] #{s.inspect}" }
+          yield(r) unless r.empty?
+          @lineno = cur_lineno
+          yield(s)
+          r = ''
+        when s = scanner.scan(/@pass/)
+          # Found rule start, if we've already collected a rule, yield it
+          #debug("eachRule(@pass)") { "[#{cur_lineno}] #{s.inspect}" }
+          yield r unless r.empty?
+          @lineno = cur_lineno
+          r = s
+        when s = scanner.scan(/\[(?=\w+\])/)
+          # Found rule start, if we've already collected a rule, yield it
+          yield r unless r.empty?
+          #debug("eachRule(rule)") { "[#{cur_lineno}] #{s.inspect}" }
+          @lineno = cur_lineno
+          r = s
+        else
+          # Collect until end of line, or start of comment
+          s = scanner.scan_until(%r((?:/\*)|$)m)
+          cur_lineno += s.count("\n")
+          #debug("eachRule(rest)") { "[#{cur_lineno}] #{s.inspect}" }
+          r += s
+        end
+      end
+      yield r unless r.empty?
+    end
+
+    ##
+    # Parse a rule into a rule number, a symbol and an expression
+    #
+    # @param [String] rule
+    # @return [Rule]
+    def ruleParts(rule)
+      num_sym, expr = rule.split('::=', 2).map(&:strip)
+      num, sym = num_sym.split(']', 2).map(&:strip)
+      num = num[1..-1]
+      r = Rule.new(sym && sym.to_sym, num, ebnf(expr).first, :ebnf => self)
+      debug("ruleParts") { r.inspect }
+      r
+    end
+
+    ##
+    # Parse a string into an expression tree and a remaining string
+    #
+    # @example
+    # >>> ebnf("a b c")
+    # ((seq, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
+    #
+    # >>> ebnf("a? b+ c*")
+    # ((seq, \[(opt, ('id', 'a')), (plus, ('id', 'b')), ('*', ('id', 'c'))\]), '')
+    #
+    # >>> ebnf(" | x xlist")
+    # ((alt, \[(seq, \[\]), (seq, \[('id', 'x'), ('id', 'xlist')\])\]), '')
+    #
+    # >>> ebnf("a | (b - c)")
+    # ((alt, \[('id', 'a'), (diff, \[('id', 'b'), ('id', 'c')\])\]), '')
+    #
+    # >>> ebnf("a b | c d")
+    # ((alt, \[(seq, \[('id', 'a'), ('id', 'b')\]), (seq, \[('id', 'c'), ('id', 'd')\])\]), '')
+    #
+    # >>> ebnf("a | b | c")
+    # ((alt, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
+    #
+    # >>> ebnf("a) b c")
+    # (('id', 'a'), ' b c')
+    #
+    # >>> ebnf("BaseDecl? PrefixDecl*")
+    # ((seq, \[(opt, ('id', 'BaseDecl')), ('*', ('id', 'PrefixDecl'))\]), '')
+    #
+    # >>> ebnf("NCCHAR1 | diff | [0-9] | #x00B7 | [#x0300-#x036F] | \[#x203F-#x2040\]")
+    # ((alt, \[('id', 'NCCHAR1'), ("'", diff), (range, '0-9'), (hex, '#x00B7'), (range, '#x0300-#x036F'), (range, '#x203F-#x2040')\]), '')
+    #
+    # @param [String] s
+    # @return [Array]
+    def ebnf(s)
+      debug("ebnf") {"(#{s.inspect})"}
+      e, s = depth {alt(s)}
+      debug {"=> alt returned #{[e, s].inspect}"}
+      unless s.empty?
+        t, ss = depth {terminal(s)}
+        debug {"=> terminal returned #{[t, ss].inspect}"}
+        return [e, ss] if t.is_a?(Array) && t.first == :")"
+      end
+      [e, s]
+    end
+
+    ##
+    # Parse alt
+    # >>> alt("a | b | c")
+    # ((alt, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
+    # @param [String] s
+    # @return [Array]
+    def alt(s)
+      debug("alt") {"(#{s.inspect})"}
+      args = []
+      while !s.empty?
+        e, s = depth {seq(s)}
+        debug {"=> seq returned #{[e, s].inspect}"}
+        if e.to_s.empty?
+          break unless args.empty?
+          e = [:seq, []] # empty sequence
+        end
+        args << e
+        unless s.empty?
+          t, ss = depth {terminal(s)}
+          break unless t[0] == :alt
+          s = ss
+        end
+      end
+      args.length > 1 ? [args.unshift(:alt), s] : [e, s]
+    end
+
+    ##
+    # parse seq
+    #
+    # >>> seq("a b c")
+    # ((seq, \[('id', 'a'), ('id', 'b'), ('id', 'c')\]), '')
+    #
+    # >>> seq("a b? c")
+    # ((seq, \[('id', 'a'), (opt, ('id', 'b')), ('id', 'c')\]), '')
+    def seq(s)
+      debug("seq") {"(#{s.inspect})"}
+      args = []
+      while !s.empty?
+        e, ss = depth {diff(s)}
+        debug {"=> diff returned #{[e, ss].inspect}"}
+        unless e.to_s.empty?
+          args << e
+          s = ss
+        else
+          break;
+        end
+      end
+      if args.length > 1
+        [args.unshift(:seq), s]
+      elsif args.length == 1
+        args + [s]
+      else
+        ["", s]
+      end
+    end
+
+    ##
+    # parse diff
+    #
+    # >>> diff("a - b")
+    # ((diff, \[('id', 'a'), ('id', 'b')\]), '')
+    def diff(s)
+      debug("diff") {"(#{s.inspect})"}
+      e1, s = depth {postfix(s)}
+      debug {"=> postfix returned #{[e1, s].inspect}"}
+      unless e1.to_s.empty?
+        unless s.empty?
+          t, ss = depth {terminal(s)}
+          debug {"diff #{[t, ss].inspect}"}
+          if t.is_a?(Array) && t.first == :diff
+            s = ss
+            e2, s = primary(s)
+            unless e2.to_s.empty?
+              return [[:diff, e1, e2], s]
+            else
+              error("diff", "Syntax Error")
+              raise "Syntax Error"
+            end
+          end
+        end
+      end
+      [e1, s]
+    end
+
+    ##
+    # parse postfix
+    #
+    # >>> postfix("a b c")
+    # (('id', 'a'), ' b c')
+    #
+    # >>> postfix("a? b c")
+    # ((opt, ('id', 'a')), ' b c')
+    def postfix(s)
+      debug("postfix") {"(#{s.inspect})"}
+      e, s = depth {primary(s)}
+      debug {"=> primary returned #{[e, s].inspect}"}
+      return ["", s] if e.to_s.empty?
+      if !s.empty?
+        t, ss = depth {terminal(s)}
+        debug {"=> #{[t, ss].inspect}"}
+        if t.is_a?(Array) && [:opt, :star, :plus].include?(t.first)
+          return [[t.first, e], ss]
+        end
+      end
+      [e, s]
+    end
+
+    ##
+    # parse primary
+    #
+    # >>> primary("a b c")
+    # (('id', 'a'), ' b c')
+    def primary(s)
+      debug("primary") {"(#{s.inspect})"}
+      t, s = depth {terminal(s)}
+      debug {"=> terminal returned #{[t, s].inspect}"}
+      if t.is_a?(Symbol) || t.is_a?(String)
+        [t, s]
+      elsif %w(range hex).map(&:to_sym).include?(t.first)
+        [t, s]
+      elsif t.first == :"("
+        e, s = depth {ebnf(s)}
+        debug {"=> ebnf returned #{[e, s].inspect}"}
+        [e, s]
+      else
+        ["", s]
+      end
+    end
+
+    ##
+    # parse one terminal; return the terminal and the remaining string
+    #
+    # A terminal is represented as a tuple whose 1st item gives the type;
+    # some types have additional info in the tuple.
+    #
+    # @example
+    # >>> terminal("'abc' def")
+    # (("'", 'abc'), ' def')
+    #
+    # >>> terminal("[0-9]")
+    # ((range, '0-9'), '')
+    # >>> terminal("#x00B7")
+    # ((hex, '#x00B7'), '')
+    # >>> terminal ("\[#x0300-#x036F\]")
+    # ((range, '#x0300-#x036F'), '')
+    # >>> terminal("\[^<>'{}|^`\]-\[#x00-#x20\]")
+    # ((range, "^<>'{}|^`"), '-\[#x00-#x20\]')
+    def terminal(s)
+      s = s.strip
+      case m = s[0,1]
+      when '"', "'"
+        l, s = s[1..-1].split(m, 2)
+        [l, s]
+      when '['
+        l, s = s[1..-1].split(']', 2)
+        [[:range, l], s]
+      when '#'
+        s.match(/(#\w+)(.*)$/)
+        l, s = $1, $2
+        [[:hex, l], s]
+      when /[[:alpha:]]/
+        s.match(/(\w+)(.*)$/)
+        l, s = $1, $2
+        [l.to_sym, s]
+      when '@'
+        s.match(/@(#\w+)(.*)$/)
+        l, s = $1, $2
+        [[:"@", l], s]
+      when '-'
+        [[:diff], s[1..-1]]
+      when '?'
+        [[:opt], s[1..-1]]
+      when '|'
+        [[:alt], s[1..-1]]
+      when '+'
+        [[:plus], s[1..-1]]
+      when '*'
+        [[:star], s[1..-1]]
+      when /[\(\)]/
+        [[m.to_sym], s[1..-1]]
+      else
+        error("terminal", "unrecognized terminal: #{s.inspect}")
+        raise "Syntax Error"
+      end
+    end
+  end
+end
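EBNF::Parser, new in this release, is a small recursive-descent reader for rule bodies: eachRule carves the grammar text into rule strings, ruleParts splits off the `[num] sym ::= expr` header, and ebnf / alt / seq / diff / postfix / primary / terminal reduce the expression to nested arrays headed by the operator symbol. A minimal sketch of the trees it produces, assuming the module is mixed into a host that supplies the debug, depth and error helpers it calls (in the gem that host appears to be the new EBNF::Base):

    require 'ebnf'   # assuming the top-level file loads ebnf/parser

    # Illustration-only host; EBNF::Base provides richer versions of these helpers.
    class ExprReader
      include EBNF::Parser
      def debug(*args); end          # swallow trace output
      def depth; yield; end          # no nesting bookkeeping
      def error(node, msg); warn "#{node}: #{msg}"; end
    end

    r = ExprReader.new
    p r.ebnf("a? b+ c*").first     #=> [:seq, [:opt, :a], [:plus, :b], [:star, :c]]
    p r.ebnf("a | (b - c)").first  #=> [:alt, :a, [:diff, :b, :c]]
    # ruleParts("[2] prologue ::= BaseDecl? PrefixDecl*") would feed the same
    # machinery and wrap the result in a Rule (see data/lib/ebnf/rule.rb).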