ebnf 1.1.2 → 2.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +218 -196
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +40 -21
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/doap.ttl +23 -15
- data/etc/ebnf.ebnf +21 -33
- data/etc/ebnf.html +171 -160
- data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
- data/etc/ebnf.ll1.sxp +182 -183
- data/etc/ebnf.peg.rb +90 -0
- data/etc/ebnf.peg.sxp +84 -0
- data/etc/ebnf.sxp +40 -41
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +363 -362
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +103 -101
- data/lib/ebnf.rb +7 -2
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +128 -87
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +140 -8
- data/lib/ebnf/ll1/lexer.rb +37 -32
- data/lib/ebnf/ll1/parser.rb +113 -73
- data/lib/ebnf/ll1/scanner.rb +83 -51
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +561 -0
- data/lib/ebnf/peg/rule.rb +241 -0
- data/lib/ebnf/rule.rb +453 -163
- data/lib/ebnf/terminals.rb +21 -0
- data/lib/ebnf/writer.rb +561 -88
- metadata +114 -28
- data/etc/sparql.rb +0 -45773
@@ -0,0 +1,241 @@
|
|
1
|
+
module EBNF::PEG
|
2
|
+
# Behavior for parsing a PEG rule
|
3
|
+
module Rule
|
4
|
+
##
|
5
|
+
# Initialized by parser when loading rules.
|
6
|
+
# Used for finding rules and invoking elements of the parse process.
|
7
|
+
#
|
8
|
+
# @return [EBNF::PEG::Parser] parser
|
9
|
+
attr_accessor :parser
|
10
|
+
|
11
|
+
##
|
12
|
+
# Parse a rule or terminal, invoking callbacks, as appropriate
|
13
|
+
|
14
|
+
# If there are `start_production` and/or `production` handlers,
|
15
|
+
# they are invoked with a `prod_data` stack, the input stream and offset.
|
16
|
+
# Otherwise, the results are added as an array value
|
17
|
+
# to a hash indexed by the rule name.
|
18
|
+
#
|
19
|
+
# If matched, the input position is updated and the results returned in a Hash.
|
20
|
+
#
|
21
|
+
# * `alt`: returns the value of the matched production or `:unmatched`.
|
22
|
+
# * `diff`: returns the value matched, or `:unmatched`.
|
23
|
+
# * `hex`: returns a string composed of the matched hex character, or `:unmatched`.
|
24
|
+
# * `opt`: returns the value matched, or `nil` if unmatched.
|
25
|
+
# * `plus`: returns an array of the values matched for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string.
|
26
|
+
# * `range`: returns a string composed of the values matched, or `:unmatched`, if less than `min` are matched.
|
27
|
+
# * `seq`: returns an array composed of single-entry hashes for each matched production indexed by the production name, or `:unmatched` if any production fails to match. For Terminals, returns a string created by concatenating these values. Via option in a `production` or definition, the result can be a single hash with values for each matched production; note that this is not always possible due to the possibility of repeated productions within the sequence.
|
28
|
+
# * `star`: returns an array of the values matched for the specified production. For Terminals, these are concatenated into a single string.
|
29
|
+
#
|
30
|
+
# @param [Scanner] input
|
31
|
+
# @return [Hash{Symbol => Object}, :unmatched] A hash with keys for matched component of the expression. Returns :unmatched if the input does not match the production.
|
32
|
+
def parse(input)
  # Save position and line number so a failed match can backtrack.
  pos, lineno = input.pos, input.lineno

  # Packrat memoization: if this rule was already attempted at this position,
  # restore the recorded end position and return the recorded result.
  parser.packrat[sym] ||= {}
  if (memo = parser.packrat[sym][pos])
    parser.debug("#{sym}(:memo)", lineno: lineno) { "#{memo.inspect}(@#{pos})"}
    input.pos, input.lineno = memo[:pos], memo[:lineno]
    return memo[:result]
  end

  if terminal?
    # If the terminal is defined with a regular expression, use that to match
    # the input; otherwise fall through and evaluate the expression below.
    if (regexp = parser.find_terminal_regexp(sym))
      matched = input.scan(regexp)
      result = parser.onTerminal(sym, (matched ? matched : :unmatched))
      # Update furthest failure for strings and terminals
      parser.update_furthest_failure(input.pos, input.lineno, sym) if result == :unmatched
      parser.packrat[sym][pos] = {
        pos: input.pos,
        lineno: input.lineno,
        result: result
      }
      return parser.packrat[sym][pos][:result]
    end
  else
    eat_whitespace(input)
  end
  start_options = parser.onStart(sym)

  result = case expr.first
  when :alt
    # Return the first expression to match.
    # Result is either :unmatched, or the value of the matching rule
    alt = :unmatched
    expr[1..-1].each do |prod|
      alt = case prod
      when Symbol
        rule = parser.find_rule(prod)
        raise "No rule found for #{prod}" unless rule
        rule.parse(input)
      when String
        input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
      end
      if alt == :unmatched
        # Update furthest failure for strings and terminals
        parser.update_furthest_failure(input.pos, input.lineno, prod) if prod.is_a?(String) || rule.terminal?
      else
        break
      end
    end
    alt
  when :diff
    # Matches any string that matches A but does not match B.
    # (Note, this is only used for Terminal rules, non-terminals will use :not)
    # The message names this rule (`sym`); no `prod` variable exists in this branch.
    raise "Diff used on non-terminal #{sym}" unless terminal?
    re1, re2 = Regexp.new(translate_codepoints(expr[1])), Regexp.new(translate_codepoints(expr[2]))
    matched = input.scan(re1)
    if !matched || re2.match?(matched)
      # Update furthest failure for terminals
      parser.update_furthest_failure(input.pos, input.lineno, sym)
      :unmatched
    else
      matched
    end
  when :hex
    # Matches the character whose code point is given in `#xN` form.
    input.scan(to_regexp) || begin
      # Update furthest failure for terminals
      parser.update_furthest_failure(input.pos, input.lineno, expr.last)
      :unmatched
    end
  when :not
    # Matches any string that does not match B.
    res = case prod = expr[1]
    when Symbol
      rule = parser.find_rule(prod)
      raise "No rule found for #{prod}" unless rule
      rule.parse(input)
    when String
      input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
    end
    if res != :unmatched
      # Update furthest failure for terminals
      parser.update_furthest_failure(input.pos, input.lineno, sym) if terminal?
      :unmatched
    else
      nil
    end
  when :opt
    # Result is the matched value or nil
    opt = rept(input, 0, 1, expr[1])

    # Update furthest failure for strings and terminals
    parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
    opt.first
  when :plus
    # Result is an array of all expressions while they match,
    # at least one must match
    plus = rept(input, 1, '*', expr[1])

    # Update furthest failure for strings and terminals
    parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
    plus.is_a?(Array) && terminal? ? plus.join("") : plus
  when :range, :istr
    # Matches the specified character range (or case-insensitive string)
    input.scan(to_regexp) || begin
      # Update furthest failure for strings and terminals
      parser.update_furthest_failure(input.pos, input.lineno, expr[1])
      :unmatched
    end
  when :seq
    # Evaluate each expression into an array of single-entry hashes, keyed by
    # the production, whose value is the parsed value of that production.
    # `break :unmatched` makes the whole expression :unmatched on first failure.
    seq = expr[1..-1].each_with_object([]) do |prod, accumulator|
      eat_whitespace(input) unless accumulator.empty? || terminal?
      res = case prod
      when Symbol
        rule = parser.find_rule(prod)
        raise "No rule found for #{prod}" unless rule
        rule.parse(input)
      when String
        input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
      end
      if res == :unmatched
        # Update furthest failure for strings and terminals
        parser.update_furthest_failure(input.pos, input.lineno, prod)
        break :unmatched
      end
      accumulator << {prod.to_sym => res}
    end
    if seq == :unmatched
      :unmatched
    elsif terminal?
      seq.map(&:values).compact.join("") # Concat values for terminal production
    elsif start_options[:as_hash]
      seq.inject {|memo, h| memo.merge(h)}
    else
      seq
    end
  when :star
    # Result is an array of all expressions while they match,
    # an empty array if none match
    star = rept(input, 0, '*', expr[1])

    # Update furthest failure for strings and terminals
    parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
    star.is_a?(Array) && terminal? ? star.join("") : star
  else
    raise "attempt to parse unknown rule type: #{expr.first}"
  end

  # Backtrack on failure
  if result == :unmatched
    input.pos, input.lineno = pos, lineno
  end

  # Let the parser post-process the result, then memoize it for this start position.
  result = parser.onFinish(result)
  (parser.packrat[sym] ||= {})[pos] = {
    pos: input.pos,
    lineno: input.lineno,
    result: result
  }
  return parser.packrat[sym][pos][:result]
end
|
197
|
+
|
198
|
+
##
|
199
|
+
# Repetition, 0-1, 0-n, 1-n, ...
|
200
|
+
#
|
201
|
+
# Note, nil results are removed from the result, but count towards min/max calculations
|
202
|
+
#
|
203
|
+
# @param [Scanner] input
|
204
|
+
# @param [Integer] min
|
205
|
+
# @param [Integer] max
|
206
|
+
# If it is an integer, it stops matching after max entries.
|
207
|
+
# @param [Symbol, String] prod
|
208
|
+
# @return [:unmatched, Array]
|
209
|
+
def rept(input, min, max, prod)
  # Collects successive matches of `prod` (a rule name or a literal string).
  # `max` is either an Integer bound or '*' for unbounded.
  # Returns :unmatched when fewer than `min` matches were found, otherwise the
  # matches with nil entries removed (nils still count towards min/max).
  result = []

  case prod
  when Symbol
    rule = parser.find_rule(prod)
    raise "No rule found for #{prod}" unless rule
    while (max == '*' || result.length < max) && (res = rule.parse(input)) != :unmatched
      eat_whitespace(input) unless terminal?
      result << res
    end
  when String
    literal = Regexp.new(Regexp.quote(prod))
    # Check the bound BEFORE scanning; scanning first would consume (and then
    # discard) one occurrence beyond `max`, leaving the input position wrong.
    while (max == '*' || result.length < max) && (res = input.scan(literal))
      eat_whitespace(input) unless terminal?
      result << res
    end
  end

  result.length < min ? :unmatched : result.compact
end
|
229
|
+
|
230
|
+
##
|
231
|
+
# Eat whitespace between non-terminal rules
|
232
|
+
def eat_whitespace(input)
  # The parser's whitespace definition is either a Regexp or a Rule;
  # anything else (e.g. nil) means there is nothing to skip.
  case (whitespace = parser.whitespace)
  when Regexp
    # Eat whitespace before a non-terminal
    input.skip(whitespace)
  when Rule
    whitespace.parse(input) # throw away result
  end
end
|
240
|
+
end
|
241
|
+
end
|
data/lib/ebnf/rule.rb
CHANGED
@@ -1,15 +1,33 @@
|
|
1
|
+
require 'scanf'
|
2
|
+
require 'strscan'
|
3
|
+
|
1
4
|
module EBNF
|
2
5
|
# Represent individual parsed rules
|
3
6
|
class Rule
|
4
|
-
# Operations which are flattened to seprate rules in to_bnf
|
7
|
+
# Operations which are flattened to separate rules in to_bnf.
|
5
8
|
BNF_OPS = %w{
|
6
|
-
alt opt plus seq star
|
9
|
+
alt diff not opt plus rept seq star
|
7
10
|
}.map(&:to_sym).freeze
|
8
11
|
|
9
12
|
TERM_OPS = %w{
|
10
|
-
|
13
|
+
hex istr range
|
11
14
|
}.map(&:to_sym).freeze
|
12
15
|
|
16
|
+
# The number of arguments expected per operator. `nil` for unspecified
|
17
|
+
OP_ARGN = {
|
18
|
+
alt: nil,
|
19
|
+
diff: 2,
|
20
|
+
hex: 1,
|
21
|
+
istr: 1,
|
22
|
+
not: 1,
|
23
|
+
opt: 1,
|
24
|
+
plus: 1,
|
25
|
+
range: 1,
|
26
|
+
rept: 3,
|
27
|
+
seq: nil,
|
28
|
+
star: 1
|
29
|
+
}
|
30
|
+
|
13
31
|
# Symbol of rule
|
14
32
|
#
|
15
33
|
# @return [Symbol]
|
@@ -26,7 +44,7 @@ module EBNF
|
|
26
44
|
|
27
45
|
# Kind of rule
|
28
46
|
#
|
29
|
-
# @return [:rule, :terminal, or :pass]
|
47
|
+
# @return [:rule, :terminal, :terminals, or :pass]
|
30
48
|
attr_accessor :kind
|
31
49
|
|
32
50
|
# Rule expression
|
@@ -57,45 +75,92 @@ module EBNF
|
|
57
75
|
# Determines preparation and cleanup rules for reconstituting EBNF ? * + from BNF
|
58
76
|
attr_accessor :cleanup
|
59
77
|
|
60
|
-
# @param [
|
61
|
-
# @
|
78
|
+
# @param [Symbol, nil] sym
|
79
|
+
# `nil` is allowed only for @pass or @terminals
|
80
|
+
# @param [Integer, nil] id
|
62
81
|
# @param [Array] expr
|
63
|
-
#
|
64
|
-
#
|
65
|
-
#
|
66
|
-
#
|
67
|
-
#
|
68
|
-
#
|
69
|
-
|
82
|
+
# The expression is an internal-representation of an S-Expression with one of the following operators:
|
83
|
+
#
|
84
|
+
# * `alt` – A list of alternative rules, which are attempted in order. It terminates with the first matching rule, or is terminated as unmatched, if no such rule is found.
|
85
|
+
# * `diff` – matches any string that matches `A` but does not match `B`.
|
86
|
+
# * `hex` – A single character represented using the hexadecimal notation `#xnn`.
|
87
|
+
# * `istr` – A string which matches in a case-insensitive manner, so that `(istr "fOo")` will match either of the strings `"foo"`, `"FOO"` or any other combination.
|
88
|
+
# * `opt` – An optional rule or terminal. It either results in the matching rule or returns `nil`.
|
89
|
+
# * `plus` – A sequence of one or more of the matching rule. If there is no such rule, it is terminated as unmatched; otherwise, the result is an array containing all matched input.
|
90
|
+
# * `range` – A range of characters, possibly repeated, of the form `(range "a-z")`. May also use hexadecimal notation.
|
91
|
+
# * `rept m n` – A sequence of at least `m` and at most `n` of the matching rule. It will always return an array.
|
92
|
+
# * `seq` – A sequence of rules or terminals. If any (other than `opt` or `star`) do not parse, the rule is terminated as unmatched.
|
93
|
+
# * `star` – A sequence of zero or more of the matching rule. It will always return an array.
|
94
|
+
# @param [:rule, :terminal, :terminals, :pass] kind (nil)
|
95
|
+
# @param [String] ebnf (nil)
|
96
|
+
# When parsing, records the EBNF string used to create the rule.
|
97
|
+
# @param [Array] first (nil)
|
98
|
+
# Recorded set of terminals that can precede this rule (LL(1))
|
99
|
+
# @param [Array] follow (nil)
|
100
|
+
# Recorded set of terminals that can follow this rule (LL(1))
|
101
|
+
# @param [Boolean] start (nil)
|
102
|
+
# Is this the starting rule for the grammar?
|
103
|
+
# @param [Rule] top_rule (nil)
|
104
|
+
# The top-most rule. All expressed rules are top-rules, derived rules have the original rule as their top-rule.
|
105
|
+
# @param [Boolean] cleanup (nil)
|
106
|
+
# Records information useful for cleaning up converted :plus, and :star expansions (LL(1)).
|
107
|
+
def initialize(sym, id, expr, kind: nil, ebnf: nil, first: nil, follow: nil, start: nil, top_rule: nil, cleanup: nil)
|
70
108
|
@sym, @id = sym, id
|
71
|
-
@expr = expr.is_a?(Array) ? expr : [:seq, expr]
|
72
|
-
@ebnf =
|
73
|
-
@top_rule
|
74
|
-
@
|
75
|
-
@follow = options[:follow]
|
76
|
-
@start = options[:start]
|
77
|
-
@cleanup = options[:cleanup]
|
78
|
-
@kind = case
|
79
|
-
when options[:kind] then options[:kind]
|
109
|
+
@expr = expr.is_a?(Array) ? expr : [:seq, expr].compact
|
110
|
+
@ebnf, @kind, @first, @follow, @start, @cleanup, @top_rule = ebnf, kind, first, follow, start, cleanup, top_rule
|
111
|
+
@top_rule ||= self
|
112
|
+
@kind ||= case
|
80
113
|
when sym.to_s == sym.to_s.upcase then :terminal
|
81
114
|
when !BNF_OPS.include?(@expr.first) then :terminal
|
82
115
|
else :rule
|
83
116
|
end
|
117
|
+
|
118
|
+
# Allow @pass and @terminals to not be named
|
119
|
+
@sym ||= :_pass if @kind == :pass
|
120
|
+
@sym ||= :_terminals if @kind == :terminals
|
121
|
+
|
122
|
+
raise ArgumentError, "Rule sym must be a symbol, was #{@sym.inspect}" unless @sym.is_a?(Symbol)
|
123
|
+
raise ArgumentError, "Rule id must be a string or nil, was #{@id.inspect}" unless (@id || "").is_a?(String)
|
124
|
+
raise ArgumentError, "Rule kind must be one of :rule, :terminal, :terminals, or :pass, was #{@kind.inspect}" unless
|
125
|
+
@kind.is_a?(Symbol) && %w(rule terminal terminals pass).map(&:to_sym).include?(@kind)
|
126
|
+
|
127
|
+
case @expr.first
|
128
|
+
when :alt
|
129
|
+
raise ArgumentError, "#{@expr.first} operation must have at least one operand, had #{@expr.length - 1}" unless @expr.length > 1
|
130
|
+
when :diff
|
131
|
+
raise ArgumentError, "#{@expr.first} operation must have exactly two operands, had #{@expr.length - 1}" unless @expr.length == 3
|
132
|
+
when :hex, :istr, :not, :opt, :plus, :range, :star
|
133
|
+
raise ArgumentError, "#{@expr.first} operation must have exactly one operand, had #{@expr.length - 1}" unless @expr.length == 2
|
134
|
+
when :rept
|
135
|
+
raise ArgumentError, "#{@expr.first} operation must have exactly three, had #{@expr.length - 1}" unless @expr.length == 4
|
136
|
+
raise ArgumentError, "#{@expr.first} operation must an non-negative integer minimum, was #{@expr[1]}" unless
|
137
|
+
@expr[1].is_a?(Integer) && @expr[1] >= 0
|
138
|
+
raise ArgumentError, "#{@expr.first} operation must an non-negative integer maximum or '*', was #{@expr[2]}" unless
|
139
|
+
@expr[2] == '*' || @expr[2].is_a?(Integer) && @expr[2] >= 0
|
140
|
+
when :seq
|
141
|
+
# It's legal to have a zero-length sequence
|
142
|
+
else
|
143
|
+
raise ArgumentError, "Rule expression must be an array using a known operator, was #{@expr.first}"
|
144
|
+
end
|
84
145
|
end
|
85
146
|
|
86
147
|
##
|
87
148
|
# Return a rule from its SXP representation:
|
88
149
|
#
|
89
150
|
# @example inputs
|
90
|
-
# (pass (plus (range "#x20\\t\\r\\n")))
|
151
|
+
# (pass _pass (plus (range "#x20\\t\\r\\n")))
|
91
152
|
# (rule ebnf "1" (star (alt declaration rule)))
|
92
|
-
# (terminal
|
153
|
+
# (terminal R_CHAR "19" (diff CHAR (alt "]" "-")))
|
93
154
|
#
|
94
|
-
# Also may have (first ...)
|
155
|
+
# Also may have `(first ...)`, `(follow ...)`, or `(start #t)`.
|
95
156
|
#
|
96
|
-
# @param [Array] sxp
|
157
|
+
# @param [String, Array] sxp
|
97
158
|
# @return [Rule]
|
98
159
|
def self.from_sxp(sxp)
|
160
|
+
if sxp.is_a?(String)
|
161
|
+
require 'sxp' unless defined?(SXP)
|
162
|
+
sxp = SXP.parse(sxp)
|
163
|
+
end
|
99
164
|
expr = sxp.detect {|e| e.is_a?(Array) && ![:first, :follow, :start].include?(e.first.to_sym)}
|
100
165
|
first = sxp.detect {|e| e.is_a?(Array) && e.first.to_sym == :first}
|
101
166
|
first = first[1..-1] if first
|
@@ -106,27 +171,28 @@ module EBNF
|
|
106
171
|
start = sxp.any? {|e| e.is_a?(Array) && e.first.to_sym == :start}
|
107
172
|
sym = sxp[1] if sxp[1].is_a?(Symbol)
|
108
173
|
id = sxp[2] if sxp[2].is_a?(String)
|
109
|
-
|
174
|
+
self.new(sym, id, expr, kind: sxp.first, first: first, follow: follow, cleanup: cleanup, start: start)
|
110
175
|
end
|
111
176
|
|
112
177
|
# Build a new rule creating a symbol and numbering from the current rule
|
113
|
-
# Symbol and number creation is handled by the top-most rule in such a chain
|
178
|
+
# Symbol and number creation is handled by the top-most rule in such a chain.
|
114
179
|
#
|
115
180
|
# @param [Array] expr
|
181
|
+
# @param [Symbol] kind (nil)
|
182
|
+
# @param [Hash{Symbol => Symbol}] cleanup (nil)
|
116
183
|
# @param [Hash{Symbol => Object}] options
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
# Return representation for building S-Expressions
|
184
|
+
def build(expr, kind: nil, cleanup: nil, **options)
|
185
|
+
new_sym, new_id = @top_rule.send(:make_sym_id)
|
186
|
+
self.class.new(new_sym, new_id, expr,
|
187
|
+
kind: kind,
|
188
|
+
ebnf: @ebnf,
|
189
|
+
top_rule: @top_rule,
|
190
|
+
cleanup: cleanup,
|
191
|
+
**options)
|
192
|
+
end
|
193
|
+
|
194
|
+
# Return representation for building S-Expressions.
|
195
|
+
#
|
130
196
|
# @return [Array]
|
131
197
|
def for_sxp
|
132
198
|
elements = [kind, sym]
|
@@ -142,40 +208,51 @@ module EBNF
|
|
142
208
|
# Return SXP representation of this rule
|
143
209
|
# @return [String]
|
144
210
|
def to_sxp
|
211
|
+
require 'sxp' unless defined?(SXP)
|
145
212
|
for_sxp.to_sxp
|
146
213
|
end
|
147
214
|
|
148
215
|
alias_method :to_s, :to_sxp
|
149
216
|
|
150
|
-
# Serializes this rule to an Turtle
|
217
|
+
# Serializes this rule to Turtle.
|
218
|
+
#
|
151
219
|
# @return [String]
|
152
220
|
def to_ttl
|
153
221
|
@ebnf.debug("to_ttl") {inspect} if @ebnf
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
%{ rdfs:comment #{comment.inspect};}
|
162
|
-
|
222
|
+
statements = [%{:#{sym} rdfs:label "#{sym}";}]
|
223
|
+
if orig
|
224
|
+
comment = orig.to_s.strip.
|
225
|
+
gsub(/"""/, '\"\"\"').
|
226
|
+
gsub("\\", "\\\\").
|
227
|
+
sub(/^\"/, '\"').
|
228
|
+
sub(/\"$/m, '\"')
|
229
|
+
statements << %{ rdfs:comment #{comment.inspect};}
|
230
|
+
end
|
231
|
+
statements << %{ dc:identifier "#{id}";} if id
|
163
232
|
|
164
233
|
statements += ttl_expr(expr, terminal? ? "re" : "g", 1, false)
|
165
234
|
"\n" + statements.join("\n")
|
166
235
|
end
|
167
236
|
|
237
|
+
# Return a Ruby representation of this rule
|
238
|
+
# @return [String]
|
239
|
+
def to_ruby
|
240
|
+
"EBNF::Rule.new(#{sym.inspect}, #{id.inspect}, #{expr.inspect}#{', kind: ' + kind.inspect unless kind == :rule})"
|
241
|
+
end
|
242
|
+
|
168
243
|
##
|
169
244
|
# Transform EBNF rule to BNF rules:
|
170
245
|
#
|
171
|
-
# * Transform (a
|
172
|
-
#
|
173
|
-
#
|
174
|
-
#
|
175
|
-
# * Transform (a
|
176
|
-
# * Transform (a
|
246
|
+
# * Transform `(rule a "n" (op1 (op2)))` into two rules:
|
247
|
+
#
|
248
|
+
# (rule a "n" (op1 _a_1))
|
249
|
+
# (rule _a_1 "n.1" (op2))
|
250
|
+
# * Transform `(rule a (opt b))` into `(rule a (alt _empty b))`
|
251
|
+
# * Transform `(rule a (star b))` into `(rule a (alt _empty (seq b a)))`
|
252
|
+
# * Transform `(rule a (plus b))` into `(rule a (seq b (star b)))`
|
253
|
+
#
|
254
|
+
# Transformation includes information used to re-construct non-transformed.
|
177
255
|
#
|
178
|
-
# Transformation includes information used to re-construct non-transformed
|
179
256
|
# AST representation
|
180
257
|
# @return [Array<Rule>]
|
181
258
|
def to_bnf
|
@@ -202,19 +279,19 @@ module EBNF
|
|
202
279
|
new_rules = new_rules.map {|r| r.to_bnf}.flatten
|
203
280
|
elsif expr.first == :opt
|
204
281
|
this = dup
|
205
|
-
# * Transform (a
|
282
|
+
# * Transform (rule a (opt b)) into (rule a (alt _empty b))
|
206
283
|
this.expr = [:alt, :_empty, expr.last]
|
207
284
|
this.cleanup = :opt
|
208
285
|
new_rules = this.to_bnf
|
209
286
|
elsif expr.first == :star
|
210
|
-
# * Transform (a
|
287
|
+
# * Transform (rule a (star b)) into (rule a (alt _empty (seq b a)))
|
211
288
|
this = dup
|
212
289
|
this.cleanup = :star
|
213
290
|
new_rule = this.build([:seq, expr.last, this.sym], cleanup: :merge)
|
214
291
|
this.expr = [:alt, :_empty, new_rule.sym]
|
215
292
|
new_rules = [this] + new_rule.to_bnf
|
216
293
|
elsif expr.first == :plus
|
217
|
-
# * Transform (a
|
294
|
+
# * Transform (rule a (plus b)) into (rule a (seq b (star b)))
|
218
295
|
this = dup
|
219
296
|
this.cleanup = :plus
|
220
297
|
this.expr = [:seq, expr.last, [:star, expr.last]]
|
@@ -223,7 +300,7 @@ module EBNF
|
|
223
300
|
# Otherwise, no further transformation necessary
|
224
301
|
new_rules << self
|
225
302
|
elsif [:diff, :hex, :range].include?(expr.first)
|
226
|
-
# This rules are fine,
|
303
|
+
# These rules are fine; they just need to be terminals
|
227
304
|
raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal?
|
228
305
|
new_rules << self
|
229
306
|
else
|
@@ -234,89 +311,73 @@ module EBNF
|
|
234
311
|
return new_rules
|
235
312
|
end
|
236
313
|
|
237
|
-
|
238
|
-
#
|
239
|
-
#
|
240
|
-
#
|
314
|
+
##
|
315
|
+
# Transform EBNF rule for PEG:
|
316
|
+
#
|
317
|
+
# * Transform `(rule a "n" (op1 ... (op2 y) ...z))` into two rules:
|
318
|
+
#
|
319
|
+
# (rule a "n" (op1 ... _a_1 ... z))
|
320
|
+
# (rule _a_1 "n.1" (op2 y))
|
321
|
+
# * Transform `(rule a "n" (diff op1 op2))` into two rules:
|
322
|
+
#
|
323
|
+
# (rule a "n" (seq _a_1 op1))
|
324
|
+
# (rule _a_1 "n.1" (not op2))
|
325
|
+
#
|
241
326
|
# @return [Array<Rule>]
|
242
|
-
def
|
243
|
-
|
244
|
-
case sym
|
245
|
-
when Symbol
|
246
|
-
r = ast.detect {|r| r.sym == sym}
|
247
|
-
r if r && r.rule?
|
248
|
-
else
|
249
|
-
nil
|
250
|
-
end
|
251
|
-
end.compact
|
252
|
-
end
|
327
|
+
def to_peg
|
328
|
+
new_rules = []
|
253
329
|
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
when String
|
266
|
-
sym
|
267
|
-
else
|
268
|
-
nil
|
330
|
+
# Look for rules containing sub-sequences
|
331
|
+
if expr.any? {|e| e.is_a?(Array) && e.first.is_a?(Symbol)}
|
332
|
+
# duplicate ourselves for rewriting
|
333
|
+
this = dup
|
334
|
+
new_rules << this
|
335
|
+
|
336
|
+
expr.each_with_index do |e, index|
|
337
|
+
next unless e.is_a?(Array) && e.first.is_a?(Symbol)
|
338
|
+
new_rule = build(e)
|
339
|
+
this.expr[index] = new_rule.sym
|
340
|
+
new_rules << new_rule
|
269
341
|
end
|
270
|
-
end.compact
|
271
|
-
end
|
272
342
|
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
expr.
|
343
|
+
# Return new rules after recursively applying #to_peg
|
344
|
+
new_rules = new_rules.map {|r| r.to_peg}.flatten
|
345
|
+
elsif expr.first == :diff && !terminal?
|
346
|
+
this = dup
|
347
|
+
new_rule = build([:not, expr[2]])
|
348
|
+
this.expr = [:seq, new_rule.sym, expr[1]]
|
349
|
+
new_rules << this
|
350
|
+
new_rules << new_rule
|
351
|
+
elsif [:hex, :istr, :range].include?(expr.first)
|
352
|
+
# These rules are fine; they just need to be terminals
|
353
|
+
raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal?
|
354
|
+
new_rules << self
|
284
355
|
else
|
285
|
-
|
356
|
+
new_rules << self
|
286
357
|
end
|
358
|
+
|
359
|
+
return new_rules.map {|r| r.extend(EBNF::PEG::Rule)}
|
287
360
|
end
|
288
361
|
|
289
|
-
|
290
|
-
#
|
291
|
-
def first_includes_eps?
|
292
|
-
@first && @first.include?(:_eps)
|
293
|
-
end
|
294
|
-
|
295
|
-
# Add terminal as proceding this rule
|
296
|
-
# @param [Array<Rule, Symbol, String>] terminals
|
297
|
-
# @return [Integer] if number of terminals added
|
298
|
-
def add_first(terminals)
|
299
|
-
@first ||= []
|
300
|
-
terminals = terminals.map {|t| t.is_a?(Rule) ? t.sym : t} - @first
|
301
|
-
@first += terminals
|
302
|
-
terminals.length
|
303
|
-
end
|
304
|
-
|
305
|
-
# Add terminal as following this rule. Don't add _eps as a follow
|
362
|
+
##
|
363
|
+
# For :hex, :istr, or :range, create a regular expression.
|
306
364
|
#
|
307
|
-
# @
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
365
|
+
# @return [Regexp]
|
366
|
+
def to_regexp
|
367
|
+
case expr.first
|
368
|
+
when :hex
|
369
|
+
Regexp.new(translate_codepoints(expr[1]))
|
370
|
+
when :istr
|
371
|
+
/#{expr.last}/ui
|
372
|
+
when :range
|
373
|
+
Regexp.new("[#{translate_codepoints(expr[1])}]")
|
374
|
+
else
|
375
|
+
raise "Can't turn #{expr.inspect} into a regexp"
|
315
376
|
end
|
316
|
-
terminals.length
|
317
377
|
end
|
318
378
|
|
319
379
|
# Is this a terminal?
|
380
|
+
#
|
320
381
|
# @return [Boolean]
|
321
382
|
def terminal?
|
322
383
|
kind == :terminal
|
@@ -344,18 +405,14 @@ module EBNF
|
|
344
405
|
expr.is_a?(Array) && expr.first == :seq
|
345
406
|
end
|
346
407
|
|
347
|
-
# Is this rule of the form (alt ...)?
|
348
|
-
def alt?
|
349
|
-
expr.is_a?(Array) && expr.first == :alt
|
350
|
-
end
|
351
|
-
|
352
408
|
def inspect
|
353
409
|
"#<EBNF::Rule:#{object_id} " +
|
354
410
|
{sym: sym, id: id, kind: kind, expr: expr}.inspect +
|
355
411
|
">"
|
356
412
|
end
|
357
413
|
|
358
|
-
# Two rules are equal if they have the same {#sym}, {#kind} and {#expr}
|
414
|
+
# Two rules are equal if they have the same {#sym}, {#kind} and {#expr}.
|
415
|
+
#
|
359
416
|
# @param [Rule] other
|
360
417
|
# @return [Boolean]
|
361
418
|
def ==(other)
|
@@ -364,42 +421,264 @@ module EBNF
|
|
364
421
|
expr == other.expr
|
365
422
|
end
|
366
423
|
|
367
|
-
# Two rules are equivalent if they have the same {#expr}
|
424
|
+
# Two rules are equivalent if they have the same {#expr}.
|
425
|
+
#
|
368
426
|
# @param [Rule] other
|
369
427
|
# @return [Boolean]
|
370
|
-
def
|
371
|
-
expr
|
428
|
+
def eql?(other)
|
429
|
+
expr == other.expr
|
372
430
|
end
|
373
431
|
|
374
|
-
#
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
@expr = @expr.map {|e| e == src_rule.sym ? dst_rule.sym : e}
|
432
|
+
# Rules compare using their ids
|
433
|
+
def <=>(other)
|
434
|
+
if id && other.id
|
435
|
+
if id == other.id
|
436
|
+
id.to_s <=> other.id.to_s
|
437
|
+
else
|
438
|
+
id.to_f <=> other.id.to_f
|
439
|
+
end
|
383
440
|
else
|
384
|
-
|
441
|
+
sym.to_s <=> other.sym.to_s
|
385
442
|
end
|
386
|
-
self
|
387
443
|
end
|
388
444
|
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
445
|
+
##
|
446
|
+
# Utility function to translate code points of the form '#xN' into ruby unicode characters
|
447
|
+
def translate_codepoints(str)
|
448
|
+
str.gsub(/#x\h+/) {|c| c[2..-1].scanf("%x").first.chr(Encoding::UTF_8)}
|
449
|
+
end
|
450
|
+
|
451
|
+
# Return the non-terminals for this rule.
|
452
|
+
#
|
453
|
+
# * `alt` => this is every non-terminal.
|
454
|
+
# * `diff` => this is every non-terminal.
|
455
|
+
# * `hex` => nil
|
456
|
+
# * `istr` => nil
|
457
|
+
# * `not` => this is the last expression, if any.
|
458
|
+
# * `opt` => this is the last expression, if any.
|
459
|
+
# * `plus` => this is the last expression, if any.
|
460
|
+
# * `range` => nil
|
461
|
+
# * `rept` => this is the last expression, if any.
|
462
|
+
# * `seq` => this is the first expression in the sequence, if any.
|
463
|
+
# * `star` => this is the last expression, if any.
|
464
|
+
#
|
465
|
+
# @param [Array<Rule>] ast
|
466
|
+
# The set of rules, used to turn symbols into rules
|
467
|
+
# @param [Array<Symbol,String,Array>] expr (@expr)
|
468
|
+
# The expression to check, defaults to the rule expression.
|
469
|
+
# Typically, if the expression is recursive, the embedded expression is called recursively.
|
470
|
+
# @return [Array<Rule>]
|
471
|
+
# @note this is used for LL(1) transformation, so rule types are limited
|
472
|
+
# Return the non-terminal rules referenced by an expression.
#
# For `alt` and `diff` every operand is examined; for any other operator only
# the first operand is. Nested expressions are examined recursively.
#
# @param [Array<Rule>] ast
#   The set of rules, used to turn symbols into rules
# @param [Array<Symbol,String,Array>] expr (@expr)
#   The expression to check, defaults to the rule expression.
# @return [Array<Rule>]
#   Unique rules, in order of first appearance, for which `rule?` is true.
def non_terminals(ast, expr = @expr)
  operands = [:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1, 1]

  found = operands.map do |operand|
    case operand
    when Symbol
      # Resolve the symbol to its rule; symbols without a rule, and rules
      # which are not non-terminals, contribute nothing.
      rule = ast.detect { |candidate| candidate.sym == operand }
      rule if rule && rule.rule?
    when Array
      non_terminals(ast, operand)
    end
  end

  found.flatten.compact.uniq
end
|
485
|
+
|
486
|
+
# Return the terminals for this rule.
|
487
|
+
#
|
488
|
+
# * `alt` => this is every terminal.
|
489
|
+
# * `diff` => this is every terminal.
|
490
|
+
# * `hex` => nil
|
491
|
+
# * `istr` => nil
|
492
|
+
# * `not` => this is the last expression, if any.
|
493
|
+
# * `opt` => this is the last expression, if any.
|
494
|
+
# * `plus` => this is the last expression, if any.
|
495
|
+
# * `range` => nil
|
496
|
+
# * `rept` => this is the last expression, if any.
|
497
|
+
# * `seq` => this is the first expression in the sequence, if any.
|
498
|
+
# * `star` => this is the last expression, if any.
|
499
|
+
#
|
500
|
+
# @param [Array<Rule>] ast
|
501
|
+
# The set of rules, used to turn symbols into rules
|
502
|
+
# @param [Array<Symbol,String,Array>] expr (@expr)
|
503
|
+
# The expression to check, defaults to the rule expression.
|
504
|
+
# Typically, if the expression is recursive, the embedded expression is called recursively.
|
505
|
+
# @return [Array<Rule>]
|
506
|
+
# @note this is used for LL(1) transformation, so rule types are limited
|
507
|
+
# Return the terminals referenced by an expression.
#
# For `alt` and `diff` every operand is examined; for any other operator only
# the first operand is. Nested expressions are examined recursively.
#
# @param [Array<Rule>] ast
#   The set of rules, used to turn symbols into rules
# @param [Array<Symbol,String,Array>] expr (@expr)
#   The expression to check, defaults to the rule expression.
# @return [Array<Rule, String>]
#   Unique terminal rules and literal strings, in order of first appearance.
def terminals(ast, expr = @expr)
  operands = [:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1, 1]

  found = operands.map do |operand|
    case operand
    when Symbol
      # Only symbols which resolve to a terminal rule are reported.
      rule = ast.detect { |candidate| candidate.sym == operand }
      rule if rule && rule.terminal?
    when String
      # A literal string is itself a terminal.
      operand
    when Array
      terminals(ast, operand)
    end
  end

  found.flatten.compact.uniq
end
|
520
|
+
|
521
|
+
# Return the symbols used in the rule.
|
522
|
+
#
|
523
|
+
# @param [Array<Symbol,String,Array>] expr (@expr)
|
524
|
+
# The expression to check, defaults to the rule expression.
|
525
|
+
# Typically, if the expression is recursive, the embedded expression is called recursively.
|
526
|
+
# @return [Array<Symbol>]
|
527
|
+
# Return the symbols used in an expression.
#
# Every operand after the operator is examined; nested expressions are
# examined recursively. Strings and other non-symbol operands are ignored.
#
# @param [Array<Symbol,String,Array>] expr (@expr)
#   The expression to check, defaults to the rule expression.
# @return [Array<Symbol>]
#   Unique symbols, in order of first appearance.
def symbols(expr = @expr)
  collected = expr[1..-1].map do |operand|
    case operand
    when Symbol then operand
    when Array then symbols(operand)
    end
  end

  collected.flatten.compact.uniq
end
|
537
|
+
|
538
|
+
##
|
539
|
+
# The following are used for LL(1) transformation.
|
540
|
+
##
|
541
|
+
|
542
|
+
# Does this rule start with `sym`? It does if expr is that sym,
|
543
|
+
# expr starts with alt and contains that sym,
|
544
|
+
# or expr starts with seq and the next element is that sym.
|
545
|
+
#
|
546
|
+
# @param [Symbol, class] sym
|
547
|
+
# Symbol matching any start element, or if it is String, any start element which is a String
|
548
|
+
# @return [Array<Symbol, String>] list of symbol (singular), or strings which are start symbol, or nil if there are none
|
549
|
+
# Does this rule start with `sym`?
#
# A `seq` rule starts with its first operand; an `alt` rule starts with any of
# its operands. Only operands are considered: the operator at the head of the
# expression is never reported as a start element.
#
# @param [Symbol, class] sym
#   Symbol matching any start element, or if it is String, any start element which is a String
# @return [Array<Symbol, String>, nil]
#   The matching start elements, or nil if there are none
def starts_with?(sym)
  if seq? && sym === (head = expr.fetch(1, nil))
    [head]
  elsif alt?
    # Skip index 0, which holds the :alt operator rather than an operand.
    matches = expr[1..-1].select { |operand| sym === operand }
    matches unless matches.empty?
  end
end
|
397
558
|
|
559
|
+
##
|
560
|
+
# Validate the rule, with respect to an AST.
|
561
|
+
#
|
562
|
+
# @param [Array<Rule>] ast
|
563
|
+
# The set of rules, used to turn symbols into rules
|
564
|
+
# @param [Array<Symbol,String,Array>] expr (@expr)
|
565
|
+
# The expression to check, defaults to the rule expression.
|
566
|
+
# Typically, if the expression is recursive, the embedded expression is called recursively.
|
567
|
+
# @raise [SyntaxError]
|
568
|
+
##
# Validate an expression, with respect to an AST.
#
# Checks that the operator is known, that it has the expected number of
# operands, and that the operands are well formed: `hex` and `range` operands
# are checked lexically; for other operators the examined operands must be
# symbols with a rule in `ast`, nested expressions which validate, or strings
# made only of legal characters. For `alt` and `diff` every operand is
# examined; for any other operator only the first one is.
#
# @param [Array<Rule>] ast
#   The set of rules, used to turn symbols into rules
# @param [Array<Symbol,String,Array>] expr (@expr)
#   The expression to check, defaults to the rule expression.
# @raise [SyntaxError] if the expression is not valid
def validate!(ast, expr = @expr)
  op = expr.first
  raise SyntaxError, "Unknown operator: #{op}" unless OP_ARGN.key?(op)
  raise SyntaxError, "Argument count missmatch on operator #{op}, had #{expr.length - 1} expected #{OP_ARGN[op]}" if
    OP_ARGN[op] && OP_ARGN[op] != expr.length - 1

  if op == :alt
    # alt needs at least one operand
    raise SyntaxError, "alt operation must have at least one operand, had #{expr.length - 1}" unless expr.length > 1
  elsif op == :rept
    # rept operator needs min and max
    raise SyntaxError, "rept operation must an non-negative integer minimum, was #{expr[1]}" unless
      expr[1].is_a?(Integer) && expr[1] >= 0
    raise SyntaxError, "rept operation must an non-negative integer maximum or '*', was #{expr[2]}" unless
      expr[2] == '*' || expr[2].is_a?(Integer) && expr[2] >= 0
  end

  case op
  when :hex
    # \A and \z anchor the whole operand; ^ and $ would accept a multi-line
    # string as soon as one of its lines looked like a hex escape.
    raise SyntaxError, "Hex operand must be of form '#xN+': #{sym}" unless expr.last.match?(/\A#x\h+\z/)
  when :range
    str = expr.last.dup
    str = str[1..-1] if str.start_with?('^')
    str = str[0..-2] if str.end_with?('-') # Allowed at end of range
    scanner = StringScanner.new(str)
    # hex/rchar record the kind of the previous component; in_range is set
    # once a '-' has been seen after it, so both ends of a range can be
    # required to be of the same kind.
    hex = rchar = in_range = false
    while !scanner.eos?
      begin
        if scanner.scan(Terminals::HEX)
          raise SyntaxError if in_range && rchar
          rchar = in_range = false
          hex = true
        elsif scanner.scan(Terminals::R_CHAR)
          raise SyntaxError if in_range && hex
          hex = in_range = false
          rchar = true
        else
          raise(SyntaxError, "Range contains illegal components at offset #{scanner.pos}: was #{expr.last}")
        end

        if scanner.scan(/\-/)
          raise SyntaxError if in_range
          in_range = true
        end
      rescue SyntaxError
        # Report every lexical failure uniformly, with the scanner position.
        raise(SyntaxError, "Range contains illegal components at offset #{scanner.pos}: was #{expr.last}")
      end
    end
  else
    ([:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1, 1]).each do |operand|
      case operand
      when Symbol
        rule = ast.detect { |candidate| candidate.sym == operand }
        raise SyntaxError, "No rule found for #{operand}" unless rule
      when Array
        validate!(ast, operand)
      when String
        # Anchor on the whole string so that every line of a multi-line
        # literal is checked, not just the first matching one.
        raise SyntaxError, "String must be of the form CHAR*" unless operand.match?(/\A#{Terminals::CHAR}*\z/)
      end
    end
  end
end
|
629
|
+
|
630
|
+
##
|
631
|
+
# Validate the rule, with respect to an AST.
|
632
|
+
#
|
633
|
+
# Uses `#validate!` and catches `SyntaxError`
|
634
|
+
#
|
635
|
+
# @param [Array<Rule>] ast
|
636
|
+
# The set of rules, used to turn symbols into rules
|
637
|
+
# @return [Boolean]
|
638
|
+
##
# Check whether the rule is valid, with respect to an AST.
#
# Runs `#validate!` and turns the `SyntaxError` it raises for an invalid rule
# into a false result; any other exception propagates.
#
# @param [Array<Rule>] ast
#   The set of rules, used to turn symbols into rules
# @return [Boolean]
def valid?(ast)
  validate!(ast)
  true
rescue SyntaxError
  false
end
|
644
|
+
|
645
|
+
# Do the firsts of this rule include the empty string?
|
646
|
+
#
|
647
|
+
# @return [Boolean]
|
648
|
+
# Do the firsts of this rule include the empty string?
#
# @return [Boolean]
#   true if `:_eps` is among the recorded firsts; false otherwise, including
#   when no firsts have been recorded yet.
def first_includes_eps?
  # Array(nil) is [], so an unset @first yields false rather than nil.
  Array(@first).include?(:_eps)
end
|
651
|
+
|
652
|
+
# Add terminals to the first set of this rule.
|
653
|
+
#
|
654
|
+
# @param [Array<Rule, Symbol, String>] terminals
|
655
|
+
# @return [Integer] the number of terminals added
|
656
|
+
# Add terminals to the firsts recorded for this rule.
#
# Rules are recorded by their symbol. Entries already present, and repeats
# within `terminals` itself, are added only once, so the returned count is the
# number of genuinely new entries.
#
# @param [Array<Rule, Symbol, String>] terminals
# @return [Integer] the number of terminals added
def add_first(terminals)
  @first ||= []
  additions = terminals.map { |t| t.is_a?(Rule) ? t.sym : t }.uniq - @first
  @first += additions
  additions.length
end
|
662
|
+
|
663
|
+
# Add terminal as following this rule. Don't add _eps as a follow
|
664
|
+
#
|
665
|
+
# @param [Array<Rule, Symbol, String>] terminals
|
666
|
+
# @return [Integer] the number of terminals added
|
667
|
+
# Add terminals to the follows recorded for this rule. `:_eps` is never added
# as a follow.
#
# Rules are recorded by their symbol. Entries already present, and repeats
# within `terminals` itself, are added only once, so the returned count is the
# number of genuinely new entries. `@follow` is left untouched when there is
# nothing to add.
#
# @param [Array<Rule, Symbol, String>] terminals
# @return [Integer] the number of terminals added
def add_follow(terminals)
  # Remove terminals already in follows, repeats, and the empty string
  additions = terminals.map { |t| t.is_a?(Rule) ? t.sym : t }.uniq - (@follow || []) - [:_eps]
  unless additions.empty?
    @follow ||= []
    @follow += additions
  end
  additions.length
end
|
676
|
+
|
398
677
|
private
|
399
678
|
def ttl_expr(expr, pfx, depth, is_obj = true)
|
400
679
|
indent = ' ' * depth
|
401
|
-
@ebnf.debug("ttl_expr", depth: depth) {expr.inspect}
|
402
|
-
op = expr
|
680
|
+
@ebnf.debug("ttl_expr", depth: depth) {expr.inspect} if @ebnf
|
681
|
+
op, *expr = expr if expr.is_a?(Array)
|
403
682
|
statements = []
|
404
683
|
|
405
684
|
if is_obj
|
@@ -410,17 +689,28 @@ module EBNF
|
|
410
689
|
|
411
690
|
case op
|
412
691
|
when :seq, :alt, :diff
|
692
|
+
# Multiple operands
|
413
693
|
statements << %{#{indent}#{bra}#{pfx}:#{op} (}
|
414
694
|
expr.each {|a| statements += ttl_expr(a, pfx, depth + 1)}
|
415
695
|
statements << %{#{indent} )#{ket}}
|
416
|
-
when :opt, :plus, :star
|
696
|
+
when :opt, :plus, :star, :not
|
697
|
+
# Single operand
|
417
698
|
statements << %{#{indent}#{bra}#{pfx}:#{op} }
|
418
699
|
statements += ttl_expr(expr.first, pfx, depth + 1)
|
419
700
|
statements << %{#{indent} #{ket}} unless ket.empty?
|
420
|
-
when :
|
701
|
+
when :rept
|
702
|
+
# Three operands (min, max and expr)
|
703
|
+
statements << %{ #{indent}#{pfx}:min #{expr[0].inspect};}
|
704
|
+
statements << %{ #{indent}#{pfx}:max #{expr[1].inspect};}
|
705
|
+
statements << %{#{indent}#{bra}#{pfx}:#{op} }
|
706
|
+
statements += ttl_expr(expr.last, pfx, depth + 1)
|
707
|
+
statements << %{#{indent} #{ket}} unless ket.empty?
|
708
|
+
when :_empty, :_eps
|
421
709
|
statements << %{#{indent}"g:#{op.to_s[1..-1]}"}
|
422
710
|
when :"'"
|
423
711
|
statements << %{#{indent}"#{esc(expr)}"}
|
712
|
+
when :istr
|
713
|
+
statements << %{#{indent}#{bra} re:matches #{expr.first.inspect} #{ket}}
|
424
714
|
when :range
|
425
715
|
statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
|
426
716
|
when :hex
|
@@ -435,7 +725,7 @@ module EBNF
|
|
435
725
|
end
|
436
726
|
|
437
727
|
statements.last << " ." unless is_obj
|
438
|
-
@ebnf.debug("statements", depth: depth) {statements.join("\n")}
|
728
|
+
@ebnf.debug("statements", depth: depth) {statements.join("\n")} if @ebnf
|
439
729
|
statements
|
440
730
|
end
|
441
731
|
|
@@ -476,7 +766,7 @@ module EBNF
|
|
476
766
|
def make_sym_id(variation = nil)
|
477
767
|
@id_seq ||= 0
|
478
768
|
@id_seq += 1
|
479
|
-
["_#{@sym}_#{@id_seq}#{variation}".to_sym, "#{@id}.#{@id_seq}#{variation}"]
|
769
|
+
["_#{@sym}_#{@id_seq}#{variation}".to_sym, ("#{@id}.#{@id_seq}#{variation}" if @id)]
|
480
770
|
end
|
481
771
|
end
|
482
772
|
end
|