ebnf 1.2.0 → 2.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +223 -199
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +38 -19
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/doap.ttl +23 -18
- data/etc/ebnf.ebnf +21 -33
- data/etc/ebnf.html +76 -160
- data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
- data/etc/ebnf.ll1.sxp +182 -183
- data/etc/ebnf.peg.rb +90 -0
- data/etc/ebnf.peg.sxp +84 -0
- data/etc/ebnf.sxp +40 -41
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +363 -362
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +103 -101
- data/lib/ebnf.rb +6 -1
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +114 -69
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +131 -3
- data/lib/ebnf/ll1/lexer.rb +20 -22
- data/lib/ebnf/ll1/parser.rb +97 -64
- data/lib/ebnf/ll1/scanner.rb +82 -50
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +561 -0
- data/lib/ebnf/peg/rule.rb +250 -0
- data/lib/ebnf/rule.rb +442 -148
- data/lib/ebnf/terminals.rb +21 -0
- data/lib/ebnf/writer.rb +587 -82
- metadata +125 -18
- data/etc/sparql.rb +0 -45773
@@ -0,0 +1,250 @@
|
|
1
|
+
module EBNF::PEG
|
2
|
+
# Behavior for parsing a PEG rule
|
3
|
+
module Rule
|
4
|
+
##
|
5
|
+
# Initialized by parser when loading rules.
|
6
|
+
# Used for finding rules and invoking elements of the parse process.
|
7
|
+
#
|
8
|
+
# @return [EBNF::PEG::Parser] parser
|
9
|
+
attr_accessor :parser
|
10
|
+
|
11
|
+
##
|
12
|
+
# Parse a rule or terminal, invoking callbacks, as appropriate
|
13
|
+
|
14
|
+
# If there are `start_production` and/or `production` handlers,
|
15
|
+
# they are invoked with a `prod_data` stack, the input stream and offset.
|
16
|
+
# Otherwise, the results are added as an array value
|
17
|
+
# to a hash indexed by the rule name.
|
18
|
+
#
|
19
|
+
# If matched, the input position is updated and the results returned in a Hash.
|
20
|
+
#
|
21
|
+
# * `alt`: returns the value of the matched production or `:unmatched`.
|
22
|
+
# * `diff`: returns the value matched, or `:unmatched`.
|
23
|
+
# * `hex`: returns a string composed of the matched hex character, or `:unmatched`.
|
24
|
+
# * `opt`: returns the value matched, or `nil` if unmatched.
|
25
|
+
# * `plus`: returns an array of the values matched for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string.
|
26
|
+
# * `range`: returns a string composed of the values matched, or `:unmatched`, if less than `min` are matched.
|
27
|
+
# * `rept`: returns an array of the values matched for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string.
|
28
|
+
# * `seq`: returns an array composed of single-entry hashes for each matched production indexed by the production name, or `:unmatched` if any production fails to match. For Terminals, returns a string created by concatenating these values. Via option in a `production` or definition, the result can be a single hash with values for each matched production; note that this is not always possible due to the possibility of repeated productions within the sequence.
|
29
|
+
# * `star`: returns an array of the values matched for the specified production. For Terminals, these are concatenated into a single string.
|
30
|
+
#
|
31
|
+
# @param [Scanner] input
|
32
|
+
# @return [Hash{Symbol => Object}, :unmatched] A hash with keys for matched component of the expression. Returns :unmatched if the input does not match the production.
|
33
|
+
def parse(input)
|
34
|
+
# Save position and linenumber for backtracking
|
35
|
+
pos, lineno = input.pos, input.lineno
|
36
|
+
|
37
|
+
parser.packrat[sym] ||= {}
|
38
|
+
if parser.packrat[sym][pos]
|
39
|
+
parser.debug("#{sym}(:memo)", lineno: lineno) { "#{parser.packrat[sym][pos].inspect}(@#{pos})"}
|
40
|
+
input.pos, input.lineno = parser.packrat[sym][pos][:pos], parser.packrat[sym][pos][:lineno]
|
41
|
+
return parser.packrat[sym][pos][:result]
|
42
|
+
end
|
43
|
+
|
44
|
+
if terminal?
|
45
|
+
# If the terminal is defined with a regular expression,
|
46
|
+
# use that to match the input,
|
47
|
+
# otherwise,
|
48
|
+
if regexp = parser.find_terminal_regexp(sym)
|
49
|
+
matched = input.scan(regexp)
|
50
|
+
result = parser.onTerminal(sym, (matched ? matched : :unmatched))
|
51
|
+
# Update furthest failure for strings and terminals
|
52
|
+
parser.update_furthest_failure(input.pos, input.lineno, sym) if result == :unmatched
|
53
|
+
parser.packrat[sym][pos] = {
|
54
|
+
pos: input.pos,
|
55
|
+
lineno: input.lineno,
|
56
|
+
result: result
|
57
|
+
}
|
58
|
+
return parser.packrat[sym][pos][:result]
|
59
|
+
end
|
60
|
+
else
|
61
|
+
eat_whitespace(input)
|
62
|
+
end
|
63
|
+
start_options = parser.onStart(sym)
|
64
|
+
|
65
|
+
result = case expr.first
|
66
|
+
when :alt
|
67
|
+
# Return the first expression to match.
|
68
|
+
# Result is either :unmatched, or the value of the matching rule
|
69
|
+
alt = :unmatched
|
70
|
+
expr[1..-1].each do |prod|
|
71
|
+
alt = case prod
|
72
|
+
when Symbol
|
73
|
+
rule = parser.find_rule(prod)
|
74
|
+
raise "No rule found for #{prod}" unless rule
|
75
|
+
rule.parse(input)
|
76
|
+
when String
|
77
|
+
input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
|
78
|
+
end
|
79
|
+
if alt == :unmatched
|
80
|
+
# Update furthest failure for strings and terminals
|
81
|
+
parser.update_furthest_failure(input.pos, input.lineno, prod) if prod.is_a?(String) || rule.terminal?
|
82
|
+
else
|
83
|
+
break
|
84
|
+
end
|
85
|
+
end
|
86
|
+
alt
|
87
|
+
when :diff
|
88
|
+
# matches any string that matches A but does not match B.
|
89
|
+
# (Note, this is only used for Terminal rules, non-terminals will use :not)
|
90
|
+
raise "Diff used on non-terminal #{prod}" unless terminal?
|
91
|
+
re1, re2 = Regexp.new(translate_codepoints(expr[1])), Regexp.new(translate_codepoints(expr[2]))
|
92
|
+
matched = input.scan(re1)
|
93
|
+
if !matched || re2.match?(matched)
|
94
|
+
# Update furthest failure for terminals
|
95
|
+
parser.update_furthest_failure(input.pos, input.lineno, sym)
|
96
|
+
:unmatched
|
97
|
+
else
|
98
|
+
matched
|
99
|
+
end
|
100
|
+
when :hex
|
101
|
+
# Matches the given hex character if expression matches the character whose number (code point) in ISO/IEC 10646 is N. The number of leading zeros in the #xN form is insignificant.
|
102
|
+
input.scan(to_regexp) || begin
|
103
|
+
# Update furthest failure for terminals
|
104
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr.last)
|
105
|
+
:unmatched
|
106
|
+
end
|
107
|
+
when :not
|
108
|
+
# matches any string that does not match B.
|
109
|
+
res = case prod = expr[1]
|
110
|
+
when Symbol
|
111
|
+
rule = parser.find_rule(prod)
|
112
|
+
raise "No rule found for #{prod}" unless rule
|
113
|
+
rule.parse(input)
|
114
|
+
when String
|
115
|
+
input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
|
116
|
+
end
|
117
|
+
if res != :unmatched
|
118
|
+
# Update furthest failure for terminals
|
119
|
+
parser.update_furthest_failure(input.pos, input.lineno, sym) if terminal?
|
120
|
+
:unmatched
|
121
|
+
else
|
122
|
+
nil
|
123
|
+
end
|
124
|
+
when :opt
|
125
|
+
# Result is the matched value or nil
|
126
|
+
opt = rept(input, 0, 1, expr[1])
|
127
|
+
|
128
|
+
# Update furthest failure for strings and terminals
|
129
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
|
130
|
+
opt.first
|
131
|
+
when :plus
|
132
|
+
# Result is an array of all expressions while they match,
|
133
|
+
# at least one must match
|
134
|
+
plus = rept(input, 1, '*', expr[1])
|
135
|
+
|
136
|
+
# Update furthest failure for strings and terminals
|
137
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
|
138
|
+
plus.is_a?(Array) && terminal? ? plus.join("") : plus
|
139
|
+
when :range, :istr
|
140
|
+
# Matches the specified character range
|
141
|
+
input.scan(to_regexp) || begin
|
142
|
+
# Update furthest failure for strings and terminals
|
143
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr[1])
|
144
|
+
:unmatched
|
145
|
+
end
|
146
|
+
when :rept
|
147
|
+
# Result is an array of all expressions while they match,
|
148
|
+
# an empty array if none match
|
149
|
+
rept = rept(input, expr[1], expr[2], expr[3])
|
150
|
+
|
151
|
+
# # Update furthest failure for strings and terminals
|
152
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr[3]) if terminal?
|
153
|
+
rept.is_a?(Array) && terminal? ? rept.join("") : rept
|
154
|
+
when :seq
|
155
|
+
# Evaluate each expression into an array of hashes where each hash contains a key from the associated production and the value is the parsed value of that production. Returns :unmatched if the input does not match the production. Value ordering is ensured by native Hash ordering.
|
156
|
+
seq = expr[1..-1].each_with_object([]) do |prod, accumulator|
|
157
|
+
eat_whitespace(input) unless accumulator.empty? || terminal?
|
158
|
+
res = case prod
|
159
|
+
when Symbol
|
160
|
+
rule = parser.find_rule(prod)
|
161
|
+
raise "No rule found for #{prod}" unless rule
|
162
|
+
rule.parse(input)
|
163
|
+
when String
|
164
|
+
input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
|
165
|
+
end
|
166
|
+
if res == :unmatched
|
167
|
+
# Update furthest failure for strings and terminals
|
168
|
+
parser.update_furthest_failure(input.pos, input.lineno, prod)
|
169
|
+
break :unmatched
|
170
|
+
end
|
171
|
+
accumulator << {prod.to_sym => res}
|
172
|
+
end
|
173
|
+
if seq == :unmatched
|
174
|
+
:unmatched
|
175
|
+
elsif terminal?
|
176
|
+
seq.map(&:values).compact.join("") # Concat values for terminal production
|
177
|
+
elsif start_options[:as_hash]
|
178
|
+
seq.inject {|memo, h| memo.merge(h)}
|
179
|
+
else
|
180
|
+
seq
|
181
|
+
end
|
182
|
+
when :star
|
183
|
+
# Result is an array of all expressions while they match,
|
184
|
+
# an empty array if none match
|
185
|
+
star = rept(input, 0, '*', expr[1])
|
186
|
+
|
187
|
+
# Update furthest failure for strings and terminals
|
188
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
|
189
|
+
star.is_a?(Array) && terminal? ? star.join("") : star
|
190
|
+
else
|
191
|
+
raise "attempt to parse unknown rule type: #{expr.first}"
|
192
|
+
end
|
193
|
+
|
194
|
+
if result == :unmatched
|
195
|
+
input.pos, input.lineno = pos, lineno
|
196
|
+
end
|
197
|
+
|
198
|
+
result = parser.onFinish(result)
|
199
|
+
(parser.packrat[sym] ||= {})[pos] = {
|
200
|
+
pos: input.pos,
|
201
|
+
lineno: input.lineno,
|
202
|
+
result: result
|
203
|
+
}
|
204
|
+
return parser.packrat[sym][pos][:result]
|
205
|
+
end
|
206
|
+
|
207
|
+
##
|
208
|
+
# Repetition, 0-1, 0-n, 1-n, ...
|
209
|
+
#
|
210
|
+
# Note, nil results are removed from the result, but count towards min/max calculations
|
211
|
+
#
|
212
|
+
# @param [Scanner] input
|
213
|
+
# @param [Integer] min
|
214
|
+
# @param [Integer] max
|
215
|
+
# If it is an integer, it stops matching after max entries.
|
216
|
+
# @param [Symbol, String] prod
|
217
|
+
# @return [:unmatched, Array]
|
218
|
+
def rept(input, min, max, prod)
|
219
|
+
result = []
|
220
|
+
|
221
|
+
case prod
|
222
|
+
when Symbol
|
223
|
+
rule = parser.find_rule(prod)
|
224
|
+
raise "No rule found for #{prod}" unless rule
|
225
|
+
while (max == '*' || result.length < max) && (res = rule.parse(input)) != :unmatched
|
226
|
+
eat_whitespace(input) unless terminal?
|
227
|
+
result << res
|
228
|
+
end
|
229
|
+
when String
|
230
|
+
while (res = input.scan(Regexp.new(Regexp.quote(prod)))) && (max == '*' || result.length < max)
|
231
|
+
eat_whitespace(input) unless terminal?
|
232
|
+
result << res
|
233
|
+
end
|
234
|
+
end
|
235
|
+
|
236
|
+
result.length < min ? :unmatched : result.compact
|
237
|
+
end
|
238
|
+
|
239
|
+
##
|
240
|
+
# Eat whitespace between non-terminal rules
|
241
|
+
def eat_whitespace(input)
|
242
|
+
if parser.whitespace.is_a?(Regexp)
|
243
|
+
# Eat whitespace before a non-terminal
|
244
|
+
input.skip(parser.whitespace)
|
245
|
+
elsif parser.whitespace.is_a?(Rule)
|
246
|
+
parser.whitespace.parse(input) # throw away result
|
247
|
+
end
|
248
|
+
end
|
249
|
+
end
|
250
|
+
end
|
data/lib/ebnf/rule.rb
CHANGED
@@ -1,15 +1,33 @@
|
|
1
|
+
require 'scanf'
|
2
|
+
require 'strscan'
|
3
|
+
|
1
4
|
module EBNF
|
2
5
|
# Represent individual parsed rules
|
3
6
|
class Rule
|
4
|
-
# Operations which are flattened to seprate rules in to_bnf
|
7
|
+
# Operations which are flattened to separate rules in to_bnf.
|
5
8
|
BNF_OPS = %w{
|
6
|
-
alt opt plus seq star
|
9
|
+
alt diff not opt plus rept seq star
|
7
10
|
}.map(&:to_sym).freeze
|
8
11
|
|
9
12
|
TERM_OPS = %w{
|
10
|
-
|
13
|
+
hex istr range
|
11
14
|
}.map(&:to_sym).freeze
|
12
15
|
|
16
|
+
# The number of arguments expected per operator. `nil` for unspecified
|
17
|
+
OP_ARGN = {
|
18
|
+
alt: nil,
|
19
|
+
diff: 2,
|
20
|
+
hex: 1,
|
21
|
+
istr: 1,
|
22
|
+
not: 1,
|
23
|
+
opt: 1,
|
24
|
+
plus: 1,
|
25
|
+
range: 1,
|
26
|
+
rept: 3,
|
27
|
+
seq: nil,
|
28
|
+
star: 1
|
29
|
+
}
|
30
|
+
|
13
31
|
# Symbol of rule
|
14
32
|
#
|
15
33
|
# @return [Symbol]
|
@@ -26,7 +44,7 @@ module EBNF
|
|
26
44
|
|
27
45
|
# Kind of rule
|
28
46
|
#
|
29
|
-
# @return [:rule, :terminal, or :pass]
|
47
|
+
# @return [:rule, :terminal, :terminals, or :pass]
|
30
48
|
attr_accessor :kind
|
31
49
|
|
32
50
|
# Rule expression
|
@@ -57,19 +75,38 @@ module EBNF
|
|
57
75
|
# Determines preparation and cleanup rules for reconstituting EBNF ? * + from BNF
|
58
76
|
attr_accessor :cleanup
|
59
77
|
|
60
|
-
# @param [
|
61
|
-
# @
|
78
|
+
# @param [Symbol, nil] sym
|
79
|
+
# `nil` is allowed only for @pass or @terminals
|
80
|
+
# @param [String, nil] id
|
62
81
|
# @param [Array] expr
|
63
|
-
#
|
64
|
-
#
|
65
|
-
#
|
66
|
-
#
|
67
|
-
#
|
68
|
-
#
|
69
|
-
#
|
82
|
+
# The expression is an internal representation of an S-Expression with one of the following operators:
|
83
|
+
#
|
84
|
+
# * `alt` – A list of alternative rules, which are attempted in order. It terminates with the first matching rule, or is terminated as unmatched, if no such rule is found.
|
85
|
+
# * `diff` – matches any string that matches `A` but does not match `B`.
|
86
|
+
# * `hex` – A single character represented using the hexadecimal notation `#xnn`.
|
87
|
+
# * `istr` – A string which matches in a case-insensitive manner, so that `(istr "fOo")` will match either of the strings `"foo"`, `"FOO"` or any other combination.
|
88
|
+
# * `opt` – An optional rule or terminal. It either results in the matching rule or returns `nil`.
|
89
|
+
# * `plus` – A sequence of one or more of the matching rule. If there is no such rule, it is terminated as unmatched; otherwise, the result is an array containing all matched input.
|
90
|
+
# * `range` – A range of characters, possibly repeated, of the form `(range "a-z")`. May also use hexadecimal notation.
|
91
|
+
# * `rept m n` – A sequence of at least `m` and at most `n` of the matching rule. It will always return an array.
|
92
|
+
# * `seq` – A sequence of rules or terminals. If any (other than `opt` or `star`) do not parse, the rule is terminated as unmatched.
|
93
|
+
# * `star` – A sequence of zero or more of the matching rule. It will always return an array.
|
94
|
+
# @param [:rule, :terminal, :terminals, :pass] kind (nil)
|
95
|
+
# @param [String] ebnf (nil)
|
96
|
+
# When parsing, records the EBNF string used to create the rule.
|
97
|
+
# @param [Array] first (nil)
|
98
|
+
# Recorded set of terminals that can precede this rule (LL(1))
|
99
|
+
# @param [Array] follow (nil)
|
100
|
+
# Recorded set of terminals that can follow this rule (LL(1))
|
101
|
+
# @param [Boolean] start (nil)
|
102
|
+
# Is this the starting rule for the grammar?
|
103
|
+
# @param [Rule] top_rule (nil)
|
104
|
+
# The top-most rule. All expressed rules are top-rules, derived rules have the original rule as their top-rule.
|
105
|
+
# @param [Boolean] cleanup (nil)
|
106
|
+
# Records information useful for cleaning up converted :plus, and :star expansions (LL(1)).
|
70
107
|
def initialize(sym, id, expr, kind: nil, ebnf: nil, first: nil, follow: nil, start: nil, top_rule: nil, cleanup: nil)
|
71
108
|
@sym, @id = sym, id
|
72
|
-
@expr = expr.is_a?(Array) ? expr : [:seq, expr]
|
109
|
+
@expr = expr.is_a?(Array) ? expr : [:seq, expr].compact
|
73
110
|
@ebnf, @kind, @first, @follow, @start, @cleanup, @top_rule = ebnf, kind, first, follow, start, cleanup, top_rule
|
74
111
|
@top_rule ||= self
|
75
112
|
@kind ||= case
|
@@ -77,21 +114,53 @@ module EBNF
|
|
77
114
|
when !BNF_OPS.include?(@expr.first) then :terminal
|
78
115
|
else :rule
|
79
116
|
end
|
117
|
+
|
118
|
+
# Allow @pass and @terminals to not be named
|
119
|
+
@sym ||= :_pass if @kind == :pass
|
120
|
+
@sym ||= :_terminals if @kind == :terminals
|
121
|
+
|
122
|
+
raise ArgumentError, "Rule sym must be a symbol, was #{@sym.inspect}" unless @sym.is_a?(Symbol)
|
123
|
+
raise ArgumentError, "Rule id must be a string or nil, was #{@id.inspect}" unless (@id || "").is_a?(String)
|
124
|
+
raise ArgumentError, "Rule kind must be one of :rule, :terminal, :terminals, or :pass, was #{@kind.inspect}" unless
|
125
|
+
@kind.is_a?(Symbol) && %w(rule terminal terminals pass).map(&:to_sym).include?(@kind)
|
126
|
+
|
127
|
+
case @expr.first
|
128
|
+
when :alt
|
129
|
+
raise ArgumentError, "#{@expr.first} operation must have at least one operand, had #{@expr.length - 1}" unless @expr.length > 1
|
130
|
+
when :diff
|
131
|
+
raise ArgumentError, "#{@expr.first} operation must have exactly two operands, had #{@expr.length - 1}" unless @expr.length == 3
|
132
|
+
when :hex, :istr, :not, :opt, :plus, :range, :star
|
133
|
+
raise ArgumentError, "#{@expr.first} operation must have exactly one operand, had #{@expr.length - 1}" unless @expr.length == 2
|
134
|
+
when :rept
|
135
|
+
raise ArgumentError, "#{@expr.first} operation must have exactly three, had #{@expr.length - 1}" unless @expr.length == 4
|
136
|
+
raise ArgumentError, "#{@expr.first} operation must an non-negative integer minimum, was #{@expr[1]}" unless
|
137
|
+
@expr[1].is_a?(Integer) && @expr[1] >= 0
|
138
|
+
raise ArgumentError, "#{@expr.first} operation must an non-negative integer maximum or '*', was #{@expr[2]}" unless
|
139
|
+
@expr[2] == '*' || @expr[2].is_a?(Integer) && @expr[2] >= 0
|
140
|
+
when :seq
|
141
|
+
# It's legal to have a zero-length sequence
|
142
|
+
else
|
143
|
+
raise ArgumentError, "Rule expression must be an array using a known operator, was #{@expr.first}"
|
144
|
+
end
|
80
145
|
end
|
81
146
|
|
82
147
|
##
|
83
148
|
# Return a rule from its SXP representation:
|
84
149
|
#
|
85
150
|
# @example inputs
|
86
|
-
# (pass (plus (range "#x20\\t\\r\\n")))
|
151
|
+
# (pass _pass (plus (range "#x20\\t\\r\\n")))
|
87
152
|
# (rule ebnf "1" (star (alt declaration rule)))
|
88
|
-
# (terminal
|
153
|
+
# (terminal R_CHAR "19" (diff CHAR (alt "]" "-")))
|
89
154
|
#
|
90
|
-
# Also may have (first ...)
|
155
|
+
# Also may have `(first ...)`, `(follow ...)`, or `(start #t)`.
|
91
156
|
#
|
92
|
-
# @param [Array] sxp
|
157
|
+
# @param [String, Array] sxp
|
93
158
|
# @return [Rule]
|
94
159
|
def self.from_sxp(sxp)
|
160
|
+
if sxp.is_a?(String)
|
161
|
+
require 'sxp' unless defined?(SXP)
|
162
|
+
sxp = SXP.parse(sxp)
|
163
|
+
end
|
95
164
|
expr = sxp.detect {|e| e.is_a?(Array) && ![:first, :follow, :start].include?(e.first.to_sym)}
|
96
165
|
first = sxp.detect {|e| e.is_a?(Array) && e.first.to_sym == :first}
|
97
166
|
first = first[1..-1] if first
|
@@ -102,26 +171,28 @@ module EBNF
|
|
102
171
|
start = sxp.any? {|e| e.is_a?(Array) && e.first.to_sym == :start}
|
103
172
|
sym = sxp[1] if sxp[1].is_a?(Symbol)
|
104
173
|
id = sxp[2] if sxp[2].is_a?(String)
|
105
|
-
|
174
|
+
self.new(sym, id, expr, kind: sxp.first, first: first, follow: follow, cleanup: cleanup, start: start)
|
106
175
|
end
|
107
176
|
|
108
177
|
# Build a new rule creating a symbol and numbering from the current rule
|
109
|
-
# Symbol and number creation is handled by the top-most rule in such a chain
|
178
|
+
# Symbol and number creation is handled by the top-most rule in such a chain.
|
110
179
|
#
|
111
180
|
# @param [Array] expr
|
181
|
+
# @param [Symbol] kind (nil)
|
182
|
+
# @param [Hash{Symbol => Symbol}] cleanup (nil)
|
112
183
|
# @param [Hash{Symbol => Object}] options
|
113
|
-
# @param [Symbol] :kind
|
114
184
|
def build(expr, kind: nil, cleanup: nil, **options)
|
115
|
-
new_sym, new_id =
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
185
|
+
new_sym, new_id = @top_rule.send(:make_sym_id)
|
186
|
+
self.class.new(new_sym, new_id, expr,
|
187
|
+
kind: kind,
|
188
|
+
ebnf: @ebnf,
|
189
|
+
top_rule: @top_rule,
|
190
|
+
cleanup: cleanup,
|
191
|
+
**options)
|
122
192
|
end
|
123
193
|
|
124
|
-
# Return representation for building S-Expressions
|
194
|
+
# Return representation for building S-Expressions.
|
195
|
+
#
|
125
196
|
# @return [Array]
|
126
197
|
def for_sxp
|
127
198
|
elements = [kind, sym]
|
@@ -143,35 +214,45 @@ module EBNF
|
|
143
214
|
|
144
215
|
alias_method :to_s, :to_sxp
|
145
216
|
|
146
|
-
# Serializes this rule to an Turtle
|
217
|
+
# Serializes this rule to Turtle.
|
218
|
+
#
|
147
219
|
# @return [String]
|
148
220
|
def to_ttl
|
149
221
|
@ebnf.debug("to_ttl") {inspect} if @ebnf
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
%{ rdfs:comment #{comment.inspect};}
|
158
|
-
|
222
|
+
statements = [%{:#{sym} rdfs:label "#{sym}";}]
|
223
|
+
if orig
|
224
|
+
comment = orig.to_s.strip.
|
225
|
+
gsub(/"""/, '\"\"\"').
|
226
|
+
gsub("\\", "\\\\").
|
227
|
+
sub(/^\"/, '\"').
|
228
|
+
sub(/\"$/m, '\"')
|
229
|
+
statements << %{ rdfs:comment #{comment.inspect};}
|
230
|
+
end
|
231
|
+
statements << %{ dc:identifier "#{id}";} if id
|
159
232
|
|
160
233
|
statements += ttl_expr(expr, terminal? ? "re" : "g", 1, false)
|
161
234
|
"\n" + statements.join("\n")
|
162
235
|
end
|
163
236
|
|
237
|
+
# Return a Ruby representation of this rule
|
238
|
+
# @return [String]
|
239
|
+
def to_ruby
|
240
|
+
"EBNF::Rule.new(#{sym.inspect}, #{id.inspect}, #{expr.inspect}#{', kind: ' + kind.inspect unless kind == :rule})"
|
241
|
+
end
|
242
|
+
|
164
243
|
##
|
165
244
|
# Transform EBNF rule to BNF rules:
|
166
245
|
#
|
167
|
-
# * Transform (a
|
168
|
-
#
|
169
|
-
#
|
170
|
-
#
|
171
|
-
# * Transform (a
|
172
|
-
# * Transform (a
|
246
|
+
# * Transform `(rule a "n" (op1 (op2)))` into two rules:
|
247
|
+
#
|
248
|
+
# (rule a "n" (op1 _a_1))
|
249
|
+
# (rule _a_1 "n.1" (op2))
|
250
|
+
# * Transform `(rule a (opt b))` into `(rule a (alt _empty b))`
|
251
|
+
# * Transform `(rule a (star b))` into `(rule a (alt _empty (seq b a)))`
|
252
|
+
# * Transform `(rule a (plus b))` into `(rule a (seq b (star b)))`
|
253
|
+
#
|
254
|
+
# Transformation includes information used to re-construct non-transformed.
|
173
255
|
#
|
174
|
-
# Transformation includes information used to re-construct non-transformed
|
175
256
|
# AST representation
|
176
257
|
# @return [Array<Rule>]
|
177
258
|
def to_bnf
|
@@ -198,19 +279,19 @@ module EBNF
|
|
198
279
|
new_rules = new_rules.map {|r| r.to_bnf}.flatten
|
199
280
|
elsif expr.first == :opt
|
200
281
|
this = dup
|
201
|
-
# * Transform (a
|
282
|
+
# * Transform (rule a (opt b)) into (rule a (alt _empty b))
|
202
283
|
this.expr = [:alt, :_empty, expr.last]
|
203
284
|
this.cleanup = :opt
|
204
285
|
new_rules = this.to_bnf
|
205
286
|
elsif expr.first == :star
|
206
|
-
# * Transform (a
|
287
|
+
# * Transform (rule a (star b)) into (rule a (alt _empty (seq b a)))
|
207
288
|
this = dup
|
208
289
|
this.cleanup = :star
|
209
290
|
new_rule = this.build([:seq, expr.last, this.sym], cleanup: :merge)
|
210
291
|
this.expr = [:alt, :_empty, new_rule.sym]
|
211
292
|
new_rules = [this] + new_rule.to_bnf
|
212
293
|
elsif expr.first == :plus
|
213
|
-
# * Transform (a
|
294
|
+
# * Transform (rule a (plus b)) into (rule a (seq b (star b)))
|
214
295
|
this = dup
|
215
296
|
this.cleanup = :plus
|
216
297
|
this.expr = [:seq, expr.last, [:star, expr.last]]
|
@@ -219,7 +300,7 @@ module EBNF
|
|
219
300
|
# Otherwise, no further transformation necessary
|
220
301
|
new_rules << self
|
221
302
|
elsif [:diff, :hex, :range].include?(expr.first)
|
222
|
-
# This rules are fine,
|
303
|
+
# These rules are fine, they just need to be terminals
|
223
304
|
raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal?
|
224
305
|
new_rules << self
|
225
306
|
else
|
@@ -230,89 +311,73 @@ module EBNF
|
|
230
311
|
return new_rules
|
231
312
|
end
|
232
313
|
|
233
|
-
|
234
|
-
#
|
235
|
-
#
|
236
|
-
#
|
314
|
+
##
|
315
|
+
# Transform EBNF rule for PEG:
|
316
|
+
#
|
317
|
+
# * Transform `(rule a "n" (op1 ... (op2 y) ...z))` into two rules:
|
318
|
+
#
|
319
|
+
# (rule a "n" (op1 ... _a_1 ... z))
|
320
|
+
# (rule _a_1 "n.1" (op2 y))
|
321
|
+
# * Transform `(rule a "n" (diff op1 op2))` into two rules:
|
322
|
+
#
|
323
|
+
# (rule a "n" (seq _a_1 op1))
|
324
|
+
# (rule _a_1 "n.1" (not op2))
|
325
|
+
#
|
237
326
|
# @return [Array<Rule>]
|
238
|
-
def
|
239
|
-
|
240
|
-
case sym
|
241
|
-
when Symbol
|
242
|
-
r = ast.detect {|r| r.sym == sym}
|
243
|
-
r if r && r.rule?
|
244
|
-
else
|
245
|
-
nil
|
246
|
-
end
|
247
|
-
end.compact
|
248
|
-
end
|
327
|
+
def to_peg
|
328
|
+
new_rules = []
|
249
329
|
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
when String
|
262
|
-
sym
|
263
|
-
else
|
264
|
-
nil
|
330
|
+
# Look for rules containing sub-sequences
|
331
|
+
if expr.any? {|e| e.is_a?(Array) && e.first.is_a?(Symbol)}
|
332
|
+
# duplicate ourselves for rewriting
|
333
|
+
this = dup
|
334
|
+
new_rules << this
|
335
|
+
|
336
|
+
expr.each_with_index do |e, index|
|
337
|
+
next unless e.is_a?(Array) && e.first.is_a?(Symbol)
|
338
|
+
new_rule = build(e)
|
339
|
+
this.expr[index] = new_rule.sym
|
340
|
+
new_rules << new_rule
|
265
341
|
end
|
266
|
-
end.compact
|
267
|
-
end
|
268
342
|
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
expr.
|
343
|
+
# Return new rules after recursively applying #to_peg
|
344
|
+
new_rules = new_rules.map {|r| r.to_peg}.flatten
|
345
|
+
elsif expr.first == :diff && !terminal?
|
346
|
+
this = dup
|
347
|
+
new_rule = build([:not, expr[2]])
|
348
|
+
this.expr = [:seq, new_rule.sym, expr[1]]
|
349
|
+
new_rules << this
|
350
|
+
new_rules << new_rule
|
351
|
+
elsif [:hex, :istr, :range].include?(expr.first)
|
352
|
+
# These rules are fine, they just need to be terminals
|
353
|
+
raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal?
|
354
|
+
new_rules << self
|
280
355
|
else
|
281
|
-
|
356
|
+
new_rules << self
|
282
357
|
end
|
358
|
+
|
359
|
+
return new_rules.map {|r| r.extend(EBNF::PEG::Rule)}
|
283
360
|
end
|
284
361
|
|
285
|
-
|
286
|
-
#
|
287
|
-
def first_includes_eps?
|
288
|
-
@first && @first.include?(:_eps)
|
289
|
-
end
|
290
|
-
|
291
|
-
# Add terminal as proceding this rule
|
292
|
-
# @param [Array<Rule, Symbol, String>] terminals
|
293
|
-
# @return [Integer] if number of terminals added
|
294
|
-
def add_first(terminals)
|
295
|
-
@first ||= []
|
296
|
-
terminals = terminals.map {|t| t.is_a?(Rule) ? t.sym : t} - @first
|
297
|
-
@first += terminals
|
298
|
-
terminals.length
|
299
|
-
end
|
300
|
-
|
301
|
-
# Add terminal as following this rule. Don't add _eps as a follow
|
362
|
+
##
|
363
|
+
# For :hex, :istr, or :range, create a regular expression.
|
302
364
|
#
|
303
|
-
# @
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
365
|
+
# @return [Regexp]
|
366
|
+
def to_regexp
|
367
|
+
case expr.first
|
368
|
+
when :hex
|
369
|
+
Regexp.new(translate_codepoints(expr[1]))
|
370
|
+
when :istr
|
371
|
+
/#{expr.last}/ui
|
372
|
+
when :range
|
373
|
+
Regexp.new("[#{translate_codepoints(expr[1])}]")
|
374
|
+
else
|
375
|
+
raise "Can't turn #{expr.inspect} into a regexp"
|
311
376
|
end
|
312
|
-
terminals.length
|
313
377
|
end
|
314
378
|
|
315
379
|
# Is this a terminal?
|
380
|
+
#
|
316
381
|
# @return [Boolean]
|
317
382
|
def terminal?
|
318
383
|
kind == :terminal
|
@@ -340,18 +405,14 @@ module EBNF
|
|
340
405
|
expr.is_a?(Array) && expr.first == :seq
|
341
406
|
end
|
342
407
|
|
343
|
-
# Is this rule of the form (alt ...)?
|
344
|
-
def alt?
|
345
|
-
expr.is_a?(Array) && expr.first == :alt
|
346
|
-
end
|
347
|
-
|
348
408
|
def inspect
|
349
409
|
"#<EBNF::Rule:#{object_id} " +
|
350
410
|
{sym: sym, id: id, kind: kind, expr: expr}.inspect +
|
351
411
|
">"
|
352
412
|
end
|
353
413
|
|
354
|
-
# Two rules are equal if they have the same {#sym}, {#kind} and {#expr}
|
414
|
+
# Two rules are equal if they have the same {#sym}, {#kind} and {#expr}.
|
415
|
+
#
|
355
416
|
# @param [Rule] other
|
356
417
|
# @return [Boolean]
|
357
418
|
def ==(other)
|
@@ -360,37 +421,259 @@ module EBNF
|
|
360
421
|
expr == other.expr
|
361
422
|
end
|
362
423
|
|
363
|
-
# Two rules are equivalent if they have the same {#expr}
|
424
|
+
# Two rules are equivalent if they have the same {#expr}.
|
425
|
+
#
|
364
426
|
# @param [Rule] other
|
365
427
|
# @return [Boolean]
|
366
|
-
def
|
367
|
-
expr
|
428
|
+
def eql?(other)
|
429
|
+
expr == other.expr
|
368
430
|
end
|
369
431
|
|
370
|
-
#
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
@expr = @expr.map {|e| e == src_rule.sym ? dst_rule.sym : e}
|
432
|
+
# Rules compare using their ids
|
433
|
+
def <=>(other)
|
434
|
+
if id && other.id
|
435
|
+
if id == other.id
|
436
|
+
id.to_s <=> other.id.to_s
|
437
|
+
else
|
438
|
+
id.to_f <=> other.id.to_f
|
439
|
+
end
|
379
440
|
else
|
380
|
-
|
441
|
+
sym.to_s <=> other.sym.to_s
|
381
442
|
end
|
382
|
-
self
|
383
443
|
end
|
384
444
|
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
445
|
+
##
|
446
|
+
# Utility function to translate code points of the form '#xN' into ruby unicode characters
|
447
|
+
def translate_codepoints(str)
|
448
|
+
str.gsub(/#x\h+/) {|c| c[2..-1].scanf("%x").first.chr(Encoding::UTF_8)}
|
449
|
+
end
|
450
|
+
|
451
|
+
# Return the non-terminals for this rule.
|
452
|
+
#
|
453
|
+
# * `alt` => this is every non-terminal.
|
454
|
+
# * `diff` => this is every non-terminal.
|
455
|
+
# * `hex` => nil
|
456
|
+
# * `istr` => nil
|
457
|
+
# * `not` => this is the last expression, if any.
|
458
|
+
# * `opt` => this is the last expression, if any.
|
459
|
+
# * `plus` => this is the last expression, if any.
|
460
|
+
# * `range` => nil
|
461
|
+
# * `rept` => this is the last expression, if any.
|
462
|
+
# * `seq` => this is the first expression in the sequence, if any.
|
463
|
+
# * `star` => this is the last expression, if any.
|
464
|
+
#
|
465
|
+
# @param [Array<Rule>] ast
|
466
|
+
# The set of rules, used to turn symbols into rules
|
467
|
+
# @param [Array<Symbol,String,Array>] expr (@expr)
|
468
|
+
# The expression to check, defaults to the rule expression.
|
469
|
+
# Typically, if the expression is recursive, the embedded expression is called recursively.
|
470
|
+
# @return [Array<Rule>]
|
471
|
+
# @note this is used for LL(1) transformation, so rule types are limited
|
472
|
+
def non_terminals(ast, expr = @expr)
  # alt and diff contribute every operand; any other operator contributes
  # only the single operand that directly follows the operator.
  operands = %i[alt diff].include?(expr.first) ? expr[1..-1] : expr[1, 1]

  found = operands.map do |operand|
    if operand.is_a?(Symbol)
      # Resolve the symbol through the AST and keep it only if it names a rule.
      rule = ast.detect { |candidate| candidate.sym == operand }
      rule if rule && rule.rule?
    elsif operand.is_a?(Array)
      # Embedded expression: collect from it recursively.
      non_terminals(ast, operand)
    end
  end

  found.flatten.compact.uniq
end
|
485
|
+
|
486
|
+
# Return the terminals for this rule.
|
487
|
+
#
|
488
|
+
# * `alt` => this is every terminal.
|
489
|
+
# * `diff` => this is every terminal.
|
490
|
+
# * `hex` => nil
|
491
|
+
# * `istr` => nil
|
492
|
+
# * `not` => this is the last expression, if any.
|
493
|
+
# * `opt` => this is the last expression, if any.
|
494
|
+
# * `plus` => this is the last expression, if any.
|
495
|
+
# * `range` => nil
|
496
|
+
# * `rept` => this is the last expression, if any.
|
497
|
+
# * `seq` => this is the first expression in the sequence, if any.
|
498
|
+
# * `star` => this is the last expression, if any.
|
499
|
+
#
|
500
|
+
# @param [Array<Rule>] ast
|
501
|
+
# The set of rules, used to turn symbols into rules
|
502
|
+
# @param [Array<Symbol,String,Array>] expr (@expr)
|
503
|
+
# The expression to check, defaults to the rule expression.
|
504
|
+
# Typically, if the expression is recursive, the embedded expression is called recursively.
|
505
|
+
# @return [Array<Rule, String>]
|
506
|
+
# @note this is used for LL(1) transformation, so rule types are limited
|
507
|
+
def terminals(ast, expr = @expr)
  # alt and diff contribute every operand; any other operator contributes
  # only the single operand that directly follows the operator.
  operands = %i[alt diff].include?(expr.first) ? expr[1..-1] : expr[1, 1]

  found = operands.map do |operand|
    if operand.is_a?(Symbol)
      # Resolve the symbol through the AST and keep it only if it is a terminal.
      rule = ast.detect { |candidate| candidate.sym == operand }
      rule if rule && rule.terminal?
    elsif operand.is_a?(String)
      # String operands are returned as they are.
      operand
    elsif operand.is_a?(Array)
      # Embedded expression: collect from it recursively.
      terminals(ast, operand)
    end
  end

  found.flatten.compact.uniq
end
|
520
|
+
|
521
|
+
# Return the symbols used in the rule.
|
522
|
+
#
|
523
|
+
# @param [Array<Symbol,String,Array>] expr (@expr)
|
524
|
+
# The expression to check, defaults to the rule expression.
|
525
|
+
# Typically, if the expression is recursive, the embedded expression is called recursively.
|
526
|
+
# @return [Array<Symbol>]
|
527
|
+
def symbols(expr = @expr)
  # Walks every operand after the operator, gathering symbols in order of
  # first appearance and descending into embedded expressions.
  collected = []
  expr[1..-1].each do |operand|
    if operand.is_a?(Symbol)
      collected << operand
    elsif operand.is_a?(Array)
      collected.concat(symbols(operand))
    end
  end
  collected.uniq
end
|
537
|
+
|
538
|
+
##
|
539
|
+
# The following are used for LL(1) transformation.
|
540
|
+
##
|
541
|
+
|
542
|
+
# Does this rule start with `sym`? It does if expr is that sym,
|
543
|
+
# expr starts with alt and contains that sym,
|
544
|
+
# or expr starts with seq and the next element is that sym.
|
545
|
+
#
|
546
|
+
# @param [Symbol, class] sym
|
547
|
+
# Symbol matching any start element, or if it is String, any start element which is a String
|
548
|
+
# @return [Array<Symbol, String>] list of symbol (singular), or strings which are start symbol, or nil if there are none
|
549
|
+
def starts_with?(sym)
  # @param [Symbol, Class] sym
  #   a specific symbol, or a class such as String to match any operand of that class
  # @return [Array<Symbol, String>, nil] the matching start elements, or nil if there are none
  if seq?
    # A sequence starts with the operand directly after the operator.
    head = expr.fetch(1, nil)
    return [head] if sym === head
  elsif alt?
    # Any alternative may start the rule. Only operands are examined: the
    # leading :alt operator is not a start element and must not match.
    matches = expr[1..-1].select { |operand| sym === operand }
    return matches unless matches.empty?
  end
  nil
end
|
558
|
+
|
559
|
+
##
|
560
|
+
# Validate the rule, with respect to an AST.
|
561
|
+
#
|
562
|
+
# @param [Array<Rule>] ast
|
563
|
+
# The set of rules, used to turn symbols into rules
|
564
|
+
# @param [Array<Symbol,String,Array>] expr (@expr)
|
565
|
+
# The expression to check, defaults to the rule expression.
|
566
|
+
# Typically, if the expression is recursive, the embedded expression is called recursively.
|
567
|
+
# @raise [SyntaxError]
|
568
|
+
def validate!(ast, expr = @expr)
  # Checks the operator, the operand count and the form of the operands of
  # +expr+, raising SyntaxError on the first problem found.
  op = expr.first
  raise SyntaxError, "Unknown operator: #{op}" unless OP_ARGN.key?(op)
  # Operators whose OP_ARGN entry is nil skip the fixed operand-count check.
  raise SyntaxError, "Argument count missmatch on operator #{op}, had #{expr.length - 1} expected #{OP_ARGN[op]}" if
    OP_ARGN[op] && OP_ARGN[op] != expr.length - 1

  if op == :alt
    # alt needs at least one operand
    raise SyntaxError, "alt operation must have at least one operand, had #{expr.length - 1}" unless expr.length > 1
  elsif op == :rept
    # rept needs a non-negative integer min, and a max that is either '*' or a non-negative integer
    raise SyntaxError, "rept operation must an non-negative integer minimum, was #{expr[1]}" unless
      expr[1].is_a?(Integer) && expr[1] >= 0
    raise SyntaxError, "rept operation must an non-negative integer maximum or '*', was #{expr[2]}" unless
      expr[2] == '*' || expr[2].is_a?(Integer) && expr[2] >= 0
  end

  case op
  when :hex
    # \A and \z anchor the whole operand; ^ and $ only anchor a single line,
    # which let operands such as "#x41\njunk" through.
    # NOTE(review): the message interpolates the rule's `sym`, not the
    # offending operand -- confirm whether that is intended.
    raise SyntaxError, "Hex operand must be of form '#xN+': #{sym}" unless expr.last.match?(/\A#x\h+\z/)
  when :range
    str = expr.last.dup
    str = str[1..-1] if str.start_with?('^')
    str = str[0..-2] if str.end_with?('-') # Allowed at end of range
    scanner = StringScanner.new(str)
    # Track what kind of component was seen last and whether a '-' is pending,
    # so that the two ends of a range cannot mix hex and plain characters.
    hex = rchar = in_range = false
    until scanner.eos?
      begin
        if scanner.scan(Terminals::HEX)
          raise SyntaxError if in_range && rchar
          rchar = in_range = false
          hex = true
        elsif scanner.scan(Terminals::R_CHAR)
          raise SyntaxError if in_range && hex
          hex = in_range = false
          rchar = true
        else
          raise(SyntaxError, "Range contains illegal components at offset #{scanner.pos}: was #{expr.last}")
        end

        if scanner.scan(/\-/)
          raise SyntaxError if in_range
          in_range = true
        end
      rescue SyntaxError
        # Bare SyntaxErrors raised above are re-raised with position details.
        raise(SyntaxError, "Range contains illegal components at offset #{scanner.pos}: was #{expr.last}")
      end
    end
  else
    # NOTE(review): for operators other than alt/diff only the operand right
    # after the operator is checked (e.g. later members of a seq are not) --
    # confirm whether validation is meant to cover every operand.
    ([:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1, 1]).each do |operand|
      case operand
      when Symbol
        rule = ast.detect { |candidate| candidate.sym == operand }
        raise SyntaxError, "No rule found for #{operand}" unless rule
      when Array
        validate!(ast, operand)
      when String
        # Whole-string anchors: every character must match CHAR, not just one line.
        raise SyntaxError, "String must be of the form CHAR*" unless operand.match?(/\A#{Terminals::CHAR}*\z/)
      end
    end
  end
end
|
393
629
|
|
630
|
+
##
|
631
|
+
# Validate the rule, with respect to an AST.
|
632
|
+
#
|
633
|
+
# Uses `#validate!` and catches `SyntaxError`
|
634
|
+
#
|
635
|
+
# @param [Array<Rule>] ast
|
636
|
+
# The set of rules, used to turn symbols into rules
|
637
|
+
# @return [Boolean]
|
638
|
+
def valid?(ast)
  # Runs validate! and turns a SyntaxError into a false result; any other
  # exception propagates to the caller.
  begin
    validate!(ast)
  rescue SyntaxError
    return false
  end
  true
end
|
644
|
+
|
645
|
+
# Do the firsts of this rule include the empty string?
|
646
|
+
#
|
647
|
+
# @return [Boolean]
|
648
|
+
def first_includes_eps?
  # True when the first set recorded in @first contains :_eps.
  # Returns false, not nil, while no first set has been recorded yet, so the
  # result is always a Boolean.
  #
  # @return [Boolean]
  @first ? @first.include?(:_eps) : false
end
|
651
|
+
|
652
|
+
# Add terminals to the first set of this rule.
|
653
|
+
#
|
654
|
+
# @param [Array<Rule, Symbol, String>] terminals
|
655
|
+
# @return [Integer] the number of terminals added
|
656
|
+
def add_first(terminals)
  # Appends to @first the given terminals that are not already recorded.
  # Rule arguments are recorded by their symbol. Duplicates inside
  # +terminals+ are collapsed so each entry is stored and counted once.
  #
  # @param [Array<Rule, Symbol, String>] terminals
  # @return [Integer] the number of entries actually added
  @first ||= []
  additions = terminals.map { |t| t.is_a?(Rule) ? t.sym : t }.uniq - @first
  @first += additions
  additions.length
end
|
662
|
+
|
663
|
+
# Add terminal as following this rule. Don't add _eps as a follow
|
664
|
+
#
|
665
|
+
# @param [Array<Rule, Symbol, String>] terminals
|
666
|
+
# @return [Integer] the number of terminals added
|
667
|
+
def add_follow(terminals)
  # Appends to @follow the given terminals that are not already recorded.
  # Rule arguments are recorded by their symbol and :_eps is never stored.
  # Duplicates inside +terminals+ are collapsed so each entry is stored and
  # counted once. @follow is left unset when nothing is added.
  #
  # @param [Array<Rule, Symbol, String>] terminals
  # @return [Integer] the number of entries actually added
  additions = terminals.map { |t| t.is_a?(Rule) ? t.sym : t }.uniq - (@follow || []) - [:_eps]
  @follow = (@follow || []) + additions unless additions.empty?
  additions.length
end
|
676
|
+
|
394
677
|
private
|
395
678
|
def ttl_expr(expr, pfx, depth, is_obj = true)
|
396
679
|
indent = ' ' * depth
|
@@ -406,17 +689,28 @@ module EBNF
|
|
406
689
|
|
407
690
|
case op
|
408
691
|
when :seq, :alt, :diff
|
692
|
+
# Multiple operands
|
409
693
|
statements << %{#{indent}#{bra}#{pfx}:#{op} (}
|
410
694
|
expr.each {|a| statements += ttl_expr(a, pfx, depth + 1)}
|
411
695
|
statements << %{#{indent} )#{ket}}
|
412
|
-
when :opt, :plus, :star
|
696
|
+
when :opt, :plus, :star, :not
|
697
|
+
# Single operand
|
413
698
|
statements << %{#{indent}#{bra}#{pfx}:#{op} }
|
414
699
|
statements += ttl_expr(expr.first, pfx, depth + 1)
|
415
700
|
statements << %{#{indent} #{ket}} unless ket.empty?
|
416
|
-
when :
|
701
|
+
when :rept
|
702
|
+
# Three operands (min, max and expr)
|
703
|
+
statements << %{ #{indent}#{pfx}:min #{expr[0].inspect};}
|
704
|
+
statements << %{ #{indent}#{pfx}:max #{expr[1].inspect};}
|
705
|
+
statements << %{#{indent}#{bra}#{pfx}:#{op} }
|
706
|
+
statements += ttl_expr(expr.last, pfx, depth + 1)
|
707
|
+
statements << %{#{indent} #{ket}} unless ket.empty?
|
708
|
+
when :_empty, :_eps
|
417
709
|
statements << %{#{indent}"g:#{op.to_s[1..-1]}"}
|
418
710
|
when :"'"
|
419
711
|
statements << %{#{indent}"#{esc(expr)}"}
|
712
|
+
when :istr
|
713
|
+
statements << %{#{indent}#{bra} re:matches #{expr.first.inspect} #{ket}}
|
420
714
|
when :range
|
421
715
|
statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
|
422
716
|
when :hex
|
@@ -472,7 +766,7 @@ module EBNF
|
|
472
766
|
def make_sym_id(variation = nil)
  # Bumps the per-rule sequence counter and derives a fresh symbol/id pair
  # from it. The id half is nil when this rule itself has no id.
  @id_seq = (@id_seq || 0) + 1
  derived_sym = "_#{@sym}_#{@id_seq}#{variation}".to_sym
  derived_id = @id ? "#{@id}.#{@id_seq}#{variation}" : nil
  [derived_sym, derived_id]
end
|
477
771
|
end
|
478
772
|
end
|