ebnf 1.1.3 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +221 -198
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +40 -21
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/doap.ttl +23 -15
- data/etc/ebnf.ebnf +21 -33
- data/etc/ebnf.html +171 -160
- data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
- data/etc/ebnf.ll1.sxp +182 -183
- data/etc/ebnf.peg.rb +90 -0
- data/etc/ebnf.peg.sxp +84 -0
- data/etc/ebnf.sxp +40 -41
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +363 -362
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +103 -101
- data/lib/ebnf.rb +7 -2
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +113 -69
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +138 -6
- data/lib/ebnf/ll1/lexer.rb +37 -32
- data/lib/ebnf/ll1/parser.rb +113 -73
- data/lib/ebnf/ll1/scanner.rb +83 -51
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +561 -0
- data/lib/ebnf/peg/rule.rb +250 -0
- data/lib/ebnf/rule.rb +443 -148
- data/lib/ebnf/terminals.rb +21 -0
- data/lib/ebnf/writer.rb +565 -83
- metadata +107 -29
- data/etc/sparql.rb +0 -45773
@@ -0,0 +1,250 @@
|
|
1
|
+
module EBNF::PEG
|
2
|
+
# Behavior for parsing a PEG rule
|
3
|
+
module Rule
|
4
|
+
##
|
5
|
+
# Initialized by parser when loading rules.
|
6
|
+
# Used for finding rules and invoking elements of the parse process.
|
7
|
+
#
|
8
|
+
# @return [EBNF::PEG::Parser] parser
|
9
|
+
attr_accessor :parser
|
10
|
+
|
11
|
+
##
|
12
|
+
# Parse a rule or terminal, invoking callbacks, as appropriate
|
13
|
+
|
14
|
+
# If there are `start_production` and/or `production` handlers,
|
15
|
+
# they are invoked with a `prod_data` stack, the input stream and offset.
|
16
|
+
# Otherwise, the results are added as an array value
|
17
|
+
# to a hash indexed by the rule name.
|
18
|
+
#
|
19
|
+
# If matched, the input position is updated and the results returned in a Hash.
|
20
|
+
#
|
21
|
+
# * `alt`: returns the value of the matched production or `:unmatched`.
|
22
|
+
# * `diff`: returns the value matched, or `:unmatched`.
|
23
|
+
# * `hex`: returns a string composed of the matched hex character, or `:unmatched`.
|
24
|
+
# * `opt`: returns the value matched, or `nil` if unmatched.
|
25
|
+
# * `plus`: returns an array of the values matched for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string.
|
26
|
+
# * `range`: returns a string composed of the values matched, or `:unmatched`, if less than `min` are matched.
|
27
|
+
# * `rept`: returns an array of the values matched for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string.
|
28
|
+
# * `seq`: returns an array composed of single-entry hashes for each matched production indexed by the production name, or `:unmatched` if any production fails to match. For Terminals, returns a string created by concatenating these values. Via option in a `production` or definition, the result can be a single hash with values for each matched production; note that this is not always possible due to the possibility of repeated productions within the sequence.
|
29
|
+
# * `star`: returns an array of the values matched for the specified production. For Terminals, these are concatenated into a single string.
|
30
|
+
#
|
31
|
+
# @param [Scanner] input
|
32
|
+
# @return [Hash{Symbol => Object}, :unmatched] A hash with keys for matched component of the expression. Returns :unmatched if the input does not match the production.
|
33
|
+
def parse(input)
|
34
|
+
# Save position and linenumber for backtracking
|
35
|
+
pos, lineno = input.pos, input.lineno
|
36
|
+
|
37
|
+
parser.packrat[sym] ||= {}
|
38
|
+
if parser.packrat[sym][pos]
|
39
|
+
parser.debug("#{sym}(:memo)", lineno: lineno) { "#{parser.packrat[sym][pos].inspect}(@#{pos})"}
|
40
|
+
input.pos, input.lineno = parser.packrat[sym][pos][:pos], parser.packrat[sym][pos][:lineno]
|
41
|
+
return parser.packrat[sym][pos][:result]
|
42
|
+
end
|
43
|
+
|
44
|
+
if terminal?
|
45
|
+
# If the terminal is defined with a regular expression,
|
46
|
+
# use that to match the input,
|
47
|
+
# otherwise,
|
48
|
+
if regexp = parser.find_terminal_regexp(sym)
|
49
|
+
matched = input.scan(regexp)
|
50
|
+
result = parser.onTerminal(sym, (matched ? matched : :unmatched))
|
51
|
+
# Update furthest failure for strings and terminals
|
52
|
+
parser.update_furthest_failure(input.pos, input.lineno, sym) if result == :unmatched
|
53
|
+
parser.packrat[sym][pos] = {
|
54
|
+
pos: input.pos,
|
55
|
+
lineno: input.lineno,
|
56
|
+
result: result
|
57
|
+
}
|
58
|
+
return parser.packrat[sym][pos][:result]
|
59
|
+
end
|
60
|
+
else
|
61
|
+
eat_whitespace(input)
|
62
|
+
end
|
63
|
+
start_options = parser.onStart(sym)
|
64
|
+
|
65
|
+
result = case expr.first
|
66
|
+
when :alt
|
67
|
+
# Return the first expression to match.
|
68
|
+
# Result is either :unmatched, or the value of the matching rule
|
69
|
+
alt = :unmatched
|
70
|
+
expr[1..-1].each do |prod|
|
71
|
+
alt = case prod
|
72
|
+
when Symbol
|
73
|
+
rule = parser.find_rule(prod)
|
74
|
+
raise "No rule found for #{prod}" unless rule
|
75
|
+
rule.parse(input)
|
76
|
+
when String
|
77
|
+
input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
|
78
|
+
end
|
79
|
+
if alt == :unmatched
|
80
|
+
# Update furthest failure for strings and terminals
|
81
|
+
parser.update_furthest_failure(input.pos, input.lineno, prod) if prod.is_a?(String) || rule.terminal?
|
82
|
+
else
|
83
|
+
break
|
84
|
+
end
|
85
|
+
end
|
86
|
+
alt
|
87
|
+
when :diff
|
88
|
+
# matches any string that matches A but does not match B.
|
89
|
+
# (Note, this is only used for Terminal rules, non-terminals will use :not)
|
90
|
+
raise "Diff used on non-terminal #{prod}" unless terminal?
|
91
|
+
re1, re2 = Regexp.new(translate_codepoints(expr[1])), Regexp.new(translate_codepoints(expr[2]))
|
92
|
+
matched = input.scan(re1)
|
93
|
+
if !matched || re2.match?(matched)
|
94
|
+
# Update furthest failure for terminals
|
95
|
+
parser.update_furthest_failure(input.pos, input.lineno, sym)
|
96
|
+
:unmatched
|
97
|
+
else
|
98
|
+
matched
|
99
|
+
end
|
100
|
+
when :hex
|
101
|
+
# Matches the given hex character if expression matches the character whose number (code point) in ISO/IEC 10646 is N. The number of leading zeros in the #xN form is insignificant.
|
102
|
+
input.scan(to_regexp) || begin
|
103
|
+
# Update furthest failure for terminals
|
104
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr.last)
|
105
|
+
:unmatched
|
106
|
+
end
|
107
|
+
when :not
|
108
|
+
# matches any string that does not match B.
|
109
|
+
res = case prod = expr[1]
|
110
|
+
when Symbol
|
111
|
+
rule = parser.find_rule(prod)
|
112
|
+
raise "No rule found for #{prod}" unless rule
|
113
|
+
rule.parse(input)
|
114
|
+
when String
|
115
|
+
input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
|
116
|
+
end
|
117
|
+
if res != :unmatched
|
118
|
+
# Update furthest failure for terminals
|
119
|
+
parser.update_furthest_failure(input.pos, input.lineno, sym) if terminal?
|
120
|
+
:unmatched
|
121
|
+
else
|
122
|
+
nil
|
123
|
+
end
|
124
|
+
when :opt
|
125
|
+
# Result is the matched value or nil
|
126
|
+
opt = rept(input, 0, 1, expr[1])
|
127
|
+
|
128
|
+
# Update furthest failure for strings and terminals
|
129
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
|
130
|
+
opt.first
|
131
|
+
when :plus
|
132
|
+
# Result is an array of all expressions while they match,
|
133
|
+
# at least one must match
|
134
|
+
plus = rept(input, 1, '*', expr[1])
|
135
|
+
|
136
|
+
# Update furthest failure for strings and terminals
|
137
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
|
138
|
+
plus.is_a?(Array) && terminal? ? plus.join("") : plus
|
139
|
+
when :range, :istr
|
140
|
+
# Matches the specified character range
|
141
|
+
input.scan(to_regexp) || begin
|
142
|
+
# Update furthest failure for strings and terminals
|
143
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr[1])
|
144
|
+
:unmatched
|
145
|
+
end
|
146
|
+
when :rept
|
147
|
+
# Result is an array of all expressions while they match,
|
148
|
+
# an empty array if none match
|
149
|
+
rept = rept(input, expr[1], expr[2], expr[3])
|
150
|
+
|
151
|
+
# Update furthest failure for strings and terminals
|
152
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr[3]) if terminal?
|
153
|
+
rept.is_a?(Array) && terminal? ? rept.join("") : rept
|
154
|
+
when :seq
|
155
|
+
# Evaluate each expression into an array of hashes where each hash contains a key from the associated production and the value is the parsed value of that production. Returns :unmatched if the input does not match the production. Value ordering is ensured by native Hash ordering.
|
156
|
+
seq = expr[1..-1].each_with_object([]) do |prod, accumulator|
|
157
|
+
eat_whitespace(input) unless accumulator.empty? || terminal?
|
158
|
+
res = case prod
|
159
|
+
when Symbol
|
160
|
+
rule = parser.find_rule(prod)
|
161
|
+
raise "No rule found for #{prod}" unless rule
|
162
|
+
rule.parse(input)
|
163
|
+
when String
|
164
|
+
input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
|
165
|
+
end
|
166
|
+
if res == :unmatched
|
167
|
+
# Update furthest failure for strings and terminals
|
168
|
+
parser.update_furthest_failure(input.pos, input.lineno, prod)
|
169
|
+
break :unmatched
|
170
|
+
end
|
171
|
+
accumulator << {prod.to_sym => res}
|
172
|
+
end
|
173
|
+
if seq == :unmatched
|
174
|
+
:unmatched
|
175
|
+
elsif terminal?
|
176
|
+
seq.map(&:values).compact.join("") # Concat values for terminal production
|
177
|
+
elsif start_options[:as_hash]
|
178
|
+
seq.inject {|memo, h| memo.merge(h)}
|
179
|
+
else
|
180
|
+
seq
|
181
|
+
end
|
182
|
+
when :star
|
183
|
+
# Result is an array of all expressions while they match,
|
184
|
+
# an empty array if none match
|
185
|
+
star = rept(input, 0, '*', expr[1])
|
186
|
+
|
187
|
+
# Update furthest failure for strings and terminals
|
188
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
|
189
|
+
star.is_a?(Array) && terminal? ? star.join("") : star
|
190
|
+
else
|
191
|
+
raise "attempt to parse unknown rule type: #{expr.first}"
|
192
|
+
end
|
193
|
+
|
194
|
+
if result == :unmatched
|
195
|
+
input.pos, input.lineno = pos, lineno
|
196
|
+
end
|
197
|
+
|
198
|
+
result = parser.onFinish(result)
|
199
|
+
(parser.packrat[sym] ||= {})[pos] = {
|
200
|
+
pos: input.pos,
|
201
|
+
lineno: input.lineno,
|
202
|
+
result: result
|
203
|
+
}
|
204
|
+
return parser.packrat[sym][pos][:result]
|
205
|
+
end
|
206
|
+
|
207
|
+
##
|
208
|
+
# Repetition, 0-1, 0-n, 1-n, ...
|
209
|
+
#
|
210
|
+
# Note, nil results are removed from the result, but count towards min/max calculations
|
211
|
+
#
|
212
|
+
# @param [Scanner] input
|
213
|
+
# @param [Integer] min
|
214
|
+
# @param [Integer, String] max the maximum number of matches, or `'*'` for unbounded.
|
215
|
+
# If it is an integer, it stops matching after max entries.
|
216
|
+
# @param [Symbol, String] prod
|
217
|
+
# @return [:unmatched, Array]
|
218
|
+
def rept(input, min, max, prod)
|
219
|
+
result = []
|
220
|
+
|
221
|
+
case prod
|
222
|
+
when Symbol
|
223
|
+
rule = parser.find_rule(prod)
|
224
|
+
raise "No rule found for #{prod}" unless rule
|
225
|
+
while (max == '*' || result.length < max) && (res = rule.parse(input)) != :unmatched
|
226
|
+
eat_whitespace(input) unless terminal?
|
227
|
+
result << res
|
228
|
+
end
|
229
|
+
when String
|
230
|
+
while (res = input.scan(Regexp.new(Regexp.quote(prod)))) && (max == '*' || result.length < max)
|
231
|
+
eat_whitespace(input) unless terminal?
|
232
|
+
result << res
|
233
|
+
end
|
234
|
+
end
|
235
|
+
|
236
|
+
result.length < min ? :unmatched : result.compact
|
237
|
+
end
|
238
|
+
|
239
|
+
##
|
240
|
+
# Eat whitespace between non-terminal rules
|
241
|
+
def eat_whitespace(input)
|
242
|
+
if parser.whitespace.is_a?(Regexp)
|
243
|
+
# Eat whitespace before a non-terminal
|
244
|
+
input.skip(parser.whitespace)
|
245
|
+
elsif parser.whitespace.is_a?(Rule)
|
246
|
+
parser.whitespace.parse(input) # throw away result
|
247
|
+
end
|
248
|
+
end
|
249
|
+
end
|
250
|
+
end
|
data/lib/ebnf/rule.rb
CHANGED
@@ -1,15 +1,33 @@
|
|
1
|
+
require 'scanf'
|
2
|
+
require 'strscan'
|
3
|
+
|
1
4
|
module EBNF
|
2
5
|
# Represent individual parsed rules
|
3
6
|
class Rule
|
4
|
-
# Operations which are flattened to seprate rules in to_bnf
|
7
|
+
# Operations which are flattened to separate rules in to_bnf.
|
5
8
|
BNF_OPS = %w{
|
6
|
-
alt opt plus seq star
|
9
|
+
alt diff not opt plus rept seq star
|
7
10
|
}.map(&:to_sym).freeze
|
8
11
|
|
9
12
|
TERM_OPS = %w{
|
10
|
-
|
13
|
+
hex istr range
|
11
14
|
}.map(&:to_sym).freeze
|
12
15
|
|
16
|
+
# The number of arguments expected per operator. `nil` for unspecified
|
17
|
+
OP_ARGN = {
|
18
|
+
alt: nil,
|
19
|
+
diff: 2,
|
20
|
+
hex: 1,
|
21
|
+
istr: 1,
|
22
|
+
not: 1,
|
23
|
+
opt: 1,
|
24
|
+
plus: 1,
|
25
|
+
range: 1,
|
26
|
+
rept: 3,
|
27
|
+
seq: nil,
|
28
|
+
star: 1
|
29
|
+
}
|
30
|
+
|
13
31
|
# Symbol of rule
|
14
32
|
#
|
15
33
|
# @return [Symbol]
|
@@ -26,7 +44,7 @@ module EBNF
|
|
26
44
|
|
27
45
|
# Kind of rule
|
28
46
|
#
|
29
|
-
# @return [:rule, :terminal, or :pass]
|
47
|
+
# @return [:rule, :terminal, :terminals, or :pass]
|
30
48
|
attr_accessor :kind
|
31
49
|
|
32
50
|
# Rule expression
|
@@ -57,19 +75,38 @@ module EBNF
|
|
57
75
|
# Determines preparation and cleanup rules for reconstituting EBNF ? * + from BNF
|
58
76
|
attr_accessor :cleanup
|
59
77
|
|
60
|
-
# @param [
|
61
|
-
# @
|
78
|
+
# @param [Symbol, nil] sym
|
79
|
+
# `nil` is allowed only for @pass or @terminals
|
80
|
+
# @param [String, nil] id
|
62
81
|
# @param [Array] expr
|
63
|
-
#
|
64
|
-
#
|
65
|
-
#
|
66
|
-
#
|
67
|
-
#
|
68
|
-
#
|
69
|
-
#
|
82
|
+
# The expression is an internal-representation of an S-Expression with one of the following operators:
|
83
|
+
#
|
84
|
+
# * `alt` – A list of alternative rules, which are attempted in order. It terminates with the first matching rule, or is terminated as unmatched, if no such rule is found.
|
85
|
+
# * `diff` – matches any string that matches `A` but does not match `B`.
|
86
|
+
# * `hex` – A single character represented using the hexadecimal notation `#xnn`.
|
87
|
+
# * `istr` – A string which matches in a case-insensitive manner, so that `(istr "fOo")` will match either of the strings `"foo"`, `"FOO"` or any other combination.
|
88
|
+
# * `opt` – An optional rule or terminal. It either results in the matching rule or returns `nil`.
|
89
|
+
# * `plus` – A sequence of one or more of the matching rule. If there is no such rule, it is terminated as unmatched; otherwise, the result is an array containing all matched input.
|
90
|
+
# * `range` – A range of characters, possibly repeated, of the form `(range "a-z")`. May also use hexadecimal notation.
|
91
|
+
# * `rept m n` – A sequence of at least `m` and at most `n` of the matching rule. It will always return an array.
|
92
|
+
# * `seq` – A sequence of rules or terminals. If any (other than `opt` or `star`) do not parse, the rule is terminated as unmatched.
|
93
|
+
# * `star` – A sequence of zero or more of the matching rule. It will always return an array.
|
94
|
+
# @param [:rule, :terminal, :terminals, :pass] kind (nil)
|
95
|
+
# @param [String] ebnf (nil)
|
96
|
+
# When parsing, records the EBNF string used to create the rule.
|
97
|
+
# @param [Array] first (nil)
|
98
|
+
# Recorded set of terminals that can begin this rule (LL(1))
|
99
|
+
# @param [Array] follow (nil)
|
100
|
+
# Recorded set of terminals that can follow this rule (LL(1))
|
101
|
+
# @param [Boolean] start (nil)
|
102
|
+
# Is this the starting rule for the grammar?
|
103
|
+
# @param [Rule] top_rule (nil)
|
104
|
+
# The top-most rule. All expressed rules are top-rules, derived rules have the original rule as their top-rule.
|
105
|
+
# @param [Boolean] cleanup (nil)
|
106
|
+
# Records information useful for cleaning up converted :plus, and :star expansions (LL(1)).
|
70
107
|
def initialize(sym, id, expr, kind: nil, ebnf: nil, first: nil, follow: nil, start: nil, top_rule: nil, cleanup: nil)
|
71
108
|
@sym, @id = sym, id
|
72
|
-
@expr = expr.is_a?(Array) ? expr : [:seq, expr]
|
109
|
+
@expr = expr.is_a?(Array) ? expr : [:seq, expr].compact
|
73
110
|
@ebnf, @kind, @first, @follow, @start, @cleanup, @top_rule = ebnf, kind, first, follow, start, cleanup, top_rule
|
74
111
|
@top_rule ||= self
|
75
112
|
@kind ||= case
|
@@ -77,21 +114,53 @@ module EBNF
|
|
77
114
|
when !BNF_OPS.include?(@expr.first) then :terminal
|
78
115
|
else :rule
|
79
116
|
end
|
117
|
+
|
118
|
+
# Allow @pass and @terminals to not be named
|
119
|
+
@sym ||= :_pass if @kind == :pass
|
120
|
+
@sym ||= :_terminals if @kind == :terminals
|
121
|
+
|
122
|
+
raise ArgumentError, "Rule sym must be a symbol, was #{@sym.inspect}" unless @sym.is_a?(Symbol)
|
123
|
+
raise ArgumentError, "Rule id must be a string or nil, was #{@id.inspect}" unless (@id || "").is_a?(String)
|
124
|
+
raise ArgumentError, "Rule kind must be one of :rule, :terminal, :terminals, or :pass, was #{@kind.inspect}" unless
|
125
|
+
@kind.is_a?(Symbol) && %w(rule terminal terminals pass).map(&:to_sym).include?(@kind)
|
126
|
+
|
127
|
+
case @expr.first
|
128
|
+
when :alt
|
129
|
+
raise ArgumentError, "#{@expr.first} operation must have at least one operand, had #{@expr.length - 1}" unless @expr.length > 1
|
130
|
+
when :diff
|
131
|
+
raise ArgumentError, "#{@expr.first} operation must have exactly two operands, had #{@expr.length - 1}" unless @expr.length == 3
|
132
|
+
when :hex, :istr, :not, :opt, :plus, :range, :star
|
133
|
+
raise ArgumentError, "#{@expr.first} operation must have exactly one operand, had #{@expr.length - 1}" unless @expr.length == 2
|
134
|
+
when :rept
|
135
|
+
raise ArgumentError, "#{@expr.first} operation must have exactly three, had #{@expr.length - 1}" unless @expr.length == 4
|
136
|
+
raise ArgumentError, "#{@expr.first} operation must an non-negative integer minimum, was #{@expr[1]}" unless
|
137
|
+
@expr[1].is_a?(Integer) && @expr[1] >= 0
|
138
|
+
raise ArgumentError, "#{@expr.first} operation must an non-negative integer maximum or '*', was #{@expr[2]}" unless
|
139
|
+
@expr[2] == '*' || @expr[2].is_a?(Integer) && @expr[2] >= 0
|
140
|
+
when :seq
|
141
|
+
# It's legal to have a zero-length sequence
|
142
|
+
else
|
143
|
+
raise ArgumentError, "Rule expression must be an array using a known operator, was #{@expr.first}"
|
144
|
+
end
|
80
145
|
end
|
81
146
|
|
82
147
|
##
|
83
148
|
# Return a rule from its SXP representation:
|
84
149
|
#
|
85
150
|
# @example inputs
|
86
|
-
# (pass (plus (range "#x20\\t\\r\\n")))
|
151
|
+
# (pass _pass (plus (range "#x20\\t\\r\\n")))
|
87
152
|
# (rule ebnf "1" (star (alt declaration rule)))
|
88
|
-
# (terminal
|
153
|
+
# (terminal R_CHAR "19" (diff CHAR (alt "]" "-")))
|
89
154
|
#
|
90
|
-
# Also may have (first ...)
|
155
|
+
# Also may have `(first ...)`, `(follow ...)`, or `(start #t)`.
|
91
156
|
#
|
92
|
-
# @param [Array] sxp
|
157
|
+
# @param [String, Array] sxp
|
93
158
|
# @return [Rule]
|
94
159
|
def self.from_sxp(sxp)
|
160
|
+
if sxp.is_a?(String)
|
161
|
+
require 'sxp' unless defined?(SXP)
|
162
|
+
sxp = SXP.parse(sxp)
|
163
|
+
end
|
95
164
|
expr = sxp.detect {|e| e.is_a?(Array) && ![:first, :follow, :start].include?(e.first.to_sym)}
|
96
165
|
first = sxp.detect {|e| e.is_a?(Array) && e.first.to_sym == :first}
|
97
166
|
first = first[1..-1] if first
|
@@ -102,26 +171,28 @@ module EBNF
|
|
102
171
|
start = sxp.any? {|e| e.is_a?(Array) && e.first.to_sym == :start}
|
103
172
|
sym = sxp[1] if sxp[1].is_a?(Symbol)
|
104
173
|
id = sxp[2] if sxp[2].is_a?(String)
|
105
|
-
|
174
|
+
self.new(sym, id, expr, kind: sxp.first, first: first, follow: follow, cleanup: cleanup, start: start)
|
106
175
|
end
|
107
176
|
|
108
177
|
# Build a new rule creating a symbol and numbering from the current rule
|
109
|
-
# Symbol and number creation is handled by the top-most rule in such a chain
|
178
|
+
# Symbol and number creation is handled by the top-most rule in such a chain.
|
110
179
|
#
|
111
180
|
# @param [Array] expr
|
181
|
+
# @param [Symbol] kind (nil)
|
182
|
+
# @param [Hash{Symbol => Symbol}] cleanup (nil)
|
112
183
|
# @param [Hash{Symbol => Object}] options
|
113
|
-
# @param [Symbol] :kind
|
114
184
|
def build(expr, kind: nil, cleanup: nil, **options)
|
115
|
-
new_sym, new_id =
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
185
|
+
new_sym, new_id = @top_rule.send(:make_sym_id)
|
186
|
+
self.class.new(new_sym, new_id, expr,
|
187
|
+
kind: kind,
|
188
|
+
ebnf: @ebnf,
|
189
|
+
top_rule: @top_rule,
|
190
|
+
cleanup: cleanup,
|
191
|
+
**options)
|
122
192
|
end
|
123
193
|
|
124
|
-
# Return representation for building S-Expressions
|
194
|
+
# Return representation for building S-Expressions.
|
195
|
+
#
|
125
196
|
# @return [Array]
|
126
197
|
def for_sxp
|
127
198
|
elements = [kind, sym]
|
@@ -137,40 +208,51 @@ module EBNF
|
|
137
208
|
# Return SXP representation of this rule
|
138
209
|
# @return [String]
|
139
210
|
def to_sxp
|
211
|
+
require 'sxp' unless defined?(SXP)
|
140
212
|
for_sxp.to_sxp
|
141
213
|
end
|
142
214
|
|
143
215
|
alias_method :to_s, :to_sxp
|
144
216
|
|
145
|
-
# Serializes this rule to an Turtle
|
217
|
+
# Serializes this rule to Turtle.
|
218
|
+
#
|
146
219
|
# @return [String]
|
147
220
|
def to_ttl
|
148
221
|
@ebnf.debug("to_ttl") {inspect} if @ebnf
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
%{ rdfs:comment #{comment.inspect};}
|
157
|
-
|
222
|
+
statements = [%{:#{sym} rdfs:label "#{sym}";}]
|
223
|
+
if orig
|
224
|
+
comment = orig.to_s.strip.
|
225
|
+
gsub(/"""/, '\"\"\"').
|
226
|
+
gsub("\\", "\\\\").
|
227
|
+
sub(/^\"/, '\"').
|
228
|
+
sub(/\"$/m, '\"')
|
229
|
+
statements << %{ rdfs:comment #{comment.inspect};}
|
230
|
+
end
|
231
|
+
statements << %{ dc:identifier "#{id}";} if id
|
158
232
|
|
159
233
|
statements += ttl_expr(expr, terminal? ? "re" : "g", 1, false)
|
160
234
|
"\n" + statements.join("\n")
|
161
235
|
end
|
162
236
|
|
237
|
+
# Return a Ruby representation of this rule
|
238
|
+
# @return [String]
|
239
|
+
def to_ruby
|
240
|
+
"EBNF::Rule.new(#{sym.inspect}, #{id.inspect}, #{expr.inspect}#{', kind: ' + kind.inspect unless kind == :rule})"
|
241
|
+
end
|
242
|
+
|
163
243
|
##
|
164
244
|
# Transform EBNF rule to BNF rules:
|
165
245
|
#
|
166
|
-
# * Transform (a
|
167
|
-
#
|
168
|
-
#
|
169
|
-
#
|
170
|
-
# * Transform (a
|
171
|
-
# * Transform (a
|
246
|
+
# * Transform `(rule a "n" (op1 (op2)))` into two rules:
|
247
|
+
#
|
248
|
+
# (rule a "n" (op1 _a_1))
|
249
|
+
# (rule _a_1 "n.1" (op2))
|
250
|
+
# * Transform `(rule a (opt b))` into `(rule a (alt _empty b))`
|
251
|
+
# * Transform `(rule a (star b))` into `(rule a (alt _empty (seq b a)))`
|
252
|
+
# * Transform `(rule a (plus b))` into `(rule a (seq b (star b)))`
|
253
|
+
#
|
254
|
+
# Transformation includes information used to re-construct non-transformed.
|
172
255
|
#
|
173
|
-
# Transformation includes information used to re-construct non-transformed
|
174
256
|
# AST representation
|
175
257
|
# @return [Array<Rule>]
|
176
258
|
def to_bnf
|
@@ -197,19 +279,19 @@ module EBNF
|
|
197
279
|
new_rules = new_rules.map {|r| r.to_bnf}.flatten
|
198
280
|
elsif expr.first == :opt
|
199
281
|
this = dup
|
200
|
-
# * Transform (a
|
282
|
+
# * Transform (rule a (opt b)) into (rule a (alt _empty b))
|
201
283
|
this.expr = [:alt, :_empty, expr.last]
|
202
284
|
this.cleanup = :opt
|
203
285
|
new_rules = this.to_bnf
|
204
286
|
elsif expr.first == :star
|
205
|
-
# * Transform (a
|
287
|
+
# * Transform (rule a (star b)) into (rule a (alt _empty (seq b a)))
|
206
288
|
this = dup
|
207
289
|
this.cleanup = :star
|
208
290
|
new_rule = this.build([:seq, expr.last, this.sym], cleanup: :merge)
|
209
291
|
this.expr = [:alt, :_empty, new_rule.sym]
|
210
292
|
new_rules = [this] + new_rule.to_bnf
|
211
293
|
elsif expr.first == :plus
|
212
|
-
# * Transform (a
|
294
|
+
# * Transform (rule a (plus b)) into (rule a (seq b (star b)))
|
213
295
|
this = dup
|
214
296
|
this.cleanup = :plus
|
215
297
|
this.expr = [:seq, expr.last, [:star, expr.last]]
|
@@ -218,7 +300,7 @@ module EBNF
|
|
218
300
|
# Otherwise, no further transformation necessary
|
219
301
|
new_rules << self
|
220
302
|
elsif [:diff, :hex, :range].include?(expr.first)
|
221
|
-
# This rules are fine,
|
303
|
+
# These rules are fine, they just need to be terminals
|
222
304
|
raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal?
|
223
305
|
new_rules << self
|
224
306
|
else
|
@@ -229,89 +311,73 @@ module EBNF
|
|
229
311
|
return new_rules
|
230
312
|
end
|
231
313
|
|
232
|
-
|
233
|
-
#
|
234
|
-
#
|
235
|
-
#
|
314
|
+
##
|
315
|
+
# Transform EBNF rule for PEG:
|
316
|
+
#
|
317
|
+
# * Transform `(rule a "n" (op1 ... (op2 y) ...z))` into two rules:
|
318
|
+
#
|
319
|
+
# (rule a "n" (op1 ... _a_1 ... z))
|
320
|
+
# (rule _a_1 "n.1" (op2 y))
|
321
|
+
# * Transform `(rule a "n" (diff op1 op2))` into two rules:
|
322
|
+
#
|
323
|
+
# (rule a "n" (seq _a_1 op1))
|
324
|
+
# (rule _a_1 "n.1" (not op2))
|
325
|
+
#
|
236
326
|
# @return [Array<Rule>]
|
237
|
-
def
|
238
|
-
|
239
|
-
case sym
|
240
|
-
when Symbol
|
241
|
-
r = ast.detect {|r| r.sym == sym}
|
242
|
-
r if r && r.rule?
|
243
|
-
else
|
244
|
-
nil
|
245
|
-
end
|
246
|
-
end.compact
|
247
|
-
end
|
327
|
+
def to_peg
|
328
|
+
new_rules = []
|
248
329
|
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
when String
|
261
|
-
sym
|
262
|
-
else
|
263
|
-
nil
|
330
|
+
# Look for rules containing sub-sequences
|
331
|
+
if expr.any? {|e| e.is_a?(Array) && e.first.is_a?(Symbol)}
|
332
|
+
# duplicate ourselves for rewriting
|
333
|
+
this = dup
|
334
|
+
new_rules << this
|
335
|
+
|
336
|
+
expr.each_with_index do |e, index|
|
337
|
+
next unless e.is_a?(Array) && e.first.is_a?(Symbol)
|
338
|
+
new_rule = build(e)
|
339
|
+
this.expr[index] = new_rule.sym
|
340
|
+
new_rules << new_rule
|
264
341
|
end
|
265
|
-
end.compact
|
266
|
-
end
|
267
342
|
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
expr.
|
343
|
+
# Return new rules after recursively applying #to_peg
|
344
|
+
new_rules = new_rules.map {|r| r.to_peg}.flatten
|
345
|
+
elsif expr.first == :diff && !terminal?
|
346
|
+
this = dup
|
347
|
+
new_rule = build([:not, expr[2]])
|
348
|
+
this.expr = [:seq, new_rule.sym, expr[1]]
|
349
|
+
new_rules << this
|
350
|
+
new_rules << new_rule
|
351
|
+
elsif [:hex, :istr, :range].include?(expr.first)
|
352
|
+
# These rules are fine, they just need to be terminals
|
353
|
+
raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal?
|
354
|
+
new_rules << self
|
279
355
|
else
|
280
|
-
|
356
|
+
new_rules << self
|
281
357
|
end
|
358
|
+
|
359
|
+
return new_rules.map {|r| r.extend(EBNF::PEG::Rule)}
|
282
360
|
end
|
283
361
|
|
284
|
-
|
285
|
-
#
|
286
|
-
def first_includes_eps?
|
287
|
-
@first && @first.include?(:_eps)
|
288
|
-
end
|
289
|
-
|
290
|
-
# Add terminal as proceding this rule
|
291
|
-
# @param [Array<Rule, Symbol, String>] terminals
|
292
|
-
# @return [Integer] if number of terminals added
|
293
|
-
def add_first(terminals)
|
294
|
-
@first ||= []
|
295
|
-
terminals = terminals.map {|t| t.is_a?(Rule) ? t.sym : t} - @first
|
296
|
-
@first += terminals
|
297
|
-
terminals.length
|
298
|
-
end
|
299
|
-
|
300
|
-
# Add terminal as following this rule. Don't add _eps as a follow
|
362
|
+
##
|
363
|
+
# For :hex, :istr, or :range, create a regular expression.
|
301
364
|
#
|
302
|
-
# @
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
365
|
+
# @return [Regexp]
|
366
|
+
def to_regexp
|
367
|
+
case expr.first
|
368
|
+
when :hex
|
369
|
+
Regexp.new(translate_codepoints(expr[1]))
|
370
|
+
when :istr
|
371
|
+
/#{expr.last}/ui
|
372
|
+
when :range
|
373
|
+
Regexp.new("[#{translate_codepoints(expr[1])}]")
|
374
|
+
else
|
375
|
+
raise "Can't turn #{expr.inspect} into a regexp"
|
310
376
|
end
|
311
|
-
terminals.length
|
312
377
|
end
|
313
378
|
|
314
379
|
# Is this a terminal?
|
380
|
+
#
|
315
381
|
# @return [Boolean]
|
316
382
|
def terminal?
|
317
383
|
kind == :terminal
|
@@ -339,18 +405,14 @@ module EBNF
|
|
339
405
|
expr.is_a?(Array) && expr.first == :seq
|
340
406
|
end
|
341
407
|
|
342
|
-
# Is this rule of the form (alt ...)?
|
343
|
-
def alt?
|
344
|
-
expr.is_a?(Array) && expr.first == :alt
|
345
|
-
end
|
346
|
-
|
347
408
|
def inspect
|
348
409
|
"#<EBNF::Rule:#{object_id} " +
|
349
410
|
{sym: sym, id: id, kind: kind, expr: expr}.inspect +
|
350
411
|
">"
|
351
412
|
end
|
352
413
|
|
353
|
-
# Two rules are equal if they have the same {#sym}, {#kind} and {#expr}
|
414
|
+
# Two rules are equal if they have the same {#sym}, {#kind} and {#expr}.
|
415
|
+
#
|
354
416
|
# @param [Rule] other
|
355
417
|
# @return [Boolean]
|
356
418
|
def ==(other)
|
@@ -359,37 +421,259 @@ module EBNF
|
|
359
421
|
expr == other.expr
|
360
422
|
end
|
361
423
|
|
362
|
-
# Two rules are equivalent if they have the same {#expr}
|
424
|
+
# Two rules are equivalent if they have the same {#expr}.
|
425
|
+
#
|
363
426
|
# @param [Rule] other
|
364
427
|
# @return [Boolean]
|
365
|
-
def
|
366
|
-
expr
|
428
|
+
def eql?(other)
|
429
|
+
expr == other.expr
|
367
430
|
end
|
368
431
|
|
369
|
-
#
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
@expr = @expr.map {|e| e == src_rule.sym ? dst_rule.sym : e}
|
432
|
+
# Rules compare using their ids
|
433
|
+
def <=>(other)
|
434
|
+
if id && other.id
|
435
|
+
if id == other.id
|
436
|
+
id.to_s <=> other.id.to_s
|
437
|
+
else
|
438
|
+
id.to_f <=> other.id.to_f
|
439
|
+
end
|
378
440
|
else
|
379
|
-
|
441
|
+
sym.to_s <=> other.sym.to_s
|
380
442
|
end
|
381
|
-
self
|
382
443
|
end
|
383
444
|
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
445
|
+
##
|
446
|
+
# Utility function to translate code points of the form '#xN' into ruby unicode characters
|
447
|
+
def translate_codepoints(str)
|
448
|
+
str.gsub(/#x\h+/) {|c| c[2..-1].scanf("%x").first.chr(Encoding::UTF_8)}
|
449
|
+
end
|
450
|
+
|
451
|
+
# Return the non-terminals for this rule.
|
452
|
+
#
|
453
|
+
# * `alt` => this is every non-terminal.
|
454
|
+
# * `diff` => this is every non-terminal.
|
455
|
+
# * `hex` => nil
|
456
|
+
# * `istr` => nil
|
457
|
+
# * `not` => this is the last expression, if any.
|
458
|
+
# * `opt` => this is the last expression, if any.
|
459
|
+
# * `plus` => this is the last expression, if any.
|
460
|
+
# * `range` => nil
|
461
|
+
# * `rept` => this is the last expression, if any.
|
462
|
+
# * `seq` => this is the first expression in the sequence, if any.
|
463
|
+
# * `star` => this is the last expression, if any.
|
464
|
+
#
|
465
|
+
# @param [Array<Rule>] ast
|
466
|
+
# The set of rules, used to turn symbols into rules
|
467
|
+
# @param [Array<Symbol,String,Array>] expr (@expr)
|
468
|
+
# The expression to check, defaults to the rule expression.
|
469
|
+
# Typically, if the expression is recursive, the embedded expression is called recursively.
|
470
|
+
# @return [Array<Rule>]
|
471
|
+
# @note this is used for LL(1) transformation, so rule types are limited
|
472
|
+
def non_terminals(ast, expr = @expr)
|
473
|
+
([:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1,1]).map do |sym|
|
474
|
+
case sym
|
475
|
+
when Symbol
|
476
|
+
r = ast.detect {|r| r.sym == sym}
|
477
|
+
r if r && r.rule?
|
478
|
+
when Array
|
479
|
+
non_terminals(ast, sym)
|
480
|
+
else
|
481
|
+
nil
|
482
|
+
end
|
483
|
+
end.flatten.compact.uniq
|
484
|
+
end
|
485
|
+
|
486
|
+
# Return the terminals for this rule.
|
487
|
+
#
|
488
|
+
# * `alt` => this is every terminal.
|
489
|
+
# * `diff` => this is every terminal.
|
490
|
+
# * `hex` => nil
|
491
|
+
# * `istr` => nil
|
492
|
+
# * `not` => this is the last expression, if any.
|
493
|
+
# * `opt` => this is the last expression, if any.
|
494
|
+
# * `plus` => this is the last expression, if any.
|
495
|
+
# * `range` => nil
|
496
|
+
# * `rept` => this is the last expression, if any.
|
497
|
+
# * `seq` => this is the first expression in the sequence, if any.
|
498
|
+
# * `star` => this is the last expression, if any.
|
499
|
+
#
|
500
|
+
# @param [Array<Rule>] ast
|
501
|
+
# The set of rules, used to turn symbols into rules
|
502
|
+
# @param [Array<Symbol,String,Array>] expr (@expr)
|
503
|
+
# The expression to check, defaults to the rule expression.
|
504
|
+
# Typically, if the expression is recursive, the embedded expression is called recursively.
|
505
|
+
# @return [Array<Rule, String>]
|
506
|
+
# @note this is used for LL(1) transformation, so rule types are limited
|
507
|
+
def terminals(ast, expr = @expr)
|
508
|
+
([:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1,1]).map do |sym|
|
509
|
+
case sym
|
510
|
+
when Symbol
|
511
|
+
r = ast.detect {|r| r.sym == sym}
|
512
|
+
r if r && r.terminal?
|
513
|
+
when String
|
514
|
+
sym
|
515
|
+
when Array
|
516
|
+
terminals(ast, sym)
|
517
|
+
end
|
518
|
+
end.flatten.compact.uniq
|
519
|
+
end
|
520
|
+
|
521
|
+
# Return the symbols used in the rule.
|
522
|
+
#
|
523
|
+
# @param [Array<Symbol,String,Array>] expr (@expr)
|
524
|
+
# The expression to check, defaults to the rule expression.
|
525
|
+
# Typically, if the expression is recursive, the embedded expression is called recursively.
|
526
|
+
# @return [Array<Symbol>]
|
527
|
+
def symbols(expr = @expr)
|
528
|
+
expr[1..-1].map do |sym|
|
529
|
+
case sym
|
530
|
+
when Symbol
|
531
|
+
sym
|
532
|
+
when Array
|
533
|
+
symbols(sym)
|
534
|
+
end
|
535
|
+
end.flatten.compact.uniq
|
536
|
+
end
|
537
|
+
|
538
|
+
##
|
539
|
+
# The following are used for LL(1) transformation.
|
540
|
+
##
|
541
|
+
|
542
|
+
# Does this rule start with `sym`? It does if expr is that sym,
|
543
|
+
# expr starts with alt and contains that sym,
|
544
|
+
# or expr starts with seq and the next element is that sym.
|
545
|
+
#
|
546
|
+
# @param [Symbol, class] sym
|
547
|
+
# Symbol matching any start element, or if it is String, any start element which is a String
|
548
|
+
# @return [Array<Symbol, String>] list of symbol (singular), or strings which are start symbol, or nil if there are none
|
549
|
+
def starts_with?(sym)
|
550
|
+
if seq? && sym === (v = expr.fetch(1, nil))
|
551
|
+
[v]
|
552
|
+
elsif alt? && expr.any? {|e| sym === e}
|
553
|
+
expr.select {|e| sym === e}
|
554
|
+
else
|
555
|
+
nil
|
556
|
+
end
|
557
|
+
end
|
558
|
+
|
559
|
+
##
|
560
|
+
# Validate the rule, with respect to an AST.
|
561
|
+
#
|
562
|
+
# @param [Array<Rule>] ast
|
563
|
+
# The set of rules, used to turn symbols into rules
|
564
|
+
# @param [Array<Symbol,String,Array>] expr (@expr)
|
565
|
+
# The expression to check, defaults to the rule expression.
|
566
|
+
# Typically, if the expression is recursive, the embedded expression is called recursively.
|
567
|
+
# @raise [SyntaxError]
|
568
|
+
def validate!(ast, expr = @expr)
|
569
|
+
op = expr.first
|
570
|
+
raise SyntaxError, "Unknown operator: #{op}" unless OP_ARGN.key?(op)
|
571
|
+
raise SyntaxError, "Argument count missmatch on operator #{op}, had #{expr.length - 1} expected #{OP_ARGN[op]}" if
|
572
|
+
OP_ARGN[op] && OP_ARGN[op] != expr.length - 1
|
573
|
+
|
574
|
+
# rept operator needs min and max
|
575
|
+
if op == :alt
|
576
|
+
raise SyntaxError, "alt operation must have at least one operand, had #{expr.length - 1}" unless expr.length > 1
|
577
|
+
elsif op == :rept
|
578
|
+
raise SyntaxError, "rept operation must an non-negative integer minimum, was #{expr[1]}" unless
|
579
|
+
expr[1].is_a?(Integer) && expr[1] >= 0
|
580
|
+
raise SyntaxError, "rept operation must an non-negative integer maximum or '*', was #{expr[2]}" unless
|
581
|
+
expr[2] == '*' || expr[2].is_a?(Integer) && expr[2] >= 0
|
582
|
+
end
|
583
|
+
|
584
|
+
case op
|
585
|
+
when :hex
|
586
|
+
raise SyntaxError, "Hex operand must be of form '#xN+': #{sym}" unless expr.last.match?(/^#x\h+$/)
|
587
|
+
when :range
|
588
|
+
str = expr.last.dup
|
589
|
+
str = str[1..-1] if str.start_with?('^')
|
590
|
+
str = str[0..-2] if str.end_with?('-') # Allowed at end of range
|
591
|
+
scanner = StringScanner.new(str)
|
592
|
+
hex = rchar = in_range = false
|
593
|
+
while !scanner.eos?
|
594
|
+
begin
|
595
|
+
if scanner.scan(Terminals::HEX)
|
596
|
+
raise SyntaxError if in_range && rchar
|
597
|
+
rchar = in_range = false
|
598
|
+
hex = true
|
599
|
+
elsif scanner.scan(Terminals::R_CHAR)
|
600
|
+
raise SyntaxError if in_range && hex
|
601
|
+
hex = in_range = false
|
602
|
+
rchar = true
|
603
|
+
else
|
604
|
+
raise(SyntaxError, "Range contains illegal components at offset #{scanner.pos}: was #{expr.last}")
|
605
|
+
end
|
606
|
+
|
607
|
+
if scanner.scan(/\-/)
|
608
|
+
raise SyntaxError if in_range
|
609
|
+
in_range = true
|
610
|
+
end
|
611
|
+
rescue SyntaxError
|
612
|
+
raise(SyntaxError, "Range contains illegal components at offset #{scanner.pos}: was #{expr.last}")
|
613
|
+
end
|
614
|
+
end
|
388
615
|
else
|
389
|
-
|
616
|
+
([:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1,1]).each do |sym|
|
617
|
+
case sym
|
618
|
+
when Symbol
|
619
|
+
r = ast.detect {|r| r.sym == sym}
|
620
|
+
raise SyntaxError, "No rule found for #{sym}" unless r
|
621
|
+
when Array
|
622
|
+
validate!(ast, sym)
|
623
|
+
when String
|
624
|
+
raise SyntaxError, "String must be of the form CHAR*" unless sym.match?(/^#{Terminals::CHAR}*$/)
|
625
|
+
end
|
626
|
+
end
|
390
627
|
end
|
391
628
|
end
|
392
629
|
|
630
|
+
##
|
631
|
+
# Validate the rule, with respect to an AST.
|
632
|
+
#
|
633
|
+
# Uses `#validate!` and catches `SyntaxError`
|
634
|
+
#
|
635
|
+
# @param [Array<Rule>] ast
|
636
|
+
# The set of rules, used to turn symbols into rules
|
637
|
+
# @return [Boolean]
|
638
|
+
def valid?(ast)
|
639
|
+
validate!(ast)
|
640
|
+
true
|
641
|
+
rescue SyntaxError
|
642
|
+
false
|
643
|
+
end
|
644
|
+
|
645
|
+
# Do the firsts of this rule include the empty string?
|
646
|
+
#
|
647
|
+
# @return [Boolean]
|
648
|
+
def first_includes_eps?
|
649
|
+
@first && @first.include?(:_eps)
|
650
|
+
end
|
651
|
+
|
652
|
+
# Add terminals to the first set of this rule.
|
653
|
+
#
|
654
|
+
# @param [Array<Rule, Symbol, String>] terminals
|
655
|
+
# @return [Integer] the number of terminals added
|
656
|
+
def add_first(terminals)
|
657
|
+
@first ||= []
|
658
|
+
terminals = terminals.map {|t| t.is_a?(Rule) ? t.sym : t} - @first
|
659
|
+
@first += terminals
|
660
|
+
terminals.length
|
661
|
+
end
|
662
|
+
|
663
|
+
# Add terminal as following this rule. Don't add _eps as a follow
|
664
|
+
#
|
665
|
+
# @param [Array<Rule, Symbol, String>] terminals
|
666
|
+
# @return [Integer] the number of terminals added
|
667
|
+
def add_follow(terminals)
|
668
|
+
# Remove terminals already in follows, and empty string
|
669
|
+
terminals = terminals.map {|t| t.is_a?(Rule) ? t.sym : t} - (@follow || []) - [:_eps]
|
670
|
+
unless terminals.empty?
|
671
|
+
@follow ||= []
|
672
|
+
@follow += terminals
|
673
|
+
end
|
674
|
+
terminals.length
|
675
|
+
end
|
676
|
+
|
393
677
|
private
|
394
678
|
def ttl_expr(expr, pfx, depth, is_obj = true)
|
395
679
|
indent = ' ' * depth
|
@@ -405,17 +689,28 @@ module EBNF
|
|
405
689
|
|
406
690
|
case op
|
407
691
|
when :seq, :alt, :diff
|
692
|
+
# Multiple operands
|
408
693
|
statements << %{#{indent}#{bra}#{pfx}:#{op} (}
|
409
694
|
expr.each {|a| statements += ttl_expr(a, pfx, depth + 1)}
|
410
695
|
statements << %{#{indent} )#{ket}}
|
411
|
-
when :opt, :plus, :star
|
696
|
+
when :opt, :plus, :star, :not
|
697
|
+
# Single operand
|
412
698
|
statements << %{#{indent}#{bra}#{pfx}:#{op} }
|
413
699
|
statements += ttl_expr(expr.first, pfx, depth + 1)
|
414
700
|
statements << %{#{indent} #{ket}} unless ket.empty?
|
415
|
-
when :
|
701
|
+
when :rept
|
702
|
+
# Three operands (min, max and expr)
|
703
|
+
statements << %{ #{indent}#{pfx}:min #{expr[0].inspect};}
|
704
|
+
statements << %{ #{indent}#{pfx}:max #{expr[1].inspect};}
|
705
|
+
statements << %{#{indent}#{bra}#{pfx}:#{op} }
|
706
|
+
statements += ttl_expr(expr.last, pfx, depth + 1)
|
707
|
+
statements << %{#{indent} #{ket}} unless ket.empty?
|
708
|
+
when :_empty, :_eps
|
416
709
|
statements << %{#{indent}"g:#{op.to_s[1..-1]}"}
|
417
710
|
when :"'"
|
418
711
|
statements << %{#{indent}"#{esc(expr)}"}
|
712
|
+
when :istr
|
713
|
+
statements << %{#{indent}#{bra} re:matches #{expr.first.inspect} #{ket}}
|
419
714
|
when :range
|
420
715
|
statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
|
421
716
|
when :hex
|
@@ -471,7 +766,7 @@ module EBNF
|
|
471
766
|
def make_sym_id(variation = nil)
|
472
767
|
@id_seq ||= 0
|
473
768
|
@id_seq += 1
|
474
|
-
["_#{@sym}_#{@id_seq}#{variation}".to_sym, "#{@id}.#{@id_seq}#{variation}"]
|
769
|
+
["_#{@sym}_#{@id_seq}#{variation}".to_sym, ("#{@id}.#{@id_seq}#{variation}" if @id)]
|
475
770
|
end
|
476
771
|
end
|
477
772
|
end
|