ebnf 1.1.2 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +218 -196
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +40 -21
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/doap.ttl +23 -15
- data/etc/ebnf.ebnf +21 -33
- data/etc/ebnf.html +171 -160
- data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
- data/etc/ebnf.ll1.sxp +182 -183
- data/etc/ebnf.peg.rb +90 -0
- data/etc/ebnf.peg.sxp +84 -0
- data/etc/ebnf.sxp +40 -41
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +363 -362
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +103 -101
- data/lib/ebnf.rb +7 -2
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +128 -87
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +140 -8
- data/lib/ebnf/ll1/lexer.rb +37 -32
- data/lib/ebnf/ll1/parser.rb +113 -73
- data/lib/ebnf/ll1/scanner.rb +83 -51
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +561 -0
- data/lib/ebnf/peg/rule.rb +241 -0
- data/lib/ebnf/rule.rb +453 -163
- data/lib/ebnf/terminals.rb +21 -0
- data/lib/ebnf/writer.rb +561 -88
- metadata +114 -28
- data/etc/sparql.rb +0 -45773
@@ -0,0 +1,241 @@
|
|
1
|
+
module EBNF::PEG
|
2
|
+
# Behavior for parsing a PEG rule
|
3
|
+
module Rule
|
4
|
+
##
|
5
|
+
# Initialized by parser when loading rules.
|
6
|
+
# Used for finding rules and invoking elements of the parse process.
|
7
|
+
#
|
8
|
+
# @return [EBNF::PEG::Parser] parser
|
9
|
+
attr_accessor :parser
|
10
|
+
|
11
|
+
##
|
12
|
+
# Parse a rule or terminal, invoking callbacks, as appropriate
|
13
|
+
|
14
|
+
# If there is a `start_production` and/or `production`,
|
15
|
+
# they are invoked with a `prod_data` stack, the input stream and offset.
|
16
|
+
# Otherwise, the results are added as an array value
|
17
|
+
# to a hash indexed by the rule name.
|
18
|
+
#
|
19
|
+
# If matched, the input position is updated and the results returned in a Hash.
|
20
|
+
#
|
21
|
+
# * `alt`: returns the value of the matched production or `:unmatched`.
|
22
|
+
# * `diff`: returns the value matched, or `:unmatched`.
|
23
|
+
# * `hex`: returns a string composed of the matched hex character, or `:unmatched`.
|
24
|
+
# * `opt`: returns the value matched, or `nil` if unmatched.
|
25
|
+
# * `plus`: returns an array of the values matched for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string.
|
26
|
+
# * `range`: returns a string composed of the values matched, or `:unmatched`, if less than `min` are matched.
|
27
|
+
# * `seq`: returns an array composed of single-entry hashes for each matched production indexed by the production name, or `:unmatched` if any production fails to match. For Terminals, returns a string created by concatenating these values. Via option in a `production` or definition, the result can be a single hash with values for each matched production; note that this is not always possible due to the possibility of repeated productions within the sequence.
|
28
|
+
# * `star`: returns an array of the values matched for the specified production. For Terminals, these are concatenated into a single string.
|
29
|
+
#
|
30
|
+
# @param [Scanner] input
|
31
|
+
# @return [Hash{Symbol => Object}, :unmatched] A hash with keys for matched component of the expression. Returns :unmatched if the input does not match the production.
|
32
|
+
def parse(input)
|
33
|
+
# Save position and linenumber for backtracking
|
34
|
+
pos, lineno = input.pos, input.lineno
|
35
|
+
|
36
|
+
parser.packrat[sym] ||= {}
|
37
|
+
if parser.packrat[sym][pos]
|
38
|
+
parser.debug("#{sym}(:memo)", lineno: lineno) { "#{parser.packrat[sym][pos].inspect}(@#{pos})"}
|
39
|
+
input.pos, input.lineno = parser.packrat[sym][pos][:pos], parser.packrat[sym][pos][:lineno]
|
40
|
+
return parser.packrat[sym][pos][:result]
|
41
|
+
end
|
42
|
+
|
43
|
+
if terminal?
|
44
|
+
# If the terminal is defined with a regular expression,
|
45
|
+
# use that to match the input,
|
46
|
+
# otherwise, fall through and match using the rule's expression below.
|
47
|
+
if regexp = parser.find_terminal_regexp(sym)
|
48
|
+
matched = input.scan(regexp)
|
49
|
+
result = parser.onTerminal(sym, (matched ? matched : :unmatched))
|
50
|
+
# Update furthest failure for strings and terminals
|
51
|
+
parser.update_furthest_failure(input.pos, input.lineno, sym) if result == :unmatched
|
52
|
+
parser.packrat[sym][pos] = {
|
53
|
+
pos: input.pos,
|
54
|
+
lineno: input.lineno,
|
55
|
+
result: result
|
56
|
+
}
|
57
|
+
return parser.packrat[sym][pos][:result]
|
58
|
+
end
|
59
|
+
else
|
60
|
+
eat_whitespace(input)
|
61
|
+
end
|
62
|
+
start_options = parser.onStart(sym)
|
63
|
+
|
64
|
+
result = case expr.first
|
65
|
+
when :alt
|
66
|
+
# Return the first expression to match.
|
67
|
+
# Result is either :unmatched, or the value of the matching rule
|
68
|
+
alt = :unmatched
|
69
|
+
expr[1..-1].each do |prod|
|
70
|
+
alt = case prod
|
71
|
+
when Symbol
|
72
|
+
rule = parser.find_rule(prod)
|
73
|
+
raise "No rule found for #{prod}" unless rule
|
74
|
+
rule.parse(input)
|
75
|
+
when String
|
76
|
+
input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
|
77
|
+
end
|
78
|
+
if alt == :unmatched
|
79
|
+
# Update furthest failure for strings and terminals
|
80
|
+
parser.update_furthest_failure(input.pos, input.lineno, prod) if prod.is_a?(String) || rule.terminal?
|
81
|
+
else
|
82
|
+
break
|
83
|
+
end
|
84
|
+
end
|
85
|
+
alt
|
86
|
+
when :diff
|
87
|
+
# matches any string that matches A but does not match B.
|
88
|
+
# (Note, this is only used for Terminal rules, non-terminals will use :not)
|
89
|
+
raise "Diff used on non-terminal #{prod}" unless terminal?
|
90
|
+
re1, re2 = Regexp.new(translate_codepoints(expr[1])), Regexp.new(translate_codepoints(expr[2]))
|
91
|
+
matched = input.scan(re1)
|
92
|
+
if !matched || re2.match?(matched)
|
93
|
+
# Update furthest failure for terminals
|
94
|
+
parser.update_furthest_failure(input.pos, input.lineno, sym)
|
95
|
+
:unmatched
|
96
|
+
else
|
97
|
+
matched
|
98
|
+
end
|
99
|
+
when :hex
|
100
|
+
# Matches the given hex character if expression matches the character whose number (code point) in ISO/IEC 10646 is N. The number of leading zeros in the #xN form is insignificant.
|
101
|
+
input.scan(to_regexp) || begin
|
102
|
+
# Update furthest failure for terminals
|
103
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr.last)
|
104
|
+
:unmatched
|
105
|
+
end
|
106
|
+
when :not
|
107
|
+
# matches any string that does not match B.
|
108
|
+
res = case prod = expr[1]
|
109
|
+
when Symbol
|
110
|
+
rule = parser.find_rule(prod)
|
111
|
+
raise "No rule found for #{prod}" unless rule
|
112
|
+
rule.parse(input)
|
113
|
+
when String
|
114
|
+
input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
|
115
|
+
end
|
116
|
+
if res != :unmatched
|
117
|
+
# Update furthest failure for terminals
|
118
|
+
parser.update_furthest_failure(input.pos, input.lineno, sym) if terminal?
|
119
|
+
:unmatched
|
120
|
+
else
|
121
|
+
nil
|
122
|
+
end
|
123
|
+
when :opt
|
124
|
+
# Result is the matched value or nil
|
125
|
+
opt = rept(input, 0, 1, expr[1])
|
126
|
+
|
127
|
+
# Update furthest failure for strings and terminals
|
128
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
|
129
|
+
opt.first
|
130
|
+
when :plus
|
131
|
+
# Result is an array of all expressions while they match,
|
132
|
+
# at least one must match
|
133
|
+
plus = rept(input, 1, '*', expr[1])
|
134
|
+
|
135
|
+
# Update furthest failure for strings and terminals
|
136
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
|
137
|
+
plus.is_a?(Array) && terminal? ? plus.join("") : plus
|
138
|
+
when :range, :istr
|
139
|
+
# Matches the specified character range
|
140
|
+
input.scan(to_regexp) || begin
|
141
|
+
# Update furthest failure for strings and terminals
|
142
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr[1])
|
143
|
+
:unmatched
|
144
|
+
end
|
145
|
+
when :seq
|
146
|
+
# Evaluate each expression into an array of hashes where each hash contains a key from the associated production and the value is the parsed value of that production. Returns :unmatched if the input does not match the production. Value ordering is ensured by native Hash ordering.
|
147
|
+
seq = expr[1..-1].each_with_object([]) do |prod, accumulator|
|
148
|
+
eat_whitespace(input) unless accumulator.empty? || terminal?
|
149
|
+
res = case prod
|
150
|
+
when Symbol
|
151
|
+
rule = parser.find_rule(prod)
|
152
|
+
raise "No rule found for #{prod}" unless rule
|
153
|
+
rule.parse(input)
|
154
|
+
when String
|
155
|
+
input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
|
156
|
+
end
|
157
|
+
if res == :unmatched
|
158
|
+
# Update furthest failure for strings and terminals
|
159
|
+
parser.update_furthest_failure(input.pos, input.lineno, prod)
|
160
|
+
break :unmatched
|
161
|
+
end
|
162
|
+
accumulator << {prod.to_sym => res}
|
163
|
+
end
|
164
|
+
if seq == :unmatched
|
165
|
+
:unmatched
|
166
|
+
elsif terminal?
|
167
|
+
seq.map(&:values).compact.join("") # Concat values for terminal production
|
168
|
+
elsif start_options[:as_hash]
|
169
|
+
seq.inject {|memo, h| memo.merge(h)}
|
170
|
+
else
|
171
|
+
seq
|
172
|
+
end
|
173
|
+
when :star
|
174
|
+
# Result is an array of all expressions while they match,
|
175
|
+
# an empty array if none match
|
176
|
+
star = rept(input, 0, '*', expr[1])
|
177
|
+
|
178
|
+
# Update furthest failure for strings and terminals
|
179
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
|
180
|
+
star.is_a?(Array) && terminal? ? star.join("") : star
|
181
|
+
else
|
182
|
+
raise "attempt to parse unknown rule type: #{expr.first}"
|
183
|
+
end
|
184
|
+
|
185
|
+
if result == :unmatched
|
186
|
+
input.pos, input.lineno = pos, lineno
|
187
|
+
end
|
188
|
+
|
189
|
+
result = parser.onFinish(result)
|
190
|
+
(parser.packrat[sym] ||= {})[pos] = {
|
191
|
+
pos: input.pos,
|
192
|
+
lineno: input.lineno,
|
193
|
+
result: result
|
194
|
+
}
|
195
|
+
return parser.packrat[sym][pos][:result]
|
196
|
+
end
|
197
|
+
|
198
|
+
##
|
199
|
+
# Repetition, 0-1, 0-n, 1-n, ...
|
200
|
+
#
|
201
|
+
# Note, nil results are removed from the result, but count towards min/max calculations
|
202
|
+
#
|
203
|
+
# @param [Scanner] input
|
204
|
+
# @param [Integer] min
|
205
|
+
# @param [Integer] max
|
206
|
+
# If it is an integer, it stops matching after max entries; otherwise it is `'*'`, meaning there is no upper limit.
|
207
|
+
# @param [Symbol, String] prod
|
208
|
+
# @return [:unmatched, Array]
|
209
|
+
def rept(input, min, max, prod)
|
210
|
+
result = []
|
211
|
+
|
212
|
+
case prod
|
213
|
+
when Symbol
|
214
|
+
rule = parser.find_rule(prod)
|
215
|
+
raise "No rule found for #{prod}" unless rule
|
216
|
+
while (max == '*' || result.length < max) && (res = rule.parse(input)) != :unmatched
|
217
|
+
eat_whitespace(input) unless terminal?
|
218
|
+
result << res
|
219
|
+
end
|
220
|
+
when String
|
221
|
+
while (res = input.scan(Regexp.new(Regexp.quote(prod)))) && (max == '*' || result.length < max)
|
222
|
+
eat_whitespace(input) unless terminal?
|
223
|
+
result << res
|
224
|
+
end
|
225
|
+
end
|
226
|
+
|
227
|
+
result.length < min ? :unmatched : result.compact
|
228
|
+
end
|
229
|
+
|
230
|
+
##
|
231
|
+
# Eat whitespace between non-terminal rules
|
232
|
+
def eat_whitespace(input)
|
233
|
+
if parser.whitespace.is_a?(Regexp)
|
234
|
+
# Eat whitespace before a non-terminal
|
235
|
+
input.skip(parser.whitespace)
|
236
|
+
elsif parser.whitespace.is_a?(Rule)
|
237
|
+
parser.whitespace.parse(input) # throw away result
|
238
|
+
end
|
239
|
+
end
|
240
|
+
end
|
241
|
+
end
|
data/lib/ebnf/rule.rb
CHANGED
@@ -1,15 +1,33 @@
|
|
1
|
+
require 'scanf'
|
2
|
+
require 'strscan'
|
3
|
+
|
1
4
|
module EBNF
|
2
5
|
# Represent individual parsed rules
|
3
6
|
class Rule
|
4
|
-
# Operations which are flattened to seprate rules in to_bnf
|
7
|
+
# Operations which are flattened to separate rules in to_bnf.
|
5
8
|
BNF_OPS = %w{
|
6
|
-
alt opt plus seq star
|
9
|
+
alt diff not opt plus rept seq star
|
7
10
|
}.map(&:to_sym).freeze
|
8
11
|
|
9
12
|
TERM_OPS = %w{
|
10
|
-
|
13
|
+
hex istr range
|
11
14
|
}.map(&:to_sym).freeze
|
12
15
|
|
16
|
+
# The number of arguments expected per operator. `nil` for unspecified
|
17
|
+
OP_ARGN = {
|
18
|
+
alt: nil,
|
19
|
+
diff: 2,
|
20
|
+
hex: 1,
|
21
|
+
istr: 1,
|
22
|
+
not: 1,
|
23
|
+
opt: 1,
|
24
|
+
plus: 1,
|
25
|
+
range: 1,
|
26
|
+
rept: 3,
|
27
|
+
seq: nil,
|
28
|
+
star: 1
|
29
|
+
}
|
30
|
+
|
13
31
|
# Symbol of rule
|
14
32
|
#
|
15
33
|
# @return [Symbol]
|
@@ -26,7 +44,7 @@ module EBNF
|
|
26
44
|
|
27
45
|
# Kind of rule
|
28
46
|
#
|
29
|
-
# @return [:rule, :terminal, or :pass]
|
47
|
+
# @return [:rule, :terminal, :terminals, or :pass]
|
30
48
|
attr_accessor :kind
|
31
49
|
|
32
50
|
# Rule expression
|
@@ -57,45 +75,92 @@ module EBNF
|
|
57
75
|
# Determines preparation and cleanup rules for reconstituting EBNF ? * + from BNF
|
58
76
|
attr_accessor :cleanup
|
59
77
|
|
60
|
-
# @param [
|
61
|
-
# @
|
78
|
+
# @param [Symbol, nil] sym
|
79
|
+
# `nil` is allowed only for @pass or @terminals
|
80
|
+
# @param [String, nil] id
|
62
81
|
# @param [Array] expr
|
63
|
-
#
|
64
|
-
#
|
65
|
-
#
|
66
|
-
#
|
67
|
-
#
|
68
|
-
#
|
69
|
-
|
82
|
+
# The expression is an internal representation of an S-Expression with one of the following operators:
|
83
|
+
#
|
84
|
+
# * `alt` – A list of alternative rules, which are attempted in order. It terminates with the first matching rule, or is terminated as unmatched, if no such rule is found.
|
85
|
+
# * `diff` – matches any string that matches `A` but does not match `B`.
|
86
|
+
# * `hex` – A single character represented using the hexadecimal notation `#xnn`.
|
87
|
+
# * `istr` – A string which matches in a case-insensitive manner, so that `(istr "fOo")` will match either of the strings `"foo"`, `"FOO"` or any other combination.
|
88
|
+
# * `opt` – An optional rule or terminal. It either results in the matching rule or returns `nil`.
|
89
|
+
# * `plus` – A sequence of one or more of the matching rule. If there is no such rule, it is terminated as unmatched; otherwise, the result is an array containing all matched input.
|
90
|
+
# * `range` – A range of characters, possibly repeated, of the form `(range "a-z")`. May also use hexadecimal notation.
|
91
|
+
# * `rept m n` – A sequence of at least `m` and at most `n` of the matching rule. It will always return an array.
|
92
|
+
# * `seq` – A sequence of rules or terminals. If any (other than `opt` or `star`) do not parse, the rule is terminated as unmatched.
|
93
|
+
# * `star` – A sequence of zero or more of the matching rule. It will always return an array.
|
94
|
+
# @param [:rule, :terminal, :terminals, :pass] kind (nil)
|
95
|
+
# @param [String] ebnf (nil)
|
96
|
+
# When parsing, records the EBNF string used to create the rule.
|
97
|
+
# @param [Array] first (nil)
|
98
|
+
# Recorded set of terminals that can begin this rule (LL(1))
|
99
|
+
# @param [Array] follow (nil)
|
100
|
+
# Recorded set of terminals that can follow this rule (LL(1))
|
101
|
+
# @param [Boolean] start (nil)
|
102
|
+
# Is this the starting rule for the grammar?
|
103
|
+
# @param [Rule] top_rule (nil)
|
104
|
+
# The top-most rule. All expressed rules are top-rules, derived rules have the original rule as their top-rule.
|
105
|
+
# @param [Boolean] cleanup (nil)
|
106
|
+
# Records information useful for cleaning up converted :plus, and :star expansions (LL(1)).
|
107
|
+
def initialize(sym, id, expr, kind: nil, ebnf: nil, first: nil, follow: nil, start: nil, top_rule: nil, cleanup: nil)
|
70
108
|
@sym, @id = sym, id
|
71
|
-
@expr = expr.is_a?(Array) ? expr : [:seq, expr]
|
72
|
-
@ebnf =
|
73
|
-
@top_rule
|
74
|
-
@
|
75
|
-
@follow = options[:follow]
|
76
|
-
@start = options[:start]
|
77
|
-
@cleanup = options[:cleanup]
|
78
|
-
@kind = case
|
79
|
-
when options[:kind] then options[:kind]
|
109
|
+
@expr = expr.is_a?(Array) ? expr : [:seq, expr].compact
|
110
|
+
@ebnf, @kind, @first, @follow, @start, @cleanup, @top_rule = ebnf, kind, first, follow, start, cleanup, top_rule
|
111
|
+
@top_rule ||= self
|
112
|
+
@kind ||= case
|
80
113
|
when sym.to_s == sym.to_s.upcase then :terminal
|
81
114
|
when !BNF_OPS.include?(@expr.first) then :terminal
|
82
115
|
else :rule
|
83
116
|
end
|
117
|
+
|
118
|
+
# Allow @pass and @terminals to not be named
|
119
|
+
@sym ||= :_pass if @kind == :pass
|
120
|
+
@sym ||= :_terminals if @kind == :terminals
|
121
|
+
|
122
|
+
raise ArgumentError, "Rule sym must be a symbol, was #{@sym.inspect}" unless @sym.is_a?(Symbol)
|
123
|
+
raise ArgumentError, "Rule id must be a string or nil, was #{@id.inspect}" unless (@id || "").is_a?(String)
|
124
|
+
raise ArgumentError, "Rule kind must be one of :rule, :terminal, :terminals, or :pass, was #{@kind.inspect}" unless
|
125
|
+
@kind.is_a?(Symbol) && %w(rule terminal terminals pass).map(&:to_sym).include?(@kind)
|
126
|
+
|
127
|
+
case @expr.first
|
128
|
+
when :alt
|
129
|
+
raise ArgumentError, "#{@expr.first} operation must have at least one operand, had #{@expr.length - 1}" unless @expr.length > 1
|
130
|
+
when :diff
|
131
|
+
raise ArgumentError, "#{@expr.first} operation must have exactly two operands, had #{@expr.length - 1}" unless @expr.length == 3
|
132
|
+
when :hex, :istr, :not, :opt, :plus, :range, :star
|
133
|
+
raise ArgumentError, "#{@expr.first} operation must have exactly one operand, had #{@expr.length - 1}" unless @expr.length == 2
|
134
|
+
when :rept
|
135
|
+
raise ArgumentError, "#{@expr.first} operation must have exactly three, had #{@expr.length - 1}" unless @expr.length == 4
|
136
|
+
raise ArgumentError, "#{@expr.first} operation must an non-negative integer minimum, was #{@expr[1]}" unless
|
137
|
+
@expr[1].is_a?(Integer) && @expr[1] >= 0
|
138
|
+
raise ArgumentError, "#{@expr.first} operation must an non-negative integer maximum or '*', was #{@expr[2]}" unless
|
139
|
+
@expr[2] == '*' || @expr[2].is_a?(Integer) && @expr[2] >= 0
|
140
|
+
when :seq
|
141
|
+
# It's legal to have a zero-length sequence
|
142
|
+
else
|
143
|
+
raise ArgumentError, "Rule expression must be an array using a known operator, was #{@expr.first}"
|
144
|
+
end
|
84
145
|
end
|
85
146
|
|
86
147
|
##
|
87
148
|
# Return a rule from its SXP representation:
|
88
149
|
#
|
89
150
|
# @example inputs
|
90
|
-
# (pass (plus (range "#x20\\t\\r\\n")))
|
151
|
+
# (pass _pass (plus (range "#x20\\t\\r\\n")))
|
91
152
|
# (rule ebnf "1" (star (alt declaration rule)))
|
92
|
-
# (terminal
|
153
|
+
# (terminal R_CHAR "19" (diff CHAR (alt "]" "-")))
|
93
154
|
#
|
94
|
-
# Also may have (first ...)
|
155
|
+
# Also may have `(first ...)`, `(follow ...)`, or `(start #t)`.
|
95
156
|
#
|
96
|
-
# @param [Array] sxp
|
157
|
+
# @param [String, Array] sxp
|
97
158
|
# @return [Rule]
|
98
159
|
def self.from_sxp(sxp)
|
160
|
+
if sxp.is_a?(String)
|
161
|
+
require 'sxp' unless defined?(SXP)
|
162
|
+
sxp = SXP.parse(sxp)
|
163
|
+
end
|
99
164
|
expr = sxp.detect {|e| e.is_a?(Array) && ![:first, :follow, :start].include?(e.first.to_sym)}
|
100
165
|
first = sxp.detect {|e| e.is_a?(Array) && e.first.to_sym == :first}
|
101
166
|
first = first[1..-1] if first
|
@@ -106,27 +171,28 @@ module EBNF
|
|
106
171
|
start = sxp.any? {|e| e.is_a?(Array) && e.first.to_sym == :start}
|
107
172
|
sym = sxp[1] if sxp[1].is_a?(Symbol)
|
108
173
|
id = sxp[2] if sxp[2].is_a?(String)
|
109
|
-
|
174
|
+
self.new(sym, id, expr, kind: sxp.first, first: first, follow: follow, cleanup: cleanup, start: start)
|
110
175
|
end
|
111
176
|
|
112
177
|
# Build a new rule creating a symbol and numbering from the current rule
|
113
|
-
# Symbol and number creation is handled by the top-most rule in such a chain
|
178
|
+
# Symbol and number creation is handled by the top-most rule in such a chain.
|
114
179
|
#
|
115
180
|
# @param [Array] expr
|
181
|
+
# @param [Symbol] kind (nil)
|
182
|
+
# @param [Hash{Symbol => Symbol}] cleanup (nil)
|
116
183
|
# @param [Hash{Symbol => Object}] options
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
# Return representation for building S-Expressions
|
184
|
+
def build(expr, kind: nil, cleanup: nil, **options)
|
185
|
+
new_sym, new_id = @top_rule.send(:make_sym_id)
|
186
|
+
self.class.new(new_sym, new_id, expr,
|
187
|
+
kind: kind,
|
188
|
+
ebnf: @ebnf,
|
189
|
+
top_rule: @top_rule,
|
190
|
+
cleanup: cleanup,
|
191
|
+
**options)
|
192
|
+
end
|
193
|
+
|
194
|
+
# Return representation for building S-Expressions.
|
195
|
+
#
|
130
196
|
# @return [Array]
|
131
197
|
def for_sxp
|
132
198
|
elements = [kind, sym]
|
@@ -142,40 +208,51 @@ module EBNF
|
|
142
208
|
# Return SXP representation of this rule
|
143
209
|
# @return [String]
|
144
210
|
def to_sxp
|
211
|
+
require 'sxp' unless defined?(SXP)
|
145
212
|
for_sxp.to_sxp
|
146
213
|
end
|
147
214
|
|
148
215
|
alias_method :to_s, :to_sxp
|
149
216
|
|
150
|
-
# Serializes this rule to an Turtle
|
217
|
+
# Serializes this rule to Turtle.
|
218
|
+
#
|
151
219
|
# @return [String]
|
152
220
|
def to_ttl
|
153
221
|
@ebnf.debug("to_ttl") {inspect} if @ebnf
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
%{ rdfs:comment #{comment.inspect};}
|
162
|
-
|
222
|
+
statements = [%{:#{sym} rdfs:label "#{sym}";}]
|
223
|
+
if orig
|
224
|
+
comment = orig.to_s.strip.
|
225
|
+
gsub(/"""/, '\"\"\"').
|
226
|
+
gsub("\\", "\\\\").
|
227
|
+
sub(/^\"/, '\"').
|
228
|
+
sub(/\"$/m, '\"')
|
229
|
+
statements << %{ rdfs:comment #{comment.inspect};}
|
230
|
+
end
|
231
|
+
statements << %{ dc:identifier "#{id}";} if id
|
163
232
|
|
164
233
|
statements += ttl_expr(expr, terminal? ? "re" : "g", 1, false)
|
165
234
|
"\n" + statements.join("\n")
|
166
235
|
end
|
167
236
|
|
237
|
+
# Return a Ruby representation of this rule
|
238
|
+
# @return [String]
|
239
|
+
def to_ruby
|
240
|
+
"EBNF::Rule.new(#{sym.inspect}, #{id.inspect}, #{expr.inspect}#{', kind: ' + kind.inspect unless kind == :rule})"
|
241
|
+
end
|
242
|
+
|
168
243
|
##
|
169
244
|
# Transform EBNF rule to BNF rules:
|
170
245
|
#
|
171
|
-
# * Transform (a
|
172
|
-
#
|
173
|
-
#
|
174
|
-
#
|
175
|
-
# * Transform (a
|
176
|
-
# * Transform (a
|
246
|
+
# * Transform `(rule a "n" (op1 (op2)))` into two rules:
|
247
|
+
#
|
248
|
+
# (rule a "n" (op1 _a_1))
|
249
|
+
# (rule _a_1 "n.1" (op2))
|
250
|
+
# * Transform `(rule a (opt b))` into `(rule a (alt _empty b))`
|
251
|
+
# * Transform `(rule a (star b))` into `(rule a (alt _empty (seq b a)))`
|
252
|
+
# * Transform `(rule a (plus b))` into `(rule a (seq b (star b)))`
|
253
|
+
#
|
254
|
+
# Transformation includes information used to re-construct non-transformed.
|
177
255
|
#
|
178
|
-
# Transformation includes information used to re-construct non-transformed
|
179
256
|
# AST representation
|
180
257
|
# @return [Array<Rule>]
|
181
258
|
def to_bnf
|
@@ -202,19 +279,19 @@ module EBNF
|
|
202
279
|
new_rules = new_rules.map {|r| r.to_bnf}.flatten
|
203
280
|
elsif expr.first == :opt
|
204
281
|
this = dup
|
205
|
-
# * Transform (a
|
282
|
+
# * Transform (rule a (opt b)) into (rule a (alt _empty b))
|
206
283
|
this.expr = [:alt, :_empty, expr.last]
|
207
284
|
this.cleanup = :opt
|
208
285
|
new_rules = this.to_bnf
|
209
286
|
elsif expr.first == :star
|
210
|
-
# * Transform (a
|
287
|
+
# * Transform (rule a (star b)) into (rule a (alt _empty (seq b a)))
|
211
288
|
this = dup
|
212
289
|
this.cleanup = :star
|
213
290
|
new_rule = this.build([:seq, expr.last, this.sym], cleanup: :merge)
|
214
291
|
this.expr = [:alt, :_empty, new_rule.sym]
|
215
292
|
new_rules = [this] + new_rule.to_bnf
|
216
293
|
elsif expr.first == :plus
|
217
|
-
# * Transform (a
|
294
|
+
# * Transform (rule a (plus b)) into (rule a (seq b (star b)))
|
218
295
|
this = dup
|
219
296
|
this.cleanup = :plus
|
220
297
|
this.expr = [:seq, expr.last, [:star, expr.last]]
|
@@ -223,7 +300,7 @@ module EBNF
|
|
223
300
|
# Otherwise, no further transformation necessary
|
224
301
|
new_rules << self
|
225
302
|
elsif [:diff, :hex, :range].include?(expr.first)
|
226
|
-
# This rules are fine,
|
303
|
+
# These rules are fine, they just need to be terminals
|
227
304
|
raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal?
|
228
305
|
new_rules << self
|
229
306
|
else
|
@@ -234,89 +311,73 @@ module EBNF
|
|
234
311
|
return new_rules
|
235
312
|
end
|
236
313
|
|
237
|
-
|
238
|
-
#
|
239
|
-
#
|
240
|
-
#
|
314
|
+
##
|
315
|
+
# Transform EBNF rule for PEG:
|
316
|
+
#
|
317
|
+
# * Transform `(rule a "n" (op1 ... (op2 y) ...z))` into two rules:
|
318
|
+
#
|
319
|
+
# (rule a "n" (op1 ... _a_1 ... z))
|
320
|
+
# (rule _a_1 "n.1" (op2 y))
|
321
|
+
# * Transform `(rule a "n" (diff op1 op2))` into two rules:
|
322
|
+
#
|
323
|
+
# (rule a "n" (seq _a_1 op1))
|
324
|
+
# (rule _a_1 "n.1" (not op2))
|
325
|
+
#
|
241
326
|
# @return [Array<Rule>]
|
242
|
-
def
|
243
|
-
|
244
|
-
case sym
|
245
|
-
when Symbol
|
246
|
-
r = ast.detect {|r| r.sym == sym}
|
247
|
-
r if r && r.rule?
|
248
|
-
else
|
249
|
-
nil
|
250
|
-
end
|
251
|
-
end.compact
|
252
|
-
end
|
327
|
+
def to_peg
|
328
|
+
new_rules = []
|
253
329
|
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
when String
|
266
|
-
sym
|
267
|
-
else
|
268
|
-
nil
|
330
|
+
# Look for rules containing sub-sequences
|
331
|
+
if expr.any? {|e| e.is_a?(Array) && e.first.is_a?(Symbol)}
|
332
|
+
# duplicate ourselves for rewriting
|
333
|
+
this = dup
|
334
|
+
new_rules << this
|
335
|
+
|
336
|
+
expr.each_with_index do |e, index|
|
337
|
+
next unless e.is_a?(Array) && e.first.is_a?(Symbol)
|
338
|
+
new_rule = build(e)
|
339
|
+
this.expr[index] = new_rule.sym
|
340
|
+
new_rules << new_rule
|
269
341
|
end
|
270
|
-
end.compact
|
271
|
-
end
|
272
342
|
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
expr.
|
343
|
+
# Return new rules after recursively applying #to_peg
|
344
|
+
new_rules = new_rules.map {|r| r.to_peg}.flatten
|
345
|
+
elsif expr.first == :diff && !terminal?
|
346
|
+
this = dup
|
347
|
+
new_rule = build([:not, expr[2]])
|
348
|
+
this.expr = [:seq, new_rule.sym, expr[1]]
|
349
|
+
new_rules << this
|
350
|
+
new_rules << new_rule
|
351
|
+
elsif [:hex, :istr, :range].include?(expr.first)
|
352
|
+
# These rules are fine, they just need to be terminals
|
353
|
+
raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal?
|
354
|
+
new_rules << self
|
284
355
|
else
|
285
|
-
|
356
|
+
new_rules << self
|
286
357
|
end
|
358
|
+
|
359
|
+
return new_rules.map {|r| r.extend(EBNF::PEG::Rule)}
|
287
360
|
end
|
288
361
|
|
289
|
-
|
290
|
-
#
|
291
|
-
def first_includes_eps?
|
292
|
-
@first && @first.include?(:_eps)
|
293
|
-
end
|
294
|
-
|
295
|
-
# Add terminal as proceding this rule
|
296
|
-
# @param [Array<Rule, Symbol, String>] terminals
|
297
|
-
# @return [Integer] if number of terminals added
|
298
|
-
def add_first(terminals)
|
299
|
-
@first ||= []
|
300
|
-
terminals = terminals.map {|t| t.is_a?(Rule) ? t.sym : t} - @first
|
301
|
-
@first += terminals
|
302
|
-
terminals.length
|
303
|
-
end
|
304
|
-
|
305
|
-
# Add terminal as following this rule. Don't add _eps as a follow
|
362
|
+
##
|
363
|
+
# For :hex, :istr, or :range, create a regular expression.
|
306
364
|
#
|
307
|
-
# @
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
365
|
+
# @return [Regexp]
|
366
|
+
def to_regexp
|
367
|
+
case expr.first
|
368
|
+
when :hex
|
369
|
+
Regexp.new(translate_codepoints(expr[1]))
|
370
|
+
when :istr
|
371
|
+
/#{expr.last}/ui
|
372
|
+
when :range
|
373
|
+
Regexp.new("[#{translate_codepoints(expr[1])}]")
|
374
|
+
else
|
375
|
+
raise "Can't turn #{expr.inspect} into a regexp"
|
315
376
|
end
|
316
|
-
terminals.length
|
317
377
|
end
|
318
378
|
|
319
379
|
# Is this a terminal?
|
380
|
+
#
|
320
381
|
# @return [Boolean]
|
321
382
|
def terminal?
|
322
383
|
kind == :terminal
|
@@ -344,18 +405,14 @@ module EBNF
|
|
344
405
|
expr.is_a?(Array) && expr.first == :seq
|
345
406
|
end
|
346
407
|
|
347
|
-
# Is this rule of the form (alt ...)?
|
348
|
-
def alt?
|
349
|
-
expr.is_a?(Array) && expr.first == :alt
|
350
|
-
end
|
351
|
-
|
352
408
|
def inspect
|
353
409
|
"#<EBNF::Rule:#{object_id} " +
|
354
410
|
{sym: sym, id: id, kind: kind, expr: expr}.inspect +
|
355
411
|
">"
|
356
412
|
end
|
357
413
|
|
358
|
-
# Two rules are equal if they have the same {#sym}, {#kind} and {#expr}
|
414
|
+
# Two rules are equal if they have the same {#sym}, {#kind} and {#expr}.
|
415
|
+
#
|
359
416
|
# @param [Rule] other
|
360
417
|
# @return [Boolean]
|
361
418
|
def ==(other)
|
@@ -364,42 +421,264 @@ module EBNF
|
|
364
421
|
expr == other.expr
|
365
422
|
end
|
366
423
|
|
367
|
-
# Two rules are equivalent if they have the same {#expr}
|
424
|
+
# Two rules are equivalent if they have the same {#expr}.
|
425
|
+
#
|
368
426
|
# @param [Rule] other
|
369
427
|
# @return [Boolean]
|
370
|
-
def
|
371
|
-
expr
|
428
|
+
def eql?(other)
|
429
|
+
expr == other.expr
|
372
430
|
end
|
373
431
|
|
374
|
-
#
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
@expr = @expr.map {|e| e == src_rule.sym ? dst_rule.sym : e}
|
432
|
+
# Rules compare using their ids
|
433
|
+
def <=>(other)
|
434
|
+
if id && other.id
|
435
|
+
if id == other.id
|
436
|
+
id.to_s <=> other.id.to_s
|
437
|
+
else
|
438
|
+
id.to_f <=> other.id.to_f
|
439
|
+
end
|
383
440
|
else
|
384
|
-
|
441
|
+
sym.to_s <=> other.sym.to_s
|
385
442
|
end
|
386
|
-
self
|
387
443
|
end
|
388
444
|
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
445
|
+
##
|
446
|
+
# Utility function to translate code points of the form '#xN' into ruby unicode characters
|
447
|
+
def translate_codepoints(str)
|
448
|
+
str.gsub(/#x\h+/) {|c| c[2..-1].scanf("%x").first.chr(Encoding::UTF_8)}
|
449
|
+
end
|
450
|
+
|
451
|
+
# Return the non-terminals for this rule.
|
452
|
+
#
|
453
|
+
# * `alt` => this is every non-terminal.
|
454
|
+
# * `diff` => this is every non-terminal.
|
455
|
+
# * `hex` => nil
|
456
|
+
# * `istr` => nil
|
457
|
+
# * `not` => this is the last expression, if any.
|
458
|
+
# * `opt` => this is the last expression, if any.
|
459
|
+
# * `plus` => this is the last expression, if any.
|
460
|
+
# * `range` => nil
|
461
|
+
# * `rept` => this is the last expression, if any.
|
462
|
+
# * `seq` => this is the first expression in the sequence, if any.
|
463
|
+
# * `star` => this is the last expression, if any.
|
464
|
+
#
|
465
|
+
# @param [Array<Rule>] ast
|
466
|
+
# The set of rules, used to turn symbols into rules
|
467
|
+
# @param [Array<Symbol,String,Array>] expr (@expr)
|
468
|
+
# The expression to check, defaults to the rule expression.
|
469
|
+
# Typically, if the expression is recursive, the embedded expression is called recursively.
|
470
|
+
# @return [Array<Rule>]
|
471
|
+
# @note this is used for LL(1) transformation, so rule types are limited
|
472
|
+
# Collects the rules in `ast` that `expr` refers to and that answer true to
# `rule?`. For `alt` and `diff` every operand is inspected; for any other
# operator only the first operand is. Nested arrays are searched recursively.
def non_terminals(ast, expr = @expr)
  operands = %i[alt diff].include?(expr.first) ? expr[1..-1] : expr[1, 1]

  found = operands.each_with_object([]) do |operand, acc|
    if operand.is_a?(Symbol)
      rule = ast.detect { |candidate| candidate.sym == operand }
      acc << rule if rule && rule.rule?
    elsif operand.is_a?(Array)
      acc.concat(non_terminals(ast, operand))
    end
  end

  found.uniq
end
|
485
|
+
|
486
|
+
# Return the terminals for this rule.
|
487
|
+
#
|
488
|
+
# * `alt` => this is every terminal.
|
489
|
+
# * `diff` => this is every terminal.
|
490
|
+
# * `hex` => nil
|
491
|
+
# * `istr` => nil
|
492
|
+
# * `not` => this is the last expression, if any.
|
493
|
+
# * `opt` => this is the last expression, if any.
|
494
|
+
# * `plus` => this is the last expression, if any.
|
495
|
+
# * `range` => nil
|
496
|
+
# * `rept` => this is the last expression, if any.
|
497
|
+
# * `seq` => this is the first expression in the sequence, if any.
|
498
|
+
# * `star` => this is the last expression, if any.
|
499
|
+
#
|
500
|
+
# @param [Array<Rule>] ast
|
501
|
+
# The set of rules, used to turn symbols into rules
|
502
|
+
# @param [Array<Symbol,String,Array>] expr (@expr)
|
503
|
+
# The expression to check, defaults to the rule expression.
|
504
|
+
# Typically, if the expression is recursive, the embedded expression is called recursively.
|
505
|
+
# @return [Array<Rule, String>]
|
506
|
+
# @note this is used for LL(1) transformation, so rule types are limited
|
507
|
+
# Collects the terminals `expr` refers to: rules in `ast` that answer true to
# `terminal?`, plus literal strings. For `alt` and `diff` every operand is
# inspected; for any other operator only the first operand is. Nested arrays
# are searched recursively.
def terminals(ast, expr = @expr)
  operands = %i[alt diff].include?(expr.first) ? expr[1..-1] : expr[1, 1]

  found = operands.each_with_object([]) do |operand, acc|
    if operand.is_a?(Symbol)
      rule = ast.detect { |candidate| candidate.sym == operand }
      acc << rule if rule && rule.terminal?
    elsif operand.is_a?(String)
      acc << operand
    elsif operand.is_a?(Array)
      acc.concat(terminals(ast, operand))
    end
  end

  found.uniq
end
|
520
|
+
|
521
|
+
# Return the symbols used in the rule.
|
522
|
+
#
|
523
|
+
# @param [Array<Symbol,String,Array>] expr (@expr)
|
524
|
+
# The expression to check, defaults to the rule expression.
|
525
|
+
# Typically, if the expression is recursive, the embedded expression is called recursively.
|
526
|
+
# @return [Array<Symbol>]
|
527
|
+
# Gathers every Symbol operand of `expr`, descending into nested arrays,
# in order of first appearance and without duplicates. The operator in the
# first position is skipped; strings and other operands are ignored.
def symbols(expr = @expr)
  gathered = []
  expr[1..-1].each do |operand|
    if operand.is_a?(Symbol)
      gathered << operand
    elsif operand.is_a?(Array)
      gathered.concat(symbols(operand))
    end
  end
  gathered.uniq
end
|
537
|
+
|
538
|
+
##
|
539
|
+
# The following are used for LL(1) transformation.
|
540
|
+
##
|
541
|
+
|
542
|
+
# Does this rule start with `sym`? It does if expr is that sym,
|
543
|
+
# expr starts with alt and contains that sym,
|
544
|
+
# or expr starts with seq and the next element is that sym.
|
545
|
+
#
|
546
|
+
# @param [Symbol, class] sym
|
547
|
+
# Symbol matching any start element, or if it is String, any start element which is a String
|
548
|
+
# @return [Array<Symbol, String>] list of symbol (singular), or strings which are start symbol, or nil if there are none
|
549
|
+
# For a `seq` rule, tests the element right after the operator with
# `sym === element`; for an `alt` rule, tests every element of `expr`.
# Returns the matching elements, or nil when nothing matches.
def starts_with?(sym)
  if seq?
    head = expr.fetch(1, nil)
    return [head] if sym === head
  end
  return nil unless alt?

  hits = expr.select { |element| sym === element }
  hits.empty? ? nil : hits
end
|
397
558
|
|
559
|
+
##
|
560
|
+
# Validate the rule, with respect to an AST.
|
561
|
+
#
|
562
|
+
# @param [Array<Rule>] ast
|
563
|
+
# The set of rules, used to turn symbols into rules
|
564
|
+
# @param [Array<Symbol,String,Array>] expr (@expr)
|
565
|
+
# The expression to check, defaults to the rule expression.
|
566
|
+
# Typically, if the expression is recursive, the embedded expression is called recursively.
|
567
|
+
# @raise [SyntaxError]
|
568
|
+
# Checks the operator, operand count and operands of `expr`, recursing into
# nested Array operands. Symbol operands must name a rule present in `ast`.
#
# @param [Array<Rule>] ast
# @param [Array<Symbol,String,Array>] expr (@expr)
# @raise [SyntaxError] on the first problem found
def validate!(ast, expr = @expr)
  op = expr.first
  raise SyntaxError, "Unknown operator: #{op}" unless OP_ARGN.key?(op)
  raise SyntaxError, "Argument count missmatch on operator #{op}, had #{expr.length - 1} expected #{OP_ARGN[op]}" if
    OP_ARGN[op] && OP_ARGN[op] != expr.length - 1

  if op == :alt
    # alt needs at least one operand
    raise SyntaxError, "alt operation must have at least one operand, had #{expr.length - 1}" unless expr.length > 1
  elsif op == :rept
    # rept operator needs min and max
    raise SyntaxError, "rept operation must an non-negative integer minimum, was #{expr[1]}" unless
      expr[1].is_a?(Integer) && expr[1] >= 0
    raise SyntaxError, "rept operation must an non-negative integer maximum or '*', was #{expr[2]}" unless
      expr[2] == '*' || expr[2].is_a?(Integer) && expr[2] >= 0
  end

  case op
  when :hex
    # \A and \z anchor the whole operand; ^ and $ anchor per line and would
    # accept a multi-line operand as long as one line matched.
    raise SyntaxError, "Hex operand must be of form '#xN+': #{sym}" unless expr.last.match?(/\A#x\h+\z/)
  when :range
    str = expr.last.dup
    str = str[1..-1] if str.start_with?('^')
    str = str[0..-2] if str.end_with?('-') # Allowed at end of range
    scanner = StringScanner.new(str)
    # `hex`/`rchar` record the kind of the previous component and `in_range`
    # that a '-' was just consumed: both ends of a range must be the same kind.
    hex = rchar = in_range = false
    until scanner.eos?
      begin
        if scanner.scan(Terminals::HEX)
          raise SyntaxError if in_range && rchar
          rchar = in_range = false
          hex = true
        elsif scanner.scan(Terminals::R_CHAR)
          raise SyntaxError if in_range && hex
          hex = in_range = false
          rchar = true
        else
          raise(SyntaxError, "Range contains illegal components at offset #{scanner.pos}: was #{expr.last}")
        end

        if scanner.scan(/\-/)
          raise SyntaxError if in_range
          in_range = true
        end
      rescue SyntaxError
        # Re-raise bare errors from above with position information
        raise(SyntaxError, "Range contains illegal components at offset #{scanner.pos}: was #{expr.last}")
      end
    end
  else
    operands = [:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1, 1]
    operands.each do |operand|
      case operand
      when Symbol
        rule = ast.detect { |candidate| candidate.sym == operand }
        raise SyntaxError, "No rule found for #{operand}" unless rule
      when Array
        validate!(ast, operand)
      when String
        # Whole-string anchors, for the same reason as the hex check above
        raise SyntaxError, "String must be of the form CHAR*" unless operand.match?(/\A#{Terminals::CHAR}*\z/)
      end
    end
  end
end
|
629
|
+
|
630
|
+
##
|
631
|
+
# Validate the rule, with respect to an AST.
|
632
|
+
#
|
633
|
+
# Uses `#validate!` and catches `SyntaxError`
|
634
|
+
#
|
635
|
+
# @param [Array<Rule>] ast
|
636
|
+
# The set of rules, used to turn symbols into rules
|
637
|
+
# @return [Boolean]
|
638
|
+
# Runs `validate!` against `ast` and reports the outcome as a boolean:
# false when it raises SyntaxError, true otherwise.
def valid?(ast)
  begin
    validate!(ast)
  rescue SyntaxError
    return false
  end
  true
end
|
644
|
+
|
645
|
+
# Do the firsts of this rule include the empty string?
|
646
|
+
#
|
647
|
+
# @return [Boolean]
|
648
|
+
# @return [Boolean] true only when firsts have been recorded and contain `:_eps`
def first_includes_eps?
  # Coerce so that an unset @first yields false rather than nil,
  # matching the documented Boolean return.
  !!(@first && @first.include?(:_eps))
end
|
651
|
+
|
652
|
+
# Add terminal as preceding this rule.
|
653
|
+
#
|
654
|
+
# @param [Array<Rule, Symbol, String>] terminals
|
655
|
+
# @return [Integer] the number of terminals added
|
656
|
+
# Appends to @first every given terminal not already present there.
# Rule arguments are recorded by their symbol; other values as given.
# Returns how many entries were appended.
def add_first(terminals)
  @first ||= []
  names = terminals.map do |item|
    next item unless item.is_a?(Rule)

    item.sym
  end
  additions = names - @first
  @first = @first + additions
  additions.size
end
|
662
|
+
|
663
|
+
# Add terminal as following this rule. Don't add _eps as a follow
|
664
|
+
#
|
665
|
+
# @param [Array<Rule, Symbol, String>] terminals
|
666
|
+
# @return [Integer] the number of terminals added
|
667
|
+
# Appends to @follow every given terminal that is neither already present
# nor `:_eps`. Rule arguments are recorded by their symbol. @follow is left
# untouched when there is nothing to append. Returns how many were appended.
def add_follow(terminals)
  names = terminals.map { |item| item.is_a?(Rule) ? item.sym : item }
  already = @follow || []
  additions = names - already - [:_eps]
  return 0 if additions.empty?

  @follow = already + additions
  additions.length
end
|
676
|
+
|
398
677
|
private
|
399
678
|
def ttl_expr(expr, pfx, depth, is_obj = true)
|
400
679
|
indent = ' ' * depth
|
401
|
-
@ebnf.debug("ttl_expr", depth: depth) {expr.inspect}
|
402
|
-
op = expr
|
680
|
+
@ebnf.debug("ttl_expr", depth: depth) {expr.inspect} if @ebnf
|
681
|
+
op, *expr = expr if expr.is_a?(Array)
|
403
682
|
statements = []
|
404
683
|
|
405
684
|
if is_obj
|
@@ -410,17 +689,28 @@ module EBNF
|
|
410
689
|
|
411
690
|
case op
|
412
691
|
when :seq, :alt, :diff
|
692
|
+
# Multiple operands
|
413
693
|
statements << %{#{indent}#{bra}#{pfx}:#{op} (}
|
414
694
|
expr.each {|a| statements += ttl_expr(a, pfx, depth + 1)}
|
415
695
|
statements << %{#{indent} )#{ket}}
|
416
|
-
when :opt, :plus, :star
|
696
|
+
when :opt, :plus, :star, :not
|
697
|
+
# Single operand
|
417
698
|
statements << %{#{indent}#{bra}#{pfx}:#{op} }
|
418
699
|
statements += ttl_expr(expr.first, pfx, depth + 1)
|
419
700
|
statements << %{#{indent} #{ket}} unless ket.empty?
|
420
|
-
when :
|
701
|
+
when :rept
|
702
|
+
# Three operands (min, max and expr)
|
703
|
+
statements << %{ #{indent}#{pfx}:min #{expr[0].inspect};}
|
704
|
+
statements << %{ #{indent}#{pfx}:max #{expr[1].inspect};}
|
705
|
+
statements << %{#{indent}#{bra}#{pfx}:#{op} }
|
706
|
+
statements += ttl_expr(expr.last, pfx, depth + 1)
|
707
|
+
statements << %{#{indent} #{ket}} unless ket.empty?
|
708
|
+
when :_empty, :_eps
|
421
709
|
statements << %{#{indent}"g:#{op.to_s[1..-1]}"}
|
422
710
|
when :"'"
|
423
711
|
statements << %{#{indent}"#{esc(expr)}"}
|
712
|
+
when :istr
|
713
|
+
statements << %{#{indent}#{bra} re:matches #{expr.first.inspect} #{ket}}
|
424
714
|
when :range
|
425
715
|
statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
|
426
716
|
when :hex
|
@@ -435,7 +725,7 @@ module EBNF
|
|
435
725
|
end
|
436
726
|
|
437
727
|
statements.last << " ." unless is_obj
|
438
|
-
@ebnf.debug("statements", depth: depth) {statements.join("\n")}
|
728
|
+
@ebnf.debug("statements", depth: depth) {statements.join("\n")} if @ebnf
|
439
729
|
statements
|
440
730
|
end
|
441
731
|
|
@@ -476,7 +766,7 @@ module EBNF
|
|
476
766
|
# Advances this rule's @id_seq counter and derives a new symbol/id pair
# from it.
#
# @param [String, nil] variation optional suffix appended to both values
# @return [Array(Symbol, String)] the id element is nil when @id is unset
def make_sym_id(variation = nil)
  @id_seq = (@id_seq || 0) + 1
  derived_sym = "_#{@sym}_#{@id_seq}#{variation}".to_sym
  derived_id = @id ? "#{@id}.#{@id_seq}#{variation}" : nil
  [derived_sym, derived_id]
end
|
481
771
|
end
|
482
772
|
end
|