ebnf 1.2.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +160 -185
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +6 -3
- data/etc/doap.ttl +13 -12
- data/etc/ebnf.ebnf +13 -19
- data/etc/ebnf.html +205 -239
- data/etc/{ebnf.rb → ebnf.ll1.rb} +3 -4
- data/etc/ebnf.ll1.sxp +179 -183
- data/etc/ebnf.peg.rb +98 -0
- data/etc/ebnf.peg.sxp +93 -0
- data/etc/ebnf.sxp +37 -41
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +362 -362
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +101 -101
- data/lib/ebnf.rb +3 -1
- data/lib/ebnf/base.rb +30 -29
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ll1.rb +132 -1
- data/lib/ebnf/ll1/lexer.rb +20 -22
- data/lib/ebnf/ll1/parser.rb +86 -61
- data/lib/ebnf/ll1/scanner.rb +83 -50
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +535 -0
- data/lib/ebnf/peg/rule.rb +222 -0
- data/lib/ebnf/rule.rb +118 -55
- data/lib/ebnf/terminals.rb +18 -0
- data/lib/ebnf/writer.rb +3 -2
- metadata +29 -6
- data/etc/sparql.rb +0 -45773
@@ -0,0 +1,222 @@
|
|
1
|
+
module EBNF::PEG
  # Behavior for parsing a PEG rule.
  #
  # The methods here call `sym`, `expr`, `terminal?`, `to_regexp` and
  # `translate_codepoints` on the receiver, so the module is meant to be mixed
  # into an object that provides them.
  module Rule
    ##
    # Initialized by parser when loading rules.
    # Used for finding rules and invoking elements of the parse process.
    #
    # @return [EBNF::PEG::Parser] parser
    attr_accessor :parser

    ##
    # Parse a rule or terminal, invoking parser callbacks as appropriate.
    #
    # An outcome already memoized in `parser.packrat` for this rule at the
    # current input position is replayed without re-parsing. A terminal for
    # which the parser supplies a regular expression is matched with it and
    # reported through `parser.onTerminal`. Anything else is evaluated by
    # expression type between `parser.onStart` and `parser.onFinish`, with
    # whitespace eaten first when the rule is not a terminal.
    #
    # If matched, the input position is updated; otherwise the position and
    # line number saved on entry are restored.
    #
    # Value handed to `parser.onFinish`, by expression type:
    #
    # * `alt`: the value of the first matching production, or `:unmatched`.
    # * `diff`: the string matched, or `:unmatched`.
    # * `hex`: the matched string, or `:unmatched`.
    # * `opt`: the matched production, or `nil` if unmatched.
    # * `plus`: an array of the matches for the production, or `:unmatched` if none matched. For terminals, the matches are concatenated into a single string.
    # * `range`: the matched string, or `:unmatched`.
    # * `seq`: an array of single-entry hashes, one per matched production and keyed by the production name, or `:unmatched` if any production fails to match. For terminals, a string created by concatenating the values.
    # * `star`: an array of the matches for the production (empty if none matched).
    #
    # @param [Scanner] input
    # @return [Object, :unmatched] the value returned by `parser.onFinish` (or, for regexp-backed terminals, by `parser.onTerminal`); `:unmatched` is what the expression yields when the input does not match.
    # @raise [RuntimeError] if a referenced rule cannot be found or the expression type is unknown
    def parse(input)
      # Remember where we started so a failed match can backtrack.
      start_pos, start_lineno = input.pos, input.lineno

      # Packrat memoization: replay an earlier outcome for this rule here.
      parser.packrat[sym] ||= {}
      if (memo = parser.packrat[sym][start_pos])
        parser.debug("#{sym}(:memo)", lineno: start_lineno) { "#{memo.inspect}(@#{start_pos})"}
        input.pos, input.lineno = memo[:pos], memo[:lineno]
        return memo[:result]
      end

      if terminal?
        # A terminal defined with a regular expression is matched directly;
        # otherwise fall through and evaluate its expression below.
        regexp = parser.find_terminal_regexp(sym)
        if regexp
          matched = input.scan(regexp)
          result = matched ? parser.onTerminal(sym, matched) : :unmatched
          # Update furthest failure for strings and terminals
          parser.update_furthest_failure(input.pos, input.lineno, sym) if result == :unmatched
          parser.packrat[sym][start_pos] = {
            pos: input.pos,
            lineno: input.lineno,
            result: result
          }
          return result
        end
      else
        eat_whitespace(input)
      end
      parser.onStart(sym)

      # Look up a rule by name, failing loudly when it does not exist.
      rule_for = lambda do |name|
        found = parser.find_rule(name)
        raise "No rule found for #{name}" unless found
        found
      end

      # Scan a literal string at the current position (nil when absent).
      literal = lambda do |string|
        input.scan(Regexp.new(Regexp.quote(string)))
      end

      # Match a single element: a Symbol names a rule, a String is a literal.
      # Anything else yields nil.
      element = lambda do |prod|
        case prod
        when Symbol
          rule_for.call(prod).parse(input)
        when String
          literal.call(prod) || :unmatched
        end
      end

      # Match an element as many times as possible, eating whitespace after
      # each match, and record where matching stopped.
      repeated = lambda do |prod|
        matches = []
        case prod
        when Symbol
          rule = rule_for.call(prod)
          until (res = rule.parse(input)) == :unmatched
            eat_whitespace(input)
            matches << res
          end
        when String
          while (res = literal.call(prod))
            eat_whitespace(input)
            matches << res
          end
        end
        # Update furthest failure for strings and terminals
        parser.update_furthest_failure(input.pos, input.lineno, prod)
        matches
      end

      result = case expr.first
      when :alt
        # Try each alternative in order; the first one to match wins.
        # Result is either :unmatched, or the value of the matching rule.
        chosen = :unmatched
        expr[1..-1].each do |prod|
          rule = nil
          chosen = case prod
          when Symbol
            rule = rule_for.call(prod)
            rule.parse(input)
          when String
            literal.call(prod) || :unmatched
          end
          break unless chosen == :unmatched
          # Update furthest failure for strings and terminals
          parser.update_furthest_failure(input.pos, input.lineno, prod) if prod.is_a?(String) || rule.terminal?
        end
        chosen
      when :diff
        # Matches any string that matches A but does not match B.
        # XXX: Should this work for arbitrary rules?
        include_re = Regexp.new(translate_codepoints(expr[1]))
        exclude_re = Regexp.new(translate_codepoints(expr[2]))
        matched = input.scan(include_re)
        if matched && !exclude_re.match?(matched)
          matched
        else
          # Update furthest failure for terminals
          parser.update_furthest_failure(input.pos, input.lineno, sym)
          :unmatched
        end
      when :hex
        # Matches the character whose number (code point) in ISO/IEC 10646 is N.
        # The number of leading zeros in the #xN form is insignificant.
        matched = input.scan(to_regexp)
        if matched
          matched
        else
          # Update furthest failure for terminals
          parser.update_furthest_failure(input.pos, input.lineno, expr.last)
          :unmatched
        end
      when :opt
        # Always matches: a missing element simply yields nil.
        prod = expr[1]
        opt = element.call(prod)
        if opt == :unmatched
          # Update furthest failure for terminals
          parser.update_furthest_failure(input.pos, input.lineno, prod) if terminal?
          nil
        else
          opt
        end
      when :plus
        # Result is an array of all expressions while they match,
        # at least one must match.
        matches = repeated.call(expr[1])
        if matches.empty?
          :unmatched
        elsif terminal?
          matches.compact.join("")
        else
          matches.compact
        end
      when :range
        # Matches the specified character range.
        matched = input.scan(to_regexp)
        if matched
          matched
        else
          # Update furthest failure for strings and terminals
          parser.update_furthest_failure(input.pos, input.lineno, expr[1])
          :unmatched
        end
      when :seq
        # Evaluate each expression into an array of hashes where each hash
        # contains a key from the associated production and the value is the
        # parsed value of that production. Yields :unmatched as soon as one
        # element fails. Whitespace is eaten between elements.
        parts = expr[1..-1].each_with_object([]) do |prod, accumulator|
          eat_whitespace(input) unless accumulator.empty?
          res = element.call(prod)
          if res == :unmatched
            # Update furthest failure for strings and terminals
            parser.update_furthest_failure(input.pos, input.lineno, prod)
            break :unmatched
          end
          accumulator << {prod.to_sym => res}
        end
        if parts == :unmatched
          :unmatched
        elsif terminal?
          # Concat values for terminal production
          parts.map(&:values).compact.join("")
        else
          parts
        end
      when :star
        # Result is an array of all expressions while they match,
        # an empty array if none match.
        repeated.call(expr[1]).compact
      else
        raise "attempt to parse unknown rule type: #{expr.first}"
      end

      # Backtrack to the starting point when nothing matched.
      input.pos, input.lineno = start_pos, start_lineno if result == :unmatched

      result = parser.onFinish(result)
      # The per-rule memo table is re-created if it is no longer present.
      (parser.packrat[sym] ||= {})[start_pos] = {
        pos: input.pos,
        lineno: input.lineno,
        result: result
      }
      result
    end

    ##
    # Eat whitespace between non-terminal rules.
    #
    # `parser.whitespace` may be a Regexp, which is skipped on the input, or a
    # Rule, which is parsed with its result thrown away. Any other value
    # leaves the input untouched.
    #
    # @param [Scanner] input
    def eat_whitespace(input)
      whitespace = parser.whitespace
      case whitespace
      when Regexp
        # Eat whitespace before a non-terminal
        input.skip(whitespace)
      when Rule
        whitespace.parse(input) # throw away result
      end
    end
  end
end
|
data/lib/ebnf/rule.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
+
require 'scanf'
|
2
|
+
|
1
3
|
module EBNF
|
2
4
|
# Represent individual parsed rules
|
3
5
|
class Rule
|
4
|
-
# Operations which are flattened to seprate rules in to_bnf
|
6
|
+
# Operations which are flattened to separate rules in to_bnf.
|
5
7
|
BNF_OPS = %w{
|
6
8
|
alt opt plus seq star
|
7
9
|
}.map(&:to_sym).freeze
|
@@ -57,16 +59,16 @@ module EBNF
|
|
57
59
|
# Determines preparation and cleanup rules for reconstituting EBNF ? * + from BNF
|
58
60
|
attr_accessor :cleanup
|
59
61
|
|
60
|
-
# @param [Integer] id
|
61
62
|
# @param [Symbol] sym
|
63
|
+
# @param [Integer] id
|
62
64
|
# @param [Array] expr
|
63
|
-
# @param [Symbol]
|
64
|
-
# @param [String]
|
65
|
-
# @param [Array]
|
66
|
-
# @param [Array]
|
67
|
-
# @param [Boolean]
|
68
|
-
# @param [Rule]
|
69
|
-
# @param [Boolean]
|
65
|
+
# @param [Symbol] kind (nil)
|
66
|
+
# @param [String] ebnf (nil)
|
67
|
+
# @param [Array] first (nil)
|
68
|
+
# @param [Array] follow (nil)
|
69
|
+
# @param [Boolean] start (nil)
|
70
|
+
# @param [Rule] top_rule (nil)
|
71
|
+
# @param [Boolean] cleanup (nil)
|
70
72
|
def initialize(sym, id, expr, kind: nil, ebnf: nil, first: nil, follow: nil, start: nil, top_rule: nil, cleanup: nil)
|
71
73
|
@sym, @id = sym, id
|
72
74
|
@expr = expr.is_a?(Array) ? expr : [:seq, expr]
|
@@ -87,7 +89,7 @@ module EBNF
|
|
87
89
|
# (rule ebnf "1" (star (alt declaration rule)))
|
88
90
|
# (terminal O_ENUM "17" (seq "[^" (plus CHAR) "]"))
|
89
91
|
#
|
90
|
-
# Also may have (first ...)
|
92
|
+
# Also may have `(first ...)`, `(follow ...)`, or `(start #t)`.
|
91
93
|
#
|
92
94
|
# @param [Array] sxp
|
93
95
|
# @return [Rule]
|
@@ -102,26 +104,28 @@ module EBNF
|
|
102
104
|
start = sxp.any? {|e| e.is_a?(Array) && e.first.to_sym == :start}
|
103
105
|
sym = sxp[1] if sxp[1].is_a?(Symbol)
|
104
106
|
id = sxp[2] if sxp[2].is_a?(String)
|
105
|
-
|
107
|
+
self.new(sym, id, expr, kind: sxp.first, first: first, follow: follow, cleanup: cleanup, start: start)
|
106
108
|
end
|
107
109
|
|
108
110
|
# Build a new rule, creating its symbol and numbering from the current rule.
# Symbol and number creation is handled by the top-most rule in such a chain,
# which is also recorded as the new rule's `top_rule`.
#
# @param [Array] expr
# @param [Symbol] kind (nil)
# @param [Symbol] cleanup (nil)
# @param [Hash{Symbol => Object}] options
#   Any further keyword arguments, passed through to the constructor.
# @return [Rule] a new instance of the receiver's class
def build(expr, kind: nil, cleanup: nil, **options)
  top = @top_rule || self
  new_sym, new_id = top.send(:make_sym_id)
  self.class.new(
    new_sym, new_id, expr,
    kind: kind,
    ebnf: @ebnf,
    top_rule: top,
    cleanup: cleanup,
    **options
  )
end
|
123
126
|
|
124
|
-
# Return representation for building S-Expressions
|
127
|
+
# Return representation for building S-Expressions.
|
128
|
+
#
|
125
129
|
# @return [Array]
|
126
130
|
def for_sxp
|
127
131
|
elements = [kind, sym]
|
@@ -143,7 +147,8 @@ module EBNF
|
|
143
147
|
|
144
148
|
alias_method :to_s, :to_sxp
|
145
149
|
|
146
|
-
# Serializes this rule to an Turtle
|
150
|
+
# Serializes this rule to Turtle.
|
151
|
+
#
|
147
152
|
# @return [String]
|
148
153
|
def to_ttl
|
149
154
|
@ebnf.debug("to_ttl") {inspect} if @ebnf
|
@@ -161,17 +166,24 @@ module EBNF
|
|
161
166
|
"\n" + statements.join("\n")
|
162
167
|
end
|
163
168
|
|
169
|
+
# Return a Ruby representation of this rule: source text for an
# `EBNF::Rule.new` call carrying the symbol, id and expression. The `kind`
# argument is written only when the rule's kind is not `:rule`.
#
# @return [String]
def to_ruby
  kind_arg = kind == :rule ? "" : ", kind: #{kind.inspect}"
  "EBNF::Rule.new(#{sym.inspect}, #{id.inspect}, #{expr.inspect}#{kind_arg})"
end
|
174
|
+
|
164
175
|
##
|
165
176
|
# Transform EBNF rule to BNF rules:
|
166
177
|
#
|
167
|
-
# * Transform (a
|
168
|
-
# (a
|
169
|
-
# (_a_1
|
170
|
-
# * Transform (a
|
171
|
-
# * Transform (a
|
172
|
-
# * Transform (a
|
178
|
+
# * Transform (rule a "n" (op1 (op2))) into two rules:
|
179
|
+
# (rule a "n" (op1 _a_1))
|
180
|
+
# (rule _a_1 "n.1" (op2))
|
181
|
+
# * Transform (rule a (opt b)) into (rule a (alt _empty b))
|
182
|
+
# * Transform (rule a (star b)) into (rule a (alt _empty (seq b a)))
|
183
|
+
# * Transform (rule a (plus b)) into (rule a (seq b (star b)))
|
184
|
+
#
|
185
|
+
# Transformation includes information used to re-construct non-transformed.
|
173
186
|
#
|
174
|
-
# Transformation includes information used to re-construct non-transformed
|
175
187
|
# AST representation
|
176
188
|
# @return [Array<Rule>]
|
177
189
|
def to_bnf
|
@@ -198,19 +210,19 @@ module EBNF
|
|
198
210
|
new_rules = new_rules.map {|r| r.to_bnf}.flatten
|
199
211
|
elsif expr.first == :opt
|
200
212
|
this = dup
|
201
|
-
# * Transform (a
|
213
|
+
# * Transform (rule a (opt b)) into (rule a (alt _empty b))
|
202
214
|
this.expr = [:alt, :_empty, expr.last]
|
203
215
|
this.cleanup = :opt
|
204
216
|
new_rules = this.to_bnf
|
205
217
|
elsif expr.first == :star
|
206
|
-
# * Transform (a
|
218
|
+
# * Transform (rule a (star b)) into (rule a (alt _empty (seq b a)))
|
207
219
|
this = dup
|
208
220
|
this.cleanup = :star
|
209
221
|
new_rule = this.build([:seq, expr.last, this.sym], cleanup: :merge)
|
210
222
|
this.expr = [:alt, :_empty, new_rule.sym]
|
211
223
|
new_rules = [this] + new_rule.to_bnf
|
212
224
|
elsif expr.first == :plus
|
213
|
-
# * Transform (a
|
225
|
+
# * Transform (rule a (plus b)) into (rule a (seq b (star b)))
|
214
226
|
this = dup
|
215
227
|
this.cleanup = :plus
|
216
228
|
this.expr = [:seq, expr.last, [:star, expr.last]]
|
@@ -230,8 +242,61 @@ module EBNF
|
|
230
242
|
return new_rules
|
231
243
|
end
|
232
244
|
|
245
|
+
##
# Transform EBNF rule for PEG:
#
# * Transform (rule a "n" (op1 ... (op2 y) ...z)) into two rules:
#     (rule a "n" (op1 ... _a_1 ... z))
#     (rule _a_1 "n.1" (op2 y))
#
# Every returned rule is extended with {EBNF::PEG::Rule}.
#
# @return [Array<Rule>]
# @raise [RuntimeError] if a :diff, :hex or :range expression is found on a rule that is not a terminal
def to_peg
  # An element is a nested sub-expression when it is an Array headed by a Symbol.
  nested = lambda { |element| element.is_a?(Array) && element.first.is_a?(Symbol) }

  rules =
    if expr.any? { |element| nested.call(element) }
      # Duplicate ourselves for rewriting; each nested sub-expression is
      # replaced by the symbol of a newly built rule.
      rewritten = dup
      extracted = [rewritten]

      expr.each_with_index do |element, position|
        next unless nested.call(element)
        sub_rule = build(element)
        rewritten.expr[position] = sub_rule.sym
        extracted << sub_rule
      end

      # Return new rules after recursively applying #to_peg
      extracted.map { |r| r.to_peg }.flatten
    else
      # These rules are fine as they are, but :diff, :hex and :range
      # expressions are only allowed on terminals.
      if [:diff, :hex, :range].include?(expr.first)
        raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal?
      end
      [self]
    end

  rules.map { |r| r.extend(EBNF::PEG::Rule) }
end
|
281
|
+
|
282
|
+
##
# For :hex or :range, create a regular expression.
#
# Characters written as `#xN` code points always stand for themselves, so
# they are quoted after translation. Without quoting, a code point such as
# `#x2e` (".") would match any character, `#x2a` ("*") would not compile,
# and `#x5d` ("]") would end a character class early. Text in a range that is
# not written as a code point is passed through unchanged, which keeps `^`
# and `-` working as negation and range operators.
#
# @return [Regexp]
# @raise [RuntimeError] if the expression is neither :hex nor :range
def to_regexp
  case expr.first
  when :hex
    Regexp.new(Regexp.quote(translate_codepoints(expr[1])))
  when :range
    members = expr[1].gsub(/#x\h+/) { |codepoint| Regexp.quote(translate_codepoints(codepoint)) }
    Regexp.new("[#{members}]")
  else
    raise "Can't turn #{expr.inspect} into a regexp"
  end
end
|
296
|
+
|
233
297
|
# Return the non-terminals for this rule. For seq, this is the first
|
234
|
-
# non-
|
298
|
+
# non-terminal in the sequence. For alt, this is every non-terminal in the alt.
|
299
|
+
#
|
235
300
|
# @param [Array<Rule>] ast
|
236
301
|
# The set of rules, used to turn symbols into rules
|
237
302
|
# @return [Array<Rule>]
|
@@ -248,7 +313,8 @@ module EBNF
|
|
248
313
|
end
|
249
314
|
|
250
315
|
# Return the terminals for this rule. For seq, this is the first
|
251
|
-
# terminals or strings in the seq. For alt, this is every non-terminal ni the alt
|
316
|
+
# terminals or strings in the seq. For alt, this is every non-terminal in the alt.
|
317
|
+
#
|
252
318
|
# @param [Array<Rule>] ast
|
253
319
|
# The set of rules, used to turn symbols into rules
|
254
320
|
# @return [Array<Rule>]
|
@@ -267,8 +333,9 @@ module EBNF
|
|
267
333
|
end
|
268
334
|
|
269
335
|
# Does this rule start with a sym? It does if expr is that sym,
|
270
|
-
# expr starts with alt and contains that sym,
|
271
|
-
# expr starts with seq and the next element is that sym
|
336
|
+
# expr starts with alt and contains that sym,
|
337
|
+
# or expr starts with seq and the next element is that sym.
|
338
|
+
#
|
272
339
|
# @param [Symbol, class] sym
|
273
340
|
# Symbol matching any start element, or if it is String, any start element which is a String
|
274
341
|
# @return [Array<Symbol, String>] list of symbol (singular), or strings which are start symbol, or nil if there are none
|
@@ -283,12 +350,14 @@ module EBNF
|
|
283
350
|
end
|
284
351
|
|
285
352
|
# Do the firsts of this rule include the empty string (`:_eps`)?
#
# @return [Boolean, nil] falsy when no first set has been recorded
def first_includes_eps?
  return @first unless @first
  @first.include?(:_eps)
end
|
290
358
|
|
291
|
-
# Add terminal as proceding this rule
|
359
|
+
# Add terminal as proceding this rule.
|
360
|
+
#
|
292
361
|
# @param [Array<Rule, Symbol, String>] terminals
|
293
362
|
# @return [Integer] if number of terminals added
|
294
363
|
def add_first(terminals)
|
@@ -313,6 +382,7 @@ module EBNF
|
|
313
382
|
end
|
314
383
|
|
315
384
|
# Is this a terminal?
|
385
|
+
#
|
316
386
|
# @return [Boolean]
|
317
387
|
def terminal?
|
318
388
|
kind == :terminal
|
@@ -351,7 +421,8 @@ module EBNF
|
|
351
421
|
">"
|
352
422
|
end
|
353
423
|
|
354
|
-
# Two rules are equal if they have the same {#sym}, {#kind} and {#expr}
|
424
|
+
# Two rules are equal if they have the same {#sym}, {#kind} and {#expr}.
|
425
|
+
#
|
355
426
|
# @param [Rule] other
|
356
427
|
# @return [Boolean]
|
357
428
|
def ==(other)
|
@@ -360,26 +431,12 @@ module EBNF
|
|
360
431
|
expr == other.expr
|
361
432
|
end
|
362
433
|
|
363
|
-
# Two rules are equivalent if they have the same {#expr}
|
434
|
+
# Two rules are equivalent if they have the same {#expr}.
|
435
|
+
#
|
364
436
|
# @param [Rule] other
|
365
437
|
# @return [Boolean]
|
366
438
|
def equivalent?(other)
|
367
|
-
expr
|
368
|
-
end
|
369
|
-
|
370
|
-
# Rewrite the rule substituting src_rule for dst_rule wherever
|
371
|
-
# it is used in the production (first level only).
|
372
|
-
# @param [Rule] src_rule
|
373
|
-
# @param [Rule] dst_rule
|
374
|
-
# @return [Rule]
|
375
|
-
def rewrite(src_rule, dst_rule)
|
376
|
-
case @expr
|
377
|
-
when Array
|
378
|
-
@expr = @expr.map {|e| e == src_rule.sym ? dst_rule.sym : e}
|
379
|
-
else
|
380
|
-
@expr = dst_rule.sym if @expr == src_rule.sym
|
381
|
-
end
|
382
|
-
self
|
439
|
+
expr == other.expr
|
383
440
|
end
|
384
441
|
|
385
442
|
# Rules compare using their ids
|
@@ -391,6 +448,12 @@ module EBNF
|
|
391
448
|
end
|
392
449
|
end
|
393
450
|
|
451
|
+
##
# Utility function to translate code points of the form '#xN' into ruby unicode characters.
#
# The hexadecimal digits are converted with String#hex, so no library beyond
# core Ruby is needed for the conversion. Text that is not a code point is
# left untouched.
#
# @param [String] str
# @return [String] `str` with every `#xN` replaced by the UTF-8 character for code point N
def translate_codepoints(str)
  str.gsub(/#x\h+/) { |c| c[2..-1].hex.chr(Encoding::UTF_8) }
end
|
456
|
+
|
394
457
|
private
|
395
458
|
def ttl_expr(expr, pfx, depth, is_obj = true)
|
396
459
|
indent = ' ' * depth
|
@@ -413,7 +476,7 @@ module EBNF
|
|
413
476
|
statements << %{#{indent}#{bra}#{pfx}:#{op} }
|
414
477
|
statements += ttl_expr(expr.first, pfx, depth + 1)
|
415
478
|
statements << %{#{indent} #{ket}} unless ket.empty?
|
416
|
-
when :_empty, :_eps
|
479
|
+
when :_empty, :_eps
|
417
480
|
statements << %{#{indent}"g:#{op.to_s[1..-1]}"}
|
418
481
|
when :"'"
|
419
482
|
statements << %{#{indent}"#{esc(expr)}"}
|