ebnf 1.2.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +160 -185
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +6 -3
- data/etc/doap.ttl +13 -12
- data/etc/ebnf.ebnf +13 -19
- data/etc/ebnf.html +205 -239
- data/etc/{ebnf.rb → ebnf.ll1.rb} +3 -4
- data/etc/ebnf.ll1.sxp +179 -183
- data/etc/ebnf.peg.rb +98 -0
- data/etc/ebnf.peg.sxp +93 -0
- data/etc/ebnf.sxp +37 -41
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +362 -362
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +101 -101
- data/lib/ebnf.rb +3 -1
- data/lib/ebnf/base.rb +30 -29
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ll1.rb +132 -1
- data/lib/ebnf/ll1/lexer.rb +20 -22
- data/lib/ebnf/ll1/parser.rb +86 -61
- data/lib/ebnf/ll1/scanner.rb +83 -50
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +535 -0
- data/lib/ebnf/peg/rule.rb +222 -0
- data/lib/ebnf/rule.rb +118 -55
- data/lib/ebnf/terminals.rb +18 -0
- data/lib/ebnf/writer.rb +3 -2
- metadata +29 -6
- data/etc/sparql.rb +0 -45773
@@ -0,0 +1,222 @@
|
|
1
|
+
module EBNF::PEG
|
2
|
+
# Behavior for parsing a PEG rule
|
3
|
+
module Rule
|
4
|
+
##
|
5
|
+
# Initialized by parser when loading rules.
|
6
|
+
# Used for finding rules and invoking elements of the parse process.
|
7
|
+
#
|
8
|
+
# @return [EBNF::PEG::Parser] parser
|
9
|
+
attr_accessor :parser
|
10
|
+
|
11
|
+
##
|
12
|
+
# Parse a rule or terminal, invoking callbacks, as appropriate
|
13
|
+
|
14
|
+
# If `start_production` and/or `production` are defined,
|
15
|
+
# they are invoked with a `prod_data` stack, the input stream and offset.
|
16
|
+
# Otherwise, the results are added as an array value
|
17
|
+
# to a hash indexed by the rule name.
|
18
|
+
#
|
19
|
+
# If matched, the input position is updated and the results returned in a Hash.
|
20
|
+
#
|
21
|
+
# * `alt`: returns the value of the matched production or `:unmatched`
|
22
|
+
# * `diff`: returns the string value matched, or `:unmatched`
|
23
|
+
# * `hex`: returns a string composed of the matched hex character, or `:unmatched`.
|
24
|
+
# * `opt`: returns the matched production, or `nil` if unmatched.
|
25
|
+
# * `plus`: returns an array of the matches for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string.
|
26
|
+
# * `range`: returns a string composed of the character matching the range, or `:unmatched`.
|
27
|
+
# * `seq`: returns an array composed of single-entry hashes for each matched production indexed by the production name, or `:unmatched` if any production fails to match. For Terminals, returns a string created by concatenating these values.
|
28
|
+
# * `star`: returns an array of the matches for the specified production. For Terminals, these are concatenated into a single string.
|
29
|
+
# @param [Scanner] input
|
30
|
+
# @return [Hash{Symbol => Object}, :unmatched] A hash with keys for matched component of the expression. Returns :unmatched if the input does not match the production.
|
31
|
+
def parse(input)
|
32
|
+
# Save position and linenumber for backtracking
|
33
|
+
pos, lineno = input.pos, input.lineno
|
34
|
+
|
35
|
+
parser.packrat[sym] ||= {}
|
36
|
+
if parser.packrat[sym][pos]
|
37
|
+
parser.debug("#{sym}(:memo)", lineno: lineno) { "#{parser.packrat[sym][pos].inspect}(@#{pos})"}
|
38
|
+
input.pos, input.lineno = parser.packrat[sym][pos][:pos], parser.packrat[sym][pos][:lineno]
|
39
|
+
return parser.packrat[sym][pos][:result]
|
40
|
+
end
|
41
|
+
|
42
|
+
if terminal?
|
43
|
+
# If the terminal is defined with a regular expression,
|
44
|
+
# use that to match the input,
|
45
|
+
# otherwise fall through and match using the rule's expression below.
|
46
|
+
if regexp = parser.find_terminal_regexp(sym)
|
47
|
+
matched = input.scan(regexp)
|
48
|
+
result = (matched ? parser.onTerminal(sym, matched) : :unmatched)
|
49
|
+
# Update furthest failure for strings and terminals
|
50
|
+
parser.update_furthest_failure(input.pos, input.lineno, sym) if result == :unmatched
|
51
|
+
parser.packrat[sym][pos] = {
|
52
|
+
pos: input.pos,
|
53
|
+
lineno: input.lineno,
|
54
|
+
result: result
|
55
|
+
}
|
56
|
+
return parser.packrat[sym][pos][:result]
|
57
|
+
end
|
58
|
+
else
|
59
|
+
eat_whitespace(input)
|
60
|
+
end
|
61
|
+
parser.onStart(sym)
|
62
|
+
|
63
|
+
result = case expr.first
|
64
|
+
when :alt
|
65
|
+
# Return the first expression to match.
|
66
|
+
# Result is either :unmatched, or the value of the matching rule
|
67
|
+
alt = :unmatched
|
68
|
+
expr[1..-1].each do |prod|
|
69
|
+
alt = case prod
|
70
|
+
when Symbol
|
71
|
+
rule = parser.find_rule(prod)
|
72
|
+
raise "No rule found for #{prod}" unless rule
|
73
|
+
rule.parse(input)
|
74
|
+
when String
|
75
|
+
input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
|
76
|
+
end
|
77
|
+
if alt == :unmatched
|
78
|
+
# Update furthest failure for strings and terminals
|
79
|
+
parser.update_furthest_failure(input.pos, input.lineno, prod) if prod.is_a?(String) || rule.terminal?
|
80
|
+
else
|
81
|
+
break
|
82
|
+
end
|
83
|
+
end
|
84
|
+
alt
|
85
|
+
when :diff
|
86
|
+
# matches any string that matches A but does not match B.
|
87
|
+
# XXX: Should this work for arbitrary rules?
|
88
|
+
re1, re2 = Regexp.new(translate_codepoints(expr[1])), Regexp.new(translate_codepoints(expr[2]))
|
89
|
+
matched = input.scan(re1)
|
90
|
+
if !matched || re2.match?(matched)
|
91
|
+
# Update furthest failure for terminals
|
92
|
+
parser.update_furthest_failure(input.pos, input.lineno, sym)
|
93
|
+
:unmatched
|
94
|
+
else
|
95
|
+
matched
|
96
|
+
end
|
97
|
+
when :hex
|
98
|
+
# Matches the given hex character if expression matches the character whose number (code point) in ISO/IEC 10646 is N. The number of leading zeros in the #xN form is insignificant.
|
99
|
+
input.scan(to_regexp) || begin
|
100
|
+
# Update furthest failure for terminals
|
101
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr.last)
|
102
|
+
:unmatched
|
103
|
+
end
|
104
|
+
when :opt
|
105
|
+
# Always matches
|
106
|
+
opt = case prod = expr[1]
|
107
|
+
when Symbol
|
108
|
+
rule = parser.find_rule(prod)
|
109
|
+
raise "No rule found for #{prod}" unless rule
|
110
|
+
rule.parse(input)
|
111
|
+
when String
|
112
|
+
input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
|
113
|
+
end
|
114
|
+
if opt == :unmatched
|
115
|
+
# Update furthest failure for terminals
|
116
|
+
parser.update_furthest_failure(input.pos, input.lineno, prod) if terminal?
|
117
|
+
nil
|
118
|
+
else
|
119
|
+
opt
|
120
|
+
end
|
121
|
+
when :plus
|
122
|
+
# Result is an array of all expressions while they match,
|
123
|
+
# at least one must match
|
124
|
+
prod, plus = expr[1], []
|
125
|
+
case prod
|
126
|
+
when Symbol
|
127
|
+
rule = parser.find_rule(prod)
|
128
|
+
raise "No rule found for #{prod}" unless rule
|
129
|
+
while (res = rule.parse(input)) != :unmatched
|
130
|
+
eat_whitespace(input)
|
131
|
+
plus << res
|
132
|
+
end
|
133
|
+
when String
|
134
|
+
while res = input.scan(Regexp.new(Regexp.quote(prod)))
|
135
|
+
eat_whitespace(input)
|
136
|
+
plus << res
|
137
|
+
end
|
138
|
+
end
|
139
|
+
# Update furthest failure for strings and terminals
|
140
|
+
parser.update_furthest_failure(input.pos, input.lineno, prod)
|
141
|
+
plus.empty? ? :unmatched : (terminal? ? plus.compact.join("") : plus.compact)
|
142
|
+
when :range
|
143
|
+
# Matches the specified character range
|
144
|
+
input.scan(to_regexp) || begin
|
145
|
+
# Update furthest failure for strings and terminals
|
146
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr[1])
|
147
|
+
:unmatched
|
148
|
+
end
|
149
|
+
when :seq
|
150
|
+
# Evaluate each expression into an array of hashes where each hash contains a key from the associated production and the value is the parsed value of that production. Returns :unmatched if the input does not match the production. Value ordering is ensured by native Hash ordering.
|
151
|
+
seq = expr[1..-1].each_with_object([]) do |prod, accumulator|
|
152
|
+
eat_whitespace(input) unless accumulator.empty?
|
153
|
+
res = case prod
|
154
|
+
when Symbol
|
155
|
+
rule = parser.find_rule(prod)
|
156
|
+
raise "No rule found for #{prod}" unless rule
|
157
|
+
rule.parse(input)
|
158
|
+
when String
|
159
|
+
input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
|
160
|
+
end
|
161
|
+
if res == :unmatched
|
162
|
+
# Update furthest failure for strings and terminals
|
163
|
+
parser.update_furthest_failure(input.pos, input.lineno, prod)
|
164
|
+
break :unmatched
|
165
|
+
end
|
166
|
+
accumulator << {prod.to_sym => res}
|
167
|
+
end
|
168
|
+
seq == :unmatched ?
|
169
|
+
:unmatched :
|
170
|
+
(terminal? ?
|
171
|
+
seq.map(&:values).compact.join("") : # Concat values for terminal production
|
172
|
+
seq)
|
173
|
+
when :star
|
174
|
+
# Result is an array of all expressions while they match,
|
175
|
+
# an empty array if none match
|
176
|
+
prod, star = expr[1], []
|
177
|
+
case prod
|
178
|
+
when Symbol
|
179
|
+
rule = parser.find_rule(prod)
|
180
|
+
raise "No rule found for #{prod}" unless rule
|
181
|
+
while (res = rule.parse(input)) != :unmatched
|
182
|
+
eat_whitespace(input)
|
183
|
+
star << res
|
184
|
+
end
|
185
|
+
when String
|
186
|
+
while res = input.scan(Regexp.new(Regexp.quote(prod)))
|
187
|
+
eat_whitespace(input)
|
188
|
+
star << res
|
189
|
+
end
|
190
|
+
end
|
191
|
+
# Update furthest failure for strings and terminals
|
192
|
+
parser.update_furthest_failure(input.pos, input.lineno, prod)
|
193
|
+
star.compact
|
194
|
+
else
|
195
|
+
raise "attempt to parse unknown rule type: #{expr.first}"
|
196
|
+
end
|
197
|
+
|
198
|
+
if result == :unmatched
|
199
|
+
input.pos, input.lineno = pos, lineno
|
200
|
+
end
|
201
|
+
|
202
|
+
result = parser.onFinish(result)
|
203
|
+
(parser.packrat[sym] ||= {})[pos] = {
|
204
|
+
pos: input.pos,
|
205
|
+
lineno: input.lineno,
|
206
|
+
result: result
|
207
|
+
}
|
208
|
+
return parser.packrat[sym][pos][:result]
|
209
|
+
end
|
210
|
+
|
211
|
+
##
|
212
|
+
# Eat whitespace between non-terminal rules
|
213
|
+
def eat_whitespace(input)
|
214
|
+
if parser.whitespace.is_a?(Regexp)
|
215
|
+
# Eat whitespace before a non-terminal
|
216
|
+
input.skip(parser.whitespace)
|
217
|
+
elsif parser.whitespace.is_a?(Rule)
|
218
|
+
parser.whitespace.parse(input) # throw away result
|
219
|
+
end
|
220
|
+
end
|
221
|
+
end
|
222
|
+
end
|
data/lib/ebnf/rule.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
+
require 'scanf'
|
2
|
+
|
1
3
|
module EBNF
|
2
4
|
# Represent individual parsed rules
|
3
5
|
class Rule
|
4
|
-
# Operations which are flattened to seprate rules in to_bnf
|
6
|
+
# Operations which are flattened to separate rules in to_bnf.
|
5
7
|
BNF_OPS = %w{
|
6
8
|
alt opt plus seq star
|
7
9
|
}.map(&:to_sym).freeze
|
@@ -57,16 +59,16 @@ module EBNF
|
|
57
59
|
# Determines preparation and cleanup rules for reconstituting EBNF ? * + from BNF
|
58
60
|
attr_accessor :cleanup
|
59
61
|
|
60
|
-
# @param [Integer] id
|
61
62
|
# @param [Symbol] sym
|
63
|
+
# @param [Integer] id
|
62
64
|
# @param [Array] expr
|
63
|
-
# @param [Symbol]
|
64
|
-
# @param [String]
|
65
|
-
# @param [Array]
|
66
|
-
# @param [Array]
|
67
|
-
# @param [Boolean]
|
68
|
-
# @param [Rule]
|
69
|
-
# @param [Boolean]
|
65
|
+
# @param [Symbol] kind (nil)
|
66
|
+
# @param [String] ebnf (nil)
|
67
|
+
# @param [Array] first (nil)
|
68
|
+
# @param [Array] follow (nil)
|
69
|
+
# @param [Boolean] start (nil)
|
70
|
+
# @param [Rule] top_rule (nil)
|
71
|
+
# @param [Boolean] cleanup (nil)
|
70
72
|
def initialize(sym, id, expr, kind: nil, ebnf: nil, first: nil, follow: nil, start: nil, top_rule: nil, cleanup: nil)
|
71
73
|
@sym, @id = sym, id
|
72
74
|
@expr = expr.is_a?(Array) ? expr : [:seq, expr]
|
@@ -87,7 +89,7 @@ module EBNF
|
|
87
89
|
# (rule ebnf "1" (star (alt declaration rule)))
|
88
90
|
# (terminal O_ENUM "17" (seq "[^" (plus CHAR) "]"))
|
89
91
|
#
|
90
|
-
# Also may have (first ...)
|
92
|
+
# Also may have `(first ...)`, `(follow ...)`, or `(start #t)`.
|
91
93
|
#
|
92
94
|
# @param [Array] sxp
|
93
95
|
# @return [Rule]
|
@@ -102,26 +104,28 @@ module EBNF
|
|
102
104
|
start = sxp.any? {|e| e.is_a?(Array) && e.first.to_sym == :start}
|
103
105
|
sym = sxp[1] if sxp[1].is_a?(Symbol)
|
104
106
|
id = sxp[2] if sxp[2].is_a?(String)
|
105
|
-
|
107
|
+
self.new(sym, id, expr, kind: sxp.first, first: first, follow: follow, cleanup: cleanup, start: start)
|
106
108
|
end
|
107
109
|
|
108
110
|
# Build a new rule creating a symbol and numbering from the current rule
|
109
|
-
# Symbol and number creation is handled by the top-most rule in such a chain
|
111
|
+
# Symbol and number creation is handled by the top-most rule in such a chain.
|
110
112
|
#
|
111
113
|
# @param [Array] expr
|
114
|
+
# @param [Symbol] kind (nil)
|
115
|
+
# @param [Hash{Symbol => Symbol}] cleanup (nil)
|
112
116
|
# @param [Hash{Symbol => Object}] options
|
113
|
-
# @param [Symbol] :kind
|
114
117
|
def build(expr, kind: nil, cleanup: nil, **options)
|
115
118
|
new_sym, new_id = (@top_rule ||self).send(:make_sym_id)
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
119
|
+
self.class.new(new_sym, new_id, expr,
|
120
|
+
kind: kind,
|
121
|
+
ebnf: @ebnf,
|
122
|
+
top_rule: (@top_rule || self),
|
123
|
+
cleanup: cleanup,
|
124
|
+
**options)
|
122
125
|
end
|
123
126
|
|
124
|
-
# Return representation for building S-Expressions
|
127
|
+
# Return representation for building S-Expressions.
|
128
|
+
#
|
125
129
|
# @return [Array]
|
126
130
|
def for_sxp
|
127
131
|
elements = [kind, sym]
|
@@ -143,7 +147,8 @@ module EBNF
|
|
143
147
|
|
144
148
|
alias_method :to_s, :to_sxp
|
145
149
|
|
146
|
-
# Serializes this rule to an Turtle
|
150
|
+
# Serializes this rule to Turtle.
|
151
|
+
#
|
147
152
|
# @return [String]
|
148
153
|
def to_ttl
|
149
154
|
@ebnf.debug("to_ttl") {inspect} if @ebnf
|
@@ -161,17 +166,24 @@ module EBNF
|
|
161
166
|
"\n" + statements.join("\n")
|
162
167
|
end
|
163
168
|
|
169
|
+
# Return a Ruby representation of this rule
|
170
|
+
# @return [String]
|
171
|
+
def to_ruby
|
172
|
+
"EBNF::Rule.new(#{sym.inspect}, #{id.inspect}, #{expr.inspect}#{', kind: ' + kind.inspect unless kind == :rule})"
|
173
|
+
end
|
174
|
+
|
164
175
|
##
|
165
176
|
# Transform EBNF rule to BNF rules:
|
166
177
|
#
|
167
|
-
# * Transform (a
|
168
|
-
# (a
|
169
|
-
# (_a_1
|
170
|
-
# * Transform (a
|
171
|
-
# * Transform (a
|
172
|
-
# * Transform (a
|
178
|
+
# * Transform (rule a "n" (op1 (op2))) into two rules:
|
179
|
+
# (rule a "n" (op1 _a_1))
|
180
|
+
# (rule _a_1 "n.1" (op2))
|
181
|
+
# * Transform (rule a (opt b)) into (rule a (alt _empty b))
|
182
|
+
# * Transform (rule a (star b)) into (rule a (alt _empty (seq b a)))
|
183
|
+
# * Transform (rule a (plus b)) into (rule a (seq b (star b)))
|
184
|
+
#
|
185
|
+
# Transformation includes information used to re-construct non-transformed.
|
173
186
|
#
|
174
|
-
# Transformation includes information used to re-construct non-transformed
|
175
187
|
# AST representation
|
176
188
|
# @return [Array<Rule>]
|
177
189
|
def to_bnf
|
@@ -198,19 +210,19 @@ module EBNF
|
|
198
210
|
new_rules = new_rules.map {|r| r.to_bnf}.flatten
|
199
211
|
elsif expr.first == :opt
|
200
212
|
this = dup
|
201
|
-
# * Transform (a
|
213
|
+
# * Transform (rule a (opt b)) into (rule a (alt _empty b))
|
202
214
|
this.expr = [:alt, :_empty, expr.last]
|
203
215
|
this.cleanup = :opt
|
204
216
|
new_rules = this.to_bnf
|
205
217
|
elsif expr.first == :star
|
206
|
-
# * Transform (a
|
218
|
+
# * Transform (rule a (star b)) into (rule a (alt _empty (seq b a)))
|
207
219
|
this = dup
|
208
220
|
this.cleanup = :star
|
209
221
|
new_rule = this.build([:seq, expr.last, this.sym], cleanup: :merge)
|
210
222
|
this.expr = [:alt, :_empty, new_rule.sym]
|
211
223
|
new_rules = [this] + new_rule.to_bnf
|
212
224
|
elsif expr.first == :plus
|
213
|
-
# * Transform (a
|
225
|
+
# * Transform (rule a (plus b)) into (rule a (seq b (star b)))
|
214
226
|
this = dup
|
215
227
|
this.cleanup = :plus
|
216
228
|
this.expr = [:seq, expr.last, [:star, expr.last]]
|
@@ -230,8 +242,61 @@ module EBNF
|
|
230
242
|
return new_rules
|
231
243
|
end
|
232
244
|
|
245
|
+
##
|
246
|
+
# Transform EBNF rule for PEG:
|
247
|
+
#
|
248
|
+
# * Transform (rule a "n" (op1 ... (op2 y) ...z)) into two rules:
|
249
|
+
# (rule a "n" (op1 ... _a_1 ... z))
|
250
|
+
# (rule _a_1 "n.1" (op2 y))
|
251
|
+
#
|
252
|
+
# @return [Array<Rule>]
|
253
|
+
def to_peg
|
254
|
+
new_rules = []
|
255
|
+
|
256
|
+
# Look for rules containing sub-sequences
|
257
|
+
if expr.any? {|e| e.is_a?(Array) && e.first.is_a?(Symbol)}
|
258
|
+
# duplicate ourselves for rewriting
|
259
|
+
this = dup
|
260
|
+
new_rules << this
|
261
|
+
|
262
|
+
expr.each_with_index do |e, index|
|
263
|
+
next unless e.is_a?(Array) && e.first.is_a?(Symbol)
|
264
|
+
new_rule = build(e)
|
265
|
+
this.expr[index] = new_rule.sym
|
266
|
+
new_rules << new_rule
|
267
|
+
end
|
268
|
+
|
269
|
+
# Return new rules after recursively applying #to_peg
|
270
|
+
new_rules = new_rules.map {|r| r.to_peg}.flatten
|
271
|
+
elsif [:diff, :hex, :range].include?(expr.first)
|
272
|
+
# These rules are fine, they just need to be terminals
|
273
|
+
raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal?
|
274
|
+
new_rules << self
|
275
|
+
else
|
276
|
+
new_rules << self
|
277
|
+
end
|
278
|
+
|
279
|
+
return new_rules.map {|r| r.extend(EBNF::PEG::Rule)}
|
280
|
+
end
|
281
|
+
|
282
|
+
##
|
283
|
+
# For :hex or :range, create a regular expression.
|
284
|
+
#
|
285
|
+
# @return [Regexp]
|
286
|
+
def to_regexp
|
287
|
+
case expr.first
|
288
|
+
when :hex
|
289
|
+
Regexp.new(translate_codepoints(expr[1]))
|
290
|
+
when :range
|
291
|
+
Regexp.new("[#{translate_codepoints(expr[1])}]")
|
292
|
+
else
|
293
|
+
raise "Can't turn #{expr.inspect} into a regexp"
|
294
|
+
end
|
295
|
+
end
|
296
|
+
|
233
297
|
# Return the non-terminals for this rule. For seq, this is the first
|
234
|
-
# non-
|
298
|
+
# non-terminal in the sequence. For alt, this is every non-terminal in the alt.
|
299
|
+
#
|
235
300
|
# @param [Array<Rule>] ast
|
236
301
|
# The set of rules, used to turn symbols into rules
|
237
302
|
# @return [Array<Rule>]
|
@@ -248,7 +313,8 @@ module EBNF
|
|
248
313
|
end
|
249
314
|
|
250
315
|
# Return the terminals for this rule. For seq, this is the first
|
251
|
-
# terminals or strings in the seq. For alt, this is every non-terminal ni the alt
|
316
|
+
# terminals or strings in the seq. For alt, this is every non-terminal in the alt.
|
317
|
+
#
|
252
318
|
# @param [Array<Rule>] ast
|
253
319
|
# The set of rules, used to turn symbols into rules
|
254
320
|
# @return [Array<Rule>]
|
@@ -267,8 +333,9 @@ module EBNF
|
|
267
333
|
end
|
268
334
|
|
269
335
|
# Does this rule start with a sym? It does if expr is that sym,
|
270
|
-
# expr starts with alt and contains that sym,
|
271
|
-
# expr starts with seq and the next element is that sym
|
336
|
+
# expr starts with alt and contains that sym,
|
337
|
+
# or expr starts with seq and the next element is that sym.
|
338
|
+
#
|
272
339
|
# @param [Symbol, class] sym
|
273
340
|
# Symbol matching any start element, or if it is String, any start element which is a String
|
274
341
|
# @return [Array<Symbol, String>] list of symbol (singular), or strings which are start symbol, or nil if there are none
|
@@ -283,12 +350,14 @@ module EBNF
|
|
283
350
|
end
|
284
351
|
|
285
352
|
# Do the firsts of this rule include the empty string?
|
353
|
+
#
|
286
354
|
# @return [Boolean]
|
287
355
|
def first_includes_eps?
|
288
356
|
@first && @first.include?(:_eps)
|
289
357
|
end
|
290
358
|
|
291
|
-
# Add terminal as proceding this rule
|
359
|
+
# Add terminal as proceding this rule.
|
360
|
+
#
|
292
361
|
# @param [Array<Rule, Symbol, String>] terminals
|
293
362
|
# @return [Integer] if number of terminals added
|
294
363
|
def add_first(terminals)
|
@@ -313,6 +382,7 @@ module EBNF
|
|
313
382
|
end
|
314
383
|
|
315
384
|
# Is this a terminal?
|
385
|
+
#
|
316
386
|
# @return [Boolean]
|
317
387
|
def terminal?
|
318
388
|
kind == :terminal
|
@@ -351,7 +421,8 @@ module EBNF
|
|
351
421
|
">"
|
352
422
|
end
|
353
423
|
|
354
|
-
# Two rules are equal if they have the same {#sym}, {#kind} and {#expr}
|
424
|
+
# Two rules are equal if they have the same {#sym}, {#kind} and {#expr}.
|
425
|
+
#
|
355
426
|
# @param [Rule] other
|
356
427
|
# @return [Boolean]
|
357
428
|
def ==(other)
|
@@ -360,26 +431,12 @@ module EBNF
|
|
360
431
|
expr == other.expr
|
361
432
|
end
|
362
433
|
|
363
|
-
# Two rules are equivalent if they have the same {#expr}
|
434
|
+
# Two rules are equivalent if they have the same {#expr}.
|
435
|
+
#
|
364
436
|
# @param [Rule] other
|
365
437
|
# @return [Boolean]
|
366
438
|
def equivalent?(other)
|
367
|
-
expr
|
368
|
-
end
|
369
|
-
|
370
|
-
# Rewrite the rule substituting src_rule for dst_rule wherever
|
371
|
-
# it is used in the production (first level only).
|
372
|
-
# @param [Rule] src_rule
|
373
|
-
# @param [Rule] dst_rule
|
374
|
-
# @return [Rule]
|
375
|
-
def rewrite(src_rule, dst_rule)
|
376
|
-
case @expr
|
377
|
-
when Array
|
378
|
-
@expr = @expr.map {|e| e == src_rule.sym ? dst_rule.sym : e}
|
379
|
-
else
|
380
|
-
@expr = dst_rule.sym if @expr == src_rule.sym
|
381
|
-
end
|
382
|
-
self
|
439
|
+
expr == other.expr
|
383
440
|
end
|
384
441
|
|
385
442
|
# Rules compare using their ids
|
@@ -391,6 +448,12 @@ module EBNF
|
|
391
448
|
end
|
392
449
|
end
|
393
450
|
|
451
|
+
##
|
452
|
+
# Utility function to translate code points of the form '#xN' into ruby unicode characters
|
453
|
+
def translate_codepoints(str)
  # Replace every `#xN` escape (N = one or more hex digits) with the UTF-8
  # character at that code point; all other text is passed through untouched.
  # The regexp has already restricted the digits to hex, so core String#hex
  # is sufficient and the `scanf` library (no longer shipped with Ruby since
  # 2.7) is not needed for the conversion.
  # Integer#chr raises RangeError for code points that are not valid in UTF-8.
  str.gsub(/#x\h+/) { |escape| escape[2..-1].hex.chr(Encoding::UTF_8) }
end
|
456
|
+
|
394
457
|
private
|
395
458
|
def ttl_expr(expr, pfx, depth, is_obj = true)
|
396
459
|
indent = ' ' * depth
|
@@ -413,7 +476,7 @@ module EBNF
|
|
413
476
|
statements << %{#{indent}#{bra}#{pfx}:#{op} }
|
414
477
|
statements += ttl_expr(expr.first, pfx, depth + 1)
|
415
478
|
statements << %{#{indent} #{ket}} unless ket.empty?
|
416
|
-
when :_empty, :_eps
|
479
|
+
when :_empty, :_eps
|
417
480
|
statements << %{#{indent}"g:#{op.to_s[1..-1]}"}
|
418
481
|
when :"'"
|
419
482
|
statements << %{#{indent}"#{esc(expr)}"}
|