ebnf 2.0.0 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +81 -36
- data/VERSION +1 -1
- data/bin/ebnf +34 -18
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/ebnf.ebnf +19 -25
- data/etc/ebnf.html +251 -206
- data/etc/ebnf.ll1.rb +27 -103
- data/etc/ebnf.ll1.sxp +105 -102
- data/etc/ebnf.peg.rb +54 -62
- data/etc/ebnf.peg.sxp +53 -62
- data/etc/ebnf.sxp +22 -19
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.sxp +8 -7
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.sxp +22 -20
- data/lib/ebnf.rb +3 -0
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +87 -44
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +4 -7
- data/lib/ebnf/ll1/parser.rb +12 -4
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +1 -1
- data/lib/ebnf/peg/parser.rb +24 -5
- data/lib/ebnf/peg/rule.rb +77 -58
- data/lib/ebnf/rule.rb +352 -121
- data/lib/ebnf/terminals.rb +13 -10
- data/lib/ebnf/writer.rb +550 -78
- metadata +48 -6
data/lib/ebnf/peg.rb
CHANGED
@@ -31,7 +31,7 @@ module EBNF
|
|
31
31
|
def to_ruby_peg(output, **options)
|
32
32
|
output.puts " RULES = ["
|
33
33
|
ast.each do |rule|
|
34
|
-
output.puts " " + rule.to_ruby + '.extend(EBNF::PEG::Rule),'
|
34
|
+
output.puts " " + rule.to_ruby + (rule.is_a?(EBNF::PEG::Rule) ? '.extend(EBNF::PEG::Rule)' : '') + ','
|
35
35
|
end
|
36
36
|
output.puts " ]"
|
37
37
|
end
|
data/lib/ebnf/peg/parser.rb
CHANGED
@@ -51,6 +51,7 @@ module EBNF::PEG
|
|
51
51
|
# DSL for creating terminals and productions
|
52
52
|
module ClassMethods
|
53
53
|
def start_handlers; (@start_handlers ||= {}); end
|
54
|
+
def start_options; (@start_hoptions ||= {}); end
|
54
55
|
def production_handlers; (@production_handlers ||= {}); end
|
55
56
|
def terminal_handlers; (@terminal_handlers ||= {}); end
|
56
57
|
def terminal_regexps; (@terminal_regexps ||= {}); end
|
@@ -97,6 +98,10 @@ module EBNF::PEG
|
|
97
98
|
#
|
98
99
|
# @param [Symbol] term
|
99
100
|
# The rule name
|
101
|
+
# @param [Hash{Symbol => Object}] options
|
102
|
+
# Options which are returned from {Parser#onStart}.
|
103
|
+
# @option options [Boolean] :as_hash (false)
|
104
|
+
# If the production is a `seq`, causes the value to be represented as a single hash, rather than an array of individual hashes for each sub-production. Note that this is not always advisable due to the possibility of repeated productions within the sequence.
|
100
105
|
# @yield [data, block]
|
101
106
|
# @yieldparam [Hash] data
|
102
107
|
# A Hash defined for the current production, during :start
|
@@ -106,8 +111,9 @@ module EBNF::PEG
|
|
106
111
|
# Block passed to initialization for yielding to calling parser.
|
107
112
|
# Should conform to the yield specs for #initialize
|
108
113
|
# Yield to generate a triple
|
109
|
-
def start_production(term, &block)
|
114
|
+
def start_production(term, **options, &block)
|
110
115
|
start_handlers[term] = block
|
116
|
+
start_options[term] = options.freeze
|
111
117
|
end
|
112
118
|
|
113
119
|
##
|
@@ -204,6 +210,7 @@ module EBNF::PEG
|
|
204
210
|
@whitespace = case options[:whitespace]
|
205
211
|
when Regexp then options[:whitespace]
|
206
212
|
when Symbol then @rules[options[:whitespace]]
|
213
|
+
else options[:whitespace]
|
207
214
|
end ||
|
208
215
|
@rules.values.detect(&:pass?) ||
|
209
216
|
/(?:\s|(?:#[^x][^\n\r]*))+/m.freeze
|
@@ -329,19 +336,30 @@ module EBNF::PEG
|
|
329
336
|
# @option options [Integer] :depth
|
330
337
|
# Recursion depth for indenting output
|
331
338
|
# @yieldreturn [String] additional string appended to `message`.
|
332
|
-
def debug(*args)
|
339
|
+
def debug(*args, &block)
|
333
340
|
return unless @options[:logger]
|
334
341
|
options = args.last.is_a?(Hash) ? args.pop : {}
|
335
342
|
lineno = options[:lineno] || (scanner.lineno if scanner)
|
336
343
|
level = options.fetch(:level, 0)
|
337
|
-
|
338
344
|
depth = options[:depth] || self.depth
|
339
|
-
|
340
|
-
|
345
|
+
|
346
|
+
if self.respond_to?(:log_debug)
|
347
|
+
level = [:debug, :info, :warn, :error, :fatal][level]
|
348
|
+
log_debug(*args, **options.merge(level: level, lineno: lineno, depth: depth), &block)
|
349
|
+
elsif @options[:logger].respond_to?(:add)
|
350
|
+
args << yield if block_given?
|
351
|
+
@options[:logger].add(level, "[#{lineno}]" + (" " * depth) + args.join(" "))
|
352
|
+
elsif @options[:logger].respond_to?(:<<)
|
353
|
+
args << yield if block_given?
|
354
|
+
@options[:logger] << "[#{lineno}]" + (" " * depth) + args.join(" ")
|
355
|
+
end
|
341
356
|
end
|
342
357
|
|
343
358
|
# Start for production
|
344
359
|
# Adds data available during the processing of the production
|
360
|
+
#
|
361
|
+
# @return [Hash] composed of production options. Currently only `as_hash` is supported.
|
362
|
+
# @see ClassMethods#start_production
|
345
363
|
def onStart(prod)
|
346
364
|
handler = self.class.start_handlers[prod]
|
347
365
|
@productions << prod
|
@@ -367,6 +385,7 @@ module EBNF::PEG
|
|
367
385
|
# explicit start handler
|
368
386
|
@prod_data << {}
|
369
387
|
end
|
388
|
+
return self.class.start_options.fetch(prod, {}) # any options on this production
|
370
389
|
end
|
371
390
|
|
372
391
|
# Finish of production
|
data/lib/ebnf/peg/rule.rb
CHANGED
@@ -18,14 +18,15 @@ module EBNF::PEG
|
|
18
18
|
#
|
19
19
|
# If matched, the input position is updated and the results returned in a Hash.
|
20
20
|
#
|
21
|
-
# * `alt`: returns the value of the matched production or `:unmatched
|
22
|
-
# * `diff`: returns the
|
21
|
+
# * `alt`: returns the value of the matched production or `:unmatched`.
|
22
|
+
# * `diff`: returns the value matched, or `:unmatched`.
|
23
23
|
# * `hex`: returns a string composed of the matched hex character, or `:unmatched`.
|
24
|
-
# * `opt`: returns the matched
|
25
|
-
# * `plus`: returns an array of the
|
26
|
-
# * `range`: returns a string composed of the
|
27
|
-
# * `seq`: returns an array composed of single-entry hashes for each matched production indexed by the production name, or `:unmatched` if any production fails to match. For Terminals, returns a string created by concatenating these values.
|
28
|
-
# * `star`: returns an array of the
|
24
|
+
# * `opt`: returns the value matched, or `nil` if unmatched.
|
25
|
+
# * `plus`: returns an array of the values matched for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string.
|
26
|
+
# * `range`: returns a string composed of the values matched, or `:unmatched`, if less than `min` are matched.
|
27
|
+
# * `seq`: returns an array composed of single-entry hashes for each matched production indexed by the production name, or `:unmatched` if any production fails to match. For Terminals, returns a string created by concatenating these values. Via option in a `production` or definition, the result can be a single hash with values for each matched production; note that this is not always possible due to the possibility of repeated productions within the sequence.
|
28
|
+
# * `star`: returns an array of the values matched for the specified production. For Terminals, these are concatenated into a single string.
|
29
|
+
#
|
29
30
|
# @param [Scanner] input
|
30
31
|
# @return [Hash{Symbol => Object}, :unmatched] A hash with keys for matched component of the expression. Returns :unmatched if the input does not match the production.
|
31
32
|
def parse(input)
|
@@ -45,7 +46,7 @@ module EBNF::PEG
|
|
45
46
|
# otherwise,
|
46
47
|
if regexp = parser.find_terminal_regexp(sym)
|
47
48
|
matched = input.scan(regexp)
|
48
|
-
result =
|
49
|
+
result = parser.onTerminal(sym, (matched ? matched : :unmatched))
|
49
50
|
# Update furthest failure for strings and terminals
|
50
51
|
parser.update_furthest_failure(input.pos, input.lineno, sym) if result == :unmatched
|
51
52
|
parser.packrat[sym][pos] = {
|
@@ -58,7 +59,7 @@ module EBNF::PEG
|
|
58
59
|
else
|
59
60
|
eat_whitespace(input)
|
60
61
|
end
|
61
|
-
parser.onStart(sym)
|
62
|
+
start_options = parser.onStart(sym)
|
62
63
|
|
63
64
|
result = case expr.first
|
64
65
|
when :alt
|
@@ -84,7 +85,8 @@ module EBNF::PEG
|
|
84
85
|
alt
|
85
86
|
when :diff
|
86
87
|
# matches any string that matches A but does not match B.
|
87
|
-
#
|
88
|
+
# (Note, this is only used for Terminal rules, non-terminals will use :not)
|
89
|
+
raise "Diff used on non-terminal #{prod}" unless terminal?
|
88
90
|
re1, re2 = Regexp.new(translate_codepoints(expr[1])), Regexp.new(translate_codepoints(expr[2]))
|
89
91
|
matched = input.scan(re1)
|
90
92
|
if !matched || re2.match?(matched)
|
@@ -101,9 +103,9 @@ module EBNF::PEG
|
|
101
103
|
parser.update_furthest_failure(input.pos, input.lineno, expr.last)
|
102
104
|
:unmatched
|
103
105
|
end
|
104
|
-
when :
|
105
|
-
#
|
106
|
-
|
106
|
+
when :not
|
107
|
+
# matches any string that does not match B.
|
108
|
+
res = case prod = expr[1]
|
107
109
|
when Symbol
|
108
110
|
rule = parser.find_rule(prod)
|
109
111
|
raise "No rule found for #{prod}" unless rule
|
@@ -111,35 +113,29 @@ module EBNF::PEG
|
|
111
113
|
when String
|
112
114
|
input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
|
113
115
|
end
|
114
|
-
if
|
116
|
+
if res != :unmatched
|
115
117
|
# Update furthest failure for terminals
|
116
|
-
parser.update_furthest_failure(input.pos, input.lineno,
|
117
|
-
|
118
|
+
parser.update_furthest_failure(input.pos, input.lineno, sym) if terminal?
|
119
|
+
:unmatched
|
118
120
|
else
|
119
|
-
|
121
|
+
nil
|
120
122
|
end
|
123
|
+
when :opt
|
124
|
+
# Result is the matched value or nil
|
125
|
+
opt = rept(input, 0, 1, expr[1])
|
126
|
+
|
127
|
+
# Update furthest failure for strings and terminals
|
128
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
|
129
|
+
opt.first
|
121
130
|
when :plus
|
122
131
|
# Result is an array of all expressions while they match,
|
123
132
|
# at least one must match
|
124
|
-
|
125
|
-
|
126
|
-
when Symbol
|
127
|
-
rule = parser.find_rule(prod)
|
128
|
-
raise "No rule found for #{prod}" unless rule
|
129
|
-
while (res = rule.parse(input)) != :unmatched
|
130
|
-
eat_whitespace(input)
|
131
|
-
plus << res
|
132
|
-
end
|
133
|
-
when String
|
134
|
-
while res = input.scan(Regexp.new(Regexp.quote(prod)))
|
135
|
-
eat_whitespace(input)
|
136
|
-
plus << res
|
137
|
-
end
|
138
|
-
end
|
133
|
+
plus = rept(input, 1, '*', expr[1])
|
134
|
+
|
139
135
|
# Update furthest failure for strings and terminals
|
140
|
-
parser.update_furthest_failure(input.pos, input.lineno,
|
141
|
-
plus.
|
142
|
-
when :range
|
136
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
|
137
|
+
plus.is_a?(Array) && terminal? ? plus.join("") : plus
|
138
|
+
when :range, :istr
|
143
139
|
# Matches the specified character range
|
144
140
|
input.scan(to_regexp) || begin
|
145
141
|
# Update furthest failure for strings and terminals
|
@@ -149,7 +145,7 @@ module EBNF::PEG
|
|
149
145
|
when :seq
|
150
146
|
# Evaluate each expression into an array of hashes where each hash contains a key from the associated production and the value is the parsed value of that production. Returns :unmatched if the input does not match the production. Value ordering is ensured by native Hash ordering.
|
151
147
|
seq = expr[1..-1].each_with_object([]) do |prod, accumulator|
|
152
|
-
eat_whitespace(input) unless accumulator.empty?
|
148
|
+
eat_whitespace(input) unless accumulator.empty? || terminal?
|
153
149
|
res = case prod
|
154
150
|
when Symbol
|
155
151
|
rule = parser.find_rule(prod)
|
@@ -165,32 +161,23 @@ module EBNF::PEG
|
|
165
161
|
end
|
166
162
|
accumulator << {prod.to_sym => res}
|
167
163
|
end
|
168
|
-
seq == :unmatched
|
169
|
-
:unmatched
|
170
|
-
|
171
|
-
|
172
|
-
|
164
|
+
if seq == :unmatched
|
165
|
+
:unmatched
|
166
|
+
elsif terminal?
|
167
|
+
seq.map(&:values).compact.join("") # Concat values for terminal production
|
168
|
+
elsif start_options[:as_hash]
|
169
|
+
seq.inject {|memo, h| memo.merge(h)}
|
170
|
+
else
|
171
|
+
seq
|
172
|
+
end
|
173
173
|
when :star
|
174
174
|
# Result is an array of all expressions while they match,
|
175
175
|
# an empty array if none match
|
176
|
-
|
177
|
-
|
178
|
-
when Symbol
|
179
|
-
rule = parser.find_rule(prod)
|
180
|
-
raise "No rule found for #{prod}" unless rule
|
181
|
-
while (res = rule.parse(input)) != :unmatched
|
182
|
-
eat_whitespace(input)
|
183
|
-
star << res
|
184
|
-
end
|
185
|
-
when String
|
186
|
-
while res = input.scan(Regexp.new(Regexp.quote(prod)))
|
187
|
-
eat_whitespace(input)
|
188
|
-
star << res
|
189
|
-
end
|
190
|
-
end
|
176
|
+
star = rept(input, 0, '*', expr[1])
|
177
|
+
|
191
178
|
# Update furthest failure for strings and terminals
|
192
|
-
parser.update_furthest_failure(input.pos, input.lineno,
|
193
|
-
star.
|
179
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
|
180
|
+
star.is_a?(Array) && terminal? ? star.join("") : star
|
194
181
|
else
|
195
182
|
raise "attempt to parse unknown rule type: #{expr.first}"
|
196
183
|
end
|
@@ -208,6 +195,38 @@ module EBNF::PEG
|
|
208
195
|
return parser.packrat[sym][pos][:result]
|
209
196
|
end
|
210
197
|
|
198
|
+
##
|
199
|
+
# Repetition, 0-1, 0-n, 1-n, ...
|
200
|
+
#
|
201
|
+
# Note, nil results are removed from the result, but count towards min/max calculations
|
202
|
+
#
|
203
|
+
# @param [Scanner] input
|
204
|
+
# @param [Integer] min
|
205
|
+
# @param [Integer] max
|
206
|
+
# If it is an integer, it stops matching after max entries.
|
207
|
+
# @param [Symbol, String] prod
|
208
|
+
# @return [:unmatched, Array]
|
209
|
+
def rept(input, min, max, prod)
|
210
|
+
result = []
|
211
|
+
|
212
|
+
case prod
|
213
|
+
when Symbol
|
214
|
+
rule = parser.find_rule(prod)
|
215
|
+
raise "No rule found for #{prod}" unless rule
|
216
|
+
while (max == '*' || result.length < max) && (res = rule.parse(input)) != :unmatched
|
217
|
+
eat_whitespace(input) unless terminal?
|
218
|
+
result << res
|
219
|
+
end
|
220
|
+
when String
|
221
|
+
while (res = input.scan(Regexp.new(Regexp.quote(prod)))) && (max == '*' || result.length < max)
|
222
|
+
eat_whitespace(input) unless terminal?
|
223
|
+
result << res
|
224
|
+
end
|
225
|
+
end
|
226
|
+
|
227
|
+
result.length < min ? :unmatched : result.compact
|
228
|
+
end
|
229
|
+
|
211
230
|
##
|
212
231
|
# Eat whitespace between non-terminal rules
|
213
232
|
def eat_whitespace(input)
|
data/lib/ebnf/rule.rb
CHANGED
@@ -1,17 +1,33 @@
|
|
1
1
|
require 'scanf'
|
2
|
+
require 'strscan'
|
2
3
|
|
3
4
|
module EBNF
|
4
5
|
# Represent individual parsed rules
|
5
6
|
class Rule
|
6
7
|
# Operations which are flattened to separate rules in to_bnf.
|
7
8
|
BNF_OPS = %w{
|
8
|
-
alt opt plus seq star
|
9
|
+
alt diff not opt plus rept seq star
|
9
10
|
}.map(&:to_sym).freeze
|
10
11
|
|
11
12
|
TERM_OPS = %w{
|
12
|
-
|
13
|
+
hex istr range
|
13
14
|
}.map(&:to_sym).freeze
|
14
15
|
|
16
|
+
# The number of arguments expected per operator. `nil` for unspecified
|
17
|
+
OP_ARGN = {
|
18
|
+
alt: nil,
|
19
|
+
diff: 2,
|
20
|
+
hex: 1,
|
21
|
+
istr: 1,
|
22
|
+
not: 1,
|
23
|
+
opt: 1,
|
24
|
+
plus: 1,
|
25
|
+
range: 1,
|
26
|
+
rept: 3,
|
27
|
+
seq: nil,
|
28
|
+
star: 1
|
29
|
+
}
|
30
|
+
|
15
31
|
# Symbol of rule
|
16
32
|
#
|
17
33
|
# @return [Symbol]
|
@@ -28,7 +44,7 @@ module EBNF
|
|
28
44
|
|
29
45
|
# Kind of rule
|
30
46
|
#
|
31
|
-
# @return [:rule, :terminal, or :pass]
|
47
|
+
# @return [:rule, :terminal, :terminals, or :pass]
|
32
48
|
attr_accessor :kind
|
33
49
|
|
34
50
|
# Rule expression
|
@@ -59,19 +75,38 @@ module EBNF
|
|
59
75
|
# Determines preparation and cleanup rules for reconstituting EBNF ? * + from BNF
|
60
76
|
attr_accessor :cleanup
|
61
77
|
|
62
|
-
# @param [Symbol] sym
|
63
|
-
# @
|
78
|
+
# @param [Symbol, nil] sym
|
79
|
+
# `nil` is allowed only for @pass or @terminals
|
80
|
+
# @param [String, nil] id
|
64
81
|
# @param [Array] expr
|
65
|
-
#
|
82
|
+
# The expression is an internal-representation of an S-Expression with one of the following operators:
|
83
|
+
#
|
84
|
+
# * `alt` – A list of alternative rules, which are attempted in order. It terminates with the first matching rule, or is terminated as unmatched, if no such rule is found.
|
85
|
+
# * `diff` – matches any string that matches `A` but does not match `B`.
|
86
|
+
# * `hex` – A single character represented using the hexadecimal notation `#xnn`.
|
87
|
+
# * `istr` – A string which matches in a case-insensitive manner, so that `(istr "fOo")` will match either of the strings `"foo"`, `"FOO"` or any other combination.
|
88
|
+
# * `opt` – An optional rule or terminal. It either results in the matching rule or returns `nil`.
|
89
|
+
# * `plus` – A sequence of one or more of the matching rule. If there is no such rule, it is terminated as unmatched; otherwise, the result is an array containing all matched input.
|
90
|
+
# * `range` – A range of characters, possibly repeated, of the form `(range "a-z")`. May also use hexadecimal notation.
|
91
|
+
# * `rept m n` – A sequence of at least `m` and at most `n` of the matching rule. It will always return an array.
|
92
|
+
# * `seq` – A sequence of rules or terminals. If any (other than `opt` or `star`) do not parse, the rule is terminated as unmatched.
|
93
|
+
# * `star` – A sequence of zero or more of the matching rule. It will always return an array.
|
94
|
+
# @param [:rule, :terminal, :terminals, :pass] kind (nil)
|
66
95
|
# @param [String] ebnf (nil)
|
96
|
+
# When parsing, records the EBNF string used to create the rule.
|
67
97
|
# @param [Array] first (nil)
|
98
|
+
# Recorded set of terminals that can proceed this rule (LL(1))
|
68
99
|
# @param [Array] follow (nil)
|
100
|
+
# Recorded set of terminals that can follow this rule (LL(1))
|
69
101
|
# @param [Boolean] start (nil)
|
102
|
+
# Is this the starting rule for the grammar?
|
70
103
|
# @param [Rule] top_rule (nil)
|
104
|
+
# The top-most rule. All expressed rules are top-rules, derived rules have the original rule as their top-rule.
|
71
105
|
# @param [Boolean] cleanup (nil)
|
106
|
+
# Records information useful for cleaning up converted :plus, and :star expansions (LL(1)).
|
72
107
|
def initialize(sym, id, expr, kind: nil, ebnf: nil, first: nil, follow: nil, start: nil, top_rule: nil, cleanup: nil)
|
73
108
|
@sym, @id = sym, id
|
74
|
-
@expr = expr.is_a?(Array) ? expr : [:seq, expr]
|
109
|
+
@expr = expr.is_a?(Array) ? expr : [:seq, expr].compact
|
75
110
|
@ebnf, @kind, @first, @follow, @start, @cleanup, @top_rule = ebnf, kind, first, follow, start, cleanup, top_rule
|
76
111
|
@top_rule ||= self
|
77
112
|
@kind ||= case
|
@@ -79,21 +114,53 @@ module EBNF
|
|
79
114
|
when !BNF_OPS.include?(@expr.first) then :terminal
|
80
115
|
else :rule
|
81
116
|
end
|
117
|
+
|
118
|
+
# Allow @pass and @terminals to not be named
|
119
|
+
@sym ||= :_pass if @kind == :pass
|
120
|
+
@sym ||= :_terminals if @kind == :terminals
|
121
|
+
|
122
|
+
raise ArgumentError, "Rule sym must be a symbol, was #{@sym.inspect}" unless @sym.is_a?(Symbol)
|
123
|
+
raise ArgumentError, "Rule id must be a string or nil, was #{@id.inspect}" unless (@id || "").is_a?(String)
|
124
|
+
raise ArgumentError, "Rule kind must be one of :rule, :terminal, :terminals, or :pass, was #{@kind.inspect}" unless
|
125
|
+
@kind.is_a?(Symbol) && %w(rule terminal terminals pass).map(&:to_sym).include?(@kind)
|
126
|
+
|
127
|
+
case @expr.first
|
128
|
+
when :alt
|
129
|
+
raise ArgumentError, "#{@expr.first} operation must have at least one operand, had #{@expr.length - 1}" unless @expr.length > 1
|
130
|
+
when :diff
|
131
|
+
raise ArgumentError, "#{@expr.first} operation must have exactly two operands, had #{@expr.length - 1}" unless @expr.length == 3
|
132
|
+
when :hex, :istr, :not, :opt, :plus, :range, :star
|
133
|
+
raise ArgumentError, "#{@expr.first} operation must have exactly one operand, had #{@expr.length - 1}" unless @expr.length == 2
|
134
|
+
when :rept
|
135
|
+
raise ArgumentError, "#{@expr.first} operation must have exactly three, had #{@expr.length - 1}" unless @expr.length == 4
|
136
|
+
raise ArgumentError, "#{@expr.first} operation must an non-negative integer minimum, was #{@expr[1]}" unless
|
137
|
+
@expr[1].is_a?(Integer) && @expr[1] >= 0
|
138
|
+
raise ArgumentError, "#{@expr.first} operation must an non-negative integer maximum or '*', was #{@expr[2]}" unless
|
139
|
+
@expr[2] == '*' || @expr[2].is_a?(Integer) && @expr[2] >= 0
|
140
|
+
when :seq
|
141
|
+
# It's legal to have a zero-length sequence
|
142
|
+
else
|
143
|
+
raise ArgumentError, "Rule expression must be an array using a known operator, was #{@expr.first}"
|
144
|
+
end
|
82
145
|
end
|
83
146
|
|
84
147
|
##
|
85
148
|
# Return a rule from its SXP representation:
|
86
149
|
#
|
87
150
|
# @example inputs
|
88
|
-
# (pass (plus (range "#x20\\t\\r\\n")))
|
151
|
+
# (pass _pass (plus (range "#x20\\t\\r\\n")))
|
89
152
|
# (rule ebnf "1" (star (alt declaration rule)))
|
90
|
-
# (terminal
|
153
|
+
# (terminal R_CHAR "19" (diff CHAR (alt "]" "-")))
|
91
154
|
#
|
92
155
|
# Also may have `(first ...)`, `(follow ...)`, or `(start #t)`.
|
93
156
|
#
|
94
|
-
# @param [Array] sxp
|
157
|
+
# @param [String, Array] sxp
|
95
158
|
# @return [Rule]
|
96
159
|
def self.from_sxp(sxp)
|
160
|
+
if sxp.is_a?(String)
|
161
|
+
require 'sxp' unless defined?(SXP)
|
162
|
+
sxp = SXP.parse(sxp)
|
163
|
+
end
|
97
164
|
expr = sxp.detect {|e| e.is_a?(Array) && ![:first, :follow, :start].include?(e.first.to_sym)}
|
98
165
|
first = sxp.detect {|e| e.is_a?(Array) && e.first.to_sym == :first}
|
99
166
|
first = first[1..-1] if first
|
@@ -115,11 +182,11 @@ module EBNF
|
|
115
182
|
# @param [Hash{Symbol => Symbol}] cleanup (nil)
|
116
183
|
# @param [Hash{Symbol => Object}] options
|
117
184
|
def build(expr, kind: nil, cleanup: nil, **options)
|
118
|
-
new_sym, new_id =
|
185
|
+
new_sym, new_id = @top_rule.send(:make_sym_id)
|
119
186
|
self.class.new(new_sym, new_id, expr,
|
120
187
|
kind: kind,
|
121
188
|
ebnf: @ebnf,
|
122
|
-
top_rule:
|
189
|
+
top_rule: @top_rule,
|
123
190
|
cleanup: cleanup,
|
124
191
|
**options)
|
125
192
|
end
|
@@ -152,15 +219,16 @@ module EBNF
|
|
152
219
|
# @return [String]
|
153
220
|
def to_ttl
|
154
221
|
@ebnf.debug("to_ttl") {inspect} if @ebnf
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
%{ rdfs:comment #{comment.inspect};}
|
163
|
-
|
222
|
+
statements = [%{:#{sym} rdfs:label "#{sym}";}]
|
223
|
+
if orig
|
224
|
+
comment = orig.to_s.strip.
|
225
|
+
gsub(/"""/, '\"\"\"').
|
226
|
+
gsub("\\", "\\\\").
|
227
|
+
sub(/^\"/, '\"').
|
228
|
+
sub(/\"$/m, '\"')
|
229
|
+
statements << %{ rdfs:comment #{comment.inspect};}
|
230
|
+
end
|
231
|
+
statements << %{ dc:identifier "#{id}";} if id
|
164
232
|
|
165
233
|
statements += ttl_expr(expr, terminal? ? "re" : "g", 1, false)
|
166
234
|
"\n" + statements.join("\n")
|
@@ -175,12 +243,13 @@ module EBNF
|
|
175
243
|
##
|
176
244
|
# Transform EBNF rule to BNF rules:
|
177
245
|
#
|
178
|
-
# * Transform (rule a "n" (op1 (op2))) into two rules:
|
179
|
-
#
|
180
|
-
#
|
181
|
-
#
|
182
|
-
# * Transform (rule a (
|
183
|
-
# * Transform (rule a (
|
246
|
+
# * Transform `(rule a "n" (op1 (op2)))` into two rules:
|
247
|
+
#
|
248
|
+
# (rule a "n" (op1 _a_1))
|
249
|
+
# (rule _a_1 "n.1" (op2))
|
250
|
+
# * Transform `(rule a (opt b))` into `(rule a (alt _empty b))`
|
251
|
+
# * Transform `(rule a (star b))` into `(rule a (alt _empty (seq b a)))`
|
252
|
+
# * Transform `(rule a (plus b))` into `(rule a (seq b (star b)))`
|
184
253
|
#
|
185
254
|
# Transformation includes information used to re-construct non-transformed.
|
186
255
|
#
|
@@ -231,7 +300,7 @@ module EBNF
|
|
231
300
|
# Otherwise, no further transformation necessary
|
232
301
|
new_rules << self
|
233
302
|
elsif [:diff, :hex, :range].include?(expr.first)
|
234
|
-
# This rules are fine,
|
303
|
+
# These rules are fine; they just need to be terminals
|
235
304
|
raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal?
|
236
305
|
new_rules << self
|
237
306
|
else
|
@@ -245,9 +314,14 @@ module EBNF
|
|
245
314
|
##
|
246
315
|
# Transform EBNF rule for PEG:
|
247
316
|
#
|
248
|
-
# * Transform (rule a "n" (op1 ... (op2 y) ...z)) into two rules:
|
249
|
-
#
|
250
|
-
#
|
317
|
+
# * Transform `(rule a "n" (op1 ... (op2 y) ...z))` into two rules:
|
318
|
+
#
|
319
|
+
# (rule a "n" (op1 ... _a_1 ... z))
|
320
|
+
# (rule _a_1 "n.1" (op2 y))
|
321
|
+
# * Transform `(rule a "n" (diff op1 op2))` into two rules:
|
322
|
+
#
|
323
|
+
# (rule a "n" (seq _a_1 op1))
|
324
|
+
# (rule _a_1 "n.1" (not op2))
|
251
325
|
#
|
252
326
|
# @return [Array<Rule>]
|
253
327
|
def to_peg
|
@@ -268,8 +342,14 @@ module EBNF
|
|
268
342
|
|
269
343
|
# Return new rules after recursively applying #to_bnf
|
270
344
|
new_rules = new_rules.map {|r| r.to_peg}.flatten
|
271
|
-
elsif
|
272
|
-
|
345
|
+
elsif expr.first == :diff && !terminal?
|
346
|
+
this = dup
|
347
|
+
new_rule = build([:not, expr[2]])
|
348
|
+
this.expr = [:seq, new_rule.sym, expr[1]]
|
349
|
+
new_rules << this
|
350
|
+
new_rules << new_rule
|
351
|
+
elsif [:hex, :istr, :range].include?(expr.first)
|
352
|
+
# These rules are fine; they just need to be terminals
|
273
353
|
raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal?
|
274
354
|
new_rules << self
|
275
355
|
else
|
@@ -287,6 +367,8 @@ module EBNF
|
|
287
367
|
case expr.first
|
288
368
|
when :hex
|
289
369
|
Regexp.new(translate_codepoints(expr[1]))
|
370
|
+
when :istr
|
371
|
+
/#{expr.last}/ui
|
290
372
|
when :range
|
291
373
|
Regexp.new("[#{translate_codepoints(expr[1])}]")
|
292
374
|
else
|
@@ -294,45 +376,170 @@ module EBNF
|
|
294
376
|
end
|
295
377
|
end
|
296
378
|
|
297
|
-
#
|
298
|
-
#
|
379
|
+
# Is this a terminal?
|
380
|
+
#
|
381
|
+
# @return [Boolean]
|
382
|
+
def terminal?
|
383
|
+
kind == :terminal
|
384
|
+
end
|
385
|
+
|
386
|
+
# Is this a pass?
|
387
|
+
# @return [Boolean]
|
388
|
+
def pass?
|
389
|
+
kind == :pass
|
390
|
+
end
|
391
|
+
|
392
|
+
# Is this a rule?
|
393
|
+
# @return [Boolean]
|
394
|
+
def rule?
|
395
|
+
kind == :rule
|
396
|
+
end
|
397
|
+
|
398
|
+
# Is this rule of the form (alt ...)?
|
399
|
+
def alt?
|
400
|
+
expr.is_a?(Array) && expr.first == :alt
|
401
|
+
end
|
402
|
+
|
403
|
+
# Is this rule of the form (seq ...)?
|
404
|
+
def seq?
|
405
|
+
expr.is_a?(Array) && expr.first == :seq
|
406
|
+
end
|
407
|
+
|
408
|
+
def inspect
|
409
|
+
"#<EBNF::Rule:#{object_id} " +
|
410
|
+
{sym: sym, id: id, kind: kind, expr: expr}.inspect +
|
411
|
+
">"
|
412
|
+
end
|
413
|
+
|
414
|
+
# Two rules are equal if they have the same {#sym}, {#kind} and {#expr}.
|
415
|
+
#
|
416
|
+
# @param [Rule] other
|
417
|
+
# @return [Boolean]
|
418
|
+
def ==(other)
|
419
|
+
sym == other.sym &&
|
420
|
+
kind == other.kind &&
|
421
|
+
expr == other.expr
|
422
|
+
end
|
423
|
+
|
424
|
+
# Two rules are equivalent if they have the same {#expr}.
|
425
|
+
#
|
426
|
+
# @param [Rule] other
|
427
|
+
# @return [Boolean]
|
428
|
+
def eql?(other)
|
429
|
+
expr == other.expr
|
430
|
+
end
|
431
|
+
|
432
|
+
# Rules compare using their ids
|
433
|
+
def <=>(other)
|
434
|
+
if id && other.id
|
435
|
+
if id == other.id
|
436
|
+
id.to_s <=> other.id.to_s
|
437
|
+
else
|
438
|
+
id.to_f <=> other.id.to_f
|
439
|
+
end
|
440
|
+
else
|
441
|
+
sym.to_s <=> other.sym.to_s
|
442
|
+
end
|
443
|
+
end
|
444
|
+
|
445
|
+
##
|
446
|
+
# Utility function to translate code points of the form '#xN' into ruby unicode characters
|
447
|
+
def translate_codepoints(str)
|
448
|
+
str.gsub(/#x\h+/) {|c| c[2..-1].scanf("%x").first.chr(Encoding::UTF_8)}
|
449
|
+
end
|
450
|
+
|
451
|
+
# Return the non-terminals for this rule.
|
452
|
+
#
|
453
|
+
# * `alt` => this is every non-terminal.
|
454
|
+
# * `diff` => this is every non-terminal.
|
455
|
+
# * `hex` => nil
|
456
|
+
# * `istr` => nil
|
457
|
+
# * `not` => this is the last expression, if any.
|
458
|
+
# * `opt` => this is the last expression, if any.
|
459
|
+
# * `plus` => this is the last expression, if any.
|
460
|
+
# * `range` => nil
|
461
|
+
# * `rept` => this is the last expression, if any.
|
462
|
+
# * `seq` => this is the first expression in the sequence, if any.
|
463
|
+
# * `star` => this is the last expression, if any.
|
299
464
|
#
|
300
465
|
# @param [Array<Rule>] ast
|
301
466
|
# The set of rules, used to turn symbols into rules
|
467
|
+
# @param [Array<Symbol,String,Array>] expr (@expr)
|
468
|
+
# The expression to check, defaults to the rule expression.
|
469
|
+
# Typically, if the expression is recursive, the embedded expression is called recursively.
|
302
470
|
# @return [Array<Rule>]
|
303
|
-
|
304
|
-
|
471
|
+
# @note this is used for LL(1) transformation, so rule types are limited
|
472
|
+
def non_terminals(ast, expr = @expr)
|
473
|
+
([:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1,1]).map do |sym|
|
305
474
|
case sym
|
306
475
|
when Symbol
|
307
476
|
r = ast.detect {|r| r.sym == sym}
|
308
477
|
r if r && r.rule?
|
478
|
+
when Array
|
479
|
+
non_terminals(ast, sym)
|
309
480
|
else
|
310
481
|
nil
|
311
482
|
end
|
312
|
-
end.compact
|
483
|
+
end.flatten.compact.uniq
|
313
484
|
end
|
314
485
|
|
315
|
-
# Return the terminals for this rule.
|
316
|
-
#
|
486
|
+
# Return the terminals for this rule.
|
487
|
+
#
|
488
|
+
# * `alt` => this is every terminal.
|
489
|
+
# * `diff` => this is every terminal.
|
490
|
+
# * `hex` => nil
|
491
|
+
# * `istr` => nil
|
492
|
+
# * `not` => this is the last expression, if any.
|
493
|
+
# * `opt` => this is the last expression, if any.
|
494
|
+
# * `plus` => this is the last expression, if any.
|
495
|
+
# * `range` => nil
|
496
|
+
# * `rept` => this is the last expression, if any.
|
497
|
+
# * `seq` => this is the first expression in the sequence, if any.
|
498
|
+
# * `star` => this is the last expression, if any.
|
317
499
|
#
|
318
500
|
# @param [Array<Rule>] ast
|
319
501
|
# The set of rules, used to turn symbols into rules
|
502
|
+
# @param [Array<Symbol,String,Array>] expr (@expr)
|
503
|
+
# The expression to check, defaults to the rule expression.
|
504
|
+
# Typically, if the expression is recursive, the embedded expression is called recursively.
|
320
505
|
# @return [Array<Rule>]
|
321
|
-
|
322
|
-
|
506
|
+
# @note this is used for LL(1) transformation, so rule types are limited
|
507
|
+
def terminals(ast, expr = @expr)
|
508
|
+
([:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1,1]).map do |sym|
|
323
509
|
case sym
|
324
510
|
when Symbol
|
325
511
|
r = ast.detect {|r| r.sym == sym}
|
326
512
|
r if r && r.terminal?
|
327
513
|
when String
|
328
514
|
sym
|
329
|
-
|
330
|
-
|
515
|
+
when Array
|
516
|
+
terminals(ast, sym)
|
331
517
|
end
|
332
|
-
end.compact
|
518
|
+
end.flatten.compact.uniq
|
333
519
|
end
|
334
520
|
|
335
|
-
#
|
521
|
+
# Return the symbols used in the rule.
|
522
|
+
#
|
523
|
+
# @param [Array<Symbol,String,Array>] expr (@expr)
|
524
|
+
# The expression to check, defaults to the rule expression.
|
525
|
+
# Typically, if the expression is recursive, the embedded expression is called recursively.
|
526
|
+
# @return [Array<Symbol>]
|
527
|
+
def symbols(expr = @expr)
|
528
|
+
expr[1..-1].map do |sym|
|
529
|
+
case sym
|
530
|
+
when Symbol
|
531
|
+
sym
|
532
|
+
when Array
|
533
|
+
symbols(sym)
|
534
|
+
end
|
535
|
+
end.flatten.compact.uniq
|
536
|
+
end
|
537
|
+
|
538
|
+
##
|
539
|
+
# The following are used for LL(1) transformation.
|
540
|
+
##
|
541
|
+
|
542
|
+
# Does this rule start with `sym`? It does if expr is that sym,
|
336
543
|
# expr starts with alt and contains that sym,
|
337
544
|
# or expr starts with seq and the next element is that sym.
|
338
545
|
#
|
@@ -349,6 +556,92 @@ module EBNF
|
|
349
556
|
end
|
350
557
|
end
|
351
558
|
|
559
|
+
##
|
560
|
+
# Validate the rule, with respect to an AST.
|
561
|
+
#
|
562
|
+
# @param [Array<Rule>] ast
|
563
|
+
# The set of rules, used to turn symbols into rules
|
564
|
+
# @param [Array<Symbol,String,Array>] expr (@expr)
|
565
|
+
# The expression to check, defaults to the rule expression.
|
566
|
+
# Typically, if the expression is recursive, the embedded expression is called recursively.
|
567
|
+
# @raise [SyntaxError]
|
568
|
+
def validate!(ast, expr = @expr)
|
569
|
+
op = expr.first
|
570
|
+
raise SyntaxError, "Unknown operator: #{op}" unless OP_ARGN.key?(op)
|
571
|
+
raise SyntaxError, "Argument count missmatch on operator #{op}, had #{expr.length - 1} expected #{OP_ARGN[op]}" if
|
572
|
+
OP_ARGN[op] && OP_ARGN[op] != expr.length - 1
|
573
|
+
|
574
|
+
# alt needs at least one operand; rept needs a valid min and max
|
575
|
+
if op == :alt
|
576
|
+
raise SyntaxError, "alt operation must have at least one operand, had #{expr.length - 1}" unless expr.length > 1
|
577
|
+
elsif op == :rept
|
578
|
+
raise SyntaxError, "rept operation must an non-negative integer minimum, was #{expr[1]}" unless
|
579
|
+
expr[1].is_a?(Integer) && expr[1] >= 0
|
580
|
+
raise SyntaxError, "rept operation must an non-negative integer maximum or '*', was #{expr[2]}" unless
|
581
|
+
expr[2] == '*' || expr[2].is_a?(Integer) && expr[2] >= 0
|
582
|
+
end
|
583
|
+
|
584
|
+
case op
|
585
|
+
when :hex
|
586
|
+
raise SyntaxError, "Hex operand must be of form '#xN+': #{sym}" unless expr.last.match?(/^#x\h+$/)
|
587
|
+
when :range
|
588
|
+
str = expr.last.dup
|
589
|
+
str = str[1..-1] if str.start_with?('^')
|
590
|
+
str = str[0..-2] if str.end_with?('-') # Allowed at end of range
|
591
|
+
scanner = StringScanner.new(str)
|
592
|
+
hex = rchar = in_range = false
|
593
|
+
while !scanner.eos?
|
594
|
+
begin
|
595
|
+
if scanner.scan(Terminals::HEX)
|
596
|
+
raise SyntaxError if in_range && rchar
|
597
|
+
rchar = in_range = false
|
598
|
+
hex = true
|
599
|
+
elsif scanner.scan(Terminals::R_CHAR)
|
600
|
+
raise SyntaxError if in_range && hex
|
601
|
+
hex = in_range = false
|
602
|
+
rchar = true
|
603
|
+
else
|
604
|
+
raise(SyntaxError, "Range contains illegal components at offset #{scanner.pos}: was #{expr.last}")
|
605
|
+
end
|
606
|
+
|
607
|
+
if scanner.scan(/\-/)
|
608
|
+
raise SyntaxError if in_range
|
609
|
+
in_range = true
|
610
|
+
end
|
611
|
+
rescue SyntaxError
|
612
|
+
raise(SyntaxError, "Range contains illegal components at offset #{scanner.pos}: was #{expr.last}")
|
613
|
+
end
|
614
|
+
end
|
615
|
+
else
|
616
|
+
([:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1,1]).each do |sym|
|
617
|
+
case sym
|
618
|
+
when Symbol
|
619
|
+
r = ast.detect {|r| r.sym == sym}
|
620
|
+
raise SyntaxError, "No rule found for #{sym}" unless r
|
621
|
+
when Array
|
622
|
+
validate!(ast, sym)
|
623
|
+
when String
|
624
|
+
raise SyntaxError, "String must be of the form CHAR*" unless sym.match?(/^#{Terminals::CHAR}*$/)
|
625
|
+
end
|
626
|
+
end
|
627
|
+
end
|
628
|
+
end
|
629
|
+
|
630
|
+
##
|
631
|
+
# Validate the rule, with respect to an AST.
|
632
|
+
#
|
633
|
+
# Uses `#validate!` and catches `SyntaxError`
|
634
|
+
#
|
635
|
+
# @param [Array<Rule>] ast
|
636
|
+
# The set of rules, used to turn symbols into rules
|
637
|
+
# @return [Boolean]
|
638
|
+
def valid?(ast)
|
639
|
+
validate!(ast)
|
640
|
+
true
|
641
|
+
rescue SyntaxError
|
642
|
+
false
|
643
|
+
end
|
644
|
+
|
352
645
|
# Do the firsts of this rule include the empty string?
|
353
646
|
#
|
354
647
|
# @return [Boolean]
|
@@ -381,79 +674,6 @@ module EBNF
|
|
381
674
|
terminals.length
|
382
675
|
end
|
383
676
|
|
384
|
-
# Is this a terminal?
|
385
|
-
#
|
386
|
-
# @return [Boolean]
|
387
|
-
def terminal?
|
388
|
-
kind == :terminal
|
389
|
-
end
|
390
|
-
|
391
|
-
# Is this a pass?
|
392
|
-
# @return [Boolean]
|
393
|
-
def pass?
|
394
|
-
kind == :pass
|
395
|
-
end
|
396
|
-
|
397
|
-
# Is this a rule?
|
398
|
-
# @return [Boolean]
|
399
|
-
def rule?
|
400
|
-
kind == :rule
|
401
|
-
end
|
402
|
-
|
403
|
-
# Is this rule of the form (alt ...)?
|
404
|
-
def alt?
|
405
|
-
expr.is_a?(Array) && expr.first == :alt
|
406
|
-
end
|
407
|
-
|
408
|
-
# Is this rule of the form (seq ...)?
|
409
|
-
def seq?
|
410
|
-
expr.is_a?(Array) && expr.first == :seq
|
411
|
-
end
|
412
|
-
|
413
|
-
# Is this rule of the form (alt ...)?
|
414
|
-
def alt?
|
415
|
-
expr.is_a?(Array) && expr.first == :alt
|
416
|
-
end
|
417
|
-
|
418
|
-
def inspect
|
419
|
-
"#<EBNF::Rule:#{object_id} " +
|
420
|
-
{sym: sym, id: id, kind: kind, expr: expr}.inspect +
|
421
|
-
">"
|
422
|
-
end
|
423
|
-
|
424
|
-
# Two rules are equal if they have the same {#sym}, {#kind} and {#expr}.
|
425
|
-
#
|
426
|
-
# @param [Rule] other
|
427
|
-
# @return [Boolean]
|
428
|
-
def ==(other)
|
429
|
-
sym == other.sym &&
|
430
|
-
kind == other.kind &&
|
431
|
-
expr == other.expr
|
432
|
-
end
|
433
|
-
|
434
|
-
# Two rules are equivalent if they have the same {#expr}.
|
435
|
-
#
|
436
|
-
# @param [Rule] other
|
437
|
-
# @return [Boolean]
|
438
|
-
def equivalent?(other)
|
439
|
-
expr == other.expr
|
440
|
-
end
|
441
|
-
|
442
|
-
# Rules compare using their ids
|
443
|
-
def <=>(other)
|
444
|
-
if id.to_i == other.id.to_i
|
445
|
-
id.to_s <=> other.id.to_s
|
446
|
-
else
|
447
|
-
id.to_i <=> other.id.to_i
|
448
|
-
end
|
449
|
-
end
|
450
|
-
|
451
|
-
##
|
452
|
-
# Utility function to translate code points of the form '#xN' into ruby unicode characters
|
453
|
-
def translate_codepoints(str)
|
454
|
-
str.gsub(/#x\h+/) {|c| c[2..-1].scanf("%x").first.chr(Encoding::UTF_8)}
|
455
|
-
end
|
456
|
-
|
457
677
|
private
|
458
678
|
def ttl_expr(expr, pfx, depth, is_obj = true)
|
459
679
|
indent = ' ' * depth
|
@@ -469,17 +689,28 @@ module EBNF
|
|
469
689
|
|
470
690
|
case op
|
471
691
|
when :seq, :alt, :diff
|
692
|
+
# Multiple operands
|
472
693
|
statements << %{#{indent}#{bra}#{pfx}:#{op} (}
|
473
694
|
expr.each {|a| statements += ttl_expr(a, pfx, depth + 1)}
|
474
695
|
statements << %{#{indent} )#{ket}}
|
475
|
-
when :opt, :plus, :star
|
696
|
+
when :opt, :plus, :star, :not
|
697
|
+
# Single operand
|
476
698
|
statements << %{#{indent}#{bra}#{pfx}:#{op} }
|
477
699
|
statements += ttl_expr(expr.first, pfx, depth + 1)
|
478
700
|
statements << %{#{indent} #{ket}} unless ket.empty?
|
701
|
+
when :rept
|
702
|
+
# Three operands (min, max and expr)
|
703
|
+
statements << %{ #{indent}#{pfx}:min #{expr[0].inspect};}
|
704
|
+
statements << %{ #{indent}#{pfx}:max #{expr[1].inspect};}
|
705
|
+
statements << %{#{indent}#{bra}#{pfx}:#{op} }
|
706
|
+
statements += ttl_expr(expr.last, pfx, depth + 1)
|
707
|
+
statements << %{#{indent} #{ket}} unless ket.empty?
|
479
708
|
when :_empty, :_eps
|
480
709
|
statements << %{#{indent}"g:#{op.to_s[1..-1]}"}
|
481
710
|
when :"'"
|
482
711
|
statements << %{#{indent}"#{esc(expr)}"}
|
712
|
+
when :istr
|
713
|
+
statements << %{#{indent}#{bra} re:matches #{expr.first.inspect} #{ket}}
|
483
714
|
when :range
|
484
715
|
statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
|
485
716
|
when :hex
|
@@ -535,7 +766,7 @@ module EBNF
|
|
535
766
|
def make_sym_id(variation = nil)
|
536
767
|
@id_seq ||= 0
|
537
768
|
@id_seq += 1
|
538
|
-
["_#{@sym}_#{@id_seq}#{variation}".to_sym, "#{@id}.#{@id_seq}#{variation}"]
|
769
|
+
["_#{@sym}_#{@id_seq}#{variation}".to_sym, ("#{@id}.#{@id_seq}#{variation}" if @id)]
|
539
770
|
end
|
540
771
|
end
|
541
772
|
end
|