ebnf 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +81 -36
- data/VERSION +1 -1
- data/bin/ebnf +34 -18
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/ebnf.ebnf +19 -25
- data/etc/ebnf.html +251 -206
- data/etc/ebnf.ll1.rb +27 -103
- data/etc/ebnf.ll1.sxp +105 -102
- data/etc/ebnf.peg.rb +54 -62
- data/etc/ebnf.peg.sxp +53 -62
- data/etc/ebnf.sxp +22 -19
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.sxp +8 -7
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.sxp +22 -20
- data/lib/ebnf.rb +3 -0
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +87 -44
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +4 -7
- data/lib/ebnf/ll1/parser.rb +12 -4
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +1 -1
- data/lib/ebnf/peg/parser.rb +24 -5
- data/lib/ebnf/peg/rule.rb +77 -58
- data/lib/ebnf/rule.rb +352 -121
- data/lib/ebnf/terminals.rb +13 -10
- data/lib/ebnf/writer.rb +550 -78
- metadata +48 -6
data/lib/ebnf/peg.rb
CHANGED
@@ -31,7 +31,7 @@ module EBNF
|
|
31
31
|
def to_ruby_peg(output, **options)
|
32
32
|
output.puts " RULES = ["
|
33
33
|
ast.each do |rule|
|
34
|
-
output.puts " " + rule.to_ruby + '.extend(EBNF::PEG::Rule),'
|
34
|
+
output.puts " " + rule.to_ruby + (rule.is_a?(EBNF::PEG::Rule) ? '.extend(EBNF::PEG::Rule)' : '') + ','
|
35
35
|
end
|
36
36
|
output.puts " ]"
|
37
37
|
end
|
data/lib/ebnf/peg/parser.rb
CHANGED
@@ -51,6 +51,7 @@ module EBNF::PEG
|
|
51
51
|
# DSL for creating terminals and productions
|
52
52
|
module ClassMethods
|
53
53
|
def start_handlers
  # Lazily created Hash; start_production stores its block here under the rule name.
  @start_handlers ||= {}
end
|
54
|
+
def start_options
  # Lazily created Hash of per-production options (filled by start_production).
  # Backed by @start_options so the ivar name matches the accessor, like the
  # sibling registries (@start_handlers, @production_handlers, ...).
  @start_options ||= {}
end
|
54
55
|
def production_handlers
  # Lazily created Hash backing the production handler registry.
  @production_handlers ||= {}
end
|
55
56
|
def terminal_handlers
  # Lazily created Hash backing the terminal handler registry.
  @terminal_handlers ||= {}
end
|
56
57
|
def terminal_regexps
  # Lazily created Hash backing the terminal regexp registry.
  @terminal_regexps ||= {}
end
|
@@ -97,6 +98,10 @@ module EBNF::PEG
|
|
97
98
|
#
|
98
99
|
# @param [Symbol] term
|
99
100
|
# The rule name
|
101
|
+
# @param [Hash{Symbol => Object}] options
|
102
|
+
# Options which are returned from {Parser#onStart}.
|
103
|
+
# @option options [Boolean] :as_hash (false)
|
104
|
+
# If the production is a `seq`, causes the value to be represented as a single hash, rather than an array of individual hashes for each sub-production. Note that this is not always advisable due to the possibility of repeated productions within the sequence.
|
100
105
|
# @yield [data, block]
|
101
106
|
# @yieldparam [Hash] data
|
102
107
|
# A Hash defined for the current production, during :start
|
@@ -106,8 +111,9 @@ module EBNF::PEG
|
|
106
111
|
# Block passed to initialization for yielding to calling parser.
|
107
112
|
# Should conform to the yield specs for #initialize
|
108
113
|
# Yield to generate a triple
|
109
|
-
def start_production(term, &block)
|
114
|
+
def start_production(term, **options, &block)
|
110
115
|
start_handlers[term] = block
|
116
|
+
start_options[term] = options.freeze
|
111
117
|
end
|
112
118
|
|
113
119
|
##
|
@@ -204,6 +210,7 @@ module EBNF::PEG
|
|
204
210
|
@whitespace = case options[:whitespace]
|
205
211
|
when Regexp then options[:whitespace]
|
206
212
|
when Symbol then @rules[options[:whitespace]]
|
213
|
+
else options[:whitespace]
|
207
214
|
end ||
|
208
215
|
@rules.values.detect(&:pass?) ||
|
209
216
|
/(?:\s|(?:#[^x][^\n\r]*))+/m.freeze
|
@@ -329,19 +336,30 @@ module EBNF::PEG
|
|
329
336
|
# @option options [Integer] :depth
|
330
337
|
# Recursion depth for indenting output
|
331
338
|
# @yieldreturn [String] additional string appended to `message`.
|
332
|
-
def debug(*args)
|
339
|
+
def debug(*args, &block)
|
333
340
|
return unless @options[:logger]
|
334
341
|
options = args.last.is_a?(Hash) ? args.pop : {}
|
335
342
|
lineno = options[:lineno] || (scanner.lineno if scanner)
|
336
343
|
level = options.fetch(:level, 0)
|
337
|
-
|
338
344
|
depth = options[:depth] || self.depth
|
339
|
-
|
340
|
-
|
345
|
+
|
346
|
+
if self.respond_to?(:log_debug)
|
347
|
+
level = [:debug, :info, :warn, :error, :fatal][level]
|
348
|
+
log_debug(*args, **options.merge(level: level, lineno: lineno, depth: depth), &block)
|
349
|
+
elsif @options[:logger].respond_to?(:add)
|
350
|
+
args << yield if block_given?
|
351
|
+
@options[:logger].add(level, "[#{lineno}]" + (" " * depth) + args.join(" "))
|
352
|
+
elsif @options[:logger].respond_to?(:<<)
|
353
|
+
args << yield if block_given?
|
354
|
+
@options[:logger] << "[#{lineno}]" + (" " * depth) + args.join(" ")
|
355
|
+
end
|
341
356
|
end
|
342
357
|
|
343
358
|
# Start for production
|
344
359
|
# Adds data available during the processing of the production
|
360
|
+
#
|
361
|
+
# @return [Hash] composed of production options. Currently only `as_hash` is supported.
|
362
|
+
# @see ClassMethods#start_production
|
345
363
|
def onStart(prod)
|
346
364
|
handler = self.class.start_handlers[prod]
|
347
365
|
@productions << prod
|
@@ -367,6 +385,7 @@ module EBNF::PEG
|
|
367
385
|
# explicit start handler
|
368
386
|
@prod_data << {}
|
369
387
|
end
|
388
|
+
return self.class.start_options.fetch(prod, {}) # any options on this production
|
370
389
|
end
|
371
390
|
|
372
391
|
# Finish of production
|
data/lib/ebnf/peg/rule.rb
CHANGED
@@ -18,14 +18,15 @@ module EBNF::PEG
|
|
18
18
|
#
|
19
19
|
# If matched, the input position is updated and the results returned in a Hash.
|
20
20
|
#
|
21
|
-
# * `alt`: returns the value of the matched production or `:unmatched
|
22
|
-
# * `diff`: returns the
|
21
|
+
# * `alt`: returns the value of the matched production or `:unmatched`.
|
22
|
+
# * `diff`: returns the value matched, or `:unmatched`.
|
23
23
|
# * `hex`: returns a string composed of the matched hex character, or `:unmatched`.
|
24
|
-
# * `opt`: returns the matched
|
25
|
-
# * `plus`: returns an array of the
|
26
|
-
# * `range`: returns a string composed of the
|
27
|
-
# * `seq`: returns an array composed of single-entry hashes for each matched production indexed by the production name, or `:unmatched` if any production fails to match. For Terminals, returns a string created by concatenating these values.
|
28
|
-
# * `star`: returns an array of the
|
24
|
+
# * `opt`: returns the value matched, or `nil` if unmatched.
|
25
|
+
# * `plus`: returns an array of the values matched for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string.
|
26
|
+
# * `range`: returns a string composed of the values matched, or `:unmatched`, if less than `min` are matched.
|
27
|
+
# * `seq`: returns an array composed of single-entry hashes for each matched production indexed by the production name, or `:unmatched` if any production fails to match. For Terminals, returns a string created by concatenating these values. Via option in a `production` or definition, the result can be a single hash with values for each matched production; note that this is not always possible due to the possibility of repeated productions within the sequence.
|
28
|
+
# * `star`: returns an array of the values matched for the specified production. For Terminals, these are concatenated into a single string.
|
29
|
+
#
|
29
30
|
# @param [Scanner] input
|
30
31
|
# @return [Hash{Symbol => Object}, :unmatched] A hash with keys for matched component of the expression. Returns :unmatched if the input does not match the production.
|
31
32
|
def parse(input)
|
@@ -45,7 +46,7 @@ module EBNF::PEG
|
|
45
46
|
# otherwise,
|
46
47
|
if regexp = parser.find_terminal_regexp(sym)
|
47
48
|
matched = input.scan(regexp)
|
48
|
-
result =
|
49
|
+
result = parser.onTerminal(sym, (matched ? matched : :unmatched))
|
49
50
|
# Update furthest failure for strings and terminals
|
50
51
|
parser.update_furthest_failure(input.pos, input.lineno, sym) if result == :unmatched
|
51
52
|
parser.packrat[sym][pos] = {
|
@@ -58,7 +59,7 @@ module EBNF::PEG
|
|
58
59
|
else
|
59
60
|
eat_whitespace(input)
|
60
61
|
end
|
61
|
-
parser.onStart(sym)
|
62
|
+
start_options = parser.onStart(sym)
|
62
63
|
|
63
64
|
result = case expr.first
|
64
65
|
when :alt
|
@@ -84,7 +85,8 @@ module EBNF::PEG
|
|
84
85
|
alt
|
85
86
|
when :diff
|
86
87
|
# matches any string that matches A but does not match B.
|
87
|
-
#
|
88
|
+
# (Note, this is only used for Terminal rules, non-terminals will use :not)
|
89
|
+
raise "Diff used on non-terminal #{prod}" unless terminal?
|
88
90
|
re1, re2 = Regexp.new(translate_codepoints(expr[1])), Regexp.new(translate_codepoints(expr[2]))
|
89
91
|
matched = input.scan(re1)
|
90
92
|
if !matched || re2.match?(matched)
|
@@ -101,9 +103,9 @@ module EBNF::PEG
|
|
101
103
|
parser.update_furthest_failure(input.pos, input.lineno, expr.last)
|
102
104
|
:unmatched
|
103
105
|
end
|
104
|
-
when :
|
105
|
-
#
|
106
|
-
|
106
|
+
when :not
|
107
|
+
# matches any string that does not match B.
|
108
|
+
res = case prod = expr[1]
|
107
109
|
when Symbol
|
108
110
|
rule = parser.find_rule(prod)
|
109
111
|
raise "No rule found for #{prod}" unless rule
|
@@ -111,35 +113,29 @@ module EBNF::PEG
|
|
111
113
|
when String
|
112
114
|
input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
|
113
115
|
end
|
114
|
-
if
|
116
|
+
if res != :unmatched
|
115
117
|
# Update furthest failure for terminals
|
116
|
-
parser.update_furthest_failure(input.pos, input.lineno,
|
117
|
-
|
118
|
+
parser.update_furthest_failure(input.pos, input.lineno, sym) if terminal?
|
119
|
+
:unmatched
|
118
120
|
else
|
119
|
-
|
121
|
+
nil
|
120
122
|
end
|
123
|
+
when :opt
|
124
|
+
# Result is the matched value or nil
|
125
|
+
opt = rept(input, 0, 1, expr[1])
|
126
|
+
|
127
|
+
# Update furthest failure for strings and terminals
|
128
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
|
129
|
+
opt.first
|
121
130
|
when :plus
|
122
131
|
# Result is an array of all expressions while they match,
|
123
132
|
# at least one must match
|
124
|
-
|
125
|
-
|
126
|
-
when Symbol
|
127
|
-
rule = parser.find_rule(prod)
|
128
|
-
raise "No rule found for #{prod}" unless rule
|
129
|
-
while (res = rule.parse(input)) != :unmatched
|
130
|
-
eat_whitespace(input)
|
131
|
-
plus << res
|
132
|
-
end
|
133
|
-
when String
|
134
|
-
while res = input.scan(Regexp.new(Regexp.quote(prod)))
|
135
|
-
eat_whitespace(input)
|
136
|
-
plus << res
|
137
|
-
end
|
138
|
-
end
|
133
|
+
plus = rept(input, 1, '*', expr[1])
|
134
|
+
|
139
135
|
# Update furthest failure for strings and terminals
|
140
|
-
parser.update_furthest_failure(input.pos, input.lineno,
|
141
|
-
plus.
|
142
|
-
when :range
|
136
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
|
137
|
+
plus.is_a?(Array) && terminal? ? plus.join("") : plus
|
138
|
+
when :range, :istr
|
143
139
|
# Matches the specified character range
|
144
140
|
input.scan(to_regexp) || begin
|
145
141
|
# Update furthest failure for strings and terminals
|
@@ -149,7 +145,7 @@ module EBNF::PEG
|
|
149
145
|
when :seq
|
150
146
|
# Evaluate each expression into an array of hashes where each hash contains a key from the associated production and the value is the parsed value of that production. Returns :unmatched if the input does not match the production. Value ordering is ensured by native Hash ordering.
|
151
147
|
seq = expr[1..-1].each_with_object([]) do |prod, accumulator|
|
152
|
-
eat_whitespace(input) unless accumulator.empty?
|
148
|
+
eat_whitespace(input) unless accumulator.empty? || terminal?
|
153
149
|
res = case prod
|
154
150
|
when Symbol
|
155
151
|
rule = parser.find_rule(prod)
|
@@ -165,32 +161,23 @@ module EBNF::PEG
|
|
165
161
|
end
|
166
162
|
accumulator << {prod.to_sym => res}
|
167
163
|
end
|
168
|
-
seq == :unmatched
|
169
|
-
:unmatched
|
170
|
-
|
171
|
-
|
172
|
-
|
164
|
+
if seq == :unmatched
|
165
|
+
:unmatched
|
166
|
+
elsif terminal?
|
167
|
+
seq.map(&:values).compact.join("") # Concat values for terminal production
|
168
|
+
elsif start_options[:as_hash]
|
169
|
+
seq.inject {|memo, h| memo.merge(h)}
|
170
|
+
else
|
171
|
+
seq
|
172
|
+
end
|
173
173
|
when :star
|
174
174
|
# Result is an array of all expressions while they match,
|
175
175
|
# an empty array if none match
|
176
|
-
|
177
|
-
|
178
|
-
when Symbol
|
179
|
-
rule = parser.find_rule(prod)
|
180
|
-
raise "No rule found for #{prod}" unless rule
|
181
|
-
while (res = rule.parse(input)) != :unmatched
|
182
|
-
eat_whitespace(input)
|
183
|
-
star << res
|
184
|
-
end
|
185
|
-
when String
|
186
|
-
while res = input.scan(Regexp.new(Regexp.quote(prod)))
|
187
|
-
eat_whitespace(input)
|
188
|
-
star << res
|
189
|
-
end
|
190
|
-
end
|
176
|
+
star = rept(input, 0, '*', expr[1])
|
177
|
+
|
191
178
|
# Update furthest failure for strings and terminals
|
192
|
-
parser.update_furthest_failure(input.pos, input.lineno,
|
193
|
-
star.
|
179
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
|
180
|
+
star.is_a?(Array) && terminal? ? star.join("") : star
|
194
181
|
else
|
195
182
|
raise "attempt to parse unknown rule type: #{expr.first}"
|
196
183
|
end
|
@@ -208,6 +195,38 @@ module EBNF::PEG
|
|
208
195
|
return parser.packrat[sym][pos][:result]
|
209
196
|
end
|
210
197
|
|
198
|
+
##
|
199
|
+
# Repetition, 0-1, 0-n, 1-n, ...
|
200
|
+
#
|
201
|
+
# Note, nil results are removed from the result, but count towards min/max calculations
|
202
|
+
#
|
203
|
+
# @param [Scanner] input
|
204
|
+
# @param [Integer] min
|
205
|
+
# @param [Integer, String] max
|
206
|
+
# If it is an integer, it stops matching after max entries; the string `'*'` means there is no upper bound.
|
207
|
+
# @param [Symbol, String] prod
|
208
|
+
# @return [:unmatched, Array]
|
209
|
+
def rept(input, min, max, prod)
  # Collects up to `max` consecutive matches of `prod` (`max == '*'` means
  # unbounded). Returns :unmatched when fewer than `min` were found, otherwise
  # the matches with nil entries removed; nils still count towards min/max.
  result = []

  case prod
  when Symbol
    rule = parser.find_rule(prod)
    raise "No rule found for #{prod}" unless rule
    while (max == '*' || result.length < max) && (res = rule.parse(input)) != :unmatched
      eat_whitespace(input) unless terminal?
      result << res
    end
  when String
    # The literal matcher never changes, so build it once outside the loop.
    literal = Regexp.new(Regexp.quote(prod))
    # Test the bound BEFORE scanning: scanning first would consume one more
    # occurrence of the literal after `max` was reached and silently drop it.
    while (max == '*' || result.length < max) && (res = input.scan(literal))
      eat_whitespace(input) unless terminal?
      result << res
    end
  end

  result.length < min ? :unmatched : result.compact
end
|
229
|
+
|
211
230
|
##
|
212
231
|
# Eat whitespace between non-terminal rules
|
213
232
|
def eat_whitespace(input)
|
data/lib/ebnf/rule.rb
CHANGED
@@ -1,17 +1,33 @@
|
|
1
1
|
require 'scanf'
|
2
|
+
require 'strscan'
|
2
3
|
|
3
4
|
module EBNF
|
4
5
|
# Represent individual parsed rules
|
5
6
|
class Rule
|
6
7
|
# Operations which are flattened to separate rules in to_bnf.
|
7
8
|
BNF_OPS = %w{
|
8
|
-
alt opt plus seq star
|
9
|
+
alt diff not opt plus rept seq star
|
9
10
|
}.map(&:to_sym).freeze
|
10
11
|
|
11
12
|
TERM_OPS = %w{
|
12
|
-
|
13
|
+
hex istr range
|
13
14
|
}.map(&:to_sym).freeze
|
14
15
|
|
16
|
+
# The number of arguments expected per operator. `nil` for unspecified
|
17
|
+
OP_ARGN = {
|
18
|
+
alt: nil,
|
19
|
+
diff: 2,
|
20
|
+
hex: 1,
|
21
|
+
istr: 1,
|
22
|
+
not: 1,
|
23
|
+
opt: 1,
|
24
|
+
plus: 1,
|
25
|
+
range: 1,
|
26
|
+
rept: 3,
|
27
|
+
seq: nil,
|
28
|
+
star: 1
|
29
|
+
}
|
30
|
+
|
15
31
|
# Symbol of rule
|
16
32
|
#
|
17
33
|
# @return [Symbol]
|
@@ -28,7 +44,7 @@ module EBNF
|
|
28
44
|
|
29
45
|
# Kind of rule
|
30
46
|
#
|
31
|
-
# @return [:rule, :terminal, or :pass]
|
47
|
+
# @return [:rule, :terminal, :terminals, or :pass]
|
32
48
|
attr_accessor :kind
|
33
49
|
|
34
50
|
# Rule expression
|
@@ -59,19 +75,38 @@ module EBNF
|
|
59
75
|
# Determines preparation and cleanup rules for reconstituting EBNF ? * + from BNF
|
60
76
|
attr_accessor :cleanup
|
61
77
|
|
62
|
-
# @param [Symbol] sym
|
63
|
-
# @
|
78
|
+
# @param [Symbol, nil] sym
|
79
|
+
# `nil` is allowed only for @pass or @terminals
|
80
|
+
# @param [String, nil] id
|
64
81
|
# @param [Array] expr
|
65
|
-
#
|
82
|
+
# The expression is an internal representation of an S-Expression with one of the following operators:
|
83
|
+
#
|
84
|
+
# * `alt` – A list of alternative rules, which are attempted in order. It terminates with the first matching rule, or is terminated as unmatched, if no such rule is found.
|
85
|
+
# * `diff` – matches any string that matches `A` but does not match `B`.
|
86
|
+
# * `hex` – A single character represented using the hexadecimal notation `#xnn`.
|
87
|
+
# * `istr` – A string which matches in a case-insensitive manner, so that `(istr "fOo")` will match either of the strings `"foo"`, `"FOO"` or any other combination.
|
88
|
+
# * `opt` – An optional rule or terminal. It either results in the matching rule or returns `nil`.
|
89
|
+
# * `plus` – A sequence of one or more of the matching rule. If there is no such rule, it is terminated as unmatched; otherwise, the result is an array containing all matched input.
|
90
|
+
# * `range` – A range of characters, possibly repeated, of the form `(range "a-z")`. May also use hexadecimal notation.
|
91
|
+
# * `rept m n` – A sequence of at least `m` and at most `n` of the matching rule. It will always return an array.
|
92
|
+
# * `seq` – A sequence of rules or terminals. If any (other than `opt` or `star`) do not parse, the rule is terminated as unmatched.
|
93
|
+
# * `star` – A sequence of zero or more of the matching rule. It will always return an array.
|
94
|
+
# @param [:rule, :terminal, :terminals, :pass] kind (nil)
|
66
95
|
# @param [String] ebnf (nil)
|
96
|
+
# When parsing, records the EBNF string used to create the rule.
|
67
97
|
# @param [Array] first (nil)
|
98
|
+
# Recorded set of terminals that can start this rule (LL(1))
|
68
99
|
# @param [Array] follow (nil)
|
100
|
+
# Recorded set of terminals that can follow this rule (LL(1))
|
69
101
|
# @param [Boolean] start (nil)
|
102
|
+
# Is this the starting rule for the grammar?
|
70
103
|
# @param [Rule] top_rule (nil)
|
104
|
+
# The top-most rule. All expressed rules are top-rules, derived rules have the original rule as their top-rule.
|
71
105
|
# @param [Boolean] cleanup (nil)
|
106
|
+
# Records information useful for cleaning up converted :plus, and :star expansions (LL(1)).
|
72
107
|
def initialize(sym, id, expr, kind: nil, ebnf: nil, first: nil, follow: nil, start: nil, top_rule: nil, cleanup: nil)
|
73
108
|
@sym, @id = sym, id
|
74
|
-
@expr = expr.is_a?(Array) ? expr : [:seq, expr]
|
109
|
+
@expr = expr.is_a?(Array) ? expr : [:seq, expr].compact
|
75
110
|
@ebnf, @kind, @first, @follow, @start, @cleanup, @top_rule = ebnf, kind, first, follow, start, cleanup, top_rule
|
76
111
|
@top_rule ||= self
|
77
112
|
@kind ||= case
|
@@ -79,21 +114,53 @@ module EBNF
|
|
79
114
|
when !BNF_OPS.include?(@expr.first) then :terminal
|
80
115
|
else :rule
|
81
116
|
end
|
117
|
+
|
118
|
+
# Allow @pass and @terminals to not be named
|
119
|
+
@sym ||= :_pass if @kind == :pass
|
120
|
+
@sym ||= :_terminals if @kind == :terminals
|
121
|
+
|
122
|
+
raise ArgumentError, "Rule sym must be a symbol, was #{@sym.inspect}" unless @sym.is_a?(Symbol)
|
123
|
+
raise ArgumentError, "Rule id must be a string or nil, was #{@id.inspect}" unless (@id || "").is_a?(String)
|
124
|
+
raise ArgumentError, "Rule kind must be one of :rule, :terminal, :terminals, or :pass, was #{@kind.inspect}" unless
|
125
|
+
@kind.is_a?(Symbol) && %w(rule terminal terminals pass).map(&:to_sym).include?(@kind)
|
126
|
+
|
127
|
+
case @expr.first
|
128
|
+
when :alt
|
129
|
+
raise ArgumentError, "#{@expr.first} operation must have at least one operand, had #{@expr.length - 1}" unless @expr.length > 1
|
130
|
+
when :diff
|
131
|
+
raise ArgumentError, "#{@expr.first} operation must have exactly two operands, had #{@expr.length - 1}" unless @expr.length == 3
|
132
|
+
when :hex, :istr, :not, :opt, :plus, :range, :star
|
133
|
+
raise ArgumentError, "#{@expr.first} operation must have exactly one operand, had #{@expr.length - 1}" unless @expr.length == 2
|
134
|
+
when :rept
|
135
|
+
raise ArgumentError, "#{@expr.first} operation must have exactly three, had #{@expr.length - 1}" unless @expr.length == 4
|
136
|
+
raise ArgumentError, "#{@expr.first} operation must an non-negative integer minimum, was #{@expr[1]}" unless
|
137
|
+
@expr[1].is_a?(Integer) && @expr[1] >= 0
|
138
|
+
raise ArgumentError, "#{@expr.first} operation must an non-negative integer maximum or '*', was #{@expr[2]}" unless
|
139
|
+
@expr[2] == '*' || @expr[2].is_a?(Integer) && @expr[2] >= 0
|
140
|
+
when :seq
|
141
|
+
# It's legal to have a zero-length sequence
|
142
|
+
else
|
143
|
+
raise ArgumentError, "Rule expression must be an array using a known operator, was #{@expr.first}"
|
144
|
+
end
|
82
145
|
end
|
83
146
|
|
84
147
|
##
|
85
148
|
# Return a rule from its SXP representation:
|
86
149
|
#
|
87
150
|
# @example inputs
|
88
|
-
# (pass (plus (range "#x20\\t\\r\\n")))
|
151
|
+
# (pass _pass (plus (range "#x20\\t\\r\\n")))
|
89
152
|
# (rule ebnf "1" (star (alt declaration rule)))
|
90
|
-
# (terminal
|
153
|
+
# (terminal R_CHAR "19" (diff CHAR (alt "]" "-")))
|
91
154
|
#
|
92
155
|
# Also may have `(first ...)`, `(follow ...)`, or `(start #t)`.
|
93
156
|
#
|
94
|
-
# @param [Array] sxp
|
157
|
+
# @param [String, Array] sxp
|
95
158
|
# @return [Rule]
|
96
159
|
def self.from_sxp(sxp)
|
160
|
+
if sxp.is_a?(String)
|
161
|
+
require 'sxp' unless defined?(SXP)
|
162
|
+
sxp = SXP.parse(sxp)
|
163
|
+
end
|
97
164
|
expr = sxp.detect {|e| e.is_a?(Array) && ![:first, :follow, :start].include?(e.first.to_sym)}
|
98
165
|
first = sxp.detect {|e| e.is_a?(Array) && e.first.to_sym == :first}
|
99
166
|
first = first[1..-1] if first
|
@@ -115,11 +182,11 @@ module EBNF
|
|
115
182
|
# @param [Hash{Symbol => Symbol}] cleanup (nil)
|
116
183
|
# @param [Hash{Symbol => Object}] options
|
117
184
|
def build(expr, kind: nil, cleanup: nil, **options)
|
118
|
-
new_sym, new_id =
|
185
|
+
new_sym, new_id = @top_rule.send(:make_sym_id)
|
119
186
|
self.class.new(new_sym, new_id, expr,
|
120
187
|
kind: kind,
|
121
188
|
ebnf: @ebnf,
|
122
|
-
top_rule:
|
189
|
+
top_rule: @top_rule,
|
123
190
|
cleanup: cleanup,
|
124
191
|
**options)
|
125
192
|
end
|
@@ -152,15 +219,16 @@ module EBNF
|
|
152
219
|
# @return [String]
|
153
220
|
def to_ttl
|
154
221
|
@ebnf.debug("to_ttl") {inspect} if @ebnf
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
%{ rdfs:comment #{comment.inspect};}
|
163
|
-
|
222
|
+
statements = [%{:#{sym} rdfs:label "#{sym}";}]
|
223
|
+
if orig
|
224
|
+
comment = orig.to_s.strip.
|
225
|
+
gsub(/"""/, '\"\"\"').
|
226
|
+
gsub("\\", "\\\\").
|
227
|
+
sub(/^\"/, '\"').
|
228
|
+
sub(/\"$/m, '\"')
|
229
|
+
statements << %{ rdfs:comment #{comment.inspect};}
|
230
|
+
end
|
231
|
+
statements << %{ dc:identifier "#{id}";} if id
|
164
232
|
|
165
233
|
statements += ttl_expr(expr, terminal? ? "re" : "g", 1, false)
|
166
234
|
"\n" + statements.join("\n")
|
@@ -175,12 +243,13 @@ module EBNF
|
|
175
243
|
##
|
176
244
|
# Transform EBNF rule to BNF rules:
|
177
245
|
#
|
178
|
-
# * Transform (rule a "n" (op1 (op2))) into two rules:
|
179
|
-
#
|
180
|
-
#
|
181
|
-
#
|
182
|
-
# * Transform (rule a (
|
183
|
-
# * Transform (rule a (
|
246
|
+
# * Transform `(rule a "n" (op1 (op2)))` into two rules:
|
247
|
+
#
|
248
|
+
# (rule a "n" (op1 _a_1))
|
249
|
+
# (rule _a_1 "n.1" (op2))
|
250
|
+
# * Transform `(rule a (opt b))` into `(rule a (alt _empty b))`
|
251
|
+
# * Transform `(rule a (star b))` into `(rule a (alt _empty (seq b a)))`
|
252
|
+
# * Transform `(rule a (plus b))` into `(rule a (seq b (star b)))`
|
184
253
|
#
|
185
254
|
# Transformation includes information used to re-construct non-transformed.
|
186
255
|
#
|
@@ -231,7 +300,7 @@ module EBNF
|
|
231
300
|
# Otherwise, no further transformation necessary
|
232
301
|
new_rules << self
|
233
302
|
elsif [:diff, :hex, :range].include?(expr.first)
|
234
|
-
# This rules are fine,
|
303
|
+
# These rules are fine, they just need to be terminals
|
235
304
|
raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal?
|
236
305
|
new_rules << self
|
237
306
|
else
|
@@ -245,9 +314,14 @@ module EBNF
|
|
245
314
|
##
|
246
315
|
# Transform EBNF rule for PEG:
|
247
316
|
#
|
248
|
-
# * Transform (rule a "n" (op1 ... (op2 y) ...z)) into two rules:
|
249
|
-
#
|
250
|
-
#
|
317
|
+
# * Transform `(rule a "n" (op1 ... (op2 y) ...z))` into two rules:
|
318
|
+
#
|
319
|
+
# (rule a "n" (op1 ... _a_1 ... z))
|
320
|
+
# (rule _a_1 "n.1" (op2 y))
|
321
|
+
# * Transform `(rule a "n" (diff op1 op2))` into two rules:
|
322
|
+
#
|
323
|
+
# (rule a "n" (seq _a_1 op1))
|
324
|
+
# (rule _a_1 "n.1" (not op2))
|
251
325
|
#
|
252
326
|
# @return [Array<Rule>]
|
253
327
|
def to_peg
|
@@ -268,8 +342,14 @@ module EBNF
|
|
268
342
|
|
269
343
|
# Return new rules after recursively applying #to_bnf
|
270
344
|
new_rules = new_rules.map {|r| r.to_peg}.flatten
|
271
|
-
elsif
|
272
|
-
|
345
|
+
elsif expr.first == :diff && !terminal?
|
346
|
+
this = dup
|
347
|
+
new_rule = build([:not, expr[2]])
|
348
|
+
this.expr = [:seq, new_rule.sym, expr[1]]
|
349
|
+
new_rules << this
|
350
|
+
new_rules << new_rule
|
351
|
+
elsif [:hex, :istr, :range].include?(expr.first)
|
352
|
+
# These rules are fine, they just need to be terminals
|
273
353
|
raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal?
|
274
354
|
new_rules << self
|
275
355
|
else
|
@@ -287,6 +367,8 @@ module EBNF
|
|
287
367
|
case expr.first
|
288
368
|
when :hex
|
289
369
|
Regexp.new(translate_codepoints(expr[1]))
|
370
|
+
when :istr
|
371
|
+
/#{expr.last}/ui
|
290
372
|
when :range
|
291
373
|
Regexp.new("[#{translate_codepoints(expr[1])}]")
|
292
374
|
else
|
@@ -294,45 +376,170 @@ module EBNF
|
|
294
376
|
end
|
295
377
|
end
|
296
378
|
|
297
|
-
#
|
298
|
-
#
|
379
|
+
# Is this a terminal?
|
380
|
+
#
|
381
|
+
# @return [Boolean]
|
382
|
+
def terminal?
  # True exactly when the rule's kind is :terminal.
  :terminal == kind
end
|
385
|
+
|
386
|
+
# Is this a pass?
|
387
|
+
# @return [Boolean]
|
388
|
+
def pass?
  # True exactly when the rule's kind is :pass.
  :pass == kind
end
|
391
|
+
|
392
|
+
# Is this a rule?
|
393
|
+
# @return [Boolean]
|
394
|
+
def rule?
  # True exactly when the rule's kind is :rule.
  :rule == kind
end
|
397
|
+
|
398
|
+
# Is this rule of the form (alt ...)?
|
399
|
+
def alt?
  # Only an Array expression carries an operator in its first slot.
  return false unless expr.is_a?(Array)

  expr.first == :alt
end
|
402
|
+
|
403
|
+
# Is this rule of the form (seq ...)?
|
404
|
+
def seq?
  # Only an Array expression carries an operator in its first slot.
  return false unless expr.is_a?(Array)

  expr.first == :seq
end
|
407
|
+
|
408
|
+
def inspect
  # Debug rendering: object id followed by the rule's sym, id, kind and expr.
  summary = {sym: sym, id: id, kind: kind, expr: expr}
  "#<EBNF::Rule:#{object_id} #{summary.inspect}>"
end
|
413
|
+
|
414
|
+
# Two rules are equal if they have the same {#sym}, {#kind} and {#expr}.
|
415
|
+
#
|
416
|
+
# @param [Rule] other
|
417
|
+
# @return [Boolean]
|
418
|
+
def ==(other)
|
419
|
+
sym == other.sym &&
|
420
|
+
kind == other.kind &&
|
421
|
+
expr == other.expr
|
422
|
+
end
|
423
|
+
|
424
|
+
# Two rules are equivalent if they have the same {#expr}.
|
425
|
+
#
|
426
|
+
# @param [Rule] other
|
427
|
+
# @return [Boolean]
|
428
|
+
def eql?(other)
  # Equivalence looks at the expression only; sym and kind are ignored.
  mine = expr
  theirs = other.expr
  mine == theirs
end
|
431
|
+
|
432
|
+
# Rules compare using their ids
|
433
|
+
def <=>(other)
  # Without an id on both sides, fall back to ordering by symbol name.
  return sym.to_s <=> other.sym.to_s unless id && other.id
  # Identical ids compare as strings; differing ids by their numeric (Float) value.
  return id.to_s <=> other.id.to_s if id == other.id

  id.to_f <=> other.id.to_f
end
|
444
|
+
|
445
|
+
##
|
446
|
+
# Utility function to translate code points of the form '#xN' into ruby unicode characters
|
447
|
+
def translate_codepoints(str)
|
448
|
+
str.gsub(/#x\h+/) {|c| c[2..-1].scanf("%x").first.chr(Encoding::UTF_8)}
|
449
|
+
end
|
450
|
+
|
451
|
+
# Return the non-terminals for this rule.
|
452
|
+
#
|
453
|
+
# * `alt` => this is every non-terminal.
|
454
|
+
# * `diff` => this is every non-terminal.
|
455
|
+
# * `hex` => nil
|
456
|
+
# * `istr` => nil
|
457
|
+
# * `not` => this is the last expression, if any.
|
458
|
+
# * `opt` => this is the last expression, if any.
|
459
|
+
# * `plus` => this is the last expression, if any.
|
460
|
+
# * `range` => nil
|
461
|
+
# * `rept` => this is the last expression, if any.
|
462
|
+
# * `seq` => this is the first expression in the sequence, if any.
|
463
|
+
# * `star` => this is the last expression, if any.
|
299
464
|
#
|
300
465
|
# @param [Array<Rule>] ast
|
301
466
|
# The set of rules, used to turn symbols into rules
|
467
|
+
# @param [Array<Symbol,String,Array>] expr (@expr)
|
468
|
+
# The expression to check, defaults to the rule expression.
|
469
|
+
# Typically, if the expression is recursive, the embedded expression is called recursively.
|
302
470
|
# @return [Array<Rule>]
|
303
|
-
|
304
|
-
|
471
|
+
# @note this is used for LL(1) transformation, so rule types are limited
|
472
|
+
def non_terminals(ast, expr = @expr)
|
473
|
+
([:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1,1]).map do |sym|
|
305
474
|
case sym
|
306
475
|
when Symbol
|
307
476
|
r = ast.detect {|r| r.sym == sym}
|
308
477
|
r if r && r.rule?
|
478
|
+
when Array
|
479
|
+
non_terminals(ast, sym)
|
309
480
|
else
|
310
481
|
nil
|
311
482
|
end
|
312
|
-
end.compact
|
483
|
+
end.flatten.compact.uniq
|
313
484
|
end
|
314
485
|
|
315
|
-
# Return the terminals for this rule.
|
316
|
-
#
|
486
|
+
# Return the terminals for this rule.
|
487
|
+
#
|
488
|
+
# * `alt` => this is every terminal.
|
489
|
+
# * `diff` => this is every terminal.
|
490
|
+
# * `hex` => nil
|
491
|
+
# * `istr` => nil
|
492
|
+
# * `not` => this is the last expression, if any.
|
493
|
+
# * `opt` => this is the last expression, if any.
|
494
|
+
# * `plus` => this is the last expression, if any.
|
495
|
+
# * `range` => nil
|
496
|
+
# * `rept` => this is the last expression, if any.
|
497
|
+
# * `seq` => this is the first expression in the sequence, if any.
|
498
|
+
# * `star` => this is the last expression, if any.
|
317
499
|
#
|
318
500
|
# @param [Array<Rule>] ast
|
319
501
|
# The set of rules, used to turn symbols into rules
|
502
|
+
# @param [Array<Symbol,String,Array>] expr (@expr)
|
503
|
+
# The expression to check, defaults to the rule expression.
|
504
|
+
# Typically, if the expression is recursive, the embedded expression is called recursively.
|
320
505
|
# @return [Array<Rule, String>]
|
321
|
-
|
322
|
-
|
506
|
+
# @note this is used for LL(1) transformation, so rule types are limited
|
507
|
+
def terminals(ast, expr = @expr)
|
508
|
+
([:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1,1]).map do |sym|
|
323
509
|
case sym
|
324
510
|
when Symbol
|
325
511
|
r = ast.detect {|r| r.sym == sym}
|
326
512
|
r if r && r.terminal?
|
327
513
|
when String
|
328
514
|
sym
|
329
|
-
|
330
|
-
|
515
|
+
when Array
|
516
|
+
terminals(ast, sym)
|
331
517
|
end
|
332
|
-
end.compact
|
518
|
+
end.flatten.compact.uniq
|
333
519
|
end
|
334
520
|
|
335
|
-
#
|
521
|
+
# Return the symbols used in the rule.
|
522
|
+
#
|
523
|
+
# @param [Array<Symbol,String,Array>] expr (@expr)
|
524
|
+
# The expression to check, defaults to the rule expression.
|
525
|
+
# Typically, if the expression is recursive, the embedded expression is called recursively.
|
526
|
+
# @return [Array<Symbol>]
|
527
|
+
def symbols(expr = @expr)
|
528
|
+
expr[1..-1].map do |sym|
|
529
|
+
case sym
|
530
|
+
when Symbol
|
531
|
+
sym
|
532
|
+
when Array
|
533
|
+
symbols(sym)
|
534
|
+
end
|
535
|
+
end.flatten.compact.uniq
|
536
|
+
end
|
537
|
+
|
538
|
+
##
|
539
|
+
# The following are used for LL(1) transformation.
|
540
|
+
##
|
541
|
+
|
542
|
+
# Does this rule start with `sym`? It does if expr is that sym,
|
336
543
|
# expr starts with alt and contains that sym,
|
337
544
|
# or expr starts with seq and the next element is that sym.
|
338
545
|
#
|
@@ -349,6 +556,92 @@ module EBNF
|
|
349
556
|
end
|
350
557
|
end
|
351
558
|
|
559
|
+
##
|
560
|
+
# Validate the rule, with respect to an AST.
|
561
|
+
#
|
562
|
+
# @param [Array<Rule>] ast
|
563
|
+
# The set of rules, used to turn symbols into rules
|
564
|
+
# @param [Array<Symbol,String,Array>] expr (@expr)
|
565
|
+
# The expression to check, defaults to the rule expression.
|
566
|
+
# Typically, if the expression is recursive, the embedded expression is called recursively.
|
567
|
+
# @raise [SyntaxError]
|
568
|
+
def validate!(ast, expr = @expr)
|
569
|
+
op = expr.first
|
570
|
+
raise SyntaxError, "Unknown operator: #{op}" unless OP_ARGN.key?(op)
|
571
|
+
raise SyntaxError, "Argument count missmatch on operator #{op}, had #{expr.length - 1} expected #{OP_ARGN[op]}" if
|
572
|
+
OP_ARGN[op] && OP_ARGN[op] != expr.length - 1
|
573
|
+
|
574
|
+
# alt needs at least one operand; rept needs a non-negative integer minimum and a non-negative integer or '*' maximum
|
575
|
+
if op == :alt
|
576
|
+
raise SyntaxError, "alt operation must have at least one operand, had #{expr.length - 1}" unless expr.length > 1
|
577
|
+
elsif op == :rept
|
578
|
+
raise SyntaxError, "rept operation must an non-negative integer minimum, was #{expr[1]}" unless
|
579
|
+
expr[1].is_a?(Integer) && expr[1] >= 0
|
580
|
+
raise SyntaxError, "rept operation must an non-negative integer maximum or '*', was #{expr[2]}" unless
|
581
|
+
expr[2] == '*' || expr[2].is_a?(Integer) && expr[2] >= 0
|
582
|
+
end
|
583
|
+
|
584
|
+
case op
|
585
|
+
when :hex
|
586
|
+
raise SyntaxError, "Hex operand must be of form '#xN+': #{sym}" unless expr.last.match?(/^#x\h+$/)
|
587
|
+
when :range
|
588
|
+
str = expr.last.dup
|
589
|
+
str = str[1..-1] if str.start_with?('^')
|
590
|
+
str = str[0..-2] if str.end_with?('-') # Allowed at end of range
|
591
|
+
scanner = StringScanner.new(str)
|
592
|
+
hex = rchar = in_range = false
|
593
|
+
while !scanner.eos?
|
594
|
+
begin
|
595
|
+
if scanner.scan(Terminals::HEX)
|
596
|
+
raise SyntaxError if in_range && rchar
|
597
|
+
rchar = in_range = false
|
598
|
+
hex = true
|
599
|
+
elsif scanner.scan(Terminals::R_CHAR)
|
600
|
+
raise SyntaxError if in_range && hex
|
601
|
+
hex = in_range = false
|
602
|
+
rchar = true
|
603
|
+
else
|
604
|
+
raise(SyntaxError, "Range contains illegal components at offset #{scanner.pos}: was #{expr.last}")
|
605
|
+
end
|
606
|
+
|
607
|
+
if scanner.scan(/\-/)
|
608
|
+
raise SyntaxError if in_range
|
609
|
+
in_range = true
|
610
|
+
end
|
611
|
+
rescue SyntaxError
|
612
|
+
raise(SyntaxError, "Range contains illegal components at offset #{scanner.pos}: was #{expr.last}")
|
613
|
+
end
|
614
|
+
end
|
615
|
+
else
|
616
|
+
([:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1,1]).each do |sym|
|
617
|
+
case sym
|
618
|
+
when Symbol
|
619
|
+
r = ast.detect {|r| r.sym == sym}
|
620
|
+
raise SyntaxError, "No rule found for #{sym}" unless r
|
621
|
+
when Array
|
622
|
+
validate!(ast, sym)
|
623
|
+
when String
|
624
|
+
raise SyntaxError, "String must be of the form CHAR*" unless sym.match?(/^#{Terminals::CHAR}*$/)
|
625
|
+
end
|
626
|
+
end
|
627
|
+
end
|
628
|
+
end
|
629
|
+
|
630
|
+
##
|
631
|
+
# Validate the rule, with respect to an AST.
|
632
|
+
#
|
633
|
+
# Uses `#validate!` and catches `SyntaxError`
|
634
|
+
#
|
635
|
+
# @param [Array<Rule>] ast
|
636
|
+
# The set of rules, used to turn symbols into rules
|
637
|
+
# @return [Boolean]
|
638
|
+
def valid?(ast)
|
639
|
+
validate!(ast)
|
640
|
+
true
|
641
|
+
rescue SyntaxError
|
642
|
+
false
|
643
|
+
end
|
644
|
+
|
352
645
|
# Do the firsts of this rule include the empty string?
|
353
646
|
#
|
354
647
|
# @return [Boolean]
|
@@ -381,79 +674,6 @@ module EBNF
|
|
381
674
|
terminals.length
|
382
675
|
end
|
383
676
|
|
384
|
-
# Is this a terminal?
|
385
|
-
#
|
386
|
-
# @return [Boolean]
|
387
|
-
def terminal?
|
388
|
-
kind == :terminal
|
389
|
-
end
|
390
|
-
|
391
|
-
# Is this a pass?
|
392
|
-
# @return [Boolean]
|
393
|
-
def pass?
|
394
|
-
kind == :pass
|
395
|
-
end
|
396
|
-
|
397
|
-
# Is this a rule?
|
398
|
-
# @return [Boolean]
|
399
|
-
def rule?
|
400
|
-
kind == :rule
|
401
|
-
end
|
402
|
-
|
403
|
-
# Is this rule of the form (alt ...)?
|
404
|
-
def alt?
|
405
|
-
expr.is_a?(Array) && expr.first == :alt
|
406
|
-
end
|
407
|
-
|
408
|
-
# Is this rule of the form (seq ...)?
|
409
|
-
def seq?
|
410
|
-
expr.is_a?(Array) && expr.first == :seq
|
411
|
-
end
|
412
|
-
|
413
|
-
# Is this rule of the form (alt ...)?
|
414
|
-
def alt?
|
415
|
-
expr.is_a?(Array) && expr.first == :alt
|
416
|
-
end
|
417
|
-
|
418
|
-
def inspect
|
419
|
-
"#<EBNF::Rule:#{object_id} " +
|
420
|
-
{sym: sym, id: id, kind: kind, expr: expr}.inspect +
|
421
|
-
">"
|
422
|
-
end
|
423
|
-
|
424
|
-
# Two rules are equal if they have the same {#sym}, {#kind} and {#expr}.
|
425
|
-
#
|
426
|
-
# @param [Rule] other
|
427
|
-
# @return [Boolean]
|
428
|
-
def ==(other)
|
429
|
-
sym == other.sym &&
|
430
|
-
kind == other.kind &&
|
431
|
-
expr == other.expr
|
432
|
-
end
|
433
|
-
|
434
|
-
# Two rules are equivalent if they have the same {#expr}.
|
435
|
-
#
|
436
|
-
# @param [Rule] other
|
437
|
-
# @return [Boolean]
|
438
|
-
def equivalent?(other)
|
439
|
-
expr == other.expr
|
440
|
-
end
|
441
|
-
|
442
|
-
# Rules compare using their ids
|
443
|
-
def <=>(other)
|
444
|
-
if id.to_i == other.id.to_i
|
445
|
-
id.to_s <=> other.id.to_s
|
446
|
-
else
|
447
|
-
id.to_i <=> other.id.to_i
|
448
|
-
end
|
449
|
-
end
|
450
|
-
|
451
|
-
##
|
452
|
-
# Utility function to translate code points of the form '#xN' into ruby unicode characters
|
453
|
-
def translate_codepoints(str)
|
454
|
-
str.gsub(/#x\h+/) {|c| c[2..-1].scanf("%x").first.chr(Encoding::UTF_8)}
|
455
|
-
end
|
456
|
-
|
457
677
|
private
|
458
678
|
def ttl_expr(expr, pfx, depth, is_obj = true)
|
459
679
|
indent = ' ' * depth
|
@@ -469,17 +689,28 @@ module EBNF
|
|
469
689
|
|
470
690
|
case op
|
471
691
|
when :seq, :alt, :diff
|
692
|
+
# Multiple operands
|
472
693
|
statements << %{#{indent}#{bra}#{pfx}:#{op} (}
|
473
694
|
expr.each {|a| statements += ttl_expr(a, pfx, depth + 1)}
|
474
695
|
statements << %{#{indent} )#{ket}}
|
475
|
-
when :opt, :plus, :star
|
696
|
+
when :opt, :plus, :star, :not
|
697
|
+
# Single operand
|
476
698
|
statements << %{#{indent}#{bra}#{pfx}:#{op} }
|
477
699
|
statements += ttl_expr(expr.first, pfx, depth + 1)
|
478
700
|
statements << %{#{indent} #{ket}} unless ket.empty?
|
701
|
+
when :rept
|
702
|
+
# Three operands (min, max and expr)
|
703
|
+
statements << %{ #{indent}#{pfx}:min #{expr[0].inspect};}
|
704
|
+
statements << %{ #{indent}#{pfx}:max #{expr[1].inspect};}
|
705
|
+
statements << %{#{indent}#{bra}#{pfx}:#{op} }
|
706
|
+
statements += ttl_expr(expr.last, pfx, depth + 1)
|
707
|
+
statements << %{#{indent} #{ket}} unless ket.empty?
|
479
708
|
when :_empty, :_eps
|
480
709
|
statements << %{#{indent}"g:#{op.to_s[1..-1]}"}
|
481
710
|
when :"'"
|
482
711
|
statements << %{#{indent}"#{esc(expr)}"}
|
712
|
+
when :istr
|
713
|
+
statements << %{#{indent}#{bra} re:matches #{expr.first.inspect} #{ket}}
|
483
714
|
when :range
|
484
715
|
statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
|
485
716
|
when :hex
|
@@ -535,7 +766,7 @@ module EBNF
|
|
535
766
|
def make_sym_id(variation = nil)
|
536
767
|
@id_seq ||= 0
|
537
768
|
@id_seq += 1
|
538
|
-
["_#{@sym}_#{@id_seq}#{variation}".to_sym, "#{@id}.#{@id_seq}#{variation}"]
|
769
|
+
["_#{@sym}_#{@id_seq}#{variation}".to_sym, ("#{@id}.#{@id_seq}#{variation}" if @id)]
|
539
770
|
end
|
540
771
|
end
|
541
772
|
end
|