ebnf 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +81 -36
- data/VERSION +1 -1
- data/bin/ebnf +34 -18
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/ebnf.ebnf +19 -25
- data/etc/ebnf.html +251 -206
- data/etc/ebnf.ll1.rb +27 -103
- data/etc/ebnf.ll1.sxp +105 -102
- data/etc/ebnf.peg.rb +54 -62
- data/etc/ebnf.peg.sxp +53 -62
- data/etc/ebnf.sxp +22 -19
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.sxp +8 -7
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.sxp +22 -20
- data/lib/ebnf.rb +3 -0
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +87 -44
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +4 -7
- data/lib/ebnf/ll1/parser.rb +12 -4
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +1 -1
- data/lib/ebnf/peg/parser.rb +24 -5
- data/lib/ebnf/peg/rule.rb +77 -58
- data/lib/ebnf/rule.rb +352 -121
- data/lib/ebnf/terminals.rb +13 -10
- data/lib/ebnf/writer.rb +550 -78
- metadata +48 -6
data/lib/ebnf/terminals.rb
CHANGED
@@ -1,18 +1,21 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
# Terminal definitions for the EBNF grammar
|
3
3
|
module EBNF::Terminals
|
4
|
-
|
5
|
-
|
4
|
+
SYMBOL_BASE = %r(\b[a-zA-Z0-9_\.]+\b)u.freeze
|
5
|
+
SYMBOL = %r(#{SYMBOL_BASE}(?!\s*::=))u.freeze
|
6
|
+
HEX = %r(\#x\h+)u.freeze
|
6
7
|
CHAR = %r([\u0009\u000A\u000D\u0020-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
|
7
|
-
R_CHAR = %r([\u0009\u000A\u000D\u0020-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
|
8
|
-
RANGE = %r(\[(?:(?:#{R_CHAR}
|
9
|
-
|
10
|
-
|
11
|
-
LHS = %r(\[(?:(?:#{SYMBOL})+\]\s+)?(?:#{SYMBOL})\s*::=)u.freeze
|
12
|
-
O_RANGE = %r(\[^(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}-#{HEX})\])u.freeze
|
13
|
-
O_ENUM = %r(\[^(?:#{R_CHAR})+\])u.freeze
|
8
|
+
R_CHAR = %r([\u0009\u000A\u000D\u0020-\u002C\u002E-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
|
9
|
+
RANGE = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX})|#{R_CHAR}|#{HEX})+-?\](?!\s+#{SYMBOL_BASE}\s*::=))u.freeze
|
10
|
+
LHS = %r((?:\[#{SYMBOL_BASE}\])?\s*#{SYMBOL_BASE}\s*::=)u.freeze
|
11
|
+
O_RANGE = %r(\[\^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX}|#{R_CHAR}|#{HEX}))+-?\])u.freeze
|
14
12
|
STRING1 = %r("[\u0009\u000A\u000D\u0020\u0021\u0023-\uD7FF\u{10000}-\u{10FFFF}]*")u.freeze
|
15
13
|
STRING2 = %r('[\u0009\u000A\u000D\u0020-\u0026\u0028-\uD7FF\u{10000}-\u{10FFFF}]*')u.freeze
|
16
14
|
POSTFIX = %r([?*+])u.freeze
|
17
|
-
PASS = %r((
|
15
|
+
PASS = %r((
|
16
|
+
\s
|
17
|
+
| (?:(?:\#[^x]|//)[^\n\r]*)
|
18
|
+
| (?:/\*(?:(?:\*[^/])|[^*])*\*/)
|
19
|
+
| (?:\(\*(?:(?:\*[^\)])|[^*])*\*\))
|
20
|
+
)+)xmu.freeze
|
18
21
|
end
|
data/lib/ebnf/writer.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
require 'rdf'
|
3
3
|
require 'strscan' unless defined?(StringScanner)
|
4
|
+
require "ostruct"
|
4
5
|
|
5
6
|
##
|
6
7
|
# Serialize ruleset back to EBNF
|
@@ -8,15 +9,53 @@ module EBNF
|
|
8
9
|
class Writer
|
9
10
|
LINE_LENGTH = 80
|
10
11
|
|
12
|
+
# ASCII escape names
|
13
|
+
ASCII_ESCAPE_NAMES = [
|
14
|
+
"null", #x00
|
15
|
+
"start of heading", #x01
|
16
|
+
"start of text", #x02
|
17
|
+
"end of text", #x03
|
18
|
+
"end of transmission", #x04
|
19
|
+
"enquiry", #x05
|
20
|
+
"acknowledge", #x06
|
21
|
+
"bell", #x07
|
22
|
+
"backspace", #x08
|
23
|
+
"horizontal tab", #x09
|
24
|
+
"new line", #x0A
|
25
|
+
"vertical tab", #x0B
|
26
|
+
"form feed", #x0C
|
27
|
+
"carriage return", #x0D
|
28
|
+
"shift out", #x0E
|
29
|
+
"shift in", #x0F
|
30
|
+
"data link escape", #x10
|
31
|
+
"device control 1", #x11
|
32
|
+
"device control 2", #x12
|
33
|
+
"device control 3", #x13
|
34
|
+
"device control 4", #x14
|
35
|
+
"negative acknowledge", #x15
|
36
|
+
"synchronous idle", #x16
|
37
|
+
"end of trans. block", #x17
|
38
|
+
"cancel", #x18
|
39
|
+
"end of medium", #x19
|
40
|
+
"substitute", #x1A
|
41
|
+
"escape", #x1B
|
42
|
+
"file separator", #x1C
|
43
|
+
"group separator", #x1D
|
44
|
+
"record separator", #x1E
|
45
|
+
"unit separator", #x1F
|
46
|
+
"space" #x20
|
47
|
+
]
|
48
|
+
|
11
49
|
##
|
12
50
|
# Format rules to a String
|
13
51
|
#
|
14
52
|
# @param [Array<Rule>] rules
|
53
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
15
54
|
# @return [Object]
|
16
|
-
def self.string(*rules)
|
55
|
+
def self.string(*rules, format: :ebnf)
|
17
56
|
require 'stringio' unless defined?(StringIO)
|
18
57
|
buf = StringIO.new
|
19
|
-
write(buf, *rules)
|
58
|
+
write(buf, *rules, format: format)
|
20
59
|
buf.string
|
21
60
|
end
|
22
61
|
|
@@ -24,9 +63,10 @@ module EBNF
|
|
24
63
|
# Format rules to $stdout
|
25
64
|
#
|
26
65
|
# @param [Array<Rule>] rules
|
66
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
27
67
|
# @return [Object]
|
28
|
-
def self.print(*rules)
|
29
|
-
write($stdout, *rules)
|
68
|
+
def self.print(*rules, format: :ebnf)
|
69
|
+
write($stdout, *rules, format: format)
|
30
70
|
end
|
31
71
|
|
32
72
|
##
|
@@ -34,20 +74,22 @@ module EBNF
|
|
34
74
|
#
|
35
75
|
# @param [Object] out
|
36
76
|
# @param [Array<Rule>] rules
|
77
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
37
78
|
# @return [Object]
|
38
|
-
def self.write(out, *rules)
|
39
|
-
Writer.new(rules, out: out)
|
79
|
+
def self.write(out, *rules, format: :ebnf)
|
80
|
+
Writer.new(rules, out: out, format: format)
|
40
81
|
end
|
41
82
|
|
42
83
|
##
|
43
84
|
# Write formatted rules to an IO like object as HTML
|
44
85
|
#
|
45
86
|
# @param [Array<Rule>] rules
|
87
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
46
88
|
# @return [Object]
|
47
|
-
def self.html(*rules)
|
89
|
+
def self.html(*rules, format: :ebnf)
|
48
90
|
require 'stringio' unless defined?(StringIO)
|
49
91
|
buf = StringIO.new
|
50
|
-
Writer.new(rules, out: buf, html: true)
|
92
|
+
Writer.new(rules, out: buf, html: true, format: format)
|
51
93
|
buf.string
|
52
94
|
end
|
53
95
|
|
@@ -55,17 +97,24 @@ module EBNF
|
|
55
97
|
# @param [Array<Rule>] rules
|
56
98
|
# @param [Hash{Symbol => Object}] options
|
57
99
|
# @param [#write] out ($stdout)
|
100
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
58
101
|
# @option options [Symbol] format
|
59
102
|
# @option options [Boolean] html (false)
|
60
|
-
def initialize(rules, out: $stdout, html: false, **options)
|
61
|
-
@options = options.
|
103
|
+
def initialize(rules, out: $stdout, html: false, format: :ebnf, **options)
|
104
|
+
@options = options.merge(html: html)
|
105
|
+
return if rules.empty?
|
62
106
|
|
63
107
|
# Determine max LHS length
|
108
|
+
format_meth = "format_#{format}".to_sym
|
64
109
|
max_id = rules.max_by {|r| r.id.to_s.length}.id.to_s.length
|
65
110
|
max_sym = rules.max_by {|r| r.sym.to_s.length}.sym.to_s.length
|
66
|
-
lhs_length = max_sym +
|
67
|
-
lhs_fmt =
|
68
|
-
|
111
|
+
lhs_length = max_sym + 1
|
112
|
+
lhs_fmt = case format
|
113
|
+
when :abnf then "%<sym>-#{max_sym}s = "
|
114
|
+
when :ebnf then "%<sym>-#{max_sym}s ::= "
|
115
|
+
when :isoebnf then "%<sym>-#{max_sym}s = "
|
116
|
+
end
|
117
|
+
if format == :ebnf && max_id > 0
|
69
118
|
lhs_fmt = "%<id>-#{max_id+2}s " + lhs_fmt
|
70
119
|
lhs_length += max_id + 3
|
71
120
|
end
|
@@ -74,49 +123,104 @@ module EBNF
|
|
74
123
|
if html
|
75
124
|
# Output as formatted HTML
|
76
125
|
begin
|
77
|
-
require '
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
126
|
+
require 'erubis'
|
127
|
+
eruby = Erubis::Eruby.new(ERB_DESC)
|
128
|
+
formatted_rules = rules.map do |rule|
|
129
|
+
if rule.kind == :terminals || rule.kind == :pass
|
130
|
+
OpenStruct.new(id: ("@#{rule.kind}"),
|
131
|
+
sym: nil,
|
132
|
+
assign: nil,
|
133
|
+
formatted: ("<strong>Productions for terminals</strong>" if rule.kind == :terminals))
|
134
|
+
else
|
135
|
+
formatted_expr = self.send(format_meth, rule.expr)
|
136
|
+
# Measure text without markup
|
137
|
+
formatted_expr_text = formatted_expr.gsub(%r{</?\w+[^>]*>}, '')
|
138
|
+
if formatted_expr_text.length > rhs_length && (format != :abnf || rule.alt?)
|
139
|
+
lines = []
|
140
|
+
# Can only reasonably split apart alts
|
141
|
+
self.send(format_meth, rule.expr, sep: "--rule-extensions--").
|
142
|
+
split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
|
143
|
+
assign = case format
|
144
|
+
when :ebnf
|
145
|
+
formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
|
146
|
+
(ndx > 0 ? (rule.alt? ? '|' : '') : '::=')
|
147
|
+
when :abnf
|
148
|
+
formatted.sub!(%r{\s*<code>/</code>\s*}, '')
|
149
|
+
(ndx > 0 ? '=/' : '=')
|
150
|
+
else
|
151
|
+
formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
|
152
|
+
(ndx > 0 ? (rule.alt? ? '|' : '') : '=')
|
153
|
+
end
|
154
|
+
lines << OpenStruct.new(id: ("[#{rule.id}]" if rule.id),
|
155
|
+
sym: (rule.sym if ndx == 0 || format == :abnf),
|
156
|
+
assign: assign,
|
157
|
+
formatted: formatted)
|
158
|
+
end
|
159
|
+
if format == :isoebnf
|
160
|
+
lines << OpenStruct.new(assign: ';')
|
161
|
+
end
|
162
|
+
lines
|
163
|
+
else
|
164
|
+
OpenStruct.new(id: ("[#{rule.id}]" if rule.id),
|
165
|
+
sym: rule.sym,
|
166
|
+
assign: (format == :ebnf ? '::=' : '='),
|
167
|
+
formatted: (formatted_expr + (format == :isoebnf ? ' ;' : '')))
|
168
|
+
end
|
169
|
+
end
|
170
|
+
end.flatten
|
171
|
+
out.write eruby.evaluate(format: format, rules: formatted_rules)
|
83
172
|
return
|
84
173
|
rescue LoadError
|
85
|
-
$stderr.puts "Generating HTML requires
|
174
|
+
$stderr.puts "Generating HTML requires erubis gem to be loaded"
|
86
175
|
end
|
87
176
|
end
|
88
177
|
|
89
178
|
# Format each rule, considering the available rhs size
|
90
179
|
rules.each do |rule|
|
91
180
|
buffer = if rule.pass?
|
92
|
-
"%-#{lhs_length-2}s" % "@pass"
|
181
|
+
"\n%-#{lhs_length-2}s " % "@pass"
|
182
|
+
elsif rule.kind == :terminals
|
183
|
+
"\n%-#{lhs_length-2}s" % "@terminals"
|
93
184
|
else
|
94
185
|
lhs_fmt % {id: "[#{rule.id}]", sym: rule.sym}
|
95
186
|
end
|
96
|
-
formatted_expr =
|
97
|
-
if formatted_expr.length > rhs_length
|
98
|
-
|
187
|
+
formatted_expr = self.send(format_meth, rule.expr)
|
188
|
+
if formatted_expr.length > rhs_length && (format != :abnf || rule.alt?)
|
189
|
+
if format == :abnf
|
190
|
+
# No whitespace, use =/
|
191
|
+
self.send(format_meth, rule.expr, sep: "--rule-extensions--").
|
192
|
+
split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
|
193
|
+
if ndx > 0
|
194
|
+
buffer << "\n" + lhs_fmt.sub('= ', '=/') % {id: "[#{rule.id}]", sym: rule.sym}
|
195
|
+
end
|
196
|
+
buffer << formatted.sub(/\s*\/\s*/, '')
|
197
|
+
end
|
198
|
+
else
|
199
|
+
# Space out past "= "
|
200
|
+
buffer << self.send(format_meth, rule.expr, sep: ("\n" + " " * (lhs_length + (rule.alt? ? 2 : 4) - (format == :ebnf ? 0 : 2))))
|
201
|
+
buffer << ("\n" + " " * (lhs_length) + ';') if format == :isoebnf
|
202
|
+
end
|
99
203
|
else
|
100
|
-
buffer << formatted_expr
|
204
|
+
buffer << formatted_expr + (format == :isoebnf ? ' ;' : '')
|
101
205
|
end
|
206
|
+
buffer << "\n\n" if [:terminals, :pass].include?(rule.kind)
|
102
207
|
out.puts(buffer)
|
103
208
|
end
|
104
209
|
end
|
105
210
|
|
106
211
|
protected
|
212
|
+
|
213
|
+
##
|
214
|
+
# W3C EBNF Formatters
|
215
|
+
##
|
216
|
+
|
107
217
|
# Format the expression part of a rule
|
108
|
-
def
|
218
|
+
def format_ebnf(expr, sep: nil, embedded: false)
|
109
219
|
return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
|
110
220
|
if expr.is_a?(String)
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
return (@options[:html] ? %(<code class="grammar-char-escape">#{expr}</code>) : expr)
|
115
|
-
elsif expr =~ /"/
|
116
|
-
return (@options[:html] ? %('<code class="grammar-literal">#{escape(expr, "'")}</code>') : %('#{escape(expr, "'")}'))
|
117
|
-
else
|
118
|
-
return (@options[:html] ? %("<code class="grammar-literal">#{escape(expr, '"')}</code>") : %("#{escape(expr, '"')}"))
|
119
|
-
end
|
221
|
+
return expr.length == 1 ?
|
222
|
+
format_ebnf_char(expr) :
|
223
|
+
format_ebnf_string(expr, expr.include?('"') ? "'" : '"')
|
120
224
|
end
|
121
225
|
parts = {
|
122
226
|
alt: (@options[:html] ? "<code>|</code> " : "| "),
|
@@ -129,40 +233,75 @@ module EBNF
|
|
129
233
|
rparen = (@options[:html] ? "<code>)</code> " : ")")
|
130
234
|
|
131
235
|
case expr.first
|
236
|
+
when :istr
|
237
|
+
# Loses fidelity, but, oh well ...
|
238
|
+
format_ebnf(expr.last, embedded: true)
|
132
239
|
when :alt, :diff
|
133
240
|
this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
|
134
|
-
expr[1..-1].map {|e|
|
241
|
+
res = expr[1..-1].map {|e| format_ebnf(e, embedded: true)}.join(this_sep)
|
242
|
+
embedded ? (lparen + res + rparen) : res
|
135
243
|
when :star, :plus, :opt
|
136
|
-
raise "Expected star expression to have a single operand" unless expr.length == 2
|
137
244
|
char = parts[expr.first.to_sym]
|
138
|
-
r =
|
139
|
-
|
245
|
+
r = format_ebnf(expr[1], embedded: true)
|
246
|
+
"#{r}#{char}"
|
140
247
|
when :hex
|
141
|
-
(
|
248
|
+
escape_ebnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
|
142
249
|
when :range
|
143
|
-
|
250
|
+
format_ebnf_range(expr.last)
|
144
251
|
when :seq
|
145
252
|
this_sep = (sep ? sep : " ")
|
146
|
-
expr[1..-1].map
|
253
|
+
res = expr[1..-1].map do |e|
|
254
|
+
format_ebnf(e, embedded: true)
|
255
|
+
end.join(this_sep)
|
256
|
+
embedded ? (lparen + res + rparen) : res
|
257
|
+
when :rept
|
258
|
+
# Expand repetition
|
259
|
+
min, max, value = expr[1..-1]
|
260
|
+
if min == 0 && max == 1
|
261
|
+
format_ebnf([:opt, value], sep: sep, embedded: embedded)
|
262
|
+
elsif min == 0 && max == '*'
|
263
|
+
format_ebnf([:star, value], sep: sep, embedded: embedded)
|
264
|
+
elsif min == 1 && max == '*'
|
265
|
+
format_ebnf([:plus, value], sep: sep, embedded: embedded)
|
266
|
+
else
|
267
|
+
val2 = [:seq]
|
268
|
+
while min > 0
|
269
|
+
val2 << value
|
270
|
+
min -= 1
|
271
|
+
max -= 1 unless max == '*'
|
272
|
+
end
|
273
|
+
if max == '*'
|
274
|
+
val2 << [:star, value]
|
275
|
+
else
|
276
|
+
opt = nil
|
277
|
+
while max > 0
|
278
|
+
opt = [:opt, opt ? [:seq, value, opt] : value]
|
279
|
+
max -= 1
|
280
|
+
end
|
281
|
+
val2 << opt if opt
|
282
|
+
end
|
283
|
+
format_ebnf(val2, sep: sep, embedded: embedded)
|
284
|
+
end
|
147
285
|
else
|
148
286
|
raise "Unknown operator: #{expr.first}"
|
149
287
|
end
|
150
288
|
end
|
151
289
|
|
152
290
|
# Format a single-character string, preferring hex for non-main ASCII
|
153
|
-
def
|
291
|
+
def format_ebnf_char(c)
|
154
292
|
case c.ord
|
155
|
-
when
|
156
|
-
when
|
157
|
-
|
293
|
+
when (0x21) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"})
|
294
|
+
when 0x22 then (@options[:html] ? %('<code class="grammar-literal">"</code>') : %{'"'})
|
295
|
+
when (0x23..0x7e) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"})
|
296
|
+
when (0x80..0xFFFD) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"})
|
297
|
+
else escape_ebnf_hex(c)
|
158
298
|
end
|
159
299
|
end
|
160
300
|
|
161
301
|
# Format a range
|
162
|
-
def
|
302
|
+
def format_ebnf_range(string)
|
163
303
|
lbrac = (@options[:html] ? "<code>[</code> " : "[")
|
164
304
|
rbrac = (@options[:html] ? "<code>]</code> " : "]")
|
165
|
-
dash = (@options[:html] ? "<code>-</code> " : "-")
|
166
305
|
|
167
306
|
buffer = lbrac
|
168
307
|
s = StringScanner.new(string)
|
@@ -171,53 +310,386 @@ module EBNF
|
|
171
310
|
when s.scan(/\A[!"\u0024-\u007e]+/)
|
172
311
|
buffer << (@options[:html] ? %(<code class="grammar-literal">#{s.matched}</code>) : s.matched)
|
173
312
|
when s.scan(/\A#x\h+/)
|
174
|
-
buffer << (
|
175
|
-
when s.scan(/\A-/)
|
176
|
-
buffer << dash
|
313
|
+
buffer << escape_ebnf_hex(s.matched[2..-1].hex.chr(Encoding::UTF_8))
|
177
314
|
else
|
178
|
-
buffer << (
|
315
|
+
buffer << escape_ebnf_hex(s.getch)
|
179
316
|
end
|
180
317
|
end
|
181
318
|
buffer + rbrac
|
182
319
|
end
|
183
320
|
|
184
321
|
# Escape a string, using as many UTF-8 characters as possible
|
185
|
-
def
|
186
|
-
buffer = ""
|
322
|
+
def format_ebnf_string(string, quote = '"')
|
187
323
|
string.each_char do |c|
|
188
|
-
|
189
|
-
when
|
190
|
-
|
191
|
-
|
324
|
+
case c.ord
|
325
|
+
when 0x00..0x19, quote.ord
|
326
|
+
raise RangeError, "cannot format #{string.inspect} as an EBNF String: #{c.inspect} is out of range" unless
|
327
|
+
ISOEBNF::TERMINAL_CHARACTER.match?(c)
|
192
328
|
end
|
193
329
|
end
|
194
|
-
|
330
|
+
|
331
|
+
"#{quote}#{string}#{quote}"
|
195
332
|
end
|
196
333
|
|
197
|
-
def
|
334
|
+
# Escape a character as a W3C EBNF hexadecimal code point (`#xNN`).
#
# The code point is zero-padded to 2, 4 or 8 upper-case hex digits depending
# on its magnitude. When the `:html` option is set, the escape is wrapped in
# an `<abbr>` whose title describes the character, inside a
# `<code class="grammar-char-escape">` element.
#
# @param [String] u character to escape; only its first code point is encoded
# @return [String] the plain escape, or its HTML markup when `@options[:html]`
def escape_ebnf_hex(u)
  cp = u.ord
  fmt = case cp
  when 0x0000..0x00ff then "#x%02X"
  when 0x0100..0xffff then "#x%04X"
  else "#x%08X"
  end
  char = fmt % cp
  return char unless @options[:html]

  title = if cp <= 0x20
    ASCII_ESCAPE_NAMES[cp]
  elsif cp < 0x7F
    # Printable ASCII includes `"`, `&`, `<` and `>`; emitted raw they would
    # terminate or corrupt the title attribute.
    safe = u.gsub(/[&<>"]/, '&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;')
    "ascii '#{safe}'"
  elsif cp == 0x7F
    "delete"
  elsif cp <= 0xFF
    "extended ascii '#{u}'"
  else
    "unicode '#{u}'"
  end
  %(<code class="grammar-char-escape"><abbr title="#{title}">#{char}</abbr></code>)
end
|
359
|
+
|
360
|
+
##
|
361
|
+
# ABNF Formatters
|
362
|
+
##
|
363
|
+
|
364
|
+
# Format the expression part of a rule
|
365
|
+
# Format the expression part of a rule as ABNF.
#
# Symbols become rule references, strings become literals (or hex escapes),
# and arrays are treated as operator expressions whose first element names
# the operator (`:istr`, `:alt`, `:opt`, `:plus`, `:star`, `:hex`, `:range`,
# `:seq`, `:rept`).
#
# @param [Symbol, String, Array] expr expression to format
# @param [String] sep separator placed between `:alt`/`:seq` operands (defaults to a space)
# @param [Boolean] embedded wrap `:alt`/`:seq` results in parentheses
# @param [Boolean] sensitive prefix string literals with `%s` (case-sensitive)
# @return [String]
# @raise [RangeError] for the `:diff` operator, which ABNF cannot express
# @raise [RuntimeError] for an unknown operator
def format_abnf(expr, sep: nil, embedded: false, sensitive: true)
  return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
  if expr.is_a?(String)
    if expr.length == 1
      return format_abnf_char(expr)
    elsif expr.start_with?('%')
      # Already encoded
      return expr
    elsif expr.include?('"')
      # A quoted ABNF string cannot contain a double quote, so emit the pieces
      # around each quote as a sequence joined by %x22. The -1 limit keeps
      # trailing empty fields, so trailing/adjacent quotes are not lost; the
      # empty pieces themselves are then dropped.
      segments = expr.split('"', -1)
      seq = segments.flat_map {|s| [[:hex, "#x22"], s]}.drop(1).reject {|s| s == ""}
      return format_abnf([:seq, *seq], sep: nil, embedded: false)
    else
      return (@options[:html] ? %("<code class="grammar-literal">#{'%s' if sensitive}#{expr}</code>") : %(#{'%s' if sensitive}"#{expr}"))
    end
  end
  parts = {
    alt: (@options[:html] ? "<code>/</code> " : "/ "),
    star: (@options[:html] ? "<code>*</code> " : "*"),
    plus: (@options[:html] ? "<code>+</code> " : "1*"),
    opt: (@options[:html] ? "<code>?</code> " : "?")
  }
  lbrac = (@options[:html] ? "<code>[</code> " : "[")
  rbrac = (@options[:html] ? "<code>]</code> " : "]")
  lparen = (@options[:html] ? "<code>(</code> " : "(")
  rparen = (@options[:html] ? "<code>)</code> " : ")")

  case expr.first
  when :istr
    # FIXME: if string part is segmented, need to do something different
    format_abnf(expr.last, embedded: true, sensitive: false)
  when :alt
    this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
    res = expr[1..-1].map {|e| format_abnf(e, embedded: true)}.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :diff
    raise RangeError, "ABNF does not support the diff operator"
  when :opt
    # Optional elements are written with brackets in ABNF
    r = format_abnf(expr[1], embedded: true)
    "#{lbrac}#{r}#{rbrac}"
  when :plus, :star
    # Repetition operators are prefixes in ABNF
    char = parts[expr.first.to_sym]
    r = format_abnf(expr[1], embedded: true)
    "#{char}#{r}"
  when :hex
    # An explicit encoding is required: Integer#chr without one raises
    # RangeError for code points above 0xFF.
    escape_abnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
  when :range
    # Returns an [:alt] or [:not [:alt]] if composed of multiple sequences
    # Note: ABNF does not support the `not` operator
    res = format_abnf_range(expr.last)
    res.is_a?(Array) ?
      format_abnf(res, embedded: true) :
      res
  when :seq
    this_sep = (sep ? sep : " ")
    res = expr[1..-1].map do |e|
      format_abnf(e, embedded: true)
    end.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :rept
    # Expand repetition
    min, max, value = expr[1..-1]
    r = format_abnf(value, embedded: true)
    if min == max
      "#{min}#{r}"
    elsif min == 0 && max == '*'
      "#{parts[:star]}#{r}"
    elsif min > 0 && max == '*'
      "#{min}#{parts[:star]}#{r}"
    else
      "#{min}#{parts[:star]}#{max}#{r}"
    end
  else
    raise "Unknown operator: #{expr.first}"
  end
end
|
447
|
+
|
448
|
+
# Format a single-character string, preferring hex for non-main ASCII
|
449
|
+
# Format a single character for ABNF output.
#
# Characters allowed inside an ABNF quoted string (%x20-21 / %x23-7E) are
# emitted as a quoted literal; anything else goes through `escape_abnf_hex`.
#
# @param [String] c the character to format
# @return [String]
def format_abnf_char(c)
  if /[\x20-\x21\x23-\x7E]/.match?(c)
    # Quote verbatim: ABNF strings have no escape mechanism, so Ruby's
    # String#inspect (which doubles a backslash) would change the meaning.
    %("#{c}")
  else
    escape_abnf_hex(c)
  end
end
|
456
|
+
|
457
|
+
# Format a range
|
458
|
+
#
|
459
|
+
# Presumes range has already been validated
|
460
|
+
def format_abnf_range(string)
|
461
|
+
alt, o_dash = [:alt], false
|
462
|
+
|
463
|
+
raise RangeError, "cannot format #{string.inspect} an ABNF range" if string.start_with?('^')
|
464
|
+
|
465
|
+
if string.end_with?('-')
|
466
|
+
o_dash = true
|
467
|
+
string = string[0..-2]
|
468
|
+
end
|
469
|
+
|
470
|
+
scanner = StringScanner.new(string)
|
471
|
+
hexes, deces = [], []
|
472
|
+
in_range = false
|
473
|
+
# Build op (alt) from different ranges/enums
|
474
|
+
while !scanner.eos?
|
475
|
+
if hex = scanner.scan(Terminals::HEX)
|
476
|
+
# Append any decimal values
|
477
|
+
alt << "%d" + deces.join(".") unless deces.empty?
|
478
|
+
deces = []
|
479
|
+
|
480
|
+
if in_range
|
481
|
+
# Add "." sequences for any previous hexes
|
482
|
+
alt << "%x" + hexes[0..-2].join(".") if hexes.length > 1
|
483
|
+
alt << "%x#{hexes.last}-#{hex[2..-1]}"
|
484
|
+
in_range, hexes = false, []
|
485
|
+
else
|
486
|
+
hexes << hex[2..-1]
|
487
|
+
end
|
488
|
+
elsif dec = scanner.scan(Terminals::R_CHAR)
|
489
|
+
# Append any hexadecimal values
|
490
|
+
alt << "%x" + hexes.join(".") unless hexes.empty?
|
491
|
+
hexes = []
|
492
|
+
|
493
|
+
if in_range
|
494
|
+
# Add "." sequences for any previous hexes
|
495
|
+
alt << "%d" + deces[0..-2].join(".") if deces.length > 1
|
496
|
+
alt << "%d#{deces.last}-#{dec.codepoints.first}"
|
497
|
+
in_range, deces = false, []
|
498
|
+
else
|
499
|
+
deces << dec.codepoints.first.to_s
|
500
|
+
end
|
501
|
+
end
|
502
|
+
|
503
|
+
in_range = true if scanner.scan(/\-/)
|
504
|
+
end
|
505
|
+
|
506
|
+
deces << '45' if o_dash
|
507
|
+
|
508
|
+
# Append hexes and deces as "." sequences (should be only one)
|
509
|
+
alt << "%d" + deces.join(".") unless deces.empty?
|
510
|
+
alt << "%x" + hexes.join(".") unless hexes.empty?
|
511
|
+
|
512
|
+
# FIXME: HTML abbreviations?
|
513
|
+
if alt.length == 2
|
514
|
+
# Just return the range or enum
|
515
|
+
alt.last
|
516
|
+
else
|
517
|
+
# Return the alt, which will be further formatted
|
518
|
+
alt
|
519
|
+
end
|
520
|
+
end
|
521
|
+
|
522
|
+
# Escape a character as an ABNF hexadecimal value (`%xNN`).
#
# The code point is zero-padded to 2, 4 or 8 upper-case hex digits depending
# on its magnitude. When the `:html` option is set, the escape is wrapped in
# an `<abbr>` whose title describes the character, inside a
# `<code class="grammar-char-escape">` element.
#
# @param [String] u character to escape; only its first code point is encoded
# @return [String] the plain escape, or its HTML markup when `@options[:html]`
def escape_abnf_hex(u)
  cp = u.ord
  fmt = case cp
  when 0x0000..0x00ff then "%02X"
  when 0x0100..0xffff then "%04X"
  else "%08X"
  end
  char = "%x" + (fmt % cp)
  return char unless @options[:html]

  title = if cp <= 0x20
    ASCII_ESCAPE_NAMES[cp]
  elsif cp < 0x7F
    # Strictly below 0x7F so that DEL reaches its own branch below.
    # Printable ASCII includes `"`, `&`, `<` and `>`; emitted raw they would
    # terminate or corrupt the title attribute.
    safe = u.gsub(/[&<>"]/, '&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;')
    "ascii '#{safe}'"
  elsif cp == 0x7F
    "delete"
  elsif cp <= 0xFF
    "extended ascii '#{u}'"
  else
    "unicode '#{u}'"
  end
  %(<code class="grammar-char-escape"><abbr title="#{title}">#{char}</abbr></code>)
end
|
546
|
+
|
547
|
+
##
|
548
|
+
# ISO EBNF Formatters
|
549
|
+
##
|
550
|
+
|
551
|
+
# Format the expression part of a rule
|
552
|
+
def format_isoebnf(expr, sep: nil, embedded: false)
|
553
|
+
return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
|
554
|
+
if expr.is_a?(String)
|
555
|
+
expr = expr[2..-1].hex.chr if expr =~ /\A#x\h+/
|
556
|
+
expr.chars.each do |c|
|
557
|
+
raise RangeError, "cannot format #{expr.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless
|
558
|
+
ISOEBNF::TERMINAL_CHARACTER.match?(c)
|
559
|
+
end
|
560
|
+
if expr =~ /"/
|
561
|
+
return (@options[:html] ? %('<code class="grammar-literal">#{expr}</code>') : %('#{expr}'))
|
562
|
+
else
|
563
|
+
return (@options[:html] ? %("<code class="grammar-literal">#{expr}</code>") : %("#{expr}"))
|
564
|
+
end
|
565
|
+
end
|
566
|
+
parts = {
|
567
|
+
alt: (@options[:html] ? "<code>|</code> " : "| "),
|
568
|
+
diff: (@options[:html] ? "<code>-</code> " : "- "),
|
569
|
+
}
|
570
|
+
lparen = (@options[:html] ? "<code>(</code> " : "(")
|
571
|
+
rparen = (@options[:html] ? "<code>)</code> " : ")")
|
572
|
+
|
573
|
+
case expr.first
|
574
|
+
when :istr
|
575
|
+
# Loses fidelity, but, oh well ...
|
576
|
+
format_isoebnf(expr.last, embedded: true)
|
577
|
+
when :alt, :diff
|
578
|
+
this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
|
579
|
+
res = expr[1..-1].map {|e| format_isoebnf(e, embedded: true)}.join(this_sep)
|
580
|
+
embedded ? (lparen + res + rparen) : res
|
581
|
+
when :opt
|
582
|
+
r = format_isoebnf(expr[1], embedded: true)
|
583
|
+
"[#{r}]"
|
584
|
+
when :star
|
585
|
+
r = format_isoebnf(expr[1], embedded: true)
|
586
|
+
"{#{r}}"
|
587
|
+
when :plus
|
588
|
+
r = format_isoebnf(expr[1], embedded: true)
|
589
|
+
"#{r}, {#{r}}"
|
590
|
+
when :hex
|
591
|
+
format_isoebnf(expr[1], embedded: true)
|
592
|
+
when :range
|
593
|
+
res = format_isoebnf_range(expr.last)
|
594
|
+
res.is_a?(Array) ?
|
595
|
+
format_isoebnf(res, embedded: true) :
|
596
|
+
res
|
597
|
+
when :seq
|
598
|
+
this_sep = "," + (sep ? sep : " ")
|
599
|
+
res = expr[1..-1].map do |e|
|
600
|
+
format_isoebnf(e, embedded: true)
|
601
|
+
end.join(this_sep)
|
602
|
+
embedded ? (lparen + res + rparen) : res
|
603
|
+
when :rept
|
604
|
+
# Expand repetition
|
605
|
+
min, max, value = expr[1..-1]
|
606
|
+
if min == 0 && max == 1
|
607
|
+
format_isoebnf([:opt, value], sep: sep, embedded: embedded)
|
608
|
+
elsif min == 0 && max == '*'
|
609
|
+
format_isoebnf([:star, value], sep: sep, embedded: embedded)
|
610
|
+
elsif min == 1 && max == '*'
|
611
|
+
format_isoebnf([:plus, value], sep: sep, embedded: embedded)
|
612
|
+
else
|
613
|
+
val2 = [:seq]
|
614
|
+
while min > 0
|
615
|
+
val2 << value
|
616
|
+
min -= 1
|
617
|
+
max -= 1 unless max == '*'
|
618
|
+
end
|
619
|
+
if max == '*'
|
620
|
+
val2 << [:star, value]
|
621
|
+
else
|
622
|
+
opt = nil
|
623
|
+
while max > 0
|
624
|
+
opt = [:opt, opt ? [:seq, value, opt] : value]
|
625
|
+
max -= 1
|
626
|
+
end
|
627
|
+
val2 << opt if opt
|
628
|
+
end
|
629
|
+
format_isoebnf(val2, sep: sep, embedded: embedded)
|
630
|
+
end
|
631
|
+
else
|
632
|
+
raise "Unknown operator: #{expr.first}"
|
633
|
+
end
|
634
|
+
end
|
635
|
+
|
636
|
+
# Format a range
|
637
|
+
# Range is formatted as an alternation of characters
|
638
|
+
# Expand a character range/enumeration into individual characters for ISO EBNF.
#
# The range body is scanned for `#xNN` escapes and literal range characters;
# `a-b` spans are expanded to every character from `a` through `b`. A trailing
# `-` is treated as a literal dash.
#
# @param [String] string range body (without the surrounding brackets)
# @return [String, Array] the inspected character when exactly one results,
#   otherwise `[:alt, char, ...]` for further formatting
# @raise [RangeError] for negated ranges, for characters rejected by
#   `ISOEBNF::TERMINAL_CHARACTER`, or for a span whose end does not sort
#   after its start
def format_isoebnf_range(string)
  raise RangeError, "cannot format #{string.inspect} as an ISO EBNF range" if string.start_with?('^')

  chars = []
  # A trailing dash is a literal character, not a range operator
  o_dash = string.end_with?('-')
  string = string[0..-2] if o_dash

  scanner = StringScanner.new(string)
  in_range = false
  # Build chars from different ranges/enums
  until scanner.eos?
    char = if (hex = scanner.scan(Terminals::HEX))
      # Decode "#xNN" to the character it denotes
      hex[2..-1].hex.chr(Encoding::UTF_8)
    else
      scanner.scan(Terminals::R_CHAR)
    end
    raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration: #{char.inspect} is out of range" unless
      char && ISOEBNF::TERMINAL_CHARACTER.match?(char)

    if in_range
      # Add every character after chars.last up to and including this one
      raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration" unless chars.last < char
      chars.concat((chars.last..char).to_a[1..-1])
      in_range = false
    else
      chars << char
    end

    in_range = true if scanner.scan(/\-/)
  end

  chars << '-' if o_dash

  # Possibly only a single character (no character?)
  chars.length == 1 ? chars.last.inspect : chars.unshift(:alt)
end
|
677
|
+
|
678
|
+
ERB_DESC = %q(
|
679
|
+
<table class="grammar">
|
680
|
+
<tbody id="grammar-productions" class="<%= @format %>">
|
681
|
+
<% for rule in @rules %>
|
682
|
+
<tr<%= %{ id="grammar-production-#{rule.sym}"} unless %w(=/ |).include?(rule.assign)%>>
|
683
|
+
<% if rule.id %>
|
684
|
+
<td><%= rule.id %></td>
|
685
|
+
<% end %>
|
686
|
+
<td><code><%== rule.sym %></code></td>
|
687
|
+
<td><%= rule.assign %></td>
|
688
|
+
<td><%= rule.formatted %></td>
|
689
|
+
</tr>
|
690
|
+
<% end %>
|
691
|
+
</tbody>
|
692
|
+
</table>
|
221
693
|
).gsub(/^ /, '')
|
222
694
|
end
|
223
695
|
end
|