ebnf 2.0.0 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +81 -36
- data/VERSION +1 -1
- data/bin/ebnf +34 -18
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/ebnf.ebnf +19 -25
- data/etc/ebnf.html +251 -206
- data/etc/ebnf.ll1.rb +27 -103
- data/etc/ebnf.ll1.sxp +105 -102
- data/etc/ebnf.peg.rb +54 -62
- data/etc/ebnf.peg.sxp +53 -62
- data/etc/ebnf.sxp +22 -19
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.sxp +8 -7
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.sxp +22 -20
- data/lib/ebnf.rb +3 -0
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +87 -44
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +4 -7
- data/lib/ebnf/ll1/parser.rb +12 -4
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +1 -1
- data/lib/ebnf/peg/parser.rb +24 -5
- data/lib/ebnf/peg/rule.rb +77 -58
- data/lib/ebnf/rule.rb +352 -121
- data/lib/ebnf/terminals.rb +13 -10
- data/lib/ebnf/writer.rb +550 -78
- metadata +48 -6
data/lib/ebnf/terminals.rb
CHANGED
@@ -1,18 +1,21 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
# Terminal definitions for the EBNF grammar
|
3
3
|
# Regular expressions recognising the terminals of the W3C EBNF notation.
# Every pattern is compiled with the `u` flag and frozen.
module EBNF::Terminals
  # Run of name characters (letters, digits, `_`, `.`) on word boundaries
  SYMBOL_BASE = %r(\b[a-zA-Z0-9_\.]+\b)u.freeze
  # A name that is not immediately followed by `::=` (i.e. not a rule's LHS)
  SYMBOL      = %r(#{SYMBOL_BASE}(?!\s*::=))u.freeze
  # Hexadecimal character escape, e.g. `#x20`
  HEX         = %r(\#x\h+)u.freeze
  # Any single permitted character
  CHAR        = %r([\u0009\u000A\u000D\u0020-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
  # Character allowed inside brackets: CHAR without `-` (#x2D) and `]` (#x5D)
  R_CHAR      = %r([\u0009\u000A\u000D\u0020-\u002C\u002E-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
  # Bracketed mix of ranges and single characters/escapes with an optional
  # trailing `-`; rejected when what follows looks like `name ::=`
  RANGE       = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX})|#{R_CHAR}|#{HEX})+-?\](?!\s+#{SYMBOL_BASE}\s*::=))u.freeze
  # Left-hand side of a rule: optional `[id]`, the name, then `::=`
  LHS         = %r((?:\[#{SYMBOL_BASE}\])?\s*#{SYMBOL_BASE}\s*::=)u.freeze
  # Negated bracket expression, `[^...]`
  O_RANGE     = %r(\[\^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX}|#{R_CHAR}|#{HEX}))+-?\])u.freeze
  # Double-quoted string (cannot contain `"`)
  STRING1     = %r("[\u0009\u000A\u000D\u0020\u0021\u0023-\uD7FF\u{10000}-\u{10FFFF}]*")u.freeze
  # Single-quoted string (cannot contain `'`)
  STRING2     = %r('[\u0009\u000A\u000D\u0020-\u0026\u0028-\uD7FF\u{10000}-\u{10FFFF}]*')u.freeze
  # Postfix repetition operators
  POSTFIX     = %r([?*+])u.freeze
  # Skippable input: whitespace, line comments introduced by `#` (when not
  # followed by `x`) or `//`, and block comments delimited by `/* */` or `(* *)`
  PASS        = %r((
                      \s
                    | (?:(?:\#[^x]|//)[^\n\r]*)
                    | (?:/\*(?:(?:\*[^/])|[^*])*\*/)
                    | (?:\(\*(?:(?:\*[^\)])|[^*])*\*\))
                  )+)xmu.freeze
end
|
data/lib/ebnf/writer.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
require 'rdf'
|
3
3
|
require 'strscan' unless defined?(StringScanner)
|
4
|
+
require "ostruct"
|
4
5
|
|
5
6
|
##
|
6
7
|
# Serialize ruleset back to EBNF
|
@@ -8,15 +9,53 @@ module EBNF
|
|
8
9
|
class Writer
|
9
10
|
LINE_LENGTH = 80
|
10
11
|
|
12
|
+
# ASCII escape names
|
13
|
+
# Descriptive names for code points #x00 through #x20, indexed by code point.
# Used as the `title` of the `<abbr>` element when a control character or
# space is rendered as a hex escape in HTML output.
# Frozen so that the shared lookup table cannot be modified at run time.
ASCII_ESCAPE_NAMES = [
  "null",                 #x00
  "start of heading",     #x01
  "start of text",        #x02
  "end of text",          #x03
  "end of transmission",  #x04
  "enquiry",              #x05
  "acknowledge",          #x06
  "bell",                 #x07
  "backspace",            #x08
  "horizontal tab",       #x09
  "new line",             #x0A
  "vertical tab",         #x0B
  "form feed",            #x0C
  "carriage return",      #x0D
  "shift out",            #x0E
  "shift in",             #x0F
  "data link escape",     #x10
  "device control 1",     #x11
  "device control 2",     #x12
  "device control 3",     #x13
  "device control 4",     #x14
  "negative acknowledge", #x15
  "synchronous idle",     #x16
  "end of trans. block",  #x17
  "cancel",               #x18
  "end of medium",        #x19
  "substitute",           #x1A
  "escape",               #x1B
  "file separator",       #x1C
  "group separator",      #x1D
  "record separator",     #x1E
  "unit separator",       #x1F
  "space"                 #x20
].freeze
|
48
|
+
|
11
49
|
##
|
12
50
|
# Format rules to a String
|
13
51
|
#
|
14
52
|
# @param [Array<Rule>] rules
|
53
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
15
54
|
# @return [Object]
|
16
|
-
def self.string(*rules)
|
55
|
+
def self.string(*rules, format: :ebnf)
  require 'stringio' unless defined?(StringIO)
  # Let `write` fill an in-memory buffer, then hand back what was collected
  StringIO.new.tap { |io| write(io, *rules, format: format) }.string
end
|
22
61
|
|
@@ -24,9 +63,10 @@ module EBNF
|
|
24
63
|
# Format rules to $stdout
|
25
64
|
#
|
26
65
|
# @param [Array<Rule>] rules
|
66
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
27
67
|
# @return [Object]
|
28
|
-
def self.print(*rules)
|
29
|
-
write($stdout, *rules)
|
68
|
+
def self.print(*rules, format: :ebnf)
  # Same as `write`, with standard output as the destination
  destination = $stdout
  write(destination, *rules, format: format)
end
|
31
71
|
|
32
72
|
##
|
@@ -34,20 +74,22 @@ module EBNF
|
|
34
74
|
#
|
35
75
|
# @param [Object] out
|
36
76
|
# @param [Array<Rule>] rules
|
77
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
37
78
|
# @return [Object]
|
38
|
-
def self.write(out, *rules)
|
39
|
-
Writer.new(rules, out: out)
|
79
|
+
def self.write(out, *rules, format: :ebnf)
  # Constructing the writer performs the output; the new instance is returned
  Writer.new(rules, format: format, out: out)
end
|
41
82
|
|
42
83
|
##
|
43
84
|
# Write formatted rules to an IO like object as HTML
|
44
85
|
#
|
45
86
|
# @param [Array<Rule>] rules
|
87
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
46
88
|
# @return [Object]
|
47
|
-
def self.html(*rules)
|
89
|
+
def self.html(*rules, format: :ebnf)
  require 'stringio' unless defined?(StringIO)
  # The writer emits HTML into the buffer as a side effect of construction
  StringIO.new.tap { |io| Writer.new(rules, out: io, html: true, format: format) }.string
end
|
53
95
|
|
@@ -55,17 +97,24 @@ module EBNF
|
|
55
97
|
# @param [Array<Rule>] rules
|
56
98
|
# @param [Hash{Symbol => Object}] options
|
57
99
|
# @param [#write] out ($stdout)
|
100
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
58
101
|
# @option options [Symbol] format
|
59
102
|
# @option options [Boolean] html (false)
|
60
|
-
def initialize(rules, out: $stdout, html: false, **options)
|
61
|
-
@options = options.
|
103
|
+
def initialize(rules, out: $stdout, html: false, format: :ebnf, **options)
|
104
|
+
@options = options.merge(html: html)
|
105
|
+
return if rules.empty?
|
62
106
|
|
63
107
|
# Determine max LHS length
|
108
|
+
format_meth = "format_#{format}".to_sym
|
64
109
|
max_id = rules.max_by {|r| r.id.to_s.length}.id.to_s.length
|
65
110
|
max_sym = rules.max_by {|r| r.sym.to_s.length}.sym.to_s.length
|
66
|
-
lhs_length = max_sym +
|
67
|
-
lhs_fmt =
|
68
|
-
|
111
|
+
lhs_length = max_sym + 1
|
112
|
+
lhs_fmt = case format
|
113
|
+
when :abnf then "%<sym>-#{max_sym}s = "
|
114
|
+
when :ebnf then "%<sym>-#{max_sym}s ::= "
|
115
|
+
when :isoebnf then "%<sym>-#{max_sym}s = "
|
116
|
+
end
|
117
|
+
if format == :ebnf && max_id > 0
|
69
118
|
lhs_fmt = "%<id>-#{max_id+2}s " + lhs_fmt
|
70
119
|
lhs_length += max_id + 3
|
71
120
|
end
|
@@ -74,49 +123,104 @@ module EBNF
|
|
74
123
|
if html
|
75
124
|
# Output as formatted HTML
|
76
125
|
begin
|
77
|
-
require '
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
126
|
+
require 'erubis'
|
127
|
+
eruby = Erubis::Eruby.new(ERB_DESC)
|
128
|
+
formatted_rules = rules.map do |rule|
|
129
|
+
if rule.kind == :terminals || rule.kind == :pass
|
130
|
+
OpenStruct.new(id: ("@#{rule.kind}"),
|
131
|
+
sym: nil,
|
132
|
+
assign: nil,
|
133
|
+
formatted: ("<strong>Productions for terminals</strong>" if rule.kind == :terminals))
|
134
|
+
else
|
135
|
+
formatted_expr = self.send(format_meth, rule.expr)
|
136
|
+
# Measure text without markup
|
137
|
+
formatted_expr_text = formatted_expr.gsub(%r{</?\w+[^>]*>}, '')
|
138
|
+
if formatted_expr_text.length > rhs_length && (format != :abnf || rule.alt?)
|
139
|
+
lines = []
|
140
|
+
# Can only reasonably split apart alts
|
141
|
+
self.send(format_meth, rule.expr, sep: "--rule-extensions--").
|
142
|
+
split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
|
143
|
+
assign = case format
|
144
|
+
when :ebnf
|
145
|
+
formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
|
146
|
+
(ndx > 0 ? (rule.alt? ? '|' : '') : '::=')
|
147
|
+
when :abnf
|
148
|
+
formatted.sub!(%r{\s*<code>/</code>\s*}, '')
|
149
|
+
(ndx > 0 ? '=/' : '=')
|
150
|
+
else
|
151
|
+
formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
|
152
|
+
(ndx > 0 ? (rule.alt? ? '|' : '') : '=')
|
153
|
+
end
|
154
|
+
lines << OpenStruct.new(id: ("[#{rule.id}]" if rule.id),
|
155
|
+
sym: (rule.sym if ndx == 0 || format == :abnf),
|
156
|
+
assign: assign,
|
157
|
+
formatted: formatted)
|
158
|
+
end
|
159
|
+
if format == :isoebnf
|
160
|
+
lines << OpenStruct.new(assign: ';')
|
161
|
+
end
|
162
|
+
lines
|
163
|
+
else
|
164
|
+
OpenStruct.new(id: ("[#{rule.id}]" if rule.id),
|
165
|
+
sym: rule.sym,
|
166
|
+
assign: (format == :ebnf ? '::=' : '='),
|
167
|
+
formatted: (formatted_expr + (format == :isoebnf ? ' ;' : '')))
|
168
|
+
end
|
169
|
+
end
|
170
|
+
end.flatten
|
171
|
+
out.write eruby.evaluate(format: format, rules: formatted_rules)
|
83
172
|
return
|
84
173
|
rescue LoadError
|
85
|
-
$stderr.puts "Generating HTML requires
|
174
|
+
$stderr.puts "Generating HTML requires erubis gem to be loaded"
|
86
175
|
end
|
87
176
|
end
|
88
177
|
|
89
178
|
# Format each rule, considering the available rhs size
|
90
179
|
rules.each do |rule|
|
91
180
|
buffer = if rule.pass?
|
92
|
-
"%-#{lhs_length-2}s" % "@pass"
|
181
|
+
"\n%-#{lhs_length-2}s " % "@pass"
|
182
|
+
elsif rule.kind == :terminals
|
183
|
+
"\n%-#{lhs_length-2}s" % "@terminals"
|
93
184
|
else
|
94
185
|
lhs_fmt % {id: "[#{rule.id}]", sym: rule.sym}
|
95
186
|
end
|
96
|
-
formatted_expr =
|
97
|
-
if formatted_expr.length > rhs_length
|
98
|
-
|
187
|
+
formatted_expr = self.send(format_meth, rule.expr)
|
188
|
+
if formatted_expr.length > rhs_length && (format != :abnf || rule.alt?)
|
189
|
+
if format == :abnf
|
190
|
+
# No whitespace, use =/
|
191
|
+
self.send(format_meth, rule.expr, sep: "--rule-extensions--").
|
192
|
+
split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
|
193
|
+
if ndx > 0
|
194
|
+
buffer << "\n" + lhs_fmt.sub('= ', '=/') % {id: "[#{rule.id}]", sym: rule.sym}
|
195
|
+
end
|
196
|
+
buffer << formatted.sub(/\s*\/\s*/, '')
|
197
|
+
end
|
198
|
+
else
|
199
|
+
# Space out past "= "
|
200
|
+
buffer << self.send(format_meth, rule.expr, sep: ("\n" + " " * (lhs_length + (rule.alt? ? 2 : 4) - (format == :ebnf ? 0 : 2))))
|
201
|
+
buffer << ("\n" + " " * (lhs_length) + ';') if format == :isoebnf
|
202
|
+
end
|
99
203
|
else
|
100
|
-
buffer << formatted_expr
|
204
|
+
buffer << formatted_expr + (format == :isoebnf ? ' ;' : '')
|
101
205
|
end
|
206
|
+
buffer << "\n\n" if [:terminals, :pass].include?(rule.kind)
|
102
207
|
out.puts(buffer)
|
103
208
|
end
|
104
209
|
end
|
105
210
|
|
106
211
|
protected
|
212
|
+
|
213
|
+
##
|
214
|
+
# W3C EBNF Formatters
|
215
|
+
##
|
216
|
+
|
107
217
|
# Format the expression part of a rule
|
108
|
-
def
|
218
|
+
def format_ebnf(expr, sep: nil, embedded: false)
|
109
219
|
return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
|
110
220
|
if expr.is_a?(String)
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
return (@options[:html] ? %(<code class="grammar-char-escape">#{expr}</code>) : expr)
|
115
|
-
elsif expr =~ /"/
|
116
|
-
return (@options[:html] ? %('<code class="grammar-literal">#{escape(expr, "'")}</code>') : %('#{escape(expr, "'")}'))
|
117
|
-
else
|
118
|
-
return (@options[:html] ? %("<code class="grammar-literal">#{escape(expr, '"')}</code>") : %("#{escape(expr, '"')}"))
|
119
|
-
end
|
221
|
+
return expr.length == 1 ?
|
222
|
+
format_ebnf_char(expr) :
|
223
|
+
format_ebnf_string(expr, expr.include?('"') ? "'" : '"')
|
120
224
|
end
|
121
225
|
parts = {
|
122
226
|
alt: (@options[:html] ? "<code>|</code> " : "| "),
|
@@ -129,40 +233,75 @@ module EBNF
|
|
129
233
|
rparen = (@options[:html] ? "<code>)</code> " : ")")
|
130
234
|
|
131
235
|
case expr.first
|
236
|
+
when :istr
|
237
|
+
# Loses fidelity, but, oh well ...
|
238
|
+
format_ebnf(expr.last, embedded: true)
|
132
239
|
when :alt, :diff
|
133
240
|
this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
|
134
|
-
expr[1..-1].map {|e|
|
241
|
+
res = expr[1..-1].map {|e| format_ebnf(e, embedded: true)}.join(this_sep)
|
242
|
+
embedded ? (lparen + res + rparen) : res
|
135
243
|
when :star, :plus, :opt
|
136
|
-
raise "Expected star expression to have a single operand" unless expr.length == 2
|
137
244
|
char = parts[expr.first.to_sym]
|
138
|
-
r =
|
139
|
-
|
245
|
+
r = format_ebnf(expr[1], embedded: true)
|
246
|
+
"#{r}#{char}"
|
140
247
|
when :hex
|
141
|
-
(
|
248
|
+
escape_ebnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
|
142
249
|
when :range
|
143
|
-
|
250
|
+
format_ebnf_range(expr.last)
|
144
251
|
when :seq
|
145
252
|
this_sep = (sep ? sep : " ")
|
146
|
-
expr[1..-1].map
|
253
|
+
res = expr[1..-1].map do |e|
|
254
|
+
format_ebnf(e, embedded: true)
|
255
|
+
end.join(this_sep)
|
256
|
+
embedded ? (lparen + res + rparen) : res
|
257
|
+
when :rept
|
258
|
+
# Expand repetition
|
259
|
+
min, max, value = expr[1..-1]
|
260
|
+
if min == 0 && max == 1
|
261
|
+
format_ebnf([:opt, value], sep: sep, embedded: embedded)
|
262
|
+
elsif min == 0 && max == '*'
|
263
|
+
format_ebnf([:star, value], sep: sep, embedded: embedded)
|
264
|
+
elsif min == 1 && max == '*'
|
265
|
+
format_ebnf([:plus, value], sep: sep, embedded: embedded)
|
266
|
+
else
|
267
|
+
val2 = [:seq]
|
268
|
+
while min > 0
|
269
|
+
val2 << value
|
270
|
+
min -= 1
|
271
|
+
max -= 1 unless max == '*'
|
272
|
+
end
|
273
|
+
if max == '*'
|
274
|
+
val2 << [:star, value]
|
275
|
+
else
|
276
|
+
opt = nil
|
277
|
+
while max > 0
|
278
|
+
opt = [:opt, opt ? [:seq, value, opt] : value]
|
279
|
+
max -= 1
|
280
|
+
end
|
281
|
+
val2 << opt if opt
|
282
|
+
end
|
283
|
+
format_ebnf(val2, sep: sep, embedded: embedded)
|
284
|
+
end
|
147
285
|
else
|
148
286
|
raise "Unknown operator: #{expr.first}"
|
149
287
|
end
|
150
288
|
end
|
151
289
|
|
152
290
|
# Format a single-character string, preferring hex for non-main ASCII
|
153
|
-
def
|
291
|
+
# Format a one-character string as a W3C EBNF terminal.
#
# Printable characters are written as a quoted literal; anything else
# (controls, space, DEL, code points above #xFFFD) is delegated to
# `escape_ebnf_hex`. With the `:html` option the literal is wrapped in
# `<code class="grammar-literal">` and markup characters are escaped so that
# `<`, `>` and `&` cannot corrupt the generated HTML.
#
# @param [String] c a single character
# @return [String]
def format_ebnf_char(c)
  case c.ord
  when 0x22
    # A double quote can only be written inside single quotes
    @options[:html] ? %('<code class="grammar-literal">"</code>') : %{'"'}
  when 0x21, 0x23..0x7e, 0x80..0xFFFD
    if @options[:html]
      shown = c.gsub(/[&<>]/, {'&' => '&amp;', '<' => '&lt;', '>' => '&gt;'})
      %("<code class="grammar-literal">#{shown}</code>")
    else
      %{"#{c}"}
    end
  else
    escape_ebnf_hex(c)
  end
end
|
160
300
|
|
161
301
|
# Format a range
|
162
|
-
def
|
302
|
+
def format_ebnf_range(string)
|
163
303
|
lbrac = (@options[:html] ? "<code>[</code> " : "[")
|
164
304
|
rbrac = (@options[:html] ? "<code>]</code> " : "]")
|
165
|
-
dash = (@options[:html] ? "<code>-</code> " : "-")
|
166
305
|
|
167
306
|
buffer = lbrac
|
168
307
|
s = StringScanner.new(string)
|
@@ -171,53 +310,386 @@ module EBNF
|
|
171
310
|
when s.scan(/\A[!"\u0024-\u007e]+/)
|
172
311
|
buffer << (@options[:html] ? %(<code class="grammar-literal">#{s.matched}</code>) : s.matched)
|
173
312
|
when s.scan(/\A#x\h+/)
|
174
|
-
buffer << (
|
175
|
-
when s.scan(/\A-/)
|
176
|
-
buffer << dash
|
313
|
+
buffer << escape_ebnf_hex(s.matched[2..-1].hex.chr(Encoding::UTF_8))
|
177
314
|
else
|
178
|
-
buffer << (
|
315
|
+
buffer << escape_ebnf_hex(s.getch)
|
179
316
|
end
|
180
317
|
end
|
181
318
|
buffer + rbrac
|
182
319
|
end
|
183
320
|
|
184
321
|
# Escape a string, using as many UTF-8 characters as possible
|
185
|
-
def
|
186
|
-
buffer = ""
|
322
|
+
# Format a multi-character string as a quoted W3C EBNF literal.
#
# Characters in #x00..#x19, and the quote character itself, are only accepted
# when `ISOEBNF::TERMINAL_CHARACTER` matches them.
# NOTE(review): the upper bound #x19 looks like it may have been meant as
# #x1F (the last control character) -- confirm before changing.
#
# With the `:html` option the text between the quotes is wrapped in
# `<code class="grammar-literal">` with `&`, `<` and `>` escaped, matching
# what `format_ebnf_char` produces for single characters.
#
# @param [String] string text of the literal
# @param [String] quote quote character to surround it with
# @return [String]
# @raise [RangeError] if a vetted character is not an acceptable terminal character
def format_ebnf_string(string, quote = '"')
  string.each_char do |c|
    case c.ord
    when 0x00..0x19, quote.ord
      raise RangeError, "cannot format #{string.inspect} as an EBNF String: #{c.inspect} is out of range" unless
        ISOEBNF::TERMINAL_CHARACTER.match?(c)
    end
  end

  body = if @options[:html]
    shown = string.gsub(/[&<>]/, {'&' => '&amp;', '<' => '&lt;', '>' => '&gt;'})
    %(<code class="grammar-literal">#{shown}</code>)
  else
    string
  end
  "#{quote}#{body}#{quote}"
end
|
196
333
|
|
197
|
-
def
|
334
|
+
# Format a character as a W3C EBNF hexadecimal escape (`#xNN`).
#
# The digits are zero-padded to 2, 4 or 8 places according to the size of the
# code point. With the `:html` option the escape is wrapped in an `<abbr>`
# whose title describes the character, inside
# `<code class="grammar-char-escape">`.
#
# @param [String] u character to escape (its first code point is used)
# @return [String]
def escape_ebnf_hex(u)
  code = u.ord
  digits = case code
  when 0x0000..0x00ff then "#x%02X"
  when 0x0100..0xffff then "#x%04X"
  else "#x%08X"
  end
  char = digits % code
  return char unless @options[:html]

  # The character is echoed into a double-quoted attribute value, so markup
  # characters (notably `"`) have to be escaped.
  shown = u.gsub(/[&<>"]/, {'&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;'})
  title = if code <= 0x20
    ASCII_ESCAPE_NAMES[code]
  elsif code < 0x7F
    "ascii '#{shown}'"
  elsif code == 0x7F
    "delete"
  elsif code <= 0xFF
    "extended ascii '#{shown}'"
  else
    "unicode '#{shown}'"
  end
  %(<code class="grammar-char-escape"><abbr title="#{title}">#{char}</abbr></code>)
end
|
359
|
+
|
360
|
+
##
|
361
|
+
# ABNF Formatters
|
362
|
+
##
|
363
|
+
|
364
|
+
# Format the expression part of a rule
|
365
|
+
# Format the expression part of a rule as ABNF.
#
# Symbols become rule references, strings become quoted literals (prefixed
# with `%s` when case-sensitive) and arrays are treated as operator
# expressions (`:istr`, `:alt`, `:opt`, `:plus`, `:star`, `:hex`, `:range`,
# `:seq`, `:rept`).
#
# @param [Symbol, String, Array] expr expression to format
# @param [String] sep separator placed between the operands of `:alt`/`:seq`
#   (a single space when nil)
# @param [Boolean] embedded whether the result is nested in another
#   expression, in which case `:alt`/`:seq` results are parenthesized
# @param [Boolean] sensitive whether a string literal is case-sensitive
# @return [String]
# @raise [RangeError] for the `:diff` operator, which ABNF cannot express
# @raise [RuntimeError] for an unknown operator
def format_abnf(expr, sep: nil, embedded: false, sensitive: true)
  return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
  if expr.is_a?(String)
    if expr.length == 1
      return format_abnf_char(expr)
    elsif expr.start_with?('%')
      # Already encoded
      return expr
    elsif expr.include?('"')
      # ABNF strings cannot contain a double quote: emit the text around each
      # quote as its own literal, with %x22 standing in for the quote.
      # A limit of -1 keeps empty leading/trailing segments so that a quote at
      # either end of the string is not lost.
      items = expr.split('"', -1).
        flat_map {|segment| [[:hex, "#x22"], segment]}.
        drop(1).
        reject {|item| item == ""}
      # Propagate `embedded` so the pieces stay grouped under an operator
      return format_abnf(items.unshift(:seq), sep: nil, embedded: embedded)
    else
      # The case-sensitivity marker precedes the opening quote
      prefix = sensitive ? '%s' : ''
      return (@options[:html] ? %(#{prefix}"<code class="grammar-literal">#{expr}</code>") : %(#{prefix}"#{expr}"))
    end
  end
  # NOTE(review): the HTML form of :plus renders `+`, whereas the text form
  # uses the ABNF spelling `1*` -- confirm which is intended.
  parts = {
    alt:    (@options[:html] ? "<code>/</code> " : "/ "),
    star:   (@options[:html] ? "<code>*</code> " : "*"),
    plus:   (@options[:html] ? "<code>+</code> " : "1*")
  }
  lbrac = (@options[:html] ? "<code>[</code> " : "[")
  rbrac = (@options[:html] ? "<code>]</code> " : "]")
  lparen = (@options[:html] ? "<code>(</code> " : "(")
  rparen = (@options[:html] ? "<code>)</code> " : ")")

  case expr.first
  when :istr
    # FIXME: if string part is segmented, need to do something different
    format_abnf(expr.last, embedded: true, sensitive: false)
  when :alt
    this_sep = (sep ? sep : " ") + parts[:alt]
    res = expr[1..-1].map {|e| format_abnf(e, embedded: true)}.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :diff
    raise RangeError, "ABNF does not support the diff operator"
  when :opt
    # Optional elements are written in square brackets
    r = format_abnf(expr[1], embedded: true)
    "#{lbrac}#{r}#{rbrac}"
  when :plus, :star
    # Repetition operators are prefixes in ABNF
    r = format_abnf(expr[1], embedded: true)
    "#{parts[expr.first]}#{r}"
  when :hex
    # Decode as UTF-8 so that code points above #xFF are accepted
    escape_abnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
  when :range
    # Returns an [:alt] if composed of multiple sequences
    # Note: ABNF does not support the `not` operator
    res = format_abnf_range(expr.last)
    res.is_a?(Array) ?
      format_abnf(res, embedded: true) :
      res
  when :seq
    this_sep = (sep ? sep : " ")
    res = expr[1..-1].map do |e|
      format_abnf(e, embedded: true)
    end.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :rept
    # Expand repetition
    min, max, value = expr[1..-1]
    r = format_abnf(value, embedded: true)
    if min == max
      "#{min}#{r}"
    elsif min == 0 && max == '*'
      "#{parts[:star]}#{r}"
    elsif min > 0 && max == '*'
      "#{min}#{parts[:star]}#{r}"
    else
      "#{min}#{parts[:star]}#{max}#{r}"
    end
  else
    raise "Unknown operator: #{expr.first}"
  end
end
|
447
|
+
|
448
|
+
# Format a single-character string, preferring hex for non-main ASCII
|
449
|
+
# Format a single character as an ABNF terminal.
#
# A printable ASCII character other than the double quote is written as a
# quoted string; everything else is delegated to `escape_abnf_hex`.
# The character is quoted verbatim rather than with `String#inspect`, which
# would double a backslash.
#
# @param [String] c character to format
# @return [String]
def format_abnf_char(c)
  if /\A[\x20-\x21\x23-\x7E]\z/.match?(c)
    %("#{c}")
  else
    escape_abnf_hex(c)
  end
end
|
456
|
+
|
457
|
+
# Format a range
|
458
|
+
#
|
459
|
+
# Presumes range has already been validated
|
460
|
+
# Translate the body of a bracketed EBNF range/enumeration into ABNF numeric
# values.
#
# Literal characters are written in decimal (`%d`), hex escapes in
# hexadecimal (`%x`); `a-b` pairs become value ranges and runs of single
# values are joined with ".". A trailing `-` in the input is treated as a
# literal hyphen (decimal 45).
# NOTE(review): in ABNF a "."-joined value list denotes a concatenation, while
# an EBNF enumeration such as `[abc]` matches one character -- confirm that
# the joined form is what consumers of this output expect.
#
# Presumes range has already been validated
#
# @param [String] string contents of the brackets
# @return [String, Array] a single formatted value when there is only one,
#   otherwise an `[:alt, ...]` expression to be formatted further
# @raise [RangeError] if the range is negated (starts with `^`) or contains a
#   character that cannot be scanned
def format_abnf_range(string)
  raise RangeError, "cannot format #{string.inspect} an ABNF range" if string.start_with?('^')

  alt = [:alt]
  o_dash = string.end_with?('-')
  string = string[0..-2] if o_dash

  scanner = StringScanner.new(string)
  hexes, deces = [], []
  in_range = false
  # Build op (alt) from different ranges/enums
  until scanner.eos?
    start = scanner.pos
    if (hex = scanner.scan(Terminals::HEX))
      # Append any decimal values
      alt << "%d" + deces.join(".") unless deces.empty?
      deces = []

      if in_range
        # Add "." sequences for any previous hexes
        alt << "%x" + hexes[0..-2].join(".") if hexes.length > 1
        alt << "%x#{hexes.last}-#{hex[2..-1]}"
        in_range, hexes = false, []
      else
        hexes << hex[2..-1]
      end
    elsif (dec = scanner.scan(Terminals::R_CHAR))
      # Append any hexadecimal values
      alt << "%x" + hexes.join(".") unless hexes.empty?
      hexes = []

      if in_range
        # Add "." sequences for any previous decimals
        alt << "%d" + deces[0..-2].join(".") if deces.length > 1
        alt << "%d#{deces.last}-#{dec.codepoints.first}"
        in_range, deces = false, []
      else
        deces << dec.codepoints.first.to_s
      end
    end

    in_range = true if scanner.scan(/\-/)

    # Nothing matched at this position: fail instead of looping forever
    raise RangeError, "cannot format #{string.inspect} as an ABNF range: unexpected character at offset #{start}" if
      scanner.pos == start
  end

  deces << '45' if o_dash

  # Append hexes and deces as "." sequences (should be only one)
  alt << "%d" + deces.join(".") unless deces.empty?
  alt << "%x" + hexes.join(".") unless hexes.empty?

  # FIXME: HTML abbreviations?
  # A lone range or enum is returned directly; otherwise the alt is returned
  # for further formatting
  alt.length == 2 ? alt.last : alt
end
|
521
|
+
|
522
|
+
# Format a character as an ABNF hexadecimal terminal (`%xNN`).
#
# The digits are zero-padded to 2, 4 or 8 places according to the size of the
# code point. With the `:html` option the value is wrapped in an `<abbr>`
# whose title describes the character, inside
# `<code class="grammar-char-escape">`.
#
# @param [String] u character to escape (its first code point is used)
# @return [String]
def escape_abnf_hex(u)
  code = u.ord
  digits = case code
  when 0x0000..0x00ff then "%02X"
  when 0x0100..0xffff then "%04X"
  else "%08X"
  end
  char = "%x" + (digits % code)
  return char unless @options[:html]

  # The character is echoed into a double-quoted attribute value, so markup
  # characters (notably `"`) have to be escaped.
  shown = u.gsub(/[&<>"]/, {'&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;'})
  title = if code <= 0x20
    ASCII_ESCAPE_NAMES[code]
  elsif code < 0x7F
    # Strictly below DEL, so that the next branch can be reached
    "ascii '#{shown}'"
  elsif code == 0x7F
    "delete"
  elsif code <= 0xFF
    "extended ascii '#{shown}'"
  else
    "unicode '#{shown}'"
  end
  %(<code class="grammar-char-escape"><abbr title="#{title}">#{char}</abbr></code>)
end
|
546
|
+
|
547
|
+
##
|
548
|
+
# ISO EBNF Formatters
|
549
|
+
##
|
550
|
+
|
551
|
+
# Format the expression part of a rule
|
552
|
+
# Format the expression part of a rule as ISO EBNF.
#
# Symbols become rule references, strings become quoted terminals and arrays
# are treated as operator expressions (`:istr`, `:alt`, `:diff`, `:opt`,
# `:star`, `:plus`, `:hex`, `:range`, `:seq`, `:rept`).
#
# @param [Symbol, String, Array] expr expression to format
# @param [String] sep separator placed between the operands of
#   `:alt`/`:diff`/`:seq` (a single space when nil)
# @param [Boolean] embedded whether the result is nested in another
#   expression, in which case `:alt`/`:diff`/`:seq` results are parenthesized
# @return [String]
# @raise [RangeError] if a string contains a character that
#   `ISOEBNF::TERMINAL_CHARACTER` does not match, or a range cannot be formatted
# @raise [RuntimeError] for an unknown operator
def format_isoebnf(expr, sep: nil, embedded: false)
  return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
  if expr.is_a?(String)
    # A hex escape is written as the character it denotes. The whole string
    # must be the escape, and it is decoded as UTF-8 so that code points above
    # #xFF are accepted.
    expr = expr[2..-1].hex.chr(Encoding::UTF_8) if expr.match?(/\A#x\h+\z/)
    expr.chars.each do |c|
      raise RangeError, "cannot format #{expr.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless
        ISOEBNF::TERMINAL_CHARACTER.match?(c)
    end
    # Use single quotes when the text itself contains a double quote
    quote = expr.include?('"') ? "'" : '"'
    return (@options[:html] ? %(#{quote}<code class="grammar-literal">#{expr}</code>#{quote}) : %(#{quote}#{expr}#{quote}))
  end
  parts = {
    alt:    (@options[:html] ? "<code>|</code> " : "| "),
    diff:   (@options[:html] ? "<code>-</code> " : "- "),
  }
  lparen = (@options[:html] ? "<code>(</code> " : "(")
  rparen = (@options[:html] ? "<code>)</code> " : ")")

  case expr.first
  when :istr
    # Loses fidelity, but, oh well ...
    format_isoebnf(expr.last, embedded: true)
  when :alt, :diff
    this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
    res = expr[1..-1].map {|e| format_isoebnf(e, embedded: true)}.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :opt
    r = format_isoebnf(expr[1], embedded: true)
    "[#{r}]"
  when :star
    r = format_isoebnf(expr[1], embedded: true)
    "{#{r}}"
  when :plus
    # One occurrence followed by any number of further occurrences
    r = format_isoebnf(expr[1], embedded: true)
    "#{r}, {#{r}}"
  when :hex
    # Handled by the string branch, which decodes the escape
    format_isoebnf(expr[1], embedded: true)
  when :range
    res = format_isoebnf_range(expr.last)
    res.is_a?(Array) ?
      format_isoebnf(res, embedded: true) :
      res
  when :seq
    this_sep = "," + (sep ? sep : " ")
    res = expr[1..-1].map do |e|
      format_isoebnf(e, embedded: true)
    end.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :rept
    # Expand repetition
    min, max, value = expr[1..-1]
    if min == 0 && max == 1
      format_isoebnf([:opt, value], sep: sep, embedded: embedded)
    elsif min == 0 && max == '*'
      format_isoebnf([:star, value], sep: sep, embedded: embedded)
    elsif min == 1 && max == '*'
      format_isoebnf([:plus, value], sep: sep, embedded: embedded)
    else
      # `min` mandatory copies ...
      val2 = [:seq]
      while min > 0
        val2 << value
        min -= 1
        max -= 1 unless max == '*'
      end
      if max == '*'
        # ... followed by any number of further copies
        val2 << [:star, value]
      else
        # ... followed by up to `max - min` nested optional copies
        opt = nil
        while max > 0
          opt = [:opt, opt ? [:seq, value, opt] : value]
          max -= 1
        end
        val2 << opt if opt
      end
      format_isoebnf(val2, sep: sep, embedded: embedded)
    end
  else
    raise "Unknown operator: #{expr.first}"
  end
end
|
635
|
+
|
636
|
+
# Format a range
|
637
|
+
# Range is formatted as an alternation of characters
|
638
|
+
# Expand the body of a bracketed EBNF range/enumeration into the individual
# characters it covers, since ISO EBNF has no range notation.
#
# Hex escapes are decoded to the character they denote and `a-b` pairs are
# expanded to every character from `a` to `b`. A trailing `-` in the input is
# treated as a literal hyphen.
#
# @param [String] string contents of the brackets
# @return [String, Array] the quoted character when there is exactly one,
#   otherwise an `[:alt, ...]` expression of characters to be formatted further
# @raise [RangeError] if the range is negated (starts with `^`), contains a
#   character that cannot be scanned or that `ISOEBNF::TERMINAL_CHARACTER`
#   does not match, or has a range whose end does not follow its start
def format_isoebnf_range(string)
  raise RangeError, "cannot format #{string.inspect} as an ISO EBNF range" if string.start_with?('^')

  o_dash = string.end_with?('-')
  string = string[0..-2] if o_dash

  chars = []
  scanner = StringScanner.new(string)
  in_range = false
  # Build chars from different ranges/enums
  until scanner.eos?
    char = if (hex = scanner.scan(Terminals::HEX))
      # Integer#chr turns the code point into a character
      hex[2..-1].hex.chr(Encoding::UTF_8)
    else
      scanner.scan(Terminals::R_CHAR)
    end
    raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration: #{char.inspect} is out of range" unless
      char && ISOEBNF::TERMINAL_CHARACTER.match?(char)

    if in_range
      # calculate characters from chars.last to this char
      raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration" unless chars.last < char
      chars.concat((chars.last..char).to_a[1..-1])
      in_range = false
    else
      chars << char
    end

    in_range = true if scanner.scan(/\-/)
  end

  chars << '-' if o_dash

  # Possibly only a single character (no character?)
  if chars.length == 1
    # Quote verbatim: String#inspect would add backslash escapes
    only = chars.last
    only == '"' ? %('"') : %("#{only}")
  else
    chars.unshift(:alt)
  end
end
|
677
|
+
|
678
|
+
ERB_DESC = %q(
|
679
|
+
<table class="grammar">
|
680
|
+
<tbody id="grammar-productions" class="<%= @format %>">
|
681
|
+
<% for rule in @rules %>
|
682
|
+
<tr<%= %{ id="grammar-production-#{rule.sym}"} unless %w(=/ |).include?(rule.assign)%>>
|
683
|
+
<% if rule.id %>
|
684
|
+
<td><%= rule.id %></td>
|
685
|
+
<% end %>
|
686
|
+
<td><code><%== rule.sym %></code></td>
|
687
|
+
<td><%= rule.assign %></td>
|
688
|
+
<td><%= rule.formatted %></td>
|
689
|
+
</tr>
|
690
|
+
<% end %>
|
691
|
+
</tbody>
|
692
|
+
</table>
|
221
693
|
).gsub(/^ /, '')
|
222
694
|
end
|
223
695
|
end
|