ebnf 1.2.0 → 2.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +223 -199
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +38 -19
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/doap.ttl +23 -18
- data/etc/ebnf.ebnf +21 -33
- data/etc/ebnf.html +76 -160
- data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
- data/etc/ebnf.ll1.sxp +182 -183
- data/etc/ebnf.peg.rb +90 -0
- data/etc/ebnf.peg.sxp +84 -0
- data/etc/ebnf.sxp +40 -41
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +363 -362
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +103 -101
- data/lib/ebnf.rb +6 -1
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +114 -69
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +131 -3
- data/lib/ebnf/ll1/lexer.rb +20 -22
- data/lib/ebnf/ll1/parser.rb +97 -64
- data/lib/ebnf/ll1/scanner.rb +82 -50
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +561 -0
- data/lib/ebnf/peg/rule.rb +250 -0
- data/lib/ebnf/rule.rb +442 -148
- data/lib/ebnf/terminals.rb +21 -0
- data/lib/ebnf/writer.rb +587 -82
- metadata +125 -18
- data/etc/sparql.rb +0 -45773
@@ -0,0 +1,21 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
# Terminal definitions for the EBNF grammar
|
3
|
+
module EBNF::Terminals
  # A bare name: ASCII letters, digits, underscore and dot, delimited by word boundaries.
  SYMBOL_BASE = %r(\b[a-zA-Z0-9_\.]+\b)u.freeze

  # A name that is not followed by `::=`, i.e. a reference rather than a rule's left-hand side.
  SYMBOL = %r(#{SYMBOL_BASE}(?!\s*::=))u.freeze

  # Hexadecimal character escape: `#x` followed by one or more hex digits.
  HEX = %r(\#x\h+)u.freeze

  # Any single character from the listed ranges (tab, LF, CR, U+0020..U+D7FF, U+10000..U+10FFFF).
  CHAR = %r([\u0009\u000A\u000D\u0020-\uD7FF\u{10000}-\u{10FFFF}])u.freeze

  # Same as CHAR but without `-` (U+002D) and `]` (U+005D), for use inside brackets.
  R_CHAR = %r([\u0009\u000A\u000D\u0020-\u002C\u002E-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze

  # Bracketed character class: char ranges, hex ranges, single chars or hex escapes,
  # with an optional trailing `-`. Rejected when followed by whitespace, a name and `::=`.
  RANGE = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX})|#{R_CHAR}|#{HEX})+-?\](?!\s+#{SYMBOL_BASE}\s*::=))u.freeze

  # Left-hand side of a rule: optional `[name]` prefix, then a name, then `::=`.
  LHS = %r((?:\[#{SYMBOL_BASE}\])?\s*#{SYMBOL_BASE}\s*::=)u.freeze

  # Negated bracketed character class (`[^...]`), same contents as RANGE.
  O_RANGE = %r(\[\^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX}|#{R_CHAR}|#{HEX}))+-?\])u.freeze

  # Double-quoted string; the body may not contain `"` (U+0022).
  STRING1 = %r("[\u0009\u000A\u000D\u0020\u0021\u0023-\uD7FF\u{10000}-\u{10FFFF}]*")u.freeze

  # Single-quoted string; the body may not contain `'` (U+0027).
  STRING2 = %r('[\u0009\u000A\u000D\u0020-\u0026\u0028-\uD7FF\u{10000}-\u{10FFFF}]*')u.freeze

  # Postfix repetition operators.
  POSTFIX = %r([?*+])u.freeze

  # One or more of: whitespace, a `#` (not `#x`) or `//` line comment,
  # a `/* ... */` block comment, or a `(* ... *)` block comment.
  PASS = %r((
    \s
    | (?:(?:\#[^x]|//)[^\n\r]*)
    | (?:/\*(?:(?:\*[^/])|[^*])*\*/)
    | (?:\(\*(?:(?:\*[^\)])|[^*])*\*\))
  )+)xmu.freeze
end
|
data/lib/ebnf/writer.rb
CHANGED
@@ -1,22 +1,63 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
require 'rdf'
|
3
3
|
require 'strscan' unless defined?(StringScanner)
|
4
|
+
require "ostruct"
|
5
|
+
require 'unicode/types'
|
4
6
|
|
5
7
|
##
|
6
8
|
# Serialize ruleset back to EBNF
|
7
9
|
module EBNF
|
8
10
|
class Writer
|
9
11
|
LINE_LENGTH = 80
|
12
|
+
LINE_LENGTH_HTML = 200
|
13
|
+
|
14
|
+
# ASCII escape names, indexed by code point (#x00 through #x20).
# Frozen so the shared lookup table cannot be modified by accident.
ASCII_ESCAPE_NAMES = [
  "null",                 #x00
  "start of heading",     #x01
  "start of text",        #x02
  "end of text",          #x03
  "end of transmission",  #x04
  "enquiry",              #x05
  "acknowledge",          #x06
  "bell",                 #x07
  "backspace",            #x08
  "horizontal tab",       #x09
  "new line",             #x0A
  "vertical tab",         #x0B
  "form feed",            #x0C
  "carriage return",      #x0D
  "shift out",            #x0E
  "shift in",             #x0F
  "data link escape",     #x10
  "device control 1",     #x11
  "device control 2",     #x12
  "device control 3",     #x13
  "device control 4",     #x14
  "negative acknowledge", #x15
  "synchronous idle",     #x16
  "end of trans. block",  #x17
  "cancel",               #x18
  "end of medium",        #x19
  "substitute",           #x1A
  "escape",               #x1B
  "file separator",       #x1C
  "group separator",      #x1D
  "record separator",     #x1E
  "unit separator",       #x1F
  "space"                 #x20
].freeze
|
10
50
|
|
11
51
|
##
|
12
52
|
# Format rules to a String
|
13
53
|
#
|
14
54
|
# @param [Array<Rule>] rules
|
55
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
15
56
|
# @return [Object]
|
16
|
-
def self.string(*rules)
|
57
|
+
# Serializes the rules into an in-memory buffer via `write` and returns the buffer contents.
def self.string(*rules, format: :ebnf)
  require 'stringio' unless defined?(StringIO)
  StringIO.new.tap { |io| write(io, *rules, format: format) }.string
end
|
22
63
|
|
@@ -24,9 +65,10 @@ module EBNF
|
|
24
65
|
# Format rules to $stdout
|
25
66
|
#
|
26
67
|
# @param [Array<Rule>] rules
|
68
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
27
69
|
# @return [Object]
|
28
|
-
def self.print(*rules)
|
29
|
-
write($stdout, *rules)
|
70
|
+
# Hands the rules and the requested format to `write`, with $stdout as the output.
def self.print(*rules, format: :ebnf)
  target = $stdout
  write(target, *rules, format: format)
end
|
31
73
|
|
32
74
|
##
|
@@ -34,88 +76,174 @@ module EBNF
|
|
34
76
|
#
|
35
77
|
# @param [Object] out
|
36
78
|
# @param [Array<Rule>] rules
|
79
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
37
80
|
# @return [Object]
|
38
|
-
def self.write(out, *rules)
|
39
|
-
Writer.new(rules, out: out)
|
81
|
+
# Builds a Writer for the given rules, passing `out` and `format` through as keywords.
# Returns the Writer instance.
def self.write(out, *rules, format: :ebnf)
  settings = {out: out, format: format}
  Writer.new(rules, **settings)
end
|
41
84
|
|
42
85
|
##
|
43
86
|
# Write formatted rules to an IO like object as HTML
|
44
87
|
#
|
45
88
|
# @param [Array<Rule>] rules
|
89
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
90
|
+
# @param [Boolean] validate (false) validate generated HTML.
|
46
91
|
# @return [Object]
|
47
|
-
def self.html(*rules)
|
92
|
+
# Runs a Writer with `html: true` against an in-memory buffer and returns what it wrote.
def self.html(*rules, format: :ebnf, validate: false)
  require 'stringio' unless defined?(StringIO)
  StringIO.new.tap do |io|
    Writer.new(rules, out: io, html: true, format: format, validate: validate)
  end.string
end
|
53
98
|
|
54
99
|
##
|
55
100
|
# @param [Array<Rule>] rules
|
101
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
102
|
+
# @param [Boolean] html (false) generate HTML output
|
103
|
+
# @param [Boolean] validate (false) validate generated HTML.
|
56
104
|
# @param [Hash{Symbol => Object}] options
|
57
|
-
# @param [#write]
|
58
|
-
|
59
|
-
|
60
|
-
|
105
|
+
# @param [#write] out ($stdout)
|
106
|
+
def initialize(rules, out: $stdout, html: false, format: :ebnf, validate: false, **options)
|
107
|
+
@options = options.merge(html: html)
|
108
|
+
return if rules.empty?
|
61
109
|
|
62
110
|
# Determine max LHS length
|
111
|
+
format_meth = "format_#{format}".to_sym
|
63
112
|
max_id = rules.max_by {|r| r.id.to_s.length}.id.to_s.length
|
64
113
|
max_sym = rules.max_by {|r| r.sym.to_s.length}.sym.to_s.length
|
65
|
-
lhs_length = max_sym +
|
66
|
-
lhs_fmt =
|
67
|
-
|
114
|
+
lhs_length = max_sym + 1
|
115
|
+
lhs_fmt = case format
|
116
|
+
when :abnf then "%<sym>-#{max_sym}s = "
|
117
|
+
when :ebnf then "%<sym>-#{max_sym}s ::= "
|
118
|
+
when :isoebnf then "%<sym>-#{max_sym}s = "
|
119
|
+
end
|
120
|
+
if format == :ebnf && max_id > 0
|
68
121
|
lhs_fmt = "%<id>-#{max_id+2}s " + lhs_fmt
|
69
122
|
lhs_length += max_id + 3
|
70
123
|
end
|
71
|
-
rhs_length = LINE_LENGTH - lhs_length
|
124
|
+
rhs_length = (html ? LINE_LENGTH_HTML : LINE_LENGTH) - lhs_length
|
72
125
|
|
73
126
|
if html
|
74
127
|
# Output as formatted HTML
|
75
128
|
begin
|
76
|
-
require '
|
77
|
-
|
78
|
-
|
79
|
-
|
129
|
+
require 'erubis'
|
130
|
+
require 'htmlentities'
|
131
|
+
@coder = HTMLEntities.new
|
132
|
+
eruby = Erubis::Eruby.new(ERB_DESC)
|
133
|
+
formatted_rules = rules.map do |rule|
|
134
|
+
if rule.kind == :terminals || rule.kind == :pass
|
135
|
+
OpenStruct.new(id: ("@#{rule.kind}"),
|
136
|
+
sym: nil,
|
137
|
+
assign: nil,
|
138
|
+
formatted: (
|
139
|
+
rule.kind == :terminals ?
|
140
|
+
"<strong># Productions for terminals</strong>" :
|
141
|
+
self.send(format_meth, rule.expr)))
|
142
|
+
else
|
143
|
+
formatted_expr = self.send(format_meth, rule.expr)
|
144
|
+
# Measure text without markup
|
145
|
+
formatted_expr_text = formatted_expr.gsub(%r{</?\w+[^>]*>}, '')
|
146
|
+
if formatted_expr_text.length > rhs_length && (format != :abnf || rule.alt?)
|
147
|
+
lines = []
|
148
|
+
# Can only reasonably split apart alts
|
149
|
+
self.send(format_meth, rule.expr, sep: "--rule-extensions--").
|
150
|
+
split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
|
151
|
+
assign = case format
|
152
|
+
when :ebnf
|
153
|
+
formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
|
154
|
+
(ndx > 0 ? (rule.alt? ? '|' : '') : '::=')
|
155
|
+
when :abnf
|
156
|
+
formatted.sub!(%r{\s*<code>/</code>\s*}, '')
|
157
|
+
(ndx > 0 ? '=/' : '=')
|
158
|
+
else
|
159
|
+
formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
|
160
|
+
(ndx > 0 ? (rule.alt? ? '|' : '') : '=')
|
161
|
+
end
|
162
|
+
lines << OpenStruct.new(id: ((ndx == 0 ? "[#{rule.id}]" : "") if rule.id),
|
163
|
+
sym: (rule.sym if ndx == 0 || format == :abnf),
|
164
|
+
assign: assign,
|
165
|
+
formatted: formatted)
|
166
|
+
end
|
167
|
+
if format == :isoebnf
|
168
|
+
lines << OpenStruct.new(assign: ';')
|
169
|
+
end
|
170
|
+
lines
|
171
|
+
else
|
172
|
+
OpenStruct.new(id: ("[#{rule.id}]" if rule.id),
|
173
|
+
sym: rule.sym,
|
174
|
+
assign: (format == :ebnf ? '::=' : '='),
|
175
|
+
formatted: (formatted_expr + (format == :isoebnf ? ' ;' : '')))
|
176
|
+
end
|
177
|
+
end
|
178
|
+
end.flatten
|
179
|
+
|
180
|
+
html_result = eruby.evaluate(format: format, rules: formatted_rules)
|
181
|
+
|
182
|
+
if validate
|
183
|
+
begin
|
184
|
+
require 'nokogumbo'
|
185
|
+
# Validate the output HTML
|
186
|
+
doc = Nokogiri::HTML5("<!DOCTYPE html>" + html_result, max_errors: 10)
|
187
|
+
raise EncodingError, "Errors found in generated HTML:\n " +
|
188
|
+
doc.errors.map(&:to_s).join("\n ") unless doc.errors.empty?
|
189
|
+
rescue LoadError
|
190
|
+
# Skip
|
191
|
+
end
|
80
192
|
end
|
81
|
-
|
193
|
+
|
194
|
+
out.write html_result
|
82
195
|
return
|
83
196
|
rescue LoadError
|
84
|
-
$stderr.puts "Generating HTML requires
|
197
|
+
$stderr.puts "Generating HTML requires erubis and htmlentities gems to be loaded"
|
85
198
|
end
|
86
199
|
end
|
87
200
|
|
88
201
|
# Format each rule, considering the available rhs size
|
89
202
|
rules.each do |rule|
|
90
203
|
buffer = if rule.pass?
|
91
|
-
"%-#{lhs_length-2}s" % "@pass"
|
204
|
+
"\n%-#{lhs_length-2}s " % "@pass"
|
205
|
+
elsif rule.kind == :terminals
|
206
|
+
"\n%-#{lhs_length-2}s" % "@terminals"
|
92
207
|
else
|
93
208
|
lhs_fmt % {id: "[#{rule.id}]", sym: rule.sym}
|
94
209
|
end
|
95
|
-
formatted_expr =
|
96
|
-
if formatted_expr.length > rhs_length
|
97
|
-
|
210
|
+
formatted_expr = self.send(format_meth, rule.expr)
|
211
|
+
if formatted_expr.length > rhs_length && (format != :abnf || rule.alt?)
|
212
|
+
if format == :abnf
|
213
|
+
# No whitespace, use =/
|
214
|
+
self.send(format_meth, rule.expr, sep: "--rule-extensions--").
|
215
|
+
split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
|
216
|
+
if ndx > 0
|
217
|
+
buffer << "\n" + lhs_fmt.sub('= ', '=/') % {id: "[#{rule.id}]", sym: rule.sym}
|
218
|
+
end
|
219
|
+
buffer << formatted.sub(/\s*\/\s*/, '')
|
220
|
+
end
|
221
|
+
else
|
222
|
+
# Space out past "= "
|
223
|
+
buffer << self.send(format_meth, rule.expr, sep: ("\n" + " " * (lhs_length + (rule.alt? ? 2 : 4) - (format == :ebnf ? 0 : 2))))
|
224
|
+
buffer << ("\n" + " " * (lhs_length) + ';') if format == :isoebnf
|
225
|
+
end
|
98
226
|
else
|
99
|
-
buffer << formatted_expr
|
227
|
+
buffer << formatted_expr + (format == :isoebnf ? ' ;' : '')
|
100
228
|
end
|
229
|
+
buffer << "\n\n" if [:terminals, :pass].include?(rule.kind)
|
101
230
|
out.puts(buffer)
|
102
231
|
end
|
103
232
|
end
|
104
233
|
|
105
234
|
protected
|
235
|
+
|
236
|
+
##
|
237
|
+
# W3C EBNF Formatters
|
238
|
+
##
|
239
|
+
|
106
240
|
# Format the expression part of a rule
|
107
|
-
def
|
108
|
-
return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
|
241
|
+
def format_ebnf(expr, sep: nil, embedded: false)
|
242
|
+
return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
|
109
243
|
if expr.is_a?(String)
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
return (@options[:html] ? %(<code class="grammar-char-escape">#{expr}</code>) : expr)
|
114
|
-
elsif expr =~ /"/
|
115
|
-
return (@options[:html] ? %('<code class="grammar-literal">#{escape(expr, "'")}</code>') : %('#{escape(expr, "'")}'))
|
116
|
-
else
|
117
|
-
return (@options[:html] ? %("<code class="grammar-literal">#{escape(expr, '"')}</code>") : %("#{escape(expr, '"')}"))
|
118
|
-
end
|
244
|
+
return expr.length == 1 ?
|
245
|
+
format_ebnf_char(expr) :
|
246
|
+
format_ebnf_string(expr, expr.include?('"') ? "'" : '"')
|
119
247
|
end
|
120
248
|
parts = {
|
121
249
|
alt: (@options[:html] ? "<code>|</code> " : "| "),
|
@@ -128,95 +256,472 @@ module EBNF
|
|
128
256
|
rparen = (@options[:html] ? "<code>)</code> " : ")")
|
129
257
|
|
130
258
|
case expr.first
|
259
|
+
when :istr
|
260
|
+
# Looses fidelity, but, oh well ...
|
261
|
+
format_ebnf(expr.last, embedded: true)
|
131
262
|
when :alt, :diff
|
132
263
|
this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
|
133
|
-
expr[1..-1].map {|e|
|
264
|
+
res = expr[1..-1].map {|e| format_ebnf(e, embedded: true)}.join(this_sep)
|
265
|
+
embedded ? (lparen + res + rparen) : res
|
134
266
|
when :star, :plus, :opt
|
135
|
-
raise "Expected star expression to have a single operand" unless expr.length == 2
|
136
267
|
char = parts[expr.first.to_sym]
|
137
|
-
r =
|
138
|
-
|
268
|
+
r = format_ebnf(expr[1], embedded: true)
|
269
|
+
"#{r}#{char}"
|
139
270
|
when :hex
|
140
|
-
(
|
271
|
+
escape_ebnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
|
141
272
|
when :range
|
142
|
-
|
273
|
+
format_ebnf_range(expr.last)
|
143
274
|
when :seq
|
144
275
|
this_sep = (sep ? sep : " ")
|
145
|
-
expr[1..-1].map
|
276
|
+
res = expr[1..-1].map do |e|
|
277
|
+
format_ebnf(e, embedded: true)
|
278
|
+
end.join(this_sep)
|
279
|
+
embedded ? (lparen + res + rparen) : res
|
280
|
+
when :rept
|
281
|
+
# Expand repetition
|
282
|
+
min, max, value = expr[1..-1]
|
283
|
+
if min == 0 && max == 1
|
284
|
+
format_ebnf([:opt, value], sep: sep, embedded: embedded)
|
285
|
+
elsif min == 0 && max == '*'
|
286
|
+
format_ebnf([:star, value], sep: sep, embedded: embedded)
|
287
|
+
elsif min == 1 && max == '*'
|
288
|
+
format_ebnf([:plus, value], sep: sep, embedded: embedded)
|
289
|
+
else
|
290
|
+
val2 = [:seq]
|
291
|
+
while min > 0
|
292
|
+
val2 << value
|
293
|
+
min -= 1
|
294
|
+
max -= 1 unless max == '*'
|
295
|
+
end
|
296
|
+
if max == '*'
|
297
|
+
val2 << [:star, value]
|
298
|
+
else
|
299
|
+
opt = nil
|
300
|
+
while max > 0
|
301
|
+
opt = [:opt, opt ? [:seq, value, opt] : value]
|
302
|
+
max -= 1
|
303
|
+
end
|
304
|
+
val2 << opt if opt
|
305
|
+
end
|
306
|
+
format_ebnf(val2, sep: sep, embedded: embedded)
|
307
|
+
end
|
146
308
|
else
|
147
309
|
raise "Unknown operator: #{expr.first}"
|
148
310
|
end
|
149
311
|
end
|
150
312
|
|
151
313
|
# Format a single-character string, preferring hex for non-main ASCII
|
152
|
-
def
|
314
|
+
# Renders one character as a quoted EBNF literal.
#
# `"` is wrapped in single quotes; `!`, #x23-#x7E and #x80-#xFFFD are wrapped in
# double quotes; every other code point is delegated to escape_ebnf_hex.
# In HTML mode the character sits inside a `grammar-literal` <code> element.
def format_ebnf_char(c)
  code = c.ord
  if code == 0x22
    @options[:html] ? %('<code class="grammar-literal">"</code>') : %{'"'}
  elsif code == 0x21 || (0x23..0x7e).cover?(code) || (0x80..0xFFFD).cover?(code)
    @options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"}
  else
    escape_ebnf_hex(c)
  end
end
|
159
323
|
|
160
324
|
# Format a range
|
161
|
-
def
|
325
|
+
# Renders the body of a character class between brackets.
#
# Runs of `!`, `"` and #x24-#x7E are copied through (HTML-encoded and wrapped in a
# `grammar-literal` <code> element in HTML mode). `#x...` escapes and any other
# single character go through escape_ebnf_hex.
def format_ebnf_range(string)
  html = @options[:html]
  result = (html ? "<code>[</code> " : "[")
  scanner = StringScanner.new(string)
  until scanner.eos?
    if scanner.scan(/\A[!"\u0024-\u007e]+/)
      literal = scanner.matched
      result << (html ? %(<code class="grammar-literal">#{@coder.encode literal}</code>) : literal)
    elsif scanner.scan(/\A#x\h+/)
      # Drop the "#x" prefix and turn the hex digits into the character they denote.
      result << escape_ebnf_hex(scanner.matched[2..-1].hex.chr(Encoding::UTF_8))
    else
      result << escape_ebnf_hex(scanner.getch)
    end
  end
  result + (html ? "<code>]</code> " : "]")
end
|
182
343
|
|
183
344
|
# Escape a string, using as many UTF-8 characters as possible
|
184
|
-
def
|
185
|
-
buffer = ""
|
345
|
+
# Wraps `string` in `quote` characters, HTML-encoding the result in HTML mode.
#
# Characters in #x00-#x19, and the quote character itself, must match
# ISOEBNF::TERMINAL_CHARACTER; otherwise a RangeError is raised.
# NOTE(review): the upper bound #x19 may have been meant as #x1F (end of the ASCII
# control range) - confirm before changing.
def format_ebnf_string(string, quote = '"')
  string.each_char do |ch|
    code = ch.ord
    next unless (0x00..0x19).cover?(code) || code == quote.ord
    next if ISOEBNF::TERMINAL_CHARACTER.match?(ch)
    raise RangeError, "cannot format #{string.inspect} as an EBNF String: #{ch.inspect} is out of range"
  end

  quoted = "#{quote}#{string}#{quote}"
  @options[:html] ? @coder.encode(quoted) : quoted
end
|
195
357
|
|
196
|
-
def
|
358
|
+
# Renders the first character of `u` as an EBNF hex escape (`#xNN`).
#
# The escape is padded to 2, 4 or 8 hex digits depending on the code point.
# In HTML mode it is wrapped in an <abbr> whose title describes the character,
# inside a `grammar-char-escape` <code> element.
#
# @param [String] u
# @return [String]
def escape_ebnf_hex(u)
  code = u.ord
  fmt = case code
  when 0x0000..0x00ff then "#x%02X"
  when 0x0100..0xffff then "#x%04X"
  else "#x%08X"
  end
  char = fmt % code
  return char unless @options[:html]

  char = if code <= 0x20
    %(<abbr title="#{ASCII_ESCAPE_NAMES[code]}">#{@coder.encode char}</abbr>)
  elsif code == 0x22
    # Show the escape like every other branch; this branch used to emit a stray `>"`.
    %(<abbr title="quot">#{@coder.encode char}</abbr>)
  elsif code < 0x7F
    %(<abbr title="ascii '#{@coder.encode u}'">#{@coder.encode char}</abbr>)
  elsif code == 0x7F
    %(<abbr title="delete">#{@coder.encode char}</abbr>)
  elsif code <= 0xFF
    # NOTE(review): the title shows the escape rather than the character itself,
    # unlike the other branches - confirm this is intended.
    %(<abbr title="extended ascii '#{@coder.encode char}'">#{char}</abbr>)
  elsif (%w(Control Private-use Surrogate Noncharacter Reserved) - ::Unicode::Types.of(u)).empty?
    # NOTE(review): this holds only if the character has all five listed types;
    # an intersection test may have been intended - verify against Unicode::Types.
    %(<abbr title="unicode '#{u}'">#{char}</abbr>)
  else
    %(<abbr title="unicode '#{::Unicode::Types.of(u).first}'">#{char}</abbr>)
  end
  %(<code class="grammar-char-escape">#{char}</code>)
end
|
387
|
+
|
388
|
+
##
|
389
|
+
# ABNF Formatters
|
390
|
+
##
|
391
|
+
|
392
|
+
# Format the expression part of a rule
|
393
|
+
# Format the expression part of a rule as ABNF.
#
# @param [Symbol, String, Array] expr rule name, literal, or operator expression
# @param [String] sep separator placed before each alternative / sequence member
# @param [Boolean] embedded parenthesize `alt` and `seq` results
# @param [Boolean] sensitive prefix string literals with `%s`
# @return [String]
# @raise [RangeError] for the `diff` operator, which ABNF does not have
def format_abnf(expr, sep: nil, embedded: false, sensitive: true)
  return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
  if expr.is_a?(String)
    if expr.length == 1
      return format_abnf_char(expr)
    elsif expr.start_with?('%')
      # Already encoded
      return expr
    elsif expr.include?('"')
      # A quoted literal cannot contain `"`, so emit each quote as %x22 between the
      # surrounding segments. The -1 limit keeps trailing empty segments so a final
      # quote is not lost; empty segments are then dropped rather than emitted as "".
      pieces = expr.split('"', -1).
        flat_map {|segment| [[:hex, "#x22"], segment]}.
        drop(1).
        reject {|piece| piece == ""}
      # NOTE(review): `embedded` is not propagated here, so the sequence is never
      # parenthesized - confirm that is intended inside repetitions.
      return format_abnf([:seq, *pieces], sep: nil, embedded: false)
    else
      # The case-sensitivity prefix belongs before the opening quote, in HTML as in text.
      prefix = ('%s' if sensitive)
      return (@options[:html] ? %(#{prefix}"<code class="grammar-literal">#{@coder.encode expr}</code>") : %(#{prefix}"#{expr}"))
    end
  end
  parts = {
    alt: (@options[:html] ? "<code>/</code> " : "/ "),
    star: (@options[:html] ? "<code>*</code> " : "*"),
    # NOTE(review): HTML uses `+` while plain text uses `1*` - confirm.
    plus: (@options[:html] ? "<code>+</code> " : "1*")
  }
  lbrac = (@options[:html] ? "<code>[</code> " : "[")
  rbrac = (@options[:html] ? "<code>]</code> " : "]")
  lparen = (@options[:html] ? "<code>(</code> " : "(")
  rparen = (@options[:html] ? "<code>)</code> " : ")")

  case expr.first
  when :istr
    # FIXME: if string part is segmented, need to do something different
    format_abnf(expr.last, embedded: true, sensitive: false)
  when :alt
    this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
    res = expr[1..-1].map {|e| format_abnf(e, embedded: true)}.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :diff
    raise RangeError, "ABNF does not support the diff operator"
  when :opt
    r = format_abnf(expr[1], embedded: true)
    "#{lbrac}#{r}#{rbrac}"
  when :plus, :star
    # ABNF repetition operators are prefixes.
    char = parts[expr.first.to_sym]
    r = format_abnf(expr[1], embedded: true)
    "#{char}#{r}"
  when :hex
    # An explicit encoding is required: Integer#chr raises RangeError above 0xFF without one.
    escape_abnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
  when :range
    # Returns an [:alt] or [:not [:alt]] if composed of multiple sequences
    # Note: ABNF does not support the `not` operator
    res = format_abnf_range(expr.last)
    res.is_a?(Array) ?
      format_abnf(res, embedded: true) :
      res
  when :seq
    this_sep = (sep ? sep : " ")
    res = expr[1..-1].map do |e|
      format_abnf(e, embedded: true)
    end.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :rept
    # Expand repetition
    min, max, value = expr[1..-1]
    r = format_abnf(value, embedded: true)
    if min == max
      "#{min}#{r}"
    elsif min == 0 && max == '*'
      "#{parts[:star]}#{r}"
    elsif min > 0 && max == '*'
      "#{min}#{parts[:star]}#{r}"
    else
      "#{min}#{parts[:star]}#{max}#{r}"
    end
  else
    raise "Unknown operator: #{expr.first}"
  end
end
|
475
|
+
|
476
|
+
# Format a single-character string, preferring hex for non-main ASCII
|
477
|
+
# Renders one character as an ABNF literal.
#
# Characters in #x20-#x21 and #x23-#x7E are emitted between double quotes (inside
# a `grammar-literal` <code> element in HTML mode); anything else is delegated to
# escape_abnf_hex.
#
# @param [String] c
# @return [String]
def format_abnf_char(c)
  return escape_abnf_hex(c) unless /[\x20-\x21\x23-\x7E]/.match?(c)

  # Quote by hand: String#inspect would double a backslash, and ABNF has no escapes.
  @options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %("#{c}")
end
|
484
|
+
|
485
|
+
# Format a range
|
486
|
+
#
|
487
|
+
# Presumes range has already been validated
|
488
|
+
def format_abnf_range(string)
|
489
|
+
alt, o_dash = [:alt], false
|
490
|
+
|
491
|
+
raise RangeError, "cannot format #{string.inspect} an ABNF range" if string.start_with?('^')
|
492
|
+
|
493
|
+
if string.end_with?('-')
|
494
|
+
o_dash = true
|
495
|
+
string = string[0..-2]
|
496
|
+
end
|
497
|
+
|
498
|
+
scanner = StringScanner.new(string)
|
499
|
+
hexes, deces = [], []
|
500
|
+
in_range = false
|
501
|
+
# Build op (alt) from different ranges/enums
|
502
|
+
while !scanner.eos?
|
503
|
+
if hex = scanner.scan(Terminals::HEX)
|
504
|
+
# Append any decimal values
|
505
|
+
alt << "%d" + deces.join(".") unless deces.empty?
|
506
|
+
deces = []
|
507
|
+
|
508
|
+
if in_range
|
509
|
+
# Add "." sequences for any previous hexes
|
510
|
+
alt << "%x" + hexes[0..-2].join(".") if hexes.length > 1
|
511
|
+
alt << "%x#{hexes.last}-#{hex[2..-1]}"
|
512
|
+
in_range, hexes = false, []
|
513
|
+
else
|
514
|
+
hexes << hex[2..-1]
|
515
|
+
end
|
516
|
+
elsif dec = scanner.scan(Terminals::R_CHAR)
|
517
|
+
# Append any hexadecimal values
|
518
|
+
alt << "%x" + hexes.join(".") unless hexes.empty?
|
519
|
+
hexes = []
|
520
|
+
|
521
|
+
if in_range
|
522
|
+
# Add "." sequences for any previous hexes
|
523
|
+
alt << "%d" + deces[0..-2].join(".") if deces.length > 1
|
524
|
+
alt << "%d#{deces.last}-#{dec.codepoints.first}"
|
525
|
+
in_range, deces = false, []
|
526
|
+
else
|
527
|
+
deces << dec.codepoints.first.to_s
|
528
|
+
end
|
529
|
+
end
|
530
|
+
|
531
|
+
in_range = true if scanner.scan(/\-/)
|
532
|
+
end
|
533
|
+
|
534
|
+
deces << '45' if o_dash
|
535
|
+
|
536
|
+
# Append hexes and deces as "." sequences (should be only one)
|
537
|
+
alt << "%d" + deces.join(".") unless deces.empty?
|
538
|
+
alt << "%x" + hexes.join(".") unless hexes.empty?
|
539
|
+
|
540
|
+
# FIXME: HTML abbreviations?
|
541
|
+
if alt.length == 2
|
542
|
+
# Just return the range or enum
|
543
|
+
alt.last
|
544
|
+
else
|
545
|
+
# Return the alt, which will be further formatted
|
546
|
+
alt
|
547
|
+
end
|
548
|
+
end
|
549
|
+
|
550
|
+
# Renders the first character of `u` as an ABNF hex value (`%xNN`).
#
# The value is padded to 2, 4 or 8 hex digits depending on the code point.
# In HTML mode it is wrapped in an <abbr> whose title describes the character,
# inside a `grammar-char-escape` <code> element.
#
# @param [String] u
# @return [String]
def escape_abnf_hex(u)
  code = u.ord
  fmt = case code
  when 0x0000..0x00ff then "%02X"
  when 0x0100..0xffff then "%04X"
  else "%08X"
  end
  char = "%x" + (fmt % code)
  return char unless @options[:html]

  char = if code <= 0x20
    %(<abbr title="#{ASCII_ESCAPE_NAMES[code]}">#{@coder.encode char}</abbr>)
  elsif code == 0x22
    # This branch used to build its markup without assigning it (and with a stray `>"`).
    %(<abbr title="quot">#{@coder.encode char}</abbr>)
  elsif code < 0x7F
    %(<abbr title="ascii '#{u}'">#{@coder.encode char}</abbr>)
  elsif code == 0x7F
    %(<abbr title="delete">#{@coder.encode char}</abbr>)
  elsif code <= 0xFF
    %(<abbr title="extended ascii '#{u}'">#{char}</abbr>)
  else
    # `unicode_normalize` was misspelled, raising NoMethodError for every character above #xFF.
    %(<abbr title="unicode '#{u.unicode_normalize}'">#{char}</abbr>)
  end
  %(<code class="grammar-char-escape">#{char}</code>)
end
|
576
|
+
|
577
|
+
##
|
578
|
+
# ISO EBNF Formatters
|
579
|
+
##
|
580
|
+
|
581
|
+
# Format the expression part of a rule
|
582
|
+
# Format the expression part of a rule as ISO EBNF.
#
# @param [Symbol, String, Array] expr rule name, literal, or operator expression
# @param [String] sep separator placed before each alternative / after each sequence comma
# @param [Boolean] embedded parenthesize `alt`, `diff` and `seq` results
# @return [String]
# @raise [RangeError] if a literal contains a character that does not match
#   ISOEBNF::TERMINAL_CHARACTER
def format_isoebnf(expr, sep: nil, embedded: false)
  return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
  if expr.is_a?(String)
    # A string that is exactly a hex escape stands for that character. The match is
    # anchored at both ends so a literal that merely starts with "#x.." is kept whole.
    # The explicit encoding is required: Integer#chr raises RangeError above 0xFF without one.
    expr = expr[2..-1].hex.chr(Encoding::UTF_8) if expr =~ /\A#x\h+\z/
    expr.chars.each do |c|
      raise RangeError, "cannot format #{expr.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless
        ISOEBNF::TERMINAL_CHARACTER.match?(c)
    end
    # Use single quotes when the literal itself contains a double quote.
    if expr =~ /"/
      return (@options[:html] ? %('<code class="grammar-literal">#{@coder.encode expr}</code>') : %('#{expr}'))
    else
      return (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode expr}</code>") : %("#{expr}"))
    end
  end
  parts = {
    alt: (@options[:html] ? "<code>|</code> " : "| "),
    diff: (@options[:html] ? "<code>-</code> " : "- "),
  }
  lparen = (@options[:html] ? "<code>(</code> " : "(")
  rparen = (@options[:html] ? "<code>)</code> " : ")")

  case expr.first
  when :istr
    # Loses fidelity: case-insensitivity is not represented.
    format_isoebnf(expr.last, embedded: true)
  when :alt, :diff
    this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
    res = expr[1..-1].map {|e| format_isoebnf(e, embedded: true)}.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :opt
    r = format_isoebnf(expr[1], embedded: true)
    "[#{r}]"
  when :star
    r = format_isoebnf(expr[1], embedded: true)
    "{#{r}}"
  when :plus
    # One occurrence followed by a zero-or-more repetition.
    r = format_isoebnf(expr[1], embedded: true)
    "#{r}, {#{r}}"
  when :hex
    # The "#x.." string is converted to its character by the String branch above.
    format_isoebnf(expr[1], embedded: true)
  when :range
    res = format_isoebnf_range(expr.last)
    res.is_a?(Array) ?
      format_isoebnf(res, embedded: true) :
      res
  when :seq
    this_sep = "," + (sep ? sep : " ")
    res = expr[1..-1].map do |e|
      format_isoebnf(e, embedded: true)
    end.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :rept
    # Expand repetition
    min, max, value = expr[1..-1]
    if min == 0 && max == 1
      format_isoebnf([:opt, value], sep: sep, embedded: embedded)
    elsif min == 0 && max == '*'
      format_isoebnf([:star, value], sep: sep, embedded: embedded)
    elsif min == 1 && max == '*'
      format_isoebnf([:plus, value], sep: sep, embedded: embedded)
    else
      # `min` mandatory copies, then either a star or nested optionals for the rest.
      val2 = [:seq]
      while min > 0
        val2 << value
        min -= 1
        max -= 1 unless max == '*'
      end
      if max == '*'
        val2 << [:star, value]
      else
        opt = nil
        while max > 0
          opt = [:opt, opt ? [:seq, value, opt] : value]
          max -= 1
        end
        val2 << opt if opt
      end
      format_isoebnf(val2, sep: sep, embedded: embedded)
    end
  else
    raise "Unknown operator: #{expr.first}"
  end
end
|
665
|
+
|
666
|
+
# Format a range
|
667
|
+
# Range is formatted as an alternation of characters
|
668
|
+
# Expands the body of a character class into the individual characters it denotes.
#
# `a-b` pairs are expanded into every character between the two ends, and a
# trailing `-` is added as a literal dash.
#
# @param [String] string class body
# @return [String, Array] a quoted literal when the class denotes exactly one
#   character, otherwise an `[:alt, ...]` array of characters
# @raise [RangeError] if the class is negated, contains a character that does not
#   match ISOEBNF::TERMINAL_CHARACTER, or has a range whose end is not after its start
def format_isoebnf_range(string)
  raise RangeError, "cannot format #{string.inspect} as an ISO EBNF range" if string.start_with?('^')

  trailing_dash = string.end_with?('-')
  string = string[0..-2] if trailing_dash

  chars = []
  in_range = false
  scanner = StringScanner.new(string)
  # Build chars from different ranges/enums
  until scanner.eos?
    char = if (hex = scanner.scan(Terminals::HEX))
      # Integer#chr with an explicit encoding; Integer has no #char method.
      hex[2..-1].hex.chr(Encoding::UTF_8)
    else
      scanner.scan(Terminals::R_CHAR)
    end
    raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration: #{char.inspect} is out of range" unless
      char && ISOEBNF::TERMINAL_CHARACTER.match?(char)

    if in_range
      # calculate characters from chars.last to this char
      raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration" unless chars.last < char
      chars.concat((chars.last..char).to_a[1..-1])
      in_range = false
    else
      chars << char
    end

    in_range = true if scanner.scan(/\-/)
  end

  chars << '-' if trailing_dash

  if chars.length == 1
    # Quote by hand: String#inspect would add backslash escapes. A lone double
    # quote is wrapped in single quotes.
    single = chars.last
    single == '"' ? %('"') : %("#{single}")
  else
    chars.unshift(:alt)
  end
end
|
707
|
+
|
708
|
+
# Template for HTML output: one table row per entry in @rules.
# A row gets an `id` attribute only when it has a symbol and its assignment is not
# a continuation (`=/` or `|`). The id cell spans two columns when there is no symbol.
ERB_DESC = %q(
<table class="grammar">
<tbody id="grammar-productions" class="<%= @format %>">
<% for rule in @rules %>
<tr<%= %{ id="grammar-production-#{rule.sym}"} unless %w(=/ |).include?(rule.assign) || rule.sym.nil?%>>
<% if rule.id %>
<td<%= " colspan=2" unless rule.sym %>><%= rule.id %></td>
<% end %>
<% if rule.sym %>
<td><code><%== rule.sym %></code></td>
<% end %>
<td><%= rule.assign %></td>
<td><%= rule.formatted %></td>
</tr>
<% end %>
</tbody>
</table>
).gsub(/^ /, '')
|
221
726
|
end
|
222
727
|
end
|