ebnf 1.2.0 → 2.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +223 -199
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +38 -19
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/doap.ttl +23 -18
- data/etc/ebnf.ebnf +21 -33
- data/etc/ebnf.html +76 -160
- data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
- data/etc/ebnf.ll1.sxp +182 -183
- data/etc/ebnf.peg.rb +90 -0
- data/etc/ebnf.peg.sxp +84 -0
- data/etc/ebnf.sxp +40 -41
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +363 -362
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +103 -101
- data/lib/ebnf.rb +6 -1
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +114 -69
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +131 -3
- data/lib/ebnf/ll1/lexer.rb +20 -22
- data/lib/ebnf/ll1/parser.rb +97 -64
- data/lib/ebnf/ll1/scanner.rb +82 -50
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +561 -0
- data/lib/ebnf/peg/rule.rb +250 -0
- data/lib/ebnf/rule.rb +442 -148
- data/lib/ebnf/terminals.rb +21 -0
- data/lib/ebnf/writer.rb +587 -82
- metadata +125 -18
- data/etc/sparql.rb +0 -45773
@@ -0,0 +1,21 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
# Terminal definitions for the EBNF grammar
|
3
|
+
# Regular expressions for the terminals of the W3C-style EBNF grammar.
module EBNF::Terminals
  # A word-bounded run of letters, digits, underscores and dots.
  SYMBOL_BASE = %r(\b[a-zA-Z0-9_\.]+\b)u.freeze

  # A symbol that is not immediately followed by `::=` (i.e. not a rule's left-hand side).
  SYMBOL      = %r(#{SYMBOL_BASE}(?!\s*::=))u.freeze

  # A hexadecimal character escape: `#x` followed by hex digits.
  HEX         = %r(\#x\h+)u.freeze

  # A single character permitted in the grammar.
  CHAR        = %r([\u0009\u000A\u000D\u0020-\uD7FF\u{10000}-\u{10FFFF}])u.freeze

  # Like CHAR, but excluding `-` (U+002D) and `]` (U+005D) so it can appear inside a range.
  R_CHAR      = %r([\u0009\u000A\u000D\u0020-\u002C\u002E-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze

  # A bracketed range or enumeration; the lookahead rejects a bracketed id that starts the next rule.
  RANGE       = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX})|#{R_CHAR}|#{HEX})+-?\](?!\s+#{SYMBOL_BASE}\s*::=))u.freeze

  # A rule's left-hand side: optional `[id]`, the symbol, then `::=`.
  LHS         = %r((?:\[#{SYMBOL_BASE}\])?\s*#{SYMBOL_BASE}\s*::=)u.freeze

  # A negated range: `[^ ... ]`.
  O_RANGE     = %r(\[\^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX}|#{R_CHAR}|#{HEX}))+-?\])u.freeze

  # A double-quoted string (cannot contain `"`).
  STRING1     = %r("[\u0009\u000A\u000D\u0020\u0021\u0023-\uD7FF\u{10000}-\u{10FFFF}]*")u.freeze

  # A single-quoted string (cannot contain `'`).
  STRING2     = %r('[\u0009\u000A\u000D\u0020-\u0026\u0028-\uD7FF\u{10000}-\u{10FFFF}]*')u.freeze

  # A postfix repetition operator.
  POSTFIX     = %r([?*+])u.freeze

  # Skippable input: whitespace, `#`/`//` line comments (a `#x` is a HEX, not a comment),
  # and `/* ... */` or `(* ... *)` block comments.
  PASS        = %r((
                    \s
                  | (?:(?:\#[^x]|//)[^\n\r]*)
                  | (?:/\*(?:(?:\*[^/])|[^*])*\*/)
                  | (?:\(\*(?:(?:\*[^\)])|[^*])*\*\))
                  )+)xmu.freeze
end
|
data/lib/ebnf/writer.rb
CHANGED
@@ -1,22 +1,63 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
require 'rdf'
|
3
3
|
require 'strscan' unless defined?(StringScanner)
|
4
|
+
require "ostruct"
|
5
|
+
require 'unicode/types'
|
4
6
|
|
5
7
|
##
|
6
8
|
# Serialize ruleset back to EBNF
|
7
9
|
module EBNF
|
8
10
|
class Writer
|
9
11
|
LINE_LENGTH = 80
|
12
|
+
LINE_LENGTH_HTML = 200
|
13
|
+
|
14
|
+
# ASCII escape names
|
15
|
+
# Human-readable names for the code points U+0000..U+0020, indexed by code point.
# Frozen so the shared lookup table cannot be mutated by accident.
ASCII_ESCAPE_NAMES = [
  "null",                 #x00
  "start of heading",     #x01
  "start of text",        #x02
  "end of text",          #x03
  "end of transmission",  #x04
  "enquiry",              #x05
  "acknowledge",          #x06
  "bell",                 #x07
  "backspace",            #x08
  "horizontal tab",       #x09
  "new line",             #x0A
  "vertical tab",         #x0B
  "form feed",            #x0C
  "carriage return",      #x0D
  "shift out",            #x0E
  "shift in",             #x0F
  "data link escape",     #x10
  "device control 1",     #x11
  "device control 2",     #x12
  "device control 3",     #x13
  "device control 4",     #x14
  "negative acknowledge", #x15
  "synchronous idle",     #x16
  "end of trans. block",  #x17
  "cancel",               #x18
  "end of medium",        #x19
  "substitute",           #x1A
  "escape",               #x1B
  "file separator",       #x1C
  "group separator",      #x1D
  "record separator",     #x1E
  "unit separator",       #x1F
  "space"                 #x20
].freeze
|
10
50
|
|
11
51
|
##
|
12
52
|
# Format rules to a String
|
13
53
|
#
|
14
54
|
# @param [Array<Rule>] rules
|
55
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
15
56
|
# @return [Object]
|
16
|
-
def self.string(*rules)
|
57
|
+
# Serialize the rules into an in-memory buffer and hand back its contents.
#
# The actual formatting is delegated to {write}; this only supplies the buffer.
def self.string(*rules, format: :ebnf)
  require 'stringio' unless defined?(StringIO)
  StringIO.new.tap { |io| write(io, *rules, format: format) }.string
end
|
22
63
|
|
@@ -24,9 +65,10 @@ module EBNF
|
|
24
65
|
# Format rules to $stdout
|
25
66
|
#
|
26
67
|
# @param [Array<Rule>] rules
|
68
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
27
69
|
# @return [Object]
|
28
|
-
def self.print(*rules)
|
29
|
-
write($stdout, *rules)
|
70
|
+
# Serialize the rules straight to the current $stdout by delegating to {write}.
def self.print(*rules, format: :ebnf)
  write($stdout, *rules, format: format)
end
|
31
73
|
|
32
74
|
##
|
@@ -34,88 +76,174 @@ module EBNF
|
|
34
76
|
#
|
35
77
|
# @param [Object] out
|
36
78
|
# @param [Array<Rule>] rules
|
79
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
37
80
|
# @return [Object]
|
38
|
-
def self.write(out, *rules)
|
39
|
-
Writer.new(rules, out: out)
|
81
|
+
# Build a Writer over the given rules; constructing it emits the formatted
# rules to `out`. The new Writer instance is the return value.
def self.write(out, *rules, format: :ebnf)
  Writer.new(rules, out: out, format: format)
end
|
41
84
|
|
42
85
|
##
|
43
86
|
# Write formatted rules to an IO like object as HTML
|
44
87
|
#
|
45
88
|
# @param [Array<Rule>] rules
|
89
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
90
|
+
# @param [Boolean] validate (false) validate generated HTML.
|
46
91
|
# @return [Object]
|
47
|
-
def self.html(*rules)
|
92
|
+
# Render the rules as HTML into an in-memory buffer and return the markup.
#
# Constructing the Writer with `html: true` performs the rendering; `validate`
# is passed through unchanged.
def self.html(*rules, format: :ebnf, validate: false)
  require 'stringio' unless defined?(StringIO)
  StringIO.new.tap do |io|
    Writer.new(rules, out: io, html: true, format: format, validate: validate)
  end.string
end
|
53
98
|
|
54
99
|
##
|
55
100
|
# @param [Array<Rule>] rules
|
101
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
102
|
+
# @param [Boolean] html (false) generate HTML output
|
103
|
+
# @param [Boolean] validate (false) validate generated HTML.
|
56
104
|
# @param [Hash{Symbol => Object}] options
|
57
|
-
# @param [#write]
|
58
|
-
|
59
|
-
|
60
|
-
|
105
|
+
# @param [#write] out ($stdout)
|
106
|
+
# Format `rules` and write them to `out`, either as plain text or (with
# `html: true`) as an HTML table rendered through ERB_DESC.
#
# Nothing is written when `rules` is empty.
#
# @raise [ArgumentError] if `format` is not one of :abnf, :ebnf, :isoebnf
# @raise [EncodingError] if `validate` is set and the generated HTML has errors
def initialize(rules, out: $stdout, html: false, format: :ebnf, validate: false, **options)
  @options = options.merge(html: html)
  return if rules.empty?

  # Determine max LHS length
  format_meth = "format_#{format}".to_sym
  max_id = rules.max_by {|r| r.id.to_s.length}.id.to_s.length
  max_sym = rules.max_by {|r| r.sym.to_s.length}.sym.to_s.length
  lhs_length = max_sym + 1
  lhs_fmt = case format
  when :abnf    then "%<sym>-#{max_sym}s = "
  when :ebnf    then "%<sym>-#{max_sym}s ::= "
  when :isoebnf then "%<sym>-#{max_sym}s = "
  else raise ArgumentError, "unsupported format: #{format.inspect}"
  end
  if format == :ebnf && max_id > 0
    lhs_fmt = "%<id>-#{max_id+2}s " + lhs_fmt
    lhs_length += max_id + 3
  end
  rhs_length = (html ? LINE_LENGTH_HTML : LINE_LENGTH) - lhs_length

  if html
    # Output as formatted HTML
    begin
      require 'erubis'
      require 'htmlentities'
      @coder = HTMLEntities.new
      eruby = Erubis::Eruby.new(ERB_DESC)
      formatted_rules = rules.map do |rule|
        if rule.kind == :terminals || rule.kind == :pass
          OpenStruct.new(id: ("@#{rule.kind}"),
                         sym: nil,
                         assign: nil,
                         formatted: (
                           rule.kind == :terminals ?
                             "<strong># Productions for terminals</strong>" :
                             self.send(format_meth, rule.expr)))
        else
          formatted_expr = self.send(format_meth, rule.expr)
          # Measure text without markup
          formatted_expr_text = formatted_expr.gsub(%r{</?\w+[^>]*>}, '')
          if formatted_expr_text.length > rhs_length && (format != :abnf || rule.alt?)
            lines = []
            # Can only reasonably split apart alts
            self.send(format_meth, rule.expr, sep: "--rule-extensions--").
              split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
              # Each continuation starts with the operator that followed the separator.
              # Strip it only at the very start (\A): an unanchored match would also
              # remove an operator nested inside the first alternative.
              assign = case format
              when :ebnf
                formatted.sub!(%r{\A\s*<code>\|</code>\s*}, '')
                (ndx > 0 ? (rule.alt? ? '|' : '') : '::=')
              when :abnf
                formatted.sub!(%r{\A\s*<code>/</code>\s*}, '')
                (ndx > 0 ? '=/' : '=')
              else
                formatted.sub!(%r{\A\s*<code>\|</code>\s*}, '')
                (ndx > 0 ? (rule.alt? ? '|' : '') : '=')
              end
              lines << OpenStruct.new(id: ((ndx == 0 ? "[#{rule.id}]" : "") if rule.id),
                                      sym: (rule.sym if ndx == 0 || format == :abnf),
                                      assign: assign,
                                      formatted: formatted)
            end
            if format == :isoebnf
              lines << OpenStruct.new(assign: ';')
            end
            lines
          else
            OpenStruct.new(id: ("[#{rule.id}]" if rule.id),
                           sym: rule.sym,
                           assign: (format == :ebnf ? '::=' : '='),
                           formatted: (formatted_expr + (format == :isoebnf ? ' ;' : '')))
          end
        end
      end.flatten

      html_result = eruby.evaluate(format: format, rules: formatted_rules)

      if validate
        begin
          require 'nokogumbo'
          # Validate the output HTML
          doc = Nokogiri::HTML5("<!DOCTYPE html>" + html_result, max_errors: 10)
          raise EncodingError, "Errors found in generated HTML:\n " +
            doc.errors.map(&:to_s).join("\n ") unless doc.errors.empty?
        rescue LoadError
          # Skip
        end
      end

      out.write html_result
      return
    rescue LoadError
      $stderr.puts "Generating HTML requires erubis and htmlentities gems to be loaded"
      # Fall back to plain text. The formatters must not emit markup here because
      # @coder was never created (both requires precede its assignment).
      @options[:html] = false
      rhs_length = LINE_LENGTH - lhs_length
    end
  end

  # Format each rule, considering the available rhs size
  rules.each do |rule|
    buffer = if rule.pass?
      "\n%-#{lhs_length-2}s " % "@pass"
    elsif rule.kind == :terminals
      "\n%-#{lhs_length-2}s" % "@terminals"
    else
      lhs_fmt % {id: "[#{rule.id}]", sym: rule.sym}
    end
    formatted_expr = self.send(format_meth, rule.expr)
    if formatted_expr.length > rhs_length && (format != :abnf || rule.alt?)
      if format == :abnf
        # No whitespace, use =/
        self.send(format_meth, rule.expr, sep: "--rule-extensions--").
          split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
          if ndx > 0
            buffer << ("\n" + (lhs_fmt.sub('= ', '=/') % {id: "[#{rule.id}]", sym: rule.sym}))
          end
          # Only strip the leading alternation slash, never one inside the alternative
          buffer << formatted.sub(/\A\s*\/\s*/, '')
        end
      else
        # Space out past "= "
        buffer << self.send(format_meth, rule.expr, sep: ("\n" + " " * (lhs_length + (rule.alt? ? 2 : 4) - (format == :ebnf ? 0 : 2))))
        buffer << ("\n" + " " * (lhs_length) + ';') if format == :isoebnf
      end
    else
      buffer << (formatted_expr + (format == :isoebnf ? ' ;' : ''))
    end
    buffer << "\n\n" if [:terminals, :pass].include?(rule.kind)
    out.puts(buffer)
  end
end
|
104
233
|
|
105
234
|
protected
|
235
|
+
|
236
|
+
##
|
237
|
+
# W3C EBNF Formatters
|
238
|
+
##
|
239
|
+
|
106
240
|
# Format the expression part of a rule
|
107
|
-
def
|
108
|
-
return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
|
241
|
+
def format_ebnf(expr, sep: nil, embedded: false)
|
242
|
+
return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
|
109
243
|
if expr.is_a?(String)
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
return (@options[:html] ? %(<code class="grammar-char-escape">#{expr}</code>) : expr)
|
114
|
-
elsif expr =~ /"/
|
115
|
-
return (@options[:html] ? %('<code class="grammar-literal">#{escape(expr, "'")}</code>') : %('#{escape(expr, "'")}'))
|
116
|
-
else
|
117
|
-
return (@options[:html] ? %("<code class="grammar-literal">#{escape(expr, '"')}</code>") : %("#{escape(expr, '"')}"))
|
118
|
-
end
|
244
|
+
return expr.length == 1 ?
|
245
|
+
format_ebnf_char(expr) :
|
246
|
+
format_ebnf_string(expr, expr.include?('"') ? "'" : '"')
|
119
247
|
end
|
120
248
|
parts = {
|
121
249
|
alt: (@options[:html] ? "<code>|</code> " : "| "),
|
@@ -128,95 +256,472 @@ module EBNF
|
|
128
256
|
rparen = (@options[:html] ? "<code>)</code> " : ")")
|
129
257
|
|
130
258
|
case expr.first
|
259
|
+
when :istr
|
260
|
+
# Loses fidelity, but, oh well ...
|
261
|
+
format_ebnf(expr.last, embedded: true)
|
131
262
|
when :alt, :diff
|
132
263
|
this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
|
133
|
-
expr[1..-1].map {|e|
|
264
|
+
res = expr[1..-1].map {|e| format_ebnf(e, embedded: true)}.join(this_sep)
|
265
|
+
embedded ? (lparen + res + rparen) : res
|
134
266
|
when :star, :plus, :opt
|
135
|
-
raise "Expected star expression to have a single operand" unless expr.length == 2
|
136
267
|
char = parts[expr.first.to_sym]
|
137
|
-
r =
|
138
|
-
|
268
|
+
r = format_ebnf(expr[1], embedded: true)
|
269
|
+
"#{r}#{char}"
|
139
270
|
when :hex
|
140
|
-
(
|
271
|
+
escape_ebnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
|
141
272
|
when :range
|
142
|
-
|
273
|
+
format_ebnf_range(expr.last)
|
143
274
|
when :seq
|
144
275
|
this_sep = (sep ? sep : " ")
|
145
|
-
expr[1..-1].map
|
276
|
+
res = expr[1..-1].map do |e|
|
277
|
+
format_ebnf(e, embedded: true)
|
278
|
+
end.join(this_sep)
|
279
|
+
embedded ? (lparen + res + rparen) : res
|
280
|
+
when :rept
|
281
|
+
# Expand repetition
|
282
|
+
min, max, value = expr[1..-1]
|
283
|
+
if min == 0 && max == 1
|
284
|
+
format_ebnf([:opt, value], sep: sep, embedded: embedded)
|
285
|
+
elsif min == 0 && max == '*'
|
286
|
+
format_ebnf([:star, value], sep: sep, embedded: embedded)
|
287
|
+
elsif min == 1 && max == '*'
|
288
|
+
format_ebnf([:plus, value], sep: sep, embedded: embedded)
|
289
|
+
else
|
290
|
+
val2 = [:seq]
|
291
|
+
while min > 0
|
292
|
+
val2 << value
|
293
|
+
min -= 1
|
294
|
+
max -= 1 unless max == '*'
|
295
|
+
end
|
296
|
+
if max == '*'
|
297
|
+
val2 << [:star, value]
|
298
|
+
else
|
299
|
+
opt = nil
|
300
|
+
while max > 0
|
301
|
+
opt = [:opt, opt ? [:seq, value, opt] : value]
|
302
|
+
max -= 1
|
303
|
+
end
|
304
|
+
val2 << opt if opt
|
305
|
+
end
|
306
|
+
format_ebnf(val2, sep: sep, embedded: embedded)
|
307
|
+
end
|
146
308
|
else
|
147
309
|
raise "Unknown operator: #{expr.first}"
|
148
310
|
end
|
149
311
|
end
|
150
312
|
|
151
313
|
# Format a single-character string, preferring a hex escape for characters
# outside U+0021..U+007E and U+0080..U+FFFD.
#
# A double quote is wrapped in single quotes; every other literal character is
# wrapped in double quotes.
def format_ebnf_char(c)
  codepoint = c.ord
  literal = (0x21..0x7e).cover?(codepoint) || (0x80..0xFFFD).cover?(codepoint)
  return escape_ebnf_hex(c) unless literal

  if codepoint == 0x22
    @options[:html] ? %('<code class="grammar-literal">"</code>') : %{'"'}
  elsif @options[:html]
    %("<code class="grammar-literal">#{@coder.encode c}</code>")
  else
    %{"#{c}"}
  end
end
|
159
323
|
|
160
324
|
# Format a range
|
161
|
-
def
|
325
|
+
# Format the body of a range (the text between the brackets).
#
# Runs of printable characters are copied through (HTML-encoded when emitting
# HTML); `#x..` escapes and any other character are rendered via escape_ebnf_hex.
def format_ebnf_range(string)
  as_html = @options[:html]
  pieces = [as_html ? "<code>[</code> " : "["]
  scanner = StringScanner.new(string)

  until scanner.eos?
    piece =
      if (run = scanner.scan(/\A[!"\u0024-\u007e]+/))
        as_html ? %(<code class="grammar-literal">#{@coder.encode run}</code>) : run
      elsif (hex = scanner.scan(/\A#x\h+/))
        escape_ebnf_hex(hex[2..-1].hex.chr(Encoding::UTF_8))
      else
        escape_ebnf_hex(scanner.getch)
      end
    pieces << piece
  end

  pieces << (as_html ? "<code>]</code> " : "]")
  pieces.join
end
|
182
343
|
|
183
344
|
# Escape a string, using as many UTF-8 characters as possible
|
184
|
-
def
|
185
|
-
buffer = ""
|
345
|
+
# Wrap `string` in `quote` characters, HTML-encoding the result when emitting HTML.
#
# Control characters (U+0000..U+001F) and the quote character itself are only
# accepted when ISOEBNF::TERMINAL_CHARACTER matches them. The control range
# previously stopped at 0x19, which let U+001A..U+001F through unchecked.
#
# NOTE(review): the quote character is still accepted whenever
# TERMINAL_CHARACTER matches it, so a string containing its own delimiter is
# emitted as-is -- confirm whether that should raise instead.
#
# @param [String] string
# @param [String] quote single-character delimiter
# @return [String]
# @raise [RangeError] if a checked character is not a terminal character
def format_ebnf_string(string, quote = '"')
  string.each_char do |c|
    next unless c.ord <= 0x1F || c.ord == quote.ord
    next if ISOEBNF::TERMINAL_CHARACTER.match?(c)
    raise RangeError, "cannot format #{string.inspect} as an EBNF String: #{c.inspect} is out of range"
  end

  res = "#{quote}#{string}#{quote}"
  @options[:html] ? @coder.encode(res) : res
end
|
195
357
|
|
196
|
-
def
|
358
|
+
# Render a single character as an EBNF `#x..` escape.
#
# In HTML mode the escape is wrapped in an <abbr> whose title describes the
# character, inside a <code class="grammar-char-escape"> element.
#
# @param [String] u a single character
# @return [String]
def escape_ebnf_hex(u)
  fmt = case u.ord
  when 0x0000..0x00ff then "#x%02X"
  when 0x0100..0xffff then "#x%04X"
  else "#x%08X"
  end
  char = fmt % u.ord
  return char unless @options[:html]

  char = if u.ord <= 0x20
    %(<abbr title="#{ASCII_ESCAPE_NAMES[u.ord]}">#{@coder.encode char}</abbr>)
  elsif u.ord == 0x22
    # Emit the escape itself; this branch used to output a stray `>` and a raw quote
    %(<abbr title="quot">#{@coder.encode char}</abbr>)
  elsif u.ord < 0x7F
    %(<abbr title="ascii '#{@coder.encode u}'">#{@coder.encode char}</abbr>)
  elsif u.ord == 0x7F
    %(<abbr title="delete">#{@coder.encode char}</abbr>)
  elsif u.ord <= 0xFF
    %(<abbr title="extended ascii '#{@coder.encode char}'">#{char}</abbr>)
  elsif (%w(Control Private-use Surrogate Noncharacter Reserved) - ::Unicode::Types.of(u)).empty?
    # NOTE(review): this is true only when *all* listed types apply to `u`;
    # an intersection test may have been intended -- confirm.
    %(<abbr title="unicode '#{u}'">#{char}</abbr>)
  else
    %(<abbr title="unicode '#{::Unicode::Types.of(u).first}'">#{char}</abbr>)
  end
  %(<code class="grammar-char-escape">#{char}</code>)
end
|
387
|
+
|
388
|
+
##
|
389
|
+
# ABNF Formatters
|
390
|
+
##
|
391
|
+
|
392
|
+
# Format the expression part of a rule
|
393
|
+
# Format the expression part of a rule as ABNF.
#
# @param [Symbol, String, Array] expr a rule reference, a literal, or an
#   operator array such as `[:alt, ...]`
# @param [String] sep separator inserted between the operands of alt/seq
# @param [Boolean] embedded wrap alt/seq results in parentheses
# @param [Boolean] sensitive prefix string literals with `%s`
# @return [String]
# @raise [RangeError] for the :diff operator, which ABNF cannot express
def format_abnf(expr, sep: nil, embedded: false, sensitive: true)
  return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
  if expr.is_a?(String)
    if expr.length == 1
      return format_abnf_char(expr)
    elsif expr.start_with?('%')
      # Already encoded
      return expr
    elsif expr.include?('"')
      # ABNF literals cannot contain a double quote, so emit the pieces between
      # quotes as a sequence with %x22 standing in for each quote. A negative
      # split limit keeps trailing empty pieces so no quote is dropped.
      seq = expr.split('"', -1).
        flat_map {|s| [[:hex, "#x22"], s]}.
        drop(1).
        reject {|s| s == ""}
      return format_abnf(seq.unshift(:seq), sep: nil, embedded: false)
    else
      # The %s marker belongs in front of the quoted literal, not inside it
      prefix = sensitive ? '%s' : ''
      return (@options[:html] ? %(#{prefix}"<code class="grammar-literal">#{@coder.encode expr}</code>") : %(#{prefix}"#{expr}"))
    end
  end
  parts = {
    alt:  (@options[:html] ? "<code>/</code> " : "/ "),
    star: (@options[:html] ? "<code>*</code> " : "*"),
    plus: (@options[:html] ? "<code>+</code> " : "1*"),
    opt:  (@options[:html] ? "<code>?</code> " : "?")
  }
  lbrac = (@options[:html] ? "<code>[</code> " : "[")
  rbrac = (@options[:html] ? "<code>]</code> " : "]")
  lparen = (@options[:html] ? "<code>(</code> " : "(")
  rparen = (@options[:html] ? "<code>)</code> " : ")")

  case expr.first
  when :istr
    # FIXME: if string part is segmented, need to do something different
    format_abnf(expr.last, embedded: true, sensitive: false)
  when :alt
    this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
    res = expr[1..-1].map {|e| format_abnf(e, embedded: true)}.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :diff
    raise RangeError, "ABNF does not support the diff operator"
  when :opt
    r = format_abnf(expr[1], embedded: true)
    "#{lbrac}#{r}#{rbrac}"
  when :plus, :star
    char = parts[expr.first.to_sym]
    r = format_abnf(expr[1], embedded: true)
    "#{char}#{r}"
  when :hex
    # An explicit encoding is required: Integer#chr alone raises above U+00FF
    escape_abnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
  when :range
    # Returns an [:alt] or [:not [:alt]] if composed of multiple sequences
    # Note: ABNF does not support the `not` operator
    res = format_abnf_range(expr.last)
    res.is_a?(Array) ?
      format_abnf(res, embedded: true) :
      res
  when :seq
    this_sep = (sep ? sep : " ")
    res = expr[1..-1].map do |e|
      format_abnf(e, embedded: true)
    end.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :rept
    # Expand repetition
    min, max, value = expr[1..-1]
    r = format_abnf(value, embedded: true)
    if min == max
      "#{min}#{r}"
    elsif min == 0 && max == '*'
      "#{parts[:star]}#{r}"
    elsif min > 0 && max == '*'
      "#{min}#{parts[:star]}#{r}"
    else
      "#{min}#{parts[:star]}#{max}#{r}"
    end
  else
    raise "Unknown operator: #{expr.first}"
  end
end
|
475
|
+
|
476
|
+
# Format a single-character string, preferring a hex escape for anything
# outside printable ASCII (and for the double quote itself).
#
# @param [String] c
# @return [String]
def format_abnf_char(c)
  if /[\x20-\x21\x23-\x7E]/.match?(c)
    # ABNF quoted strings have no escape mechanism, so the character is emitted
    # verbatim; String#inspect would turn a backslash into two.
    @options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %("#{c}")
  else
    escape_abnf_hex(c)
  end
end
|
484
|
+
|
485
|
+
# Format a range
|
486
|
+
#
|
487
|
+
# Presumes range has already been validated
|
488
|
+
# Convert the body of an EBNF range into ABNF numeric values.
#
# Presumes the range has already been validated, but still refuses input it
# cannot scan rather than looping forever.
#
# @param [String] string range contents without the surrounding brackets
# @return [String, Array] a single `%d..`/`%x..` value, or an `[:alt, ...]`
#   array when several values are needed
# @raise [RangeError] for negated ranges and characters that cannot be scanned
def format_abnf_range(string)
  alt, o_dash = [:alt], false

  raise RangeError, "cannot format #{string.inspect} as an ABNF range" if string.start_with?('^')

  # A trailing dash is a literal dash, not a range operator
  if string.end_with?('-')
    o_dash = true
    string = string[0..-2]
  end

  scanner = StringScanner.new(string)
  hexes, deces = [], []
  in_range = false
  # Build op (alt) from different ranges/enums
  until scanner.eos?
    start = scanner.pos
    if hex = scanner.scan(Terminals::HEX)
      # Append any decimal values
      alt << "%d" + deces.join(".") unless deces.empty?
      deces = []

      if in_range
        # Add "." sequences for any previous hexes
        alt << "%x" + hexes[0..-2].join(".") if hexes.length > 1
        alt << "%x#{hexes.last}-#{hex[2..-1]}"
        in_range, hexes = false, []
      else
        hexes << hex[2..-1]
      end
    elsif dec = scanner.scan(Terminals::R_CHAR)
      # Append any hexadecimal values
      alt << "%x" + hexes.join(".") unless hexes.empty?
      hexes = []

      if in_range
        # Add "." sequences for any previous decimals
        alt << "%d" + deces[0..-2].join(".") if deces.length > 1
        alt << "%d#{deces.last}-#{dec.codepoints.first}"
        in_range, deces = false, []
      else
        deces << dec.codepoints.first.to_s
      end
    end

    in_range = true if scanner.scan(/\-/)

    # Nothing was consumed this pass: the next character is neither a HEX, an
    # R_CHAR nor a dash, so the loop would never terminate.
    if scanner.pos == start
      raise RangeError, "cannot format #{string.inspect} as an ABNF range: unexpected #{scanner.rest[0].inspect}"
    end
  end

  deces << '45' if o_dash

  # Append hexes and deces as "." sequences (should be only one)
  alt << "%d" + deces.join(".") unless deces.empty?
  alt << "%x" + hexes.join(".") unless hexes.empty?

  # FIXME: HTML abbreviations?
  if alt.length == 2
    # Just return the range or enum
    alt.last
  else
    # Return the alt, which will be further formatted
    alt
  end
end
|
549
|
+
|
550
|
+
# Render a single character as an ABNF `%x..` numeric value.
#
# In HTML mode the value is wrapped in an <abbr> whose title describes the
# character, inside a <code class="grammar-char-escape"> element.
#
# @param [String] u a single character
# @return [String]
def escape_abnf_hex(u)
  fmt = case u.ord
  when 0x0000..0x00ff then "%02X"
  when 0x0100..0xffff then "%04X"
  else "%08X"
  end
  char = "%x" + (fmt % u.ord)
  return char unless @options[:html]

  char = if u.ord <= 0x20
    %(<abbr title="#{ASCII_ESCAPE_NAMES[u.ord]}">#{@coder.encode char}</abbr>)
  elsif u.ord == 0x22
    # This branch used to build a malformed string and then discard it
    %(<abbr title="quot">#{@coder.encode char}</abbr>)
  elsif u.ord < 0x7F
    # Titles are attribute values, so the character itself must be encoded
    %(<abbr title="ascii '#{@coder.encode u}'">#{@coder.encode char}</abbr>)
  elsif u.ord == 0x7F
    %(<abbr title="delete">#{@coder.encode char}</abbr>)
  elsif u.ord <= 0xFF
    %(<abbr title="extended ascii '#{@coder.encode u}'">#{char}</abbr>)
  else
    %(<abbr title="unicode '#{@coder.encode u.unicode_normalize}'">#{char}</abbr>)
  end
  %(<code class="grammar-char-escape">#{char}</code>)
end
|
576
|
+
|
577
|
+
##
|
578
|
+
# ISO EBNF Formatters
|
579
|
+
##
|
580
|
+
|
581
|
+
# Format the expression part of a rule
|
582
|
+
# Format the expression part of a rule as ISO EBNF.
#
# @param [Symbol, String, Array] expr a rule reference, a literal (or `#x..`
#   escape), or an operator array such as `[:alt, ...]`
# @param [String] sep separator inserted between the operands of alt/diff/seq
# @param [Boolean] embedded wrap alt/diff/seq results in parentheses
# @return [String]
# @raise [RangeError] if a literal contains a character that
#   ISOEBNF::TERMINAL_CHARACTER does not match
def format_isoebnf(expr, sep: nil, embedded: false)
  return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
  if expr.is_a?(String)
    # Decode a `#x..` escape. The match is anchored at both ends so only a pure
    # escape is decoded, and the encoding is explicit because Integer#chr alone
    # raises above U+00FF.
    expr = expr[2..-1].hex.chr(Encoding::UTF_8) if expr.match?(/\A#x\h+\z/)
    expr.each_char do |c|
      raise RangeError, "cannot format #{expr.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless
        ISOEBNF::TERMINAL_CHARACTER.match?(c)
    end
    if expr.include?('"')
      return (@options[:html] ? %('<code class="grammar-literal">#{@coder.encode expr}</code>') : %('#{expr}'))
    else
      return (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode expr}</code>") : %("#{expr}"))
    end
  end
  parts = {
    alt:  (@options[:html] ? "<code>|</code> " : "| "),
    diff: (@options[:html] ? "<code>-</code> " : "- "),
  }
  lparen = (@options[:html] ? "<code>(</code> " : "(")
  rparen = (@options[:html] ? "<code>)</code> " : ")")

  case expr.first
  when :istr
    # Loses fidelity, but, oh well ...
    format_isoebnf(expr.last, embedded: true)
  when :alt, :diff
    this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
    res = expr[1..-1].map {|e| format_isoebnf(e, embedded: true)}.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :opt
    r = format_isoebnf(expr[1], embedded: true)
    "[#{r}]"
  when :star
    r = format_isoebnf(expr[1], embedded: true)
    "{#{r}}"
  when :plus
    r = format_isoebnf(expr[1], embedded: true)
    "#{r}, {#{r}}"
  when :hex
    format_isoebnf(expr[1], embedded: true)
  when :range
    res = format_isoebnf_range(expr.last)
    res.is_a?(Array) ?
      format_isoebnf(res, embedded: true) :
      res
  when :seq
    this_sep = "," + (sep ? sep : " ")
    res = expr[1..-1].map do |e|
      format_isoebnf(e, embedded: true)
    end.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :rept
    # Expand repetition
    min, max, value = expr[1..-1]
    if min == 0 && max == 1
      format_isoebnf([:opt, value], sep: sep, embedded: embedded)
    elsif min == 0 && max == '*'
      format_isoebnf([:star, value], sep: sep, embedded: embedded)
    elsif min == 1 && max == '*'
      format_isoebnf([:plus, value], sep: sep, embedded: embedded)
    else
      # `min` mandatory copies, followed by either a star or nested optionals
      val2 = [:seq]
      while min > 0
        val2 << value
        min -= 1
        max -= 1 unless max == '*'
      end
      if max == '*'
        val2 << [:star, value]
      else
        opt = nil
        while max > 0
          opt = [:opt, opt ? [:seq, value, opt] : value]
          max -= 1
        end
        val2 << opt if opt
      end
      format_isoebnf(val2, sep: sep, embedded: embedded)
    end
  else
    raise "Unknown operator: #{expr.first}"
  end
end
|
665
|
+
|
666
|
+
# Format a range
# ISO EBNF has no range syntax, so the range is expanded into an alternation
# of its individual characters.
#
# @param [String] string range contents without the surrounding brackets
# @return [String, Array] a quoted character when there is exactly one,
#   otherwise an `[:alt, ...]` array of characters
# @raise [RangeError] for negated ranges, characters outside
#   ISOEBNF::TERMINAL_CHARACTER, and descending ranges
def format_isoebnf_range(string)
  chars = []
  o_dash = false

  raise RangeError, "cannot format #{string.inspect} as an ISO EBNF range" if string.start_with?('^')

  # A trailing dash is a literal dash, not a range operator
  if string.end_with?('-')
    o_dash = true
    string = string[0..-2]
  end

  scanner = StringScanner.new(string)
  in_range = false
  # Build chars from different ranges/enums
  until scanner.eos?
    char = if hex = scanner.scan(Terminals::HEX)
      # Integer#chr (not the non-existent #char) converts the code point
      hex[2..-1].hex.chr(Encoding::UTF_8)
    else
      scanner.scan(Terminals::R_CHAR)
    end
    raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration: #{char.inspect} is out of range" unless
      char && ISOEBNF::TERMINAL_CHARACTER.match?(char)

    if in_range
      # calculate characters from chars.last to this char
      raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration" unless chars.last < char
      chars.concat (chars.last..char).to_a[1..-1]
      in_range = false
    else
      chars << char
    end

    in_range = true if scanner.scan(/\-/)
  end

  chars << '-' if o_dash

  # Possibly only a single character (no character?)
  chars.length == 1 ? chars.last.inspect : chars.unshift(:alt)
end
|
707
|
+
|
708
|
+
ERB_DESC = %q(
|
709
|
+
<table class="grammar">
|
710
|
+
<tbody id="grammar-productions" class="<%= @format %>">
|
711
|
+
<% for rule in @rules %>
|
712
|
+
<tr<%= %{ id="grammar-production-#{rule.sym}"} unless %w(=/ |).include?(rule.assign) || rule.sym.nil?%>>
|
713
|
+
<% if rule.id %>
|
714
|
+
<td<%= " colspan=2" unless rule.sym %>><%= rule.id %></td>
|
715
|
+
<% end %>
|
716
|
+
<% if rule.sym %>
|
717
|
+
<td><code><%== rule.sym %></code></td>
|
718
|
+
<% end %>
|
719
|
+
<td><%= rule.assign %></td>
|
720
|
+
<td><%= rule.formatted %></td>
|
721
|
+
</tr>
|
722
|
+
<% end %>
|
723
|
+
</tbody>
|
724
|
+
</table>
|
220
725
|
).gsub(/^ /, '')
|
221
726
|
end
|
222
727
|
end
|