ebnf 1.1.3 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +221 -198
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +40 -21
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/doap.ttl +23 -15
- data/etc/ebnf.ebnf +21 -33
- data/etc/ebnf.html +171 -160
- data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
- data/etc/ebnf.ll1.sxp +182 -183
- data/etc/ebnf.peg.rb +90 -0
- data/etc/ebnf.peg.sxp +84 -0
- data/etc/ebnf.sxp +40 -41
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +363 -362
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +103 -101
- data/lib/ebnf.rb +7 -2
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +113 -69
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +138 -6
- data/lib/ebnf/ll1/lexer.rb +37 -32
- data/lib/ebnf/ll1/parser.rb +113 -73
- data/lib/ebnf/ll1/scanner.rb +83 -51
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +561 -0
- data/lib/ebnf/peg/rule.rb +250 -0
- data/lib/ebnf/rule.rb +443 -148
- data/lib/ebnf/terminals.rb +21 -0
- data/lib/ebnf/writer.rb +565 -83
- metadata +107 -29
- data/etc/sparql.rb +0 -45773
@@ -0,0 +1,21 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
# Terminal definitions for the EBNF grammar
|
3
|
+
module EBNF::Terminals
|
4
|
+
SYMBOL_BASE = %r(\b[a-zA-Z0-9_\.]+\b)u.freeze
|
5
|
+
SYMBOL = %r(#{SYMBOL_BASE}(?!\s*::=))u.freeze
|
6
|
+
HEX = %r(\#x\h+)u.freeze
|
7
|
+
CHAR = %r([\u0009\u000A\u000D\u0020-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
|
8
|
+
R_CHAR = %r([\u0009\u000A\u000D\u0020-\u002C\u002E-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
|
9
|
+
RANGE = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX})|#{R_CHAR}|#{HEX})+-?\](?!\s+#{SYMBOL_BASE}\s*::=))u.freeze
|
10
|
+
LHS = %r((?:\[#{SYMBOL_BASE}\])?\s*#{SYMBOL_BASE}\s*::=)u.freeze
|
11
|
+
O_RANGE = %r(\[\^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX}|#{R_CHAR}|#{HEX}))+-?\])u.freeze
|
12
|
+
STRING1 = %r("[\u0009\u000A\u000D\u0020\u0021\u0023-\uD7FF\u{10000}-\u{10FFFF}]*")u.freeze
|
13
|
+
STRING2 = %r('[\u0009\u000A\u000D\u0020-\u0026\u0028-\uD7FF\u{10000}-\u{10FFFF}]*')u.freeze
|
14
|
+
POSTFIX = %r([?*+])u.freeze
|
15
|
+
PASS = %r((
|
16
|
+
\s
|
17
|
+
| (?:(?:\#[^x]|//)[^\n\r]*)
|
18
|
+
| (?:/\*(?:(?:\*[^/])|[^*])*\*/)
|
19
|
+
| (?:\(\*(?:(?:\*[^\)])|[^*])*\*\))
|
20
|
+
)+)xmu.freeze
|
21
|
+
end
|
data/lib/ebnf/writer.rb
CHANGED
@@ -1,22 +1,62 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
require 'rdf'
|
3
3
|
require 'strscan' unless defined?(StringScanner)
|
4
|
+
require "ostruct"
|
4
5
|
|
5
6
|
##
|
6
7
|
# Serialize ruleset back to EBNF
|
7
8
|
module EBNF
|
8
9
|
class Writer
|
9
10
|
LINE_LENGTH = 80
|
11
|
+
LINE_LENGTH_HTML = 200
|
12
|
+
|
13
|
+
# ASCII escape names
|
14
|
+
ASCII_ESCAPE_NAMES = [
|
15
|
+
"null", #x00
|
16
|
+
"start of heading", #x01
|
17
|
+
"start of text", #x02
|
18
|
+
"end of text", #x03
|
19
|
+
"end of transmission", #x04
|
20
|
+
"enquiry", #x05
|
21
|
+
"acknowledge", #x06
|
22
|
+
"bell", #x07
|
23
|
+
"backspace", #x08
|
24
|
+
"horizontal tab", #x09
|
25
|
+
"new line", #x0A
|
26
|
+
"vertical tab", #x0B
|
27
|
+
"form feed", #x0C
|
28
|
+
"carriage return", #x0D
|
29
|
+
"shift out", #x0E
|
30
|
+
"shift in", #x0F
|
31
|
+
"data link escape", #x10
|
32
|
+
"device control 1", #x11
|
33
|
+
"device control 2", #x12
|
34
|
+
"device control 3", #x13
|
35
|
+
"device control 4", #x14
|
36
|
+
"negative acknowledge", #x15
|
37
|
+
"synchronous idle", #x16
|
38
|
+
"end of trans. block", #x17
|
39
|
+
"cancel", #x18
|
40
|
+
"end of medium", #x19
|
41
|
+
"substitute", #x1A
|
42
|
+
"escape", #x1B
|
43
|
+
"file separator", #x1C
|
44
|
+
"group separator", #x1D
|
45
|
+
"record separator", #x1E
|
46
|
+
"unit separator", #x1F
|
47
|
+
"space" #x20
|
48
|
+
]
|
10
49
|
|
11
50
|
##
|
12
51
|
# Format rules to a String
|
13
52
|
#
|
14
53
|
# @param [Array<Rule>] rules
|
54
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
15
55
|
# @return [Object]
|
16
|
-
def self.string(*rules)
|
56
|
+
def self.string(*rules, format: :ebnf)
|
17
57
|
require 'stringio' unless defined?(StringIO)
|
18
58
|
buf = StringIO.new
|
19
|
-
write(buf, *rules)
|
59
|
+
write(buf, *rules, format: format)
|
20
60
|
buf.string
|
21
61
|
end
|
22
62
|
|
@@ -24,9 +64,10 @@ module EBNF
|
|
24
64
|
# Format rules to $stdout
|
25
65
|
#
|
26
66
|
# @param [Array<Rule>] rules
|
67
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
27
68
|
# @return [Object]
|
28
|
-
def self.print(*rules)
|
29
|
-
write($stdout, *rules)
|
69
|
+
def self.print(*rules, format: :ebnf)
|
70
|
+
write($stdout, *rules, format: format)
|
30
71
|
end
|
31
72
|
|
32
73
|
##
|
@@ -34,88 +75,158 @@ module EBNF
|
|
34
75
|
#
|
35
76
|
# @param [Object] out
|
36
77
|
# @param [Array<Rule>] rules
|
78
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
37
79
|
# @return [Object]
|
38
|
-
def self.write(out, *rules)
|
39
|
-
Writer.new(rules, out: out)
|
80
|
+
def self.write(out, *rules, format: :ebnf)
|
81
|
+
Writer.new(rules, out: out, format: format)
|
40
82
|
end
|
41
83
|
|
42
84
|
##
|
43
85
|
# Write formatted rules to an IO like object as HTML
|
44
86
|
#
|
45
87
|
# @param [Array<Rule>] rules
|
88
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
46
89
|
# @return [Object]
|
47
|
-
def self.html(*rules)
|
90
|
+
def self.html(*rules, format: :ebnf)
|
48
91
|
require 'stringio' unless defined?(StringIO)
|
49
92
|
buf = StringIO.new
|
50
|
-
Writer.new(rules, out: buf, html: true)
|
93
|
+
Writer.new(rules, out: buf, html: true, format: format)
|
51
94
|
buf.string
|
52
95
|
end
|
53
96
|
|
54
97
|
##
|
55
98
|
# @param [Array<Rule>] rules
|
56
99
|
# @param [Hash{Symbol => Object}] options
|
57
|
-
# @param [#write]
|
58
|
-
# @
|
59
|
-
|
60
|
-
|
100
|
+
# @param [#write] out ($stdout)
|
101
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
102
|
+
# @option options [Symbol] format
|
103
|
+
# @option options [Boolean] html (false)
|
104
|
+
def initialize(rules, out: $stdout, html: false, format: :ebnf, **options)
|
105
|
+
@options = options.merge(html: html)
|
106
|
+
return if rules.empty?
|
61
107
|
|
62
108
|
# Determine max LHS length
|
109
|
+
format_meth = "format_#{format}".to_sym
|
63
110
|
max_id = rules.max_by {|r| r.id.to_s.length}.id.to_s.length
|
64
111
|
max_sym = rules.max_by {|r| r.sym.to_s.length}.sym.to_s.length
|
65
|
-
lhs_length = max_sym +
|
66
|
-
lhs_fmt =
|
67
|
-
|
112
|
+
lhs_length = max_sym + 1
|
113
|
+
lhs_fmt = case format
|
114
|
+
when :abnf then "%<sym>-#{max_sym}s = "
|
115
|
+
when :ebnf then "%<sym>-#{max_sym}s ::= "
|
116
|
+
when :isoebnf then "%<sym>-#{max_sym}s = "
|
117
|
+
end
|
118
|
+
if format == :ebnf && max_id > 0
|
68
119
|
lhs_fmt = "%<id>-#{max_id+2}s " + lhs_fmt
|
69
120
|
lhs_length += max_id + 3
|
70
121
|
end
|
71
|
-
rhs_length = LINE_LENGTH - lhs_length
|
122
|
+
rhs_length = (html ? LINE_LENGTH_HTML : LINE_LENGTH) - lhs_length
|
72
123
|
|
73
124
|
if html
|
74
125
|
# Output as formatted HTML
|
75
126
|
begin
|
76
|
-
require '
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
127
|
+
require 'erubis'
|
128
|
+
require 'htmlentities'
|
129
|
+
@coder = HTMLEntities.new
|
130
|
+
eruby = Erubis::Eruby.new(ERB_DESC)
|
131
|
+
formatted_rules = rules.map do |rule|
|
132
|
+
if rule.kind == :terminals || rule.kind == :pass
|
133
|
+
OpenStruct.new(id: ("@#{rule.kind}"),
|
134
|
+
sym: nil,
|
135
|
+
assign: nil,
|
136
|
+
formatted: (
|
137
|
+
rule.kind == :terminals ?
|
138
|
+
"<strong># Productions for terminals</strong>" :
|
139
|
+
self.send(format_meth, rule.expr)))
|
140
|
+
else
|
141
|
+
formatted_expr = self.send(format_meth, rule.expr)
|
142
|
+
# Measure text without markup
|
143
|
+
formatted_expr_text = formatted_expr.gsub(%r{</?\w+[^>]*>}, '')
|
144
|
+
if formatted_expr_text.length > rhs_length && (format != :abnf || rule.alt?)
|
145
|
+
lines = []
|
146
|
+
# Can only reasonably split apart alts
|
147
|
+
self.send(format_meth, rule.expr, sep: "--rule-extensions--").
|
148
|
+
split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
|
149
|
+
assign = case format
|
150
|
+
when :ebnf
|
151
|
+
formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
|
152
|
+
(ndx > 0 ? (rule.alt? ? '|' : '') : '::=')
|
153
|
+
when :abnf
|
154
|
+
formatted.sub!(%r{\s*<code>/</code>\s*}, '')
|
155
|
+
(ndx > 0 ? '=/' : '=')
|
156
|
+
else
|
157
|
+
formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
|
158
|
+
(ndx > 0 ? (rule.alt? ? '|' : '') : '=')
|
159
|
+
end
|
160
|
+
lines << OpenStruct.new(id: ((ndx == 0 ? "[#{rule.id}]" : "") if rule.id),
|
161
|
+
sym: (rule.sym if ndx == 0 || format == :abnf),
|
162
|
+
assign: assign,
|
163
|
+
formatted: formatted)
|
164
|
+
end
|
165
|
+
if format == :isoebnf
|
166
|
+
lines << OpenStruct.new(assign: ';')
|
167
|
+
end
|
168
|
+
lines
|
169
|
+
else
|
170
|
+
OpenStruct.new(id: ("[#{rule.id}]" if rule.id),
|
171
|
+
sym: rule.sym,
|
172
|
+
assign: (format == :ebnf ? '::=' : '='),
|
173
|
+
formatted: (formatted_expr + (format == :isoebnf ? ' ;' : '')))
|
174
|
+
end
|
175
|
+
end
|
176
|
+
end.flatten
|
177
|
+
out.write eruby.evaluate(format: format, rules: formatted_rules)
|
82
178
|
return
|
83
179
|
rescue LoadError
|
84
|
-
$stderr.puts "Generating HTML requires
|
180
|
+
$stderr.puts "Generating HTML requires erubis and htmlentities gems to be loaded"
|
85
181
|
end
|
86
182
|
end
|
87
183
|
|
88
184
|
# Format each rule, considering the available rhs size
|
89
185
|
rules.each do |rule|
|
90
186
|
buffer = if rule.pass?
|
91
|
-
"%-#{lhs_length-2}s" % "@pass"
|
187
|
+
"\n%-#{lhs_length-2}s " % "@pass"
|
188
|
+
elsif rule.kind == :terminals
|
189
|
+
"\n%-#{lhs_length-2}s" % "@terminals"
|
92
190
|
else
|
93
191
|
lhs_fmt % {id: "[#{rule.id}]", sym: rule.sym}
|
94
192
|
end
|
95
|
-
formatted_expr =
|
96
|
-
if formatted_expr.length > rhs_length
|
97
|
-
|
193
|
+
formatted_expr = self.send(format_meth, rule.expr)
|
194
|
+
if formatted_expr.length > rhs_length && (format != :abnf || rule.alt?)
|
195
|
+
if format == :abnf
|
196
|
+
# No whitespace, use =/
|
197
|
+
self.send(format_meth, rule.expr, sep: "--rule-extensions--").
|
198
|
+
split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
|
199
|
+
if ndx > 0
|
200
|
+
buffer << "\n" + lhs_fmt.sub('= ', '=/') % {id: "[#{rule.id}]", sym: rule.sym}
|
201
|
+
end
|
202
|
+
buffer << formatted.sub(/\s*\/\s*/, '')
|
203
|
+
end
|
204
|
+
else
|
205
|
+
# Space out past "= "
|
206
|
+
buffer << self.send(format_meth, rule.expr, sep: ("\n" + " " * (lhs_length + (rule.alt? ? 2 : 4) - (format == :ebnf ? 0 : 2))))
|
207
|
+
buffer << ("\n" + " " * (lhs_length) + ';') if format == :isoebnf
|
208
|
+
end
|
98
209
|
else
|
99
|
-
buffer << formatted_expr
|
210
|
+
buffer << formatted_expr + (format == :isoebnf ? ' ;' : '')
|
100
211
|
end
|
212
|
+
buffer << "\n\n" if [:terminals, :pass].include?(rule.kind)
|
101
213
|
out.puts(buffer)
|
102
214
|
end
|
103
215
|
end
|
104
216
|
|
105
217
|
protected
|
218
|
+
|
219
|
+
##
|
220
|
+
# W3C EBNF Formatters
|
221
|
+
##
|
222
|
+
|
106
223
|
# Format the expression part of a rule
|
107
|
-
def
|
108
|
-
return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
|
224
|
+
def format_ebnf(expr, sep: nil, embedded: false)
|
225
|
+
return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
|
109
226
|
if expr.is_a?(String)
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
return (@options[:html] ? %(<code class="grammar-char-escape">#{expr}</code>) : expr)
|
114
|
-
elsif expr =~ /"/
|
115
|
-
return (@options[:html] ? %('<code class="grammar-literal">#{escape(expr, "'")}</code>') : %('#{escape(expr, "'")}'))
|
116
|
-
else
|
117
|
-
return (@options[:html] ? %("<code class="grammar-literal">#{escape(expr, '"')}</code>") : %("#{escape(expr, '"')}"))
|
118
|
-
end
|
227
|
+
return expr.length == 1 ?
|
228
|
+
format_ebnf_char(expr) :
|
229
|
+
format_ebnf_string(expr, expr.include?('"') ? "'" : '"')
|
119
230
|
end
|
120
231
|
parts = {
|
121
232
|
alt: (@options[:html] ? "<code>|</code> " : "| "),
|
@@ -128,95 +239,466 @@ module EBNF
|
|
128
239
|
rparen = (@options[:html] ? "<code>)</code> " : ")")
|
129
240
|
|
130
241
|
case expr.first
|
242
|
+
when :istr
|
243
|
+
# Looses fidelity, but, oh well ...
|
244
|
+
format_ebnf(expr.last, embedded: true)
|
131
245
|
when :alt, :diff
|
132
246
|
this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
|
133
|
-
expr[1..-1].map {|e|
|
247
|
+
res = expr[1..-1].map {|e| format_ebnf(e, embedded: true)}.join(this_sep)
|
248
|
+
embedded ? (lparen + res + rparen) : res
|
134
249
|
when :star, :plus, :opt
|
135
|
-
raise "Expected star expression to have a single operand" unless expr.length == 2
|
136
250
|
char = parts[expr.first.to_sym]
|
137
|
-
r =
|
138
|
-
|
251
|
+
r = format_ebnf(expr[1], embedded: true)
|
252
|
+
"#{r}#{char}"
|
139
253
|
when :hex
|
140
|
-
(
|
254
|
+
escape_ebnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
|
141
255
|
when :range
|
142
|
-
|
256
|
+
format_ebnf_range(expr.last)
|
143
257
|
when :seq
|
144
258
|
this_sep = (sep ? sep : " ")
|
145
|
-
expr[1..-1].map
|
259
|
+
res = expr[1..-1].map do |e|
|
260
|
+
format_ebnf(e, embedded: true)
|
261
|
+
end.join(this_sep)
|
262
|
+
embedded ? (lparen + res + rparen) : res
|
263
|
+
when :rept
|
264
|
+
# Expand repetition
|
265
|
+
min, max, value = expr[1..-1]
|
266
|
+
if min == 0 && max == 1
|
267
|
+
format_ebnf([:opt, value], sep: sep, embedded: embedded)
|
268
|
+
elsif min == 0 && max == '*'
|
269
|
+
format_ebnf([:star, value], sep: sep, embedded: embedded)
|
270
|
+
elsif min == 1 && max == '*'
|
271
|
+
format_ebnf([:plus, value], sep: sep, embedded: embedded)
|
272
|
+
else
|
273
|
+
val2 = [:seq]
|
274
|
+
while min > 0
|
275
|
+
val2 << value
|
276
|
+
min -= 1
|
277
|
+
max -= 1 unless max == '*'
|
278
|
+
end
|
279
|
+
if max == '*'
|
280
|
+
val2 << [:star, value]
|
281
|
+
else
|
282
|
+
opt = nil
|
283
|
+
while max > 0
|
284
|
+
opt = [:opt, opt ? [:seq, value, opt] : value]
|
285
|
+
max -= 1
|
286
|
+
end
|
287
|
+
val2 << opt if opt
|
288
|
+
end
|
289
|
+
format_ebnf(val2, sep: sep, embedded: embedded)
|
290
|
+
end
|
146
291
|
else
|
147
292
|
raise "Unknown operator: #{expr.first}"
|
148
293
|
end
|
149
294
|
end
|
150
295
|
|
151
296
|
# Format a single-character string, prefering hex for non-main ASCII
|
152
|
-
def
|
297
|
+
def format_ebnf_char(c)
|
153
298
|
case c.ord
|
154
|
-
when
|
155
|
-
when
|
156
|
-
|
299
|
+
when (0x21) then (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"})
|
300
|
+
when 0x22 then (@options[:html] ? %('<code class="grammar-literal">"</code>') : %{'"'})
|
301
|
+
when (0x23..0x7e) then (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"})
|
302
|
+
when (0x80..0xFFFD) then (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"})
|
303
|
+
else escape_ebnf_hex(c)
|
157
304
|
end
|
158
305
|
end
|
159
306
|
|
160
307
|
# Format a range
|
161
|
-
def
|
308
|
+
def format_ebnf_range(string)
|
162
309
|
lbrac = (@options[:html] ? "<code>[</code> " : "[")
|
163
310
|
rbrac = (@options[:html] ? "<code>]</code> " : "]")
|
164
|
-
dash = (@options[:html] ? "<code>-</code> " : "-")
|
165
311
|
|
166
312
|
buffer = lbrac
|
167
313
|
s = StringScanner.new(string)
|
168
314
|
while !s.eos?
|
169
315
|
case
|
170
316
|
when s.scan(/\A[!"\u0024-\u007e]+/)
|
171
|
-
buffer << (@options[:html] ? %(<code class="grammar-literal">#{s.matched}</code>) : s.matched)
|
317
|
+
buffer << (@options[:html] ? %(<code class="grammar-literal">#{@coder.encode s.matched}</code>) : s.matched)
|
172
318
|
when s.scan(/\A#x\h+/)
|
173
|
-
buffer << (
|
174
|
-
when s.scan(/\A-/)
|
175
|
-
buffer << dash
|
319
|
+
buffer << escape_ebnf_hex(s.matched[2..-1].hex.chr(Encoding::UTF_8))
|
176
320
|
else
|
177
|
-
buffer << (
|
321
|
+
buffer << escape_ebnf_hex(s.getch)
|
178
322
|
end
|
179
323
|
end
|
180
324
|
buffer + rbrac
|
181
325
|
end
|
182
326
|
|
183
327
|
# Escape a string, using as many UTF-8 characters as possible
|
184
|
-
def
|
185
|
-
buffer = ""
|
328
|
+
def format_ebnf_string(string, quote = '"')
|
186
329
|
string.each_char do |c|
|
187
|
-
|
188
|
-
when
|
189
|
-
|
190
|
-
|
330
|
+
case c.ord
|
331
|
+
when 0x00..0x19, quote.ord
|
332
|
+
raise RangeError, "cannot format #{string.inspect} as an EBNF String: #{c.inspect} is out of range" unless
|
333
|
+
ISOEBNF::TERMINAL_CHARACTER.match?(c)
|
191
334
|
end
|
192
335
|
end
|
193
|
-
|
336
|
+
|
337
|
+
res = "#{quote}#{string}#{quote}"
|
338
|
+
@options[:html] ? @coder.encode(res) : res
|
194
339
|
end
|
195
340
|
|
196
|
-
def
|
341
|
+
def escape_ebnf_hex(u)
|
197
342
|
fmt = case u.ord
|
343
|
+
when 0x00..0x20 then "#x%02X"
|
198
344
|
when 0x0000..0x00ff then "#x%02X"
|
199
345
|
when 0x0100..0xffff then "#x%04X"
|
200
346
|
else "#x%08X"
|
201
347
|
end
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
348
|
+
char = fmt % u.ord
|
349
|
+
if @options[:html]
|
350
|
+
if u.ord <= 0x20
|
351
|
+
char = %(<abbr title="#{ASCII_ESCAPE_NAMES[u.ord]}">#{@coder.encode char}</abbr>)
|
352
|
+
elsif u.ord < 0x7F
|
353
|
+
char = %(<abbr title="ascii '#{@coder.encode u}'">#{@coder.encode char}</abbr>)
|
354
|
+
elsif u.ord == 0x7F
|
355
|
+
char = %(<abbr title="delete">#{@coder.encode char}</abbr>)
|
356
|
+
elsif u.ord <= 0xFF
|
357
|
+
char = %(<abbr title="extended ascii '#{u}'">#{char}</abbr>)
|
358
|
+
else
|
359
|
+
char = %(<abbr title="unicode '#{u}'">#{char}</abbr>)
|
360
|
+
end
|
361
|
+
%(<code class="grammar-char-escape">#{char}</code>)
|
362
|
+
else
|
363
|
+
char
|
364
|
+
end
|
365
|
+
end
|
366
|
+
|
367
|
+
##
|
368
|
+
# ABNF Formatters
|
369
|
+
##
|
370
|
+
|
371
|
+
# Format the expression part of a rule
|
372
|
+
def format_abnf(expr, sep: nil, embedded: false, sensitive: true)
|
373
|
+
return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
|
374
|
+
if expr.is_a?(String)
|
375
|
+
if expr.length == 1
|
376
|
+
return format_abnf_char(expr)
|
377
|
+
elsif expr.start_with?('%')
|
378
|
+
# Already encoded
|
379
|
+
return expr
|
380
|
+
elsif expr =~ /"/
|
381
|
+
# Split into segments
|
382
|
+
segments = expr.split('"')
|
383
|
+
|
384
|
+
return format_abnf_char(expr) if segments.empty?
|
385
|
+
|
386
|
+
seq = segments.inject([]) {|memo, s| memo.concat([[:hex, "#x22"], s])}[1..-1]
|
387
|
+
seq.unshift(:seq)
|
388
|
+
return format_abnf(seq, sep: nil, embedded: false)
|
389
|
+
else
|
390
|
+
return (@options[:html] ? %("<code class="grammar-literal">#{'%s' if sensitive}#{@coder.encode expr}</code>") : %(#{'%s' if sensitive}"#{expr}"))
|
391
|
+
end
|
392
|
+
end
|
393
|
+
parts = {
|
394
|
+
alt: (@options[:html] ? "<code>/</code> " : "/ "),
|
395
|
+
star: (@options[:html] ? "<code>*</code> " : "*"),
|
396
|
+
plus: (@options[:html] ? "<code>+</code> " : "1*"),
|
397
|
+
opt: (@options[:html] ? "<code>?</code> " : "?")
|
398
|
+
}
|
399
|
+
lbrac = (@options[:html] ? "<code>[</code> " : "[")
|
400
|
+
rbrac = (@options[:html] ? "<code>]</code> " : "]")
|
401
|
+
lparen = (@options[:html] ? "<code>(</code> " : "(")
|
402
|
+
rparen = (@options[:html] ? "<code>)</code> " : ")")
|
403
|
+
|
404
|
+
case expr.first
|
405
|
+
when :istr
|
406
|
+
# FIXME: if string part is segmented, need to do something different
|
407
|
+
format_abnf(expr.last, embedded: true, sensitive: false)
|
408
|
+
when :alt
|
409
|
+
this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
|
410
|
+
res = expr[1..-1].map {|e| format_abnf(e, embedded: true)}.join(this_sep)
|
411
|
+
embedded ? (lparen + res + rparen) : res
|
412
|
+
when :diff
|
413
|
+
raise RangeError, "ABNF does not support the diff operator"
|
414
|
+
when :opt
|
415
|
+
char = parts[expr.first.to_sym]
|
416
|
+
r = format_abnf(expr[1], embedded: true)
|
417
|
+
"#{lbrac}#{r}#{rbrac}"
|
418
|
+
when :plus, :star
|
419
|
+
char = parts[expr.first.to_sym]
|
420
|
+
r = format_abnf(expr[1], embedded: true)
|
421
|
+
"#{char}#{r}"
|
422
|
+
when :hex
|
423
|
+
escape_abnf_hex(expr.last[2..-1].hex.chr)
|
424
|
+
when :range
|
425
|
+
# Returns an [:alt] or [:not [:alt]] if composed of multiple sequences
|
426
|
+
# Note: ABNF does not support the `not` operator
|
427
|
+
res = format_abnf_range(expr.last)
|
428
|
+
res.is_a?(Array) ?
|
429
|
+
format_abnf(res, embedded: true) :
|
430
|
+
res
|
431
|
+
when :seq
|
432
|
+
this_sep = (sep ? sep : " ")
|
433
|
+
res = expr[1..-1].map do |e|
|
434
|
+
format_abnf(e, embedded: true)
|
435
|
+
end.join(this_sep)
|
436
|
+
embedded ? (lparen + res + rparen) : res
|
437
|
+
when :rept
|
438
|
+
# Expand repetition
|
439
|
+
min, max, value = expr[1..-1]
|
440
|
+
r = format_abnf(value, embedded: true)
|
441
|
+
if min == max
|
442
|
+
"#{min}#{r}"
|
443
|
+
elsif min == 0 && max == '*'
|
444
|
+
"#{parts[:star]}#{r}"
|
445
|
+
elsif min > 0 && max == '*'
|
446
|
+
"#{min}#{parts[:star]}#{r}"
|
447
|
+
else
|
448
|
+
"#{min}#{parts[:star]}#{max}#{r}"
|
449
|
+
end
|
450
|
+
else
|
451
|
+
raise "Unknown operator: #{expr.first}"
|
452
|
+
end
|
453
|
+
end
|
454
|
+
|
455
|
+
# Format a single-character string, prefering hex for non-main ASCII
|
456
|
+
def format_abnf_char(c)
|
457
|
+
if /[\x20-\x21\x23-\x7E]/.match?(c)
|
458
|
+
c.inspect
|
459
|
+
else
|
460
|
+
escape_abnf_hex(c)
|
461
|
+
end
|
462
|
+
end
|
463
|
+
|
464
|
+
# Format a range
|
465
|
+
#
|
466
|
+
# Presumes range has already been validated
|
467
|
+
def format_abnf_range(string)
|
468
|
+
alt, o_dash = [:alt], false
|
469
|
+
|
470
|
+
raise RangeError, "cannot format #{string.inspect} an ABNF range" if string.start_with?('^')
|
471
|
+
|
472
|
+
if string.end_with?('-')
|
473
|
+
o_dash = true
|
474
|
+
string = string[0..-2]
|
475
|
+
end
|
476
|
+
|
477
|
+
scanner = StringScanner.new(string)
|
478
|
+
hexes, deces = [], []
|
479
|
+
in_range = false
|
480
|
+
# Build op (alt) from different ranges/enums
|
481
|
+
while !scanner.eos?
|
482
|
+
if hex = scanner.scan(Terminals::HEX)
|
483
|
+
# Append any decimal values
|
484
|
+
alt << "%d" + deces.join(".") unless deces.empty?
|
485
|
+
deces = []
|
486
|
+
|
487
|
+
if in_range
|
488
|
+
# Add "." sequences for any previous hexes
|
489
|
+
alt << "%x" + hexes[0..-2].join(".") if hexes.length > 1
|
490
|
+
alt << "%x#{hexes.last}-#{hex[2..-1]}"
|
491
|
+
in_range, hexes = false, []
|
492
|
+
else
|
493
|
+
hexes << hex[2..-1]
|
494
|
+
end
|
495
|
+
elsif dec = scanner.scan(Terminals::R_CHAR)
|
496
|
+
# Append any hexadecimal values
|
497
|
+
alt << "%x" + hexes.join(".") unless hexes.empty?
|
498
|
+
hexes = []
|
499
|
+
|
500
|
+
if in_range
|
501
|
+
# Add "." sequences for any previous hexes
|
502
|
+
alt << "%d" + deces[0..-2].join(".") if deces.length > 1
|
503
|
+
alt << "%d#{deces.last}-#{dec.codepoints.first}"
|
504
|
+
in_range, deces = false, []
|
505
|
+
else
|
506
|
+
deces << dec.codepoints.first.to_s
|
507
|
+
end
|
508
|
+
end
|
509
|
+
|
510
|
+
in_range = true if scanner.scan(/\-/)
|
511
|
+
end
|
512
|
+
|
513
|
+
deces << '45' if o_dash
|
514
|
+
|
515
|
+
# Append hexes and deces as "." sequences (should be only one)
|
516
|
+
alt << "%d" + deces.join(".") unless deces.empty?
|
517
|
+
alt << "%x" + hexes.join(".") unless hexes.empty?
|
518
|
+
|
519
|
+
# FIXME: HTML abbreviations?
|
520
|
+
if alt.length == 2
|
521
|
+
# Just return the range or enum
|
522
|
+
alt.last
|
523
|
+
else
|
524
|
+
# Return the alt, which will be further formatted
|
525
|
+
alt
|
526
|
+
end
|
527
|
+
end
|
528
|
+
|
529
|
+
def escape_abnf_hex(u)
|
530
|
+
fmt = case u.ord
|
531
|
+
when 0x0000..0x00ff then "%02X"
|
532
|
+
when 0x0100..0xffff then "%04X"
|
533
|
+
else "%08X"
|
534
|
+
end
|
535
|
+
char = "%x" + (fmt % u.ord)
|
536
|
+
if @options[:html]
|
537
|
+
if u.ord <= 0x20
|
538
|
+
char = %(<abbr title="#{ASCII_ESCAPE_NAMES[u.ord]}">#{@coder.encode char}</abbr>)
|
539
|
+
elsif u.ord <= 0x7F
|
540
|
+
char = %(<abbr title="ascii '#{u}'">#{@coder.encode char}</abbr>)
|
541
|
+
elsif u.ord == 0x7F
|
542
|
+
char = %(<abbr title="delete">#{@coder.encode char}</abbr>)
|
543
|
+
elsif u.ord <= 0xFF
|
544
|
+
char = %(<abbr title="extended ascii '#{u}'">#{char}</abbr>)
|
545
|
+
else
|
546
|
+
char = %(<abbr title="unicode '#{u}'">#{char}</abbr>)
|
547
|
+
end
|
548
|
+
%(<code class="grammar-char-escape">#{char}</code>)
|
549
|
+
else
|
550
|
+
char
|
551
|
+
end
|
552
|
+
end
|
553
|
+
|
554
|
+
##
|
555
|
+
# ISO EBNF Formatters
|
556
|
+
##
|
557
|
+
|
558
|
+
# Format the expression part of a rule
|
559
|
+
def format_isoebnf(expr, sep: nil, embedded: false)
|
560
|
+
return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
|
561
|
+
if expr.is_a?(String)
|
562
|
+
expr = expr[2..-1].hex.chr if expr =~ /\A#x\h+/
|
563
|
+
expr.chars.each do |c|
|
564
|
+
raise RangeError, "cannot format #{expr.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless
|
565
|
+
ISOEBNF::TERMINAL_CHARACTER.match?(c)
|
566
|
+
end
|
567
|
+
if expr =~ /"/
|
568
|
+
return (@options[:html] ? %('<code class="grammar-literal">#{@coder.encode expr}</code>') : %('#{expr}'))
|
569
|
+
else
|
570
|
+
return (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode expr}</code>") : %("#{expr}"))
|
571
|
+
end
|
572
|
+
end
|
573
|
+
parts = {
|
574
|
+
alt: (@options[:html] ? "<code>|</code> " : "| "),
|
575
|
+
diff: (@options[:html] ? "<code>-</code> " : "- "),
|
576
|
+
}
|
577
|
+
lparen = (@options[:html] ? "<code>(</code> " : "(")
|
578
|
+
rparen = (@options[:html] ? "<code>)</code> " : ")")
|
579
|
+
|
580
|
+
case expr.first
|
581
|
+
when :istr
|
582
|
+
# Looses fidelity, but, oh well ...
|
583
|
+
format_isoebnf(expr.last, embedded: true)
|
584
|
+
when :alt, :diff
|
585
|
+
this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
|
586
|
+
res = expr[1..-1].map {|e| format_isoebnf(e, embedded: true)}.join(this_sep)
|
587
|
+
embedded ? (lparen + res + rparen) : res
|
588
|
+
when :opt
|
589
|
+
r = format_isoebnf(expr[1], embedded: true)
|
590
|
+
"[#{r}]"
|
591
|
+
when :star
|
592
|
+
r = format_isoebnf(expr[1], embedded: true)
|
593
|
+
"{#{r}}"
|
594
|
+
when :plus
|
595
|
+
r = format_isoebnf(expr[1], embedded: true)
|
596
|
+
"#{r}, {#{r}}"
|
597
|
+
when :hex
|
598
|
+
format_isoebnf(expr[1], embedded: true)
|
599
|
+
when :range
|
600
|
+
res = format_isoebnf_range(expr.last)
|
601
|
+
res.is_a?(Array) ?
|
602
|
+
format_isoebnf(res, embedded: true) :
|
603
|
+
res
|
604
|
+
when :seq
|
605
|
+
this_sep = "," + (sep ? sep : " ")
|
606
|
+
res = expr[1..-1].map do |e|
|
607
|
+
format_isoebnf(e, embedded: true)
|
608
|
+
end.join(this_sep)
|
609
|
+
embedded ? (lparen + res + rparen) : res
|
610
|
+
when :rept
|
611
|
+
# Expand repetition
|
612
|
+
min, max, value = expr[1..-1]
|
613
|
+
if min == 0 && max == 1
|
614
|
+
format_isoebnf([:opt, value], sep: sep, embedded: embedded)
|
615
|
+
elsif min == 0 && max == '*'
|
616
|
+
format_isoebnf([:star, value], sep: sep, embedded: embedded)
|
617
|
+
elsif min == 1 && max == '*'
|
618
|
+
format_isoebnf([:plus, value], sep: sep, embedded: embedded)
|
619
|
+
else
|
620
|
+
val2 = [:seq]
|
621
|
+
while min > 0
|
622
|
+
val2 << value
|
623
|
+
min -= 1
|
624
|
+
max -= 1 unless max == '*'
|
625
|
+
end
|
626
|
+
if max == '*'
|
627
|
+
val2 << [:star, value]
|
628
|
+
else
|
629
|
+
opt = nil
|
630
|
+
while max > 0
|
631
|
+
opt = [:opt, opt ? [:seq, value, opt] : value]
|
632
|
+
max -= 1
|
633
|
+
end
|
634
|
+
val2 << opt if opt
|
635
|
+
end
|
636
|
+
format_isoebnf(val2, sep: sep, embedded: embedded)
|
637
|
+
end
|
638
|
+
else
|
639
|
+
raise "Unknown operator: #{expr.first}"
|
640
|
+
end
|
641
|
+
end
|
642
|
+
|
643
|
+
# Format a range
|
644
|
+
# Range is formatted as a aliteration of characters
|
645
|
+
def format_isoebnf_range(string)
|
646
|
+
chars = []
|
647
|
+
o_dash = false
|
648
|
+
|
649
|
+
raise RangeError, "cannot format #{string.inspect} an ABNF range" if string.start_with?('^')
|
650
|
+
|
651
|
+
if string.end_with?('-')
|
652
|
+
o_dash = true
|
653
|
+
string = string[0..-2]
|
654
|
+
end
|
655
|
+
|
656
|
+
scanner = StringScanner.new(string)
|
657
|
+
in_range = false
|
658
|
+
# Build chars from different ranges/enums
|
659
|
+
while !scanner.eos?
|
660
|
+
char = if hex = scanner.scan(Terminals::HEX)
|
661
|
+
hex[2..-1].hex.ord.char(Encoding::UTF_8)
|
662
|
+
else scanner.scan(Terminals::R_CHAR)
|
663
|
+
end
|
664
|
+
raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration: #{char.inspect} is out of range" unless
|
665
|
+
char && ISOEBNF::TERMINAL_CHARACTER.match?(char)
|
666
|
+
|
667
|
+
if in_range
|
668
|
+
# calculate characters from chars.last to this char
|
669
|
+
raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration" unless chars.last < char
|
670
|
+
chars.concat (chars.last..char).to_a[1..-1]
|
671
|
+
in_range = false
|
672
|
+
else
|
673
|
+
chars << char
|
674
|
+
end
|
675
|
+
|
676
|
+
in_range = true if scanner.scan(/\-/)
|
677
|
+
end
|
678
|
+
|
679
|
+
chars << '-' if o_dash
|
680
|
+
|
681
|
+
# Possibly only a single character (no character?)
|
682
|
+
chars.length == 1 ? chars.last.inspect : chars.unshift(:alt)
|
683
|
+
end
|
684
|
+
|
685
|
+
ERB_DESC = %q(
|
686
|
+
<table class="grammar">
|
687
|
+
<tbody id="grammar-productions" class="<%= @format %>">
|
688
|
+
<% for rule in @rules %>
|
689
|
+
<tr<%= %{ id="grammar-production-#{rule.sym}"} unless %w(=/ |).include?(rule.assign)%>>
|
690
|
+
<% if rule.id %>
|
691
|
+
<td<%= " colspan=2" unless rule.sym %>><%= rule.id %></td>
|
692
|
+
<% end %>
|
693
|
+
<% if rule.sym %>
|
694
|
+
<td><code><%== rule.sym %></code></td>
|
695
|
+
<% end %>
|
696
|
+
<td><%= rule.assign %></td>
|
697
|
+
<td><%= rule.formatted %></td>
|
698
|
+
</tr>
|
699
|
+
<% end %>
|
700
|
+
</tbody>
|
701
|
+
</table>
|
220
702
|
).gsub(/^ /, '')
|
221
703
|
end
|
222
704
|
end
|