ebnf 1.1.2 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +218 -196
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +40 -21
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/doap.ttl +23 -15
- data/etc/ebnf.ebnf +21 -33
- data/etc/ebnf.html +171 -160
- data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
- data/etc/ebnf.ll1.sxp +182 -183
- data/etc/ebnf.peg.rb +90 -0
- data/etc/ebnf.peg.sxp +84 -0
- data/etc/ebnf.sxp +40 -41
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +363 -362
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +103 -101
- data/lib/ebnf.rb +7 -2
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +128 -87
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +140 -8
- data/lib/ebnf/ll1/lexer.rb +37 -32
- data/lib/ebnf/ll1/parser.rb +113 -73
- data/lib/ebnf/ll1/scanner.rb +83 -51
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +561 -0
- data/lib/ebnf/peg/rule.rb +241 -0
- data/lib/ebnf/rule.rb +453 -163
- data/lib/ebnf/terminals.rb +21 -0
- data/lib/ebnf/writer.rb +561 -88
- metadata +114 -28
- data/etc/sparql.rb +0 -45773
@@ -0,0 +1,21 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
# Terminal definitions for the EBNF grammar
#
# Every constant is a frozen Regexp. The character classes follow the XML
# `Char` production: tab, LF, CR, U+0020..U+D7FF, U+E000..U+FFFD and
# U+10000..U+10FFFF. The U+E000..U+FFFD span was previously missing, so
# characters such as U+FFFD or full-width forms were rejected in CHAR,
# R_CHAR, STRING1 and STRING2.
module EBNF
  module Terminals
    # A bare symbol name, and a symbol that is not the start of a new rule
    SYMBOL_BASE = %r(\b[a-zA-Z0-9_\.]+\b)u.freeze
    SYMBOL      = %r(#{SYMBOL_BASE}(?!\s*::=))u.freeze
    # Hexadecimal character escape, such as #x20
    HEX         = %r(\#x\h+)u.freeze
    # Any legal character
    CHAR        = %r([\u0009\u000A\u000D\u0020-\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}])u.freeze
    # Any legal character other than "-" (U+002D) and "]" (U+005D)
    R_CHAR      = %r([\u0009\u000A\u000D\u0020-\u002C\u002E-\u005C\u005E-\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}])u.freeze
    # Character range/enumeration, not followed by something that makes it a rule identifier
    RANGE       = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX})|#{R_CHAR}|#{HEX})+-?\](?!\s+#{SYMBOL_BASE}\s*::=))u.freeze
    # Left-hand side of a rule: optional "[id]", the symbol, then "::="
    LHS         = %r((?:\[#{SYMBOL_BASE}\])?\s*#{SYMBOL_BASE}\s*::=)u.freeze
    # Negated character range/enumeration
    O_RANGE     = %r(\[\^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX}|#{R_CHAR}|#{HEX}))+-?\])u.freeze
    # Double-quoted string (any character except U+0022)
    STRING1     = %r("[\u0009\u000A\u000D\u0020\u0021\u0023-\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}]*")u.freeze
    # Single-quoted string (any character except U+0027)
    STRING2     = %r('[\u0009\u000A\u000D\u0020-\u0026\u0028-\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}]*')u.freeze
    # Postfix repetition operators
    POSTFIX     = %r([?*+])u.freeze
    # Skipped input: whitespace, "#"/"//" line comments (but not "#x", which
    # starts a HEX escape), and "/* */" or "(* *)" block comments
    PASS        = %r((
        \s
      | (?:(?:\#[^x]|//)[^\n\r]*)
      | (?:/\*(?:(?:\*[^/])|[^*])*\*/)
      | (?:\(\*(?:(?:\*[^\)])|[^*])*\*\))
    )+)xmu.freeze
  end
end
|
data/lib/ebnf/writer.rb
CHANGED
@@ -1,22 +1,62 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
require 'rdf'
|
3
3
|
require 'strscan' unless defined?(StringScanner)
|
4
|
+
require "ostruct"
|
4
5
|
|
5
6
|
##
|
6
7
|
# Serialize ruleset back to EBNF
|
7
8
|
module EBNF
|
8
9
|
class Writer
|
9
10
|
LINE_LENGTH = 80
|
11
|
+
LINE_LENGTH_HTML = 200
|
12
|
+
|
13
|
+
# ASCII escape names
|
14
|
+
ASCII_ESCAPE_NAMES = [
|
15
|
+
"null", #x00
|
16
|
+
"start of heading", #x01
|
17
|
+
"start of text", #x02
|
18
|
+
"end of text", #x03
|
19
|
+
"end of transmission", #x04
|
20
|
+
"enquiry", #x05
|
21
|
+
"acknowledge", #x06
|
22
|
+
"bell", #x07
|
23
|
+
"backspace", #x08
|
24
|
+
"horizontal tab", #x09
|
25
|
+
"new line", #x0A
|
26
|
+
"vertical tab", #x0B
|
27
|
+
"form feed", #x0C
|
28
|
+
"carriage return", #x0D
|
29
|
+
"shift out", #x0E
|
30
|
+
"shift in", #x0F
|
31
|
+
"data link escape", #x10
|
32
|
+
"device control 1", #x11
|
33
|
+
"device control 2", #x12
|
34
|
+
"device control 3", #x13
|
35
|
+
"device control 4", #x14
|
36
|
+
"negative acknowledge", #x15
|
37
|
+
"synchronous idle", #x16
|
38
|
+
"end of trans. block", #x17
|
39
|
+
"cancel", #x18
|
40
|
+
"end of medium", #x19
|
41
|
+
"substitute", #x1A
|
42
|
+
"escape", #x1B
|
43
|
+
"file separator", #x1C
|
44
|
+
"group separator", #x1D
|
45
|
+
"record separator", #x1E
|
46
|
+
"unit separator", #x1F
|
47
|
+
"space" #x20
|
48
|
+
]
|
10
49
|
|
11
50
|
##
|
12
51
|
# Format rules to a String
|
13
52
|
#
|
14
53
|
# @param [Array<Rule>] rules
|
54
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
15
55
|
# @return [Object]
|
16
|
-
def self.string(*rules)
|
56
|
+
def self.string(*rules, format: :ebnf)
|
17
57
|
require 'stringio' unless defined?(StringIO)
|
18
58
|
buf = StringIO.new
|
19
|
-
write(buf, *rules)
|
59
|
+
write(buf, *rules, format: format)
|
20
60
|
buf.string
|
21
61
|
end
|
22
62
|
|
@@ -24,9 +64,10 @@ module EBNF
|
|
24
64
|
# Format rules to $stdout
|
25
65
|
#
|
26
66
|
# @param [Array<Rule>] rules
|
67
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
27
68
|
# @return [Object]
|
28
|
-
def self.print(*rules)
|
29
|
-
write($stdout, *rules)
|
69
|
+
def self.print(*rules, format: :ebnf)
|
70
|
+
write($stdout, *rules, format: format)
|
30
71
|
end
|
31
72
|
|
32
73
|
##
|
@@ -34,92 +75,155 @@ module EBNF
|
|
34
75
|
#
|
35
76
|
# @param [Object] out
|
36
77
|
# @param [Array<Rule>] rules
|
78
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
37
79
|
# @return [Object]
|
38
|
-
def self.write(out, *rules)
|
39
|
-
Writer.new(rules, out: out)
|
80
|
+
def self.write(out, *rules, format: :ebnf)
|
81
|
+
Writer.new(rules, out: out, format: format)
|
40
82
|
end
|
41
83
|
|
42
84
|
##
|
43
85
|
# Write formatted rules to an IO like object as HTML
|
44
86
|
#
|
45
87
|
# @param [Array<Rule>] rules
|
88
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
46
89
|
# @return [Object]
|
47
|
-
def self.html(*rules)
|
90
|
+
def self.html(*rules, format: :ebnf)
|
48
91
|
require 'stringio' unless defined?(StringIO)
|
49
92
|
buf = StringIO.new
|
50
|
-
Writer.new(rules, out: buf, html: true)
|
93
|
+
Writer.new(rules, out: buf, html: true, format: format)
|
51
94
|
buf.string
|
52
95
|
end
|
53
96
|
|
54
97
|
##
|
55
98
|
# @param [Array<Rule>] rules
|
56
99
|
# @param [Hash{Symbol => Object}] options
|
57
|
-
# @
|
58
|
-
# @
|
59
|
-
# @option options [
|
60
|
-
#
|
61
|
-
def initialize(rules,
|
62
|
-
@options = options.
|
63
|
-
|
64
|
-
#fmt = options.fetch(:format, :ebnf)
|
100
|
+
# @param [#write] out ($stdout)
|
101
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
102
|
+
# @option options [Symbol] format
|
103
|
+
# @option options [Boolean] html (false)
|
104
|
+
def initialize(rules, out: $stdout, html: false, format: :ebnf, **options)
|
105
|
+
@options = options.merge(html: html)
|
106
|
+
return if rules.empty?
|
65
107
|
|
66
108
|
# Determine max LHS length
|
109
|
+
format_meth = "format_#{format}".to_sym
|
67
110
|
max_id = rules.max_by {|r| r.id.to_s.length}.id.to_s.length
|
68
111
|
max_sym = rules.max_by {|r| r.sym.to_s.length}.sym.to_s.length
|
69
|
-
lhs_length = max_sym +
|
70
|
-
lhs_fmt =
|
71
|
-
|
112
|
+
lhs_length = max_sym + 1
|
113
|
+
lhs_fmt = case format
|
114
|
+
when :abnf then "%<sym>-#{max_sym}s = "
|
115
|
+
when :ebnf then "%<sym>-#{max_sym}s ::= "
|
116
|
+
when :isoebnf then "%<sym>-#{max_sym}s = "
|
117
|
+
end
|
118
|
+
if format == :ebnf && max_id > 0
|
72
119
|
lhs_fmt = "%<id>-#{max_id+2}s " + lhs_fmt
|
73
120
|
lhs_length += max_id + 3
|
74
121
|
end
|
75
|
-
rhs_length = LINE_LENGTH - lhs_length
|
122
|
+
rhs_length = (html ? LINE_LENGTH_HTML : LINE_LENGTH) - lhs_length
|
76
123
|
|
77
|
-
if
|
124
|
+
if html
|
78
125
|
# Output as formatted HTML
|
79
126
|
begin
|
80
|
-
require '
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
127
|
+
require 'erubis'
|
128
|
+
require 'htmlentities'
|
129
|
+
@coder = HTMLEntities.new
|
130
|
+
eruby = Erubis::Eruby.new(ERB_DESC)
|
131
|
+
formatted_rules = rules.map do |rule|
|
132
|
+
if rule.kind == :terminals || rule.kind == :pass
|
133
|
+
OpenStruct.new(id: ("@#{rule.kind}"),
|
134
|
+
sym: nil,
|
135
|
+
assign: nil,
|
136
|
+
formatted: ("<strong># Productions for terminals</strong>" if rule.kind == :terminals))
|
137
|
+
else
|
138
|
+
formatted_expr = self.send(format_meth, rule.expr)
|
139
|
+
# Measure text without markup
|
140
|
+
formatted_expr_text = formatted_expr.gsub(%r{</?\w+[^>]*>}, '')
|
141
|
+
if formatted_expr_text.length > rhs_length && (format != :abnf || rule.alt?)
|
142
|
+
lines = []
|
143
|
+
# Can only reasonably split apart alts
|
144
|
+
self.send(format_meth, rule.expr, sep: "--rule-extensions--").
|
145
|
+
split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
|
146
|
+
assign = case format
|
147
|
+
when :ebnf
|
148
|
+
formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
|
149
|
+
(ndx > 0 ? (rule.alt? ? '|' : '') : '::=')
|
150
|
+
when :abnf
|
151
|
+
formatted.sub!(%r{\s*<code>/</code>\s*}, '')
|
152
|
+
(ndx > 0 ? '=/' : '=')
|
153
|
+
else
|
154
|
+
formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
|
155
|
+
(ndx > 0 ? (rule.alt? ? '|' : '') : '=')
|
156
|
+
end
|
157
|
+
lines << OpenStruct.new(id: ((ndx == 0 ? "[#{rule.id}]" : "") if rule.id),
|
158
|
+
sym: (rule.sym if ndx == 0 || format == :abnf),
|
159
|
+
assign: assign,
|
160
|
+
formatted: formatted)
|
161
|
+
end
|
162
|
+
if format == :isoebnf
|
163
|
+
lines << OpenStruct.new(assign: ';')
|
164
|
+
end
|
165
|
+
lines
|
166
|
+
else
|
167
|
+
OpenStruct.new(id: ("[#{rule.id}]" if rule.id),
|
168
|
+
sym: rule.sym,
|
169
|
+
assign: (format == :ebnf ? '::=' : '='),
|
170
|
+
formatted: (formatted_expr + (format == :isoebnf ? ' ;' : '')))
|
171
|
+
end
|
172
|
+
end
|
173
|
+
end.flatten
|
174
|
+
out.write eruby.evaluate(format: format, rules: formatted_rules)
|
86
175
|
return
|
87
176
|
rescue LoadError
|
88
|
-
$stderr.puts "Generating HTML requires
|
177
|
+
$stderr.puts "Generating HTML requires erubis and htmlentities gems to be loaded"
|
89
178
|
end
|
90
179
|
end
|
91
180
|
|
92
181
|
# Format each rule, considering the available rhs size
|
93
182
|
rules.each do |rule|
|
94
183
|
buffer = if rule.pass?
|
95
|
-
"%-#{lhs_length-2}s" % "@pass"
|
184
|
+
"\n%-#{lhs_length-2}s " % "@pass"
|
185
|
+
elsif rule.kind == :terminals
|
186
|
+
"\n%-#{lhs_length-2}s" % "@terminals"
|
96
187
|
else
|
97
188
|
lhs_fmt % {id: "[#{rule.id}]", sym: rule.sym}
|
98
189
|
end
|
99
|
-
formatted_expr =
|
100
|
-
if formatted_expr.length > rhs_length
|
101
|
-
|
190
|
+
formatted_expr = self.send(format_meth, rule.expr)
|
191
|
+
if formatted_expr.length > rhs_length && (format != :abnf || rule.alt?)
|
192
|
+
if format == :abnf
|
193
|
+
# No whitespace, use =/
|
194
|
+
self.send(format_meth, rule.expr, sep: "--rule-extensions--").
|
195
|
+
split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
|
196
|
+
if ndx > 0
|
197
|
+
buffer << "\n" + lhs_fmt.sub('= ', '=/') % {id: "[#{rule.id}]", sym: rule.sym}
|
198
|
+
end
|
199
|
+
buffer << formatted.sub(/\s*\/\s*/, '')
|
200
|
+
end
|
201
|
+
else
|
202
|
+
# Space out past "= "
|
203
|
+
buffer << self.send(format_meth, rule.expr, sep: ("\n" + " " * (lhs_length + (rule.alt? ? 2 : 4) - (format == :ebnf ? 0 : 2))))
|
204
|
+
buffer << ("\n" + " " * (lhs_length) + ';') if format == :isoebnf
|
205
|
+
end
|
102
206
|
else
|
103
|
-
buffer << formatted_expr
|
207
|
+
buffer << formatted_expr + (format == :isoebnf ? ' ;' : '')
|
104
208
|
end
|
209
|
+
buffer << "\n\n" if [:terminals, :pass].include?(rule.kind)
|
105
210
|
out.puts(buffer)
|
106
211
|
end
|
107
212
|
end
|
108
213
|
|
109
214
|
protected
|
215
|
+
|
216
|
+
##
|
217
|
+
# W3C EBNF Formatters
|
218
|
+
##
|
219
|
+
|
110
220
|
# Format the expression part of a rule
|
111
|
-
def
|
112
|
-
return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
|
221
|
+
def format_ebnf(expr, sep: nil, embedded: false)
|
222
|
+
return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
|
113
223
|
if expr.is_a?(String)
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
return (@options[:html] ? %(<code class="grammar-char-escape">#{expr}</code>) : expr)
|
118
|
-
elsif expr =~ /"/
|
119
|
-
return (@options[:html] ? %('<code class="grammar-literal">#{escape(expr, "'")}</code>') : %('#{escape(expr, "'")}'))
|
120
|
-
else
|
121
|
-
return (@options[:html] ? %("<code class="grammar-literal">#{escape(expr, '"')}</code>") : %("#{escape(expr, '"')}"))
|
122
|
-
end
|
224
|
+
return expr.length == 1 ?
|
225
|
+
format_ebnf_char(expr) :
|
226
|
+
format_ebnf_string(expr, expr.include?('"') ? "'" : '"')
|
123
227
|
end
|
124
228
|
parts = {
|
125
229
|
alt: (@options[:html] ? "<code>|</code> " : "| "),
|
@@ -132,95 +236,464 @@ module EBNF
|
|
132
236
|
rparen = (@options[:html] ? "<code>)</code> " : ")")
|
133
237
|
|
134
238
|
case expr.first
|
239
|
+
when :istr
|
240
|
+
# Looses fidelity, but, oh well ...
|
241
|
+
format_ebnf(expr.last, embedded: true)
|
135
242
|
when :alt, :diff
|
136
243
|
this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
|
137
|
-
expr[1..-1].map {|e|
|
244
|
+
res = expr[1..-1].map {|e| format_ebnf(e, embedded: true)}.join(this_sep)
|
245
|
+
embedded ? (lparen + res + rparen) : res
|
138
246
|
when :star, :plus, :opt
|
139
|
-
raise "Expected star expression to have a single operand" unless expr.length == 2
|
140
247
|
char = parts[expr.first.to_sym]
|
141
|
-
r =
|
142
|
-
|
248
|
+
r = format_ebnf(expr[1], embedded: true)
|
249
|
+
"#{r}#{char}"
|
143
250
|
when :hex
|
144
|
-
(
|
251
|
+
escape_ebnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
|
145
252
|
when :range
|
146
|
-
|
253
|
+
format_ebnf_range(expr.last)
|
147
254
|
when :seq
|
148
255
|
this_sep = (sep ? sep : " ")
|
149
|
-
expr[1..-1].map
|
256
|
+
res = expr[1..-1].map do |e|
|
257
|
+
format_ebnf(e, embedded: true)
|
258
|
+
end.join(this_sep)
|
259
|
+
embedded ? (lparen + res + rparen) : res
|
260
|
+
when :rept
|
261
|
+
# Expand repetition
|
262
|
+
min, max, value = expr[1..-1]
|
263
|
+
if min == 0 && max == 1
|
264
|
+
format_ebnf([:opt, value], sep: sep, embedded: embedded)
|
265
|
+
elsif min == 0 && max == '*'
|
266
|
+
format_ebnf([:star, value], sep: sep, embedded: embedded)
|
267
|
+
elsif min == 1 && max == '*'
|
268
|
+
format_ebnf([:plus, value], sep: sep, embedded: embedded)
|
269
|
+
else
|
270
|
+
val2 = [:seq]
|
271
|
+
while min > 0
|
272
|
+
val2 << value
|
273
|
+
min -= 1
|
274
|
+
max -= 1 unless max == '*'
|
275
|
+
end
|
276
|
+
if max == '*'
|
277
|
+
val2 << [:star, value]
|
278
|
+
else
|
279
|
+
opt = nil
|
280
|
+
while max > 0
|
281
|
+
opt = [:opt, opt ? [:seq, value, opt] : value]
|
282
|
+
max -= 1
|
283
|
+
end
|
284
|
+
val2 << opt if opt
|
285
|
+
end
|
286
|
+
format_ebnf(val2, sep: sep, embedded: embedded)
|
287
|
+
end
|
150
288
|
else
|
151
289
|
raise "Unknown operator: #{expr.first}"
|
152
290
|
end
|
153
291
|
end
|
154
292
|
|
155
293
|
# Format a single-character string, preferring hex for non-main ASCII
|
156
|
-
def
|
294
|
+
def format_ebnf_char(c)
|
157
295
|
case c.ord
|
158
|
-
when
|
159
|
-
when
|
160
|
-
|
296
|
+
when (0x21) then (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"})
|
297
|
+
when 0x22 then (@options[:html] ? %('<code class="grammar-literal">"</code>') : %{'"'})
|
298
|
+
when (0x23..0x7e) then (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"})
|
299
|
+
when (0x80..0xFFFD) then (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"})
|
300
|
+
else escape_ebnf_hex(c)
|
161
301
|
end
|
162
302
|
end
|
163
303
|
|
164
304
|
# Format a range
|
165
|
-
def
|
305
|
+
def format_ebnf_range(string)
|
166
306
|
lbrac = (@options[:html] ? "<code>[</code> " : "[")
|
167
307
|
rbrac = (@options[:html] ? "<code>]</code> " : "]")
|
168
|
-
dash = (@options[:html] ? "<code>-</code> " : "-")
|
169
308
|
|
170
309
|
buffer = lbrac
|
171
310
|
s = StringScanner.new(string)
|
172
311
|
while !s.eos?
|
173
312
|
case
|
174
313
|
when s.scan(/\A[!"\u0024-\u007e]+/)
|
175
|
-
buffer << (@options[:html] ? %(<code class="grammar-literal">#{s.matched}</code>) : s.matched)
|
314
|
+
buffer << (@options[:html] ? %(<code class="grammar-literal">#{@coder.encode s.matched}</code>) : s.matched)
|
176
315
|
when s.scan(/\A#x\h+/)
|
177
|
-
buffer << (
|
178
|
-
when s.scan(/\A-/)
|
179
|
-
buffer << dash
|
316
|
+
buffer << escape_ebnf_hex(s.matched[2..-1].hex.chr(Encoding::UTF_8))
|
180
317
|
else
|
181
|
-
buffer << (
|
318
|
+
buffer << escape_ebnf_hex(s.getch)
|
182
319
|
end
|
183
320
|
end
|
184
321
|
buffer + rbrac
|
185
322
|
end
|
186
323
|
|
187
324
|
# Escape a string, using as many UTF-8 characters as possible
|
188
|
-
def
|
189
|
-
buffer = ""
|
325
|
+
def format_ebnf_string(string, quote = '"')
|
190
326
|
string.each_char do |c|
|
191
|
-
|
192
|
-
when
|
193
|
-
|
194
|
-
|
327
|
+
case c.ord
|
328
|
+
when 0x00..0x19, quote.ord
|
329
|
+
raise RangeError, "cannot format #{string.inspect} as an EBNF String: #{c.inspect} is out of range" unless
|
330
|
+
ISOEBNF::TERMINAL_CHARACTER.match?(c)
|
195
331
|
end
|
196
332
|
end
|
197
|
-
|
333
|
+
|
334
|
+
res = "#{quote}#{string}#{quote}"
|
335
|
+
@options[:html] ? @coder.encode(res) : res
|
198
336
|
end
|
199
337
|
|
200
|
-
def
|
338
|
+
def escape_ebnf_hex(u)
|
201
339
|
fmt = case u.ord
|
340
|
+
when 0x00..0x20 then "#x%02X"
|
202
341
|
when 0x0000..0x00ff then "#x%02X"
|
203
342
|
when 0x0100..0xffff then "#x%04X"
|
204
343
|
else "#x%08X"
|
205
344
|
end
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
345
|
+
char = fmt % u.ord
|
346
|
+
if @options[:html]
|
347
|
+
if u.ord <= 0x20
|
348
|
+
char = %(<abbr title="#{ASCII_ESCAPE_NAMES[u.ord]}">#{@coder.encode char}</abbr>)
|
349
|
+
elsif u.ord < 0x7F
|
350
|
+
char = %(<abbr title="ascii '#{@coder.encode u}'">#{@coder.encode char}</abbr>)
|
351
|
+
elsif u.ord == 0x7F
|
352
|
+
char = %(<abbr title="delete">#{@coder.encode char}</abbr>)
|
353
|
+
elsif u.ord <= 0xFF
|
354
|
+
char = %(<abbr title="extended ascii '#{u}'">#{char}</abbr>)
|
355
|
+
else
|
356
|
+
char = %(<abbr title="unicode '#{u}'">#{char}</abbr>)
|
357
|
+
end
|
358
|
+
%(<code class="grammar-char-escape">#{char}</code>)
|
359
|
+
else
|
360
|
+
char
|
361
|
+
end
|
362
|
+
end
|
363
|
+
|
364
|
+
##
|
365
|
+
# ABNF Formatters
|
366
|
+
##
|
367
|
+
|
368
|
+
# Format the expression part of a rule
|
369
|
+
def format_abnf(expr, sep: nil, embedded: false, sensitive: true)
|
370
|
+
return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
|
371
|
+
if expr.is_a?(String)
|
372
|
+
if expr.length == 1
|
373
|
+
return format_abnf_char(expr)
|
374
|
+
elsif expr.start_with?('%')
|
375
|
+
# Already encoded
|
376
|
+
return expr
|
377
|
+
elsif expr =~ /"/
|
378
|
+
# Split into segments
|
379
|
+
segments = expr.split('"')
|
380
|
+
|
381
|
+
return format_abnf_char(expr) if segments.empty?
|
382
|
+
|
383
|
+
seq = segments.inject([]) {|memo, s| memo.concat([[:hex, "#x22"], s])}[1..-1]
|
384
|
+
seq.unshift(:seq)
|
385
|
+
return format_abnf(seq, sep: nil, embedded: false)
|
386
|
+
else
|
387
|
+
return (@options[:html] ? %("<code class="grammar-literal">#{'%s' if sensitive}#{@coder.encode expr}</code>") : %(#{'%s' if sensitive}"#{expr}"))
|
388
|
+
end
|
389
|
+
end
|
390
|
+
parts = {
|
391
|
+
alt: (@options[:html] ? "<code>/</code> " : "/ "),
|
392
|
+
star: (@options[:html] ? "<code>*</code> " : "*"),
|
393
|
+
plus: (@options[:html] ? "<code>+</code> " : "1*"),
|
394
|
+
opt: (@options[:html] ? "<code>?</code> " : "?")
|
395
|
+
}
|
396
|
+
lbrac = (@options[:html] ? "<code>[</code> " : "[")
|
397
|
+
rbrac = (@options[:html] ? "<code>]</code> " : "]")
|
398
|
+
lparen = (@options[:html] ? "<code>(</code> " : "(")
|
399
|
+
rparen = (@options[:html] ? "<code>)</code> " : ")")
|
400
|
+
|
401
|
+
case expr.first
|
402
|
+
when :istr
|
403
|
+
# FIXME: if string part is segmented, need to do something different
|
404
|
+
format_abnf(expr.last, embedded: true, sensitive: false)
|
405
|
+
when :alt
|
406
|
+
this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
|
407
|
+
res = expr[1..-1].map {|e| format_abnf(e, embedded: true)}.join(this_sep)
|
408
|
+
embedded ? (lparen + res + rparen) : res
|
409
|
+
when :diff
|
410
|
+
raise RangeError, "ABNF does not support the diff operator"
|
411
|
+
when :opt
|
412
|
+
char = parts[expr.first.to_sym]
|
413
|
+
r = format_abnf(expr[1], embedded: true)
|
414
|
+
"#{lbrac}#{r}#{rbrac}"
|
415
|
+
when :plus, :star
|
416
|
+
char = parts[expr.first.to_sym]
|
417
|
+
r = format_abnf(expr[1], embedded: true)
|
418
|
+
"#{char}#{r}"
|
419
|
+
when :hex
|
420
|
+
escape_abnf_hex(expr.last[2..-1].hex.chr)
|
421
|
+
when :range
|
422
|
+
# Returns an [:alt] or [:not [:alt]] if composed of multiple sequences
|
423
|
+
# Note: ABNF does not support the `not` operator
|
424
|
+
res = format_abnf_range(expr.last)
|
425
|
+
res.is_a?(Array) ?
|
426
|
+
format_abnf(res, embedded: true) :
|
427
|
+
res
|
428
|
+
when :seq
|
429
|
+
this_sep = (sep ? sep : " ")
|
430
|
+
res = expr[1..-1].map do |e|
|
431
|
+
format_abnf(e, embedded: true)
|
432
|
+
end.join(this_sep)
|
433
|
+
embedded ? (lparen + res + rparen) : res
|
434
|
+
when :rept
|
435
|
+
# Expand repetition
|
436
|
+
min, max, value = expr[1..-1]
|
437
|
+
r = format_abnf(value, embedded: true)
|
438
|
+
if min == max
|
439
|
+
"#{min}#{r}"
|
440
|
+
elsif min == 0 && max == '*'
|
441
|
+
"#{parts[:star]}#{r}"
|
442
|
+
elsif min > 0 && max == '*'
|
443
|
+
"#{min}#{parts[:star]}#{r}"
|
444
|
+
else
|
445
|
+
"#{min}#{parts[:star]}#{max}#{r}"
|
446
|
+
end
|
447
|
+
else
|
448
|
+
raise "Unknown operator: #{expr.first}"
|
449
|
+
end
|
450
|
+
end
|
451
|
+
|
452
|
+
# Format a single-character string for ABNF, preferring a hex escape for
# anything outside the characters allowed in a quoted string.
#
# ABNF quoted strings have no escape syntax, so the character is wrapped in
# double quotes verbatim. `String#inspect` must not be used here: it emits
# Ruby escapes and would, for example, double a backslash.
#
# @param [String] c the character to format
# @return [String] a quoted literal, or whatever `escape_abnf_hex` returns
def format_abnf_char(c)
  # %x20-21 / %x23-7E: printable ASCII except the double quote
  if /[\x20-\x21\x23-\x7E]/.match?(c)
    %("#{c}")
  else
    escape_abnf_hex(c)
  end
end
|
460
|
+
|
461
|
+
# Format the body of a character range (the text between "[" and "]") as
# ABNF numeric values.
#
# Characters given as `#xNN` escapes become `%x` values; literal characters
# become `%d` values built from their code points. Runs of single values are
# joined with "." and `a-b` pairs become `%xA-B` / `%dA-B`. A trailing "-"
# is emitted as `%d45`.
#
# Presumes the range has already been validated; input that cannot be
# scanned raises instead of looping forever.
#
# @param [String] string range body, without the surrounding brackets
# @return [String, Array] the single formatted value, or `[:alt, ...]` when
#   several values were produced (the caller formats the alt)
# @raise [RangeError] for a negated range or an unscannable character
def format_abnf_range(string)
  raise RangeError, "cannot format #{string.inspect} as an ABNF range" if string.start_with?('^')

  alt = [:alt]
  o_dash = string.end_with?('-')
  string = string[0..-2] if o_dash

  scanner = StringScanner.new(string)
  hexes, deces = [], []
  in_range = false
  # Build op (alt) from different ranges/enums
  until scanner.eos?
    if (hex = scanner.scan(Terminals::HEX))
      # Append any decimal values
      alt << "%d" + deces.join(".") unless deces.empty?
      deces = []

      if in_range
        # Add "." sequences for any previous hexes
        alt << "%x" + hexes[0..-2].join(".") if hexes.length > 1
        alt << "%x#{hexes.last}-#{hex[2..-1]}"
        in_range, hexes = false, []
      else
        hexes << hex[2..-1]
      end
    elsif (dec = scanner.scan(Terminals::R_CHAR))
      # Append any hexadecimal values
      alt << "%x" + hexes.join(".") unless hexes.empty?
      hexes = []

      if in_range
        # Add "." sequences for any previous decimals
        alt << "%d" + deces[0..-2].join(".") if deces.length > 1
        alt << "%d#{deces.last}-#{dec.codepoints.first}"
        in_range, deces = false, []
      else
        deces << dec.codepoints.first.to_s
      end
    elsif !scanner.check(/\-/)
      # Neither an escape, a range character nor a dash: the scanner cannot
      # advance, so fail rather than spin
      raise RangeError, "cannot format #{string.inspect} as an ABNF range"
    end

    in_range = true if scanner.scan(/\-/)
  end

  deces << '45' if o_dash

  # Append hexes and deces as "." sequences (should be only one)
  alt << "%d" + deces.join(".") unless deces.empty?
  alt << "%x" + hexes.join(".") unless hexes.empty?

  # FIXME: HTML abbreviations?
  # A lone value is returned as-is; otherwise the alt is formatted by the caller
  alt.length == 2 ? alt.last : alt
end
|
525
|
+
|
526
|
+
# Format a character as an ABNF hexadecimal value (`%xNN`).
#
# The code point of the first character of `u` is rendered with 2, 4 or 8
# upper-case hex digits depending on its magnitude. In HTML mode the value is
# wrapped in an `<abbr>` describing the character and a
# `<code class="grammar-char-escape">` element.
#
# @param [String] u the character to escape
# @return [String]
def escape_abnf_hex(u)
  code = u.ord
  fmt = case code
  when 0x0000..0x00ff then "%02X"
  when 0x0100..0xffff then "%04X"
  else                     "%08X"
  end
  char = "%x" + (fmt % code)
  return char unless @options[:html]

  char = if code <= 0x20
    %(<abbr title="#{ASCII_ESCAPE_NAMES[code]}">#{@coder.encode char}</abbr>)
  elsif code < 0x7F
    # Strictly below DEL so that the "delete" branch is reachable; the
    # character is encoded because it may be `"`, `<` or `&`
    %(<abbr title="ascii '#{@coder.encode u}'">#{@coder.encode char}</abbr>)
  elsif code == 0x7F
    %(<abbr title="delete">#{@coder.encode char}</abbr>)
  elsif code <= 0xFF
    %(<abbr title="extended ascii '#{u}'">#{char}</abbr>)
  else
    %(<abbr title="unicode '#{u}'">#{char}</abbr>)
  end
  %(<code class="grammar-char-escape">#{char}</code>)
end
|
550
|
+
|
551
|
+
##
|
552
|
+
# ISO EBNF Formatters
|
553
|
+
##
|
554
|
+
|
555
|
+
# Format the expression part of a rule as ISO EBNF.
#
# Symbols become rule references, Strings become quoted terminals and Arrays
# are treated as S-expressions whose first element is the operator.
#
# @param [Symbol, String, Array] expr expression to format
# @param [String] sep separator placed between alternatives/sequence members
#   (a single space when nil)
# @param [Boolean] embedded wrap `alt`, `diff` and `seq` results in parentheses
# @return [String]
# @raise [RangeError] if a terminal contains a character that
#   ISOEBNF::TERMINAL_CHARACTER does not match
# @raise [RuntimeError] for an unknown operator
def format_isoebnf(expr, sep: nil, embedded: false)
  html = @options[:html]
  if expr.is_a?(Symbol)
    return html ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s
  end

  if expr.is_a?(String)
    # A "#xNN" escape is replaced by the character it denotes. The encoding is
    # explicit because Integer#chr without one fails above 0xFF with an opaque
    # "out of char range" error before the check below can report the problem.
    expr = expr[2..-1].hex.chr(Encoding::UTF_8) if expr.match?(/\A#x\h+/)
    expr.each_char do |c|
      next if ISOEBNF::TERMINAL_CHARACTER.match?(c)
      raise RangeError, "cannot format #{expr.inspect} as an ISO EBNF String: #{c.inspect} is out of range"
    end
    # Use single quotes when the terminal itself contains a double quote
    quote = expr.include?('"') ? "'" : '"'
    return html ? %(#{quote}<code class="grammar-literal">#{@coder.encode expr}</code>#{quote}) : %(#{quote}#{expr}#{quote})
  end

  parts = {
    alt:  (html ? "<code>|</code> " : "| "),
    diff: (html ? "<code>-</code> " : "- ")
  }
  lparen = (html ? "<code>(</code> " : "(")
  rparen = (html ? "<code>)</code> " : ")")

  case expr.first
  when :istr
    # Loses fidelity: ISO EBNF has no case-insensitive strings
    format_isoebnf(expr.last, embedded: true)
  when :alt, :diff
    this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
    res = expr[1..-1].map { |e| format_isoebnf(e, embedded: true) }.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :opt
    "[#{format_isoebnf(expr[1], embedded: true)}]"
  when :star
    "{#{format_isoebnf(expr[1], embedded: true)}}"
  when :plus
    # One or more: the element followed by zero or more repetitions of it
    r = format_isoebnf(expr[1], embedded: true)
    "#{r}, {#{r}}"
  when :hex
    # Handled by the String branch above
    format_isoebnf(expr[1], embedded: true)
  when :range
    res = format_isoebnf_range(expr.last)
    res.is_a?(Array) ? format_isoebnf(res, embedded: true) : res
  when :seq
    this_sep = "," + (sep ? sep : " ")
    res = expr[1..-1].map { |e| format_isoebnf(e, embedded: true) }.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :rept
    # Expand repetition
    min, max, value = expr[1..-1]
    if min == 0 && max == 1
      format_isoebnf([:opt, value], sep: sep, embedded: embedded)
    elsif min == 0 && max == '*'
      format_isoebnf([:star, value], sep: sep, embedded: embedded)
    elsif min == 1 && max == '*'
      format_isoebnf([:plus, value], sep: sep, embedded: embedded)
    else
      # `min` mandatory copies first ...
      val2 = [:seq]
      while min > 0
        val2 << value
        min -= 1
        max -= 1 unless max == '*'
      end
      if max == '*'
        # ... followed by an unbounded tail ...
        val2 << [:star, value]
      else
        # ... or by the remaining copies as nested optionals
        opt = nil
        while max > 0
          opt = [:opt, opt ? [:seq, value, opt] : value]
          max -= 1
        end
        val2 << opt if opt
      end
      format_isoebnf(val2, sep: sep, embedded: embedded)
    end
  else
    raise "Unknown operator: #{expr.first}"
  end
end
|
639
|
+
|
640
|
+
# Format the body of a character range (the text between "[" and "]") for
# ISO EBNF.
#
# ISO EBNF has no range syntax, so the range is expanded into an alternation
# of its individual characters. `a-b` pairs are expanded to every character
# from `a` through `b`; a trailing "-" is kept as a literal dash.
#
# @param [String] string range body, without the surrounding brackets
# @return [String, Array] a quoted terminal when the range holds exactly one
#   character, otherwise `[:alt, char, ...]` for the caller to format
# @raise [RangeError] for a negated range, a character that
#   ISOEBNF::TERMINAL_CHARACTER does not match, or a descending pair
def format_isoebnf_range(string)
  raise RangeError, "cannot format #{string.inspect} as an ISO EBNF range" if string.start_with?('^')

  o_dash = string.end_with?('-')
  string = string[0..-2] if o_dash

  chars = []
  scanner = StringScanner.new(string)
  in_range = false
  # Build chars from different ranges/enums
  until scanner.eos?
    char = if (hex = scanner.scan(Terminals::HEX))
      # Integer#chr turns the code point into a character
      hex[2..-1].hex.chr(Encoding::UTF_8)
    else
      scanner.scan(Terminals::R_CHAR)
    end
    unless char && ISOEBNF::TERMINAL_CHARACTER.match?(char)
      raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration: #{char.inspect} is out of range"
    end

    if in_range
      # Add every character after chars.last up to and including this one
      raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration" unless chars.last < char
      chars.concat((chars.last..char).to_a[1..-1])
      in_range = false
    else
      chars << char
    end

    in_range = true if scanner.scan(/\-/)
  end

  chars << '-' if o_dash

  return chars.unshift(:alt) unless chars.length == 1

  # A single character is returned already quoted. ISO EBNF terminals have no
  # escapes, so choose the quote that does not clash with the character.
  only = chars.last
  only == '"' ? %('"') : %("#{only}")
end
|
681
|
+
|
682
|
+
ERB_DESC = %q(
|
683
|
+
<table class="grammar">
|
684
|
+
<tbody id="grammar-productions" class="<%= @format %>">
|
685
|
+
<% for rule in @rules %>
|
686
|
+
<tr<%= %{ id="grammar-production-#{rule.sym}"} unless %w(=/ |).include?(rule.assign)%>>
|
687
|
+
<% if rule.id %>
|
688
|
+
<td><%= rule.id %></td>
|
689
|
+
<% end %>
|
690
|
+
<td><code><%== rule.sym %></code></td>
|
691
|
+
<td><%= rule.assign %></td>
|
692
|
+
<td><%= rule.formatted %></td>
|
693
|
+
</tr>
|
694
|
+
<% end %>
|
695
|
+
</tbody>
|
696
|
+
</table>
|
224
697
|
).gsub(/^ /, '')
|
225
698
|
end
|
226
699
|
end
|