ebnf 1.1.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +218 -196
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +40 -21
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/doap.ttl +13 -12
- data/etc/ebnf.ebnf +21 -33
- data/etc/ebnf.html +171 -160
- data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
- data/etc/ebnf.ll1.sxp +182 -183
- data/etc/ebnf.peg.rb +90 -0
- data/etc/ebnf.peg.sxp +84 -0
- data/etc/ebnf.sxp +40 -41
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +363 -362
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +103 -101
- data/lib/ebnf.rb +7 -2
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +128 -87
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +140 -8
- data/lib/ebnf/ll1/lexer.rb +37 -32
- data/lib/ebnf/ll1/parser.rb +113 -73
- data/lib/ebnf/ll1/scanner.rb +84 -51
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +554 -0
- data/lib/ebnf/peg/rule.rb +241 -0
- data/lib/ebnf/rule.rb +453 -163
- data/lib/ebnf/terminals.rb +21 -0
- data/lib/ebnf/writer.rb +554 -85
- metadata +98 -20
- data/etc/sparql.rb +0 -45773
@@ -0,0 +1,21 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
# Terminal definitions for the EBNF grammar
|
3
|
+
module EBNF
  # Terminal definitions for the EBNF grammar.
  #
  # Each constant is a frozen Regexp matching one lexical element of the
  # W3C EBNF notation. Later terminals are built by interpolating earlier ones.
  module Terminals
    # Characters making up a symbol name
    SYMBOL_BASE = %r(\b[a-zA-Z0-9_\.]+\b)u.freeze
    # A symbol which is not the left-hand side of a rule (not followed by `::=`)
    SYMBOL      = %r(#{SYMBOL_BASE}(?!\s*::=))u.freeze
    # A hexadecimal character escape, such as `#x20`
    HEX         = %r(\#x\h+)u.freeze
    # A single character
    CHAR        = %r([\u0009\u000A\u000D\u0020-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
    # A character allowed within a range; excludes `-` (#x2D) and `]` (#x5D)
    R_CHAR      = %r([\u0009\u000A\u000D\u0020-\u002C\u002E-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
    # A character range such as `[a-z#x30-#x39]`, which is not the identifier of a following rule
    RANGE       = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX})|#{R_CHAR}|#{HEX})+-?\](?!\s+#{SYMBOL_BASE}\s*::=))u.freeze
    # The left-hand side of a rule: optional `[id]`, a symbol, and `::=`
    LHS         = %r((?:\[#{SYMBOL_BASE}\])?\s*#{SYMBOL_BASE}\s*::=)u.freeze
    # A negated character range such as `[^a-z]`
    O_RANGE     = %r(\[\^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX}|#{R_CHAR}|#{HEX}))+-?\])u.freeze
    # A double-quoted string (may not contain `"`)
    STRING1     = %r("[\u0009\u000A\u000D\u0020\u0021\u0023-\uD7FF\u{10000}-\u{10FFFF}]*")u.freeze
    # A single-quoted string (may not contain `'`)
    STRING2     = %r('[\u0009\u000A\u000D\u0020-\u0026\u0028-\uD7FF\u{10000}-\u{10FFFF}]*')u.freeze
    # Postfix repetition operators
    POSTFIX     = %r([?*+])u.freeze
    # Skipped input: whitespace, `#` and `//` line comments, and `/* */` and
    # `(* *)` block comments.
    #
    # A `#` comment is recognized by a negative lookahead for `x` (which would
    # make it a HEX escape) rather than by consuming the next character, so a
    # bare `#` at the end of a line no longer swallows the newline and the
    # line after it. The block-comment alternatives accept any run of `*`
    # before the closing delimiter, so `/* ... **/` and `(* ... **)` match.
    PASS        = %r((
        \s
      | (?:(?:\#(?!x)|//)[^\n\r]*)
      | (?:/\*(?:[^*]|\*+[^*/])*\*+/)
      | (?:\(\*(?:[^*]|\*+[^*\)])*\*+\))
    )+)xmu.freeze
  end
end
|
data/lib/ebnf/writer.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
require 'rdf'
|
3
3
|
require 'strscan' unless defined?(StringScanner)
|
4
|
+
require "ostruct"
|
4
5
|
|
5
6
|
##
|
6
7
|
# Serialize ruleset back to EBNF
|
@@ -8,15 +9,53 @@ module EBNF
|
|
8
9
|
class Writer
|
9
10
|
LINE_LENGTH = 80
|
10
11
|
|
12
|
+
# ASCII escape names, indexed by code point (#x00 through #x20).
# Frozen so the shared table cannot be modified at runtime.
ASCII_ESCAPE_NAMES = [
  "null",                 #x00
  "start of heading",     #x01
  "start of text",        #x02
  "end of text",          #x03
  "end of transmission",  #x04
  "enquiry",              #x05
  "acknowledge",          #x06
  "bell",                 #x07
  "backspace",            #x08
  "horizontal tab",       #x09
  "new line",             #x0A
  "vertical tab",         #x0B
  "form feed",            #x0C
  "carriage return",      #x0D
  "shift out",            #x0E
  "shift in",             #x0F
  "data link escape",     #x10
  "device control 1",     #x11
  "device control 2",     #x12
  "device control 3",     #x13
  "device control 4",     #x14
  "negative acknowledge", #x15
  "synchronous idle",     #x16
  "end of trans. block",  #x17
  "cancel",               #x18
  "end of medium",        #x19
  "substitute",           #x1A
  "escape",               #x1B
  "file separator",       #x1C
  "group separator",      #x1D
  "record separator",     #x1E
  "unit separator",       #x1F
  "space"                 #x20
].freeze
|
48
|
+
|
11
49
|
##
|
12
50
|
# Format rules to a String
|
13
51
|
#
|
14
52
|
# @param [Array<Rule>] rules
|
53
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
15
54
|
# @return [Object]
|
16
|
-
def self.string(*rules)
|
55
|
+
def self.string(*rules, format: :ebnf)
|
17
56
|
require 'stringio' unless defined?(StringIO)
|
18
57
|
buf = StringIO.new
|
19
|
-
write(buf, *rules)
|
58
|
+
write(buf, *rules, format: format)
|
20
59
|
buf.string
|
21
60
|
end
|
22
61
|
|
@@ -24,9 +63,10 @@ module EBNF
|
|
24
63
|
# Format rules to $stdout
|
25
64
|
#
|
26
65
|
# @param [Array<Rule>] rules
|
66
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
27
67
|
# @return [Object]
|
28
|
-
def self.print(*rules)
|
29
|
-
write($stdout, *rules)
|
68
|
+
def self.print(*rules, format: :ebnf)
|
69
|
+
write($stdout, *rules, format: format)
|
30
70
|
end
|
31
71
|
|
32
72
|
##
|
@@ -34,92 +74,153 @@ module EBNF
|
|
34
74
|
#
|
35
75
|
# @param [Object] out
|
36
76
|
# @param [Array<Rule>] rules
|
77
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
37
78
|
# @return [Object]
|
38
|
-
def self.write(out, *rules)
|
39
|
-
Writer.new(rules, out: out)
|
79
|
+
def self.write(out, *rules, format: :ebnf)
|
80
|
+
Writer.new(rules, out: out, format: format)
|
40
81
|
end
|
41
82
|
|
42
83
|
##
|
43
84
|
# Write formatted rules to an IO like object as HTML
|
44
85
|
#
|
45
86
|
# @param [Array<Rule>] rules
|
87
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
46
88
|
# @return [Object]
|
47
|
-
def self.html(*rules)
|
89
|
+
def self.html(*rules, format: :ebnf)
|
48
90
|
require 'stringio' unless defined?(StringIO)
|
49
91
|
buf = StringIO.new
|
50
|
-
Writer.new(rules, out: buf, html: true)
|
92
|
+
Writer.new(rules, out: buf, html: true, format: format)
|
51
93
|
buf.string
|
52
94
|
end
|
53
95
|
|
54
96
|
##
|
55
97
|
# @param [Array<Rule>] rules
|
56
98
|
# @param [Hash{Symbol => Object}] options
|
57
|
-
# @
|
58
|
-
# @
|
59
|
-
# @option options [
|
60
|
-
#
|
61
|
-
def initialize(rules,
|
62
|
-
@options = options.
|
63
|
-
|
64
|
-
#fmt = options.fetch(:format, :ebnf)
|
99
|
+
# @param [#write] out ($stdout)
|
100
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
101
|
+
# @option options [Symbol] format
|
102
|
+
# @option options [Boolean] html (false)
|
103
|
+
def initialize(rules, out: $stdout, html: false, format: :ebnf, **options)
|
104
|
+
@options = options.merge(html: html)
|
105
|
+
return if rules.empty?
|
65
106
|
|
66
107
|
# Determine max LHS length
|
108
|
+
format_meth = "format_#{format}".to_sym
|
67
109
|
max_id = rules.max_by {|r| r.id.to_s.length}.id.to_s.length
|
68
110
|
max_sym = rules.max_by {|r| r.sym.to_s.length}.sym.to_s.length
|
69
|
-
lhs_length = max_sym +
|
70
|
-
lhs_fmt =
|
71
|
-
|
111
|
+
lhs_length = max_sym + 1
|
112
|
+
lhs_fmt = case format
|
113
|
+
when :abnf then "%<sym>-#{max_sym}s = "
|
114
|
+
when :ebnf then "%<sym>-#{max_sym}s ::= "
|
115
|
+
when :isoebnf then "%<sym>-#{max_sym}s = "
|
116
|
+
end
|
117
|
+
if format == :ebnf && max_id > 0
|
72
118
|
lhs_fmt = "%<id>-#{max_id+2}s " + lhs_fmt
|
73
119
|
lhs_length += max_id + 3
|
74
120
|
end
|
75
121
|
rhs_length = LINE_LENGTH - lhs_length
|
76
122
|
|
77
|
-
if
|
123
|
+
if html
|
78
124
|
# Output as formatted HTML
|
79
125
|
begin
|
80
|
-
require '
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
126
|
+
require 'erubis'
|
127
|
+
eruby = Erubis::Eruby.new(ERB_DESC)
|
128
|
+
formatted_rules = rules.map do |rule|
|
129
|
+
if rule.kind == :terminals || rule.kind == :pass
|
130
|
+
OpenStruct.new(id: ("@#{rule.kind}"),
|
131
|
+
sym: nil,
|
132
|
+
assign: nil,
|
133
|
+
formatted: ("<strong>Productions for terminals</strong>" if rule.kind == :terminals))
|
134
|
+
else
|
135
|
+
formatted_expr = self.send(format_meth, rule.expr)
|
136
|
+
# Measure text without markup
|
137
|
+
formatted_expr_text = formatted_expr.gsub(%r{</?\w+[^>]*>}, '')
|
138
|
+
if formatted_expr_text.length > rhs_length && (format != :abnf || rule.alt?)
|
139
|
+
lines = []
|
140
|
+
# Can only reasonably split apart alts
|
141
|
+
self.send(format_meth, rule.expr, sep: "--rule-extensions--").
|
142
|
+
split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
|
143
|
+
assign = case format
|
144
|
+
when :ebnf
|
145
|
+
formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
|
146
|
+
(ndx > 0 ? (rule.alt? ? '|' : '') : '::=')
|
147
|
+
when :abnf
|
148
|
+
formatted.sub!(%r{\s*<code>/</code>\s*}, '')
|
149
|
+
(ndx > 0 ? '=/' : '=')
|
150
|
+
else
|
151
|
+
formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
|
152
|
+
(ndx > 0 ? (rule.alt? ? '|' : '') : '=')
|
153
|
+
end
|
154
|
+
lines << OpenStruct.new(id: ("[#{rule.id}]" if rule.id),
|
155
|
+
sym: (rule.sym if ndx == 0 || format == :abnf),
|
156
|
+
assign: assign,
|
157
|
+
formatted: formatted)
|
158
|
+
end
|
159
|
+
if format == :isoebnf
|
160
|
+
lines << OpenStruct.new(assign: ';')
|
161
|
+
end
|
162
|
+
lines
|
163
|
+
else
|
164
|
+
OpenStruct.new(id: ("[#{rule.id}]" if rule.id),
|
165
|
+
sym: rule.sym,
|
166
|
+
assign: (format == :ebnf ? '::=' : '='),
|
167
|
+
formatted: (formatted_expr + (format == :isoebnf ? ' ;' : '')))
|
168
|
+
end
|
169
|
+
end
|
170
|
+
end.flatten
|
171
|
+
out.write eruby.evaluate(format: format, rules: formatted_rules)
|
86
172
|
return
|
87
173
|
rescue LoadError
|
88
|
-
$stderr.puts "Generating HTML requires
|
174
|
+
$stderr.puts "Generating HTML requires erubis gem to be loaded"
|
89
175
|
end
|
90
176
|
end
|
91
177
|
|
92
178
|
# Format each rule, considering the available rhs size
|
93
179
|
rules.each do |rule|
|
94
180
|
buffer = if rule.pass?
|
95
|
-
"%-#{lhs_length-2}s" % "@pass"
|
181
|
+
"\n%-#{lhs_length-2}s " % "@pass"
|
182
|
+
elsif rule.kind == :terminals
|
183
|
+
"\n%-#{lhs_length-2}s" % "@terminals"
|
96
184
|
else
|
97
185
|
lhs_fmt % {id: "[#{rule.id}]", sym: rule.sym}
|
98
186
|
end
|
99
|
-
formatted_expr =
|
100
|
-
if formatted_expr.length > rhs_length
|
101
|
-
|
187
|
+
formatted_expr = self.send(format_meth, rule.expr)
|
188
|
+
if formatted_expr.length > rhs_length && (format != :abnf || rule.alt?)
|
189
|
+
if format == :abnf
|
190
|
+
# No whitespace, use =/
|
191
|
+
self.send(format_meth, rule.expr, sep: "--rule-extensions--").
|
192
|
+
split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
|
193
|
+
if ndx > 0
|
194
|
+
buffer << "\n" + lhs_fmt.sub('= ', '=/') % {id: "[#{rule.id}]", sym: rule.sym}
|
195
|
+
end
|
196
|
+
buffer << formatted.sub(/\s*\/\s*/, '')
|
197
|
+
end
|
198
|
+
else
|
199
|
+
# Space out past "= "
|
200
|
+
buffer << self.send(format_meth, rule.expr, sep: ("\n" + " " * (lhs_length + (rule.alt? ? 2 : 4) - (format == :ebnf ? 0 : 2))))
|
201
|
+
buffer << ("\n" + " " * (lhs_length) + ';') if format == :isoebnf
|
202
|
+
end
|
102
203
|
else
|
103
|
-
buffer << formatted_expr
|
204
|
+
buffer << formatted_expr + (format == :isoebnf ? ' ;' : '')
|
104
205
|
end
|
206
|
+
buffer << "\n\n" if [:terminals, :pass].include?(rule.kind)
|
105
207
|
out.puts(buffer)
|
106
208
|
end
|
107
209
|
end
|
108
210
|
|
109
211
|
protected
|
212
|
+
|
213
|
+
##
|
214
|
+
# W3C EBNF Formatters
|
215
|
+
##
|
216
|
+
|
110
217
|
# Format the expression part of a rule
|
111
|
-
def
|
218
|
+
def format_ebnf(expr, sep: nil, embedded: false)
|
112
219
|
return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
|
113
220
|
if expr.is_a?(String)
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
return (@options[:html] ? %(<code class="grammar-char-escape">#{expr}</code>) : expr)
|
118
|
-
elsif expr =~ /"/
|
119
|
-
return (@options[:html] ? %('<code class="grammar-literal">#{escape(expr, "'")}</code>') : %('#{escape(expr, "'")}'))
|
120
|
-
else
|
121
|
-
return (@options[:html] ? %("<code class="grammar-literal">#{escape(expr, '"')}</code>") : %("#{escape(expr, '"')}"))
|
122
|
-
end
|
221
|
+
return expr.length == 1 ?
|
222
|
+
format_ebnf_char(expr) :
|
223
|
+
format_ebnf_string(expr, expr.include?('"') ? "'" : '"')
|
123
224
|
end
|
124
225
|
parts = {
|
125
226
|
alt: (@options[:html] ? "<code>|</code> " : "| "),
|
@@ -132,40 +233,75 @@ module EBNF
|
|
132
233
|
rparen = (@options[:html] ? "<code>)</code> " : ")")
|
133
234
|
|
134
235
|
case expr.first
|
236
|
+
when :istr
|
237
|
+
# Looses fidelity, but, oh well ...
|
238
|
+
format_ebnf(expr.last, embedded: true)
|
135
239
|
when :alt, :diff
|
136
240
|
this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
|
137
|
-
expr[1..-1].map {|e|
|
241
|
+
res = expr[1..-1].map {|e| format_ebnf(e, embedded: true)}.join(this_sep)
|
242
|
+
embedded ? (lparen + res + rparen) : res
|
138
243
|
when :star, :plus, :opt
|
139
|
-
raise "Expected star expression to have a single operand" unless expr.length == 2
|
140
244
|
char = parts[expr.first.to_sym]
|
141
|
-
r =
|
142
|
-
|
245
|
+
r = format_ebnf(expr[1], embedded: true)
|
246
|
+
"#{r}#{char}"
|
143
247
|
when :hex
|
144
|
-
(
|
248
|
+
escape_ebnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
|
145
249
|
when :range
|
146
|
-
|
250
|
+
format_ebnf_range(expr.last)
|
147
251
|
when :seq
|
148
252
|
this_sep = (sep ? sep : " ")
|
149
|
-
expr[1..-1].map
|
253
|
+
res = expr[1..-1].map do |e|
|
254
|
+
format_ebnf(e, embedded: true)
|
255
|
+
end.join(this_sep)
|
256
|
+
embedded ? (lparen + res + rparen) : res
|
257
|
+
when :rept
|
258
|
+
# Expand repetition
|
259
|
+
min, max, value = expr[1..-1]
|
260
|
+
if min == 0 && max == 1
|
261
|
+
format_ebnf([:opt, value], sep: sep, embedded: embedded)
|
262
|
+
elsif min == 0 && max == '*'
|
263
|
+
format_ebnf([:star, value], sep: sep, embedded: embedded)
|
264
|
+
elsif min == 1 && max == '*'
|
265
|
+
format_ebnf([:plus, value], sep: sep, embedded: embedded)
|
266
|
+
else
|
267
|
+
val2 = [:seq]
|
268
|
+
while min > 0
|
269
|
+
val2 << value
|
270
|
+
min -= 1
|
271
|
+
max -= 1 unless max == '*'
|
272
|
+
end
|
273
|
+
if max == '*'
|
274
|
+
val2 << [:star, value]
|
275
|
+
else
|
276
|
+
opt = nil
|
277
|
+
while max > 0
|
278
|
+
opt = [:opt, opt ? [:seq, value, opt] : value]
|
279
|
+
max -= 1
|
280
|
+
end
|
281
|
+
val2 << opt if opt
|
282
|
+
end
|
283
|
+
format_ebnf(val2, sep: sep, embedded: embedded)
|
284
|
+
end
|
150
285
|
else
|
151
286
|
raise "Unknown operator: #{expr.first}"
|
152
287
|
end
|
153
288
|
end
|
154
289
|
|
155
290
|
# Format a single-character string, prefering hex for non-main ASCII
|
156
|
-
def
|
291
|
+
def format_ebnf_char(c)
|
157
292
|
case c.ord
|
158
|
-
when
|
159
|
-
when
|
160
|
-
|
293
|
+
when (0x21) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"})
|
294
|
+
when 0x22 then (@options[:html] ? %('<code class="grammar-literal">"</code>') : %{'"'})
|
295
|
+
when (0x23..0x7e) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"})
|
296
|
+
when (0x80..0xFFFD) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"})
|
297
|
+
else escape_ebnf_hex(c)
|
161
298
|
end
|
162
299
|
end
|
163
300
|
|
164
301
|
# Format a range
|
165
|
-
def
|
302
|
+
def format_ebnf_range(string)
|
166
303
|
lbrac = (@options[:html] ? "<code>[</code> " : "[")
|
167
304
|
rbrac = (@options[:html] ? "<code>]</code> " : "]")
|
168
|
-
dash = (@options[:html] ? "<code>-</code> " : "-")
|
169
305
|
|
170
306
|
buffer = lbrac
|
171
307
|
s = StringScanner.new(string)
|
@@ -174,53 +310,386 @@ module EBNF
|
|
174
310
|
when s.scan(/\A[!"\u0024-\u007e]+/)
|
175
311
|
buffer << (@options[:html] ? %(<code class="grammar-literal">#{s.matched}</code>) : s.matched)
|
176
312
|
when s.scan(/\A#x\h+/)
|
177
|
-
buffer << (
|
178
|
-
when s.scan(/\A-/)
|
179
|
-
buffer << dash
|
313
|
+
buffer << escape_ebnf_hex(s.matched[2..-1].hex.chr(Encoding::UTF_8))
|
180
314
|
else
|
181
|
-
buffer << (
|
315
|
+
buffer << escape_ebnf_hex(s.getch)
|
182
316
|
end
|
183
317
|
end
|
184
318
|
buffer + rbrac
|
185
319
|
end
|
186
320
|
|
187
321
|
# Escape a string, using as many UTF-8 characters as possible
|
188
|
-
def
|
189
|
-
buffer = ""
|
322
|
+
def format_ebnf_string(string, quote = '"')
|
190
323
|
string.each_char do |c|
|
191
|
-
|
192
|
-
when
|
193
|
-
|
194
|
-
|
324
|
+
case c.ord
|
325
|
+
when 0x00..0x19, quote.ord
|
326
|
+
raise RangeError, "cannot format #{string.inspect} as an EBNF String: #{c.inspect} is out of range" unless
|
327
|
+
ISOEBNF::TERMINAL_CHARACTER.match?(c)
|
195
328
|
end
|
196
329
|
end
|
197
|
-
|
330
|
+
|
331
|
+
"#{quote}#{string}#{quote}"
|
198
332
|
end
|
199
333
|
|
200
|
-
def
|
334
|
+
def escape_ebnf_hex(u)
|
201
335
|
fmt = case u.ord
|
336
|
+
when 0x00..0x20 then "#x%02X"
|
202
337
|
when 0x0000..0x00ff then "#x%02X"
|
203
338
|
when 0x0100..0xffff then "#x%04X"
|
204
339
|
else "#x%08X"
|
205
340
|
end
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
341
|
+
char = fmt % u.ord
|
342
|
+
if @options[:html]
|
343
|
+
if u.ord <= 0x20
|
344
|
+
char = %(<abbr title="#{ASCII_ESCAPE_NAMES[u.ord]}">#{char}</abbr>)
|
345
|
+
elsif u.ord < 0x7F
|
346
|
+
char = %(<abbr title="ascii '#{u}'">#{char}</abbr>)
|
347
|
+
elsif u.ord == 0x7F
|
348
|
+
char = %(<abbr title="delete">#{char}</abbr>)
|
349
|
+
elsif u.ord <= 0xFF
|
350
|
+
char = %(<abbr title="extended ascii '#{u}'">#{char}</abbr>)
|
351
|
+
else
|
352
|
+
char = %(<abbr title="unicode '#{u}'">#{char}</abbr>)
|
353
|
+
end
|
354
|
+
%(<code class="grammar-char-escape">#{char}</code>)
|
355
|
+
else
|
356
|
+
char
|
357
|
+
end
|
358
|
+
end
|
359
|
+
|
360
|
+
##
|
361
|
+
# ABNF Formatters
|
362
|
+
##
|
363
|
+
|
364
|
+
# Format the expression part of a rule as ABNF.
#
# @param [Symbol, String, Array] expr
#   a rule reference, a literal string, or an operator form such as `[:seq, ...]`
# @param [String, nil] sep
#   separator used between the members of this `:alt` or `:seq`
#   (a single space when nil); it is not passed down to nested expressions
# @param [Boolean] embedded
#   when true, `:alt` and `:seq` results are wrapped in parentheses
# @param [Boolean] sensitive
#   when true, multi-character literals get the `%s` prefix
# @return [String]
# @raise [RangeError] for the `:diff` operator
# @raise [RuntimeError] for an unknown operator
def format_abnf(expr, sep: nil, embedded: false, sensitive: true)
  html = @options[:html]
  return (html ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)

  if expr.is_a?(String)
    if expr.length == 1
      return format_abnf_char(expr)
    elsif expr.start_with?('%')
      # Already encoded
      return expr
    elsif expr.include?('"')
      # A quoted literal cannot contain a double quote: split the string on
      # every quote and put a %x22 escape between the pieces. The -1 limit
      # keeps trailing empty pieces, so a quote at the very end of the string
      # (or a string made only of quotes) is not lost; empty pieces themselves
      # are skipped rather than written as empty literals.
      seq = [:seq]
      expr.split('"', -1).each_with_index do |segment, ndx|
        seq << [:hex, "#x22"] if ndx > 0
        seq << segment unless segment.empty?
      end
      return format_abnf(seq, sep: nil, embedded: false)
    else
      return (html ? %("<code class="grammar-literal">#{'%s' if sensitive}#{expr}</code>") : %(#{'%s' if sensitive}"#{expr}"))
    end
  end

  # NOTE(review): the HTML form of :plus renders "+", while the text form
  # renders "1*"; confirm which is intended for HTML output.
  parts = {
    alt:    (html ? "<code>/</code> " : "/ "),
    star:   (html ? "<code>*</code> " : "*"),
    plus:   (html ? "<code>+</code> " : "1*"),
    opt:    (html ? "<code>?</code> " : "?")
  }
  lbrac = (html ? "<code>[</code> " : "[")
  rbrac = (html ? "<code>]</code> " : "]")
  lparen = (html ? "<code>(</code> " : "(")
  rparen = (html ? "<code>)</code> " : ")")

  case expr.first
  when :istr
    # FIXME: if string part is segmented, need to do something different
    format_abnf(expr.last, embedded: true, sensitive: false)
  when :alt
    this_sep = (sep || " ") + parts[:alt]
    res = expr[1..-1].map { |e| format_abnf(e, embedded: true) }.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :diff
    raise RangeError, "ABNF does not support the diff operator"
  when :opt
    # Optional elements are written in square brackets
    "#{lbrac}#{format_abnf(expr[1], embedded: true)}#{rbrac}"
  when :plus, :star
    # Repetition operators are written as a prefix
    "#{parts[expr.first]}#{format_abnf(expr[1], embedded: true)}"
  when :hex
    # An explicit encoding is required: Integer#chr without one raises
    # RangeError for code points above 0xFF
    escape_abnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
  when :range
    # Returns an [:alt] or [:not [:alt]] if composed of multiple sequences
    # Note: ABNF does not support the `not` operator
    res = format_abnf_range(expr.last)
    res.is_a?(Array) ? format_abnf(res, embedded: true) : res
  when :seq
    this_sep = (sep || " ")
    res = expr[1..-1].map { |e| format_abnf(e, embedded: true) }.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :rept
    # Written with a repetition prefix: n, *, n* or n*m
    min, max, value = expr[1..-1]
    r = format_abnf(value, embedded: true)
    if min == max
      "#{min}#{r}"
    elsif min == 0 && max == '*'
      "#{parts[:star]}#{r}"
    elsif min > 0 && max == '*'
      "#{min}#{parts[:star]}#{r}"
    else
      "#{min}#{parts[:star]}#{max}#{r}"
    end
  else
    raise "Unknown operator: #{expr.first}"
  end
end
|
447
|
+
|
448
|
+
# Format a single-character string, preferring hex for non-main ASCII.
#
# Characters in %x20-21 and %x23-7E are written as a quoted literal; anything
# else (including the double quote itself) is written as a hex escape.
#
# The literal is built by plain quoting rather than String#inspect, because
# inspect renders a backslash as "\\" and ABNF literals have no escapes.
#
# @param [String] c a single character
# @return [String]
def format_abnf_char(c)
  if /[\x20-\x21\x23-\x7E]/.match?(c)
    %("#{c}")
  else
    escape_abnf_hex(c)
  end
end
|
456
|
+
|
457
|
+
# Format a range
#
# Presumes range has already been validated. Input which cannot be scanned
# raises RangeError.
#
# Consecutive literal characters become a decimal `%d` sequence joined with
# ".", consecutive hex escapes become a `%x` sequence, and each `a-b` pair
# becomes a `%d`/`%x` value range.
#
# @param [String] string the content of the range, without the brackets
# @return [String, Array]
#   a single ABNF value when the range has one component, otherwise an
#   `[:alt, ...]` expression of components to be formatted by the caller
# @raise [RangeError]
#   if the range is negated (`^`), holds a character which cannot appear in a
#   range, or has a `-` with no starting value of the same form
def format_abnf_range(string)
  alt, o_dash = [:alt], false

  raise RangeError, "cannot format #{string.inspect} as an ABNF range" if string.start_with?('^')

  # A trailing dash is a literal "-", not a range operator
  if string.end_with?('-')
    o_dash = true
    string = string[0..-2]
  end

  scanner = StringScanner.new(string)
  hexes, deces = [], []
  in_range = false
  # Build op (alt) from different ranges/enums
  until scanner.eos?
    if hex = scanner.scan(Terminals::HEX)
      # Append any decimal values
      alt << "%d" + deces.join(".") unless deces.empty?
      deces = []

      if in_range
        raise RangeError, "cannot format #{string.inspect} as an ABNF range: no hexadecimal start for #{hex.inspect}" if hexes.empty?
        # Add "." sequences for any previous hexes
        alt << "%x" + hexes[0..-2].join(".") if hexes.length > 1
        alt << "%x#{hexes.last}-#{hex[2..-1]}"
        in_range, hexes = false, []
      else
        hexes << hex[2..-1]
      end
    elsif dec = scanner.scan(Terminals::R_CHAR)
      # Append any hexadecimal values
      alt << "%x" + hexes.join(".") unless hexes.empty?
      hexes = []

      if in_range
        raise RangeError, "cannot format #{string.inspect} as an ABNF range: no character start for #{dec.inspect}" if deces.empty?
        # Add "." sequences for any previous decimals
        alt << "%d" + deces[0..-2].join(".") if deces.length > 1
        alt << "%d#{deces.last}-#{dec.codepoints.first}"
        in_range, deces = false, []
      else
        deces << dec.codepoints.first.to_s
      end
    elsif !scanner.check(/\-/)
      # Nothing can consume this character; without this guard the loop
      # would never advance
      raise RangeError, "cannot format #{string.inspect} as an ABNF range: #{scanner.rest[0].inspect} is out of range"
    end

    in_range = true if scanner.scan(/\-/)
  end

  # The trailing literal dash (decimal 45)
  deces << '45' if o_dash

  # Append hexes and deces as "." sequences (should be only one)
  alt << "%d" + deces.join(".") unless deces.empty?
  alt << "%x" + hexes.join(".") unless hexes.empty?

  # FIXME: HTML abbreviations?
  if alt.length == 2
    # Just return the range or enum
    alt.last
  else
    # Return the alt, which will be further formatted
    alt
  end
end
|
521
|
+
|
522
|
+
# Format a character as an ABNF hexadecimal value (`%xNN`).
#
# The value is zero-padded to 2, 4 or 8 hex digits depending on its size.
# When the :html option is set, it is wrapped in a `<code>` element with an
# `<abbr>` whose title describes the character.
#
# @param [String] u a single character
# @return [String]
def escape_abnf_hex(u)
  code = u.ord
  fmt = case code
  when 0x0000..0x00ff then "%02X"
  when 0x0100..0xffff then "%04X"
  else "%08X"
  end
  char = "%x" + (fmt % code)
  return char unless @options[:html]

  # The character is placed inside a double-quoted attribute, so markup
  # characters (notably the double quote itself, %x22) must be escaped
  shown = u.gsub(/[&<>"]/, {'&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;'})
  title = if code <= 0x20
    ASCII_ESCAPE_NAMES[code]
  elsif code < 0x7F
    # Strictly less than: #x7F is "delete" and is handled by the next branch
    "ascii '#{shown}'"
  elsif code == 0x7F
    "delete"
  elsif code <= 0xFF
    "extended ascii '#{shown}'"
  else
    "unicode '#{shown}'"
  end
  %(<code class="grammar-char-escape"><abbr title="#{title}">#{char}</abbr></code>)
end
|
546
|
+
|
547
|
+
##
|
548
|
+
# ISO EBNF Formatters
|
549
|
+
##
|
550
|
+
|
551
|
+
# Format the expression part of a rule as ISO EBNF.
#
# @param [Symbol, String, Array] expr
#   a rule reference, a literal string (or `#x..` hex escape), or an operator
#   form such as `[:seq, ...]`
# @param [String, nil] sep
#   separator used between the members of this `:alt` or `:seq`
#   (a single space when nil)
# @param [Boolean] embedded
#   when true, `:alt`, `:diff` and `:seq` results are wrapped in parentheses
# @return [String]
# @raise [RangeError]
#   if a literal contains a character which does not match
#   ISOEBNF::TERMINAL_CHARACTER
# @raise [RuntimeError] for an unknown operator
def format_isoebnf(expr, sep: nil, embedded: false)
  html = @options[:html]
  return (html ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)

  if expr.is_a?(String)
    # A hex escape is written as the character it names. The pattern is
    # anchored at both ends so a literal which merely starts with "#x.." is
    # not truncated, and an explicit encoding is given because Integer#chr
    # without one raises for code points above 0xFF.
    expr = expr[2..-1].hex.chr(Encoding::UTF_8) if expr.match?(/\A#x\h+\z/)
    expr.each_char do |c|
      next if ISOEBNF::TERMINAL_CHARACTER.match?(c)
      raise RangeError, "cannot format #{expr.inspect} as an ISO EBNF String: #{c.inspect} is out of range"
    end
    # Use single quotes when the literal itself contains a double quote
    if expr.include?('"')
      return (html ? %('<code class="grammar-literal">#{expr}</code>') : %('#{expr}'))
    else
      return (html ? %("<code class="grammar-literal">#{expr}</code>") : %("#{expr}"))
    end
  end

  parts = {
    alt:    (html ? "<code>|</code> " : "| "),
    diff:   (html ? "<code>-</code> " : "- "),
  }
  lparen = (html ? "<code>(</code> " : "(")
  rparen = (html ? "<code>)</code> " : ")")

  case expr.first
  when :istr
    # Loses fidelity, but, oh well ...
    format_isoebnf(expr.last, embedded: true)
  when :alt, :diff
    this_sep = (sep || " ") + parts[expr.first]
    res = expr[1..-1].map { |e| format_isoebnf(e, embedded: true) }.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :opt
    "[#{format_isoebnf(expr[1], embedded: true)}]"
  when :star
    "{#{format_isoebnf(expr[1], embedded: true)}}"
  when :plus
    # One or more is written as the value followed by its zero-or-more form
    r = format_isoebnf(expr[1], embedded: true)
    "#{r}, {#{r}}"
  when :hex
    format_isoebnf(expr[1], embedded: true)
  when :range
    res = format_isoebnf_range(expr.last)
    res.is_a?(Array) ? format_isoebnf(res, embedded: true) : res
  when :seq
    this_sep = "," + (sep || " ")
    res = expr[1..-1].map { |e| format_isoebnf(e, embedded: true) }.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :rept
    # Expand repetition
    min, max, value = expr[1..-1]
    if min == 0 && max == 1
      format_isoebnf([:opt, value], sep: sep, embedded: embedded)
    elsif min == 0 && max == '*'
      format_isoebnf([:star, value], sep: sep, embedded: embedded)
    elsif min == 1 && max == '*'
      format_isoebnf([:plus, value], sep: sep, embedded: embedded)
    else
      # Required occurrences are written out explicitly ...
      val2 = [:seq]
      while min > 0
        val2 << value
        min -= 1
        max -= 1 unless max == '*'
      end
      if max == '*'
        # ... followed by zero-or-more when unbounded ...
        val2 << [:star, value]
      else
        # ... or by nested options, one level per remaining occurrence
        opt = nil
        while max > 0
          opt = [:opt, opt ? [:seq, value, opt] : value]
          max -= 1
        end
        val2 << opt if opt
      end
      format_isoebnf(val2, sep: sep, embedded: embedded)
    end
  else
    raise "Unknown operator: #{expr.first}"
  end
end
|
635
|
+
|
636
|
+
# Format a range
# Range is formatted as an alternation of characters
#
# Each `a-b` pair is expanded into every character from a to b.
#
# @param [String] string the content of the range, without the brackets
# @return [String, Array]
#   a quoted literal when the range holds exactly one character, otherwise
#   an `[:alt, ...]` expression of the individual characters
# @raise [RangeError]
#   if the range is negated (`^`), holds a character which cannot be scanned
#   or does not match ISOEBNF::TERMINAL_CHARACTER, or has a descending pair
def format_isoebnf_range(string)
  chars = []
  o_dash = false

  raise RangeError, "cannot format #{string.inspect} as an ISO EBNF range" if string.start_with?('^')

  # A trailing dash is a literal "-", not a range operator
  if string.end_with?('-')
    o_dash = true
    string = string[0..-2]
  end

  scanner = StringScanner.new(string)
  in_range = false
  # Build chars from different ranges/enums
  until scanner.eos?
    char = if hex = scanner.scan(Terminals::HEX)
      # Convert the escape to the character it names
      hex[2..-1].hex.chr(Encoding::UTF_8)
    else
      scanner.scan(Terminals::R_CHAR)
    end
    unless char && ISOEBNF::TERMINAL_CHARACTER.match?(char)
      # When nothing could be scanned, report the character at the scan position
      offender = char || scanner.rest[0]
      raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration: #{offender.inspect} is out of range"
    end

    if in_range
      # calculate characters from chars.last to this char
      raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration" unless chars.last < char
      chars.concat((chars.last..char).to_a[1..-1])
      in_range = false
    else
      chars << char
    end

    in_range = true if scanner.scan(/\-/)
  end

  chars << '-' if o_dash

  # Possibly only a single character (no character?)
  if chars.length == 1
    # Quote directly rather than with String#inspect, which would add
    # backslash escapes; use single quotes for the double quote character
    only = chars.last
    only.include?('"') ? %('#{only}') : %("#{only}")
  else
    chars.unshift(:alt)
  end
end
|
677
|
+
|
678
|
+
ERB_DESC = %q(
|
679
|
+
<table class="grammar">
|
680
|
+
<tbody id="grammar-productions" class="<%= @format %>">
|
681
|
+
<% for rule in @rules %>
|
682
|
+
<tr<%= %{ id="grammar-production-#{rule.sym}"} unless %w(=/ |).include?(rule.assign)%>>
|
683
|
+
<% if rule.id %>
|
684
|
+
<td><%= rule.id %></td>
|
685
|
+
<% end %>
|
686
|
+
<td><code><%== rule.sym %></code></td>
|
687
|
+
<td><%= rule.assign %></td>
|
688
|
+
<td><%= rule.formatted %></td>
|
689
|
+
</tr>
|
690
|
+
<% end %>
|
691
|
+
</tbody>
|
692
|
+
</table>
|
224
693
|
).gsub(/^ /, '')
|
225
694
|
end
|
226
695
|
end
|