ebnf 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,18 +1,21 @@
1
1
  # encoding: utf-8
2
2
  # Terminal definitions for the EBNF grammar
3
3
  module EBNF::Terminals
4
- SYMBOL = %r([a-zA-Z0-9_\.]+)u.freeze
5
- HEX = %r(\#x[a-fA-F0-9]+)u.freeze
4
+ SYMBOL_BASE = %r(\b[a-zA-Z0-9_\.]+\b)u.freeze
5
+ SYMBOL = %r(#{SYMBOL_BASE}(?!\s*::=))u.freeze
6
+ HEX = %r(\#x\h+)u.freeze
6
7
  CHAR = %r([\u0009\u000A\u000D\u0020-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
7
- R_CHAR = %r([\u0009\u000A\u000D\u0020-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
8
- RANGE = %r(\[(?:(?:#{R_CHAR})\-(?:#{R_CHAR})|(?:#{HEX})-(?:#{HEX}))\])u.freeze
9
- ENUM_BASE = %r(\[(?:(?:#{R_CHAR})+|(?:#{HEX})+)\])u.freeze
10
- ENUM = %r((?:#{ENUM_BASE})(?!\s+#{SYMBOL}))u.freeze
11
- LHS = %r(\[(?:(?:#{SYMBOL})+\]\s+)?(?:#{SYMBOL})\s*::=)u.freeze
12
- O_RANGE = %r(\[^(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}-#{HEX})\])u.freeze
13
- O_ENUM = %r(\[^(?:#{R_CHAR})+\])u.freeze
8
+ R_CHAR = %r([\u0009\u000A\u000D\u0020-\u002C\u002E-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
9
+ RANGE = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX})|#{R_CHAR}|#{HEX})+-?\](?!\s+#{SYMBOL_BASE}\s*::=))u.freeze
10
+ LHS = %r((?:\[#{SYMBOL_BASE}\])?\s*#{SYMBOL_BASE}\s*::=)u.freeze
11
+ O_RANGE = %r(\[\^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX}|#{R_CHAR}|#{HEX}))+-?\])u.freeze
14
12
  STRING1 = %r("[\u0009\u000A\u000D\u0020\u0021\u0023-\uD7FF\u{10000}-\u{10FFFF}]*")u.freeze
15
13
  STRING2 = %r('[\u0009\u000A\u000D\u0020-\u0026\u0028-\uD7FF\u{10000}-\u{10FFFF}]*')u.freeze
16
14
  POSTFIX = %r([?*+])u.freeze
17
- PASS = %r((\s|(?:(#[^x]|//)[^\n\r]*$)|(?:/\*(?:(?:\*[^/])|[^*])*\*/))+)mu.freeze
15
+ PASS = %r((
16
+ \s
17
+ | (?:(?:\#[^x]|//)[^\n\r]*)
18
+ | (?:/\*(?:(?:\*[^/])|[^*])*\*/)
19
+ | (?:\(\*(?:(?:\*[^\)])|[^*])*\*\))
20
+ )+)xmu.freeze
18
21
  end
@@ -1,6 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  require 'rdf'
3
3
  require 'strscan' unless defined?(StringScanner)
4
+ require "ostruct"
4
5
 
5
6
  ##
6
7
  # Serialize ruleset back to EBNF
@@ -8,15 +9,53 @@ module EBNF
8
9
  class Writer
9
10
  LINE_LENGTH = 80
10
11
 
12
# Human-readable names for the ASCII control characters and space, indexed by
# code point (#x00 through #x20). The hex-escape helpers use these as the
# `title` text of the <abbr> element when rendering HTML.
#
# Frozen so the shared lookup table cannot be mutated at runtime.
ASCII_ESCAPE_NAMES = [
  "null",                 #x00
  "start of heading",     #x01
  "start of text",        #x02
  "end of text",          #x03
  "end of transmission",  #x04
  "enquiry",              #x05
  "acknowledge",          #x06
  "bell",                 #x07
  "backspace",            #x08
  "horizontal tab",       #x09
  "new line",             #x0A
  "vertical tab",         #x0B
  "form feed",            #x0C
  "carriage return",      #x0D
  "shift out",            #x0E
  "shift in",             #x0F
  "data link escape",     #x10
  "device control 1",     #x11
  "device control 2",     #x12
  "device control 3",     #x13
  "device control 4",     #x14
  "negative acknowledge", #x15
  "synchronous idle",     #x16
  "end of trans. block",  #x17
  "cancel",               #x18
  "end of medium",        #x19
  "substitute",           #x1A
  "escape",               #x1B
  "file separator",       #x1C
  "group separator",      #x1D
  "record separator",     #x1E
  "unit separator",       #x1F
  "space"                 #x20
].freeze
48
+
11
49
  ##
12
50
  # Format rules to a String
13
51
  #
14
52
  # @param [Array<Rule>] rules
53
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
15
54
  # @return [Object]
16
- def self.string(*rules)
55
+ def self.string(*rules, format: :ebnf)
17
56
  require 'stringio' unless defined?(StringIO)
18
57
  buf = StringIO.new
19
- write(buf, *rules)
58
+ write(buf, *rules, format: format)
20
59
  buf.string
21
60
  end
22
61
 
@@ -24,9 +63,10 @@ module EBNF
24
63
  # Format rules to $stdout
25
64
  #
26
65
  # @param [Array<Rule>] rules
66
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
27
67
  # @return [Object]
28
- def self.print(*rules)
29
- write($stdout, *rules)
68
+ def self.print(*rules, format: :ebnf)
69
+ write($stdout, *rules, format: format)
30
70
  end
31
71
 
32
72
  ##
@@ -34,20 +74,22 @@ module EBNF
34
74
  #
35
75
  # @param [Object] out
36
76
  # @param [Array<Rule>] rules
77
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
37
78
  # @return [Object]
38
- def self.write(out, *rules)
39
- Writer.new(rules, out: out)
79
+ def self.write(out, *rules, format: :ebnf)
80
+ Writer.new(rules, out: out, format: format)
40
81
  end
41
82
 
42
83
  ##
43
84
  # Write formatted rules to an IO like object as HTML
44
85
  #
45
86
  # @param [Array<Rule>] rules
87
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
46
88
  # @return [Object]
47
- def self.html(*rules)
89
+ def self.html(*rules, format: :ebnf)
48
90
  require 'stringio' unless defined?(StringIO)
49
91
  buf = StringIO.new
50
- Writer.new(rules, out: buf, html: true)
92
+ Writer.new(rules, out: buf, html: true, format: format)
51
93
  buf.string
52
94
  end
53
95
 
@@ -55,17 +97,24 @@ module EBNF
55
97
  # @param [Array<Rule>] rules
56
98
  # @param [Hash{Symbol => Object}] options
57
99
  # @param [#write] out ($stdout)
100
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
58
101
  # @option options [Symbol] format
59
102
  # @option options [Boolean] html (false)
60
- def initialize(rules, out: $stdout, html: false, **options)
61
- @options = options.dup
103
+ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options)
104
+ @options = options.merge(html: html)
105
+ return if rules.empty?
62
106
 
63
107
  # Determine max LHS length
108
+ format_meth = "format_#{format}".to_sym
64
109
  max_id = rules.max_by {|r| r.id.to_s.length}.id.to_s.length
65
110
  max_sym = rules.max_by {|r| r.sym.to_s.length}.sym.to_s.length
66
- lhs_length = max_sym + 3
67
- lhs_fmt = "%<sym>-#{max_sym}s ::= "
68
- if max_id > 0
111
+ lhs_length = max_sym + 1
112
+ lhs_fmt = case format
113
+ when :abnf then "%<sym>-#{max_sym}s = "
114
+ when :ebnf then "%<sym>-#{max_sym}s ::= "
115
+ when :isoebnf then "%<sym>-#{max_sym}s = "
116
+ end
117
+ if format == :ebnf && max_id > 0
69
118
  lhs_fmt = "%<id>-#{max_id+2}s " + lhs_fmt
70
119
  lhs_length += max_id + 3
71
120
  end
@@ -74,49 +123,104 @@ module EBNF
74
123
  if html
75
124
  # Output as formatted HTML
76
125
  begin
77
- require 'haml'
78
- hout = Haml::Engine.new(HAML_DESC).render(self, rules: rules) do |rule|
79
- formatted_expr = format(rule.expr)
80
- formatted_expr.length > rhs_length ? format(rule.expr, "\n") : formatted_expr
81
- end
82
- out.write hout
126
+ require 'erubis'
127
+ eruby = Erubis::Eruby.new(ERB_DESC)
128
+ formatted_rules = rules.map do |rule|
129
+ if rule.kind == :terminals || rule.kind == :pass
130
+ OpenStruct.new(id: ("@#{rule.kind}"),
131
+ sym: nil,
132
+ assign: nil,
133
+ formatted: ("<strong>Productions for terminals</strong>" if rule.kind == :terminals))
134
+ else
135
+ formatted_expr = self.send(format_meth, rule.expr)
136
+ # Measure text without markup
137
+ formatted_expr_text = formatted_expr.gsub(%r{</?\w+[^>]*>}, '')
138
+ if formatted_expr_text.length > rhs_length && (format != :abnf || rule.alt?)
139
+ lines = []
140
+ # Can only reasonably split apart alts
141
+ self.send(format_meth, rule.expr, sep: "--rule-extensions--").
142
+ split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
143
+ assign = case format
144
+ when :ebnf
145
+ formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
146
+ (ndx > 0 ? (rule.alt? ? '|' : '') : '::=')
147
+ when :abnf
148
+ formatted.sub!(%r{\s*<code>/</code>\s*}, '')
149
+ (ndx > 0 ? '=/' : '=')
150
+ else
151
+ formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
152
+ (ndx > 0 ? (rule.alt? ? '|' : '') : '=')
153
+ end
154
+ lines << OpenStruct.new(id: ("[#{rule.id}]" if rule.id),
155
+ sym: (rule.sym if ndx == 0 || format == :abnf),
156
+ assign: assign,
157
+ formatted: formatted)
158
+ end
159
+ if format == :isoebnf
160
+ lines << OpenStruct.new(assign: ';')
161
+ end
162
+ lines
163
+ else
164
+ OpenStruct.new(id: ("[#{rule.id}]" if rule.id),
165
+ sym: rule.sym,
166
+ assign: (format == :ebnf ? '::=' : '='),
167
+ formatted: (formatted_expr + (format == :isoebnf ? ' ;' : '')))
168
+ end
169
+ end
170
+ end.flatten
171
+ out.write eruby.evaluate(format: format, rules: formatted_rules)
83
172
  return
84
173
  rescue LoadError
85
- $stderr.puts "Generating HTML requires haml gem to be loaded"
174
+ $stderr.puts "Generating HTML requires erubis gem to be loaded"
86
175
  end
87
176
  end
88
177
 
89
178
  # Format each rule, considering the available rhs size
90
179
  rules.each do |rule|
91
180
  buffer = if rule.pass?
92
- "%-#{lhs_length-2}s" % "@pass"
181
+ "\n%-#{lhs_length-2}s " % "@pass"
182
+ elsif rule.kind == :terminals
183
+ "\n%-#{lhs_length-2}s" % "@terminals"
93
184
  else
94
185
  lhs_fmt % {id: "[#{rule.id}]", sym: rule.sym}
95
186
  end
96
- formatted_expr = format(rule.expr)
97
- if formatted_expr.length > rhs_length
98
- buffer << format(rule.expr, ("\n" + " " * lhs_length))
187
+ formatted_expr = self.send(format_meth, rule.expr)
188
+ if formatted_expr.length > rhs_length && (format != :abnf || rule.alt?)
189
+ if format == :abnf
190
+ # No whitespace, use =/
191
+ self.send(format_meth, rule.expr, sep: "--rule-extensions--").
192
+ split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
193
+ if ndx > 0
194
+ buffer << "\n" + lhs_fmt.sub('= ', '=/') % {id: "[#{rule.id}]", sym: rule.sym}
195
+ end
196
+ buffer << formatted.sub(/\s*\/\s*/, '')
197
+ end
198
+ else
199
+ # Space out past "= "
200
+ buffer << self.send(format_meth, rule.expr, sep: ("\n" + " " * (lhs_length + (rule.alt? ? 2 : 4) - (format == :ebnf ? 0 : 2))))
201
+ buffer << ("\n" + " " * (lhs_length) + ';') if format == :isoebnf
202
+ end
99
203
  else
100
- buffer << formatted_expr
204
+ buffer << formatted_expr + (format == :isoebnf ? ' ;' : '')
101
205
  end
206
+ buffer << "\n\n" if [:terminals, :pass].include?(rule.kind)
102
207
  out.puts(buffer)
103
208
  end
104
209
  end
105
210
 
106
211
  protected
212
+
213
+ ##
214
+ # W3C EBNF Formatters
215
+ ##
216
+
107
217
  # Format the expression part of a rule
108
- def format(expr, sep = nil)
218
+ def format_ebnf(expr, sep: nil, embedded: false)
109
219
  return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
110
220
  if expr.is_a?(String)
111
- if expr.length == 1
112
- return format_char(expr)
113
- elsif expr =~ /\A#x\h+/
114
- return (@options[:html] ? %(<code class="grammar-char-escape">#{expr}</code>) : expr)
115
- elsif expr =~ /"/
116
- return (@options[:html] ? %('<code class="grammar-literal">#{escape(expr, "'")}</code>') : %('#{escape(expr, "'")}'))
117
- else
118
- return (@options[:html] ? %("<code class="grammar-literal">#{escape(expr, '"')}</code>") : %("#{escape(expr, '"')}"))
119
- end
221
+ return expr.length == 1 ?
222
+ format_ebnf_char(expr) :
223
+ format_ebnf_string(expr, expr.include?('"') ? "'" : '"')
120
224
  end
121
225
  parts = {
122
226
  alt: (@options[:html] ? "<code>|</code> " : "| "),
@@ -129,40 +233,75 @@ module EBNF
129
233
  rparen = (@options[:html] ? "<code>)</code> " : ")")
130
234
 
131
235
  case expr.first
236
+ when :istr
237
+ # Looses fidelity, but, oh well ...
238
+ format_ebnf(expr.last, embedded: true)
132
239
  when :alt, :diff
133
240
  this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
134
- expr[1..-1].map {|e| format(e)}.join(this_sep)
241
+ res = expr[1..-1].map {|e| format_ebnf(e, embedded: true)}.join(this_sep)
242
+ embedded ? (lparen + res + rparen) : res
135
243
  when :star, :plus, :opt
136
- raise "Expected star expression to have a single operand" unless expr.length == 2
137
244
  char = parts[expr.first.to_sym]
138
- r = format(expr[1])
139
- (r.start_with?("(") || Array(expr[1]).length == 1) ? "#{r}#{char}" : "(#{r})#{char}"
245
+ r = format_ebnf(expr[1], embedded: true)
246
+ "#{r}#{char}"
140
247
  when :hex
141
- (@options[:html] ? %(<code class="grammar-char-escape">#{expr.last}</code>) : expr.last)
248
+ escape_ebnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
142
249
  when :range
143
- format_range(expr.last)
250
+ format_ebnf_range(expr.last)
144
251
  when :seq
145
252
  this_sep = (sep ? sep : " ")
146
- expr[1..-1].map {|e| r = format(e); Array(e).length > 2 ? "#{lparen}#{r}#{rparen}" : r}.join(this_sep)
253
+ res = expr[1..-1].map do |e|
254
+ format_ebnf(e, embedded: true)
255
+ end.join(this_sep)
256
+ embedded ? (lparen + res + rparen) : res
257
+ when :rept
258
+ # Expand repetition
259
+ min, max, value = expr[1..-1]
260
+ if min == 0 && max == 1
261
+ format_ebnf([:opt, value], sep: sep, embedded: embedded)
262
+ elsif min == 0 && max == '*'
263
+ format_ebnf([:star, value], sep: sep, embedded: embedded)
264
+ elsif min == 1 && max == '*'
265
+ format_ebnf([:plus, value], sep: sep, embedded: embedded)
266
+ else
267
+ val2 = [:seq]
268
+ while min > 0
269
+ val2 << value
270
+ min -= 1
271
+ max -= 1 unless max == '*'
272
+ end
273
+ if max == '*'
274
+ val2 << [:star, value]
275
+ else
276
+ opt = nil
277
+ while max > 0
278
+ opt = [:opt, opt ? [:seq, value, opt] : value]
279
+ max -= 1
280
+ end
281
+ val2 << opt if opt
282
+ end
283
+ format_ebnf(val2, sep: sep, embedded: embedded)
284
+ end
147
285
  else
148
286
  raise "Unknown operator: #{expr.first}"
149
287
  end
150
288
  end
151
289
 
152
290
  # Format a single-character string, prefering hex for non-main ASCII
153
- def format_char(c)
291
+ def format_ebnf_char(c)
154
292
  case c.ord
155
- when 0x22 then (@options[:html] ? %('<code class="grammar-literal">"</code>') : %{'"'})
156
- when (0x23..0x7e) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"})
157
- else (@options[:html] ? %(<code class="grammar-char-escape">#{escape_hex(c)}</code>) : escape_hex(c))
293
+ when (0x21) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"})
294
+ when 0x22 then (@options[:html] ? %('<code class="grammar-literal">"</code>') : %{'"'})
295
+ when (0x23..0x7e) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"})
296
+ when (0x80..0xFFFD) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"})
297
+ else escape_ebnf_hex(c)
158
298
  end
159
299
  end
160
300
 
161
301
  # Format a range
162
- def format_range(string)
302
+ def format_ebnf_range(string)
163
303
  lbrac = (@options[:html] ? "<code>[</code> " : "[")
164
304
  rbrac = (@options[:html] ? "<code>]</code> " : "]")
165
- dash = (@options[:html] ? "<code>-</code> " : "-")
166
305
 
167
306
  buffer = lbrac
168
307
  s = StringScanner.new(string)
@@ -171,53 +310,386 @@ module EBNF
171
310
  when s.scan(/\A[!"\u0024-\u007e]+/)
172
311
  buffer << (@options[:html] ? %(<code class="grammar-literal">#{s.matched}</code>) : s.matched)
173
312
  when s.scan(/\A#x\h+/)
174
- buffer << (@options[:html] ? %(<code class="grammar-char-escape">#{s.matched}</code>) : s.matched)
175
- when s.scan(/\A-/)
176
- buffer << dash
313
+ buffer << escape_ebnf_hex(s.matched[2..-1].hex.chr(Encoding::UTF_8))
177
314
  else
178
- buffer << (@options[:html] ? %(<code class="grammar-char-escape">#{escape_hex(s.getch)}</code>) : escape_hex(s.getch))
315
+ buffer << escape_ebnf_hex(s.getch)
179
316
  end
180
317
  end
181
318
  buffer + rbrac
182
319
  end
183
320
 
184
321
  # Escape a string, using as many UTF-8 characters as possible
185
- def escape(string, quote = '"')
186
- buffer = ""
322
+ def format_ebnf_string(string, quote = '"')
187
323
  string.each_char do |c|
188
- buffer << case (u = c.ord)
189
- when (0x00..0x1f) then "#x%02X" % u
190
- when quote.ord then "#x%02X" % u
191
- else c
324
+ case c.ord
325
+ when 0x00..0x19, quote.ord
326
+ raise RangeError, "cannot format #{string.inspect} as an EBNF String: #{c.inspect} is out of range" unless
327
+ ISOEBNF::TERMINAL_CHARACTER.match?(c)
192
328
  end
193
329
  end
194
- buffer
330
+
331
+ "#{quote}#{string}#{quote}"
195
332
  end
196
333
 
197
- def escape_hex(u)
334
+ def escape_ebnf_hex(u)
198
335
  fmt = case u.ord
336
+ when 0x00..0x20 then "#x%02X"
199
337
  when 0x0000..0x00ff then "#x%02X"
200
338
  when 0x0100..0xffff then "#x%04X"
201
339
  else "#x%08X"
202
340
  end
203
- sprintf(fmt, u.ord)
204
- end
205
-
206
- HAML_DESC = %q(
207
- %table.grammar
208
- %tbody#grammar-productions
209
- - rules.each do |rule|
210
- %tr{id: "grammar-production-#{rule.sym}"}
211
- - if rule.pass?
212
- %td{colspan: 3}
213
- %code<="@pass"
214
- - else
215
- %td<= "[#{rule.id}]"
216
- %td<
217
- %code<= rule.sym
218
- %td<= "::="
219
- %td
220
- != yield rule
341
+ char = fmt % u.ord
342
+ if @options[:html]
343
+ if u.ord <= 0x20
344
+ char = %(<abbr title="#{ASCII_ESCAPE_NAMES[u.ord]}">#{char}</abbr>)
345
+ elsif u.ord < 0x7F
346
+ char = %(<abbr title="ascii '#{u}'">#{char}</abbr>)
347
+ elsif u.ord == 0x7F
348
+ char = %(<abbr title="delete">#{char}</abbr>)
349
+ elsif u.ord <= 0xFF
350
+ char = %(<abbr title="extended ascii '#{u}'">#{char}</abbr>)
351
+ else
352
+ char = %(<abbr title="unicode '#{u}'">#{char}</abbr>)
353
+ end
354
+ %(<code class="grammar-char-escape">#{char}</code>)
355
+ else
356
+ char
357
+ end
358
+ end
359
+
360
+ ##
361
+ # ABNF Formatters
362
+ ##
363
+
364
# Format the expression part of a rule as ABNF.
#
# Symbols become rule references (hyperlinked when the :html option is set),
# strings become quoted literals, and Arrays are treated as operator
# expressions whose first element names the operator.
#
# @param [Symbol, String, Array] expr expression to format
# @param [String] sep (nil)
#   separator placed between the operands of an :alt or :seq instead of a
#   single space
# @param [Boolean] embedded (false)
#   when true, :alt and :seq results are wrapped in parentheses
# @param [Boolean] sensitive (true)
#   when true, multi-character literals are marked with `%s`
# @return [String]
# @raise [RangeError] for the :diff operator, which ABNF cannot express
# @raise [RuntimeError] for an unrecognized operator
def format_abnf(expr, sep: nil, embedded: false, sensitive: true)
  return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
  if expr.is_a?(String)
    if expr.length == 1
      return format_abnf_char(expr)
    elsif expr.start_with?('%')
      # Already encoded
      return expr
    elsif expr.include?('"')
      # A double quote cannot appear inside an ABNF literal, so split the
      # string around each quote and emit the quote itself as a hex terminal.
      # The -1 limit keeps trailing empty segments, so a trailing or doubled
      # quote is not lost; the empty segments themselves are then dropped.
      segments = expr.split('"', -1)
      seq = segments.flat_map { |s| [[:hex, "#x22"], s] }.drop(1).reject { |s| s == "" }
      seq.unshift(:seq)
      return format_abnf(seq, sep: nil, embedded: false)
    else
      return (@options[:html] ? %("<code class="grammar-literal">#{'%s' if sensitive}#{expr}</code>") : %(#{'%s' if sensitive}"#{expr}"))
    end
  end
  parts = {
    alt:  (@options[:html] ? "<code>/</code> " : "/ "),
    star: (@options[:html] ? "<code>*</code> " : "*"),
    plus: (@options[:html] ? "<code>+</code> " : "1*"),
    opt:  (@options[:html] ? "<code>?</code> " : "?")
  }
  lbrac = (@options[:html] ? "<code>[</code> " : "[")
  rbrac = (@options[:html] ? "<code>]</code> " : "]")
  lparen = (@options[:html] ? "<code>(</code> " : "(")
  rparen = (@options[:html] ? "<code>)</code> " : ")")

  case expr.first
  when :istr
    # FIXME: if string part is segmented, need to do something different
    format_abnf(expr.last, embedded: true, sensitive: false)
  when :alt
    this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
    res = expr[1..-1].map {|e| format_abnf(e, embedded: true)}.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :diff
    raise RangeError, "ABNF does not support the diff operator"
  when :opt
    # Optional elements are written in square brackets
    r = format_abnf(expr[1], embedded: true)
    "#{lbrac}#{r}#{rbrac}"
  when :plus, :star
    # Repetition is a prefix operator in ABNF
    char = parts[expr.first.to_sym]
    r = format_abnf(expr[1], embedded: true)
    "#{char}#{r}"
  when :hex
    # Decode as UTF-8 so code points above 0xFF do not raise in Integer#chr
    escape_abnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
  when :range
    # Returns an [:alt] or [:not [:alt]] if composed of multiple sequences
    # Note: ABNF does not support the `not` operator
    res = format_abnf_range(expr.last)
    res.is_a?(Array) ?
      format_abnf(res, embedded: true) :
      res
  when :seq
    this_sep = (sep ? sep : " ")
    res = expr[1..-1].map do |e|
      format_abnf(e, embedded: true)
    end.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :rept
    # Expand repetition
    min, max, value = expr[1..-1]
    r = format_abnf(value, embedded: true)
    if min == max
      "#{min}#{r}"
    elsif min == 0 && max == '*'
      "#{parts[:star]}#{r}"
    elsif min > 0 && max == '*'
      "#{min}#{parts[:star]}#{r}"
    else
      "#{min}#{parts[:star]}#{max}#{r}"
    end
  else
    raise "Unknown operator: #{expr.first}"
  end
end
447
+
448
# Format a single-character string as an ABNF terminal, preferring a quoted
# literal for printable ASCII and a hex escape for everything else.
#
# The character class mirrors what ABNF allows inside a quoted string
# (%x20-21 / %x23-7E, i.e. printable ASCII except the double quote).
#
# @param [String] c
# @return [String]
def format_abnf_char(c)
  if /[\x20-\x21\x23-\x7E]/.match?(c)
    # Quote verbatim: ABNF literals have no escape syntax, so String#inspect
    # (which would turn a backslash into two) must not be used here.
    %("#{c}")
  else
    escape_abnf_hex(c)
  end
end
456
+
457
# Format a range as ABNF numeric terminals.
#
# Hex members (`#xHH`) are emitted as `%x…` values and literal characters as
# `%d…` values using their decimal code point. `a-b` pairs become `first-last`
# ranges; runs of unpaired members are joined with ".". A trailing "-" is
# treated as a literal dash (decimal 45).
#
# Presumes range has already been validated, but raises rather than looping
# forever if a character cannot be scanned.
#
# @param [String] string range content, without the enclosing brackets
# @return [String, Array]
#   a single terminal String, or an `[:alt, …]` Array when the range yields
#   more than one terminal (to be formatted further by the caller)
# @raise [RangeError]
#   if the range is negated (starts with "^") or contains a character that
#   is neither a hex escape, a range character, nor a dash
def format_abnf_range(string)
  alt, o_dash = [:alt], false

  raise RangeError, "cannot format #{string.inspect} an ABNF range" if string.start_with?('^')

  if string.end_with?('-')
    o_dash = true
    string = string[0..-2]
  end

  scanner = StringScanner.new(string)
  hexes, deces = [], []
  in_range = false
  # Build op (alt) from different ranges/enums
  until scanner.eos?
    start = scanner.pos

    if (hex = scanner.scan(Terminals::HEX))
      # Append any decimal values
      alt << "%d" + deces.join(".") unless deces.empty?
      deces = []

      if in_range
        # Add "." sequences for any previous hexes
        alt << "%x" + hexes[0..-2].join(".") if hexes.length > 1
        alt << "%x#{hexes.last}-#{hex[2..-1]}"
        in_range, hexes = false, []
      else
        hexes << hex[2..-1]
      end
    elsif (dec = scanner.scan(Terminals::R_CHAR))
      # Append any hexadecimal values
      alt << "%x" + hexes.join(".") unless hexes.empty?
      hexes = []

      if in_range
        # Add "." sequences for any previous decimals
        alt << "%d" + deces[0..-2].join(".") if deces.length > 1
        alt << "%d#{deces.last}-#{dec.codepoints.first}"
        in_range, deces = false, []
      else
        deces << dec.codepoints.first.to_s
      end
    end

    in_range = true if scanner.scan(/\-/)

    # Nothing matched at this position: fail instead of spinning forever
    if scanner.pos == start
      raise RangeError, "cannot format #{string.inspect} as an ABNF range: #{scanner.rest[0].inspect} is out of range"
    end
  end

  deces << '45' if o_dash

  # Append hexes and deces as "." sequences (should be only one)
  alt << "%d" + deces.join(".") unless deces.empty?
  alt << "%x" + hexes.join(".") unless hexes.empty?

  # FIXME: HTML abbreviations?
  if alt.length == 2
    # Just return the range or enum
    alt.last
  else
    # Return the alt, which will be further formatted
    alt
  end
end
521
+
522
# Render a character as an ABNF hexadecimal terminal (`%xHH`).
#
# The code point is written with 2, 4 or 8 upper-case hex digits depending on
# its size. When the :html option is set, the escape is wrapped in an <abbr>
# whose title describes the character, inside a
# <code class="grammar-char-escape"> element.
#
# @param [String] u character to escape (`u.ord` supplies the code point)
# @return [String]
def escape_abnf_hex(u)
  code = u.ord
  fmt = case code
        when 0x0000..0x00ff then "%02X"
        when 0x0100..0xffff then "%04X"
        else "%08X"
        end
  char = "%x" + (fmt % code)
  return char unless @options[:html]

  title = if code <= 0x20
            ASCII_ESCAPE_NAMES[code]
          elsif code < 0x7F
            # Strictly below DEL so the dedicated "delete" title is reachable
            "ascii '#{u}'"
          elsif code == 0x7F
            "delete"
          elsif code <= 0xFF
            "extended ascii '#{u}'"
          else
            "unicode '#{u}'"
          end
  %(<code class="grammar-char-escape"><abbr title="#{title}">#{char}</abbr></code>)
end
546
+
547
+ ##
548
+ # ISO EBNF Formatters
549
+ ##
550
+
551
# Format the expression part of a rule as ISO EBNF.
#
# Symbols become rule references (hyperlinked when the :html option is set),
# strings become quoted terminals, and Arrays are treated as operator
# expressions whose first element names the operator.
#
# @param [Symbol, String, Array] expr expression to format
# @param [String] sep (nil)
#   separator placed between the operands of an :alt, :diff or :seq instead
#   of a single space
# @param [Boolean] embedded (false)
#   when true, :alt, :diff and :seq results are wrapped in parentheses
# @return [String]
# @raise [RangeError]
#   if a string contains a character not matched by
#   ISOEBNF::TERMINAL_CHARACTER
# @raise [RuntimeError] for an unrecognized operator
def format_isoebnf(expr, sep: nil, embedded: false)
  return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
  if expr.is_a?(String)
    # A "#xHH" string stands for the character itself. Decode as UTF-8 so
    # code points above 0xFF reach the terminal-character check below instead
    # of failing inside Integer#chr.
    expr = expr[2..-1].hex.chr(Encoding::UTF_8) if expr.match?(/\A#x\h+/)
    expr.chars.each do |c|
      raise RangeError, "cannot format #{expr.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless
        ISOEBNF::TERMINAL_CHARACTER.match?(c)
    end
    # Use single quotes when the text itself contains a double quote
    if expr.include?('"')
      return (@options[:html] ? %('<code class="grammar-literal">#{expr}</code>') : %('#{expr}'))
    else
      return (@options[:html] ? %("<code class="grammar-literal">#{expr}</code>") : %("#{expr}"))
    end
  end
  parts = {
    alt:  (@options[:html] ? "<code>|</code> " : "| "),
    diff: (@options[:html] ? "<code>-</code> " : "- "),
  }
  lparen = (@options[:html] ? "<code>(</code> " : "(")
  rparen = (@options[:html] ? "<code>)</code> " : ")")

  case expr.first
  when :istr
    # Loses fidelity (case-insensitivity is not expressed), but, oh well ...
    format_isoebnf(expr.last, embedded: true)
  when :alt, :diff
    this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
    res = expr[1..-1].map {|e| format_isoebnf(e, embedded: true)}.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :opt
    r = format_isoebnf(expr[1], embedded: true)
    "[#{r}]"
  when :star
    r = format_isoebnf(expr[1], embedded: true)
    "{#{r}}"
  when :plus
    # One-or-more is written as the element followed by zero-or-more of it
    r = format_isoebnf(expr[1], embedded: true)
    "#{r}, {#{r}}"
  when :hex
    format_isoebnf(expr[1], embedded: true)
  when :range
    res = format_isoebnf_range(expr.last)
    res.is_a?(Array) ?
      format_isoebnf(res, embedded: true) :
      res
  when :seq
    this_sep = "," + (sep ? sep : " ")
    res = expr[1..-1].map do |e|
      format_isoebnf(e, embedded: true)
    end.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :rept
    # Expand repetition
    min, max, value = expr[1..-1]
    if min == 0 && max == 1
      format_isoebnf([:opt, value], sep: sep, embedded: embedded)
    elsif min == 0 && max == '*'
      format_isoebnf([:star, value], sep: sep, embedded: embedded)
    elsif min == 1 && max == '*'
      format_isoebnf([:plus, value], sep: sep, embedded: embedded)
    else
      # `min` mandatory copies first ...
      val2 = [:seq]
      while min > 0
        val2 << value
        min -= 1
        max -= 1 unless max == '*'
      end
      if max == '*'
        # ... then any number more
        val2 << [:star, value]
      else
        # ... then up to the remaining `max` copies as nested optionals
        opt = nil
        while max > 0
          opt = [:opt, opt ? [:seq, value, opt] : value]
          max -= 1
        end
        val2 << opt if opt
      end
      format_isoebnf(val2, sep: sep, embedded: embedded)
    end
  else
    raise "Unknown operator: #{expr.first}"
  end
end
635
+
636
# Format a range for ISO EBNF.
#
# ISO EBNF output has no range syntax here, so the range is expanded into an
# alternation of its individual characters. A trailing "-" is treated as a
# literal dash.
#
# @param [String] string range content, without the enclosing brackets
# @return [String, Array]
#   the quoted character when the range has exactly one member, otherwise an
#   `[:alt, char, …]` Array to be formatted further by the caller
# @raise [RangeError]
#   if the range is negated (starts with "^"), contains a character not
#   matched by ISOEBNF::TERMINAL_CHARACTER, or has a descending `a-b` pair
def format_isoebnf_range(string)
  raise RangeError, "cannot format #{string.inspect} as an ISO EBNF range" if string.start_with?('^')

  o_dash = string.end_with?('-')
  string = string[0..-2] if o_dash

  chars = []
  in_range = false
  scanner = StringScanner.new(string)
  # Build chars from different ranges/enums
  until scanner.eos?
    char = if (hex = scanner.scan(Terminals::HEX))
      # "#xHH" => the character with that code point
      hex[2..-1].hex.chr(Encoding::UTF_8)
    else
      scanner.scan(Terminals::R_CHAR)
    end
    raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration: #{char.inspect} is out of range" unless
      char && ISOEBNF::TERMINAL_CHARACTER.match?(char)

    if in_range
      # calculate characters from chars.last to this char
      raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration" unless chars.last < char
      chars.concat((chars.last..char).to_a[1..-1])
      in_range = false
    else
      chars << char
    end

    in_range = true if scanner.scan(/\-/)
  end

  chars << '-' if o_dash

  # Possibly only a single character (no character?)
  chars.length == 1 ? chars.last.inspect : chars.unshift(:alt)
end
677
+
678
+ ERB_DESC = %q(
679
+ <table class="grammar">
680
+ <tbody id="grammar-productions" class="<%= @format %>">
681
+ <% for rule in @rules %>
682
+ <tr<%= %{ id="grammar-production-#{rule.sym}"} unless %w(=/ |).include?(rule.assign)%>>
683
+ <% if rule.id %>
684
+ <td><%= rule.id %></td>
685
+ <% end %>
686
+ <td><code><%== rule.sym %></code></td>
687
+ <td><%= rule.assign %></td>
688
+ <td><%= rule.formatted %></td>
689
+ </tr>
690
+ <% end %>
691
+ </tbody>
692
+ </table>
221
693
  ).gsub(/^ /, '')
222
694
  end
223
695
  end