ebnf 2.0.0 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,18 +1,21 @@
1
1
  # encoding: utf-8
2
2
  # Terminal definitions for the EBNF grammar
3
3
  module EBNF::Terminals
4
- SYMBOL = %r([a-zA-Z0-9_\.]+)u.freeze
5
- HEX = %r(\#x[a-fA-F0-9]+)u.freeze
4
+ SYMBOL_BASE = %r(\b[a-zA-Z0-9_\.]+\b)u.freeze
5
+ SYMBOL = %r(#{SYMBOL_BASE}(?!\s*::=))u.freeze
6
+ HEX = %r(\#x\h+)u.freeze
6
7
  CHAR = %r([\u0009\u000A\u000D\u0020-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
7
- R_CHAR = %r([\u0009\u000A\u000D\u0020-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
8
- RANGE = %r(\[(?:(?:#{R_CHAR})\-(?:#{R_CHAR})|(?:#{HEX})-(?:#{HEX}))\])u.freeze
9
- ENUM_BASE = %r(\[(?:(?:#{R_CHAR})+|(?:#{HEX})+)\])u.freeze
10
- ENUM = %r((?:#{ENUM_BASE})(?!\s+#{SYMBOL}))u.freeze
11
- LHS = %r(\[(?:(?:#{SYMBOL})+\]\s+)?(?:#{SYMBOL})\s*::=)u.freeze
12
- O_RANGE = %r(\[^(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}-#{HEX})\])u.freeze
13
- O_ENUM = %r(\[^(?:#{R_CHAR})+\])u.freeze
8
+ R_CHAR = %r([\u0009\u000A\u000D\u0020-\u002C\u002E-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
9
+ RANGE = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX})|#{R_CHAR}|#{HEX})+-?\](?!\s+#{SYMBOL_BASE}\s*::=))u.freeze
10
+ LHS = %r((?:\[#{SYMBOL_BASE}\])?\s*#{SYMBOL_BASE}\s*::=)u.freeze
11
+ O_RANGE = %r(\[\^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX}|#{R_CHAR}|#{HEX}))+-?\])u.freeze
14
12
  STRING1 = %r("[\u0009\u000A\u000D\u0020\u0021\u0023-\uD7FF\u{10000}-\u{10FFFF}]*")u.freeze
15
13
  STRING2 = %r('[\u0009\u000A\u000D\u0020-\u0026\u0028-\uD7FF\u{10000}-\u{10FFFF}]*')u.freeze
16
14
  POSTFIX = %r([?*+])u.freeze
17
- PASS = %r((\s|(?:(#[^x]|//)[^\n\r]*$)|(?:/\*(?:(?:\*[^/])|[^*])*\*/))+)mu.freeze
15
+ PASS = %r((
16
+ \s
17
+ | (?:(?:\#[^x]|//)[^\n\r]*)
18
+ | (?:/\*(?:(?:\*[^/])|[^*])*\*/)
19
+ | (?:\(\*(?:(?:\*[^\)])|[^*])*\*\))
20
+ )+)xmu.freeze
18
21
  end
@@ -1,6 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  require 'rdf'
3
3
  require 'strscan' unless defined?(StringScanner)
4
+ require "ostruct"
4
5
 
5
6
  ##
6
7
  # Serialize ruleset back to EBNF
@@ -8,15 +9,53 @@ module EBNF
8
9
  class Writer
9
10
  LINE_LENGTH = 80
10
11
 
12
# ASCII escape names
#
# Human-readable names for code points #x00 through #x20, indexed by code point.
# The hex-escape formatters use them as `title` text in HTML output.
# Frozen so the shared table cannot be modified at runtime.
ASCII_ESCAPE_NAMES = [
  "null",                 #x00
  "start of heading",     #x01
  "start of text",        #x02
  "end of text",          #x03
  "end of transmission",  #x04
  "enquiry",              #x05
  "acknowledge",          #x06
  "bell",                 #x07
  "backspace",            #x08
  "horizontal tab",       #x09
  "new line",             #x0A
  "vertical tab",         #x0B
  "form feed",            #x0C
  "carriage return",      #x0D
  "shift out",            #x0E
  "shift in",             #x0F
  "data link escape",     #x10
  "device control 1",     #x11
  "device control 2",     #x12
  "device control 3",     #x13
  "device control 4",     #x14
  "negative acknowledge", #x15
  "synchronous idle",     #x16
  "end of trans. block",  #x17
  "cancel",               #x18
  "end of medium",        #x19
  "substitute",           #x1A
  "escape",               #x1B
  "file separator",       #x1C
  "group separator",      #x1D
  "record separator",     #x1E
  "unit separator",       #x1F
  "space"                 #x20
].freeze
48
+
11
49
  ##
12
50
  # Format rules to a String
13
51
  #
14
52
  # @param [Array<Rule>] rules
53
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
15
54
  # @return [Object]
16
- def self.string(*rules)
55
+ def self.string(*rules, format: :ebnf)
17
56
  require 'stringio' unless defined?(StringIO)
18
57
  buf = StringIO.new
19
- write(buf, *rules)
58
+ write(buf, *rules, format: format)
20
59
  buf.string
21
60
  end
22
61
 
@@ -24,9 +63,10 @@ module EBNF
24
63
  # Format rules to $stdout
25
64
  #
26
65
  # @param [Array<Rule>] rules
66
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
27
67
  # @return [Object]
28
- def self.print(*rules)
29
- write($stdout, *rules)
68
+ def self.print(*rules, format: :ebnf)
69
+ write($stdout, *rules, format: format)
30
70
  end
31
71
 
32
72
  ##
@@ -34,20 +74,22 @@ module EBNF
34
74
  #
35
75
  # @param [Object] out
36
76
  # @param [Array<Rule>] rules
77
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
37
78
  # @return [Object]
38
# Hands the rules to a new Writer that outputs to `out` in the requested
# `format`; the Writer instance is the return value.
def self.write(out, *rules, format: :ebnf)
  writer_options = {out: out, format: format}
  Writer.new(rules, **writer_options)
end
41
82
 
42
83
  ##
43
84
  # Write formatted rules to an IO like object as HTML
44
85
  #
45
86
  # @param [Array<Rule>] rules
87
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
46
88
  # @return [Object]
47
- def self.html(*rules)
89
+ def self.html(*rules, format: :ebnf)
48
90
  require 'stringio' unless defined?(StringIO)
49
91
  buf = StringIO.new
50
- Writer.new(rules, out: buf, html: true)
92
+ Writer.new(rules, out: buf, html: true, format: format)
51
93
  buf.string
52
94
  end
53
95
 
@@ -55,17 +97,24 @@ module EBNF
55
97
  # @param [Array<Rule>] rules
56
98
  # @param [Hash{Symbol => Object}] options
57
99
  # @param [#write] out ($stdout)
100
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
58
101
  # @option options [Symbol] format
59
102
  # @option options [Boolean] html (false)
60
- def initialize(rules, out: $stdout, html: false, **options)
61
- @options = options.dup
103
+ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options)
104
+ @options = options.merge(html: html)
105
+ return if rules.empty?
62
106
 
63
107
  # Determine max LHS length
108
+ format_meth = "format_#{format}".to_sym
64
109
  max_id = rules.max_by {|r| r.id.to_s.length}.id.to_s.length
65
110
  max_sym = rules.max_by {|r| r.sym.to_s.length}.sym.to_s.length
66
- lhs_length = max_sym + 3
67
- lhs_fmt = "%<sym>-#{max_sym}s ::= "
68
- if max_id > 0
111
+ lhs_length = max_sym + 1
112
+ lhs_fmt = case format
113
+ when :abnf then "%<sym>-#{max_sym}s = "
114
+ when :ebnf then "%<sym>-#{max_sym}s ::= "
115
+ when :isoebnf then "%<sym>-#{max_sym}s = "
116
+ end
117
+ if format == :ebnf && max_id > 0
69
118
  lhs_fmt = "%<id>-#{max_id+2}s " + lhs_fmt
70
119
  lhs_length += max_id + 3
71
120
  end
@@ -74,49 +123,104 @@ module EBNF
74
123
  if html
75
124
  # Output as formatted HTML
76
125
  begin
77
- require 'haml'
78
- hout = Haml::Engine.new(HAML_DESC).render(self, rules: rules) do |rule|
79
- formatted_expr = format(rule.expr)
80
- formatted_expr.length > rhs_length ? format(rule.expr, "\n") : formatted_expr
81
- end
82
- out.write hout
126
+ require 'erubis'
127
+ eruby = Erubis::Eruby.new(ERB_DESC)
128
+ formatted_rules = rules.map do |rule|
129
+ if rule.kind == :terminals || rule.kind == :pass
130
+ OpenStruct.new(id: ("@#{rule.kind}"),
131
+ sym: nil,
132
+ assign: nil,
133
+ formatted: ("<strong>Productions for terminals</strong>" if rule.kind == :terminals))
134
+ else
135
+ formatted_expr = self.send(format_meth, rule.expr)
136
+ # Measure text without markup
137
+ formatted_expr_text = formatted_expr.gsub(%r{</?\w+[^>]*>}, '')
138
+ if formatted_expr_text.length > rhs_length && (format != :abnf || rule.alt?)
139
+ lines = []
140
+ # Can only reasonably split apart alts
141
+ self.send(format_meth, rule.expr, sep: "--rule-extensions--").
142
+ split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
143
+ assign = case format
144
+ when :ebnf
145
+ formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
146
+ (ndx > 0 ? (rule.alt? ? '|' : '') : '::=')
147
+ when :abnf
148
+ formatted.sub!(%r{\s*<code>/</code>\s*}, '')
149
+ (ndx > 0 ? '=/' : '=')
150
+ else
151
+ formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
152
+ (ndx > 0 ? (rule.alt? ? '|' : '') : '=')
153
+ end
154
+ lines << OpenStruct.new(id: ("[#{rule.id}]" if rule.id),
155
+ sym: (rule.sym if ndx == 0 || format == :abnf),
156
+ assign: assign,
157
+ formatted: formatted)
158
+ end
159
+ if format == :isoebnf
160
+ lines << OpenStruct.new(assign: ';')
161
+ end
162
+ lines
163
+ else
164
+ OpenStruct.new(id: ("[#{rule.id}]" if rule.id),
165
+ sym: rule.sym,
166
+ assign: (format == :ebnf ? '::=' : '='),
167
+ formatted: (formatted_expr + (format == :isoebnf ? ' ;' : '')))
168
+ end
169
+ end
170
+ end.flatten
171
+ out.write eruby.evaluate(format: format, rules: formatted_rules)
83
172
  return
84
173
  rescue LoadError
85
- $stderr.puts "Generating HTML requires haml gem to be loaded"
174
+ $stderr.puts "Generating HTML requires erubis gem to be loaded"
86
175
  end
87
176
  end
88
177
 
89
178
  # Format each rule, considering the available rhs size
90
179
  rules.each do |rule|
91
180
  buffer = if rule.pass?
92
- "%-#{lhs_length-2}s" % "@pass"
181
+ "\n%-#{lhs_length-2}s " % "@pass"
182
+ elsif rule.kind == :terminals
183
+ "\n%-#{lhs_length-2}s" % "@terminals"
93
184
  else
94
185
  lhs_fmt % {id: "[#{rule.id}]", sym: rule.sym}
95
186
  end
96
- formatted_expr = format(rule.expr)
97
- if formatted_expr.length > rhs_length
98
- buffer << format(rule.expr, ("\n" + " " * lhs_length))
187
+ formatted_expr = self.send(format_meth, rule.expr)
188
+ if formatted_expr.length > rhs_length && (format != :abnf || rule.alt?)
189
+ if format == :abnf
190
+ # No whitespace, use =/
191
+ self.send(format_meth, rule.expr, sep: "--rule-extensions--").
192
+ split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
193
+ if ndx > 0
194
+ buffer << "\n" + lhs_fmt.sub('= ', '=/') % {id: "[#{rule.id}]", sym: rule.sym}
195
+ end
196
+ buffer << formatted.sub(/\s*\/\s*/, '')
197
+ end
198
+ else
199
+ # Space out past "= "
200
+ buffer << self.send(format_meth, rule.expr, sep: ("\n" + " " * (lhs_length + (rule.alt? ? 2 : 4) - (format == :ebnf ? 0 : 2))))
201
+ buffer << ("\n" + " " * (lhs_length) + ';') if format == :isoebnf
202
+ end
99
203
  else
100
- buffer << formatted_expr
204
+ buffer << formatted_expr + (format == :isoebnf ? ' ;' : '')
101
205
  end
206
+ buffer << "\n\n" if [:terminals, :pass].include?(rule.kind)
102
207
  out.puts(buffer)
103
208
  end
104
209
  end
105
210
 
106
211
  protected
212
+
213
+ ##
214
+ # W3C EBNF Formatters
215
+ ##
216
+
107
217
  # Format the expression part of a rule
108
- def format(expr, sep = nil)
218
+ def format_ebnf(expr, sep: nil, embedded: false)
109
219
  return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
110
220
  if expr.is_a?(String)
111
- if expr.length == 1
112
- return format_char(expr)
113
- elsif expr =~ /\A#x\h+/
114
- return (@options[:html] ? %(<code class="grammar-char-escape">#{expr}</code>) : expr)
115
- elsif expr =~ /"/
116
- return (@options[:html] ? %('<code class="grammar-literal">#{escape(expr, "'")}</code>') : %('#{escape(expr, "'")}'))
117
- else
118
- return (@options[:html] ? %("<code class="grammar-literal">#{escape(expr, '"')}</code>") : %("#{escape(expr, '"')}"))
119
- end
221
+ return expr.length == 1 ?
222
+ format_ebnf_char(expr) :
223
+ format_ebnf_string(expr, expr.include?('"') ? "'" : '"')
120
224
  end
121
225
  parts = {
122
226
  alt: (@options[:html] ? "<code>|</code> " : "| "),
@@ -129,40 +233,75 @@ module EBNF
129
233
  rparen = (@options[:html] ? "<code>)</code> " : ")")
130
234
 
131
235
  case expr.first
236
+ when :istr
237
+ # Looses fidelity, but, oh well ...
238
+ format_ebnf(expr.last, embedded: true)
132
239
  when :alt, :diff
133
240
  this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
134
- expr[1..-1].map {|e| format(e)}.join(this_sep)
241
+ res = expr[1..-1].map {|e| format_ebnf(e, embedded: true)}.join(this_sep)
242
+ embedded ? (lparen + res + rparen) : res
135
243
  when :star, :plus, :opt
136
- raise "Expected star expression to have a single operand" unless expr.length == 2
137
244
  char = parts[expr.first.to_sym]
138
- r = format(expr[1])
139
- (r.start_with?("(") || Array(expr[1]).length == 1) ? "#{r}#{char}" : "(#{r})#{char}"
245
+ r = format_ebnf(expr[1], embedded: true)
246
+ "#{r}#{char}"
140
247
  when :hex
141
- (@options[:html] ? %(<code class="grammar-char-escape">#{expr.last}</code>) : expr.last)
248
+ escape_ebnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
142
249
  when :range
143
- format_range(expr.last)
250
+ format_ebnf_range(expr.last)
144
251
  when :seq
145
252
  this_sep = (sep ? sep : " ")
146
- expr[1..-1].map {|e| r = format(e); Array(e).length > 2 ? "#{lparen}#{r}#{rparen}" : r}.join(this_sep)
253
+ res = expr[1..-1].map do |e|
254
+ format_ebnf(e, embedded: true)
255
+ end.join(this_sep)
256
+ embedded ? (lparen + res + rparen) : res
257
+ when :rept
258
+ # Expand repetition
259
+ min, max, value = expr[1..-1]
260
+ if min == 0 && max == 1
261
+ format_ebnf([:opt, value], sep: sep, embedded: embedded)
262
+ elsif min == 0 && max == '*'
263
+ format_ebnf([:star, value], sep: sep, embedded: embedded)
264
+ elsif min == 1 && max == '*'
265
+ format_ebnf([:plus, value], sep: sep, embedded: embedded)
266
+ else
267
+ val2 = [:seq]
268
+ while min > 0
269
+ val2 << value
270
+ min -= 1
271
+ max -= 1 unless max == '*'
272
+ end
273
+ if max == '*'
274
+ val2 << [:star, value]
275
+ else
276
+ opt = nil
277
+ while max > 0
278
+ opt = [:opt, opt ? [:seq, value, opt] : value]
279
+ max -= 1
280
+ end
281
+ val2 << opt if opt
282
+ end
283
+ format_ebnf(val2, sep: sep, embedded: embedded)
284
+ end
147
285
  else
148
286
  raise "Unknown operator: #{expr.first}"
149
287
  end
150
288
  end
151
289
 
152
290
  # Format a single-character string, prefering hex for non-main ASCII
153
- def format_char(c)
291
+ def format_ebnf_char(c)
154
292
  case c.ord
155
- when 0x22 then (@options[:html] ? %('<code class="grammar-literal">"</code>') : %{'"'})
156
- when (0x23..0x7e) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"})
157
- else (@options[:html] ? %(<code class="grammar-char-escape">#{escape_hex(c)}</code>) : escape_hex(c))
293
+ when (0x21) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"})
294
+ when 0x22 then (@options[:html] ? %('<code class="grammar-literal">"</code>') : %{'"'})
295
+ when (0x23..0x7e) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"})
296
+ when (0x80..0xFFFD) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"})
297
+ else escape_ebnf_hex(c)
158
298
  end
159
299
  end
160
300
 
161
301
  # Format a range
162
- def format_range(string)
302
+ def format_ebnf_range(string)
163
303
  lbrac = (@options[:html] ? "<code>[</code> " : "[")
164
304
  rbrac = (@options[:html] ? "<code>]</code> " : "]")
165
- dash = (@options[:html] ? "<code>-</code> " : "-")
166
305
 
167
306
  buffer = lbrac
168
307
  s = StringScanner.new(string)
@@ -171,53 +310,386 @@ module EBNF
171
310
  when s.scan(/\A[!"\u0024-\u007e]+/)
172
311
  buffer << (@options[:html] ? %(<code class="grammar-literal">#{s.matched}</code>) : s.matched)
173
312
  when s.scan(/\A#x\h+/)
174
- buffer << (@options[:html] ? %(<code class="grammar-char-escape">#{s.matched}</code>) : s.matched)
175
- when s.scan(/\A-/)
176
- buffer << dash
313
+ buffer << escape_ebnf_hex(s.matched[2..-1].hex.chr(Encoding::UTF_8))
177
314
  else
178
- buffer << (@options[:html] ? %(<code class="grammar-char-escape">#{escape_hex(s.getch)}</code>) : escape_hex(s.getch))
315
+ buffer << escape_ebnf_hex(s.getch)
179
316
  end
180
317
  end
181
318
  buffer + rbrac
182
319
  end
183
320
 
184
321
  # Escape a string, using as many UTF-8 characters as possible
185
- def escape(string, quote = '"')
186
- buffer = ""
322
+ def format_ebnf_string(string, quote = '"')
187
323
  string.each_char do |c|
188
- buffer << case (u = c.ord)
189
- when (0x00..0x1f) then "#x%02X" % u
190
- when quote.ord then "#x%02X" % u
191
- else c
324
+ case c.ord
325
+ when 0x00..0x19, quote.ord
326
+ raise RangeError, "cannot format #{string.inspect} as an EBNF String: #{c.inspect} is out of range" unless
327
+ ISOEBNF::TERMINAL_CHARACTER.match?(c)
192
328
  end
193
329
  end
194
- buffer
330
+
331
+ "#{quote}#{string}#{quote}"
195
332
  end
196
333
 
197
- def escape_hex(u)
334
# Render a character as a W3C EBNF hex escape (`#xNN`)
#
# Two hex digits are used up to #xFF, four up to #xFFFF and eight beyond.
# In HTML mode the escape is wrapped in <abbr> (whose title names or shows the
# character) inside <code class="grammar-char-escape">.
#
# @param [String] u the character to escape (its first code point is used)
# @return [String]
def escape_ebnf_hex(u)
  codepoint = u.ord
  fmt = case codepoint
  when 0x0000..0x00ff then "#x%02X"
  when 0x0100..0xffff then "#x%04X"
  else "#x%08X"
  end
  char = fmt % codepoint
  return char unless @options[:html]

  # The raw character is placed inside a double-quoted attribute, so
  # markup-significant characters have to become entities first
  entities = {'&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;'}
  shown = u.gsub(/[&<>"]/, entities)
  title = if codepoint <= 0x20
    ASCII_ESCAPE_NAMES[codepoint]
  elsif codepoint < 0x7F
    "ascii '#{shown}'"
  elsif codepoint == 0x7F
    "delete"
  elsif codepoint <= 0xFF
    "extended ascii '#{shown}'"
  else
    "unicode '#{shown}'"
  end
  %(<code class="grammar-char-escape"><abbr title="#{title}">#{char}</abbr></code>)
end
359
+
360
+ ##
361
+ # ABNF Formatters
362
+ ##
363
+
364
# Format the expression part of a rule using ABNF syntax
#
# @param [Symbol, String, Array] expr
#   a rule reference, a literal string, or an operator expression whose first
#   element is one of :istr, :alt, :diff, :opt, :plus, :star, :hex, :range,
#   :seq or :rept
# @param [String] sep (nil)
#   used instead of a single space between the operands of this `alt`/`seq`
# @param [Boolean] embedded (false)
#   wrap an `alt`/`seq` result in parentheses
# @param [Boolean] sensitive (true)
#   prefix multi-character strings with `%s` (case-sensitive string)
# @return [String]
# @raise [RangeError] for the `diff` operator, which ABNF does not support
# @raise [RuntimeError] for an unknown operator
def format_abnf(expr, sep: nil, embedded: false, sensitive: true)
  return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
  if expr.is_a?(String)
    if expr.length == 1
      return format_abnf_char(expr)
    elsif expr.start_with?('%')
      # Already encoded
      return expr
    elsif expr.include?('"')
      # A quoted ABNF string cannot contain a double quote: emit every quote as
      # %x22 and the text between quotes as ordinary strings. The negative
      # limit keeps leading/trailing empty segments, so no quote is lost and no
      # empty literal is produced.
      seq = [:seq]
      expr.split('"', -1).each_with_index do |segment, ndx|
        seq << [:hex, "#x22"] if ndx > 0
        seq << segment unless segment.empty?
      end
      return format_abnf(seq, sep: nil, embedded: false)
    else
      # The %s marker belongs in front of the opening quote, in HTML as well
      prefix = ('%s' if sensitive)
      return (@options[:html] ? %(#{prefix}"<code class="grammar-literal">#{expr}</code>") : %(#{prefix}"#{expr}"))
    end
  end
  parts = {
    alt:    (@options[:html] ? "<code>/</code> " : "/ "),
    star:   (@options[:html] ? "<code>*</code> " : "*"),
    # ABNF has no `+` operator; one-or-more is spelled `1*`
    plus:   (@options[:html] ? "<code>1*</code> " : "1*")
  }
  lbrac = (@options[:html] ? "<code>[</code> " : "[")
  rbrac = (@options[:html] ? "<code>]</code> " : "]")
  lparen = (@options[:html] ? "<code>(</code> " : "(")
  rparen = (@options[:html] ? "<code>)</code> " : ")")

  case expr.first
  when :istr
    # FIXME: if string part is segmented, need to do something different
    format_abnf(expr.last, embedded: true, sensitive: false)
  when :alt
    this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
    res = expr[1..-1].map {|e| format_abnf(e, embedded: true)}.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :diff
    raise RangeError, "ABNF does not support the diff operator"
  when :opt
    r = format_abnf(expr[1], embedded: true)
    "#{lbrac}#{r}#{rbrac}"
  when :plus, :star
    char = parts[expr.first.to_sym]
    r = format_abnf(expr[1], embedded: true)
    "#{char}#{r}"
  when :hex
    # Integer#chr needs an explicit encoding for code points above 255
    escape_abnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
  when :range
    # Returns an [:alt] or [:not [:alt]] if composed of multiple sequences
    # Note: ABNF does not support the `not` operator
    res = format_abnf_range(expr.last)
    res.is_a?(Array) ?
      format_abnf(res, embedded: true) :
      res
  when :seq
    this_sep = (sep ? sep : " ")
    res = expr[1..-1].map do |e|
      format_abnf(e, embedded: true)
    end.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :rept
    # ABNF has native repetition counts, so nothing needs expanding
    min, max, value = expr[1..-1]
    r = format_abnf(value, embedded: true)
    if min == max
      "#{min}#{r}"
    elsif min == 0 && max == '*'
      "#{parts[:star]}#{r}"
    elsif min > 0 && max == '*'
      "#{min}#{parts[:star]}#{r}"
    else
      "#{min}#{parts[:star]}#{max}#{r}"
    end
  else
    raise "Unknown operator: #{expr.first}"
  end
end
447
+
448
# Format a single-character string, preferring a quoted literal for printable
# ASCII (other than the double quote) and a hex escape otherwise
#
# @param [String] c
# @return [String]
def format_abnf_char(c)
  if /[\x20-\x21\x23-\x7E]/.match?(c)
    # ABNF quoted strings have no escape sequences, so the character is
    # emitted as-is (String#inspect would turn a backslash into two)
    %("#{c}")
  else
    escape_abnf_hex(c)
  end
end
456
+
457
# Format a range
#
# Presumes range has already been validated
#
# NOTE(review): consecutive stand-alone characters are joined with "." (for
# example "%d97.98"); RFC 5234 reads that form as a concatenation, not as a
# choice between the characters — confirm this is the intended rendering.
#
# @param [String] string the contents of the range, without the brackets
# @return [String, Array]
#   a single ABNF value when the range yields one member, otherwise an
#   `[:alt, ...]` expression for the caller to format further
# @raise [RangeError]
#   for a negated range (leading `^`), or when the next character matches
#   neither Terminals::HEX nor Terminals::R_CHAR
def format_abnf_range(string)
  alt, o_dash = [:alt], false

  raise RangeError, "cannot format #{string.inspect} as an ABNF range" if string.start_with?('^')

  # A trailing dash stands for the dash character itself
  if string.end_with?('-')
    o_dash = true
    string = string[0..-2]
  end

  scanner = StringScanner.new(string)
  hexes, deces = [], []
  in_range = false
  # Build op (alt) from different ranges/enums
  while !scanner.eos?
    if hex = scanner.scan(Terminals::HEX)
      # Append any decimal values
      alt << "%d" + deces.join(".") unless deces.empty?
      deces = []

      if in_range
        # Add "." sequences for any previous hexes
        alt << "%x" + hexes[0..-2].join(".") if hexes.length > 1
        alt << "%x#{hexes.last}-#{hex[2..-1]}"
        in_range, hexes = false, []
      else
        hexes << hex[2..-1]
      end
    elsif dec = scanner.scan(Terminals::R_CHAR)
      # Append any hexadecimal values
      alt << "%x" + hexes.join(".") unless hexes.empty?
      hexes = []

      if in_range
        # Add "." sequences for any previous decimals
        alt << "%d" + deces[0..-2].join(".") if deces.length > 1
        alt << "%d#{deces.last}-#{dec.codepoints.first}"
        in_range, deces = false, []
      else
        deces << dec.codepoints.first.to_s
      end
    elsif !scanner.check(/\-/)
      # Nothing can consume the next character: fail instead of looping forever
      raise RangeError, "cannot format #{string.inspect} as an ABNF range: #{scanner.check(/./m).inspect} is out of range"
    end

    in_range = true if scanner.scan(/\-/)
  end

  deces << '45' if o_dash

  # Append hexes and deces as "." sequences (should be only one)
  alt << "%d" + deces.join(".") unless deces.empty?
  alt << "%x" + hexes.join(".") unless hexes.empty?

  # FIXME: HTML abbreviations?
  if alt.length == 2
    # Just return the range or enum
    alt.last
  else
    # Return the alt, which will be further formatted
    alt
  end
end
521
+
522
# Render a character as an ABNF hexadecimal value (`%xNN`)
#
# Two hex digits are used up to #xFF, four up to #xFFFF and eight beyond.
# In HTML mode the value is wrapped in <abbr> (whose title names or shows the
# character) inside <code class="grammar-char-escape">.
#
# @param [String] u the character to escape (its first code point is used)
# @return [String]
def escape_abnf_hex(u)
  codepoint = u.ord
  fmt = case codepoint
  when 0x0000..0x00ff then "%02X"
  when 0x0100..0xffff then "%04X"
  else "%08X"
  end
  char = "%x" + (fmt % codepoint)
  return char unless @options[:html]

  # The raw character is placed inside a double-quoted attribute, so
  # markup-significant characters have to become entities first
  entities = {'&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;'}
  shown = u.gsub(/[&<>"]/, entities)
  title = if codepoint <= 0x20
    ASCII_ESCAPE_NAMES[codepoint]
  elsif codepoint < 0x7F
    # Strictly below #x7F, so that DEL reaches its own branch
    "ascii '#{shown}'"
  elsif codepoint == 0x7F
    "delete"
  elsif codepoint <= 0xFF
    "extended ascii '#{shown}'"
  else
    "unicode '#{shown}'"
  end
  %(<code class="grammar-char-escape"><abbr title="#{title}">#{char}</abbr></code>)
end
546
+
547
+ ##
548
+ # ISO EBNF Formatters
549
+ ##
550
+
551
# Format the expression part of a rule using ISO EBNF syntax
#
# @param [Symbol, String, Array] expr
#   a rule reference, a literal string (or `#xNN` escape), or an operator
#   expression whose first element is one of :istr, :alt, :diff, :opt, :star,
#   :plus, :hex, :range, :seq or :rept
# @param [String] sep (nil)
#   used instead of a single space between the operands of this `alt`/`seq`
# @param [Boolean] embedded (false)
#   wrap an `alt`/`diff`/`seq` result in parentheses
# @return [String]
# @raise [RangeError]
#   if a string contains a character that does not match
#   ISOEBNF::TERMINAL_CHARACTER
# @raise [RuntimeError] for an unknown operator
def format_isoebnf(expr, sep: nil, embedded: false)
  return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
  if expr.is_a?(String)
    # A hex escape is written out as the character it denotes. The pattern is
    # anchored at both ends so text that merely starts with "#x" stays literal,
    # and the explicit encoding lets code points above 255 be converted.
    expr = expr[2..-1].hex.chr(Encoding::UTF_8) if expr.match?(/\A#x\h+\z/)
    expr.each_char do |c|
      raise RangeError, "cannot format #{expr.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless
        ISOEBNF::TERMINAL_CHARACTER.match?(c)
    end
    if expr.include?('"')
      return (@options[:html] ? %('<code class="grammar-literal">#{expr}</code>') : %('#{expr}'))
    else
      return (@options[:html] ? %("<code class="grammar-literal">#{expr}</code>") : %("#{expr}"))
    end
  end
  parts = {
    alt:    (@options[:html] ? "<code>|</code> " : "| "),
    diff:   (@options[:html] ? "<code>-</code> " : "- "),
  }
  lparen = (@options[:html] ? "<code>(</code> " : "(")
  rparen = (@options[:html] ? "<code>)</code> " : ")")

  case expr.first
  when :istr
    # Loses fidelity, but, oh well ...
    format_isoebnf(expr.last, embedded: true)
  when :alt, :diff
    this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
    res = expr[1..-1].map {|e| format_isoebnf(e, embedded: true)}.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :opt
    r = format_isoebnf(expr[1], embedded: true)
    "[#{r}]"
  when :star
    r = format_isoebnf(expr[1], embedded: true)
    "{#{r}}"
  when :plus
    # One or more: the operand followed by zero or more repetitions of it
    r = format_isoebnf(expr[1], embedded: true)
    "#{r}, {#{r}}"
  when :hex
    format_isoebnf(expr[1], embedded: true)
  when :range
    res = format_isoebnf_range(expr.last)
    res.is_a?(Array) ?
      format_isoebnf(res, embedded: true) :
      res
  when :seq
    this_sep = "," + (sep ? sep : " ")
    res = expr[1..-1].map do |e|
      format_isoebnf(e, embedded: true)
    end.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :rept
    # Expand repetition
    min, max, value = expr[1..-1]
    if min == 0 && max == 1
      format_isoebnf([:opt, value], sep: sep, embedded: embedded)
    elsif min == 0 && max == '*'
      format_isoebnf([:star, value], sep: sep, embedded: embedded)
    elsif min == 1 && max == '*'
      format_isoebnf([:plus, value], sep: sep, embedded: embedded)
    else
      # `min` mandatory copies ...
      val2 = [:seq]
      while min > 0
        val2 << value
        min -= 1
        max -= 1 unless max == '*'
      end
      if max == '*'
        # ... followed by any number of further copies
        val2 << [:star, value]
      else
        # ... followed by up to `max - min` nested optional copies
        opt = nil
        while max > 0
          opt = [:opt, opt ? [:seq, value, opt] : value]
          max -= 1
        end
        val2 << opt if opt
      end
      format_isoebnf(val2, sep: sep, embedded: embedded)
    end
  else
    raise "Unknown operator: #{expr.first}"
  end
end
635
+
636
# Format a range
# Range is formatted as an alternation of characters
#
# @param [String] string the contents of the range, without the brackets
# @return [String, Array]
#   a quoted literal when the range holds exactly one character, otherwise
#   `[:alt, char, ...]` for the caller to format further
# @raise [RangeError]
#   for a negated range (leading `^`), for a member that is not scannable or
#   does not match ISOEBNF::TERMINAL_CHARACTER, or for a descending range
def format_isoebnf_range(string)
  chars = []
  o_dash = false

  raise RangeError, "cannot format #{string.inspect} as an ISO EBNF range" if string.start_with?('^')

  # A trailing dash stands for the dash character itself
  if string.end_with?('-')
    o_dash = true
    string = string[0..-2]
  end

  scanner = StringScanner.new(string)
  in_range = false
  # Build chars from different ranges/enums
  while !scanner.eos?
    char = if hex = scanner.scan(Terminals::HEX)
      # Turn the escape into the character it denotes
      hex[2..-1].hex.chr(Encoding::UTF_8)
    else
      scanner.scan(Terminals::R_CHAR)
    end
    raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration: #{char.inspect} is out of range" unless
      char && ISOEBNF::TERMINAL_CHARACTER.match?(char)

    if in_range
      # calculate characters from chars.last to this char
      raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration" unless chars.last < char
      chars.concat((chars.last..char).to_a[1..-1])
      in_range = false
    else
      chars << char
    end

    in_range = true if scanner.scan(/\-/)
  end

  chars << '-' if o_dash

  # Anything but a single character is returned as an alternation
  return chars.unshift(:alt) unless chars.length == 1

  # ISO EBNF literals have no escape sequences: pick the quote that does not
  # clash instead of relying on String#inspect
  only = chars.last
  only == '"' ? %('"') : %("#{only}")
end
677
+
678
# Template handed to Erubis::Eruby when HTML output is requested; it is
# evaluated with `format` and `rules` and reads them as @format and @rules.
# Each rule becomes one table row: an optional id cell, the symbol, the
# assignment operator and the formatted expression. The row gets an
# `id="grammar-production-<sym>"` attribute unless its assign value is
# `=/` or `|`.
+ ERB_DESC = %q(
679
+ <table class="grammar">
680
+ <tbody id="grammar-productions" class="<%= @format %>">
681
+ <% for rule in @rules %>
682
+ <tr<%= %{ id="grammar-production-#{rule.sym}"} unless %w(=/ |).include?(rule.assign)%>>
683
+ <% if rule.id %>
684
+ <td><%= rule.id %></td>
685
+ <% end %>
686
+ <td><code><%== rule.sym %></code></td>
687
+ <td><%= rule.assign %></td>
688
+ <td><%= rule.formatted %></td>
689
+ </tr>
690
+ <% end %>
691
+ </tbody>
692
+ </table>
221
693
  ).gsub(/^ /, '')
222
694
  end
223
695
  end