ebnf 1.1.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +218 -196
  3. data/UNLICENSE +1 -1
  4. data/VERSION +1 -1
  5. data/bin/ebnf +40 -21
  6. data/etc/abnf-core.ebnf +52 -0
  7. data/etc/abnf.abnf +121 -0
  8. data/etc/abnf.ebnf +124 -0
  9. data/etc/abnf.sxp +45 -0
  10. data/etc/doap.ttl +13 -12
  11. data/etc/ebnf.ebnf +21 -33
  12. data/etc/ebnf.html +171 -160
  13. data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
  14. data/etc/ebnf.ll1.sxp +182 -183
  15. data/etc/ebnf.peg.rb +90 -0
  16. data/etc/ebnf.peg.sxp +84 -0
  17. data/etc/ebnf.sxp +40 -41
  18. data/etc/iso-ebnf.ebnf +140 -0
  19. data/etc/iso-ebnf.isoebnf +138 -0
  20. data/etc/iso-ebnf.sxp +65 -0
  21. data/etc/sparql.ebnf +4 -4
  22. data/etc/sparql.html +1603 -1751
  23. data/etc/sparql.ll1.sxp +7372 -7372
  24. data/etc/sparql.peg.rb +532 -0
  25. data/etc/sparql.peg.sxp +597 -0
  26. data/etc/sparql.sxp +363 -362
  27. data/etc/turtle.ebnf +3 -3
  28. data/etc/turtle.html +465 -517
  29. data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
  30. data/etc/turtle.ll1.sxp +425 -425
  31. data/etc/turtle.peg.rb +182 -0
  32. data/etc/turtle.peg.sxp +199 -0
  33. data/etc/turtle.sxp +103 -101
  34. data/lib/ebnf.rb +7 -2
  35. data/lib/ebnf/abnf.rb +301 -0
  36. data/lib/ebnf/abnf/core.rb +23 -0
  37. data/lib/ebnf/abnf/meta.rb +111 -0
  38. data/lib/ebnf/base.rb +128 -87
  39. data/lib/ebnf/bnf.rb +1 -26
  40. data/lib/ebnf/ebnf/meta.rb +90 -0
  41. data/lib/ebnf/isoebnf.rb +229 -0
  42. data/lib/ebnf/isoebnf/meta.rb +75 -0
  43. data/lib/ebnf/ll1.rb +140 -8
  44. data/lib/ebnf/ll1/lexer.rb +37 -32
  45. data/lib/ebnf/ll1/parser.rb +113 -73
  46. data/lib/ebnf/ll1/scanner.rb +84 -51
  47. data/lib/ebnf/native.rb +320 -0
  48. data/lib/ebnf/parser.rb +285 -302
  49. data/lib/ebnf/peg.rb +39 -0
  50. data/lib/ebnf/peg/parser.rb +554 -0
  51. data/lib/ebnf/peg/rule.rb +241 -0
  52. data/lib/ebnf/rule.rb +453 -163
  53. data/lib/ebnf/terminals.rb +21 -0
  54. data/lib/ebnf/writer.rb +554 -85
  55. metadata +98 -20
  56. data/etc/sparql.rb +0 -45773
@@ -0,0 +1,21 @@
1
+ # encoding: utf-8
2
+ # Terminal definitions for the EBNF grammar
3
+ module EBNF::Terminals
4
+ SYMBOL_BASE = %r(\b[a-zA-Z0-9_\.]+\b)u.freeze
5
+ SYMBOL = %r(#{SYMBOL_BASE}(?!\s*::=))u.freeze
6
+ HEX = %r(\#x\h+)u.freeze
7
+ CHAR = %r([\u0009\u000A\u000D\u0020-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
8
+ R_CHAR = %r([\u0009\u000A\u000D\u0020-\u002C\u002E-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
9
+ RANGE = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX})|#{R_CHAR}|#{HEX})+-?\](?!\s+#{SYMBOL_BASE}\s*::=))u.freeze
10
+ LHS = %r((?:\[#{SYMBOL_BASE}\])?\s*#{SYMBOL_BASE}\s*::=)u.freeze
11
+ O_RANGE = %r(\[\^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX}|#{R_CHAR}|#{HEX}))+-?\])u.freeze
12
+ STRING1 = %r("[\u0009\u000A\u000D\u0020\u0021\u0023-\uD7FF\u{10000}-\u{10FFFF}]*")u.freeze
13
+ STRING2 = %r('[\u0009\u000A\u000D\u0020-\u0026\u0028-\uD7FF\u{10000}-\u{10FFFF}]*')u.freeze
14
+ POSTFIX = %r([?*+])u.freeze
15
+ PASS = %r((
16
+ \s
17
+ | (?:(?:\#[^x]|//)[^\n\r]*)
18
+ | (?:/\*(?:(?:\*[^/])|[^*])*\*/)
19
+ | (?:\(\*(?:(?:\*[^\)])|[^*])*\*\))
20
+ )+)xmu.freeze
21
+ end
@@ -1,6 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  require 'rdf'
3
3
  require 'strscan' unless defined?(StringScanner)
4
+ require "ostruct"
4
5
 
5
6
  ##
6
7
  # Serialize ruleset back to EBNF
@@ -8,15 +9,53 @@ module EBNF
8
9
  class Writer
9
10
  LINE_LENGTH = 80
10
11
 
12
# ASCII escape names
#
# Human-readable names for the ASCII control characters and space, indexed
# by code point (0x00..0x20). The table is frozen because it is shared
# lookup data and must not be mutated at runtime.
ASCII_ESCAPE_NAMES = [
  "null",                 #x00
  "start of heading",     #x01
  "start of text",        #x02
  "end of text",          #x03
  "end of transmission",  #x04
  "enquiry",              #x05
  "acknowledge",          #x06
  "bell",                 #x07
  "backspace",            #x08
  "horizontal tab",       #x09
  "new line",             #x0A
  "vertical tab",         #x0B
  "form feed",            #x0C
  "carriage return",      #x0D
  "shift out",            #x0E
  "shift in",             #x0F
  "data link escape",     #x10
  "device control 1",     #x11
  "device control 2",     #x12
  "device control 3",     #x13
  "device control 4",     #x14
  "negative acknowledge", #x15
  "synchronous idle",     #x16
  "end of trans. block",  #x17
  "cancel",               #x18
  "end of medium",        #x19
  "substitute",           #x1A
  "escape",               #x1B
  "file separator",       #x1C
  "group separator",      #x1D
  "record separator",     #x1E
  "unit separator",       #x1F
  "space"                 #x20
].freeze
48
+
11
49
  ##
12
50
  # Format rules to a String
13
51
  #
14
52
  # @param [Array<Rule>] rules
53
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
15
54
  # @return [Object]
16
- def self.string(*rules)
55
+ def self.string(*rules, format: :ebnf)
17
56
  require 'stringio' unless defined?(StringIO)
18
57
  buf = StringIO.new
19
- write(buf, *rules)
58
+ write(buf, *rules, format: format)
20
59
  buf.string
21
60
  end
22
61
 
@@ -24,9 +63,10 @@ module EBNF
24
63
  # Format rules to $stdout
25
64
  #
26
65
  # @param [Array<Rule>] rules
66
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
27
67
  # @return [Object]
28
- def self.print(*rules)
29
- write($stdout, *rules)
68
+ def self.print(*rules, format: :ebnf)
69
+ write($stdout, *rules, format: format)
30
70
  end
31
71
 
32
72
  ##
@@ -34,92 +74,153 @@ module EBNF
34
74
  #
35
75
  # @param [Object] out
36
76
  # @param [Array<Rule>] rules
77
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
37
78
  # @return [Object]
38
- def self.write(out, *rules)
39
- Writer.new(rules, out: out)
79
+ def self.write(out, *rules, format: :ebnf)
80
+ Writer.new(rules, out: out, format: format)
40
81
  end
41
82
 
42
83
  ##
43
84
  # Write formatted rules to an IO like object as HTML
44
85
  #
45
86
  # @param [Array<Rule>] rules
87
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
46
88
  # @return [Object]
47
- def self.html(*rules)
89
+ def self.html(*rules, format: :ebnf)
48
90
  require 'stringio' unless defined?(StringIO)
49
91
  buf = StringIO.new
50
- Writer.new(rules, out: buf, html: true)
92
+ Writer.new(rules, out: buf, html: true, format: format)
51
93
  buf.string
52
94
  end
53
95
 
54
96
  ##
55
97
  # @param [Array<Rule>] rules
56
98
  # @param [Hash{Symbol => Object}] options
57
- # @option options [Symbol] :format
58
- # @option options [#write] :out ($stdout)
59
- # @option options [Boolean] :html (false)
60
- # Format as HTML
61
- def initialize(rules, options = {})
62
- @options = options.dup
63
- out = options.fetch(:out, $stdio)
64
- #fmt = options.fetch(:format, :ebnf)
99
+ # @param [#write] out ($stdout)
100
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
101
+ # @option options [Symbol] format
102
+ # @option options [Boolean] html (false)
103
+ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options)
104
+ @options = options.merge(html: html)
105
+ return if rules.empty?
65
106
 
66
107
  # Determine max LHS length
108
+ format_meth = "format_#{format}".to_sym
67
109
  max_id = rules.max_by {|r| r.id.to_s.length}.id.to_s.length
68
110
  max_sym = rules.max_by {|r| r.sym.to_s.length}.sym.to_s.length
69
- lhs_length = max_sym + 3
70
- lhs_fmt = "%<sym>-#{max_sym}s ::= "
71
- if max_id > 0
111
+ lhs_length = max_sym + 1
112
+ lhs_fmt = case format
113
+ when :abnf then "%<sym>-#{max_sym}s = "
114
+ when :ebnf then "%<sym>-#{max_sym}s ::= "
115
+ when :isoebnf then "%<sym>-#{max_sym}s = "
116
+ end
117
+ if format == :ebnf && max_id > 0
72
118
  lhs_fmt = "%<id>-#{max_id+2}s " + lhs_fmt
73
119
  lhs_length += max_id + 3
74
120
  end
75
121
  rhs_length = LINE_LENGTH - lhs_length
76
122
 
77
- if @options[:html]
123
+ if html
78
124
  # Output as formatted HTML
79
125
  begin
80
- require 'haml'
81
- html = Haml::Engine.new(HAML_DESC).render(self, rules: rules) do |rule|
82
- formatted_expr = format(rule.expr)
83
- formatted_expr.length > rhs_length ? format(rule.expr, "\n") : formatted_expr
84
- end
85
- out.write html
126
+ require 'erubis'
127
+ eruby = Erubis::Eruby.new(ERB_DESC)
128
+ formatted_rules = rules.map do |rule|
129
+ if rule.kind == :terminals || rule.kind == :pass
130
+ OpenStruct.new(id: ("@#{rule.kind}"),
131
+ sym: nil,
132
+ assign: nil,
133
+ formatted: ("<strong>Productions for terminals</strong>" if rule.kind == :terminals))
134
+ else
135
+ formatted_expr = self.send(format_meth, rule.expr)
136
+ # Measure text without markup
137
+ formatted_expr_text = formatted_expr.gsub(%r{</?\w+[^>]*>}, '')
138
+ if formatted_expr_text.length > rhs_length && (format != :abnf || rule.alt?)
139
+ lines = []
140
+ # Can only reasonably split apart alts
141
+ self.send(format_meth, rule.expr, sep: "--rule-extensions--").
142
+ split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
143
+ assign = case format
144
+ when :ebnf
145
+ formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
146
+ (ndx > 0 ? (rule.alt? ? '|' : '') : '::=')
147
+ when :abnf
148
+ formatted.sub!(%r{\s*<code>/</code>\s*}, '')
149
+ (ndx > 0 ? '=/' : '=')
150
+ else
151
+ formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
152
+ (ndx > 0 ? (rule.alt? ? '|' : '') : '=')
153
+ end
154
+ lines << OpenStruct.new(id: ("[#{rule.id}]" if rule.id),
155
+ sym: (rule.sym if ndx == 0 || format == :abnf),
156
+ assign: assign,
157
+ formatted: formatted)
158
+ end
159
+ if format == :isoebnf
160
+ lines << OpenStruct.new(assign: ';')
161
+ end
162
+ lines
163
+ else
164
+ OpenStruct.new(id: ("[#{rule.id}]" if rule.id),
165
+ sym: rule.sym,
166
+ assign: (format == :ebnf ? '::=' : '='),
167
+ formatted: (formatted_expr + (format == :isoebnf ? ' ;' : '')))
168
+ end
169
+ end
170
+ end.flatten
171
+ out.write eruby.evaluate(format: format, rules: formatted_rules)
86
172
  return
87
173
  rescue LoadError
88
- $stderr.puts "Generating HTML requires haml gem to be loaded"
174
+ $stderr.puts "Generating HTML requires erubis gem to be loaded"
89
175
  end
90
176
  end
91
177
 
92
178
  # Format each rule, considering the available rhs size
93
179
  rules.each do |rule|
94
180
  buffer = if rule.pass?
95
- "%-#{lhs_length-2}s" % "@pass"
181
+ "\n%-#{lhs_length-2}s " % "@pass"
182
+ elsif rule.kind == :terminals
183
+ "\n%-#{lhs_length-2}s" % "@terminals"
96
184
  else
97
185
  lhs_fmt % {id: "[#{rule.id}]", sym: rule.sym}
98
186
  end
99
- formatted_expr = format(rule.expr)
100
- if formatted_expr.length > rhs_length
101
- buffer << format(rule.expr, ("\n" + " " * lhs_length))
187
+ formatted_expr = self.send(format_meth, rule.expr)
188
+ if formatted_expr.length > rhs_length && (format != :abnf || rule.alt?)
189
+ if format == :abnf
190
+ # No whitespace, use =/
191
+ self.send(format_meth, rule.expr, sep: "--rule-extensions--").
192
+ split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
193
+ if ndx > 0
194
+ buffer << "\n" + lhs_fmt.sub('= ', '=/') % {id: "[#{rule.id}]", sym: rule.sym}
195
+ end
196
+ buffer << formatted.sub(/\s*\/\s*/, '')
197
+ end
198
+ else
199
+ # Space out past "= "
200
+ buffer << self.send(format_meth, rule.expr, sep: ("\n" + " " * (lhs_length + (rule.alt? ? 2 : 4) - (format == :ebnf ? 0 : 2))))
201
+ buffer << ("\n" + " " * (lhs_length) + ';') if format == :isoebnf
202
+ end
102
203
  else
103
- buffer << formatted_expr
204
+ buffer << formatted_expr + (format == :isoebnf ? ' ;' : '')
104
205
  end
206
+ buffer << "\n\n" if [:terminals, :pass].include?(rule.kind)
105
207
  out.puts(buffer)
106
208
  end
107
209
  end
108
210
 
109
211
  protected
212
+
213
+ ##
214
+ # W3C EBNF Formatters
215
+ ##
216
+
110
217
  # Format the expression part of a rule
111
- def format(expr, sep = nil)
218
+ def format_ebnf(expr, sep: nil, embedded: false)
112
219
  return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
113
220
  if expr.is_a?(String)
114
- if expr.length == 1
115
- return format_char(expr)
116
- elsif expr =~ /\A#x\h+/
117
- return (@options[:html] ? %(<code class="grammar-char-escape">#{expr}</code>) : expr)
118
- elsif expr =~ /"/
119
- return (@options[:html] ? %('<code class="grammar-literal">#{escape(expr, "'")}</code>') : %('#{escape(expr, "'")}'))
120
- else
121
- return (@options[:html] ? %("<code class="grammar-literal">#{escape(expr, '"')}</code>") : %("#{escape(expr, '"')}"))
122
- end
221
+ return expr.length == 1 ?
222
+ format_ebnf_char(expr) :
223
+ format_ebnf_string(expr, expr.include?('"') ? "'" : '"')
123
224
  end
124
225
  parts = {
125
226
  alt: (@options[:html] ? "<code>|</code> " : "| "),
@@ -132,40 +233,75 @@ module EBNF
132
233
  rparen = (@options[:html] ? "<code>)</code> " : ")")
133
234
 
134
235
  case expr.first
236
+ when :istr
237
+ # Loses fidelity, but, oh well ...
238
+ format_ebnf(expr.last, embedded: true)
135
239
  when :alt, :diff
136
240
  this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
137
- expr[1..-1].map {|e| format(e)}.join(this_sep)
241
+ res = expr[1..-1].map {|e| format_ebnf(e, embedded: true)}.join(this_sep)
242
+ embedded ? (lparen + res + rparen) : res
138
243
  when :star, :plus, :opt
139
- raise "Expected star expression to have a single operand" unless expr.length == 2
140
244
  char = parts[expr.first.to_sym]
141
- r = format(expr[1])
142
- (r.start_with?("(") || Array(expr[1]).length == 1) ? "#{r}#{char}" : "(#{r})#{char}"
245
+ r = format_ebnf(expr[1], embedded: true)
246
+ "#{r}#{char}"
143
247
  when :hex
144
- (@options[:html] ? %(<code class="grammar-char-escape">#{expr.last}</code>) : expr.last)
248
+ escape_ebnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
145
249
  when :range
146
- format_range(expr.last)
250
+ format_ebnf_range(expr.last)
147
251
  when :seq
148
252
  this_sep = (sep ? sep : " ")
149
- expr[1..-1].map {|e| r = format(e); Array(e).length > 2 ? "#{lparen}#{r}#{rparen}" : r}.join(this_sep)
253
+ res = expr[1..-1].map do |e|
254
+ format_ebnf(e, embedded: true)
255
+ end.join(this_sep)
256
+ embedded ? (lparen + res + rparen) : res
257
+ when :rept
258
+ # Expand repetition
259
+ min, max, value = expr[1..-1]
260
+ if min == 0 && max == 1
261
+ format_ebnf([:opt, value], sep: sep, embedded: embedded)
262
+ elsif min == 0 && max == '*'
263
+ format_ebnf([:star, value], sep: sep, embedded: embedded)
264
+ elsif min == 1 && max == '*'
265
+ format_ebnf([:plus, value], sep: sep, embedded: embedded)
266
+ else
267
+ val2 = [:seq]
268
+ while min > 0
269
+ val2 << value
270
+ min -= 1
271
+ max -= 1 unless max == '*'
272
+ end
273
+ if max == '*'
274
+ val2 << [:star, value]
275
+ else
276
+ opt = nil
277
+ while max > 0
278
+ opt = [:opt, opt ? [:seq, value, opt] : value]
279
+ max -= 1
280
+ end
281
+ val2 << opt if opt
282
+ end
283
+ format_ebnf(val2, sep: sep, embedded: embedded)
284
+ end
150
285
  else
151
286
  raise "Unknown operator: #{expr.first}"
152
287
  end
153
288
  end
154
289
 
155
290
  # Format a single-character string, preferring hex for non-main ASCII
156
- def format_char(c)
291
+ def format_ebnf_char(c)
157
292
  case c.ord
158
- when 0x22 then (@options[:html] ? %('<code class="grammar-literal">"</code>') : %{'"'})
159
- when (0x23..0x7e) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"})
160
- else (@options[:html] ? %(<code class="grammar-char-escape">#{escape_hex(c)}</code>) : escape_hex(c))
293
+ when (0x21) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"})
294
+ when 0x22 then (@options[:html] ? %('<code class="grammar-literal">"</code>') : %{'"'})
295
+ when (0x23..0x7e) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"})
296
+ when (0x80..0xFFFD) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"})
297
+ else escape_ebnf_hex(c)
161
298
  end
162
299
  end
163
300
 
164
301
  # Format a range
165
- def format_range(string)
302
+ def format_ebnf_range(string)
166
303
  lbrac = (@options[:html] ? "<code>[</code> " : "[")
167
304
  rbrac = (@options[:html] ? "<code>]</code> " : "]")
168
- dash = (@options[:html] ? "<code>-</code> " : "-")
169
305
 
170
306
  buffer = lbrac
171
307
  s = StringScanner.new(string)
@@ -174,53 +310,386 @@ module EBNF
174
310
  when s.scan(/\A[!"\u0024-\u007e]+/)
175
311
  buffer << (@options[:html] ? %(<code class="grammar-literal">#{s.matched}</code>) : s.matched)
176
312
  when s.scan(/\A#x\h+/)
177
- buffer << (@options[:html] ? %(<code class="grammar-char-escape">#{s.matched}</code>) : s.matched)
178
- when s.scan(/\A-/)
179
- buffer << dash
313
+ buffer << escape_ebnf_hex(s.matched[2..-1].hex.chr(Encoding::UTF_8))
180
314
  else
181
- buffer << (@options[:html] ? %(<code class="grammar-char-escape">#{escape_hex(s.getch)}</code>) : escape_hex(s.getch))
315
+ buffer << escape_ebnf_hex(s.getch)
182
316
  end
183
317
  end
184
318
  buffer + rbrac
185
319
  end
186
320
 
187
321
  # Escape a string, using as many UTF-8 characters as possible
188
- def escape(string, quote = '"')
189
- buffer = ""
322
+ def format_ebnf_string(string, quote = '"')
190
323
  string.each_char do |c|
191
- buffer << case (u = c.ord)
192
- when (0x00..0x1f) then "#x%02X" % u
193
- when quote.ord then "#x%02X" % u
194
- else c
324
+ case c.ord
325
+ when 0x00..0x19, quote.ord
326
+ raise RangeError, "cannot format #{string.inspect} as an EBNF String: #{c.inspect} is out of range" unless
327
+ ISOEBNF::TERMINAL_CHARACTER.match?(c)
195
328
  end
196
329
  end
197
- buffer
330
+
331
+ "#{quote}#{string}#{quote}"
198
332
  end
199
333
 
200
- def escape_hex(u)
334
+ def escape_ebnf_hex(u)
201
335
  fmt = case u.ord
336
+ when 0x00..0x20 then "#x%02X"
202
337
  when 0x0000..0x00ff then "#x%02X"
203
338
  when 0x0100..0xffff then "#x%04X"
204
339
  else "#x%08X"
205
340
  end
206
- sprintf(fmt, u.ord)
207
- end
208
-
209
- HAML_DESC = %q(
210
- %table.grammar
211
- %tbody#grammar-productions
212
- - rules.each do |rule|
213
- %tr{id: "grammar-production-#{rule.sym}"}
214
- - if rule.pass?
215
- %td{colspan: 3}
216
- %code<="@pass"
217
- - else
218
- %td<= "[#{rule.id}]"
219
- %td<
220
- %code<= rule.sym
221
- %td<= "::="
222
- %td
223
- != yield rule
341
+ char = fmt % u.ord
342
+ if @options[:html]
343
+ if u.ord <= 0x20
344
+ char = %(<abbr title="#{ASCII_ESCAPE_NAMES[u.ord]}">#{char}</abbr>)
345
+ elsif u.ord < 0x7F
346
+ char = %(<abbr title="ascii '#{u}'">#{char}</abbr>)
347
+ elsif u.ord == 0x7F
348
+ char = %(<abbr title="delete">#{char}</abbr>)
349
+ elsif u.ord <= 0xFF
350
+ char = %(<abbr title="extended ascii '#{u}'">#{char}</abbr>)
351
+ else
352
+ char = %(<abbr title="unicode '#{u}'">#{char}</abbr>)
353
+ end
354
+ %(<code class="grammar-char-escape">#{char}</code>)
355
+ else
356
+ char
357
+ end
358
+ end
359
+
360
+ ##
361
+ # ABNF Formatters
362
+ ##
363
+
364
# Format the expression part of a rule as ABNF.
#
# @param [Symbol, String, Array] expr
#   a rule reference (Symbol), a literal (String), or an S-expression whose
#   first element is the operator (`:seq`, `:alt`, `:opt`, `:star`, `:plus`,
#   `:rept`, `:hex`, `:range`, `:istr`)
# @param [String] sep (nil)
#   text placed between the operands of this `seq`/`alt` instead of a single
#   space; it is not propagated to nested expressions
# @param [Boolean] embedded (false)
#   true when the result is an operand of another expression, in which case
#   `seq` and `alt` results are wrapped in parentheses
# @param [Boolean] sensitive (true)
#   when true, multi-character string literals get the `%s` prefix
# @return [String]
# @raise [RangeError] if `expr` uses the `diff` operator
# @raise [RuntimeError] if the operator is not recognized
def format_abnf(expr, sep: nil, embedded: false, sensitive: true)
  return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
  if expr.is_a?(String)
    if expr.length == 1
      return format_abnf_char(expr)
    elsif expr.start_with?('%')
      # Already encoded
      return expr
    elsif expr.include?('"')
      # A double quote cannot appear inside a quoted literal, so split the
      # string around each quote and emit every quote as %x22.
      # The -1 limit keeps trailing empty fields; without it a closing
      # quote would silently disappear from the output.
      segments = expr.split('"', -1)
      seq = segments.flat_map {|s| [[:hex, "#x22"], s]}.drop(1)
      # Empty segments (adjacent, leading or trailing quotes) carry no text
      seq = seq.reject {|s| s == ""}
      return format_abnf(seq.unshift(:seq), sep: nil, embedded: false)
    else
      return (@options[:html] ? %("<code class="grammar-literal">#{'%s' if sensitive}#{expr}</code>") : %(#{'%s' if sensitive}"#{expr}"))
    end
  end
  parts = {
    alt:  (@options[:html] ? "<code>/</code> " : "/ "),
    star: (@options[:html] ? "<code>*</code> " : "*"),
    plus: (@options[:html] ? "<code>+</code> " : "1*"),
    opt:  (@options[:html] ? "<code>?</code> " : "?")
  }
  lbrac = (@options[:html] ? "<code>[</code> " : "[")
  rbrac = (@options[:html] ? "<code>]</code> " : "]")
  lparen = (@options[:html] ? "<code>(</code> " : "(")
  rparen = (@options[:html] ? "<code>)</code> " : ")")

  case expr.first
  when :istr
    # FIXME: if string part is segmented, need to do something different
    format_abnf(expr.last, embedded: true, sensitive: false)
  when :alt
    this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
    res = expr[1..-1].map {|e| format_abnf(e, embedded: true)}.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :diff
    raise RangeError, "ABNF does not support the diff operator"
  when :opt
    # Optional elements are written with square brackets in ABNF
    r = format_abnf(expr[1], embedded: true)
    "#{lbrac}#{r}#{rbrac}"
  when :plus, :star
    # Repetition operators are prefixes in ABNF
    char = parts[expr.first.to_sym]
    r = format_abnf(expr[1], embedded: true)
    "#{char}#{r}"
  when :hex
    # Decode "#xNN..." explicitly as UTF-8: Integer#chr without an encoding
    # raises RangeError for code points above 0xFF.
    escape_abnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
  when :range
    # Returns an [:alt] or [:not [:alt]] if composed of multiple sequences
    # Note: ABNF does not support the `not` operator
    res = format_abnf_range(expr.last)
    res.is_a?(Array) ?
      format_abnf(res, embedded: true) :
      res
  when :seq
    this_sep = (sep ? sep : " ")
    res = expr[1..-1].map do |e|
      format_abnf(e, embedded: true)
    end.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :rept
    # Expand repetition
    min, max, value = expr[1..-1]
    r = format_abnf(value, embedded: true)
    if min == max
      "#{min}#{r}"
    elsif min == 0 && max == '*'
      "#{parts[:star]}#{r}"
    elsif min > 0 && max == '*'
      "#{min}#{parts[:star]}#{r}"
    else
      "#{min}#{parts[:star]}#{max}#{r}"
    end
  else
    raise "Unknown operator: #{expr.first}"
  end
end
447
+
448
# Format a single-character string, preferring a quoted literal for
# printable ASCII (except the double quote) and a hex escape otherwise.
#
# ABNF quoted strings have no escape mechanism, so the character is placed
# verbatim between double quotes. String#inspect is not suitable here: it
# would emit a backslash as two backslashes.
#
# @param [String] c
# @return [String]
def format_abnf_char(c)
  if /[\x20-\x21\x23-\x7E]/.match?(c)
    %("#{c}")
  else
    escape_abnf_hex(c)
  end
end
456
+
457
+ # Format a range
458
+ #
459
+ # Presumes range has already been validated
460
+ def format_abnf_range(string)
461
+ alt, o_dash = [:alt], false
462
+
463
+ raise RangeError, "cannot format #{string.inspect} an ABNF range" if string.start_with?('^')
464
+
465
+ if string.end_with?('-')
466
+ o_dash = true
467
+ string = string[0..-2]
468
+ end
469
+
470
+ scanner = StringScanner.new(string)
471
+ hexes, deces = [], []
472
+ in_range = false
473
+ # Build op (alt) from different ranges/enums
474
+ while !scanner.eos?
475
+ if hex = scanner.scan(Terminals::HEX)
476
+ # Append any decimal values
477
+ alt << "%d" + deces.join(".") unless deces.empty?
478
+ deces = []
479
+
480
+ if in_range
481
+ # Add "." sequences for any previous hexes
482
+ alt << "%x" + hexes[0..-2].join(".") if hexes.length > 1
483
+ alt << "%x#{hexes.last}-#{hex[2..-1]}"
484
+ in_range, hexes = false, []
485
+ else
486
+ hexes << hex[2..-1]
487
+ end
488
+ elsif dec = scanner.scan(Terminals::R_CHAR)
489
+ # Append any hexadecimal values
490
+ alt << "%x" + hexes.join(".") unless hexes.empty?
491
+ hexes = []
492
+
493
+ if in_range
494
+ # Add "." sequences for any previous hexes
495
+ alt << "%d" + deces[0..-2].join(".") if deces.length > 1
496
+ alt << "%d#{deces.last}-#{dec.codepoints.first}"
497
+ in_range, deces = false, []
498
+ else
499
+ deces << dec.codepoints.first.to_s
500
+ end
501
+ end
502
+
503
+ in_range = true if scanner.scan(/\-/)
504
+ end
505
+
506
+ deces << '45' if o_dash
507
+
508
+ # Append hexes and deces as "." sequences (should be only one)
509
+ alt << "%d" + deces.join(".") unless deces.empty?
510
+ alt << "%x" + hexes.join(".") unless hexes.empty?
511
+
512
+ # FIXME: HTML abbreviations?
513
+ if alt.length == 2
514
+ # Just return the range or enum
515
+ alt.last
516
+ else
517
+ # Return the alt, which will be further formatted
518
+ alt
519
+ end
520
+ end
521
+
522
# Escape a character as an ABNF hexadecimal terminal (`%xNN`).
#
# The code point of the first character of `u` is written with 2, 4 or 8
# upper-case hex digits depending on its magnitude. When `@options[:html]`
# is set, the escape is wrapped in an `<abbr>` whose title describes the
# character and in a `<code class="grammar-char-escape">` element.
#
# @param [String] u
# @return [String]
def escape_abnf_hex(u)
  fmt = case u.ord
  when 0x0000..0x00ff then "%02X"
  when 0x0100..0xffff then "%04X"
  else "%08X"
  end
  char = "%x" + (fmt % u.ord)
  if @options[:html]
    if u.ord <= 0x20
      char = %(<abbr title="#{ASCII_ESCAPE_NAMES[u.ord]}">#{char}</abbr>)
    elsif u.ord < 0x7F
      # Strictly below 0x7F so that DEL reaches its own branch below
      char = %(<abbr title="ascii '#{u}'">#{char}</abbr>)
    elsif u.ord == 0x7F
      char = %(<abbr title="delete">#{char}</abbr>)
    elsif u.ord <= 0xFF
      char = %(<abbr title="extended ascii '#{u}'">#{char}</abbr>)
    else
      char = %(<abbr title="unicode '#{u}'">#{char}</abbr>)
    end
    %(<code class="grammar-char-escape">#{char}</code>)
  else
    char
  end
end
546
+
547
+ ##
548
+ # ISO EBNF Formatters
549
+ ##
550
+
551
# Format the expression part of a rule as ISO EBNF.
#
# @param [Symbol, String, Array] expr
#   a rule reference (Symbol), a literal or `#xNN` escape (String), or an
#   S-expression whose first element is the operator
# @param [String] sep (nil)
#   text placed after the `,` of a `seq`, or before the operator of an
#   `alt`/`diff`, instead of a single space
# @param [Boolean] embedded (false)
#   true when the result is an operand of another expression, in which case
#   `seq`, `alt` and `diff` results are wrapped in parentheses
# @return [String]
# @raise [RangeError]
#   if a literal contains a character that does not match
#   ISOEBNF::TERMINAL_CHARACTER
# @raise [RuntimeError] if the operator is not recognized
def format_isoebnf(expr, sep: nil, embedded: false)
  return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
  if expr.is_a?(String)
    # Decode a "#xNN..." escape into the character itself. The encoding is
    # given explicitly: Integer#chr without one raises for code points above
    # 0xFF and returns a binary string for 0x80..0xFF, which would bypass
    # the descriptive range check below.
    expr = expr[2..-1].hex.chr(Encoding::UTF_8) if expr =~ /\A#x\h+/
    expr.chars.each do |c|
      raise RangeError, "cannot format #{expr.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless
        ISOEBNF::TERMINAL_CHARACTER.match?(c)
    end
    # Use single quotes when the text itself contains a double quote
    if expr =~ /"/
      return (@options[:html] ? %('<code class="grammar-literal">#{expr}</code>') : %('#{expr}'))
    else
      return (@options[:html] ? %("<code class="grammar-literal">#{expr}</code>") : %("#{expr}"))
    end
  end
  parts = {
    alt:  (@options[:html] ? "<code>|</code> " : "| "),
    diff: (@options[:html] ? "<code>-</code> " : "- "),
  }
  lparen = (@options[:html] ? "<code>(</code> " : "(")
  rparen = (@options[:html] ? "<code>)</code> " : ")")

  case expr.first
  when :istr
    # Loses fidelity, but, oh well ...
    format_isoebnf(expr.last, embedded: true)
  when :alt, :diff
    this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
    res = expr[1..-1].map {|e| format_isoebnf(e, embedded: true)}.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :opt
    r = format_isoebnf(expr[1], embedded: true)
    "[#{r}]"
  when :star
    r = format_isoebnf(expr[1], embedded: true)
    "{#{r}}"
  when :plus
    # One or more is written as the operand followed by a repetition of it
    r = format_isoebnf(expr[1], embedded: true)
    "#{r}, {#{r}}"
  when :hex
    # The "#xNN" string is decoded by the String branch above
    format_isoebnf(expr[1], embedded: true)
  when :range
    res = format_isoebnf_range(expr.last)
    res.is_a?(Array) ?
      format_isoebnf(res, embedded: true) :
      res
  when :seq
    this_sep = "," + (sep ? sep : " ")
    res = expr[1..-1].map do |e|
      format_isoebnf(e, embedded: true)
    end.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :rept
    # Expand repetition
    min, max, value = expr[1..-1]
    if min == 0 && max == 1
      format_isoebnf([:opt, value], sep: sep, embedded: embedded)
    elsif min == 0 && max == '*'
      format_isoebnf([:star, value], sep: sep, embedded: embedded)
    elsif min == 1 && max == '*'
      format_isoebnf([:plus, value], sep: sep, embedded: embedded)
    else
      # `min` mandatory copies of the value ...
      val2 = [:seq]
      while min > 0
        val2 << value
        min -= 1
        max -= 1 unless max == '*'
      end
      if max == '*'
        # ... followed by any number of further copies
        val2 << [:star, value]
      else
        # ... followed by the remaining copies as nested optionals
        opt = nil
        while max > 0
          opt = [:opt, opt ? [:seq, value, opt] : value]
          max -= 1
        end
        val2 << opt if opt
      end
      format_isoebnf(val2, sep: sep, embedded: embedded)
    end
  else
    raise "Unknown operator: #{expr.first}"
  end
end
635
+
636
# Format a range
# Range is formatted as an alternation of its individual characters.
#
# A leading `^` (negated range) cannot be expressed and is rejected; a
# trailing `-` is treated as a literal hyphen.
#
# @param [String] string the content of the range, without the brackets
# @return [String, Array]
#   a quoted single character, or an `[:alt, char, ...]` array that the
#   caller formats further
# @raise [RangeError]
#   if the range is negated, contains a character that does not match
#   ISOEBNF::TERMINAL_CHARACTER, or has a sub-range whose end does not
#   sort after its start
def format_isoebnf_range(string)
  chars = []
  o_dash = false

  raise RangeError, "cannot format #{string.inspect} as an ISO EBNF range" if string.start_with?('^')

  if string.end_with?('-')
    o_dash = true
    string = string[0..-2]
  end

  scanner = StringScanner.new(string)
  in_range = false
  # Build chars from different ranges/enums
  until scanner.eos?
    char = if hex = scanner.scan(Terminals::HEX)
      # Decode "#xNN..." into the character it denotes
      hex[2..-1].hex.chr(Encoding::UTF_8)
    else
      scanner.scan(Terminals::R_CHAR)
    end
    # A nil char means nothing could be scanned; raising here also
    # guarantees that the loop always advances.
    raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration: #{char.inspect} is out of range" unless
      char && ISOEBNF::TERMINAL_CHARACTER.match?(char)

    if in_range
      # calculate characters from chars.last to this char
      raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration" unless chars.last < char
      chars.concat((chars.last..char).to_a[1..-1])
      in_range = false
    else
      chars << char
    end

    in_range = true if scanner.scan(/\-/)
  end

  chars << '-' if o_dash

  # Possibly only a single character (no character?)
  if chars.length == 1
    # ISO EBNF has no escapes inside quoted strings, so quote verbatim and
    # switch to single quotes for the double-quote character itself.
    only = chars.last
    only == '"' ? %('"') : %("#{only}")
  else
    chars.unshift(:alt)
  end
end
677
+
678
+ ERB_DESC = %q(
679
+ <table class="grammar">
680
+ <tbody id="grammar-productions" class="<%= @format %>">
681
+ <% for rule in @rules %>
682
+ <tr<%= %{ id="grammar-production-#{rule.sym}"} unless %w(=/ |).include?(rule.assign)%>>
683
+ <% if rule.id %>
684
+ <td><%= rule.id %></td>
685
+ <% end %>
686
+ <td><code><%== rule.sym %></code></td>
687
+ <td><%= rule.assign %></td>
688
+ <td><%= rule.formatted %></td>
689
+ </tr>
690
+ <% end %>
691
+ </tbody>
692
+ </table>
224
693
  ).gsub(/^ /, '')
225
694
  end
226
695
  end