ebnf 1.2.0 → 2.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +223 -199
  3. data/UNLICENSE +1 -1
  4. data/VERSION +1 -1
  5. data/bin/ebnf +38 -19
  6. data/etc/abnf-core.ebnf +52 -0
  7. data/etc/abnf.abnf +121 -0
  8. data/etc/abnf.ebnf +124 -0
  9. data/etc/abnf.sxp +45 -0
  10. data/etc/doap.ttl +23 -18
  11. data/etc/ebnf.ebnf +21 -33
  12. data/etc/ebnf.html +76 -160
  13. data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
  14. data/etc/ebnf.ll1.sxp +182 -183
  15. data/etc/ebnf.peg.rb +90 -0
  16. data/etc/ebnf.peg.sxp +84 -0
  17. data/etc/ebnf.sxp +40 -41
  18. data/etc/iso-ebnf.ebnf +140 -0
  19. data/etc/iso-ebnf.isoebnf +138 -0
  20. data/etc/iso-ebnf.sxp +65 -0
  21. data/etc/sparql.ebnf +4 -4
  22. data/etc/sparql.html +1603 -1751
  23. data/etc/sparql.ll1.sxp +7372 -7372
  24. data/etc/sparql.peg.rb +532 -0
  25. data/etc/sparql.peg.sxp +597 -0
  26. data/etc/sparql.sxp +363 -362
  27. data/etc/turtle.ebnf +3 -3
  28. data/etc/turtle.html +465 -517
  29. data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
  30. data/etc/turtle.ll1.sxp +425 -425
  31. data/etc/turtle.peg.rb +182 -0
  32. data/etc/turtle.peg.sxp +199 -0
  33. data/etc/turtle.sxp +103 -101
  34. data/lib/ebnf.rb +6 -1
  35. data/lib/ebnf/abnf.rb +301 -0
  36. data/lib/ebnf/abnf/core.rb +23 -0
  37. data/lib/ebnf/abnf/meta.rb +111 -0
  38. data/lib/ebnf/base.rb +114 -69
  39. data/lib/ebnf/bnf.rb +1 -26
  40. data/lib/ebnf/ebnf/meta.rb +90 -0
  41. data/lib/ebnf/isoebnf.rb +229 -0
  42. data/lib/ebnf/isoebnf/meta.rb +75 -0
  43. data/lib/ebnf/ll1.rb +131 -3
  44. data/lib/ebnf/ll1/lexer.rb +20 -22
  45. data/lib/ebnf/ll1/parser.rb +97 -64
  46. data/lib/ebnf/ll1/scanner.rb +82 -50
  47. data/lib/ebnf/native.rb +320 -0
  48. data/lib/ebnf/parser.rb +285 -302
  49. data/lib/ebnf/peg.rb +39 -0
  50. data/lib/ebnf/peg/parser.rb +561 -0
  51. data/lib/ebnf/peg/rule.rb +250 -0
  52. data/lib/ebnf/rule.rb +442 -148
  53. data/lib/ebnf/terminals.rb +21 -0
  54. data/lib/ebnf/writer.rb +587 -82
  55. metadata +125 -18
  56. data/etc/sparql.rb +0 -45773
@@ -0,0 +1,21 @@
1
+ # encoding: utf-8
2
# Terminal definitions for the EBNF grammar.
#
# Each constant is a frozen, Unicode-aware regular expression matching one
# terminal of the W3C EBNF notation. Later constants are built by
# interpolating earlier ones, so definition order matters.
module EBNF
  module Terminals
    # Bare symbol characters, delimited by word boundaries
    SYMBOL_BASE = %r(\b[a-zA-Z0-9_\.]+\b)u.freeze
    # A symbol reference: a symbol that is NOT followed by `::=` (that would be a rule LHS)
    SYMBOL      = %r(#{SYMBOL_BASE}(?!\s*::=))u.freeze
    # Hexadecimal character escape, e.g. `#x20`
    HEX         = %r(\#x\h+)u.freeze
    CHAR        = %r([\u0009\u000A\u000D\u0020-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
    # Character allowed inside a range: CHAR without `-` (#x2D) and `]` (#x5D)
    R_CHAR      = %r([\u0009\u000A\u000D\u0020-\u002C\u002E-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
    # `[...]` range; the negative lookahead keeps a rule identifier such as
    # `[1] sym ::=` from being read as a range
    RANGE       = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX})|#{R_CHAR}|#{HEX})+-?\](?!\s+#{SYMBOL_BASE}\s*::=))u.freeze
    # Left-hand side of a rule: optional `[id]`, the symbol, then `::=`
    LHS         = %r((?:\[#{SYMBOL_BASE}\])?\s*#{SYMBOL_BASE}\s*::=)u.freeze
    # Negated range `[^...]`
    O_RANGE     = %r(\[\^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX}|#{R_CHAR}|#{HEX}))+-?\])u.freeze
    # Double-quoted string (may not contain `"`)
    STRING1     = %r("[\u0009\u000A\u000D\u0020\u0021\u0023-\uD7FF\u{10000}-\u{10FFFF}]*")u.freeze
    # Single-quoted string (may not contain `'`)
    STRING2     = %r('[\u0009\u000A\u000D\u0020-\u0026\u0028-\uD7FF\u{10000}-\u{10FFFF}]*')u.freeze
    POSTFIX     = %r([?*+])u.freeze
    # Skipped input: whitespace, `#`/`//` line comments, and `/* */` or
    # `(* *)` block comments.
    #
    # A `#` starts a comment only when it is not followed by `x` (which would
    # be a HEX escape). This is a lookahead rather than a consumed character
    # so that an empty `#` comment does not swallow the following line and a
    # `#` at the very end of input is still recognized.
    PASS        = %r((
                      \s
                    | (?:(?:\#(?!x)|//)[^\n\r]*)
                    | (?:/\*(?:(?:\*[^/])|[^*])*\*/)
                    | (?:\(\*(?:(?:\*[^\)])|[^*])*\*\))
                    )+)xmu.freeze
  end
end
data/lib/ebnf/writer.rb CHANGED
@@ -1,22 +1,63 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  require 'rdf'
3
3
  require 'strscan' unless defined?(StringScanner)
4
+ require "ostruct"
5
+ require 'unicode/types'
4
6
 
5
7
  ##
6
8
  # Serialize ruleset back to EBNF
7
9
  module EBNF
8
10
  class Writer
9
11
  LINE_LENGTH = 80
12
+ LINE_LENGTH_HTML = 200
13
+
14
# ASCII escape names.
#
# Human-readable names for codepoints 0x00 through 0x20, indexed by
# codepoint. Used as the `title` of the `<abbr>` element wrapped around
# hexadecimal escapes in HTML output. Frozen so the shared table cannot be
# modified at runtime.
ASCII_ESCAPE_NAMES = [
  "null",                 #x00
  "start of heading",     #x01
  "start of text",        #x02
  "end of text",          #x03
  "end of transmission",  #x04
  "enquiry",              #x05
  "acknowledge",          #x06
  "bell",                 #x07
  "backspace",            #x08
  "horizontal tab",       #x09
  "new line",             #x0A
  "vertical tab",         #x0B
  "form feed",            #x0C
  "carriage return",      #x0D
  "shift out",            #x0E
  "shift in",             #x0F
  "data link escape",     #x10
  "device control 1",     #x11
  "device control 2",     #x12
  "device control 3",     #x13
  "device control 4",     #x14
  "negative acknowledge", #x15
  "synchronous idle",     #x16
  "end of trans. block",  #x17
  "cancel",               #x18
  "end of medium",        #x19
  "substitute",           #x1A
  "escape",               #x1B
  "file separator",       #x1C
  "group separator",      #x1D
  "record separator",     #x1E
  "unit separator",       #x1F
  "space"                 #x20
].freeze
10
50
 
11
51
  ##
12
52
  # Format rules to a String
13
53
  #
14
54
  # @param [Array<Rule>] rules
55
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
15
56
  # @return [Object]
16
- def self.string(*rules)
57
+ def self.string(*rules, format: :ebnf)
17
58
  require 'stringio' unless defined?(StringIO)
18
59
  buf = StringIO.new
19
- write(buf, *rules)
60
+ write(buf, *rules, format: format)
20
61
  buf.string
21
62
  end
22
63
 
@@ -24,9 +65,10 @@ module EBNF
24
65
  # Format rules to $stdout
25
66
  #
26
67
  # @param [Array<Rule>] rules
68
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
27
69
  # @return [Object]
28
- def self.print(*rules)
29
- write($stdout, *rules)
70
+ def self.print(*rules, format: :ebnf)
71
+ write($stdout, *rules, format: format)
30
72
  end
31
73
 
32
74
  ##
@@ -34,88 +76,174 @@ module EBNF
34
76
  #
35
77
  # @param [Object] out
36
78
  # @param [Array<Rule>] rules
79
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
37
80
  # @return [Object]
38
- def self.write(out, *rules)
39
- Writer.new(rules, out: out)
81
+ def self.write(out, *rules, format: :ebnf)
82
+ Writer.new(rules, out: out, format: format)
40
83
  end
41
84
 
42
85
  ##
43
86
  # Write formatted rules to an IO like object as HTML
44
87
  #
45
88
  # @param [Array<Rule>] rules
89
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
90
+ # @param [Boolean] validate (false) validate generated HTML.
46
91
  # @return [Object]
47
- def self.html(*rules)
92
+ def self.html(*rules, format: :ebnf, validate: false)
48
93
  require 'stringio' unless defined?(StringIO)
49
94
  buf = StringIO.new
50
- Writer.new(rules, out: buf, html: true)
95
+ Writer.new(rules, out: buf, html: true, format: format, validate: validate)
51
96
  buf.string
52
97
  end
53
98
 
54
99
  ##
55
100
  # @param [Array<Rule>] rules
101
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
102
+ # @param [Boolean] html (false) generate HTML output
103
+ # @param [Boolean] validate (false) validate generated HTML.
56
104
  # @param [Hash{Symbol => Object}] options
57
- # @param [#write] :out ($stdout)
58
- # @option options [Symbol] :format
59
- def initialize(rules, out: $stdout, html: false, **options)
60
- @options = options.dup
105
+ # @param [#write] out ($stdout)
106
+ def initialize(rules, out: $stdout, html: false, format: :ebnf, validate: false, **options)
107
+ @options = options.merge(html: html)
108
+ return if rules.empty?
61
109
 
62
110
  # Determine max LHS length
111
+ format_meth = "format_#{format}".to_sym
63
112
  max_id = rules.max_by {|r| r.id.to_s.length}.id.to_s.length
64
113
  max_sym = rules.max_by {|r| r.sym.to_s.length}.sym.to_s.length
65
- lhs_length = max_sym + 3
66
- lhs_fmt = "%<sym>-#{max_sym}s ::= "
67
- if max_id > 0
114
+ lhs_length = max_sym + 1
115
+ lhs_fmt = case format
116
+ when :abnf then "%<sym>-#{max_sym}s = "
117
+ when :ebnf then "%<sym>-#{max_sym}s ::= "
118
+ when :isoebnf then "%<sym>-#{max_sym}s = "
119
+ end
120
+ if format == :ebnf && max_id > 0
68
121
  lhs_fmt = "%<id>-#{max_id+2}s " + lhs_fmt
69
122
  lhs_length += max_id + 3
70
123
  end
71
- rhs_length = LINE_LENGTH - lhs_length
124
+ rhs_length = (html ? LINE_LENGTH_HTML : LINE_LENGTH) - lhs_length
72
125
 
73
126
  if html
74
127
  # Output as formatted HTML
75
128
  begin
76
- require 'haml'
77
- hout = Haml::Engine.new(HAML_DESC).render(self, rules: rules) do |rule|
78
- formatted_expr = format(rule.expr)
79
- formatted_expr.length > rhs_length ? format(rule.expr, "\n") : formatted_expr
129
+ require 'erubis'
130
+ require 'htmlentities'
131
+ @coder = HTMLEntities.new
132
+ eruby = Erubis::Eruby.new(ERB_DESC)
133
+ formatted_rules = rules.map do |rule|
134
+ if rule.kind == :terminals || rule.kind == :pass
135
+ OpenStruct.new(id: ("@#{rule.kind}"),
136
+ sym: nil,
137
+ assign: nil,
138
+ formatted: (
139
+ rule.kind == :terminals ?
140
+ "<strong># Productions for terminals</strong>" :
141
+ self.send(format_meth, rule.expr)))
142
+ else
143
+ formatted_expr = self.send(format_meth, rule.expr)
144
+ # Measure text without markup
145
+ formatted_expr_text = formatted_expr.gsub(%r{</?\w+[^>]*>}, '')
146
+ if formatted_expr_text.length > rhs_length && (format != :abnf || rule.alt?)
147
+ lines = []
148
+ # Can only reasonably split apart alts
149
+ self.send(format_meth, rule.expr, sep: "--rule-extensions--").
150
+ split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
151
+ assign = case format
152
+ when :ebnf
153
+ formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
154
+ (ndx > 0 ? (rule.alt? ? '|' : '') : '::=')
155
+ when :abnf
156
+ formatted.sub!(%r{\s*<code>/</code>\s*}, '')
157
+ (ndx > 0 ? '=/' : '=')
158
+ else
159
+ formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
160
+ (ndx > 0 ? (rule.alt? ? '|' : '') : '=')
161
+ end
162
+ lines << OpenStruct.new(id: ((ndx == 0 ? "[#{rule.id}]" : "") if rule.id),
163
+ sym: (rule.sym if ndx == 0 || format == :abnf),
164
+ assign: assign,
165
+ formatted: formatted)
166
+ end
167
+ if format == :isoebnf
168
+ lines << OpenStruct.new(assign: ';')
169
+ end
170
+ lines
171
+ else
172
+ OpenStruct.new(id: ("[#{rule.id}]" if rule.id),
173
+ sym: rule.sym,
174
+ assign: (format == :ebnf ? '::=' : '='),
175
+ formatted: (formatted_expr + (format == :isoebnf ? ' ;' : '')))
176
+ end
177
+ end
178
+ end.flatten
179
+
180
+ html_result = eruby.evaluate(format: format, rules: formatted_rules)
181
+
182
+ if validate
183
+ begin
184
+ require 'nokogumbo'
185
+ # Validate the output HTML
186
+ doc = Nokogiri::HTML5("<!DOCTYPE html>" + html_result, max_errors: 10)
187
+ raise EncodingError, "Errors found in generated HTML:\n " +
188
+ doc.errors.map(&:to_s).join("\n ") unless doc.errors.empty?
189
+ rescue LoadError
190
+ # Skip
191
+ end
80
192
  end
81
- out.write hout
193
+
194
+ out.write html_result
82
195
  return
83
196
  rescue LoadError
84
- $stderr.puts "Generating HTML requires haml gem to be loaded"
197
+ $stderr.puts "Generating HTML requires erubis and htmlentities gems to be loaded"
85
198
  end
86
199
  end
87
200
 
88
201
  # Format each rule, considering the available rhs size
89
202
  rules.each do |rule|
90
203
  buffer = if rule.pass?
91
- "%-#{lhs_length-2}s" % "@pass"
204
+ "\n%-#{lhs_length-2}s " % "@pass"
205
+ elsif rule.kind == :terminals
206
+ "\n%-#{lhs_length-2}s" % "@terminals"
92
207
  else
93
208
  lhs_fmt % {id: "[#{rule.id}]", sym: rule.sym}
94
209
  end
95
- formatted_expr = format(rule.expr)
96
- if formatted_expr.length > rhs_length
97
- buffer << format(rule.expr, ("\n" + " " * lhs_length))
210
+ formatted_expr = self.send(format_meth, rule.expr)
211
+ if formatted_expr.length > rhs_length && (format != :abnf || rule.alt?)
212
+ if format == :abnf
213
+ # No whitespace, use =/
214
+ self.send(format_meth, rule.expr, sep: "--rule-extensions--").
215
+ split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
216
+ if ndx > 0
217
+ buffer << "\n" + lhs_fmt.sub('= ', '=/') % {id: "[#{rule.id}]", sym: rule.sym}
218
+ end
219
+ buffer << formatted.sub(/\s*\/\s*/, '')
220
+ end
221
+ else
222
+ # Space out past "= "
223
+ buffer << self.send(format_meth, rule.expr, sep: ("\n" + " " * (lhs_length + (rule.alt? ? 2 : 4) - (format == :ebnf ? 0 : 2))))
224
+ buffer << ("\n" + " " * (lhs_length) + ';') if format == :isoebnf
225
+ end
98
226
  else
99
- buffer << formatted_expr
227
+ buffer << formatted_expr + (format == :isoebnf ? ' ;' : '')
100
228
  end
229
+ buffer << "\n\n" if [:terminals, :pass].include?(rule.kind)
101
230
  out.puts(buffer)
102
231
  end
103
232
  end
104
233
 
105
234
  protected
235
+
236
+ ##
237
+ # W3C EBNF Formatters
238
+ ##
239
+
106
240
  # Format the expression part of a rule
107
- def format(expr, sep = nil)
108
- return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
241
+ def format_ebnf(expr, sep: nil, embedded: false)
242
+ return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
109
243
  if expr.is_a?(String)
110
- if expr.length == 1
111
- return format_char(expr)
112
- elsif expr =~ /\A#x\h+/
113
- return (@options[:html] ? %(<code class="grammar-char-escape">#{expr}</code>) : expr)
114
- elsif expr =~ /"/
115
- return (@options[:html] ? %('<code class="grammar-literal">#{escape(expr, "'")}</code>') : %('#{escape(expr, "'")}'))
116
- else
117
- return (@options[:html] ? %("<code class="grammar-literal">#{escape(expr, '"')}</code>") : %("#{escape(expr, '"')}"))
118
- end
244
+ return expr.length == 1 ?
245
+ format_ebnf_char(expr) :
246
+ format_ebnf_string(expr, expr.include?('"') ? "'" : '"')
119
247
  end
120
248
  parts = {
121
249
  alt: (@options[:html] ? "<code>|</code> " : "| "),
@@ -128,95 +256,472 @@ module EBNF
128
256
  rparen = (@options[:html] ? "<code>)</code> " : ")")
129
257
 
130
258
  case expr.first
259
+ when :istr
260
+ # Loses fidelity, but, oh well ...
261
+ format_ebnf(expr.last, embedded: true)
131
262
  when :alt, :diff
132
263
  this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
133
- expr[1..-1].map {|e| format(e)}.join(this_sep)
264
+ res = expr[1..-1].map {|e| format_ebnf(e, embedded: true)}.join(this_sep)
265
+ embedded ? (lparen + res + rparen) : res
134
266
  when :star, :plus, :opt
135
- raise "Expected star expression to have a single operand" unless expr.length == 2
136
267
  char = parts[expr.first.to_sym]
137
- r = format(expr[1])
138
- (r.start_with?("(") || Array(expr[1]).length == 1) ? "#{r}#{char}" : "(#{r})#{char}"
268
+ r = format_ebnf(expr[1], embedded: true)
269
+ "#{r}#{char}"
139
270
  when :hex
140
- (@options[:html] ? %(<code class="grammar-char-escape">#{expr.last}</code>) : expr.last)
271
+ escape_ebnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
141
272
  when :range
142
- format_range(expr.last)
273
+ format_ebnf_range(expr.last)
143
274
  when :seq
144
275
  this_sep = (sep ? sep : " ")
145
- expr[1..-1].map {|e| r = format(e); Array(e).length > 2 ? "#{lparen}#{r}#{rparen}" : r}.join(this_sep)
276
+ res = expr[1..-1].map do |e|
277
+ format_ebnf(e, embedded: true)
278
+ end.join(this_sep)
279
+ embedded ? (lparen + res + rparen) : res
280
+ when :rept
281
+ # Expand repetition
282
+ min, max, value = expr[1..-1]
283
+ if min == 0 && max == 1
284
+ format_ebnf([:opt, value], sep: sep, embedded: embedded)
285
+ elsif min == 0 && max == '*'
286
+ format_ebnf([:star, value], sep: sep, embedded: embedded)
287
+ elsif min == 1 && max == '*'
288
+ format_ebnf([:plus, value], sep: sep, embedded: embedded)
289
+ else
290
+ val2 = [:seq]
291
+ while min > 0
292
+ val2 << value
293
+ min -= 1
294
+ max -= 1 unless max == '*'
295
+ end
296
+ if max == '*'
297
+ val2 << [:star, value]
298
+ else
299
+ opt = nil
300
+ while max > 0
301
+ opt = [:opt, opt ? [:seq, value, opt] : value]
302
+ max -= 1
303
+ end
304
+ val2 << opt if opt
305
+ end
306
+ format_ebnf(val2, sep: sep, embedded: embedded)
307
+ end
146
308
  else
147
309
  raise "Unknown operator: #{expr.first}"
148
310
  end
149
311
  end
150
312
 
151
313
  # Format a single-character string, preferring hex for non-main ASCII
152
- def format_char(c)
314
+ def format_ebnf_char(c)
153
315
  case c.ord
154
- when 0x22 then (@options[:html] ? %('<code class="grammar-literal">"</code>') : %{'"'})
155
- when (0x23..0x7e) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"})
156
- else (@options[:html] ? %(<code class="grammar-char-escape">#{escape_hex(c)}</code>) : escape_hex(c))
316
+ when (0x21) then (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"})
317
+ when 0x22 then (@options[:html] ? %('<code class="grammar-literal">&quot;</code>') : %{'"'})
318
+ when (0x23..0x7e) then (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"})
319
+ when (0x80..0xFFFD) then (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"})
320
+ else escape_ebnf_hex(c)
157
321
  end
158
322
  end
159
323
 
160
324
  # Format a range
161
- def format_range(string)
325
+ def format_ebnf_range(string)
162
326
  lbrac = (@options[:html] ? "<code>[</code> " : "[")
163
327
  rbrac = (@options[:html] ? "<code>]</code> " : "]")
164
- dash = (@options[:html] ? "<code>-</code> " : "-")
165
328
 
166
329
  buffer = lbrac
167
330
  s = StringScanner.new(string)
168
331
  while !s.eos?
169
332
  case
170
333
  when s.scan(/\A[!"\u0024-\u007e]+/)
171
- buffer << (@options[:html] ? %(<code class="grammar-literal">#{s.matched}</code>) : s.matched)
334
+ buffer << (@options[:html] ? %(<code class="grammar-literal">#{@coder.encode s.matched}</code>) : s.matched)
172
335
  when s.scan(/\A#x\h+/)
173
- buffer << (@options[:html] ? %(<code class="grammar-char-escape">#{s.matched}</code>) : s.matched)
174
- when s.scan(/\A-/)
175
- buffer << dash
336
+ buffer << escape_ebnf_hex(s.matched[2..-1].hex.chr(Encoding::UTF_8))
176
337
  else
177
- buffer << (@options[:html] ? %(<code class="grammar-char-escape">#{escape_hex(s.getch)}</code>) : escape_hex(s.getch))
338
+ buffer << escape_ebnf_hex(s.getch)
178
339
  end
179
340
  end
180
341
  buffer + rbrac
181
342
  end
182
343
 
183
344
  # Escape a string, using as many UTF-8 characters as possible
184
- def escape(string, quote = '"')
185
- buffer = ""
345
+ def format_ebnf_string(string, quote = '"')
186
346
  string.each_char do |c|
187
- buffer << case (u = c.ord)
188
- when (0x00..0x1f) then "#x%02X" % u
189
- when quote.ord then "#x%02X" % u
190
- else c
347
+ case c.ord
348
+ when 0x00..0x19, quote.ord
349
+ raise RangeError, "cannot format #{string.inspect} as an EBNF String: #{c.inspect} is out of range" unless
350
+ ISOEBNF::TERMINAL_CHARACTER.match?(c)
191
351
  end
192
352
  end
193
- buffer
353
+
354
+ res = "#{quote}#{string}#{quote}"
355
+ @options[:html] ? @coder.encode(res) : res
194
356
  end
195
357
 
196
- def escape_hex(u)
358
+ def escape_ebnf_hex(u)
197
359
  fmt = case u.ord
360
+ when 0x00..0x20 then "#x%02X"
198
361
  when 0x0000..0x00ff then "#x%02X"
199
362
  when 0x0100..0xffff then "#x%04X"
200
363
  else "#x%08X"
201
364
  end
202
- sprintf(fmt, u.ord)
203
- end
204
-
205
- HAML_DESC = %q(
206
- %table.grammar
207
- %tbody#grammar-productions
208
- - rules.each do |rule|
209
- %tr{id: "grammar-production-#{rule.sym}"}
210
- - if rule.pass?
211
- %td{colspan: 3}
212
- %code<="@pass"
213
- - else
214
- %td<= "[#{rule.id}]"
215
- %td<
216
- %code<= rule.sym
217
- %td<= "::="
218
- %td
219
- != yield rule
365
+ char = fmt % u.ord
366
+ if @options[:html]
367
+ char = if u.ord <= 0x20
368
+ %(<abbr title="#{ASCII_ESCAPE_NAMES[u.ord]}">#{@coder.encode char}</abbr>)
369
+ elsif u.ord == 0x22
370
+ %(<abbr title="quot">>&quot;</abbr>)
371
+ elsif u.ord < 0x7F
372
+ %(<abbr title="ascii '#{@coder.encode u}'">#{@coder.encode char}</abbr>)
373
+ elsif u.ord == 0x7F
374
+ %(<abbr title="delete">#{@coder.encode char}</abbr>)
375
+ elsif u.ord <= 0xFF
376
+ %(<abbr title="extended ascii '#{@coder.encode char}'">#{char}</abbr>)
377
+ elsif (%w(Control Private-use Surrogate Noncharacter Reserved) - ::Unicode::Types.of(u)).empty?
378
+ %(<abbr title="unicode '#{u}'">#{char}</abbr>)
379
+ else
380
+ %(<abbr title="unicode '#{::Unicode::Types.of(u).first}'">#{char}</abbr>)
381
+ end
382
+ %(<code class="grammar-char-escape">#{char}</code>)
383
+ else
384
+ char
385
+ end
386
+ end
387
+
388
+ ##
389
+ # ABNF Formatters
390
+ ##
391
+
392
+ # Format the expression part of a rule
393
+ def format_abnf(expr, sep: nil, embedded: false, sensitive: true)
394
+ return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
395
+ if expr.is_a?(String)
396
+ if expr.length == 1
397
+ return format_abnf_char(expr)
398
+ elsif expr.start_with?('%')
399
+ # Already encoded
400
+ return expr
401
+ elsif expr =~ /"/
402
+ # Split into segments
403
+ segments = expr.split('"')
404
+
405
+ return format_abnf_char(expr) if segments.empty?
406
+
407
+ seq = segments.inject([]) {|memo, s| memo.concat([[:hex, "#x22"], s])}[1..-1]
408
+ seq.unshift(:seq)
409
+ return format_abnf(seq, sep: nil, embedded: false)
410
+ else
411
+ return (@options[:html] ? %("<code class="grammar-literal">#{'%s' if sensitive}#{@coder.encode expr}</code>") : %(#{'%s' if sensitive}"#{expr}"))
412
+ end
413
+ end
414
+ parts = {
415
+ alt: (@options[:html] ? "<code>/</code> " : "/ "),
416
+ star: (@options[:html] ? "<code>*</code> " : "*"),
417
+ plus: (@options[:html] ? "<code>+</code> " : "1*"),
418
+ opt: (@options[:html] ? "<code>?</code> " : "?")
419
+ }
420
+ lbrac = (@options[:html] ? "<code>[</code> " : "[")
421
+ rbrac = (@options[:html] ? "<code>]</code> " : "]")
422
+ lparen = (@options[:html] ? "<code>(</code> " : "(")
423
+ rparen = (@options[:html] ? "<code>)</code> " : ")")
424
+
425
+ case expr.first
426
+ when :istr
427
+ # FIXME: if string part is segmented, need to do something different
428
+ format_abnf(expr.last, embedded: true, sensitive: false)
429
+ when :alt
430
+ this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
431
+ res = expr[1..-1].map {|e| format_abnf(e, embedded: true)}.join(this_sep)
432
+ embedded ? (lparen + res + rparen) : res
433
+ when :diff
434
+ raise RangeError, "ABNF does not support the diff operator"
435
+ when :opt
436
+ char = parts[expr.first.to_sym]
437
+ r = format_abnf(expr[1], embedded: true)
438
+ "#{lbrac}#{r}#{rbrac}"
439
+ when :plus, :star
440
+ char = parts[expr.first.to_sym]
441
+ r = format_abnf(expr[1], embedded: true)
442
+ "#{char}#{r}"
443
+ when :hex
444
+ escape_abnf_hex(expr.last[2..-1].hex.chr)
445
+ when :range
446
+ # Returns an [:alt] or [:not [:alt]] if composed of multiple sequences
447
+ # Note: ABNF does not support the `not` operator
448
+ res = format_abnf_range(expr.last)
449
+ res.is_a?(Array) ?
450
+ format_abnf(res, embedded: true) :
451
+ res
452
+ when :seq
453
+ this_sep = (sep ? sep : " ")
454
+ res = expr[1..-1].map do |e|
455
+ format_abnf(e, embedded: true)
456
+ end.join(this_sep)
457
+ embedded ? (lparen + res + rparen) : res
458
+ when :rept
459
+ # Expand repetition
460
+ min, max, value = expr[1..-1]
461
+ r = format_abnf(value, embedded: true)
462
+ if min == max
463
+ "#{min}#{r}"
464
+ elsif min == 0 && max == '*'
465
+ "#{parts[:star]}#{r}"
466
+ elsif min > 0 && max == '*'
467
+ "#{min}#{parts[:star]}#{r}"
468
+ else
469
+ "#{min}#{parts[:star]}#{max}#{r}"
470
+ end
471
+ else
472
+ raise "Unknown operator: #{expr.first}"
473
+ end
474
+ end
475
+
476
+ # Format a single-character string, prefering hex for non-main ASCII
477
+ def format_abnf_char(c)
478
+ if /[\x20-\x21\x23-\x7E]/.match?(c)
479
+ @options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : c.inspect
480
+ else
481
+ escape_abnf_hex(c)
482
+ end
483
+ end
484
+
485
+ # Format a range
486
+ #
487
+ # Presumes range has already been validated
488
+ def format_abnf_range(string)
489
+ alt, o_dash = [:alt], false
490
+
491
+ raise RangeError, "cannot format #{string.inspect} an ABNF range" if string.start_with?('^')
492
+
493
+ if string.end_with?('-')
494
+ o_dash = true
495
+ string = string[0..-2]
496
+ end
497
+
498
+ scanner = StringScanner.new(string)
499
+ hexes, deces = [], []
500
+ in_range = false
501
+ # Build op (alt) from different ranges/enums
502
+ while !scanner.eos?
503
+ if hex = scanner.scan(Terminals::HEX)
504
+ # Append any decimal values
505
+ alt << "%d" + deces.join(".") unless deces.empty?
506
+ deces = []
507
+
508
+ if in_range
509
+ # Add "." sequences for any previous hexes
510
+ alt << "%x" + hexes[0..-2].join(".") if hexes.length > 1
511
+ alt << "%x#{hexes.last}-#{hex[2..-1]}"
512
+ in_range, hexes = false, []
513
+ else
514
+ hexes << hex[2..-1]
515
+ end
516
+ elsif dec = scanner.scan(Terminals::R_CHAR)
517
+ # Append any hexadecimal values
518
+ alt << "%x" + hexes.join(".") unless hexes.empty?
519
+ hexes = []
520
+
521
+ if in_range
522
+ # Add "." sequences for any previous hexes
523
+ alt << "%d" + deces[0..-2].join(".") if deces.length > 1
524
+ alt << "%d#{deces.last}-#{dec.codepoints.first}"
525
+ in_range, deces = false, []
526
+ else
527
+ deces << dec.codepoints.first.to_s
528
+ end
529
+ end
530
+
531
+ in_range = true if scanner.scan(/\-/)
532
+ end
533
+
534
+ deces << '45' if o_dash
535
+
536
+ # Append hexes and deces as "." sequences (should be only one)
537
+ alt << "%d" + deces.join(".") unless deces.empty?
538
+ alt << "%x" + hexes.join(".") unless hexes.empty?
539
+
540
+ # FIXME: HTML abbreviations?
541
+ if alt.length == 2
542
+ # Just return the range or enum
543
+ alt.last
544
+ else
545
+ # Return the alt, which will be further formatted
546
+ alt
547
+ end
548
+ end
549
+
550
+ def escape_abnf_hex(u)
551
+ fmt = case u.ord
552
+ when 0x0000..0x00ff then "%02X"
553
+ when 0x0100..0xffff then "%04X"
554
+ else "%08X"
555
+ end
556
+ char = "%x" + (fmt % u.ord)
557
+ if @options[:html]
558
+ if u.ord <= 0x20
559
+ char = %(<abbr title="#{ASCII_ESCAPE_NAMES[u.ord]}">#{@coder.encode char}</abbr>)
560
+ elsif u.ord == 0x22
561
+ %(<abbr title="quot">>&quot;</abbr>)
562
+ elsif u.ord < 0x7F
563
+ char = %(<abbr title="ascii '#{u}'">#{@coder.encode char}</abbr>)
564
+ elsif u.ord == 0x7F
565
+ char = %(<abbr title="delete">#{@coder.encode char}</abbr>)
566
+ elsif u.ord <= 0xFF
567
+ char = %(<abbr title="extended ascii '#{u}'">#{char}</abbr>)
568
+ else
569
+ char = %(<abbr title="unicode '#{u.unicode_normaliz}'">#{char}</abbr>)
570
+ end
571
+ %(<code class="grammar-char-escape">#{char}</code>)
572
+ else
573
+ char
574
+ end
575
+ end
576
+
577
+ ##
578
+ # ISO EBNF Formatters
579
+ ##
580
+
581
+ # Format the expression part of a rule
582
+ def format_isoebnf(expr, sep: nil, embedded: false)
583
+ return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
584
+ if expr.is_a?(String)
585
+ expr = expr[2..-1].hex.chr if expr =~ /\A#x\h+/
586
+ expr.chars.each do |c|
587
+ raise RangeError, "cannot format #{expr.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless
588
+ ISOEBNF::TERMINAL_CHARACTER.match?(c)
589
+ end
590
+ if expr =~ /"/
591
+ return (@options[:html] ? %('<code class="grammar-literal">#{@coder.encode expr}</code>') : %('#{expr}'))
592
+ else
593
+ return (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode expr}</code>") : %("#{expr}"))
594
+ end
595
+ end
596
+ parts = {
597
+ alt: (@options[:html] ? "<code>|</code> " : "| "),
598
+ diff: (@options[:html] ? "<code>-</code> " : "- "),
599
+ }
600
+ lparen = (@options[:html] ? "<code>(</code> " : "(")
601
+ rparen = (@options[:html] ? "<code>)</code> " : ")")
602
+
603
+ case expr.first
604
+ when :istr
605
+ # Looses fidelity, but, oh well ...
606
+ format_isoebnf(expr.last, embedded: true)
607
+ when :alt, :diff
608
+ this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
609
+ res = expr[1..-1].map {|e| format_isoebnf(e, embedded: true)}.join(this_sep)
610
+ embedded ? (lparen + res + rparen) : res
611
+ when :opt
612
+ r = format_isoebnf(expr[1], embedded: true)
613
+ "[#{r}]"
614
+ when :star
615
+ r = format_isoebnf(expr[1], embedded: true)
616
+ "{#{r}}"
617
+ when :plus
618
+ r = format_isoebnf(expr[1], embedded: true)
619
+ "#{r}, {#{r}}"
620
+ when :hex
621
+ format_isoebnf(expr[1], embedded: true)
622
+ when :range
623
+ res = format_isoebnf_range(expr.last)
624
+ res.is_a?(Array) ?
625
+ format_isoebnf(res, embedded: true) :
626
+ res
627
+ when :seq
628
+ this_sep = "," + (sep ? sep : " ")
629
+ res = expr[1..-1].map do |e|
630
+ format_isoebnf(e, embedded: true)
631
+ end.join(this_sep)
632
+ embedded ? (lparen + res + rparen) : res
633
+ when :rept
634
+ # Expand repetition
635
+ min, max, value = expr[1..-1]
636
+ if min == 0 && max == 1
637
+ format_isoebnf([:opt, value], sep: sep, embedded: embedded)
638
+ elsif min == 0 && max == '*'
639
+ format_isoebnf([:star, value], sep: sep, embedded: embedded)
640
+ elsif min == 1 && max == '*'
641
+ format_isoebnf([:plus, value], sep: sep, embedded: embedded)
642
+ else
643
+ val2 = [:seq]
644
+ while min > 0
645
+ val2 << value
646
+ min -= 1
647
+ max -= 1 unless max == '*'
648
+ end
649
+ if max == '*'
650
+ val2 << [:star, value]
651
+ else
652
+ opt = nil
653
+ while max > 0
654
+ opt = [:opt, opt ? [:seq, value, opt] : value]
655
+ max -= 1
656
+ end
657
+ val2 << opt if opt
658
+ end
659
+ format_isoebnf(val2, sep: sep, embedded: embedded)
660
+ end
661
+ else
662
+ raise "Unknown operator: #{expr.first}"
663
+ end
664
+ end
665
+
666
+ # Format a range
667
+ # Range is formatted as a aliteration of characters
668
+ def format_isoebnf_range(string)
669
+ chars = []
670
+ o_dash = false
671
+
672
+ raise RangeError, "cannot format #{string.inspect} an ABNF range" if string.start_with?('^')
673
+
674
+ if string.end_with?('-')
675
+ o_dash = true
676
+ string = string[0..-2]
677
+ end
678
+
679
+ scanner = StringScanner.new(string)
680
+ in_range = false
681
+ # Build chars from different ranges/enums
682
+ while !scanner.eos?
683
+ char = if hex = scanner.scan(Terminals::HEX)
684
+ hex[2..-1].hex.ord.char(Encoding::UTF_8)
685
+ else scanner.scan(Terminals::R_CHAR)
686
+ end
687
+ raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration: #{char.inspect} is out of range" unless
688
+ char && ISOEBNF::TERMINAL_CHARACTER.match?(char)
689
+
690
+ if in_range
691
+ # calculate characters from chars.last to this char
692
+ raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration" unless chars.last < char
693
+ chars.concat (chars.last..char).to_a[1..-1]
694
+ in_range = false
695
+ else
696
+ chars << char
697
+ end
698
+
699
+ in_range = true if scanner.scan(/\-/)
700
+ end
701
+
702
+ chars << '-' if o_dash
703
+
704
+ # Possibly only a single character (no character?)
705
+ chars.length == 1 ? chars.last.inspect : chars.unshift(:alt)
706
+ end
707
+
708
+ ERB_DESC = %q(
709
+ <table class="grammar">
710
+ <tbody id="grammar-productions" class="<%= @format %>">
711
+ <% for rule in @rules %>
712
+ <tr<%= %{ id="grammar-production-#{rule.sym}"} unless %w(=/ |).include?(rule.assign) || rule.sym.nil?%>>
713
+ <% if rule.id %>
714
+ <td<%= " colspan=2" unless rule.sym %>><%= rule.id %></td>
715
+ <% end %>
716
+ <% if rule.sym %>
717
+ <td><code><%== rule.sym %></code></td>
718
+ <% end %>
719
+ <td><%= rule.assign %></td>
720
+ <td><%= rule.formatted %></td>
721
+ </tr>
722
+ <% end %>
723
+ </tbody>
724
+ </table>
220
725
  ).gsub(/^ /, '')
221
726
  end
222
727
  end