ebnf 1.2.0 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. checksums.yaml +4 -4
  2. data/README.md +223 -199
  3. data/UNLICENSE +1 -1
  4. data/VERSION +1 -1
  5. data/bin/ebnf +38 -19
  6. data/etc/abnf-core.ebnf +52 -0
  7. data/etc/abnf.abnf +121 -0
  8. data/etc/abnf.ebnf +124 -0
  9. data/etc/abnf.sxp +45 -0
  10. data/etc/doap.ttl +23 -18
  11. data/etc/ebnf.ebnf +21 -33
  12. data/etc/ebnf.html +76 -160
  13. data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
  14. data/etc/ebnf.ll1.sxp +182 -183
  15. data/etc/ebnf.peg.rb +90 -0
  16. data/etc/ebnf.peg.sxp +84 -0
  17. data/etc/ebnf.sxp +40 -41
  18. data/etc/iso-ebnf.ebnf +140 -0
  19. data/etc/iso-ebnf.isoebnf +138 -0
  20. data/etc/iso-ebnf.sxp +65 -0
  21. data/etc/sparql.ebnf +4 -4
  22. data/etc/sparql.html +1603 -1751
  23. data/etc/sparql.ll1.sxp +7372 -7372
  24. data/etc/sparql.peg.rb +532 -0
  25. data/etc/sparql.peg.sxp +597 -0
  26. data/etc/sparql.sxp +363 -362
  27. data/etc/turtle.ebnf +3 -3
  28. data/etc/turtle.html +465 -517
  29. data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
  30. data/etc/turtle.ll1.sxp +425 -425
  31. data/etc/turtle.peg.rb +182 -0
  32. data/etc/turtle.peg.sxp +199 -0
  33. data/etc/turtle.sxp +103 -101
  34. data/lib/ebnf.rb +6 -1
  35. data/lib/ebnf/abnf.rb +301 -0
  36. data/lib/ebnf/abnf/core.rb +23 -0
  37. data/lib/ebnf/abnf/meta.rb +111 -0
  38. data/lib/ebnf/base.rb +114 -69
  39. data/lib/ebnf/bnf.rb +1 -26
  40. data/lib/ebnf/ebnf/meta.rb +90 -0
  41. data/lib/ebnf/isoebnf.rb +229 -0
  42. data/lib/ebnf/isoebnf/meta.rb +75 -0
  43. data/lib/ebnf/ll1.rb +131 -3
  44. data/lib/ebnf/ll1/lexer.rb +20 -22
  45. data/lib/ebnf/ll1/parser.rb +97 -64
  46. data/lib/ebnf/ll1/scanner.rb +82 -50
  47. data/lib/ebnf/native.rb +320 -0
  48. data/lib/ebnf/parser.rb +285 -302
  49. data/lib/ebnf/peg.rb +39 -0
  50. data/lib/ebnf/peg/parser.rb +561 -0
  51. data/lib/ebnf/peg/rule.rb +250 -0
  52. data/lib/ebnf/rule.rb +442 -148
  53. data/lib/ebnf/terminals.rb +21 -0
  54. data/lib/ebnf/writer.rb +587 -82
  55. metadata +125 -18
  56. data/etc/sparql.rb +0 -45773
@@ -0,0 +1,21 @@
1
+ # encoding: utf-8
2
+ # Terminal definitions for the EBNF grammar
3
+ module EBNF::Terminals
4
+ SYMBOL_BASE = %r(\b[a-zA-Z0-9_\.]+\b)u.freeze
5
+ SYMBOL = %r(#{SYMBOL_BASE}(?!\s*::=))u.freeze
6
+ HEX = %r(\#x\h+)u.freeze
7
+ CHAR = %r([\u0009\u000A\u000D\u0020-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
8
+ R_CHAR = %r([\u0009\u000A\u000D\u0020-\u002C\u002E-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
9
+ RANGE = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX})|#{R_CHAR}|#{HEX})+-?\](?!\s+#{SYMBOL_BASE}\s*::=))u.freeze
10
+ LHS = %r((?:\[#{SYMBOL_BASE}\])?\s*#{SYMBOL_BASE}\s*::=)u.freeze
11
+ O_RANGE = %r(\[\^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX}|#{R_CHAR}|#{HEX}))+-?\])u.freeze
12
+ STRING1 = %r("[\u0009\u000A\u000D\u0020\u0021\u0023-\uD7FF\u{10000}-\u{10FFFF}]*")u.freeze
13
+ STRING2 = %r('[\u0009\u000A\u000D\u0020-\u0026\u0028-\uD7FF\u{10000}-\u{10FFFF}]*')u.freeze
14
+ POSTFIX = %r([?*+])u.freeze
15
+ PASS = %r((
16
+ \s
17
+ | (?:(?:\#[^x]|//)[^\n\r]*)
18
+ | (?:/\*(?:(?:\*[^/])|[^*])*\*/)
19
+ | (?:\(\*(?:(?:\*[^\)])|[^*])*\*\))
20
+ )+)xmu.freeze
21
+ end
data/lib/ebnf/writer.rb CHANGED
@@ -1,22 +1,63 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  require 'rdf'
3
3
  require 'strscan' unless defined?(StringScanner)
4
+ require "ostruct"
5
+ require 'unicode/types'
4
6
 
5
7
  ##
6
8
  # Serialize ruleset back to EBNF
7
9
  module EBNF
8
10
  class Writer
9
11
  LINE_LENGTH = 80
12
+ LINE_LENGTH_HTML = 200
13
+
14
+ # ASCII escape names
15
+ ASCII_ESCAPE_NAMES = [
16
+ "null", #x00
17
+ "start of heading", #x01
18
+ "start of text", #x02
19
+ "end of text", #x03
20
+ "end of transmission", #x04
21
+ "enquiry", #x05
22
+ "acknowledge", #x06
23
+ "bell", #x07
24
+ "backspace", #x08
25
+ "horizontal tab", #x09
26
+ "new line", #x0A
27
+ "vertical tab", #x0B
28
+ "form feed", #x0C
29
+ "carriage return", #x0D
30
+ "shift out", #x0E
31
+ "shift in", #x0F
32
+ "data link escape", #x10
33
+ "device control 1", #x11
34
+ "device control 2", #x12
35
+ "device control 3", #x13
36
+ "device control 4", #x14
37
+ "negative acknowledge", #x15
38
+ "synchronous idle", #x16
39
+ "end of trans. block", #x17
40
+ "cancel", #x18
41
+ "end of medium", #x19
42
+ "substitute", #x1A
43
+ "escape", #x1B
44
+ "file separator", #x1C
45
+ "group separator", #x1D
46
+ "record separator", #x1E
47
+ "unit separator", #x1F
48
+ "space" #x20
49
+ ]
10
50
 
11
51
  ##
12
52
  # Format rules to a String
13
53
  #
14
54
  # @param [Array<Rule>] rules
55
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
15
56
  # @return [Object]
16
- def self.string(*rules)
57
+ def self.string(*rules, format: :ebnf)
17
58
  require 'stringio' unless defined?(StringIO)
18
59
  buf = StringIO.new
19
- write(buf, *rules)
60
+ write(buf, *rules, format: format)
20
61
  buf.string
21
62
  end
22
63
 
@@ -24,9 +65,10 @@ module EBNF
24
65
  # Format rules to $stdout
25
66
  #
26
67
  # @param [Array<Rule>] rules
68
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
27
69
  # @return [Object]
28
- def self.print(*rules)
29
- write($stdout, *rules)
70
+ def self.print(*rules, format: :ebnf)
71
+ write($stdout, *rules, format: format)
30
72
  end
31
73
 
32
74
  ##
@@ -34,88 +76,174 @@ module EBNF
34
76
  #
35
77
  # @param [Object] out
36
78
  # @param [Array<Rule>] rules
79
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
37
80
  # @return [Object]
38
- def self.write(out, *rules)
39
- Writer.new(rules, out: out)
81
+ def self.write(out, *rules, format: :ebnf)
82
+ Writer.new(rules, out: out, format: format)
40
83
  end
41
84
 
42
85
  ##
43
86
  # Write formatted rules to an IO like object as HTML
44
87
  #
45
88
  # @param [Array<Rule>] rules
89
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
90
+ # @param [Boolean] validate (false) validate generated HTML.
46
91
  # @return [Object]
47
- def self.html(*rules)
92
+ def self.html(*rules, format: :ebnf, validate: false)
48
93
  require 'stringio' unless defined?(StringIO)
49
94
  buf = StringIO.new
50
- Writer.new(rules, out: buf, html: true)
95
+ Writer.new(rules, out: buf, html: true, format: format, validate: validate)
51
96
  buf.string
52
97
  end
53
98
 
54
99
  ##
55
100
  # @param [Array<Rule>] rules
101
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
102
+ # @param [Boolean] html (false) generate HTML output
103
+ # @param [Boolean] validate (false) validate generated HTML.
56
104
  # @param [Hash{Symbol => Object}] options
57
- # @param [#write] :out ($stdout)
58
- # @option options [Symbol] :format
59
- def initialize(rules, out: $stdout, html: false, **options)
60
- @options = options.dup
105
+ # @param [#write] out ($stdout)
106
+ def initialize(rules, out: $stdout, html: false, format: :ebnf, validate: false, **options)
107
+ @options = options.merge(html: html)
108
+ return if rules.empty?
61
109
 
62
110
  # Determine max LHS length
111
+ format_meth = "format_#{format}".to_sym
63
112
  max_id = rules.max_by {|r| r.id.to_s.length}.id.to_s.length
64
113
  max_sym = rules.max_by {|r| r.sym.to_s.length}.sym.to_s.length
65
- lhs_length = max_sym + 3
66
- lhs_fmt = "%<sym>-#{max_sym}s ::= "
67
- if max_id > 0
114
+ lhs_length = max_sym + 1
115
+ lhs_fmt = case format
116
+ when :abnf then "%<sym>-#{max_sym}s = "
117
+ when :ebnf then "%<sym>-#{max_sym}s ::= "
118
+ when :isoebnf then "%<sym>-#{max_sym}s = "
119
+ end
120
+ if format == :ebnf && max_id > 0
68
121
  lhs_fmt = "%<id>-#{max_id+2}s " + lhs_fmt
69
122
  lhs_length += max_id + 3
70
123
  end
71
- rhs_length = LINE_LENGTH - lhs_length
124
+ rhs_length = (html ? LINE_LENGTH_HTML : LINE_LENGTH) - lhs_length
72
125
 
73
126
  if html
74
127
  # Output as formatted HTML
75
128
  begin
76
- require 'haml'
77
- hout = Haml::Engine.new(HAML_DESC).render(self, rules: rules) do |rule|
78
- formatted_expr = format(rule.expr)
79
- formatted_expr.length > rhs_length ? format(rule.expr, "\n") : formatted_expr
129
+ require 'erubis'
130
+ require 'htmlentities'
131
+ @coder = HTMLEntities.new
132
+ eruby = Erubis::Eruby.new(ERB_DESC)
133
+ formatted_rules = rules.map do |rule|
134
+ if rule.kind == :terminals || rule.kind == :pass
135
+ OpenStruct.new(id: ("@#{rule.kind}"),
136
+ sym: nil,
137
+ assign: nil,
138
+ formatted: (
139
+ rule.kind == :terminals ?
140
+ "<strong># Productions for terminals</strong>" :
141
+ self.send(format_meth, rule.expr)))
142
+ else
143
+ formatted_expr = self.send(format_meth, rule.expr)
144
+ # Measure text without markup
145
+ formatted_expr_text = formatted_expr.gsub(%r{</?\w+[^>]*>}, '')
146
+ if formatted_expr_text.length > rhs_length && (format != :abnf || rule.alt?)
147
+ lines = []
148
+ # Can only reasonably split apart alts
149
+ self.send(format_meth, rule.expr, sep: "--rule-extensions--").
150
+ split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
151
+ assign = case format
152
+ when :ebnf
153
+ formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
154
+ (ndx > 0 ? (rule.alt? ? '|' : '') : '::=')
155
+ when :abnf
156
+ formatted.sub!(%r{\s*<code>/</code>\s*}, '')
157
+ (ndx > 0 ? '=/' : '=')
158
+ else
159
+ formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
160
+ (ndx > 0 ? (rule.alt? ? '|' : '') : '=')
161
+ end
162
+ lines << OpenStruct.new(id: ((ndx == 0 ? "[#{rule.id}]" : "") if rule.id),
163
+ sym: (rule.sym if ndx == 0 || format == :abnf),
164
+ assign: assign,
165
+ formatted: formatted)
166
+ end
167
+ if format == :isoebnf
168
+ lines << OpenStruct.new(assign: ';')
169
+ end
170
+ lines
171
+ else
172
+ OpenStruct.new(id: ("[#{rule.id}]" if rule.id),
173
+ sym: rule.sym,
174
+ assign: (format == :ebnf ? '::=' : '='),
175
+ formatted: (formatted_expr + (format == :isoebnf ? ' ;' : '')))
176
+ end
177
+ end
178
+ end.flatten
179
+
180
+ html_result = eruby.evaluate(format: format, rules: formatted_rules)
181
+
182
+ if validate
183
+ begin
184
+ require 'nokogumbo'
185
+ # Validate the output HTML
186
+ doc = Nokogiri::HTML5("<!DOCTYPE html>" + html_result, max_errors: 10)
187
+ raise EncodingError, "Errors found in generated HTML:\n " +
188
+ doc.errors.map(&:to_s).join("\n ") unless doc.errors.empty?
189
+ rescue LoadError
190
+ # Skip
191
+ end
80
192
  end
81
- out.write hout
193
+
194
+ out.write html_result
82
195
  return
83
196
  rescue LoadError
84
- $stderr.puts "Generating HTML requires haml gem to be loaded"
197
+ $stderr.puts "Generating HTML requires erubis and htmlentities gems to be loaded"
85
198
  end
86
199
  end
87
200
 
88
201
  # Format each rule, considering the available rhs size
89
202
  rules.each do |rule|
90
203
  buffer = if rule.pass?
91
- "%-#{lhs_length-2}s" % "@pass"
204
+ "\n%-#{lhs_length-2}s " % "@pass"
205
+ elsif rule.kind == :terminals
206
+ "\n%-#{lhs_length-2}s" % "@terminals"
92
207
  else
93
208
  lhs_fmt % {id: "[#{rule.id}]", sym: rule.sym}
94
209
  end
95
- formatted_expr = format(rule.expr)
96
- if formatted_expr.length > rhs_length
97
- buffer << format(rule.expr, ("\n" + " " * lhs_length))
210
+ formatted_expr = self.send(format_meth, rule.expr)
211
+ if formatted_expr.length > rhs_length && (format != :abnf || rule.alt?)
212
+ if format == :abnf
213
+ # No whitespace, use =/
214
+ self.send(format_meth, rule.expr, sep: "--rule-extensions--").
215
+ split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
216
+ if ndx > 0
217
+ buffer << "\n" + lhs_fmt.sub('= ', '=/') % {id: "[#{rule.id}]", sym: rule.sym}
218
+ end
219
+ buffer << formatted.sub(/\s*\/\s*/, '')
220
+ end
221
+ else
222
+ # Space out past "= "
223
+ buffer << self.send(format_meth, rule.expr, sep: ("\n" + " " * (lhs_length + (rule.alt? ? 2 : 4) - (format == :ebnf ? 0 : 2))))
224
+ buffer << ("\n" + " " * (lhs_length) + ';') if format == :isoebnf
225
+ end
98
226
  else
99
- buffer << formatted_expr
227
+ buffer << formatted_expr + (format == :isoebnf ? ' ;' : '')
100
228
  end
229
+ buffer << "\n\n" if [:terminals, :pass].include?(rule.kind)
101
230
  out.puts(buffer)
102
231
  end
103
232
  end
104
233
 
105
234
  protected
235
+
236
+ ##
237
+ # W3C EBNF Formatters
238
+ ##
239
+
106
240
  # Format the expression part of a rule
107
- def format(expr, sep = nil)
108
- return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
241
+ def format_ebnf(expr, sep: nil, embedded: false)
242
+ return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
109
243
  if expr.is_a?(String)
110
- if expr.length == 1
111
- return format_char(expr)
112
- elsif expr =~ /\A#x\h+/
113
- return (@options[:html] ? %(<code class="grammar-char-escape">#{expr}</code>) : expr)
114
- elsif expr =~ /"/
115
- return (@options[:html] ? %('<code class="grammar-literal">#{escape(expr, "'")}</code>') : %('#{escape(expr, "'")}'))
116
- else
117
- return (@options[:html] ? %("<code class="grammar-literal">#{escape(expr, '"')}</code>") : %("#{escape(expr, '"')}"))
118
- end
244
+ return expr.length == 1 ?
245
+ format_ebnf_char(expr) :
246
+ format_ebnf_string(expr, expr.include?('"') ? "'" : '"')
119
247
  end
120
248
  parts = {
121
249
  alt: (@options[:html] ? "<code>|</code> " : "| "),
@@ -128,95 +256,472 @@ module EBNF
128
256
  rparen = (@options[:html] ? "<code>)</code> " : ")")
129
257
 
130
258
  case expr.first
259
+ when :istr
260
+ # Loses fidelity, but, oh well ...
261
+ format_ebnf(expr.last, embedded: true)
131
262
  when :alt, :diff
132
263
  this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
133
- expr[1..-1].map {|e| format(e)}.join(this_sep)
264
+ res = expr[1..-1].map {|e| format_ebnf(e, embedded: true)}.join(this_sep)
265
+ embedded ? (lparen + res + rparen) : res
134
266
  when :star, :plus, :opt
135
- raise "Expected star expression to have a single operand" unless expr.length == 2
136
267
  char = parts[expr.first.to_sym]
137
- r = format(expr[1])
138
- (r.start_with?("(") || Array(expr[1]).length == 1) ? "#{r}#{char}" : "(#{r})#{char}"
268
+ r = format_ebnf(expr[1], embedded: true)
269
+ "#{r}#{char}"
139
270
  when :hex
140
- (@options[:html] ? %(<code class="grammar-char-escape">#{expr.last}</code>) : expr.last)
271
+ escape_ebnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
141
272
  when :range
142
- format_range(expr.last)
273
+ format_ebnf_range(expr.last)
143
274
  when :seq
144
275
  this_sep = (sep ? sep : " ")
145
- expr[1..-1].map {|e| r = format(e); Array(e).length > 2 ? "#{lparen}#{r}#{rparen}" : r}.join(this_sep)
276
+ res = expr[1..-1].map do |e|
277
+ format_ebnf(e, embedded: true)
278
+ end.join(this_sep)
279
+ embedded ? (lparen + res + rparen) : res
280
+ when :rept
281
+ # Expand repetition
282
+ min, max, value = expr[1..-1]
283
+ if min == 0 && max == 1
284
+ format_ebnf([:opt, value], sep: sep, embedded: embedded)
285
+ elsif min == 0 && max == '*'
286
+ format_ebnf([:star, value], sep: sep, embedded: embedded)
287
+ elsif min == 1 && max == '*'
288
+ format_ebnf([:plus, value], sep: sep, embedded: embedded)
289
+ else
290
+ val2 = [:seq]
291
+ while min > 0
292
+ val2 << value
293
+ min -= 1
294
+ max -= 1 unless max == '*'
295
+ end
296
+ if max == '*'
297
+ val2 << [:star, value]
298
+ else
299
+ opt = nil
300
+ while max > 0
301
+ opt = [:opt, opt ? [:seq, value, opt] : value]
302
+ max -= 1
303
+ end
304
+ val2 << opt if opt
305
+ end
306
+ format_ebnf(val2, sep: sep, embedded: embedded)
307
+ end
146
308
  else
147
309
  raise "Unknown operator: #{expr.first}"
148
310
  end
149
311
  end
150
312
 
151
313
  # Format a single-character string, preferring hex for non-main ASCII
152
- def format_char(c)
314
+ def format_ebnf_char(c)
153
315
  case c.ord
154
- when 0x22 then (@options[:html] ? %('<code class="grammar-literal">"</code>') : %{'"'})
155
- when (0x23..0x7e) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"})
156
- else (@options[:html] ? %(<code class="grammar-char-escape">#{escape_hex(c)}</code>) : escape_hex(c))
316
+ when (0x21) then (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"})
317
+ when 0x22 then (@options[:html] ? %('<code class="grammar-literal">&quot;</code>') : %{'"'})
318
+ when (0x23..0x7e) then (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"})
319
+ when (0x80..0xFFFD) then (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"})
320
+ else escape_ebnf_hex(c)
157
321
  end
158
322
  end
159
323
 
160
324
  # Format a range
161
- def format_range(string)
325
+ def format_ebnf_range(string)
162
326
  lbrac = (@options[:html] ? "<code>[</code> " : "[")
163
327
  rbrac = (@options[:html] ? "<code>]</code> " : "]")
164
- dash = (@options[:html] ? "<code>-</code> " : "-")
165
328
 
166
329
  buffer = lbrac
167
330
  s = StringScanner.new(string)
168
331
  while !s.eos?
169
332
  case
170
333
  when s.scan(/\A[!"\u0024-\u007e]+/)
171
- buffer << (@options[:html] ? %(<code class="grammar-literal">#{s.matched}</code>) : s.matched)
334
+ buffer << (@options[:html] ? %(<code class="grammar-literal">#{@coder.encode s.matched}</code>) : s.matched)
172
335
  when s.scan(/\A#x\h+/)
173
- buffer << (@options[:html] ? %(<code class="grammar-char-escape">#{s.matched}</code>) : s.matched)
174
- when s.scan(/\A-/)
175
- buffer << dash
336
+ buffer << escape_ebnf_hex(s.matched[2..-1].hex.chr(Encoding::UTF_8))
176
337
  else
177
- buffer << (@options[:html] ? %(<code class="grammar-char-escape">#{escape_hex(s.getch)}</code>) : escape_hex(s.getch))
338
+ buffer << escape_ebnf_hex(s.getch)
178
339
  end
179
340
  end
180
341
  buffer + rbrac
181
342
  end
182
343
 
183
344
  # Escape a string, using as many UTF-8 characters as possible
184
- def escape(string, quote = '"')
185
- buffer = ""
345
+ def format_ebnf_string(string, quote = '"')
186
346
  string.each_char do |c|
187
- buffer << case (u = c.ord)
188
- when (0x00..0x1f) then "#x%02X" % u
189
- when quote.ord then "#x%02X" % u
190
- else c
347
+ case c.ord
348
+ when 0x00..0x19, quote.ord
349
+ raise RangeError, "cannot format #{string.inspect} as an EBNF String: #{c.inspect} is out of range" unless
350
+ ISOEBNF::TERMINAL_CHARACTER.match?(c)
191
351
  end
192
352
  end
193
- buffer
353
+
354
+ res = "#{quote}#{string}#{quote}"
355
+ @options[:html] ? @coder.encode(res) : res
194
356
  end
195
357
 
196
- def escape_hex(u)
358
+ def escape_ebnf_hex(u)
197
359
  fmt = case u.ord
360
+ when 0x00..0x20 then "#x%02X"
198
361
  when 0x0000..0x00ff then "#x%02X"
199
362
  when 0x0100..0xffff then "#x%04X"
200
363
  else "#x%08X"
201
364
  end
202
- sprintf(fmt, u.ord)
203
- end
204
-
205
- HAML_DESC = %q(
206
- %table.grammar
207
- %tbody#grammar-productions
208
- - rules.each do |rule|
209
- %tr{id: "grammar-production-#{rule.sym}"}
210
- - if rule.pass?
211
- %td{colspan: 3}
212
- %code<="@pass"
213
- - else
214
- %td<= "[#{rule.id}]"
215
- %td<
216
- %code<= rule.sym
217
- %td<= "::="
218
- %td
219
- != yield rule
365
+ char = fmt % u.ord
366
+ if @options[:html]
367
+ char = if u.ord <= 0x20
368
+ %(<abbr title="#{ASCII_ESCAPE_NAMES[u.ord]}">#{@coder.encode char}</abbr>)
369
+ elsif u.ord == 0x22
370
+ %(<abbr title="quot">>&quot;</abbr>)
371
+ elsif u.ord < 0x7F
372
+ %(<abbr title="ascii '#{@coder.encode u}'">#{@coder.encode char}</abbr>)
373
+ elsif u.ord == 0x7F
374
+ %(<abbr title="delete">#{@coder.encode char}</abbr>)
375
+ elsif u.ord <= 0xFF
376
+ %(<abbr title="extended ascii '#{@coder.encode char}'">#{char}</abbr>)
377
+ elsif (%w(Control Private-use Surrogate Noncharacter Reserved) - ::Unicode::Types.of(u)).empty?
378
+ %(<abbr title="unicode '#{u}'">#{char}</abbr>)
379
+ else
380
+ %(<abbr title="unicode '#{::Unicode::Types.of(u).first}'">#{char}</abbr>)
381
+ end
382
+ %(<code class="grammar-char-escape">#{char}</code>)
383
+ else
384
+ char
385
+ end
386
+ end
387
+
388
+ ##
389
+ # ABNF Formatters
390
+ ##
391
+
392
+ # Format the expression part of a rule
393
+ def format_abnf(expr, sep: nil, embedded: false, sensitive: true)
394
+ return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
395
+ if expr.is_a?(String)
396
+ if expr.length == 1
397
+ return format_abnf_char(expr)
398
+ elsif expr.start_with?('%')
399
+ # Already encoded
400
+ return expr
401
+ elsif expr =~ /"/
402
+ # Split into segments
403
+ segments = expr.split('"')
404
+
405
+ return format_abnf_char(expr) if segments.empty?
406
+
407
+ seq = segments.inject([]) {|memo, s| memo.concat([[:hex, "#x22"], s])}[1..-1]
408
+ seq.unshift(:seq)
409
+ return format_abnf(seq, sep: nil, embedded: false)
410
+ else
411
+ return (@options[:html] ? %("<code class="grammar-literal">#{'%s' if sensitive}#{@coder.encode expr}</code>") : %(#{'%s' if sensitive}"#{expr}"))
412
+ end
413
+ end
414
+ parts = {
415
+ alt: (@options[:html] ? "<code>/</code> " : "/ "),
416
+ star: (@options[:html] ? "<code>*</code> " : "*"),
417
+ plus: (@options[:html] ? "<code>+</code> " : "1*"),
418
+ opt: (@options[:html] ? "<code>?</code> " : "?")
419
+ }
420
+ lbrac = (@options[:html] ? "<code>[</code> " : "[")
421
+ rbrac = (@options[:html] ? "<code>]</code> " : "]")
422
+ lparen = (@options[:html] ? "<code>(</code> " : "(")
423
+ rparen = (@options[:html] ? "<code>)</code> " : ")")
424
+
425
+ case expr.first
426
+ when :istr
427
+ # FIXME: if string part is segmented, need to do something different
428
+ format_abnf(expr.last, embedded: true, sensitive: false)
429
+ when :alt
430
+ this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
431
+ res = expr[1..-1].map {|e| format_abnf(e, embedded: true)}.join(this_sep)
432
+ embedded ? (lparen + res + rparen) : res
433
+ when :diff
434
+ raise RangeError, "ABNF does not support the diff operator"
435
+ when :opt
436
+ char = parts[expr.first.to_sym]
437
+ r = format_abnf(expr[1], embedded: true)
438
+ "#{lbrac}#{r}#{rbrac}"
439
+ when :plus, :star
440
+ char = parts[expr.first.to_sym]
441
+ r = format_abnf(expr[1], embedded: true)
442
+ "#{char}#{r}"
443
+ when :hex
444
+ escape_abnf_hex(expr.last[2..-1].hex.chr)
445
+ when :range
446
+ # Returns an [:alt] or [:not [:alt]] if composed of multiple sequences
447
+ # Note: ABNF does not support the `not` operator
448
+ res = format_abnf_range(expr.last)
449
+ res.is_a?(Array) ?
450
+ format_abnf(res, embedded: true) :
451
+ res
452
+ when :seq
453
+ this_sep = (sep ? sep : " ")
454
+ res = expr[1..-1].map do |e|
455
+ format_abnf(e, embedded: true)
456
+ end.join(this_sep)
457
+ embedded ? (lparen + res + rparen) : res
458
+ when :rept
459
+ # Expand repetition
460
+ min, max, value = expr[1..-1]
461
+ r = format_abnf(value, embedded: true)
462
+ if min == max
463
+ "#{min}#{r}"
464
+ elsif min == 0 && max == '*'
465
+ "#{parts[:star]}#{r}"
466
+ elsif min > 0 && max == '*'
467
+ "#{min}#{parts[:star]}#{r}"
468
+ else
469
+ "#{min}#{parts[:star]}#{max}#{r}"
470
+ end
471
+ else
472
+ raise "Unknown operator: #{expr.first}"
473
+ end
474
+ end
475
+
476
+ # Format a single-character string, prefering hex for non-main ASCII
477
+ def format_abnf_char(c)
478
+ if /[\x20-\x21\x23-\x7E]/.match?(c)
479
+ @options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : c.inspect
480
+ else
481
+ escape_abnf_hex(c)
482
+ end
483
+ end
484
+
485
+ # Format a range
486
+ #
487
+ # Presumes range has already been validated
488
+ def format_abnf_range(string)
489
+ alt, o_dash = [:alt], false
490
+
491
+ raise RangeError, "cannot format #{string.inspect} an ABNF range" if string.start_with?('^')
492
+
493
+ if string.end_with?('-')
494
+ o_dash = true
495
+ string = string[0..-2]
496
+ end
497
+
498
+ scanner = StringScanner.new(string)
499
+ hexes, deces = [], []
500
+ in_range = false
501
+ # Build op (alt) from different ranges/enums
502
+ while !scanner.eos?
503
+ if hex = scanner.scan(Terminals::HEX)
504
+ # Append any decimal values
505
+ alt << "%d" + deces.join(".") unless deces.empty?
506
+ deces = []
507
+
508
+ if in_range
509
+ # Add "." sequences for any previous hexes
510
+ alt << "%x" + hexes[0..-2].join(".") if hexes.length > 1
511
+ alt << "%x#{hexes.last}-#{hex[2..-1]}"
512
+ in_range, hexes = false, []
513
+ else
514
+ hexes << hex[2..-1]
515
+ end
516
+ elsif dec = scanner.scan(Terminals::R_CHAR)
517
+ # Append any hexadecimal values
518
+ alt << "%x" + hexes.join(".") unless hexes.empty?
519
+ hexes = []
520
+
521
+ if in_range
522
+ # Add "." sequences for any previous hexes
523
+ alt << "%d" + deces[0..-2].join(".") if deces.length > 1
524
+ alt << "%d#{deces.last}-#{dec.codepoints.first}"
525
+ in_range, deces = false, []
526
+ else
527
+ deces << dec.codepoints.first.to_s
528
+ end
529
+ end
530
+
531
+ in_range = true if scanner.scan(/\-/)
532
+ end
533
+
534
+ deces << '45' if o_dash
535
+
536
+ # Append hexes and deces as "." sequences (should be only one)
537
+ alt << "%d" + deces.join(".") unless deces.empty?
538
+ alt << "%x" + hexes.join(".") unless hexes.empty?
539
+
540
+ # FIXME: HTML abbreviations?
541
+ if alt.length == 2
542
+ # Just return the range or enum
543
+ alt.last
544
+ else
545
+ # Return the alt, which will be further formatted
546
+ alt
547
+ end
548
+ end
549
+
550
+ def escape_abnf_hex(u)
551
+ fmt = case u.ord
552
+ when 0x0000..0x00ff then "%02X"
553
+ when 0x0100..0xffff then "%04X"
554
+ else "%08X"
555
+ end
556
+ char = "%x" + (fmt % u.ord)
557
+ if @options[:html]
558
+ if u.ord <= 0x20
559
+ char = %(<abbr title="#{ASCII_ESCAPE_NAMES[u.ord]}">#{@coder.encode char}</abbr>)
560
+ elsif u.ord == 0x22
561
+ %(<abbr title="quot">>&quot;</abbr>)
562
+ elsif u.ord < 0x7F
563
+ char = %(<abbr title="ascii '#{u}'">#{@coder.encode char}</abbr>)
564
+ elsif u.ord == 0x7F
565
+ char = %(<abbr title="delete">#{@coder.encode char}</abbr>)
566
+ elsif u.ord <= 0xFF
567
+ char = %(<abbr title="extended ascii '#{u}'">#{char}</abbr>)
568
+ else
569
+ char = %(<abbr title="unicode '#{u.unicode_normaliz}'">#{char}</abbr>)
570
+ end
571
+ %(<code class="grammar-char-escape">#{char}</code>)
572
+ else
573
+ char
574
+ end
575
+ end
576
+
577
+ ##
578
+ # ISO EBNF Formatters
579
+ ##
580
+
581
+ # Format the expression part of a rule
582
+ def format_isoebnf(expr, sep: nil, embedded: false)
583
+ return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
584
+ if expr.is_a?(String)
585
+ expr = expr[2..-1].hex.chr if expr =~ /\A#x\h+/
586
+ expr.chars.each do |c|
587
+ raise RangeError, "cannot format #{expr.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless
588
+ ISOEBNF::TERMINAL_CHARACTER.match?(c)
589
+ end
590
+ if expr =~ /"/
591
+ return (@options[:html] ? %('<code class="grammar-literal">#{@coder.encode expr}</code>') : %('#{expr}'))
592
+ else
593
+ return (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode expr}</code>") : %("#{expr}"))
594
+ end
595
+ end
596
+ parts = {
597
+ alt: (@options[:html] ? "<code>|</code> " : "| "),
598
+ diff: (@options[:html] ? "<code>-</code> " : "- "),
599
+ }
600
+ lparen = (@options[:html] ? "<code>(</code> " : "(")
601
+ rparen = (@options[:html] ? "<code>)</code> " : ")")
602
+
603
+ case expr.first
604
+ when :istr
605
+ # Looses fidelity, but, oh well ...
606
+ format_isoebnf(expr.last, embedded: true)
607
+ when :alt, :diff
608
+ this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
609
+ res = expr[1..-1].map {|e| format_isoebnf(e, embedded: true)}.join(this_sep)
610
+ embedded ? (lparen + res + rparen) : res
611
+ when :opt
612
+ r = format_isoebnf(expr[1], embedded: true)
613
+ "[#{r}]"
614
+ when :star
615
+ r = format_isoebnf(expr[1], embedded: true)
616
+ "{#{r}}"
617
+ when :plus
618
+ r = format_isoebnf(expr[1], embedded: true)
619
+ "#{r}, {#{r}}"
620
+ when :hex
621
+ format_isoebnf(expr[1], embedded: true)
622
+ when :range
623
+ res = format_isoebnf_range(expr.last)
624
+ res.is_a?(Array) ?
625
+ format_isoebnf(res, embedded: true) :
626
+ res
627
+ when :seq
628
+ this_sep = "," + (sep ? sep : " ")
629
+ res = expr[1..-1].map do |e|
630
+ format_isoebnf(e, embedded: true)
631
+ end.join(this_sep)
632
+ embedded ? (lparen + res + rparen) : res
633
+ when :rept
634
+ # Expand repetition
635
+ min, max, value = expr[1..-1]
636
+ if min == 0 && max == 1
637
+ format_isoebnf([:opt, value], sep: sep, embedded: embedded)
638
+ elsif min == 0 && max == '*'
639
+ format_isoebnf([:star, value], sep: sep, embedded: embedded)
640
+ elsif min == 1 && max == '*'
641
+ format_isoebnf([:plus, value], sep: sep, embedded: embedded)
642
+ else
643
+ val2 = [:seq]
644
+ while min > 0
645
+ val2 << value
646
+ min -= 1
647
+ max -= 1 unless max == '*'
648
+ end
649
+ if max == '*'
650
+ val2 << [:star, value]
651
+ else
652
+ opt = nil
653
+ while max > 0
654
+ opt = [:opt, opt ? [:seq, value, opt] : value]
655
+ max -= 1
656
+ end
657
+ val2 << opt if opt
658
+ end
659
+ format_isoebnf(val2, sep: sep, embedded: embedded)
660
+ end
661
+ else
662
+ raise "Unknown operator: #{expr.first}"
663
+ end
664
+ end
665
+
666
+ # Format a range
667
+ # Range is formatted as a aliteration of characters
668
+ def format_isoebnf_range(string)
669
+ chars = []
670
+ o_dash = false
671
+
672
+ raise RangeError, "cannot format #{string.inspect} an ABNF range" if string.start_with?('^')
673
+
674
+ if string.end_with?('-')
675
+ o_dash = true
676
+ string = string[0..-2]
677
+ end
678
+
679
+ scanner = StringScanner.new(string)
680
+ in_range = false
681
+ # Build chars from different ranges/enums
682
+ while !scanner.eos?
683
+ char = if hex = scanner.scan(Terminals::HEX)
684
+ hex[2..-1].hex.ord.char(Encoding::UTF_8)
685
+ else scanner.scan(Terminals::R_CHAR)
686
+ end
687
+ raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration: #{char.inspect} is out of range" unless
688
+ char && ISOEBNF::TERMINAL_CHARACTER.match?(char)
689
+
690
+ if in_range
691
+ # calculate characters from chars.last to this char
692
+ raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration" unless chars.last < char
693
+ chars.concat (chars.last..char).to_a[1..-1]
694
+ in_range = false
695
+ else
696
+ chars << char
697
+ end
698
+
699
+ in_range = true if scanner.scan(/\-/)
700
+ end
701
+
702
+ chars << '-' if o_dash
703
+
704
+ # Possibly only a single character (no character?)
705
+ chars.length == 1 ? chars.last.inspect : chars.unshift(:alt)
706
+ end
707
+
708
+ ERB_DESC = %q(
709
+ <table class="grammar">
710
+ <tbody id="grammar-productions" class="<%= @format %>">
711
+ <% for rule in @rules %>
712
+ <tr<%= %{ id="grammar-production-#{rule.sym}"} unless %w(=/ |).include?(rule.assign) || rule.sym.nil?%>>
713
+ <% if rule.id %>
714
+ <td<%= " colspan=2" unless rule.sym %>><%= rule.id %></td>
715
+ <% end %>
716
+ <% if rule.sym %>
717
+ <td><code><%== rule.sym %></code></td>
718
+ <% end %>
719
+ <td><%= rule.assign %></td>
720
+ <td><%= rule.formatted %></td>
721
+ </tr>
722
+ <% end %>
723
+ </tbody>
724
+ </table>
220
725
  ).gsub(/^ /, '')
221
726
  end
222
727
  end