ebnf 1.1.2 → 2.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +218 -196
  3. data/UNLICENSE +1 -1
  4. data/VERSION +1 -1
  5. data/bin/ebnf +40 -21
  6. data/etc/abnf-core.ebnf +52 -0
  7. data/etc/abnf.abnf +121 -0
  8. data/etc/abnf.ebnf +124 -0
  9. data/etc/abnf.sxp +45 -0
  10. data/etc/doap.ttl +23 -15
  11. data/etc/ebnf.ebnf +21 -33
  12. data/etc/ebnf.html +171 -160
  13. data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
  14. data/etc/ebnf.ll1.sxp +182 -183
  15. data/etc/ebnf.peg.rb +90 -0
  16. data/etc/ebnf.peg.sxp +84 -0
  17. data/etc/ebnf.sxp +40 -41
  18. data/etc/iso-ebnf.ebnf +140 -0
  19. data/etc/iso-ebnf.isoebnf +138 -0
  20. data/etc/iso-ebnf.sxp +65 -0
  21. data/etc/sparql.ebnf +4 -4
  22. data/etc/sparql.html +1603 -1751
  23. data/etc/sparql.ll1.sxp +7372 -7372
  24. data/etc/sparql.peg.rb +532 -0
  25. data/etc/sparql.peg.sxp +597 -0
  26. data/etc/sparql.sxp +363 -362
  27. data/etc/turtle.ebnf +3 -3
  28. data/etc/turtle.html +465 -517
  29. data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
  30. data/etc/turtle.ll1.sxp +425 -425
  31. data/etc/turtle.peg.rb +182 -0
  32. data/etc/turtle.peg.sxp +199 -0
  33. data/etc/turtle.sxp +103 -101
  34. data/lib/ebnf.rb +7 -2
  35. data/lib/ebnf/abnf.rb +301 -0
  36. data/lib/ebnf/abnf/core.rb +23 -0
  37. data/lib/ebnf/abnf/meta.rb +111 -0
  38. data/lib/ebnf/base.rb +128 -87
  39. data/lib/ebnf/bnf.rb +1 -26
  40. data/lib/ebnf/ebnf/meta.rb +90 -0
  41. data/lib/ebnf/isoebnf.rb +229 -0
  42. data/lib/ebnf/isoebnf/meta.rb +75 -0
  43. data/lib/ebnf/ll1.rb +140 -8
  44. data/lib/ebnf/ll1/lexer.rb +37 -32
  45. data/lib/ebnf/ll1/parser.rb +113 -73
  46. data/lib/ebnf/ll1/scanner.rb +83 -51
  47. data/lib/ebnf/native.rb +320 -0
  48. data/lib/ebnf/parser.rb +285 -302
  49. data/lib/ebnf/peg.rb +39 -0
  50. data/lib/ebnf/peg/parser.rb +561 -0
  51. data/lib/ebnf/peg/rule.rb +241 -0
  52. data/lib/ebnf/rule.rb +453 -163
  53. data/lib/ebnf/terminals.rb +21 -0
  54. data/lib/ebnf/writer.rb +561 -88
  55. metadata +114 -28
  56. data/etc/sparql.rb +0 -45773
@@ -0,0 +1,21 @@
1
+ # encoding: utf-8
2
+ # Terminal definitions for the EBNF grammar
3
+ module EBNF::Terminals
4
+ SYMBOL_BASE = %r(\b[a-zA-Z0-9_\.]+\b)u.freeze
5
+ SYMBOL = %r(#{SYMBOL_BASE}(?!\s*::=))u.freeze
6
+ HEX = %r(\#x\h+)u.freeze
7
+ CHAR = %r([\u0009\u000A\u000D\u0020-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
8
+ R_CHAR = %r([\u0009\u000A\u000D\u0020-\u002C\u002E-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
9
+ RANGE = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX})|#{R_CHAR}|#{HEX})+-?\](?!\s+#{SYMBOL_BASE}\s*::=))u.freeze
10
+ LHS = %r((?:\[#{SYMBOL_BASE}\])?\s*#{SYMBOL_BASE}\s*::=)u.freeze
11
+ O_RANGE = %r(\[\^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX}|#{R_CHAR}|#{HEX}))+-?\])u.freeze
12
+ STRING1 = %r("[\u0009\u000A\u000D\u0020\u0021\u0023-\uD7FF\u{10000}-\u{10FFFF}]*")u.freeze
13
+ STRING2 = %r('[\u0009\u000A\u000D\u0020-\u0026\u0028-\uD7FF\u{10000}-\u{10FFFF}]*')u.freeze
14
+ POSTFIX = %r([?*+])u.freeze
15
+ PASS = %r((
16
+ \s
17
+ | (?:(?:\#[^x]|//)[^\n\r]*)
18
+ | (?:/\*(?:(?:\*[^/])|[^*])*\*/)
19
+ | (?:\(\*(?:(?:\*[^\)])|[^*])*\*\))
20
+ )+)xmu.freeze
21
+ end
@@ -1,22 +1,62 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  require 'rdf'
3
3
  require 'strscan' unless defined?(StringScanner)
4
+ require "ostruct"
4
5
 
5
6
  ##
6
7
  # Serialize ruleset back to EBNF
7
8
  module EBNF
8
9
  class Writer
9
10
  LINE_LENGTH = 80
11
+ LINE_LENGTH_HTML = 200
12
+
13
+ # ASCII escape names
14
+ ASCII_ESCAPE_NAMES = [
15
+ "null", #x00
16
+ "start of heading", #x01
17
+ "start of text", #x02
18
+ "end of text", #x03
19
+ "end of transmission", #x04
20
+ "enquiry", #x05
21
+ "acknowledge", #x06
22
+ "bell", #x07
23
+ "backspace", #x08
24
+ "horizontal tab", #x09
25
+ "new line", #x0A
26
+ "vertical tab", #x0B
27
+ "form feed", #x0C
28
+ "carriage return", #x0D
29
+ "shift out", #x0E
30
+ "shift in", #x0F
31
+ "data link escape", #x10
32
+ "device control 1", #x11
33
+ "device control 2", #x12
34
+ "device control 3", #x13
35
+ "device control 4", #x14
36
+ "negative acknowledge", #x15
37
+ "synchronous idle", #x16
38
+ "end of trans. block", #x17
39
+ "cancel", #x18
40
+ "end of medium", #x19
41
+ "substitute", #x1A
42
+ "escape", #x1B
43
+ "file separator", #x1C
44
+ "group separator", #x1D
45
+ "record separator", #x1E
46
+ "unit separator", #x1F
47
+ "space" #x20
48
+ ]
10
49
 
11
50
  ##
12
51
  # Format rules to a String
13
52
  #
14
53
  # @param [Array<Rule>] rules
54
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
15
55
  # @return [Object]
16
- def self.string(*rules)
56
+ def self.string(*rules, format: :ebnf)
17
57
  require 'stringio' unless defined?(StringIO)
18
58
  buf = StringIO.new
19
- write(buf, *rules)
59
+ write(buf, *rules, format: format)
20
60
  buf.string
21
61
  end
22
62
 
@@ -24,9 +64,10 @@ module EBNF
24
64
  # Format rules to $stdout
25
65
  #
26
66
  # @param [Array<Rule>] rules
67
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
27
68
  # @return [Object]
28
- def self.print(*rules)
29
- write($stdout, *rules)
69
+ def self.print(*rules, format: :ebnf)
70
+ write($stdout, *rules, format: format)
30
71
  end
31
72
 
32
73
  ##
@@ -34,92 +75,155 @@ module EBNF
34
75
  #
35
76
  # @param [Object] out
36
77
  # @param [Array<Rule>] rules
78
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
37
79
  # @return [Object]
38
- def self.write(out, *rules)
39
- Writer.new(rules, out: out)
80
+ def self.write(out, *rules, format: :ebnf)
81
+ Writer.new(rules, out: out, format: format)
40
82
  end
41
83
 
42
84
  ##
43
85
  # Write formatted rules to an IO like object as HTML
44
86
  #
45
87
  # @param [Array<Rule>] rules
88
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
46
89
  # @return [Object]
47
- def self.html(*rules)
90
+ def self.html(*rules, format: :ebnf)
48
91
  require 'stringio' unless defined?(StringIO)
49
92
  buf = StringIO.new
50
- Writer.new(rules, out: buf, html: true)
93
+ Writer.new(rules, out: buf, html: true, format: format)
51
94
  buf.string
52
95
  end
53
96
 
54
97
  ##
55
98
  # @param [Array<Rule>] rules
56
99
  # @param [Hash{Symbol => Object}] options
57
- # @option options [Symbol] :format
58
- # @option options [#write] :out ($stdout)
59
- # @option options [Boolean] :html (false)
60
- # Format as HTML
61
- def initialize(rules, options = {})
62
- @options = options.dup
63
- out = options.fetch(:out, $stdio)
64
- #fmt = options.fetch(:format, :ebnf)
100
+ # @param [#write] out ($stdout)
101
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
102
+ # @option options [Symbol] format
103
+ # @option options [Boolean] html (false)
104
+ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options)
105
+ @options = options.merge(html: html)
106
+ return if rules.empty?
65
107
 
66
108
  # Determine max LHS length
109
+ format_meth = "format_#{format}".to_sym
67
110
  max_id = rules.max_by {|r| r.id.to_s.length}.id.to_s.length
68
111
  max_sym = rules.max_by {|r| r.sym.to_s.length}.sym.to_s.length
69
- lhs_length = max_sym + 3
70
- lhs_fmt = "%<sym>-#{max_sym}s ::= "
71
- if max_id > 0
112
+ lhs_length = max_sym + 1
113
+ lhs_fmt = case format
114
+ when :abnf then "%<sym>-#{max_sym}s = "
115
+ when :ebnf then "%<sym>-#{max_sym}s ::= "
116
+ when :isoebnf then "%<sym>-#{max_sym}s = "
117
+ end
118
+ if format == :ebnf && max_id > 0
72
119
  lhs_fmt = "%<id>-#{max_id+2}s " + lhs_fmt
73
120
  lhs_length += max_id + 3
74
121
  end
75
- rhs_length = LINE_LENGTH - lhs_length
122
+ rhs_length = (html ? LINE_LENGTH_HTML : LINE_LENGTH) - lhs_length
76
123
 
77
- if @options[:html]
124
+ if html
78
125
  # Output as formatted HTML
79
126
  begin
80
- require 'haml'
81
- html = Haml::Engine.new(HAML_DESC).render(self, rules: rules) do |rule|
82
- formatted_expr = format(rule.expr)
83
- formatted_expr.length > rhs_length ? format(rule.expr, "\n") : formatted_expr
84
- end
85
- out.write html
127
+ require 'erubis'
128
+ require 'htmlentities'
129
+ @coder = HTMLEntities.new
130
+ eruby = Erubis::Eruby.new(ERB_DESC)
131
+ formatted_rules = rules.map do |rule|
132
+ if rule.kind == :terminals || rule.kind == :pass
133
+ OpenStruct.new(id: ("@#{rule.kind}"),
134
+ sym: nil,
135
+ assign: nil,
136
+ formatted: ("<strong># Productions for terminals</strong>" if rule.kind == :terminals))
137
+ else
138
+ formatted_expr = self.send(format_meth, rule.expr)
139
+ # Measure text without markup
140
+ formatted_expr_text = formatted_expr.gsub(%r{</?\w+[^>]*>}, '')
141
+ if formatted_expr_text.length > rhs_length && (format != :abnf || rule.alt?)
142
+ lines = []
143
+ # Can only reasonably split apart alts
144
+ self.send(format_meth, rule.expr, sep: "--rule-extensions--").
145
+ split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
146
+ assign = case format
147
+ when :ebnf
148
+ formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
149
+ (ndx > 0 ? (rule.alt? ? '|' : '') : '::=')
150
+ when :abnf
151
+ formatted.sub!(%r{\s*<code>/</code>\s*}, '')
152
+ (ndx > 0 ? '=/' : '=')
153
+ else
154
+ formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
155
+ (ndx > 0 ? (rule.alt? ? '|' : '') : '=')
156
+ end
157
+ lines << OpenStruct.new(id: ((ndx == 0 ? "[#{rule.id}]" : "") if rule.id),
158
+ sym: (rule.sym if ndx == 0 || format == :abnf),
159
+ assign: assign,
160
+ formatted: formatted)
161
+ end
162
+ if format == :isoebnf
163
+ lines << OpenStruct.new(assign: ';')
164
+ end
165
+ lines
166
+ else
167
+ OpenStruct.new(id: ("[#{rule.id}]" if rule.id),
168
+ sym: rule.sym,
169
+ assign: (format == :ebnf ? '::=' : '='),
170
+ formatted: (formatted_expr + (format == :isoebnf ? ' ;' : '')))
171
+ end
172
+ end
173
+ end.flatten
174
+ out.write eruby.evaluate(format: format, rules: formatted_rules)
86
175
  return
87
176
  rescue LoadError
88
- $stderr.puts "Generating HTML requires haml gem to be loaded"
177
+ $stderr.puts "Generating HTML requires erubis and htmlentities gems to be loaded"
89
178
  end
90
179
  end
91
180
 
92
181
  # Format each rule, considering the available rhs size
93
182
  rules.each do |rule|
94
183
  buffer = if rule.pass?
95
- "%-#{lhs_length-2}s" % "@pass"
184
+ "\n%-#{lhs_length-2}s " % "@pass"
185
+ elsif rule.kind == :terminals
186
+ "\n%-#{lhs_length-2}s" % "@terminals"
96
187
  else
97
188
  lhs_fmt % {id: "[#{rule.id}]", sym: rule.sym}
98
189
  end
99
- formatted_expr = format(rule.expr)
100
- if formatted_expr.length > rhs_length
101
- buffer << format(rule.expr, ("\n" + " " * lhs_length))
190
+ formatted_expr = self.send(format_meth, rule.expr)
191
+ if formatted_expr.length > rhs_length && (format != :abnf || rule.alt?)
192
+ if format == :abnf
193
+ # No whitespace, use =/
194
+ self.send(format_meth, rule.expr, sep: "--rule-extensions--").
195
+ split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
196
+ if ndx > 0
197
+ buffer << "\n" + lhs_fmt.sub('= ', '=/') % {id: "[#{rule.id}]", sym: rule.sym}
198
+ end
199
+ buffer << formatted.sub(/\s*\/\s*/, '')
200
+ end
201
+ else
202
+ # Space out past "= "
203
+ buffer << self.send(format_meth, rule.expr, sep: ("\n" + " " * (lhs_length + (rule.alt? ? 2 : 4) - (format == :ebnf ? 0 : 2))))
204
+ buffer << ("\n" + " " * (lhs_length) + ';') if format == :isoebnf
205
+ end
102
206
  else
103
- buffer << formatted_expr
207
+ buffer << formatted_expr + (format == :isoebnf ? ' ;' : '')
104
208
  end
209
+ buffer << "\n\n" if [:terminals, :pass].include?(rule.kind)
105
210
  out.puts(buffer)
106
211
  end
107
212
  end
108
213
 
109
214
  protected
215
+
216
+ ##
217
+ # W3C EBNF Formatters
218
+ ##
219
+
110
220
  # Format the expression part of a rule
111
- def format(expr, sep = nil)
112
- return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
221
+ def format_ebnf(expr, sep: nil, embedded: false)
222
+ return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
113
223
  if expr.is_a?(String)
114
- if expr.length == 1
115
- return format_char(expr)
116
- elsif expr =~ /\A#x\h+/
117
- return (@options[:html] ? %(<code class="grammar-char-escape">#{expr}</code>) : expr)
118
- elsif expr =~ /"/
119
- return (@options[:html] ? %('<code class="grammar-literal">#{escape(expr, "'")}</code>') : %('#{escape(expr, "'")}'))
120
- else
121
- return (@options[:html] ? %("<code class="grammar-literal">#{escape(expr, '"')}</code>") : %("#{escape(expr, '"')}"))
122
- end
224
+ return expr.length == 1 ?
225
+ format_ebnf_char(expr) :
226
+ format_ebnf_string(expr, expr.include?('"') ? "'" : '"')
123
227
  end
124
228
  parts = {
125
229
  alt: (@options[:html] ? "<code>|</code> " : "| "),
@@ -132,95 +236,464 @@ module EBNF
132
236
  rparen = (@options[:html] ? "<code>)</code> " : ")")
133
237
 
134
238
  case expr.first
239
+ when :istr
240
+ # Looses fidelity, but, oh well ...
241
+ format_ebnf(expr.last, embedded: true)
135
242
  when :alt, :diff
136
243
  this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
137
- expr[1..-1].map {|e| format(e)}.join(this_sep)
244
+ res = expr[1..-1].map {|e| format_ebnf(e, embedded: true)}.join(this_sep)
245
+ embedded ? (lparen + res + rparen) : res
138
246
  when :star, :plus, :opt
139
- raise "Expected star expression to have a single operand" unless expr.length == 2
140
247
  char = parts[expr.first.to_sym]
141
- r = format(expr[1])
142
- (r.start_with?("(") || Array(expr[1]).length == 1) ? "#{r}#{char}" : "(#{r})#{char}"
248
+ r = format_ebnf(expr[1], embedded: true)
249
+ "#{r}#{char}"
143
250
  when :hex
144
- (@options[:html] ? %(<code class="grammar-char-escape">#{expr.last}</code>) : expr.last)
251
+ escape_ebnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
145
252
  when :range
146
- format_range(expr.last)
253
+ format_ebnf_range(expr.last)
147
254
  when :seq
148
255
  this_sep = (sep ? sep : " ")
149
- expr[1..-1].map {|e| r = format(e); Array(e).length > 2 ? "#{lparen}#{r}#{rparen}" : r}.join(this_sep)
256
+ res = expr[1..-1].map do |e|
257
+ format_ebnf(e, embedded: true)
258
+ end.join(this_sep)
259
+ embedded ? (lparen + res + rparen) : res
260
+ when :rept
261
+ # Expand repetition
262
+ min, max, value = expr[1..-1]
263
+ if min == 0 && max == 1
264
+ format_ebnf([:opt, value], sep: sep, embedded: embedded)
265
+ elsif min == 0 && max == '*'
266
+ format_ebnf([:star, value], sep: sep, embedded: embedded)
267
+ elsif min == 1 && max == '*'
268
+ format_ebnf([:plus, value], sep: sep, embedded: embedded)
269
+ else
270
+ val2 = [:seq]
271
+ while min > 0
272
+ val2 << value
273
+ min -= 1
274
+ max -= 1 unless max == '*'
275
+ end
276
+ if max == '*'
277
+ val2 << [:star, value]
278
+ else
279
+ opt = nil
280
+ while max > 0
281
+ opt = [:opt, opt ? [:seq, value, opt] : value]
282
+ max -= 1
283
+ end
284
+ val2 << opt if opt
285
+ end
286
+ format_ebnf(val2, sep: sep, embedded: embedded)
287
+ end
150
288
  else
151
289
  raise "Unknown operator: #{expr.first}"
152
290
  end
153
291
  end
154
292
 
155
293
  # Format a single-character string, prefering hex for non-main ASCII
156
- def format_char(c)
294
+ def format_ebnf_char(c)
157
295
  case c.ord
158
- when 0x22 then (@options[:html] ? %('<code class="grammar-literal">"</code>') : %{'"'})
159
- when (0x23..0x7e) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"})
160
- else (@options[:html] ? %(<code class="grammar-char-escape">#{escape_hex(c)}</code>) : escape_hex(c))
296
+ when (0x21) then (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"})
297
+ when 0x22 then (@options[:html] ? %('<code class="grammar-literal">&quot;</code>') : %{'"'})
298
+ when (0x23..0x7e) then (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"})
299
+ when (0x80..0xFFFD) then (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"})
300
+ else escape_ebnf_hex(c)
161
301
  end
162
302
  end
163
303
 
164
304
  # Format a range
165
- def format_range(string)
305
+ def format_ebnf_range(string)
166
306
  lbrac = (@options[:html] ? "<code>[</code> " : "[")
167
307
  rbrac = (@options[:html] ? "<code>]</code> " : "]")
168
- dash = (@options[:html] ? "<code>-</code> " : "-")
169
308
 
170
309
  buffer = lbrac
171
310
  s = StringScanner.new(string)
172
311
  while !s.eos?
173
312
  case
174
313
  when s.scan(/\A[!"\u0024-\u007e]+/)
175
- buffer << (@options[:html] ? %(<code class="grammar-literal">#{s.matched}</code>) : s.matched)
314
+ buffer << (@options[:html] ? %(<code class="grammar-literal">#{@coder.encode s.matched}</code>) : s.matched)
176
315
  when s.scan(/\A#x\h+/)
177
- buffer << (@options[:html] ? %(<code class="grammar-char-escape">#{s.matched}</code>) : s.matched)
178
- when s.scan(/\A-/)
179
- buffer << dash
316
+ buffer << escape_ebnf_hex(s.matched[2..-1].hex.chr(Encoding::UTF_8))
180
317
  else
181
- buffer << (@options[:html] ? %(<code class="grammar-char-escape">#{escape_hex(s.getch)}</code>) : escape_hex(s.getch))
318
+ buffer << escape_ebnf_hex(s.getch)
182
319
  end
183
320
  end
184
321
  buffer + rbrac
185
322
  end
186
323
 
187
324
  # Escape a string, using as many UTF-8 characters as possible
188
- def escape(string, quote = '"')
189
- buffer = ""
325
+ def format_ebnf_string(string, quote = '"')
190
326
  string.each_char do |c|
191
- buffer << case (u = c.ord)
192
- when (0x00..0x1f) then "#x%02X" % u
193
- when quote.ord then "#x%02X" % u
194
- else c
327
+ case c.ord
328
+ when 0x00..0x19, quote.ord
329
+ raise RangeError, "cannot format #{string.inspect} as an EBNF String: #{c.inspect} is out of range" unless
330
+ ISOEBNF::TERMINAL_CHARACTER.match?(c)
195
331
  end
196
332
  end
197
- buffer
333
+
334
+ res = "#{quote}#{string}#{quote}"
335
+ @options[:html] ? @coder.encode(res) : res
198
336
  end
199
337
 
200
- def escape_hex(u)
338
+ def escape_ebnf_hex(u)
201
339
  fmt = case u.ord
340
+ when 0x00..0x20 then "#x%02X"
202
341
  when 0x0000..0x00ff then "#x%02X"
203
342
  when 0x0100..0xffff then "#x%04X"
204
343
  else "#x%08X"
205
344
  end
206
- sprintf(fmt, u.ord)
207
- end
208
-
209
- HAML_DESC = %q(
210
- %table.grammar
211
- %tbody#grammar-productions
212
- - rules.each do |rule|
213
- %tr{id: "grammar-production-#{rule.sym}"}
214
- - if rule.pass?
215
- %td{colspan: 3}
216
- %code<="@pass"
217
- - else
218
- %td<= "[#{rule.id}]"
219
- %td<
220
- %code<= rule.sym
221
- %td<= "::="
222
- %td
223
- != yield rule
345
+ char = fmt % u.ord
346
+ if @options[:html]
347
+ if u.ord <= 0x20
348
+ char = %(<abbr title="#{ASCII_ESCAPE_NAMES[u.ord]}">#{@coder.encode char}</abbr>)
349
+ elsif u.ord < 0x7F
350
+ char = %(<abbr title="ascii '#{@coder.encode u}'">#{@coder.encode char}</abbr>)
351
+ elsif u.ord == 0x7F
352
+ char = %(<abbr title="delete">#{@coder.encode char}</abbr>)
353
+ elsif u.ord <= 0xFF
354
+ char = %(<abbr title="extended ascii '#{u}'">#{char}</abbr>)
355
+ else
356
+ char = %(<abbr title="unicode '#{u}'">#{char}</abbr>)
357
+ end
358
+ %(<code class="grammar-char-escape">#{char}</code>)
359
+ else
360
+ char
361
+ end
362
+ end
363
+
364
+ ##
365
+ # ABNF Formatters
366
+ ##
367
+
368
# Format the expression part of a rule as ABNF.
#
# @param [Symbol, String, Array] expr
#   a rule reference (Symbol), a terminal (String) or an S-expression
#   whose first element is the operator (`:alt`, `:seq`, `:rept`, ...)
# @param [String] sep (nil)
#   separator placed between operands of `alt`/`seq`; a single space when nil
# @param [Boolean] embedded (false)
#   true when the result is an operand of another operator, in which
#   case `alt`/`seq` results are wrapped in parentheses
# @param [Boolean] sensitive (true)
#   prefix quoted strings with `%s`
# @return [String]
# @raise [RangeError] if the expression uses the `diff` operator
def format_abnf(expr, sep: nil, embedded: false, sensitive: true)
  return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
  if expr.is_a?(String)
    if expr.length == 1
      return format_abnf_char(expr)
    elsif expr.start_with?('%')
      # Already encoded
      return expr
    elsif expr.include?('"')
      # A quoted string cannot contain a double quote, so emit every `"`
      # as the hex terminal %x22 between the surrounding segments.
      # A negative limit keeps trailing empty segments, so quotes at the
      # end of the string (or a string made only of quotes) are not lost.
      seq = [:seq]
      expr.split('"', -1).each_with_index do |segment, ndx|
        seq << [:hex, "#x22"] if ndx > 0
        seq << segment unless segment.empty?
      end
      # Keep the grouping requested by the caller so that an enclosing
      # repetition applies to the whole concatenation.
      return format_abnf(seq, sep: nil, embedded: embedded)
    else
      return (@options[:html] ? %("<code class="grammar-literal">#{'%s' if sensitive}#{@coder.encode expr}</code>") : %(#{'%s' if sensitive}"#{expr}"))
    end
  end
  parts = {
    alt: (@options[:html] ? "<code>/</code> " : "/ "),
    star: (@options[:html] ? "<code>*</code> " : "*"),
    plus: (@options[:html] ? "<code>+</code> " : "1*")
  }
  lbrac = (@options[:html] ? "<code>[</code> " : "[")
  rbrac = (@options[:html] ? "<code>]</code> " : "]")
  lparen = (@options[:html] ? "<code>(</code> " : "(")
  rparen = (@options[:html] ? "<code>)</code> " : ")")

  case expr.first
  when :istr
    # FIXME: if string part is segmented, need to do something different
    format_abnf(expr.last, embedded: true, sensitive: false)
  when :alt
    this_sep = (sep ? sep : " ") + parts[:alt]
    res = expr[1..-1].map {|e| format_abnf(e, embedded: true)}.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :diff
    raise RangeError, "ABNF does not support the diff operator"
  when :opt
    r = format_abnf(expr[1], embedded: true)
    "#{lbrac}#{r}#{rbrac}"
  when :plus, :star
    char = parts[expr.first.to_sym]
    r = format_abnf(expr[1], embedded: true)
    "#{char}#{r}"
  when :hex
    # Decode as a Unicode code point; a bare Integer#chr raises for
    # values above 255.
    escape_abnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
  when :range
    # Returns an [:alt] or [:not [:alt]] if composed of multiple sequences
    # Note: ABNF does not support the `not` operator
    res = format_abnf_range(expr.last)
    res.is_a?(Array) ?
      format_abnf(res, embedded: true) :
      res
  when :seq
    this_sep = (sep ? sep : " ")
    res = expr[1..-1].map do |e|
      format_abnf(e, embedded: true)
    end.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :rept
    # ABNF expresses repetition directly as <min>*<max>element
    min, max, value = expr[1..-1]
    r = format_abnf(value, embedded: true)
    if min == max
      "#{min}#{r}"
    elsif min == 0 && max == '*'
      "#{parts[:star]}#{r}"
    elsif min > 0 && max == '*'
      "#{min}#{parts[:star]}#{r}"
    else
      "#{min}#{parts[:star]}#{max}#{r}"
    end
  else
    raise "Unknown operator: #{expr.first}"
  end
end
451
+
452
# Format a single-character string, preferring a hex terminal for
# anything that is not a printable ASCII character other than `"`.
#
# @param [String] c
# @return [String] the character in double quotes, or the result of
#   {#escape_abnf_hex} for characters outside #x20-#x21 / #x23-#x7E
def format_abnf_char(c)
  if /[\x20-\x21\x23-\x7E]/.match?(c)
    # Quote the character verbatim. String#inspect would double a
    # backslash, and ABNF quoted strings have no escape sequences.
    %("#{c}")
  else
    escape_abnf_hex(c)
  end
end
460
+
461
# Format the body of an EBNF range (the text between `[` and `]`) as ABNF
# numeric terminals.
#
# Presumes range has already been validated; input that cannot be
# scanned raises instead of looping forever.
#
# NOTE(review): consecutive single characters are joined with "."
# (e.g. "ab" => "%d97.98"). RFC 5234 appears to define the dotted form
# as a concatenation rather than a choice -- confirm this is intended.
#
# @param [String] string
# @return [String, Array] a single `%d...`/`%x...` terminal, or an
#   `[:alt, ...]` expression when several terminals are required
# @raise [RangeError] for negated ranges (leading `^`) and for
#   characters that are neither a hex escape, a range character nor `-`
def format_abnf_range(string)
  alt, o_dash = [:alt], false

  raise RangeError, "cannot format #{string.inspect} an ABNF range" if string.start_with?('^')

  # A trailing dash is a literal "-", not a range operator
  if string.end_with?('-')
    o_dash = true
    string = string[0..-2]
  end

  scanner = StringScanner.new(string)
  hexes, deces = [], []
  in_range = false
  # Build op (alt) from different ranges/enums
  until scanner.eos?
    if (hex = scanner.scan(Terminals::HEX))
      # Append any decimal values
      alt << "%d" + deces.join(".") unless deces.empty?
      deces = []

      if in_range
        # Add "." sequences for any previous hexes
        alt << "%x" + hexes[0..-2].join(".") if hexes.length > 1
        alt << "%x#{hexes.last}-#{hex[2..-1]}"
        in_range, hexes = false, []
      else
        hexes << hex[2..-1]
      end
    elsif (dec = scanner.scan(Terminals::R_CHAR))
      # Append any hexadecimal values
      alt << "%x" + hexes.join(".") unless hexes.empty?
      hexes = []

      if in_range
        # Add "." sequences for any previous decimals
        alt << "%d" + deces[0..-2].join(".") if deces.length > 1
        alt << "%d#{deces.last}-#{dec.codepoints.first}"
        in_range, deces = false, []
      else
        deces << dec.codepoints.first.to_s
      end
    elsif !scanner.check(/\-/)
      # Nothing can consume the next character: fail rather than spin
      raise RangeError, "cannot format #{string.inspect} as an ABNF range: #{scanner.rest[0].inspect} is out of range"
    end

    in_range = true if scanner.scan(/\-/)
  end

  # 45 is the decimal code point of "-"
  deces << '45' if o_dash

  # Append hexes and deces as "." sequences (should be only one)
  alt << "%d" + deces.join(".") unless deces.empty?
  alt << "%x" + hexes.join(".") unless hexes.empty?

  # FIXME: HTML abbreviations?
  if alt.length == 2
    # Just return the range or enum
    alt.last
  else
    # Return the alt, which will be further formatted
    alt
  end
end
525
+
526
# Render a character as an ABNF hexadecimal terminal (`%xNN`).
#
# The code point is zero padded to 2, 4 or 8 hex digits depending on
# its magnitude. When HTML output is enabled the terminal is wrapped in
# an `<abbr>` describing the character and a
# `<code class="grammar-char-escape">` element.
#
# @param [String] u a string whose first character is escaped
# @return [String]
def escape_abnf_hex(u)
  code = u.ord
  fmt = case code
  when 0x0000..0x00ff then "%02X"
  when 0x0100..0xffff then "%04X"
  else "%08X"
  end
  char = "%x" + (fmt % code)
  return char unless @options[:html]

  char = if code <= 0x20
    %(<abbr title="#{ASCII_ESCAPE_NAMES[code]}">#{@coder.encode char}</abbr>)
  elsif code < 0x7F
    # Encode the character itself: a raw `"`, `<` or `&` would break
    # the title attribute.
    %(<abbr title="ascii '#{@coder.encode u}'">#{@coder.encode char}</abbr>)
  elsif code == 0x7F
    %(<abbr title="delete">#{@coder.encode char}</abbr>)
  elsif code <= 0xFF
    %(<abbr title="extended ascii '#{u}'">#{char}</abbr>)
  else
    %(<abbr title="unicode '#{u}'">#{char}</abbr>)
  end
  %(<code class="grammar-char-escape">#{char}</code>)
end
550
+
551
+ ##
552
+ # ISO EBNF Formatters
553
+ ##
554
+
555
# Format the expression part of a rule as ISO EBNF.
#
# @param [Symbol, String, Array] expr
#   a rule reference (Symbol), a terminal (String) or an S-expression
#   whose first element is the operator (`:alt`, `:seq`, `:rept`, ...)
# @param [String] sep (nil)
#   separator placed between operands of `alt`/`diff`/`seq`; a single
#   space when nil
# @param [Boolean] embedded (false)
#   true when the result is an operand of another operator, in which
#   case `alt`/`diff`/`seq` results are wrapped in parentheses
# @return [String]
# @raise [RangeError] if a terminal contains a character that does not
#   match `ISOEBNF::TERMINAL_CHARACTER`
def format_isoebnf(expr, sep: nil, embedded: false)
  return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
  if expr.is_a?(String)
    # A string that is entirely a hex escape stands for that code point.
    # Decode it as Unicode: a bare Integer#chr yields a binary string
    # (or raises) for anything outside ASCII.
    expr = expr[2..-1].hex.chr(Encoding::UTF_8) if expr =~ /\A#x\h+\z/
    expr.chars.each do |c|
      raise RangeError, "cannot format #{expr.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless
        ISOEBNF::TERMINAL_CHARACTER.match?(c)
    end
    # Use single quotes when the terminal itself contains a double quote
    if expr =~ /"/
      return (@options[:html] ? %('<code class="grammar-literal">#{@coder.encode expr}</code>') : %('#{expr}'))
    else
      return (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode expr}</code>") : %("#{expr}"))
    end
  end
  parts = {
    alt: (@options[:html] ? "<code>|</code> " : "| "),
    diff: (@options[:html] ? "<code>-</code> " : "- "),
  }
  lparen = (@options[:html] ? "<code>(</code> " : "(")
  rparen = (@options[:html] ? "<code>)</code> " : ")")

  case expr.first
  when :istr
    # Loses fidelity, but, oh well ...
    format_isoebnf(expr.last, embedded: true)
  when :alt, :diff
    this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
    res = expr[1..-1].map {|e| format_isoebnf(e, embedded: true)}.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :opt
    r = format_isoebnf(expr[1], embedded: true)
    "[#{r}]"
  when :star
    r = format_isoebnf(expr[1], embedded: true)
    "{#{r}}"
  when :plus
    # One-or-more is written as the element followed by its repetition
    r = format_isoebnf(expr[1], embedded: true)
    "#{r}, {#{r}}"
  when :hex
    # The "#xNN" operand is decoded by the String branch above
    format_isoebnf(expr[1], embedded: true)
  when :range
    res = format_isoebnf_range(expr.last)
    res.is_a?(Array) ?
      format_isoebnf(res, embedded: true) :
      res
  when :seq
    this_sep = "," + (sep ? sep : " ")
    res = expr[1..-1].map do |e|
      format_isoebnf(e, embedded: true)
    end.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :rept
    # Expand repetition
    min, max, value = expr[1..-1]
    if min == 0 && max == 1
      format_isoebnf([:opt, value], sep: sep, embedded: embedded)
    elsif min == 0 && max == '*'
      format_isoebnf([:star, value], sep: sep, embedded: embedded)
    elsif min == 1 && max == '*'
      format_isoebnf([:plus, value], sep: sep, embedded: embedded)
    else
      # Required occurrences first ...
      val2 = [:seq]
      while min > 0
        val2 << value
        min -= 1
        max -= 1 unless max == '*'
      end
      if max == '*'
        # ... then any number more
        val2 << [:star, value]
      else
        # ... then the remaining occurrences as nested optionals
        opt = nil
        while max > 0
          opt = [:opt, opt ? [:seq, value, opt] : value]
          max -= 1
        end
        val2 << opt if opt
      end
      format_isoebnf(val2, sep: sep, embedded: embedded)
    end
  else
    raise "Unknown operator: #{expr.first}"
  end
end
639
+
640
# Format the body of an EBNF range (the text between `[` and `]`) for
# ISO EBNF, which has no character classes: the range is expanded into
# an alternation of its individual characters.
#
# @param [String] string
# @return [String, Array] a quoted terminal when the range holds a
#   single character, otherwise `[:alt, char, ...]`
# @raise [RangeError] for negated ranges (leading `^`), characters that
#   do not match `ISOEBNF::TERMINAL_CHARACTER`, and descending ranges
def format_isoebnf_range(string)
  raise RangeError, "cannot format #{string.inspect} as an ISO EBNF range" if string.start_with?('^')

  chars = []
  # A trailing dash is a literal "-", not a range operator
  o_dash = string.end_with?('-')
  string = string[0..-2] if o_dash

  scanner = StringScanner.new(string)
  in_range = false
  # Build chars from different ranges/enums
  until scanner.eos?
    char = if (hex = scanner.scan(Terminals::HEX))
      # "#xNN" => the character with that code point
      hex[2..-1].hex.chr(Encoding::UTF_8)
    else
      scanner.scan(Terminals::R_CHAR)
    end
    raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration: #{char.inspect} is out of range" unless
      char && ISOEBNF::TERMINAL_CHARACTER.match?(char)

    if in_range
      # Add every character after chars.last up to and including this one
      raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration" unless chars.last < char
      chars.concat((chars.last..char).to_a[1..-1])
      in_range = false
    else
      chars << char
    end

    in_range = true if scanner.scan(/\-/)
  end

  chars << '-' if o_dash

  # Several characters are returned as an alternation for the caller to format
  return chars.unshift(:alt) unless chars.length == 1

  # A lone character is quoted verbatim (ISO EBNF has no escapes), using
  # single quotes when the character is itself a double quote.
  single = chars.last
  single == '"' ? %('#{single}') : %("#{single}")
end
681
+
682
+ ERB_DESC = %q(
683
+ <table class="grammar">
684
+ <tbody id="grammar-productions" class="<%= @format %>">
685
+ <% for rule in @rules %>
686
+ <tr<%= %{ id="grammar-production-#{rule.sym}"} unless %w(=/ |).include?(rule.assign)%>>
687
+ <% if rule.id %>
688
+ <td><%= rule.id %></td>
689
+ <% end %>
690
+ <td><code><%== rule.sym %></code></td>
691
+ <td><%= rule.assign %></td>
692
+ <td><%= rule.formatted %></td>
693
+ </tr>
694
+ <% end %>
695
+ </tbody>
696
+ </table>
224
697
  ).gsub(/^ /, '')
225
698
  end
226
699
  end