ebnf 1.1.3 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +221 -198
  3. data/UNLICENSE +1 -1
  4. data/VERSION +1 -1
  5. data/bin/ebnf +40 -21
  6. data/etc/abnf-core.ebnf +52 -0
  7. data/etc/abnf.abnf +121 -0
  8. data/etc/abnf.ebnf +124 -0
  9. data/etc/abnf.sxp +45 -0
  10. data/etc/doap.ttl +23 -15
  11. data/etc/ebnf.ebnf +21 -33
  12. data/etc/ebnf.html +171 -160
  13. data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
  14. data/etc/ebnf.ll1.sxp +182 -183
  15. data/etc/ebnf.peg.rb +90 -0
  16. data/etc/ebnf.peg.sxp +84 -0
  17. data/etc/ebnf.sxp +40 -41
  18. data/etc/iso-ebnf.ebnf +140 -0
  19. data/etc/iso-ebnf.isoebnf +138 -0
  20. data/etc/iso-ebnf.sxp +65 -0
  21. data/etc/sparql.ebnf +4 -4
  22. data/etc/sparql.html +1603 -1751
  23. data/etc/sparql.ll1.sxp +7372 -7372
  24. data/etc/sparql.peg.rb +532 -0
  25. data/etc/sparql.peg.sxp +597 -0
  26. data/etc/sparql.sxp +363 -362
  27. data/etc/turtle.ebnf +3 -3
  28. data/etc/turtle.html +465 -517
  29. data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
  30. data/etc/turtle.ll1.sxp +425 -425
  31. data/etc/turtle.peg.rb +182 -0
  32. data/etc/turtle.peg.sxp +199 -0
  33. data/etc/turtle.sxp +103 -101
  34. data/lib/ebnf.rb +7 -2
  35. data/lib/ebnf/abnf.rb +301 -0
  36. data/lib/ebnf/abnf/core.rb +23 -0
  37. data/lib/ebnf/abnf/meta.rb +111 -0
  38. data/lib/ebnf/base.rb +113 -69
  39. data/lib/ebnf/bnf.rb +1 -26
  40. data/lib/ebnf/ebnf/meta.rb +90 -0
  41. data/lib/ebnf/isoebnf.rb +229 -0
  42. data/lib/ebnf/isoebnf/meta.rb +75 -0
  43. data/lib/ebnf/ll1.rb +138 -6
  44. data/lib/ebnf/ll1/lexer.rb +37 -32
  45. data/lib/ebnf/ll1/parser.rb +113 -73
  46. data/lib/ebnf/ll1/scanner.rb +83 -51
  47. data/lib/ebnf/native.rb +320 -0
  48. data/lib/ebnf/parser.rb +285 -302
  49. data/lib/ebnf/peg.rb +39 -0
  50. data/lib/ebnf/peg/parser.rb +561 -0
  51. data/lib/ebnf/peg/rule.rb +250 -0
  52. data/lib/ebnf/rule.rb +443 -148
  53. data/lib/ebnf/terminals.rb +21 -0
  54. data/lib/ebnf/writer.rb +565 -83
  55. metadata +107 -29
  56. data/etc/sparql.rb +0 -45773
@@ -0,0 +1,21 @@
1
+ # encoding: utf-8
2
# Terminal definitions for the EBNF grammar.
#
# Every constant is a frozen regular expression compiled with the `u`
# (UTF-8) flag. Several are composed from the simpler ones by interpolation.
module EBNF::Terminals
  # Bare identifier: ASCII letters, digits, "_" and ".".
  SYMBOL_BASE = %r(\b[a-zA-Z0-9_\.]+\b)u.freeze
  # Identifier that is NOT immediately followed by "::=" (i.e. not a rule name being defined).
  SYMBOL      = %r(#{SYMBOL_BASE}(?!\s*::=))u.freeze
  # Hexadecimal character escape such as "#x20".
  HEX         = %r(\#x\h+)u.freeze
  CHAR        = %r([\u0009\u000A\u000D\u0020-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
  # Character allowed inside a range; same as CHAR minus "-" (#x2D) and "]" (#x5D).
  R_CHAR      = %r([\u0009\u000A\u000D\u0020-\u002C\u002E-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
  # "[...]" range; the trailing look-ahead rejects a "[id]" prefix of a following rule definition.
  RANGE       = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX})|#{R_CHAR}|#{HEX})+-?\](?!\s+#{SYMBOL_BASE}\s*::=))u.freeze
  # Left-hand side of a rule: optional "[id]", the symbol, and "::=".
  LHS         = %r((?:\[#{SYMBOL_BASE}\])?\s*#{SYMBOL_BASE}\s*::=)u.freeze
  # Negated "[^...]" range.
  O_RANGE     = %r(\[\^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX}|#{R_CHAR}|#{HEX}))+-?\])u.freeze
  # Double-quoted string (may not contain `"`).
  STRING1     = %r("[\u0009\u000A\u000D\u0020\u0021\u0023-\uD7FF\u{10000}-\u{10FFFF}]*")u.freeze
  # Single-quoted string (may not contain `'`).
  STRING2     = %r('[\u0009\u000A\u000D\u0020-\u0026\u0028-\uD7FF\u{10000}-\u{10FFFF}]*')u.freeze
  POSTFIX     = %r([?*+])u.freeze

  # Skippable input: whitespace and comments.
  #
  # * "#" (unless it starts a "#x" hex escape) or "//" up to the end of the
  #   line. The look-ahead, rather than consuming one non-"x" character,
  #   keeps an empty "#" comment from swallowing the line break and the
  #   whole following line.
  # * "/* ... */" and "(* ... *)" block comments. A run of stars is consumed
  #   as a unit so that terminators such as "**/" or "**)" are recognised.
  PASS = %r((
      \s
    | (?:(?:\#(?!x)|//)[^\n\r]*)
    | (?:/\*(?:[^*]|\*+[^*/])*\*+/)
    | (?:\(\*(?:[^*]|\*+[^*\)])*\*+\))
  )+)xmu.freeze
end
@@ -1,22 +1,62 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  require 'rdf'
3
3
  require 'strscan' unless defined?(StringScanner)
4
+ require "ostruct"
4
5
 
5
6
  ##
6
7
  # Serialize ruleset back to EBNF
7
8
  module EBNF
8
9
  class Writer
9
10
  LINE_LENGTH = 80
11
+ LINE_LENGTH_HTML = 200
12
+
13
+ # ASCII escape names
14
+ ASCII_ESCAPE_NAMES = [
15
+ "null", #x00
16
+ "start of heading", #x01
17
+ "start of text", #x02
18
+ "end of text", #x03
19
+ "end of transmission", #x04
20
+ "enquiry", #x05
21
+ "acknowledge", #x06
22
+ "bell", #x07
23
+ "backspace", #x08
24
+ "horizontal tab", #x09
25
+ "new line", #x0A
26
+ "vertical tab", #x0B
27
+ "form feed", #x0C
28
+ "carriage return", #x0D
29
+ "shift out", #x0E
30
+ "shift in", #x0F
31
+ "data link escape", #x10
32
+ "device control 1", #x11
33
+ "device control 2", #x12
34
+ "device control 3", #x13
35
+ "device control 4", #x14
36
+ "negative acknowledge", #x15
37
+ "synchronous idle", #x16
38
+ "end of trans. block", #x17
39
+ "cancel", #x18
40
+ "end of medium", #x19
41
+ "substitute", #x1A
42
+ "escape", #x1B
43
+ "file separator", #x1C
44
+ "group separator", #x1D
45
+ "record separator", #x1E
46
+ "unit separator", #x1F
47
+ "space" #x20
48
+ ]
10
49
 
11
50
  ##
12
51
  # Format rules to a String
13
52
  #
14
53
  # @param [Array<Rule>] rules
54
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
15
55
  # @return [Object]
16
- def self.string(*rules)
56
+ def self.string(*rules, format: :ebnf)
17
57
  require 'stringio' unless defined?(StringIO)
18
58
  buf = StringIO.new
19
- write(buf, *rules)
59
+ write(buf, *rules, format: format)
20
60
  buf.string
21
61
  end
22
62
 
@@ -24,9 +64,10 @@ module EBNF
24
64
  # Format rules to $stdout
25
65
  #
26
66
  # @param [Array<Rule>] rules
67
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
27
68
  # @return [Object]
28
- def self.print(*rules)
29
- write($stdout, *rules)
69
+ def self.print(*rules, format: :ebnf)
70
+ write($stdout, *rules, format: format)
30
71
  end
31
72
 
32
73
  ##
@@ -34,88 +75,158 @@ module EBNF
34
75
  #
35
76
  # @param [Object] out
36
77
  # @param [Array<Rule>] rules
78
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
37
79
  # @return [Object]
38
- def self.write(out, *rules)
39
- Writer.new(rules, out: out)
80
+ def self.write(out, *rules, format: :ebnf)
81
+ Writer.new(rules, out: out, format: format)
40
82
  end
41
83
 
42
84
  ##
43
85
  # Write formatted rules to an IO like object as HTML
44
86
  #
45
87
  # @param [Array<Rule>] rules
88
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
46
89
  # @return [Object]
47
- def self.html(*rules)
90
+ def self.html(*rules, format: :ebnf)
48
91
  require 'stringio' unless defined?(StringIO)
49
92
  buf = StringIO.new
50
- Writer.new(rules, out: buf, html: true)
93
+ Writer.new(rules, out: buf, html: true, format: format)
51
94
  buf.string
52
95
  end
53
96
 
54
97
  ##
55
98
  # @param [Array<Rule>] rules
56
99
  # @param [Hash{Symbol => Object}] options
57
- # @param [#write] :out ($stdout)
58
- # @option options [Symbol] :format
59
- def initialize(rules, out: $stdout, html: false, **options)
60
- @options = options.dup
100
+ # @param [#write] out ($stdout)
101
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
102
+ # @option options [Symbol] format
103
+ # @option options [Boolean] html (false)
104
+ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options)
105
+ @options = options.merge(html: html)
106
+ return if rules.empty?
61
107
 
62
108
  # Determine max LHS length
109
+ format_meth = "format_#{format}".to_sym
63
110
  max_id = rules.max_by {|r| r.id.to_s.length}.id.to_s.length
64
111
  max_sym = rules.max_by {|r| r.sym.to_s.length}.sym.to_s.length
65
- lhs_length = max_sym + 3
66
- lhs_fmt = "%<sym>-#{max_sym}s ::= "
67
- if max_id > 0
112
+ lhs_length = max_sym + 1
113
+ lhs_fmt = case format
114
+ when :abnf then "%<sym>-#{max_sym}s = "
115
+ when :ebnf then "%<sym>-#{max_sym}s ::= "
116
+ when :isoebnf then "%<sym>-#{max_sym}s = "
117
+ end
118
+ if format == :ebnf && max_id > 0
68
119
  lhs_fmt = "%<id>-#{max_id+2}s " + lhs_fmt
69
120
  lhs_length += max_id + 3
70
121
  end
71
- rhs_length = LINE_LENGTH - lhs_length
122
+ rhs_length = (html ? LINE_LENGTH_HTML : LINE_LENGTH) - lhs_length
72
123
 
73
124
  if html
74
125
  # Output as formatted HTML
75
126
  begin
76
- require 'haml'
77
- hout = Haml::Engine.new(HAML_DESC).render(self, rules: rules) do |rule|
78
- formatted_expr = format(rule.expr)
79
- formatted_expr.length > rhs_length ? format(rule.expr, "\n") : formatted_expr
80
- end
81
- out.write hout
127
+ require 'erubis'
128
+ require 'htmlentities'
129
+ @coder = HTMLEntities.new
130
+ eruby = Erubis::Eruby.new(ERB_DESC)
131
+ formatted_rules = rules.map do |rule|
132
+ if rule.kind == :terminals || rule.kind == :pass
133
+ OpenStruct.new(id: ("@#{rule.kind}"),
134
+ sym: nil,
135
+ assign: nil,
136
+ formatted: (
137
+ rule.kind == :terminals ?
138
+ "<strong># Productions for terminals</strong>" :
139
+ self.send(format_meth, rule.expr)))
140
+ else
141
+ formatted_expr = self.send(format_meth, rule.expr)
142
+ # Measure text without markup
143
+ formatted_expr_text = formatted_expr.gsub(%r{</?\w+[^>]*>}, '')
144
+ if formatted_expr_text.length > rhs_length && (format != :abnf || rule.alt?)
145
+ lines = []
146
+ # Can only reasonably split apart alts
147
+ self.send(format_meth, rule.expr, sep: "--rule-extensions--").
148
+ split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
149
+ assign = case format
150
+ when :ebnf
151
+ formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
152
+ (ndx > 0 ? (rule.alt? ? '|' : '') : '::=')
153
+ when :abnf
154
+ formatted.sub!(%r{\s*<code>/</code>\s*}, '')
155
+ (ndx > 0 ? '=/' : '=')
156
+ else
157
+ formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
158
+ (ndx > 0 ? (rule.alt? ? '|' : '') : '=')
159
+ end
160
+ lines << OpenStruct.new(id: ((ndx == 0 ? "[#{rule.id}]" : "") if rule.id),
161
+ sym: (rule.sym if ndx == 0 || format == :abnf),
162
+ assign: assign,
163
+ formatted: formatted)
164
+ end
165
+ if format == :isoebnf
166
+ lines << OpenStruct.new(assign: ';')
167
+ end
168
+ lines
169
+ else
170
+ OpenStruct.new(id: ("[#{rule.id}]" if rule.id),
171
+ sym: rule.sym,
172
+ assign: (format == :ebnf ? '::=' : '='),
173
+ formatted: (formatted_expr + (format == :isoebnf ? ' ;' : '')))
174
+ end
175
+ end
176
+ end.flatten
177
+ out.write eruby.evaluate(format: format, rules: formatted_rules)
82
178
  return
83
179
  rescue LoadError
84
- $stderr.puts "Generating HTML requires haml gem to be loaded"
180
+ $stderr.puts "Generating HTML requires erubis and htmlentities gems to be loaded"
85
181
  end
86
182
  end
87
183
 
88
184
  # Format each rule, considering the available rhs size
89
185
  rules.each do |rule|
90
186
  buffer = if rule.pass?
91
- "%-#{lhs_length-2}s" % "@pass"
187
+ "\n%-#{lhs_length-2}s " % "@pass"
188
+ elsif rule.kind == :terminals
189
+ "\n%-#{lhs_length-2}s" % "@terminals"
92
190
  else
93
191
  lhs_fmt % {id: "[#{rule.id}]", sym: rule.sym}
94
192
  end
95
- formatted_expr = format(rule.expr)
96
- if formatted_expr.length > rhs_length
97
- buffer << format(rule.expr, ("\n" + " " * lhs_length))
193
+ formatted_expr = self.send(format_meth, rule.expr)
194
+ if formatted_expr.length > rhs_length && (format != :abnf || rule.alt?)
195
+ if format == :abnf
196
+ # No whitespace, use =/
197
+ self.send(format_meth, rule.expr, sep: "--rule-extensions--").
198
+ split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
199
+ if ndx > 0
200
+ buffer << "\n" + lhs_fmt.sub('= ', '=/') % {id: "[#{rule.id}]", sym: rule.sym}
201
+ end
202
+ buffer << formatted.sub(/\s*\/\s*/, '')
203
+ end
204
+ else
205
+ # Space out past "= "
206
+ buffer << self.send(format_meth, rule.expr, sep: ("\n" + " " * (lhs_length + (rule.alt? ? 2 : 4) - (format == :ebnf ? 0 : 2))))
207
+ buffer << ("\n" + " " * (lhs_length) + ';') if format == :isoebnf
208
+ end
98
209
  else
99
- buffer << formatted_expr
210
+ buffer << formatted_expr + (format == :isoebnf ? ' ;' : '')
100
211
  end
212
+ buffer << "\n\n" if [:terminals, :pass].include?(rule.kind)
101
213
  out.puts(buffer)
102
214
  end
103
215
  end
104
216
 
105
217
  protected
218
+
219
+ ##
220
+ # W3C EBNF Formatters
221
+ ##
222
+
106
223
  # Format the expression part of a rule
107
- def format(expr, sep = nil)
108
- return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
224
+ def format_ebnf(expr, sep: nil, embedded: false)
225
+ return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
109
226
  if expr.is_a?(String)
110
- if expr.length == 1
111
- return format_char(expr)
112
- elsif expr =~ /\A#x\h+/
113
- return (@options[:html] ? %(<code class="grammar-char-escape">#{expr}</code>) : expr)
114
- elsif expr =~ /"/
115
- return (@options[:html] ? %('<code class="grammar-literal">#{escape(expr, "'")}</code>') : %('#{escape(expr, "'")}'))
116
- else
117
- return (@options[:html] ? %("<code class="grammar-literal">#{escape(expr, '"')}</code>") : %("#{escape(expr, '"')}"))
118
- end
227
+ return expr.length == 1 ?
228
+ format_ebnf_char(expr) :
229
+ format_ebnf_string(expr, expr.include?('"') ? "'" : '"')
119
230
  end
120
231
  parts = {
121
232
  alt: (@options[:html] ? "<code>|</code> " : "| "),
@@ -128,95 +239,466 @@ module EBNF
128
239
  rparen = (@options[:html] ? "<code>)</code> " : ")")
129
240
 
130
241
  case expr.first
242
+ when :istr
243
+ # Loses fidelity, but, oh well ...
244
+ format_ebnf(expr.last, embedded: true)
131
245
  when :alt, :diff
132
246
  this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
133
- expr[1..-1].map {|e| format(e)}.join(this_sep)
247
+ res = expr[1..-1].map {|e| format_ebnf(e, embedded: true)}.join(this_sep)
248
+ embedded ? (lparen + res + rparen) : res
134
249
  when :star, :plus, :opt
135
- raise "Expected star expression to have a single operand" unless expr.length == 2
136
250
  char = parts[expr.first.to_sym]
137
- r = format(expr[1])
138
- (r.start_with?("(") || Array(expr[1]).length == 1) ? "#{r}#{char}" : "(#{r})#{char}"
251
+ r = format_ebnf(expr[1], embedded: true)
252
+ "#{r}#{char}"
139
253
  when :hex
140
- (@options[:html] ? %(<code class="grammar-char-escape">#{expr.last}</code>) : expr.last)
254
+ escape_ebnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
141
255
  when :range
142
- format_range(expr.last)
256
+ format_ebnf_range(expr.last)
143
257
  when :seq
144
258
  this_sep = (sep ? sep : " ")
145
- expr[1..-1].map {|e| r = format(e); Array(e).length > 2 ? "#{lparen}#{r}#{rparen}" : r}.join(this_sep)
259
+ res = expr[1..-1].map do |e|
260
+ format_ebnf(e, embedded: true)
261
+ end.join(this_sep)
262
+ embedded ? (lparen + res + rparen) : res
263
+ when :rept
264
+ # Expand repetition
265
+ min, max, value = expr[1..-1]
266
+ if min == 0 && max == 1
267
+ format_ebnf([:opt, value], sep: sep, embedded: embedded)
268
+ elsif min == 0 && max == '*'
269
+ format_ebnf([:star, value], sep: sep, embedded: embedded)
270
+ elsif min == 1 && max == '*'
271
+ format_ebnf([:plus, value], sep: sep, embedded: embedded)
272
+ else
273
+ val2 = [:seq]
274
+ while min > 0
275
+ val2 << value
276
+ min -= 1
277
+ max -= 1 unless max == '*'
278
+ end
279
+ if max == '*'
280
+ val2 << [:star, value]
281
+ else
282
+ opt = nil
283
+ while max > 0
284
+ opt = [:opt, opt ? [:seq, value, opt] : value]
285
+ max -= 1
286
+ end
287
+ val2 << opt if opt
288
+ end
289
+ format_ebnf(val2, sep: sep, embedded: embedded)
290
+ end
146
291
  else
147
292
  raise "Unknown operator: #{expr.first}"
148
293
  end
149
294
  end
150
295
 
151
296
  # Format a single-character string, preferring hex for non-main ASCII
152
- def format_char(c)
297
+ def format_ebnf_char(c)
153
298
  case c.ord
154
- when 0x22 then (@options[:html] ? %('<code class="grammar-literal">"</code>') : %{'"'})
155
- when (0x23..0x7e) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"})
156
- else (@options[:html] ? %(<code class="grammar-char-escape">#{escape_hex(c)}</code>) : escape_hex(c))
299
+ when (0x21) then (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"})
300
+ when 0x22 then (@options[:html] ? %('<code class="grammar-literal">&quot;</code>') : %{'"'})
301
+ when (0x23..0x7e) then (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"})
302
+ when (0x80..0xFFFD) then (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"})
303
+ else escape_ebnf_hex(c)
157
304
  end
158
305
  end
159
306
 
160
307
  # Format a range
161
- def format_range(string)
308
+ def format_ebnf_range(string)
162
309
  lbrac = (@options[:html] ? "<code>[</code> " : "[")
163
310
  rbrac = (@options[:html] ? "<code>]</code> " : "]")
164
- dash = (@options[:html] ? "<code>-</code> " : "-")
165
311
 
166
312
  buffer = lbrac
167
313
  s = StringScanner.new(string)
168
314
  while !s.eos?
169
315
  case
170
316
  when s.scan(/\A[!"\u0024-\u007e]+/)
171
- buffer << (@options[:html] ? %(<code class="grammar-literal">#{s.matched}</code>) : s.matched)
317
+ buffer << (@options[:html] ? %(<code class="grammar-literal">#{@coder.encode s.matched}</code>) : s.matched)
172
318
  when s.scan(/\A#x\h+/)
173
- buffer << (@options[:html] ? %(<code class="grammar-char-escape">#{s.matched}</code>) : s.matched)
174
- when s.scan(/\A-/)
175
- buffer << dash
319
+ buffer << escape_ebnf_hex(s.matched[2..-1].hex.chr(Encoding::UTF_8))
176
320
  else
177
- buffer << (@options[:html] ? %(<code class="grammar-char-escape">#{escape_hex(s.getch)}</code>) : escape_hex(s.getch))
321
+ buffer << escape_ebnf_hex(s.getch)
178
322
  end
179
323
  end
180
324
  buffer + rbrac
181
325
  end
182
326
 
183
327
  # Escape a string, using as many UTF-8 characters as possible
184
- def escape(string, quote = '"')
185
- buffer = ""
328
+ def format_ebnf_string(string, quote = '"')
186
329
  string.each_char do |c|
187
- buffer << case (u = c.ord)
188
- when (0x00..0x1f) then "#x%02X" % u
189
- when quote.ord then "#x%02X" % u
190
- else c
330
+ case c.ord
331
+ when 0x00..0x19, quote.ord
332
+ raise RangeError, "cannot format #{string.inspect} as an EBNF String: #{c.inspect} is out of range" unless
333
+ ISOEBNF::TERMINAL_CHARACTER.match?(c)
191
334
  end
192
335
  end
193
- buffer
336
+
337
+ res = "#{quote}#{string}#{quote}"
338
+ @options[:html] ? @coder.encode(res) : res
194
339
  end
195
340
 
196
- def escape_hex(u)
341
+ def escape_ebnf_hex(u)
197
342
  fmt = case u.ord
343
+ when 0x00..0x20 then "#x%02X"
198
344
  when 0x0000..0x00ff then "#x%02X"
199
345
  when 0x0100..0xffff then "#x%04X"
200
346
  else "#x%08X"
201
347
  end
202
- sprintf(fmt, u.ord)
203
- end
204
-
205
- HAML_DESC = %q(
206
- %table.grammar
207
- %tbody#grammar-productions
208
- - rules.each do |rule|
209
- %tr{id: "grammar-production-#{rule.sym}"}
210
- - if rule.pass?
211
- %td{colspan: 3}
212
- %code<="@pass"
213
- - else
214
- %td<= "[#{rule.id}]"
215
- %td<
216
- %code<= rule.sym
217
- %td<= "::="
218
- %td
219
- != yield rule
348
+ char = fmt % u.ord
349
+ if @options[:html]
350
+ if u.ord <= 0x20
351
+ char = %(<abbr title="#{ASCII_ESCAPE_NAMES[u.ord]}">#{@coder.encode char}</abbr>)
352
+ elsif u.ord < 0x7F
353
+ char = %(<abbr title="ascii '#{@coder.encode u}'">#{@coder.encode char}</abbr>)
354
+ elsif u.ord == 0x7F
355
+ char = %(<abbr title="delete">#{@coder.encode char}</abbr>)
356
+ elsif u.ord <= 0xFF
357
+ char = %(<abbr title="extended ascii '#{u}'">#{char}</abbr>)
358
+ else
359
+ char = %(<abbr title="unicode '#{u}'">#{char}</abbr>)
360
+ end
361
+ %(<code class="grammar-char-escape">#{char}</code>)
362
+ else
363
+ char
364
+ end
365
+ end
366
+
367
+ ##
368
+ # ABNF Formatters
369
+ ##
370
+
371
# Format the expression part of a rule as ABNF.
#
# @param [Symbol, String, Array] expr
#   a rule reference (Symbol), a literal (String), or an S-expression whose
#   first element names the operator
# @param [String] sep (nil)
#   text placed before the operator between operands of `alt`/`seq`;
#   defaults to a single space
# @param [Boolean] embedded (false)
#   true when the result is an operand of another operator, in which case
#   `alt` and `seq` results are wrapped in parentheses
# @param [Boolean] sensitive (true)
#   prefix multi-character literals with the case-sensitive marker `%s`
# @return [String]
# @raise [RangeError] for the `diff` operator, which ABNF cannot express
# @raise [RuntimeError] for an unknown operator
def format_abnf(expr, sep: nil, embedded: false, sensitive: true)
  html = @options[:html]

  if expr.is_a?(Symbol)
    return expr.to_s unless html
    return %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>)
  end

  if expr.is_a?(String)
    if expr.length == 1
      return format_abnf_char(expr)
    elsif expr.start_with?('%')
      # Already encoded
      return expr
    elsif expr.include?('"')
      # ABNF strings cannot contain a double quote: emit the text between
      # quotes as strings and every quote as %x22. A negative split limit
      # keeps trailing empty segments, so trailing quotes are not lost.
      seq = [:seq]
      expr.split('"', -1).each_with_index do |segment, ndx|
        seq << [:hex, "#x22"] if ndx > 0
        seq << segment unless segment.empty?
      end
      # Propagate `embedded` so the sequence is parenthesized when it is an
      # operand (e.g. of a repetition, which binds tighter than concatenation).
      return format_abnf(seq, sep: nil, embedded: embedded)
    else
      # The %s marker precedes the opening quote in both output forms.
      prefix = sensitive ? '%s' : ''
      return html ?
        %(#{prefix}"<code class="grammar-literal">#{@coder.encode expr}</code>") :
        %(#{prefix}"#{expr}")
    end
  end

  parts = {
    alt:  (html ? "<code>/</code> " : "/ "),
    star: (html ? "<code>*</code> " : "*"),
    plus: (html ? "<code>+</code> " : "1*")
  }
  lbrac  = (html ? "<code>[</code> " : "[")
  rbrac  = (html ? "<code>]</code> " : "]")
  lparen = (html ? "<code>(</code> " : "(")
  rparen = (html ? "<code>)</code> " : ")")

  case expr.first
  when :istr
    # FIXME: if string part is segmented, need to do something different
    format_abnf(expr.last, embedded: true, sensitive: false)
  when :alt
    this_sep = (sep || " ") + parts[:alt]
    res = expr[1..-1].map { |e| format_abnf(e, embedded: true) }.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :diff
    raise RangeError, "ABNF does not support the diff operator"
  when :opt
    "#{lbrac}#{format_abnf(expr[1], embedded: true)}#{rbrac}"
  when :plus, :star
    "#{parts[expr.first]}#{format_abnf(expr[1], embedded: true)}"
  when :hex
    # Decode as UTF-8: Integer#chr without an encoding raises above #xFF.
    escape_abnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
  when :range
    # Returns an [:alt] if composed of multiple sequences
    # Note: ABNF does not support the `not` operator
    res = format_abnf_range(expr.last)
    res.is_a?(Array) ? format_abnf(res, embedded: true) : res
  when :seq
    res = expr[1..-1].map { |e| format_abnf(e, embedded: true) }.join(sep || " ")
    embedded ? (lparen + res + rparen) : res
  when :rept
    # ABNF has native <min>*<max> repetition, so no expansion is needed
    min, max, value = expr[1..-1]
    r = format_abnf(value, embedded: true)
    if min == max
      "#{min}#{r}"
    elsif min == 0 && max == '*'
      "#{parts[:star]}#{r}"
    elsif min > 0 && max == '*'
      "#{min}#{parts[:star]}#{r}"
    else
      "#{min}#{parts[:star]}#{max}#{r}"
    end
  else
    raise "Unknown operator: #{expr.first}"
  end
end
454
+
455
# Format a single-character string, preferring hex for non-main ASCII
#
# Printable ASCII other than the double quote is emitted as a quoted
# string; everything else is delegated to {#escape_abnf_hex}.
#
# @param [String] c
# @return [String]
def format_abnf_char(c)
  if /[\x20-\x21\x23-\x7E]/.match?(c)
    # ABNF quoted strings have no escape mechanism, so the character is
    # wrapped verbatim (String#inspect would double a backslash).
    %("#{c}")
  else
    escape_abnf_hex(c)
  end
end
463
+
464
# Format a range
#
# Presumes range has already been validated
#
# Each member of the range becomes an ABNF numeric value: `%x..` for
# members written as `#x..` escapes (digits kept as written) and `%d..` for
# literal characters. `a-b` pairs become value ranges (`%d97-122`). When the
# two endpoints are written differently, the upper bound is rendered in the
# base of the lower bound. Individual members are returned as separate
# alternatives, because the dotted form (`%d97.98.99`) denotes a
# concatenation in ABNF, not a choice.
#
# @param [String] string the range content, without the enclosing brackets
# @return [String, Array]
#   a single encoded value, or `[:alt, value, ...]` for further formatting
# @raise [RangeError] if the range is negated or cannot be scanned
def format_abnf_range(string)
  raise RangeError, "cannot format #{string.inspect} an ABNF range" if string.start_with?('^')

  # A trailing "-" is a literal hyphen rather than a range operator
  literal_dash = string.end_with?('-')
  string = string[0..-2] if literal_dash

  scanner = StringScanner.new(string)
  # Completed ranges are Strings; single values still eligible to become the
  # lower bound of a range are [base, digits] pairs.
  entries = []
  in_range = false

  until scanner.eos?
    if (hex = scanner.scan(Terminals::HEX))
      base, digits = 'x', hex[2..-1]
      code = digits.hex
    elsif (chr = scanner.scan(Terminals::R_CHAR))
      code = chr.codepoints.first
      base, digits = 'd', code.to_s
    else
      # Nothing consumable: fail rather than loop forever
      raise RangeError, "cannot format #{string.inspect} an ABNF range"
    end

    if in_range
      low = entries.pop
      raise RangeError, "cannot format #{string.inspect} an ABNF range" unless low.is_a?(Array)
      low_base, low_digits = low
      high_digits = if base == low_base
        digits
      else
        low_base == 'x' ? code.to_s(16).upcase : code.to_s
      end
      entries << "%#{low_base}#{low_digits}-#{high_digits}"
      in_range = false
    else
      entries << [base, digits]
    end

    in_range = true if scanner.scan(/-/)
  end

  entries << ['d', '45'] if literal_dash

  alt = [:alt] + entries.map { |e| e.is_a?(Array) ? "%#{e[0]}#{e[1]}" : e }

  # FIXME: HTML abbreviations?
  # A lone value is returned directly; otherwise the alt is formatted by the caller
  alt.length == 2 ? alt.last : alt
end
528
+
529
# Encode a character as an ABNF hexadecimal value (`%xNN`).
#
# The code point of the first character of `u` is written with 2, 4 or 8
# upper-case hex digits depending on its magnitude. In HTML mode the value
# is wrapped in an `<abbr>` whose title describes the character, inside a
# `<code class="grammar-char-escape">` element.
#
# @param [String] u
# @return [String]
def escape_abnf_hex(u)
  code = u.ord
  fmt = case code
  when 0x0000..0x00ff then "%02X"
  when 0x0100..0xffff then "%04X"
  else "%08X"
  end
  char = "%x" + (fmt % code)
  return char unless @options[:html]

  char = if code <= 0x20
    %(<abbr title="#{ASCII_ESCAPE_NAMES[code]}">#{@coder.encode char}</abbr>)
  elsif code < 0x7F
    # Strictly below #x7F so the "delete" branch is reachable; the
    # character is entity-encoded because it lands inside an attribute
    # (it may be a double quote, "<" or "&").
    %(<abbr title="ascii '#{@coder.encode u}'">#{@coder.encode char}</abbr>)
  elsif code == 0x7F
    %(<abbr title="delete">#{@coder.encode char}</abbr>)
  elsif code <= 0xFF
    %(<abbr title="extended ascii '#{u}'">#{char}</abbr>)
  else
    %(<abbr title="unicode '#{u}'">#{char}</abbr>)
  end
  %(<code class="grammar-char-escape">#{char}</code>)
end
553
+
554
+ ##
555
+ # ISO EBNF Formatters
556
+ ##
557
+
558
# Format the expression part of a rule as ISO EBNF.
#
# @param [Symbol, String, Array] expr
#   a rule reference (Symbol), a literal (String, possibly a single `#x..`
#   escape), or an S-expression whose first element names the operator
# @param [String] sep (nil)
#   text placed between operands of `alt`/`diff`/`seq`; defaults to a space
# @param [Boolean] embedded (false)
#   true when the result is an operand of another operator, in which case
#   `alt`, `diff` and `seq` results are wrapped in parentheses
# @return [String]
# @raise [RangeError]
#   if a literal contains a character not matched by
#   `ISOEBNF::TERMINAL_CHARACTER`
# @raise [RuntimeError] for an unknown operator
def format_isoebnf(expr, sep: nil, embedded: false)
  html = @options[:html]

  if expr.is_a?(Symbol)
    return expr.to_s unless html
    return %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>)
  end

  if expr.is_a?(String)
    # Decode a string that is exactly one hex escape. Anchoring at the end
    # keeps literals that merely start with "#x.." intact, and decoding as
    # UTF-8 avoids Integer#chr raising for code points above #xFF.
    expr = expr[2..-1].hex.chr(Encoding::UTF_8) if expr.match?(/\A#x\h+\z/)
    expr.each_char do |c|
      raise RangeError, "cannot format #{expr.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless
        ISOEBNF::TERMINAL_CHARACTER.match?(c)
    end
    # Single quotes are used only when the text contains a double quote
    quote = expr.include?('"') ? "'" : '"'
    return html ?
      %(#{quote}<code class="grammar-literal">#{@coder.encode expr}</code>#{quote}) :
      %(#{quote}#{expr}#{quote})
  end

  parts = {
    alt:  (html ? "<code>|</code> " : "| "),
    diff: (html ? "<code>-</code> " : "- ")
  }
  lparen = (html ? "<code>(</code> " : "(")
  rparen = (html ? "<code>)</code> " : ")")

  case expr.first
  when :istr
    # Loses fidelity (case-insensitivity cannot be expressed), but, oh well ...
    format_isoebnf(expr.last, embedded: true)
  when :alt, :diff
    this_sep = (sep || " ") + parts[expr.first]
    res = expr[1..-1].map { |e| format_isoebnf(e, embedded: true) }.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :opt
    "[#{format_isoebnf(expr[1], embedded: true)}]"
  when :star
    "{#{format_isoebnf(expr[1], embedded: true)}}"
  when :plus
    r = format_isoebnf(expr[1], embedded: true)
    "#{r}, {#{r}}"
  when :hex
    # The String branch above decodes the "#x.." escape
    format_isoebnf(expr[1], embedded: true)
  when :range
    res = format_isoebnf_range(expr.last)
    res.is_a?(Array) ? format_isoebnf(res, embedded: true) : res
  when :seq
    this_sep = "," + (sep || " ")
    res = expr[1..-1].map { |e| format_isoebnf(e, embedded: true) }.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :rept
    # Expand repetition
    min, max, value = expr[1..-1]
    if min == 0 && max == 1
      format_isoebnf([:opt, value], sep: sep, embedded: embedded)
    elsif min == 0 && max == '*'
      format_isoebnf([:star, value], sep: sep, embedded: embedded)
    elsif min == 1 && max == '*'
      format_isoebnf([:plus, value], sep: sep, embedded: embedded)
    else
      # `min` mandatory copies ...
      val2 = [:seq]
      while min > 0
        val2 << value
        min -= 1
        max -= 1 unless max == '*'
      end
      if max == '*'
        # ... followed by any number more
        val2 << [:star, value]
      else
        # ... followed by up to `max - min` nested optional copies
        opt = nil
        while max > 0
          opt = [:opt, opt ? [:seq, value, opt] : value]
          max -= 1
        end
        val2 << opt if opt
      end
      format_isoebnf(val2, sep: sep, embedded: embedded)
    end
  else
    raise "Unknown operator: #{expr.first}"
  end
end
642
+
643
# Format a range
#
# ISO EBNF has no range syntax, so the range is expanded into an
# alternation of its individual characters.
#
# @param [String] string the range content, without the enclosing brackets
# @return [String, Array]
#   a quoted string when the range holds exactly one character, otherwise
#   `[:alt, char, ...]` for further formatting
# @raise [RangeError]
#   if the range is negated, contains a character not matched by
#   `ISOEBNF::TERMINAL_CHARACTER`, or has a descending `a-b` pair
def format_isoebnf_range(string)
  raise RangeError, "cannot format #{string.inspect} as an ISO EBNF range" if string.start_with?('^')

  # A trailing "-" is a literal hyphen rather than a range operator
  literal_dash = string.end_with?('-')
  string = string[0..-2] if literal_dash

  scanner = StringScanner.new(string)
  chars = []
  in_range = false
  # Build chars from different ranges/enums
  until scanner.eos?
    char = if (hex = scanner.scan(Terminals::HEX))
      # Integer#chr (there is no Integer#char) decodes the escape
      hex[2..-1].hex.chr(Encoding::UTF_8)
    else
      scanner.scan(Terminals::R_CHAR)
    end
    raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration: #{char.inspect} is out of range" unless
      char && ISOEBNF::TERMINAL_CHARACTER.match?(char)

    if in_range
      # Add every character after chars.last up to and including this one
      raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration" unless chars.last < char
      chars.concat((chars.last..char).to_a[1..-1])
      in_range = false
    else
      chars << char
    end

    in_range = true if scanner.scan(/-/)
  end

  chars << '-' if literal_dash

  if chars.length == 1
    # ISO EBNF strings have no escapes: quote verbatim, choosing the quote
    # character that does not clash (String#inspect would add backslashes).
    only = chars.last
    quote = only == '"' ? "'" : '"'
    "#{quote}#{only}#{quote}"
  else
    chars.unshift(:alt)
  end
end
684
+
685
+ ERB_DESC = %q(
686
+ <table class="grammar">
687
+ <tbody id="grammar-productions" class="<%= @format %>">
688
+ <% for rule in @rules %>
689
+ <tr<%= %{ id="grammar-production-#{rule.sym}"} unless %w(=/ |).include?(rule.assign)%>>
690
+ <% if rule.id %>
691
+ <td<%= " colspan=2" unless rule.sym %>><%= rule.id %></td>
692
+ <% end %>
693
+ <% if rule.sym %>
694
+ <td><code><%== rule.sym %></code></td>
695
+ <% end %>
696
+ <td><%= rule.assign %></td>
697
+ <td><%= rule.formatted %></td>
698
+ </tr>
699
+ <% end %>
700
+ </tbody>
701
+ </table>
220
702
  ).gsub(/^ /, '')
221
703
  end
222
704
  end