ebnf 1.1.2 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +218 -196
  3. data/UNLICENSE +1 -1
  4. data/VERSION +1 -1
  5. data/bin/ebnf +40 -21
  6. data/etc/abnf-core.ebnf +52 -0
  7. data/etc/abnf.abnf +121 -0
  8. data/etc/abnf.ebnf +124 -0
  9. data/etc/abnf.sxp +45 -0
  10. data/etc/doap.ttl +23 -15
  11. data/etc/ebnf.ebnf +21 -33
  12. data/etc/ebnf.html +171 -160
  13. data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
  14. data/etc/ebnf.ll1.sxp +182 -183
  15. data/etc/ebnf.peg.rb +90 -0
  16. data/etc/ebnf.peg.sxp +84 -0
  17. data/etc/ebnf.sxp +40 -41
  18. data/etc/iso-ebnf.ebnf +140 -0
  19. data/etc/iso-ebnf.isoebnf +138 -0
  20. data/etc/iso-ebnf.sxp +65 -0
  21. data/etc/sparql.ebnf +4 -4
  22. data/etc/sparql.html +1603 -1751
  23. data/etc/sparql.ll1.sxp +7372 -7372
  24. data/etc/sparql.peg.rb +532 -0
  25. data/etc/sparql.peg.sxp +597 -0
  26. data/etc/sparql.sxp +363 -362
  27. data/etc/turtle.ebnf +3 -3
  28. data/etc/turtle.html +465 -517
  29. data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
  30. data/etc/turtle.ll1.sxp +425 -425
  31. data/etc/turtle.peg.rb +182 -0
  32. data/etc/turtle.peg.sxp +199 -0
  33. data/etc/turtle.sxp +103 -101
  34. data/lib/ebnf.rb +7 -2
  35. data/lib/ebnf/abnf.rb +301 -0
  36. data/lib/ebnf/abnf/core.rb +23 -0
  37. data/lib/ebnf/abnf/meta.rb +111 -0
  38. data/lib/ebnf/base.rb +128 -87
  39. data/lib/ebnf/bnf.rb +1 -26
  40. data/lib/ebnf/ebnf/meta.rb +90 -0
  41. data/lib/ebnf/isoebnf.rb +229 -0
  42. data/lib/ebnf/isoebnf/meta.rb +75 -0
  43. data/lib/ebnf/ll1.rb +140 -8
  44. data/lib/ebnf/ll1/lexer.rb +37 -32
  45. data/lib/ebnf/ll1/parser.rb +113 -73
  46. data/lib/ebnf/ll1/scanner.rb +83 -51
  47. data/lib/ebnf/native.rb +320 -0
  48. data/lib/ebnf/parser.rb +285 -302
  49. data/lib/ebnf/peg.rb +39 -0
  50. data/lib/ebnf/peg/parser.rb +561 -0
  51. data/lib/ebnf/peg/rule.rb +241 -0
  52. data/lib/ebnf/rule.rb +453 -163
  53. data/lib/ebnf/terminals.rb +21 -0
  54. data/lib/ebnf/writer.rb +561 -88
  55. metadata +114 -28
  56. data/etc/sparql.rb +0 -45773
@@ -0,0 +1,21 @@
1
# encoding: utf-8
# Terminal definitions for the EBNF grammar.
#
# Every constant is a frozen, Unicode-aware Regexp. Several of them are built
# by interpolating earlier ones, so the definition order below matters.
module EBNF
  module Terminals
    # A bare symbol: letters, digits, underscore and dot, on word boundaries.
    SYMBOL_BASE = %r(\b[a-zA-Z0-9_\.]+\b)u.freeze
    # A symbol used as a reference, i.e. not followed by "::=".
    SYMBOL      = %r(#{SYMBOL_BASE}(?!\s*::=))u.freeze
    # A hexadecimal character escape such as "#x20".
    HEX         = %r(\#x\h+)u.freeze
    # Any character permitted by the grammar.
    CHAR        = %r([\u0009\u000A\u000D\u0020-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
    # A character permitted inside a range: CHAR without "-" (U+002D) and "]" (U+005D).
    R_CHAR      = %r([\u0009\u000A\u000D\u0020-\u002C\u002E-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
    # A bracketed range or enumeration; the look-ahead rejects a bracketed rule
    # identifier that is followed by a left-hand side.
    RANGE       = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX})|#{R_CHAR}|#{HEX})+-?\](?!\s+#{SYMBOL_BASE}\s*::=))u.freeze
    # The left-hand side of a rule: optional "[id]", the symbol, then "::=".
    LHS         = %r((?:\[#{SYMBOL_BASE}\])?\s*#{SYMBOL_BASE}\s*::=)u.freeze
    # A negated range, "[^...]".
    O_RANGE     = %r(\[\^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX}|#{R_CHAR}|#{HEX}))+-?\])u.freeze
    # A double-quoted string (no embedded double quote).
    STRING1     = %r("[\u0009\u000A\u000D\u0020\u0021\u0023-\uD7FF\u{10000}-\u{10FFFF}]*")u.freeze
    # A single-quoted string (no embedded single quote).
    STRING2     = %r('[\u0009\u000A\u000D\u0020-\u0026\u0028-\uD7FF\u{10000}-\u{10FFFF}]*')u.freeze
    # Postfix operators.
    POSTFIX     = %r([?*+])u.freeze
    # Skipped input: whitespace, "#" and "//" line comments, and "/* */" and
    # "(* *)" block comments. A "#" starts a comment unless it is followed by
    # "x" (that is a HEX escape). The negative look-ahead consumes nothing, so
    # a lone "#" at the end of a line does not swallow the following line.
    PASS        = %r((
                    \s
                  | (?:(?:\#(?!x)|//)[^\n\r]*)
                  | (?:/\*(?:(?:\*[^/])|[^*])*\*/)
                  | (?:\(\*(?:(?:\*[^\)])|[^*])*\*\))
                  )+)xmu.freeze
  end
end
@@ -1,22 +1,62 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  require 'rdf'
3
3
  require 'strscan' unless defined?(StringScanner)
4
+ require "ostruct"
4
5
 
5
6
  ##
6
7
  # Serialize ruleset back to EBNF
7
8
  module EBNF
8
9
  class Writer
9
10
  LINE_LENGTH = 80
11
+ LINE_LENGTH_HTML = 200
12
+
13
# ASCII escape names.
#
# Human-readable names for the code points 0x00 through 0x20, indexed by code
# point. Frozen so that the shared lookup table cannot be modified by accident.
ASCII_ESCAPE_NAMES = [
  "null",                 # x00
  "start of heading",     # x01
  "start of text",        # x02
  "end of text",          # x03
  "end of transmission",  # x04
  "enquiry",              # x05
  "acknowledge",          # x06
  "bell",                 # x07
  "backspace",            # x08
  "horizontal tab",       # x09
  "new line",             # x0A
  "vertical tab",         # x0B
  "form feed",            # x0C
  "carriage return",      # x0D
  "shift out",            # x0E
  "shift in",             # x0F
  "data link escape",     # x10
  "device control 1",     # x11
  "device control 2",     # x12
  "device control 3",     # x13
  "device control 4",     # x14
  "negative acknowledge", # x15
  "synchronous idle",     # x16
  "end of trans. block",  # x17
  "cancel",               # x18
  "end of medium",        # x19
  "substitute",           # x1A
  "escape",               # x1B
  "file separator",       # x1C
  "group separator",      # x1D
  "record separator",     # x1E
  "unit separator",       # x1F
  "space"                 # x20
].freeze
10
49
 
11
50
  ##
12
51
  # Format rules to a String
13
52
  #
14
53
  # @param [Array<Rule>] rules
54
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
15
55
  # @return [Object]
16
- def self.string(*rules)
56
+ def self.string(*rules, format: :ebnf)
17
57
  require 'stringio' unless defined?(StringIO)
18
58
  buf = StringIO.new
19
- write(buf, *rules)
59
+ write(buf, *rules, format: format)
20
60
  buf.string
21
61
  end
22
62
 
@@ -24,9 +64,10 @@ module EBNF
24
64
  # Format rules to $stdout
25
65
  #
26
66
  # @param [Array<Rule>] rules
67
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
27
68
  # @return [Object]
28
- def self.print(*rules)
29
- write($stdout, *rules)
69
+ def self.print(*rules, format: :ebnf)
70
+ write($stdout, *rules, format: format)
30
71
  end
31
72
 
32
73
  ##
@@ -34,92 +75,155 @@ module EBNF
34
75
  #
35
76
  # @param [Object] out
36
77
  # @param [Array<Rule>] rules
78
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
37
79
  # @return [Object]
38
- def self.write(out, *rules)
39
- Writer.new(rules, out: out)
80
+ def self.write(out, *rules, format: :ebnf)
81
+ Writer.new(rules, out: out, format: format)
40
82
  end
41
83
 
42
84
  ##
43
85
  # Write formatted rules to an IO like object as HTML
44
86
  #
45
87
  # @param [Array<Rule>] rules
88
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
46
89
  # @return [Object]
47
- def self.html(*rules)
90
+ def self.html(*rules, format: :ebnf)
48
91
  require 'stringio' unless defined?(StringIO)
49
92
  buf = StringIO.new
50
- Writer.new(rules, out: buf, html: true)
93
+ Writer.new(rules, out: buf, html: true, format: format)
51
94
  buf.string
52
95
  end
53
96
 
54
97
  ##
55
98
  # @param [Array<Rule>] rules
56
99
  # @param [Hash{Symbol => Object}] options
57
- # @option options [Symbol] :format
58
- # @option options [#write] :out ($stdout)
59
- # @option options [Boolean] :html (false)
60
- # Format as HTML
61
- def initialize(rules, options = {})
62
- @options = options.dup
63
- out = options.fetch(:out, $stdio)
64
- #fmt = options.fetch(:format, :ebnf)
100
+ # @param [#write] out ($stdout)
101
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
102
+ # @option options [Symbol] format
103
+ # @option options [Boolean] html (false)
104
+ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options)
105
+ @options = options.merge(html: html)
106
+ return if rules.empty?
65
107
 
66
108
  # Determine max LHS length
109
+ format_meth = "format_#{format}".to_sym
67
110
  max_id = rules.max_by {|r| r.id.to_s.length}.id.to_s.length
68
111
  max_sym = rules.max_by {|r| r.sym.to_s.length}.sym.to_s.length
69
- lhs_length = max_sym + 3
70
- lhs_fmt = "%<sym>-#{max_sym}s ::= "
71
- if max_id > 0
112
+ lhs_length = max_sym + 1
113
+ lhs_fmt = case format
114
+ when :abnf then "%<sym>-#{max_sym}s = "
115
+ when :ebnf then "%<sym>-#{max_sym}s ::= "
116
+ when :isoebnf then "%<sym>-#{max_sym}s = "
117
+ end
118
+ if format == :ebnf && max_id > 0
72
119
  lhs_fmt = "%<id>-#{max_id+2}s " + lhs_fmt
73
120
  lhs_length += max_id + 3
74
121
  end
75
- rhs_length = LINE_LENGTH - lhs_length
122
+ rhs_length = (html ? LINE_LENGTH_HTML : LINE_LENGTH) - lhs_length
76
123
 
77
- if @options[:html]
124
+ if html
78
125
  # Output as formatted HTML
79
126
  begin
80
- require 'haml'
81
- html = Haml::Engine.new(HAML_DESC).render(self, rules: rules) do |rule|
82
- formatted_expr = format(rule.expr)
83
- formatted_expr.length > rhs_length ? format(rule.expr, "\n") : formatted_expr
84
- end
85
- out.write html
127
+ require 'erubis'
128
+ require 'htmlentities'
129
+ @coder = HTMLEntities.new
130
+ eruby = Erubis::Eruby.new(ERB_DESC)
131
+ formatted_rules = rules.map do |rule|
132
+ if rule.kind == :terminals || rule.kind == :pass
133
+ OpenStruct.new(id: ("@#{rule.kind}"),
134
+ sym: nil,
135
+ assign: nil,
136
+ formatted: ("<strong># Productions for terminals</strong>" if rule.kind == :terminals))
137
+ else
138
+ formatted_expr = self.send(format_meth, rule.expr)
139
+ # Measure text without markup
140
+ formatted_expr_text = formatted_expr.gsub(%r{</?\w+[^>]*>}, '')
141
+ if formatted_expr_text.length > rhs_length && (format != :abnf || rule.alt?)
142
+ lines = []
143
+ # Can only reasonably split apart alts
144
+ self.send(format_meth, rule.expr, sep: "--rule-extensions--").
145
+ split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
146
+ assign = case format
147
+ when :ebnf
148
+ formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
149
+ (ndx > 0 ? (rule.alt? ? '|' : '') : '::=')
150
+ when :abnf
151
+ formatted.sub!(%r{\s*<code>/</code>\s*}, '')
152
+ (ndx > 0 ? '=/' : '=')
153
+ else
154
+ formatted.sub!(%r{\s*<code>\|</code>\s*}, '')
155
+ (ndx > 0 ? (rule.alt? ? '|' : '') : '=')
156
+ end
157
+ lines << OpenStruct.new(id: ((ndx == 0 ? "[#{rule.id}]" : "") if rule.id),
158
+ sym: (rule.sym if ndx == 0 || format == :abnf),
159
+ assign: assign,
160
+ formatted: formatted)
161
+ end
162
+ if format == :isoebnf
163
+ lines << OpenStruct.new(assign: ';')
164
+ end
165
+ lines
166
+ else
167
+ OpenStruct.new(id: ("[#{rule.id}]" if rule.id),
168
+ sym: rule.sym,
169
+ assign: (format == :ebnf ? '::=' : '='),
170
+ formatted: (formatted_expr + (format == :isoebnf ? ' ;' : '')))
171
+ end
172
+ end
173
+ end.flatten
174
+ out.write eruby.evaluate(format: format, rules: formatted_rules)
86
175
  return
87
176
  rescue LoadError
88
- $stderr.puts "Generating HTML requires haml gem to be loaded"
177
+ $stderr.puts "Generating HTML requires erubis and htmlentities gems to be loaded"
89
178
  end
90
179
  end
91
180
 
92
181
  # Format each rule, considering the available rhs size
93
182
  rules.each do |rule|
94
183
  buffer = if rule.pass?
95
- "%-#{lhs_length-2}s" % "@pass"
184
+ "\n%-#{lhs_length-2}s " % "@pass"
185
+ elsif rule.kind == :terminals
186
+ "\n%-#{lhs_length-2}s" % "@terminals"
96
187
  else
97
188
  lhs_fmt % {id: "[#{rule.id}]", sym: rule.sym}
98
189
  end
99
- formatted_expr = format(rule.expr)
100
- if formatted_expr.length > rhs_length
101
- buffer << format(rule.expr, ("\n" + " " * lhs_length))
190
+ formatted_expr = self.send(format_meth, rule.expr)
191
+ if formatted_expr.length > rhs_length && (format != :abnf || rule.alt?)
192
+ if format == :abnf
193
+ # No whitespace, use =/
194
+ self.send(format_meth, rule.expr, sep: "--rule-extensions--").
195
+ split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
196
+ if ndx > 0
197
+ buffer << "\n" + lhs_fmt.sub('= ', '=/') % {id: "[#{rule.id}]", sym: rule.sym}
198
+ end
199
+ buffer << formatted.sub(/\s*\/\s*/, '')
200
+ end
201
+ else
202
+ # Space out past "= "
203
+ buffer << self.send(format_meth, rule.expr, sep: ("\n" + " " * (lhs_length + (rule.alt? ? 2 : 4) - (format == :ebnf ? 0 : 2))))
204
+ buffer << ("\n" + " " * (lhs_length) + ';') if format == :isoebnf
205
+ end
102
206
  else
103
- buffer << formatted_expr
207
+ buffer << formatted_expr + (format == :isoebnf ? ' ;' : '')
104
208
  end
209
+ buffer << "\n\n" if [:terminals, :pass].include?(rule.kind)
105
210
  out.puts(buffer)
106
211
  end
107
212
  end
108
213
 
109
214
  protected
215
+
216
+ ##
217
+ # W3C EBNF Formatters
218
+ ##
219
+
110
220
  # Format the expression part of a rule
111
- def format(expr, sep = nil)
112
- return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
221
+ def format_ebnf(expr, sep: nil, embedded: false)
222
+ return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
113
223
  if expr.is_a?(String)
114
- if expr.length == 1
115
- return format_char(expr)
116
- elsif expr =~ /\A#x\h+/
117
- return (@options[:html] ? %(<code class="grammar-char-escape">#{expr}</code>) : expr)
118
- elsif expr =~ /"/
119
- return (@options[:html] ? %('<code class="grammar-literal">#{escape(expr, "'")}</code>') : %('#{escape(expr, "'")}'))
120
- else
121
- return (@options[:html] ? %("<code class="grammar-literal">#{escape(expr, '"')}</code>") : %("#{escape(expr, '"')}"))
122
- end
224
+ return expr.length == 1 ?
225
+ format_ebnf_char(expr) :
226
+ format_ebnf_string(expr, expr.include?('"') ? "'" : '"')
123
227
  end
124
228
  parts = {
125
229
  alt: (@options[:html] ? "<code>|</code> " : "| "),
@@ -132,95 +236,464 @@ module EBNF
132
236
  rparen = (@options[:html] ? "<code>)</code> " : ")")
133
237
 
134
238
  case expr.first
239
+ when :istr
240
+ # Looses fidelity, but, oh well ...
241
+ format_ebnf(expr.last, embedded: true)
135
242
  when :alt, :diff
136
243
  this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
137
- expr[1..-1].map {|e| format(e)}.join(this_sep)
244
+ res = expr[1..-1].map {|e| format_ebnf(e, embedded: true)}.join(this_sep)
245
+ embedded ? (lparen + res + rparen) : res
138
246
  when :star, :plus, :opt
139
- raise "Expected star expression to have a single operand" unless expr.length == 2
140
247
  char = parts[expr.first.to_sym]
141
- r = format(expr[1])
142
- (r.start_with?("(") || Array(expr[1]).length == 1) ? "#{r}#{char}" : "(#{r})#{char}"
248
+ r = format_ebnf(expr[1], embedded: true)
249
+ "#{r}#{char}"
143
250
  when :hex
144
- (@options[:html] ? %(<code class="grammar-char-escape">#{expr.last}</code>) : expr.last)
251
+ escape_ebnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
145
252
  when :range
146
- format_range(expr.last)
253
+ format_ebnf_range(expr.last)
147
254
  when :seq
148
255
  this_sep = (sep ? sep : " ")
149
- expr[1..-1].map {|e| r = format(e); Array(e).length > 2 ? "#{lparen}#{r}#{rparen}" : r}.join(this_sep)
256
+ res = expr[1..-1].map do |e|
257
+ format_ebnf(e, embedded: true)
258
+ end.join(this_sep)
259
+ embedded ? (lparen + res + rparen) : res
260
+ when :rept
261
+ # Expand repetition
262
+ min, max, value = expr[1..-1]
263
+ if min == 0 && max == 1
264
+ format_ebnf([:opt, value], sep: sep, embedded: embedded)
265
+ elsif min == 0 && max == '*'
266
+ format_ebnf([:star, value], sep: sep, embedded: embedded)
267
+ elsif min == 1 && max == '*'
268
+ format_ebnf([:plus, value], sep: sep, embedded: embedded)
269
+ else
270
+ val2 = [:seq]
271
+ while min > 0
272
+ val2 << value
273
+ min -= 1
274
+ max -= 1 unless max == '*'
275
+ end
276
+ if max == '*'
277
+ val2 << [:star, value]
278
+ else
279
+ opt = nil
280
+ while max > 0
281
+ opt = [:opt, opt ? [:seq, value, opt] : value]
282
+ max -= 1
283
+ end
284
+ val2 << opt if opt
285
+ end
286
+ format_ebnf(val2, sep: sep, embedded: embedded)
287
+ end
150
288
  else
151
289
  raise "Unknown operator: #{expr.first}"
152
290
  end
153
291
  end
154
292
 
155
293
  # Format a single-character string, prefering hex for non-main ASCII
156
- def format_char(c)
294
+ def format_ebnf_char(c)
157
295
  case c.ord
158
- when 0x22 then (@options[:html] ? %('<code class="grammar-literal">"</code>') : %{'"'})
159
- when (0x23..0x7e) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"})
160
- else (@options[:html] ? %(<code class="grammar-char-escape">#{escape_hex(c)}</code>) : escape_hex(c))
296
+ when (0x21) then (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"})
297
+ when 0x22 then (@options[:html] ? %('<code class="grammar-literal">&quot;</code>') : %{'"'})
298
+ when (0x23..0x7e) then (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"})
299
+ when (0x80..0xFFFD) then (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"})
300
+ else escape_ebnf_hex(c)
161
301
  end
162
302
  end
163
303
 
164
304
  # Format a range
165
- def format_range(string)
305
+ def format_ebnf_range(string)
166
306
  lbrac = (@options[:html] ? "<code>[</code> " : "[")
167
307
  rbrac = (@options[:html] ? "<code>]</code> " : "]")
168
- dash = (@options[:html] ? "<code>-</code> " : "-")
169
308
 
170
309
  buffer = lbrac
171
310
  s = StringScanner.new(string)
172
311
  while !s.eos?
173
312
  case
174
313
  when s.scan(/\A[!"\u0024-\u007e]+/)
175
- buffer << (@options[:html] ? %(<code class="grammar-literal">#{s.matched}</code>) : s.matched)
314
+ buffer << (@options[:html] ? %(<code class="grammar-literal">#{@coder.encode s.matched}</code>) : s.matched)
176
315
  when s.scan(/\A#x\h+/)
177
- buffer << (@options[:html] ? %(<code class="grammar-char-escape">#{s.matched}</code>) : s.matched)
178
- when s.scan(/\A-/)
179
- buffer << dash
316
+ buffer << escape_ebnf_hex(s.matched[2..-1].hex.chr(Encoding::UTF_8))
180
317
  else
181
- buffer << (@options[:html] ? %(<code class="grammar-char-escape">#{escape_hex(s.getch)}</code>) : escape_hex(s.getch))
318
+ buffer << escape_ebnf_hex(s.getch)
182
319
  end
183
320
  end
184
321
  buffer + rbrac
185
322
  end
186
323
 
187
324
  # Escape a string, using as many UTF-8 characters as possible
188
- def escape(string, quote = '"')
189
- buffer = ""
325
+ def format_ebnf_string(string, quote = '"')
190
326
  string.each_char do |c|
191
- buffer << case (u = c.ord)
192
- when (0x00..0x1f) then "#x%02X" % u
193
- when quote.ord then "#x%02X" % u
194
- else c
327
+ case c.ord
328
+ when 0x00..0x19, quote.ord
329
+ raise RangeError, "cannot format #{string.inspect} as an EBNF String: #{c.inspect} is out of range" unless
330
+ ISOEBNF::TERMINAL_CHARACTER.match?(c)
195
331
  end
196
332
  end
197
- buffer
333
+
334
+ res = "#{quote}#{string}#{quote}"
335
+ @options[:html] ? @coder.encode(res) : res
198
336
  end
199
337
 
200
- def escape_hex(u)
338
+ def escape_ebnf_hex(u)
201
339
  fmt = case u.ord
340
+ when 0x00..0x20 then "#x%02X"
202
341
  when 0x0000..0x00ff then "#x%02X"
203
342
  when 0x0100..0xffff then "#x%04X"
204
343
  else "#x%08X"
205
344
  end
206
- sprintf(fmt, u.ord)
207
- end
208
-
209
- HAML_DESC = %q(
210
- %table.grammar
211
- %tbody#grammar-productions
212
- - rules.each do |rule|
213
- %tr{id: "grammar-production-#{rule.sym}"}
214
- - if rule.pass?
215
- %td{colspan: 3}
216
- %code<="@pass"
217
- - else
218
- %td<= "[#{rule.id}]"
219
- %td<
220
- %code<= rule.sym
221
- %td<= "::="
222
- %td
223
- != yield rule
345
+ char = fmt % u.ord
346
+ if @options[:html]
347
+ if u.ord <= 0x20
348
+ char = %(<abbr title="#{ASCII_ESCAPE_NAMES[u.ord]}">#{@coder.encode char}</abbr>)
349
+ elsif u.ord < 0x7F
350
+ char = %(<abbr title="ascii '#{@coder.encode u}'">#{@coder.encode char}</abbr>)
351
+ elsif u.ord == 0x7F
352
+ char = %(<abbr title="delete">#{@coder.encode char}</abbr>)
353
+ elsif u.ord <= 0xFF
354
+ char = %(<abbr title="extended ascii '#{u}'">#{char}</abbr>)
355
+ else
356
+ char = %(<abbr title="unicode '#{u}'">#{char}</abbr>)
357
+ end
358
+ %(<code class="grammar-char-escape">#{char}</code>)
359
+ else
360
+ char
361
+ end
362
+ end
363
+
364
+ ##
365
+ # ABNF Formatters
366
+ ##
367
+
368
# Format the expression part of a rule as ABNF.
#
# @param [Symbol, String, Array] expr
#   a rule reference (Symbol), a literal (String) or an operator expression
#   such as `[:seq, ...]`, `[:alt, ...]` or `[:rept, min, max, value]`
# @param [String, nil] sep
#   separator placed between the members of a top-level `:alt`/`:seq`
#   (a single space when nil)
# @param [Boolean] embedded
#   true when the result is nested in a larger expression, in which case
#   multi-member results are parenthesized
# @param [Boolean] sensitive
#   when true, multi-character literals get the `%s` (case-sensitive) prefix
# @return [String]
# @raise [RangeError] for `:diff`, which ABNF cannot express
# @raise [RuntimeError] for an unknown operator
def format_abnf(expr, sep: nil, embedded: false, sensitive: true)
  if expr.is_a?(Symbol)
    return @options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s
  end

  if expr.is_a?(String)
    return format_abnf_char(expr) if expr.length == 1
    # Already encoded
    return expr if expr.start_with?('%')

    if expr.include?('"')
      # An ABNF string cannot contain a double quote: emit the pieces between
      # quotes as strings and every quote as %x22. The -1 limit keeps trailing
      # empty pieces, so a quote at the end of the string is not lost.
      seq = [:seq]
      expr.split('"', -1).each_with_index do |segment, ndx|
        seq << [:hex, "#x22"] if ndx > 0
        seq << segment unless segment.empty?
      end
      # Propagate `embedded` so the pieces stay grouped under a repetition.
      return format_abnf(seq, sep: nil, embedded: embedded)
    end

    # The case-sensitivity marker belongs in front of the quoted string, in
    # HTML as well as in plain text.
    prefix = sensitive ? '%s' : ''
    return @options[:html] ?
      %(#{prefix}"<code class="grammar-literal">#{@coder.encode expr}</code>") :
      %(#{prefix}"#{expr}")
  end

  parts = {
    alt:  (@options[:html] ? "<code>/</code> " : "/ "),
    star: (@options[:html] ? "<code>*</code> " : "*"),
    # ABNF has no "+" operator; one-or-more is written "1*"
    plus: (@options[:html] ? "<code>1*</code> " : "1*")
  }
  lbrac = (@options[:html] ? "<code>[</code> " : "[")
  rbrac = (@options[:html] ? "<code>]</code> " : "]")
  lparen = (@options[:html] ? "<code>(</code> " : "(")
  rparen = (@options[:html] ? "<code>)</code> " : ")")

  case expr.first
  when :istr
    # FIXME: if string part is segmented, need to do something different
    format_abnf(expr.last, embedded: true, sensitive: false)
  when :alt
    this_sep = (sep || " ") + parts[:alt]
    res = expr[1..-1].map { |e| format_abnf(e, embedded: true) }.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :diff
    raise RangeError, "ABNF does not support the diff operator"
  when :opt
    "#{lbrac}#{format_abnf(expr[1], embedded: true)}#{rbrac}"
  when :plus, :star
    "#{parts[expr.first]}#{format_abnf(expr[1], embedded: true)}"
  when :hex
    # An explicit encoding is required: Integer#chr without one raises
    # RangeError for code points above 0xFF.
    escape_abnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
  when :range
    # Returns an [:alt] if composed of multiple sequences
    # Note: ABNF does not support the `not` operator
    res = format_abnf_range(expr.last)
    res.is_a?(Array) ? format_abnf(res, embedded: true) : res
  when :seq
    res = expr[1..-1].map { |e| format_abnf(e, embedded: true) }.join(sep || " ")
    embedded ? (lparen + res + rparen) : res
  when :rept
    # ABNF repetition is written <min>*<max>element
    min, max, value = expr[1..-1]
    r = format_abnf(value, embedded: true)
    if min == max
      "#{min}#{r}"
    elsif min == 0 && max == '*'
      "#{parts[:star]}#{r}"
    elsif min > 0 && max == '*'
      "#{min}#{parts[:star]}#{r}"
    else
      "#{min}#{parts[:star]}#{max}#{r}"
    end
  else
    raise "Unknown operator: #{expr.first}"
  end
end
451
+
452
# Format a single-character string, preferring hex for anything that is not
# a printable ASCII character (or is the double quote).
#
# ABNF strings have no escape mechanism, so a printable character is quoted
# verbatim. `String#inspect` is not suitable here because it would turn a
# backslash into two characters.
#
# @param [String] c
# @return [String] a quoted literal, or the `%x` form from {#escape_abnf_hex}
def format_abnf_char(c)
  return escape_abnf_hex(c) unless /[\x20-\x21\x23-\x7E]/.match?(c)

  if @options[:html]
    # Escape markup-significant characters such as "<" and "&"
    %("<code class="grammar-literal">#{@coder.encode c}</code>")
  else
    %("#{c}")
  end
end
460
+
461
# Format a range (the text between "[" and "]") as ABNF numeric values.
#
# Presumes range has already been validated.
#
# Literal characters become decimal values (`%d97-122`), `#x` escapes become
# hexadecimal values (`%x30-39`). Consecutive single values of the same kind
# are joined with ".".
#
# NOTE(review): RFC 5234 reads `%d97.98` as the concatenation of the two
# characters, whereas an enumeration such as `[ab]` is a choice — confirm the
# "." joining is the intended rendering.
#
# @param [String] string
# @return [String, Array]
#   the single numeric value, or an `[:alt, ...]` expression holding several
#   values that the caller formats further
# @raise [RangeError]
#   for a negated range (`^...`), or when the input contains something that
#   is neither a `#x` escape, a range character nor "-"
def format_abnf_range(string)
  raise RangeError, "cannot format #{string.inspect} as an ABNF range" if string.start_with?('^')

  alt = [:alt]
  # A trailing "-" is a literal dash, not a range operator
  o_dash = string.end_with?('-')
  string = string[0..-2] if o_dash

  scanner = StringScanner.new(string)
  hexes, deces = [], []
  in_range = false
  # Build op (alt) from different ranges/enums
  until scanner.eos?
    start = scanner.pos

    if (hex = scanner.scan(Terminals::HEX))
      # Append any decimal values
      alt << "%d" + deces.join(".") unless deces.empty?
      deces = []

      if in_range
        # Add "." sequences for any previous hexes
        alt << "%x" + hexes[0..-2].join(".") if hexes.length > 1
        alt << "%x#{hexes.last}-#{hex[2..-1]}"
        in_range, hexes = false, []
      else
        hexes << hex[2..-1]
      end
    elsif (dec = scanner.scan(Terminals::R_CHAR))
      # Append any hexadecimal values
      alt << "%x" + hexes.join(".") unless hexes.empty?
      hexes = []

      if in_range
        # Add "." sequences for any previous decimals
        alt << "%d" + deces[0..-2].join(".") if deces.length > 1
        alt << "%d#{deces.last}-#{dec.codepoints.first}"
        in_range, deces = false, []
      else
        deces << dec.codepoints.first.to_s
      end
    end

    in_range = true if scanner.scan(/\-/)

    # Nothing was consumed: fail instead of looping forever
    raise RangeError, "cannot format #{string.inspect} as an ABNF range" if scanner.pos == start
  end

  deces << '45' if o_dash

  # Append hexes and deces as "." sequences (should be only one)
  alt << "%d" + deces.join(".") unless deces.empty?
  alt << "%x" + hexes.join(".") unless hexes.empty?

  # FIXME: HTML abbreviations?
  # A lone value is returned as is; anything else goes back as the alt
  alt.length == 2 ? alt.last : alt
end
525
+
526
# Render a character as an ABNF hexadecimal value (`%xNN`).
#
# The number is zero-padded to 2, 4 or 8 digits depending on the code point.
# In HTML mode the value is wrapped in an `<abbr>` whose title describes the
# character, inside `<code class="grammar-char-escape">`.
#
# @param [String] u the character; only its first code point is used
# @return [String]
def escape_abnf_hex(u)
  codepoint = u.ord
  fmt = case codepoint
  when 0x0000..0x00ff then "%02X"
  when 0x0100..0xffff then "%04X"
  else "%08X"
  end
  char = "%x" + (fmt % codepoint)
  return char unless @options[:html]

  # 0x7F must be tested before the general ASCII case can claim it, hence the
  # strict "<" on the ASCII branch.
  title = if codepoint <= 0x20
    ASCII_ESCAPE_NAMES[codepoint]
  elsif codepoint < 0x7F
    # The character lands inside an attribute value, so it must be escaped
    "ascii '#{@coder.encode u}'"
  elsif codepoint == 0x7F
    "delete"
  elsif codepoint <= 0xFF
    "extended ascii '#{@coder.encode u}'"
  else
    "unicode '#{@coder.encode u}'"
  end

  %(<code class="grammar-char-escape"><abbr title="#{title}">#{@coder.encode char}</abbr></code>)
end
550
+
551
+ ##
552
+ # ISO EBNF Formatters
553
+ ##
554
+
555
# Format the expression part of a rule as ISO EBNF.
#
# @param [Symbol, String, Array] expr
#   a rule reference (Symbol), a literal or `#x` escape (String), or an
#   operator expression such as `[:seq, ...]` or `[:rept, min, max, value]`
# @param [String, nil] sep
#   separator placed between the members of a top-level `:alt`/`:seq`
#   (a single space when nil)
# @param [Boolean] embedded
#   true when the result is nested in a larger expression, in which case
#   multi-member results are parenthesized
# @return [String]
# @raise [RangeError]
#   when a literal contains a character that `ISOEBNF::TERMINAL_CHARACTER`
#   does not match
# @raise [RuntimeError] for an unknown operator
def format_isoebnf(expr, sep: nil, embedded: false)
  if expr.is_a?(Symbol)
    return @options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s
  end

  if expr.is_a?(String)
    # A string that is exactly one "#x" escape stands for that character.
    # The encoding is explicit because Integer#chr without one raises
    # RangeError for code points above 0xFF.
    expr = expr[2..-1].hex.chr(Encoding::UTF_8) if expr.match?(/\A#x\h+\z/)
    expr.each_char do |c|
      raise RangeError, "cannot format #{expr.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless
        ISOEBNF::TERMINAL_CHARACTER.match?(c)
    end
    # There are no escapes, so pick the quote that the text does not contain
    quote = expr.include?('"') ? "'" : '"'
    return @options[:html] ?
      %(#{quote}<code class="grammar-literal">#{@coder.encode expr}</code>#{quote}) :
      %(#{quote}#{expr}#{quote})
  end

  parts = {
    alt:  (@options[:html] ? "<code>|</code> " : "| "),
    diff: (@options[:html] ? "<code>-</code> " : "- "),
  }
  lparen = (@options[:html] ? "<code>(</code> " : "(")
  rparen = (@options[:html] ? "<code>)</code> " : ")")

  case expr.first
  when :istr
    # Loses fidelity, but, oh well ...
    format_isoebnf(expr.last, embedded: true)
  when :alt, :diff
    this_sep = (sep || " ") + parts[expr.first]
    res = expr[1..-1].map { |e| format_isoebnf(e, embedded: true) }.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :opt
    "[#{format_isoebnf(expr[1], embedded: true)}]"
  when :star
    "{#{format_isoebnf(expr[1], embedded: true)}}"
  when :plus
    # One or more: the element followed by zero or more of it
    r = format_isoebnf(expr[1], embedded: true)
    "#{r}, {#{r}}"
  when :hex
    # The "#x" string is converted by the String branch above
    format_isoebnf(expr[1], embedded: true)
  when :range
    res = format_isoebnf_range(expr.last)
    res.is_a?(Array) ? format_isoebnf(res, embedded: true) : res
  when :seq
    this_sep = "," + (sep || " ")
    res = expr[1..-1].map { |e| format_isoebnf(e, embedded: true) }.join(this_sep)
    embedded ? (lparen + res + rparen) : res
  when :rept
    # Expand repetition
    min, max, value = expr[1..-1]
    if min == 0 && max == 1
      format_isoebnf([:opt, value], sep: sep, embedded: embedded)
    elsif min == 0 && max == '*'
      format_isoebnf([:star, value], sep: sep, embedded: embedded)
    elsif min == 1 && max == '*'
      format_isoebnf([:plus, value], sep: sep, embedded: embedded)
    else
      # The mandatory occurrences come first ...
      val2 = [:seq]
      min.times { val2 << value }
      if max == '*'
        # ... followed by any number more,
        val2 << [:star, value]
      else
        # ... or by nested optionals for the remaining (max - min) occurrences
        opt = nil
        (max - min).times { opt = [:opt, opt ? [:seq, value, opt] : value] }
        val2 << opt if opt
      end
      format_isoebnf(val2, sep: sep, embedded: embedded)
    end
  else
    raise "Unknown operator: #{expr.first}"
  end
end
639
+
640
# Format a range (the text between "[" and "]") for ISO EBNF.
#
# ISO EBNF has no character ranges, so the range is expanded into an
# alternation of its individual characters.
#
# @param [String] string
# @return [String, Array]
#   a quoted literal when the range holds exactly one character, otherwise
#   an `[:alt, char, ...]` expression that the caller formats further
# @raise [RangeError]
#   for a negated range (`^...`), for a character that
#   `ISOEBNF::TERMINAL_CHARACTER` does not match, and for a range whose end
#   does not sort after its start
def format_isoebnf_range(string)
  raise RangeError, "cannot format #{string.inspect} as an ISO EBNF range" if string.start_with?('^')

  # A trailing "-" is a literal dash, not a range operator
  o_dash = string.end_with?('-')
  string = string[0..-2] if o_dash

  chars = []
  scanner = StringScanner.new(string)
  in_range = false
  # Build chars from different ranges/enums
  until scanner.eos?
    hex = scanner.scan(Terminals::HEX)
    char = hex ? hex[2..-1].hex.chr(Encoding::UTF_8) : scanner.scan(Terminals::R_CHAR)
    raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration: #{char.inspect} is out of range" unless
      char && ISOEBNF::TERMINAL_CHARACTER.match?(char)

    if in_range
      raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration" unless chars.last < char
      # chars.last is already present; add every code point after it up to
      # and including this one
      chars.concat(((chars.last.ord + 1)..char.ord).map { |cp| cp.chr(Encoding::UTF_8) })
      in_range = false
    else
      chars << char
    end

    in_range = true if scanner.scan(/\-/)
  end

  chars << '-' if o_dash

  # Anything but exactly one character goes back as an alternation
  return chars.unshift(:alt) unless chars.length == 1

  # There are no escapes in ISO EBNF strings, so quote verbatim and switch
  # to single quotes for the double-quote character itself
  only = chars.last
  only == '"' ? %('"') : %("#{only}")
end
681
+
682
+ ERB_DESC = %q(
683
+ <table class="grammar">
684
+ <tbody id="grammar-productions" class="<%= @format %>">
685
+ <% for rule in @rules %>
686
+ <tr<%= %{ id="grammar-production-#{rule.sym}"} unless %w(=/ |).include?(rule.assign)%>>
687
+ <% if rule.id %>
688
+ <td><%= rule.id %></td>
689
+ <% end %>
690
+ <td><code><%== rule.sym %></code></td>
691
+ <td><%= rule.assign %></td>
692
+ <td><%= rule.formatted %></td>
693
+ </tr>
694
+ <% end %>
695
+ </tbody>
696
+ </table>
224
697
  ).gsub(/^ /, '')
225
698
  end
226
699
  end