ebnf 1.2.0 → 2.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +223 -199
  3. data/UNLICENSE +1 -1
  4. data/VERSION +1 -1
  5. data/bin/ebnf +38 -19
  6. data/etc/abnf-core.ebnf +52 -0
  7. data/etc/abnf.abnf +121 -0
  8. data/etc/abnf.ebnf +124 -0
  9. data/etc/abnf.sxp +45 -0
  10. data/etc/doap.ttl +23 -18
  11. data/etc/ebnf.ebnf +21 -33
  12. data/etc/ebnf.html +76 -160
  13. data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
  14. data/etc/ebnf.ll1.sxp +182 -183
  15. data/etc/ebnf.peg.rb +90 -0
  16. data/etc/ebnf.peg.sxp +84 -0
  17. data/etc/ebnf.sxp +40 -41
  18. data/etc/iso-ebnf.ebnf +140 -0
  19. data/etc/iso-ebnf.isoebnf +138 -0
  20. data/etc/iso-ebnf.sxp +65 -0
  21. data/etc/sparql.ebnf +4 -4
  22. data/etc/sparql.html +1603 -1751
  23. data/etc/sparql.ll1.sxp +7372 -7372
  24. data/etc/sparql.peg.rb +532 -0
  25. data/etc/sparql.peg.sxp +597 -0
  26. data/etc/sparql.sxp +363 -362
  27. data/etc/turtle.ebnf +3 -3
  28. data/etc/turtle.html +465 -517
  29. data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
  30. data/etc/turtle.ll1.sxp +425 -425
  31. data/etc/turtle.peg.rb +182 -0
  32. data/etc/turtle.peg.sxp +199 -0
  33. data/etc/turtle.sxp +103 -101
  34. data/lib/ebnf.rb +6 -1
  35. data/lib/ebnf/abnf.rb +301 -0
  36. data/lib/ebnf/abnf/core.rb +23 -0
  37. data/lib/ebnf/abnf/meta.rb +111 -0
  38. data/lib/ebnf/base.rb +114 -69
  39. data/lib/ebnf/bnf.rb +1 -26
  40. data/lib/ebnf/ebnf/meta.rb +90 -0
  41. data/lib/ebnf/isoebnf.rb +229 -0
  42. data/lib/ebnf/isoebnf/meta.rb +75 -0
  43. data/lib/ebnf/ll1.rb +131 -3
  44. data/lib/ebnf/ll1/lexer.rb +20 -22
  45. data/lib/ebnf/ll1/parser.rb +97 -64
  46. data/lib/ebnf/ll1/scanner.rb +82 -50
  47. data/lib/ebnf/native.rb +320 -0
  48. data/lib/ebnf/parser.rb +285 -302
  49. data/lib/ebnf/peg.rb +39 -0
  50. data/lib/ebnf/peg/parser.rb +561 -0
  51. data/lib/ebnf/peg/rule.rb +250 -0
  52. data/lib/ebnf/rule.rb +442 -148
  53. data/lib/ebnf/terminals.rb +21 -0
  54. data/lib/ebnf/writer.rb +587 -82
  55. metadata +125 -18
  56. data/etc/sparql.rb +0 -45773
data/lib/ebnf/base.rb CHANGED
@@ -2,7 +2,7 @@ require 'strscan'
2
2
 
3
3
  # Extended Backus-Naur Form (EBNF), being the W3C variation is
4
4
  # originally defined in the
5
- # [W3C XML 1.0 Spec](http://www.w3.org/TR/REC-xml/#sec-notation).
5
+ # [W3C XML 1.0 Spec](https://www.w3.org/TR/REC-xml/#sec-notation).
6
6
  #
7
7
  # This version attempts to be less strict than the strict definition
8
8
  # to allow for colloquial variations (such as in the Turtle syntax).
@@ -12,8 +12,8 @@ require 'strscan'
12
12
  #
13
13
  # Comments include the content between '/*' and '*/'
14
14
  #
15
- # @see http://www.w3.org/2000/10/swap/grammar/ebnf2turtle.py
16
- # @see http://www.w3.org/2000/10/swap/grammar/ebnf2bnf.n3
15
+ # @see https://www.w3.org/2000/10/swap/grammar/ebnf2turtle.py
16
+ # @see https://www.w3.org/2000/10/swap/grammar/ebnf2bnf.n3
17
17
  #
18
18
  # Based on bnf2turtle by Dan Connolly.
19
19
  #
@@ -36,7 +36,7 @@ require 'strscan'
36
36
  # derived mechanically from the specification.
37
37
  #
38
38
  #
39
- # [N3 design note]: http://www.w3.org/DesignIssues/Notation3
39
+ # [N3 design note]: https://www.w3.org/DesignIssues/Notation3
40
40
  #
41
41
  # Related Work
42
42
  # ------------
@@ -59,31 +59,15 @@ require 'strscan'
59
59
  # expression of the grammar in terms of the higher level EBNF
60
60
  # constructs.
61
61
  #
62
- # [goal]: http://www.w3.org/2002/02/mid/1086902566.21030.1479.camel@dirk;list=public-cwm-bugs
63
- # [n3p announcement]: http://lists.w3.org/Archives/Public/public-cwm-talk/2004OctDec/0029.html
64
- # [Yacker]: http://www.w3.org/1999/02/26-modules/User/Yacker
65
- # [SPARQL specification]: http://www.w3.org/TR/rdf-sparql-query/
66
- # [Cwm Release 1.1.0rc1]: http://lists.w3.org/Archives/Public/public-cwm-announce/2005JulSep/0000.html
67
- # [bnf-rules.n3]: http://www.w3.org/2000/10/swap/grammar/bnf-rules.n3
62
+ # [goal]: https://www.w3.org/2002/02/mid/1086902566.21030.1479.camel@dirk;list=public-cwm-bugs
63
+ # [n3p announcement]: https://lists.w3.org/Archives/Public/public-cwm-talk/2004OctDec/0029.html
64
+ # [Yacker]: https://www.w3.org/1999/02/26-modules/User/Yacker
65
+ # [SPARQL specification]: https://www.w3.org/TR/rdf-sparql-query/
66
+ # [Cwm Release 1.1.0rc1]: https://lists.w3.org/Archives/Public/public-cwm-announce/2005JulSep/0000.html
67
+ # [bnf-rules.n3]: https://www.w3.org/2000/10/swap/grammar/bnf-rules.n3
68
68
  #
69
- # Open Issues and Future Work
70
- # ---------------------------
71
- #
72
- # The yacker output also has the terminals compiled to elaborate regular
73
- # expressions. The best strategy for dealing with lexical tokens is not
74
- # yet clear. Many tokens in SPARQL are case insensitive; this is not yet
75
- # captured formally.
76
- #
77
- # The schema for the EBNF vocabulary used here (``g:seq``, ``g:alt``, ...)
78
- # is not yet published; it should be aligned with [swap/grammar/bnf][]
79
- # and the [bnf2html.n3][] rules (and/or the style of linked XHTML grammar
80
- # in the SPARQL and XML specificiations).
81
- #
82
- # It would be interesting to corroborate the claim in the SPARQL spec
83
- # that the grammar is LL(1) with a mechanical proof based on N3 rules.
84
- #
85
- # [swap/grammar/bnf]: http://www.w3.org/2000/10/swap/grammar/bnf
86
- # [bnf2html.n3]: http://www.w3.org/2000/10/swap/grammar/bnf2html.n3
69
+ # [swap/grammar/bnf]: https://www.w3.org/2000/10/swap/grammar/bnf
70
+ # [bnf2html.n3]: https://www.w3.org/2000/10/swap/grammar/bnf2html.n3
87
71
  #
88
72
  # Background
89
73
  # ----------
@@ -93,14 +77,15 @@ require 'strscan'
93
77
  # of N3 that maps directly to (and from) the standard XML syntax for
94
78
  # RDF.
95
79
  #
96
- # [N3 Primer]: http://www.w3.org/2000/10/swap/Primer.html
80
+ # [N3 Primer]: https://www.w3.org/2000/10/swap/Primer.html
97
81
  #
98
82
  # @author Gregg Kellogg
99
83
  module EBNF
100
84
  class Base
101
85
  include BNF
102
86
  include LL1
103
- include Parser
87
+ include Native
88
+ include PEG
104
89
 
105
90
  # Abstract syntax tree from parse
106
91
  #
@@ -116,24 +101,33 @@ module EBNF
116
101
  # in S-Expressions (similar to SPARQL SSE)
117
102
  #
118
103
  # @param [#read, #to_s] input
104
+ # @param [Symbol] format (:ebnf)
105
+ # Format of input, one of `:abnf`, `:ebnf`, `:isoebnf`, `:native`, or `:sxp`.
106
+ # Use `:native` for the native EBNF parser, rather than the PEG parser.
119
107
  # @param [Hash{Symbol => Object}] options
120
- # @param [Symbol] :format (:ebnf)
121
- # Format of input, one of :ebnf, or :sxp
122
108
  # @option options [Boolean, Array] :debug
123
109
  # Output debug information to an array or $stdout.
110
+ # @option options [Boolean, Array] :validate
111
+ # Validate resulting grammar.
124
112
  def initialize(input, format: :ebnf, **options)
125
113
  @options = options.dup
126
114
  @lineno, @depth, @errors = 1, 0, []
127
- terminal = false
128
115
  @ast = []
129
116
 
130
117
  input = input.respond_to?(:read) ? input.read : input.to_s
131
118
 
132
119
  case format
133
- when :sxp
134
- require 'sxp' unless defined?(SXP)
135
- @ast = SXP::Reader::Basic.read(input).map {|e| Rule.from_sxp(e)}
120
+ when :abnf
121
+ abnf = ABNF.new(input, **options)
122
+ @ast = abnf.ast
136
123
  when :ebnf
124
+ ebnf = Parser.new(input, **options)
125
+ @ast = ebnf.ast
126
+ when :isoebnf
127
+ iso = ISOEBNF.new(input, **options)
128
+ @ast = iso.ast
129
+ when :native
130
+ terminals = false
137
131
  scanner = StringScanner.new(input)
138
132
 
139
133
  eachRule(scanner) do |r|
@@ -141,7 +135,9 @@ module EBNF
141
135
  case r
142
136
  when /^@terminals/
143
137
  # Switch mode to parsing terminals
144
- terminal = true
138
+ terminals = true
139
+ rule = Rule.new(nil, nil, nil, kind: :terminals, ebnf: self)
140
+ @ast << rule
145
141
  when /^@pass\s*(.*)$/m
146
142
  expr = expression($1).first
147
143
  rule = Rule.new(nil, nil, expr, kind: :pass, ebnf: self)
@@ -150,14 +146,49 @@ module EBNF
150
146
  else
151
147
  rule = depth {ruleParts(r)}
152
148
 
153
- rule.kind = :terminal if terminal # Override after we've parsed @terminals
149
+ rule.kind = :terminal if terminals # Override after we've parsed @terminals
154
150
  rule.orig = r
155
151
  @ast << rule
156
152
  end
157
153
  end
154
+ when :sxp
155
+ require 'sxp' unless defined?(SXP)
156
+ @ast = SXP::Reader::Basic.read(input).map {|e| Rule.from_sxp(e)}
158
157
  else
159
158
  raise "unknown input format #{format.inspect}"
160
159
  end
160
+
161
+ validate! if @options[:validate]
162
+ end
163
+
164
+ ##
165
+ # Validate the grammar.
166
+ #
167
+ # Makes sure that rules reference either strings or other defined rules.
168
+ #
169
+ # @raise [SyntaxError]
170
+ def validate!
171
+ ast.each do |rule|
172
+ begin
173
+ rule.validate!(@ast)
174
+ rescue SyntaxError => e
175
+ error("In rule #{rule.sym}: #{e.message}")
176
+ end
177
+ end
178
+ raise SyntaxError, errors.join("\n") unless errors.empty?
179
+ end
180
+
181
+ ##
182
+ # Is the grammar valid?
183
+ #
184
+ # Uses `#validate!` and catches `RangeError`
185
+ #
186
+ # @return [Boolean]
187
+ def valid?
188
+ validate!
189
+ true
190
+ rescue SyntaxError
191
+ false
161
192
  end
162
193
 
163
194
  # Iterate over each rule or terminal, except empty
@@ -173,64 +204,63 @@ module EBNF
173
204
  # @return [String]
174
205
  def to_sxp
175
206
  require 'sxp' unless defined?(SXP)
176
- SXP::Generator.string(ast.sort_by{|r| r.id.to_f}.map(&:for_sxp))
207
+ SXP::Generator.string(ast.map(&:for_sxp))
177
208
  end
178
209
 
179
210
  ##
180
211
  # Output formatted EBNF
212
+ #
213
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
181
214
  # @return [String]
182
- def to_s
183
- Writer.string(*ast)
215
+ def to_s(format: :ebnf)
216
+ Writer.string(*ast, format: format)
184
217
  end
185
218
 
186
219
  ##
187
220
  # Output formatted EBNF as HTML
221
+ #
222
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
223
+ # @param [Boolean] validate (false) validate generated HTML.
188
224
  # @return [String]
189
- def to_html
190
- Writer.html(*ast)
225
+ def to_html(format: :ebnf, validate: false)
226
+ Writer.html(*ast, format: format, validate: validate)
191
227
  end
192
228
 
193
229
  ##
194
230
  # Output Ruby parser files
195
231
  #
196
232
  # @param [IO, StringIO] output
197
- # @param [String] :grammarFile
198
- # @param [String] :mod_name ('Branch')
199
- def to_ruby(output = $stdout, grammarFile: nil, mod_name: 'Branch')
233
+ # @param [String] grammarFile
234
+ # @param [String] mod_name ('Meta')
235
+ def to_ruby(output = $stdout, grammarFile: nil, mod_name: 'Meta', **options)
200
236
  unless output == $stdout
201
- output.puts "# This file is automatically generated by #{__FILE__}"
202
- output.puts "# BRANCH derived from #{grammarFile}" if grammarFile
237
+ output.puts "# This file is automatically generated by ebnf version #{EBNF::VERSION}"
238
+ output.puts "# Derived from #{grammarFile}" if grammarFile
203
239
  unless self.errors.empty?
204
- output.puts "# Note, tables completed with errors, may need to be resolved manually:"
240
+ output.puts "# Note, grammar has errors, may need to be resolved manually:"
205
241
  #output.puts "# #{pp.conflicts.map{|c| c.join("\n# ")}.join("\n# ")}"
206
242
  end
207
243
  output.puts "module #{mod_name}"
208
- output.puts " START = #{self.start.inspect}"
209
- output.puts
244
+ output.puts " START = #{self.start.inspect}\n" if self.start
245
+ end
246
+
247
+ # Either output LL(1) BRANCH tables or rules for PEG parsing
248
+ if ast.first.first
249
+ to_ruby_ll1(output)
250
+ else
251
+ to_ruby_peg(output)
210
252
  end
211
- self.outputTable(output, 'BRANCH', self.branch, 1)
212
- self.outputTable(output, 'TERMINALS', self.terminals, 1)
213
- self.outputTable(output, 'FIRST', self.first, 1)
214
- self.outputTable(output, 'FOLLOW', self.follow, 1)
215
- self.outputTable(output, 'CLEANUP', self.cleanup, 1)
216
- self.outputTable(output, 'PASS', [self.pass], 1) if self.pass
217
253
  unless output == $stdout
218
254
  output.puts "end"
219
255
  end
220
256
  end
221
257
 
222
- def dup
223
- new_obj = super
224
- new_obj.instance_variable_set(:@ast, @ast.dup)
225
- new_obj
226
- end
227
-
228
258
  ##
229
- # Find a rule given a symbol
230
- # @param [Symbol] sym
231
- # @return [Rule]
232
- def find_rule(sym)
233
- (@find ||= {})[sym] ||= ast.detect {|r| r.sym == sym}
259
+ # Renumber rule identifiers
260
+ def renumber!
261
+ ast.each_with_index do |rule, index|
262
+ rule.id = (index + 1).to_s
263
+ end
234
264
  end
235
265
 
236
266
  ##
@@ -241,6 +271,7 @@ module EBNF
241
271
  def to_ttl(prefix = nil, ns = "http://example.org/")
242
272
  unless ast.empty?
243
273
  [
274
+ "@prefix dc: <http://purl.org/dc/terms/>.",
244
275
  "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.",
245
276
  "@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.",
246
277
  ("@prefix #{prefix}: <#{ns}>." if prefix),
@@ -253,7 +284,21 @@ module EBNF
253
284
  ].compact
254
285
  end.join("\n") +
255
286
 
256
- ast.sort.map(&:to_ttl).join("\n")
287
+ ast.map(&:to_ttl).join("\n")
288
+ end
289
+
290
+ def dup
291
+ new_obj = super
292
+ new_obj.instance_variable_set(:@ast, @ast.dup)
293
+ new_obj
294
+ end
295
+
296
+ ##
297
+ # Find a rule given a symbol
298
+ # @param [Symbol] sym
299
+ # @return [Rule]
300
+ def find_rule(sym)
301
+ (@find ||= {})[sym] ||= ast.detect {|r| r.sym == sym}
257
302
  end
258
303
 
259
304
  def depth
data/lib/ebnf/bnf.rb CHANGED
@@ -17,32 +17,7 @@ module EBNF
17
17
  new_ast += new_rules
18
18
  end
19
19
 
20
- # Consolodate equivalent terminal rules
21
- to_rewrite = {}
22
- new_ast.select {|r| r.terminal?}.each do |src_rule|
23
- new_ast.select {|r| r.terminal?}.each do |dst_rule|
24
- if src_rule.equivalent?(dst_rule) && src_rule != dst_rule
25
- debug("make_bnf") {"equivalent rules: #{src_rule.inspect} and #{dst_rule.inspect}"}
26
- (to_rewrite[src_rule] ||= []) << dst_rule
27
- end
28
- end
29
- end
30
-
31
- # Replace references to equivalent rules with canonical rule
32
- to_rewrite.each do |src_rule, dst_rules|
33
- dst_rules.each do |dst_rule|
34
- new_ast.each do |mod_rule|
35
- debug("make_bnf") {"rewrite #{mod_rule.inspect} from #{dst_rule.sym} to #{src_rule.sym}"}
36
- mod_rule.rewrite(dst_rule, src_rule)
37
- end
38
- end
39
- end
40
-
41
- # AST now has just rewritten rules
42
- compacted_ast = new_ast - to_rewrite.values.flatten.compact
43
-
44
- # Sort AST by number
45
- @ast = compacted_ast
20
+ @ast = new_ast
46
21
  progress("make_bnf") {"End: #{@ast.length} rules"}
47
22
  self
48
23
  end
@@ -0,0 +1,90 @@
1
+ # This file is automatically generated by ebnf version 2.0.0
2
+ # Derived from etc/ebnf.ebnf
3
+ module EBNFMeta
4
+ RULES = [
5
+ EBNF::Rule.new(:ebnf, "1", [:star, :_ebnf_1]).extend(EBNF::PEG::Rule),
6
+ EBNF::Rule.new(:_ebnf_1, "1.1", [:alt, :declaration, :rule]).extend(EBNF::PEG::Rule),
7
+ EBNF::Rule.new(:declaration, "2", [:alt, "@terminals", :pass]).extend(EBNF::PEG::Rule),
8
+ EBNF::Rule.new(:rule, "3", [:seq, :LHS, :expression]).extend(EBNF::PEG::Rule),
9
+ EBNF::Rule.new(:expression, "4", [:seq, :alt]).extend(EBNF::PEG::Rule),
10
+ EBNF::Rule.new(:alt, "5", [:seq, :seq, :_alt_1]).extend(EBNF::PEG::Rule),
11
+ EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2]).extend(EBNF::PEG::Rule),
12
+ EBNF::Rule.new(:_alt_2, "5.2", [:seq, "|", :seq]).extend(EBNF::PEG::Rule),
13
+ EBNF::Rule.new(:seq, "6", [:plus, :diff]).extend(EBNF::PEG::Rule),
14
+ EBNF::Rule.new(:diff, "7", [:seq, :postfix, :_diff_1]).extend(EBNF::PEG::Rule),
15
+ EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2]).extend(EBNF::PEG::Rule),
16
+ EBNF::Rule.new(:_diff_2, "7.2", [:seq, "-", :postfix]).extend(EBNF::PEG::Rule),
17
+ EBNF::Rule.new(:postfix, "8", [:seq, :primary, :_postfix_1]).extend(EBNF::PEG::Rule),
18
+ EBNF::Rule.new(:_postfix_1, "8.1", [:opt, :POSTFIX]).extend(EBNF::PEG::Rule),
19
+ EBNF::Rule.new(:primary, "9", [:alt, :HEX, :SYMBOL, :O_RANGE, :RANGE, :STRING1, :STRING2, :_primary_1]).extend(EBNF::PEG::Rule),
20
+ EBNF::Rule.new(:_primary_1, "9.1", [:seq, "(", :expression, ")"]).extend(EBNF::PEG::Rule),
21
+ EBNF::Rule.new(:pass, "10", [:seq, "@pass", :expression]).extend(EBNF::PEG::Rule),
22
+ EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule),
23
+ EBNF::Rule.new(:LHS, "11", [:seq, :_LHS_1, :SYMBOL, :_LHS_2, "::="], kind: :terminal).extend(EBNF::PEG::Rule),
24
+ EBNF::Rule.new(:_LHS_1, "11.1", [:opt, :_LHS_3], kind: :terminal).extend(EBNF::PEG::Rule),
25
+ EBNF::Rule.new(:_LHS_3, "11.3", [:seq, "[", :SYMBOL, "]", :_LHS_4], kind: :terminal).extend(EBNF::PEG::Rule),
26
+ EBNF::Rule.new(:_LHS_4, "11.4", [:plus, " "], kind: :terminal).extend(EBNF::PEG::Rule),
27
+ EBNF::Rule.new(:_LHS_2, "11.2", [:star, " "], kind: :terminal).extend(EBNF::PEG::Rule),
28
+ EBNF::Rule.new(:SYMBOL, "12", [:plus, :_SYMBOL_1], kind: :terminal).extend(EBNF::PEG::Rule),
29
+ EBNF::Rule.new(:_SYMBOL_1, "12.1", [:alt, :_SYMBOL_2, :_SYMBOL_3, :_SYMBOL_4, "_", "."], kind: :terminal).extend(EBNF::PEG::Rule),
30
+ EBNF::Rule.new(:_SYMBOL_2, "12.2", [:range, "a-z"], kind: :terminal).extend(EBNF::PEG::Rule),
31
+ EBNF::Rule.new(:_SYMBOL_3, "12.3", [:range, "A-Z"], kind: :terminal).extend(EBNF::PEG::Rule),
32
+ EBNF::Rule.new(:_SYMBOL_4, "12.4", [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
33
+ EBNF::Rule.new(:HEX, "13", [:seq, "#x", :_HEX_1], kind: :terminal).extend(EBNF::PEG::Rule),
34
+ EBNF::Rule.new(:_HEX_1, "13.1", [:plus, :_HEX_2], kind: :terminal).extend(EBNF::PEG::Rule),
35
+ EBNF::Rule.new(:_HEX_2, "13.2", [:alt, :_HEX_3, :_HEX_4, :_HEX_5], kind: :terminal).extend(EBNF::PEG::Rule),
36
+ EBNF::Rule.new(:_HEX_3, "13.3", [:range, "a-f"], kind: :terminal).extend(EBNF::PEG::Rule),
37
+ EBNF::Rule.new(:_HEX_4, "13.4", [:range, "A-F"], kind: :terminal).extend(EBNF::PEG::Rule),
38
+ EBNF::Rule.new(:_HEX_5, "13.5", [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
39
+ EBNF::Rule.new(:RANGE, "14", [:seq, "[", :_RANGE_1, :_RANGE_2, :_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule),
40
+ EBNF::Rule.new(:_RANGE_1, "14.1", [:plus, :_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule),
41
+ EBNF::Rule.new(:_RANGE_4, "14.4", [:alt, :_RANGE_5, :_RANGE_6, :R_CHAR, :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
42
+ EBNF::Rule.new(:_RANGE_5, "14.5", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule),
43
+ EBNF::Rule.new(:_RANGE_6, "14.6", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
44
+ EBNF::Rule.new(:_RANGE_2, "14.2", [:opt, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
45
+ EBNF::Rule.new(:_RANGE_3, "14.3", [:diff, "]", :LHS], kind: :terminal).extend(EBNF::PEG::Rule),
46
+ EBNF::Rule.new(:O_RANGE, "15", [:seq, "[^", :_O_RANGE_1, :_O_RANGE_2, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
47
+ EBNF::Rule.new(:_O_RANGE_1, "15.1", [:plus, :_O_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule),
48
+ EBNF::Rule.new(:_O_RANGE_3, "15.3", [:alt, :_O_RANGE_4, :_O_RANGE_5, :R_CHAR, :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
49
+ EBNF::Rule.new(:_O_RANGE_4, "15.4", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule),
50
+ EBNF::Rule.new(:_O_RANGE_5, "15.5", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
51
+ EBNF::Rule.new(:_O_RANGE_2, "15.2", [:opt, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
52
+ EBNF::Rule.new(:STRING1, "16", [:seq, "\"", :_STRING1_1, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
53
+ EBNF::Rule.new(:_STRING1_1, "16.1", [:star, :_STRING1_2], kind: :terminal).extend(EBNF::PEG::Rule),
54
+ EBNF::Rule.new(:_STRING1_2, "16.2", [:diff, :CHAR, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
55
+ EBNF::Rule.new(:STRING2, "17", [:seq, "'", :_STRING2_1, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
56
+ EBNF::Rule.new(:_STRING2_1, "17.1", [:star, :_STRING2_2], kind: :terminal).extend(EBNF::PEG::Rule),
57
+ EBNF::Rule.new(:_STRING2_2, "17.2", [:diff, :CHAR, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
58
+ EBNF::Rule.new(:CHAR, "18", [:alt, :_CHAR_1, :_CHAR_2, :_CHAR_3, :_CHAR_4], kind: :terminal).extend(EBNF::PEG::Rule),
59
+ EBNF::Rule.new(:_CHAR_1, "18.1", [:range, "#x9#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule),
60
+ EBNF::Rule.new(:_CHAR_2, "18.2", [:range, "#x20-#xD7FF"], kind: :terminal).extend(EBNF::PEG::Rule),
61
+ EBNF::Rule.new(:_CHAR_3, "18.3", [:range, "#xE000-#xFFFD"], kind: :terminal).extend(EBNF::PEG::Rule),
62
+ EBNF::Rule.new(:_CHAR_4, "18.4", [:range, "#x10000-#x10FFFF"], kind: :terminal).extend(EBNF::PEG::Rule),
63
+ EBNF::Rule.new(:R_CHAR, "19", [:diff, :CHAR, :_R_CHAR_1], kind: :terminal).extend(EBNF::PEG::Rule),
64
+ EBNF::Rule.new(:_R_CHAR_1, "19.1", [:alt, "]", "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
65
+ EBNF::Rule.new(:POSTFIX, "20", [:range, "?*+"], kind: :terminal).extend(EBNF::PEG::Rule),
66
+ EBNF::Rule.new(:PASS, "21", [:alt, :_PASS_1, :_PASS_2, :_PASS_3, :_PASS_4], kind: :terminal).extend(EBNF::PEG::Rule),
67
+ EBNF::Rule.new(:_PASS_1, "21.1", [:range, "#x9#xA#xD#x20"], kind: :terminal).extend(EBNF::PEG::Rule),
68
+ EBNF::Rule.new(:_PASS_2, "21.2", [:seq, :_PASS_5, :_PASS_6], kind: :terminal).extend(EBNF::PEG::Rule),
69
+ EBNF::Rule.new(:_PASS_5, "21.5", [:alt, :_PASS_7, "//"], kind: :terminal).extend(EBNF::PEG::Rule),
70
+ EBNF::Rule.new(:_PASS_7, "21.7", [:diff, "#", "#x"], kind: :terminal).extend(EBNF::PEG::Rule),
71
+ EBNF::Rule.new(:_PASS_6, "21.6", [:star, :_PASS_8], kind: :terminal).extend(EBNF::PEG::Rule),
72
+ EBNF::Rule.new(:_PASS_8, "21.8", [:range, "^#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule),
73
+ EBNF::Rule.new(:_PASS_3, "21.3", [:seq, "/*", :_PASS_9, "*/"], kind: :terminal).extend(EBNF::PEG::Rule),
74
+ EBNF::Rule.new(:_PASS_9, "21.9", [:star, :_PASS_10], kind: :terminal).extend(EBNF::PEG::Rule),
75
+ EBNF::Rule.new(:_PASS_10, "21.10", [:alt, :_PASS_11, :_PASS_12], kind: :terminal).extend(EBNF::PEG::Rule),
76
+ EBNF::Rule.new(:_PASS_11, "21.11", [:opt, :_PASS_13], kind: :terminal).extend(EBNF::PEG::Rule),
77
+ EBNF::Rule.new(:_PASS_13, "21.13", [:seq, "*", :_PASS_14], kind: :terminal).extend(EBNF::PEG::Rule),
78
+ EBNF::Rule.new(:_PASS_14, "21.14", [:range, "^/"], kind: :terminal).extend(EBNF::PEG::Rule),
79
+ EBNF::Rule.new(:_PASS_12, "21.12", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule),
80
+ EBNF::Rule.new(:_PASS_4, "21.4", [:seq, "(*", :_PASS_15, "*)"], kind: :terminal).extend(EBNF::PEG::Rule),
81
+ EBNF::Rule.new(:_PASS_15, "21.15", [:star, :_PASS_16], kind: :terminal).extend(EBNF::PEG::Rule),
82
+ EBNF::Rule.new(:_PASS_16, "21.16", [:alt, :_PASS_17, :_PASS_18], kind: :terminal).extend(EBNF::PEG::Rule),
83
+ EBNF::Rule.new(:_PASS_17, "21.17", [:opt, :_PASS_19], kind: :terminal).extend(EBNF::PEG::Rule),
84
+ EBNF::Rule.new(:_PASS_19, "21.19", [:seq, "*", :_PASS_20], kind: :terminal).extend(EBNF::PEG::Rule),
85
+ EBNF::Rule.new(:_PASS_20, "21.20", [:range, "^)"], kind: :terminal).extend(EBNF::PEG::Rule),
86
+ EBNF::Rule.new(:_PASS_18, "21.18", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule),
87
+ EBNF::Rule.new(:_pass, nil, [:seq, :PASS], kind: :pass).extend(EBNF::PEG::Rule),
88
+ ]
89
+ end
90
+
@@ -0,0 +1,229 @@
1
+ require_relative 'isoebnf/meta'
2
+ require 'logger'
3
+
4
+ # ISO EBNF parser
5
+ # Parses ISO EBNF into an array of {EBNF::Rule}.
6
+ module EBNF
7
+ class ISOEBNF
8
+ include EBNF::PEG::Parser
9
+
10
+ # The base for terminal-character, which omits "'", '"', and '?'.
11
+ # Could be more optimized, and one might quibble
12
+ # with the overly-strictly defined character set,
13
+ # but it is correct.
14
+ TERMINAL_CHARACTER_BASE = %r{
15
+ [a-zA-Z0-9] | # letter | decimal digit
16
+ , | # concatenate symbol
17
+ = | # defining symbol
18
+ [\|\/!] | # definition separator symbol
19
+ \*\) | # end comment symbol
20
+ \) | # end group symbol
21
+ \] | # end option symbol
22
+ \} | # end repeat symbol
23
+ \- | # except symbol
24
+ #\' | # first quote symbol
25
+ \* | # repetition symbol
26
+ #\" | # second quote symbol
27
+ #\? | # special sequence symbol
28
+ \(\* | # start comment symbol
29
+ \( | # start group symbol
30
+ \[ | # start option symbol
31
+ \{ | # start repeat symbol
32
+ [;\.] | # terminator symbol
33
+ [:+_%@&$<>^\x20\x23\\`~] # other character
34
+ }x
35
+
36
+ TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"\?]}
37
+ FIRST_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|["\?]}
38
+ SECOND_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['\?]}
39
+ SPECIAL_SEQUENCE_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"]}
40
+
41
+ # Abstract syntax tree from parse
42
+ #
43
+ # @return [Array<EBNF::Rule>]
44
+ attr_reader :ast
45
+
46
+ # `[14] integer ::= decimal_digit+`
47
+ terminal(:integer, /\d+/) do |value, prod|
48
+ value.to_i
49
+ end
50
+
51
+ # `[15] meta_identifier ::= letter meta_identifier_character*`
52
+ terminal(:meta_identifier, /[a-zA-Z][a-zA-Z0-9_]*/) do |value|
53
+ value.to_sym
54
+ end
55
+
56
+ # `[17] terminal_string ::= ("'" first_terminal_character+ "'")`
57
+ # ` | ('"' second_terminal_character+ '"')`
58
+ terminal(:terminal_string, /(?:'#{FIRST_TERMINAL_CHARACTER}+')|(?:"#{SECOND_TERMINAL_CHARACTER}+")/x) do |value|
59
+ value[1..-2]
60
+ end
61
+
62
+ # `[20] special_sequence ::= '?' special_sequence_character* '?'`
63
+ terminal(:special_sequence, /\?#{SPECIAL_SEQUENCE_CHARACTER}+\?/)
64
+
65
+ # `[22] terminal_character ::= [a-zA-Z0-9]`
66
+ # ` | [,=;*}#x2d?([{;]`
67
+ # ` | '*)'`
68
+ # ` | '(*'`
69
+ # ` | ']'`
70
+ # ` | other_character`
71
+ terminal(:terminal_character, TERMINAL_CHARACTER)
72
+
73
+ # `[25] empty ::= ''`
74
+ terminal(:empty, //)
75
+
76
+ # `[26] definition_separator_symbol ::= '|' | '/' | '!'`
77
+ terminal(:definition_separator_symbol, /[\|\/!]/)
78
+
79
+ # `[27] terminator_symbol ::= ';' | '.'`
80
+ terminal(:terminator_symbol, /[;\.]/)
81
+
82
+ # `[28] start_option_symbol ::= '[' | '(/'`
83
+ terminal(:start_option_symbol, /\[|(?:\(\/)/)
84
+
85
+ # `[29] end_option_symbol ::= ']'`
86
+ terminal(:end_option_symbol, /\]/)
87
+
88
+ # `[30] start_repeat_symbol ::= '{' | '(:'`
89
+ terminal(:start_repeat_symbol, /{|\(:/)
90
+
91
+ # `[31] end_repeat_symbol ::= '}' | ':)'`
92
+ terminal(:end_repeat_symbol, /}|:\)/)
93
+
94
+ # ## Non-terminal productions
95
+
96
+ # `[2] syntax_rule ::= meta_identifier '=' definitions_list terminator_symbol`
97
+ production(:syntax_rule, clear_packrat: true) do |value, data, callback|
98
+ # value contains an expression.
99
+ # Invoke callback
100
+ sym = value[0][:meta_identifier]
101
+ definitions_list = value[2][:definitions_list]
102
+ callback.call(:rule, EBNF::Rule.new(sym.to_sym, nil, definitions_list))
103
+ nil
104
+ end
105
+
106
+ # Setting `as_hash: true` in the start production makes the value of the form of a hash, rather than an array of hashes.
107
+ #
108
+ # `[3] definitions_list ::= single_definition (definition_separator_symbol definitions_list)*`
109
+ start_production(:definitions_list, as_hash: true)
110
+ production(:definitions_list) do |value|
111
+ if value[:_definitions_list_1].length > 0
112
+ [:alt, value[:single_definition]] + value[:_definitions_list_1]
113
+ else
114
+ value[:single_definition]
115
+ end
116
+ end
117
+ production(:_definitions_list_1) do |value|
118
+ Array(value.first)
119
+ end
120
+ start_production(:_definitions_list_2, as_hash: true)
121
+ production(:_definitions_list_2) do |value|
122
+ if Array(value[:definitions_list]).first == :alt
123
+ value[:definitions_list][1..-1]
124
+ else
125
+ [value[:definitions_list]]
126
+ end
127
+ end
128
+
129
+ # `[4] single_definition ::= term (',' term)*`
130
+ start_production(:single_definition, as_hash: true)
131
+ production(:single_definition) do |value|
132
+ if value[:_single_definition_1].length > 0
133
+ [:seq, value[:term]] + value[:_single_definition_1]
134
+ else
135
+ value[:term]
136
+ end
137
+ end
138
+ production(:_single_definition_1) do |value|
139
+ value.map {|a1| a1.last[:term]}.compact # Get rid of ','
140
+ end
141
+
142
+ # `[5] term ::= factor ('-' exception)?`
143
+ start_production(:term, as_hash: true)
144
+ production(:term) do |value|
145
+ if value[:_term_1]
146
+ [:diff, value[:factor], value[:_term_1]]
147
+ else
148
+ value[:factor]
149
+ end
150
+ end
151
+ production(:_term_1) do |value|
152
+ value.last[:exception] if value
153
+ end
154
+
155
+ # `[6] exception ::= factor`
156
+ start_production(:exception, as_hash: true)
157
+ production(:exception) do |value|
158
+ value[:factor]
159
+ end
160
+
161
+ # `[7] factor ::= (integer '*')? primary`
162
+ start_production(:factor, as_hash: true)
163
+ production(:factor) do |value|
164
+ if value[:_factor_1]
165
+ [:rept, value[:_factor_1], value[:_factor_1], value[:primary]]
166
+ else
167
+ value[:primary]
168
+ end
169
+ end
170
+ production(:_factor_2) do |value|
171
+ value.first[:integer]
172
+ end
173
+
174
+ # `[9] optional_sequence ::= start_option_symbol definitions_list end_option_symbol`
175
+ production(:optional_sequence) do |value|
176
+ [:opt, value[1][:definitions_list]]
177
+ end
178
+
179
+ # `[10] repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol`
180
+ production(:repeated_sequence) do |value|
181
+ [:star, value[1][:definitions_list]]
182
+ end
183
+
184
+ # `[11] grouped_sequence ::= '(' definitions_list ')'`
185
+ production(:grouped_sequence) do |value|
186
+ [:seq, value[1][:definitions_list]]
187
+ end
188
+
189
+ # ## Parser invocation.
190
+ # On start, yield ourselves if a block is given, otherwise, return this parser instance
191
+ #
192
+ # @param [#read, #to_s] input
193
+ # @param [Hash{Symbol => Object}] options
194
+ # @option options [Integer] :level
195
+ # Trace level. 0(debug), 1(info), 2(warn), 3(error).
196
+ # @return [EBNF::ISOEBNF]
197
+ def initialize(input, **options, &block)
198
+ # If the `level` option is set, instantiate a logger for collecting trace information.
199
+ if options.has_key?(:level)
200
+ options[:logger] = Logger.new(STDERR)
201
+ options[:logger].level = options[:level]
202
+ options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
203
+ end
204
+
205
+ # Read input, if necessary, which will be used in a Scanner.
206
+ @input = input.respond_to?(:read) ? input.read : input.to_s
207
+
208
+ parsing_terminals = false
209
+ @ast = []
210
+ parse(@input,
211
+ :syntax,
212
+ ISOEBNFMeta::RULES,
213
+ whitespace: %r{([\x09-\x0d\x20]|(?:\(\*(?:(?:\*[^\)])|[^*])*\*\)))+},
214
+ **options
215
+ ) do |context, *data|
216
+ rule = case context
217
+ when :rule
218
+ # A rule which has already been turned into a `Rule` object.
219
+ rule = data.first
220
+ rule.kind = :terminal if parsing_terminals
221
+ rule
222
+ end
223
+ @ast << rule if rule
224
+ end
225
+ rescue EBNF::PEG::Parser::Error => e
226
+ raise SyntaxError, e.message
227
+ end
228
+ end
229
+ end