ebnf 1.2.0 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +223 -199
  3. data/UNLICENSE +1 -1
  4. data/VERSION +1 -1
  5. data/bin/ebnf +38 -19
  6. data/etc/abnf-core.ebnf +52 -0
  7. data/etc/abnf.abnf +121 -0
  8. data/etc/abnf.ebnf +124 -0
  9. data/etc/abnf.sxp +45 -0
  10. data/etc/doap.ttl +23 -18
  11. data/etc/ebnf.ebnf +21 -33
  12. data/etc/ebnf.html +76 -160
  13. data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
  14. data/etc/ebnf.ll1.sxp +182 -183
  15. data/etc/ebnf.peg.rb +90 -0
  16. data/etc/ebnf.peg.sxp +84 -0
  17. data/etc/ebnf.sxp +40 -41
  18. data/etc/iso-ebnf.ebnf +140 -0
  19. data/etc/iso-ebnf.isoebnf +138 -0
  20. data/etc/iso-ebnf.sxp +65 -0
  21. data/etc/sparql.ebnf +4 -4
  22. data/etc/sparql.html +1603 -1751
  23. data/etc/sparql.ll1.sxp +7372 -7372
  24. data/etc/sparql.peg.rb +532 -0
  25. data/etc/sparql.peg.sxp +597 -0
  26. data/etc/sparql.sxp +363 -362
  27. data/etc/turtle.ebnf +3 -3
  28. data/etc/turtle.html +465 -517
  29. data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
  30. data/etc/turtle.ll1.sxp +425 -425
  31. data/etc/turtle.peg.rb +182 -0
  32. data/etc/turtle.peg.sxp +199 -0
  33. data/etc/turtle.sxp +103 -101
  34. data/lib/ebnf.rb +6 -1
  35. data/lib/ebnf/abnf.rb +301 -0
  36. data/lib/ebnf/abnf/core.rb +23 -0
  37. data/lib/ebnf/abnf/meta.rb +111 -0
  38. data/lib/ebnf/base.rb +114 -69
  39. data/lib/ebnf/bnf.rb +1 -26
  40. data/lib/ebnf/ebnf/meta.rb +90 -0
  41. data/lib/ebnf/isoebnf.rb +229 -0
  42. data/lib/ebnf/isoebnf/meta.rb +75 -0
  43. data/lib/ebnf/ll1.rb +131 -3
  44. data/lib/ebnf/ll1/lexer.rb +20 -22
  45. data/lib/ebnf/ll1/parser.rb +97 -64
  46. data/lib/ebnf/ll1/scanner.rb +82 -50
  47. data/lib/ebnf/native.rb +320 -0
  48. data/lib/ebnf/parser.rb +285 -302
  49. data/lib/ebnf/peg.rb +39 -0
  50. data/lib/ebnf/peg/parser.rb +561 -0
  51. data/lib/ebnf/peg/rule.rb +250 -0
  52. data/lib/ebnf/rule.rb +442 -148
  53. data/lib/ebnf/terminals.rb +21 -0
  54. data/lib/ebnf/writer.rb +587 -82
  55. metadata +125 -18
  56. data/etc/sparql.rb +0 -45773
data/lib/ebnf/base.rb CHANGED
@@ -2,7 +2,7 @@ require 'strscan'
2
2
 
3
3
  # Extended Backus-Naur Form (EBNF), being the W3C variation is
4
4
  # originally defined in the
5
- # [W3C XML 1.0 Spec](http://www.w3.org/TR/REC-xml/#sec-notation).
5
+ # [W3C XML 1.0 Spec](https://www.w3.org/TR/REC-xml/#sec-notation).
6
6
  #
7
7
  # This version attempts to be less strict than the strict definition
8
8
  # to allow for colloquial variations (such as in the Turtle syntax).
@@ -12,8 +12,8 @@ require 'strscan'
12
12
  #
13
13
  # Comments include the content between '/*' and '*/'
14
14
  #
15
- # @see http://www.w3.org/2000/10/swap/grammar/ebnf2turtle.py
16
- # @see http://www.w3.org/2000/10/swap/grammar/ebnf2bnf.n3
15
+ # @see https://www.w3.org/2000/10/swap/grammar/ebnf2turtle.py
16
+ # @see https://www.w3.org/2000/10/swap/grammar/ebnf2bnf.n3
17
17
  #
18
18
  # Based on bnf2turtle by Dan Connolly.
19
19
  #
@@ -36,7 +36,7 @@ require 'strscan'
36
36
  # derived mechanically from the specification.
37
37
  #
38
38
  #
39
- # [N3 design note]: http://www.w3.org/DesignIssues/Notation3
39
+ # [N3 design note]: https://www.w3.org/DesignIssues/Notation3
40
40
  #
41
41
  # Related Work
42
42
  # ------------
@@ -59,31 +59,15 @@ require 'strscan'
59
59
  # expression of the grammar in terms of the higher level EBNF
60
60
  # constructs.
61
61
  #
62
- # [goal]: http://www.w3.org/2002/02/mid/1086902566.21030.1479.camel@dirk;list=public-cwm-bugs
63
- # [n3p announcement]: http://lists.w3.org/Archives/Public/public-cwm-talk/2004OctDec/0029.html
64
- # [Yacker]: http://www.w3.org/1999/02/26-modules/User/Yacker
65
- # [SPARQL specification]: http://www.w3.org/TR/rdf-sparql-query/
66
- # [Cwm Release 1.1.0rc1]: http://lists.w3.org/Archives/Public/public-cwm-announce/2005JulSep/0000.html
67
- # [bnf-rules.n3]: http://www.w3.org/2000/10/swap/grammar/bnf-rules.n3
62
+ # [goal]: https://www.w3.org/2002/02/mid/1086902566.21030.1479.camel@dirk;list=public-cwm-bugs
63
+ # [n3p announcement]: https://lists.w3.org/Archives/Public/public-cwm-talk/2004OctDec/0029.html
64
+ # [Yacker]: https://www.w3.org/1999/02/26-modules/User/Yacker
65
+ # [SPARQL specification]: https://www.w3.org/TR/rdf-sparql-query/
66
+ # [Cwm Release 1.1.0rc1]: https://lists.w3.org/Archives/Public/public-cwm-announce/2005JulSep/0000.html
67
+ # [bnf-rules.n3]: https://www.w3.org/2000/10/swap/grammar/bnf-rules.n3
68
68
  #
69
- # Open Issues and Future Work
70
- # ---------------------------
71
- #
72
- # The yacker output also has the terminals compiled to elaborate regular
73
- # expressions. The best strategy for dealing with lexical tokens is not
74
- # yet clear. Many tokens in SPARQL are case insensitive; this is not yet
75
- # captured formally.
76
- #
77
- # The schema for the EBNF vocabulary used here (``g:seq``, ``g:alt``, ...)
78
- # is not yet published; it should be aligned with [swap/grammar/bnf][]
79
- # and the [bnf2html.n3][] rules (and/or the style of linked XHTML grammar
80
- # in the SPARQL and XML specificiations).
81
- #
82
- # It would be interesting to corroborate the claim in the SPARQL spec
83
- # that the grammar is LL(1) with a mechanical proof based on N3 rules.
84
- #
85
- # [swap/grammar/bnf]: http://www.w3.org/2000/10/swap/grammar/bnf
86
- # [bnf2html.n3]: http://www.w3.org/2000/10/swap/grammar/bnf2html.n3
69
+ # [swap/grammar/bnf]: https://www.w3.org/2000/10/swap/grammar/bnf
70
+ # [bnf2html.n3]: https://www.w3.org/2000/10/swap/grammar/bnf2html.n3
87
71
  #
88
72
  # Background
89
73
  # ----------
@@ -93,14 +77,15 @@ require 'strscan'
93
77
  # of N3 that maps directly to (and from) the standard XML syntax for
94
78
  # RDF.
95
79
  #
96
- # [N3 Primer]: http://www.w3.org/2000/10/swap/Primer.html
80
+ # [N3 Primer]: https://www.w3.org/2000/10/swap/Primer.html
97
81
  #
98
82
  # @author Gregg Kellogg
99
83
  module EBNF
100
84
  class Base
101
85
  include BNF
102
86
  include LL1
103
- include Parser
87
+ include Native
88
+ include PEG
104
89
 
105
90
  # Abstract syntax tree from parse
106
91
  #
@@ -116,24 +101,33 @@ module EBNF
116
101
  # in S-Expressions (similar to SPARQL SSE)
117
102
  #
118
103
  # @param [#read, #to_s] input
104
+ # @param [Symbol] format (:ebnf)
105
+ # Format of input, one of `:abnf`, `:ebnf`, `:isoebnf`, `:native`, or `:sxp`.
106
+ # Use `:native` for the native EBNF parser, rather than the PEG parser.
119
107
  # @param [Hash{Symbol => Object}] options
120
- # @param [Symbol] :format (:ebnf)
121
- # Format of input, one of :ebnf, or :sxp
122
108
  # @option options [Boolean, Array] :debug
123
109
  # Output debug information to an array or $stdout.
110
+ # @option options [Boolean, Array] :validate
111
+ # Validate resulting grammar.
124
112
  def initialize(input, format: :ebnf, **options)
125
113
  @options = options.dup
126
114
  @lineno, @depth, @errors = 1, 0, []
127
- terminal = false
128
115
  @ast = []
129
116
 
130
117
  input = input.respond_to?(:read) ? input.read : input.to_s
131
118
 
132
119
  case format
133
- when :sxp
134
- require 'sxp' unless defined?(SXP)
135
- @ast = SXP::Reader::Basic.read(input).map {|e| Rule.from_sxp(e)}
120
+ when :abnf
121
+ abnf = ABNF.new(input, **options)
122
+ @ast = abnf.ast
136
123
  when :ebnf
124
+ ebnf = Parser.new(input, **options)
125
+ @ast = ebnf.ast
126
+ when :isoebnf
127
+ iso = ISOEBNF.new(input, **options)
128
+ @ast = iso.ast
129
+ when :native
130
+ terminals = false
137
131
  scanner = StringScanner.new(input)
138
132
 
139
133
  eachRule(scanner) do |r|
@@ -141,7 +135,9 @@ module EBNF
141
135
  case r
142
136
  when /^@terminals/
143
137
  # Switch mode to parsing terminals
144
- terminal = true
138
+ terminals = true
139
+ rule = Rule.new(nil, nil, nil, kind: :terminals, ebnf: self)
140
+ @ast << rule
145
141
  when /^@pass\s*(.*)$/m
146
142
  expr = expression($1).first
147
143
  rule = Rule.new(nil, nil, expr, kind: :pass, ebnf: self)
@@ -150,14 +146,49 @@ module EBNF
150
146
  else
151
147
  rule = depth {ruleParts(r)}
152
148
 
153
- rule.kind = :terminal if terminal # Override after we've parsed @terminals
149
+ rule.kind = :terminal if terminals # Override after we've parsed @terminals
154
150
  rule.orig = r
155
151
  @ast << rule
156
152
  end
157
153
  end
154
+ when :sxp
155
+ require 'sxp' unless defined?(SXP)
156
+ @ast = SXP::Reader::Basic.read(input).map {|e| Rule.from_sxp(e)}
158
157
  else
159
158
  raise "unknown input format #{format.inspect}"
160
159
  end
160
+
161
+ validate! if @options[:validate]
162
+ end
163
+
164
+ ##
165
+ # Validate the grammar.
166
+ #
167
+ # Makes sure that rules reference either strings or other defined rules.
168
+ #
169
+ # @raise [SyntaxError]
170
+ def validate!
171
+ ast.each do |rule|
172
+ begin
173
+ rule.validate!(@ast)
174
+ rescue SyntaxError => e
175
+ error("In rule #{rule.sym}: #{e.message}")
176
+ end
177
+ end
178
+ raise SyntaxError, errors.join("\n") unless errors.empty?
179
+ end
180
+
181
+ ##
182
+ # Is the grammar valid?
183
+ #
184
+ # Uses `#validate!` and catches `SyntaxError`
185
+ #
186
+ # @return [Boolean]
187
+ def valid?
188
+ validate!
189
+ true
190
+ rescue SyntaxError
191
+ false
161
192
  end
162
193
 
163
194
  # Iterate over each rule or terminal, except empty
@@ -173,64 +204,63 @@ module EBNF
173
204
  # @return [String]
174
205
  def to_sxp
175
206
  require 'sxp' unless defined?(SXP)
176
- SXP::Generator.string(ast.sort_by{|r| r.id.to_f}.map(&:for_sxp))
207
+ SXP::Generator.string(ast.map(&:for_sxp))
177
208
  end
178
209
 
179
210
  ##
180
211
  # Output formatted EBNF
212
+ #
213
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
181
214
  # @return [String]
182
- def to_s
183
- Writer.string(*ast)
215
+ def to_s(format: :ebnf)
216
+ Writer.string(*ast, format: format)
184
217
  end
185
218
 
186
219
  ##
187
220
  # Output formatted EBNF as HTML
221
+ #
222
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
223
+ # @param [Boolean] validate (false) validate generated HTML.
188
224
  # @return [String]
189
- def to_html
190
- Writer.html(*ast)
225
+ def to_html(format: :ebnf, validate: false)
226
+ Writer.html(*ast, format: format, validate: validate)
191
227
  end
192
228
 
193
229
  ##
194
230
  # Output Ruby parser files
195
231
  #
196
232
  # @param [IO, StringIO] output
197
- # @param [String] :grammarFile
198
- # @param [String] :mod_name ('Branch')
199
- def to_ruby(output = $stdout, grammarFile: nil, mod_name: 'Branch')
233
+ # @param [String] grammarFile
234
+ # @param [String] mod_name ('Meta')
235
+ def to_ruby(output = $stdout, grammarFile: nil, mod_name: 'Meta', **options)
200
236
  unless output == $stdout
201
- output.puts "# This file is automatically generated by #{__FILE__}"
202
- output.puts "# BRANCH derived from #{grammarFile}" if grammarFile
237
+ output.puts "# This file is automatically generated by ebnf version #{EBNF::VERSION}"
238
+ output.puts "# Derived from #{grammarFile}" if grammarFile
203
239
  unless self.errors.empty?
204
- output.puts "# Note, tables completed with errors, may need to be resolved manually:"
240
+ output.puts "# Note, grammar has errors, may need to be resolved manually:"
205
241
  #output.puts "# #{pp.conflicts.map{|c| c.join("\n# ")}.join("\n# ")}"
206
242
  end
207
243
  output.puts "module #{mod_name}"
208
- output.puts " START = #{self.start.inspect}"
209
- output.puts
244
+ output.puts " START = #{self.start.inspect}\n" if self.start
245
+ end
246
+
247
+ # Either output LL(1) BRANCH tables or rules for PEG parsing
248
+ if ast.first.first
249
+ to_ruby_ll1(output)
250
+ else
251
+ to_ruby_peg(output)
210
252
  end
211
- self.outputTable(output, 'BRANCH', self.branch, 1)
212
- self.outputTable(output, 'TERMINALS', self.terminals, 1)
213
- self.outputTable(output, 'FIRST', self.first, 1)
214
- self.outputTable(output, 'FOLLOW', self.follow, 1)
215
- self.outputTable(output, 'CLEANUP', self.cleanup, 1)
216
- self.outputTable(output, 'PASS', [self.pass], 1) if self.pass
217
253
  unless output == $stdout
218
254
  output.puts "end"
219
255
  end
220
256
  end
221
257
 
222
- def dup
223
- new_obj = super
224
- new_obj.instance_variable_set(:@ast, @ast.dup)
225
- new_obj
226
- end
227
-
228
258
  ##
229
- # Find a rule given a symbol
230
- # @param [Symbol] sym
231
- # @return [Rule]
232
- def find_rule(sym)
233
- (@find ||= {})[sym] ||= ast.detect {|r| r.sym == sym}
259
+ # Renumber rule identifiers
260
+ def renumber!
261
+ ast.each_with_index do |rule, index|
262
+ rule.id = (index + 1).to_s
263
+ end
234
264
  end
235
265
 
236
266
  ##
@@ -241,6 +271,7 @@ module EBNF
241
271
  def to_ttl(prefix = nil, ns = "http://example.org/")
242
272
  unless ast.empty?
243
273
  [
274
+ "@prefix dc: <http://purl.org/dc/terms/>.",
244
275
  "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.",
245
276
  "@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.",
246
277
  ("@prefix #{prefix}: <#{ns}>." if prefix),
@@ -253,7 +284,21 @@ module EBNF
253
284
  ].compact
254
285
  end.join("\n") +
255
286
 
256
- ast.sort.map(&:to_ttl).join("\n")
287
+ ast.map(&:to_ttl).join("\n")
288
+ end
289
+
290
+ def dup
291
+ new_obj = super
292
+ new_obj.instance_variable_set(:@ast, @ast.dup)
293
+ new_obj
294
+ end
295
+
296
+ ##
297
+ # Find a rule given a symbol
298
+ # @param [Symbol] sym
299
+ # @return [Rule]
300
+ def find_rule(sym)
301
+ (@find ||= {})[sym] ||= ast.detect {|r| r.sym == sym}
257
302
  end
258
303
 
259
304
  def depth
data/lib/ebnf/bnf.rb CHANGED
@@ -17,32 +17,7 @@ module EBNF
17
17
  new_ast += new_rules
18
18
  end
19
19
 
20
- # Consolodate equivalent terminal rules
21
- to_rewrite = {}
22
- new_ast.select {|r| r.terminal?}.each do |src_rule|
23
- new_ast.select {|r| r.terminal?}.each do |dst_rule|
24
- if src_rule.equivalent?(dst_rule) && src_rule != dst_rule
25
- debug("make_bnf") {"equivalent rules: #{src_rule.inspect} and #{dst_rule.inspect}"}
26
- (to_rewrite[src_rule] ||= []) << dst_rule
27
- end
28
- end
29
- end
30
-
31
- # Replace references to equivalent rules with canonical rule
32
- to_rewrite.each do |src_rule, dst_rules|
33
- dst_rules.each do |dst_rule|
34
- new_ast.each do |mod_rule|
35
- debug("make_bnf") {"rewrite #{mod_rule.inspect} from #{dst_rule.sym} to #{src_rule.sym}"}
36
- mod_rule.rewrite(dst_rule, src_rule)
37
- end
38
- end
39
- end
40
-
41
- # AST now has just rewritten rules
42
- compacted_ast = new_ast - to_rewrite.values.flatten.compact
43
-
44
- # Sort AST by number
45
- @ast = compacted_ast
20
+ @ast = new_ast
46
21
  progress("make_bnf") {"End: #{@ast.length} rules"}
47
22
  self
48
23
  end
@@ -0,0 +1,90 @@
1
+ # This file is automatically generated by ebnf version 2.0.0
2
+ # Derived from etc/ebnf.ebnf
3
+ module EBNFMeta
4
+ RULES = [
5
+ EBNF::Rule.new(:ebnf, "1", [:star, :_ebnf_1]).extend(EBNF::PEG::Rule),
6
+ EBNF::Rule.new(:_ebnf_1, "1.1", [:alt, :declaration, :rule]).extend(EBNF::PEG::Rule),
7
+ EBNF::Rule.new(:declaration, "2", [:alt, "@terminals", :pass]).extend(EBNF::PEG::Rule),
8
+ EBNF::Rule.new(:rule, "3", [:seq, :LHS, :expression]).extend(EBNF::PEG::Rule),
9
+ EBNF::Rule.new(:expression, "4", [:seq, :alt]).extend(EBNF::PEG::Rule),
10
+ EBNF::Rule.new(:alt, "5", [:seq, :seq, :_alt_1]).extend(EBNF::PEG::Rule),
11
+ EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2]).extend(EBNF::PEG::Rule),
12
+ EBNF::Rule.new(:_alt_2, "5.2", [:seq, "|", :seq]).extend(EBNF::PEG::Rule),
13
+ EBNF::Rule.new(:seq, "6", [:plus, :diff]).extend(EBNF::PEG::Rule),
14
+ EBNF::Rule.new(:diff, "7", [:seq, :postfix, :_diff_1]).extend(EBNF::PEG::Rule),
15
+ EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2]).extend(EBNF::PEG::Rule),
16
+ EBNF::Rule.new(:_diff_2, "7.2", [:seq, "-", :postfix]).extend(EBNF::PEG::Rule),
17
+ EBNF::Rule.new(:postfix, "8", [:seq, :primary, :_postfix_1]).extend(EBNF::PEG::Rule),
18
+ EBNF::Rule.new(:_postfix_1, "8.1", [:opt, :POSTFIX]).extend(EBNF::PEG::Rule),
19
+ EBNF::Rule.new(:primary, "9", [:alt, :HEX, :SYMBOL, :O_RANGE, :RANGE, :STRING1, :STRING2, :_primary_1]).extend(EBNF::PEG::Rule),
20
+ EBNF::Rule.new(:_primary_1, "9.1", [:seq, "(", :expression, ")"]).extend(EBNF::PEG::Rule),
21
+ EBNF::Rule.new(:pass, "10", [:seq, "@pass", :expression]).extend(EBNF::PEG::Rule),
22
+ EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule),
23
+ EBNF::Rule.new(:LHS, "11", [:seq, :_LHS_1, :SYMBOL, :_LHS_2, "::="], kind: :terminal).extend(EBNF::PEG::Rule),
24
+ EBNF::Rule.new(:_LHS_1, "11.1", [:opt, :_LHS_3], kind: :terminal).extend(EBNF::PEG::Rule),
25
+ EBNF::Rule.new(:_LHS_3, "11.3", [:seq, "[", :SYMBOL, "]", :_LHS_4], kind: :terminal).extend(EBNF::PEG::Rule),
26
+ EBNF::Rule.new(:_LHS_4, "11.4", [:plus, " "], kind: :terminal).extend(EBNF::PEG::Rule),
27
+ EBNF::Rule.new(:_LHS_2, "11.2", [:star, " "], kind: :terminal).extend(EBNF::PEG::Rule),
28
+ EBNF::Rule.new(:SYMBOL, "12", [:plus, :_SYMBOL_1], kind: :terminal).extend(EBNF::PEG::Rule),
29
+ EBNF::Rule.new(:_SYMBOL_1, "12.1", [:alt, :_SYMBOL_2, :_SYMBOL_3, :_SYMBOL_4, "_", "."], kind: :terminal).extend(EBNF::PEG::Rule),
30
+ EBNF::Rule.new(:_SYMBOL_2, "12.2", [:range, "a-z"], kind: :terminal).extend(EBNF::PEG::Rule),
31
+ EBNF::Rule.new(:_SYMBOL_3, "12.3", [:range, "A-Z"], kind: :terminal).extend(EBNF::PEG::Rule),
32
+ EBNF::Rule.new(:_SYMBOL_4, "12.4", [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
33
+ EBNF::Rule.new(:HEX, "13", [:seq, "#x", :_HEX_1], kind: :terminal).extend(EBNF::PEG::Rule),
34
+ EBNF::Rule.new(:_HEX_1, "13.1", [:plus, :_HEX_2], kind: :terminal).extend(EBNF::PEG::Rule),
35
+ EBNF::Rule.new(:_HEX_2, "13.2", [:alt, :_HEX_3, :_HEX_4, :_HEX_5], kind: :terminal).extend(EBNF::PEG::Rule),
36
+ EBNF::Rule.new(:_HEX_3, "13.3", [:range, "a-f"], kind: :terminal).extend(EBNF::PEG::Rule),
37
+ EBNF::Rule.new(:_HEX_4, "13.4", [:range, "A-F"], kind: :terminal).extend(EBNF::PEG::Rule),
38
+ EBNF::Rule.new(:_HEX_5, "13.5", [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
39
+ EBNF::Rule.new(:RANGE, "14", [:seq, "[", :_RANGE_1, :_RANGE_2, :_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule),
40
+ EBNF::Rule.new(:_RANGE_1, "14.1", [:plus, :_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule),
41
+ EBNF::Rule.new(:_RANGE_4, "14.4", [:alt, :_RANGE_5, :_RANGE_6, :R_CHAR, :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
42
+ EBNF::Rule.new(:_RANGE_5, "14.5", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule),
43
+ EBNF::Rule.new(:_RANGE_6, "14.6", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
44
+ EBNF::Rule.new(:_RANGE_2, "14.2", [:opt, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
45
+ EBNF::Rule.new(:_RANGE_3, "14.3", [:diff, "]", :LHS], kind: :terminal).extend(EBNF::PEG::Rule),
46
+ EBNF::Rule.new(:O_RANGE, "15", [:seq, "[^", :_O_RANGE_1, :_O_RANGE_2, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
47
+ EBNF::Rule.new(:_O_RANGE_1, "15.1", [:plus, :_O_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule),
48
+ EBNF::Rule.new(:_O_RANGE_3, "15.3", [:alt, :_O_RANGE_4, :_O_RANGE_5, :R_CHAR, :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
49
+ EBNF::Rule.new(:_O_RANGE_4, "15.4", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule),
50
+ EBNF::Rule.new(:_O_RANGE_5, "15.5", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
51
+ EBNF::Rule.new(:_O_RANGE_2, "15.2", [:opt, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
52
+ EBNF::Rule.new(:STRING1, "16", [:seq, "\"", :_STRING1_1, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
53
+ EBNF::Rule.new(:_STRING1_1, "16.1", [:star, :_STRING1_2], kind: :terminal).extend(EBNF::PEG::Rule),
54
+ EBNF::Rule.new(:_STRING1_2, "16.2", [:diff, :CHAR, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
55
+ EBNF::Rule.new(:STRING2, "17", [:seq, "'", :_STRING2_1, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
56
+ EBNF::Rule.new(:_STRING2_1, "17.1", [:star, :_STRING2_2], kind: :terminal).extend(EBNF::PEG::Rule),
57
+ EBNF::Rule.new(:_STRING2_2, "17.2", [:diff, :CHAR, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
58
+ EBNF::Rule.new(:CHAR, "18", [:alt, :_CHAR_1, :_CHAR_2, :_CHAR_3, :_CHAR_4], kind: :terminal).extend(EBNF::PEG::Rule),
59
+ EBNF::Rule.new(:_CHAR_1, "18.1", [:range, "#x9#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule),
60
+ EBNF::Rule.new(:_CHAR_2, "18.2", [:range, "#x20-#xD7FF"], kind: :terminal).extend(EBNF::PEG::Rule),
61
+ EBNF::Rule.new(:_CHAR_3, "18.3", [:range, "#xE000-#xFFFD"], kind: :terminal).extend(EBNF::PEG::Rule),
62
+ EBNF::Rule.new(:_CHAR_4, "18.4", [:range, "#x10000-#x10FFFF"], kind: :terminal).extend(EBNF::PEG::Rule),
63
+ EBNF::Rule.new(:R_CHAR, "19", [:diff, :CHAR, :_R_CHAR_1], kind: :terminal).extend(EBNF::PEG::Rule),
64
+ EBNF::Rule.new(:_R_CHAR_1, "19.1", [:alt, "]", "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
65
+ EBNF::Rule.new(:POSTFIX, "20", [:range, "?*+"], kind: :terminal).extend(EBNF::PEG::Rule),
66
+ EBNF::Rule.new(:PASS, "21", [:alt, :_PASS_1, :_PASS_2, :_PASS_3, :_PASS_4], kind: :terminal).extend(EBNF::PEG::Rule),
67
+ EBNF::Rule.new(:_PASS_1, "21.1", [:range, "#x9#xA#xD#x20"], kind: :terminal).extend(EBNF::PEG::Rule),
68
+ EBNF::Rule.new(:_PASS_2, "21.2", [:seq, :_PASS_5, :_PASS_6], kind: :terminal).extend(EBNF::PEG::Rule),
69
+ EBNF::Rule.new(:_PASS_5, "21.5", [:alt, :_PASS_7, "//"], kind: :terminal).extend(EBNF::PEG::Rule),
70
+ EBNF::Rule.new(:_PASS_7, "21.7", [:diff, "#", "#x"], kind: :terminal).extend(EBNF::PEG::Rule),
71
+ EBNF::Rule.new(:_PASS_6, "21.6", [:star, :_PASS_8], kind: :terminal).extend(EBNF::PEG::Rule),
72
+ EBNF::Rule.new(:_PASS_8, "21.8", [:range, "^#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule),
73
+ EBNF::Rule.new(:_PASS_3, "21.3", [:seq, "/*", :_PASS_9, "*/"], kind: :terminal).extend(EBNF::PEG::Rule),
74
+ EBNF::Rule.new(:_PASS_9, "21.9", [:star, :_PASS_10], kind: :terminal).extend(EBNF::PEG::Rule),
75
+ EBNF::Rule.new(:_PASS_10, "21.10", [:alt, :_PASS_11, :_PASS_12], kind: :terminal).extend(EBNF::PEG::Rule),
76
+ EBNF::Rule.new(:_PASS_11, "21.11", [:opt, :_PASS_13], kind: :terminal).extend(EBNF::PEG::Rule),
77
+ EBNF::Rule.new(:_PASS_13, "21.13", [:seq, "*", :_PASS_14], kind: :terminal).extend(EBNF::PEG::Rule),
78
+ EBNF::Rule.new(:_PASS_14, "21.14", [:range, "^/"], kind: :terminal).extend(EBNF::PEG::Rule),
79
+ EBNF::Rule.new(:_PASS_12, "21.12", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule),
80
+ EBNF::Rule.new(:_PASS_4, "21.4", [:seq, "(*", :_PASS_15, "*)"], kind: :terminal).extend(EBNF::PEG::Rule),
81
+ EBNF::Rule.new(:_PASS_15, "21.15", [:star, :_PASS_16], kind: :terminal).extend(EBNF::PEG::Rule),
82
+ EBNF::Rule.new(:_PASS_16, "21.16", [:alt, :_PASS_17, :_PASS_18], kind: :terminal).extend(EBNF::PEG::Rule),
83
+ EBNF::Rule.new(:_PASS_17, "21.17", [:opt, :_PASS_19], kind: :terminal).extend(EBNF::PEG::Rule),
84
+ EBNF::Rule.new(:_PASS_19, "21.19", [:seq, "*", :_PASS_20], kind: :terminal).extend(EBNF::PEG::Rule),
85
+ EBNF::Rule.new(:_PASS_20, "21.20", [:range, "^)"], kind: :terminal).extend(EBNF::PEG::Rule),
86
+ EBNF::Rule.new(:_PASS_18, "21.18", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule),
87
+ EBNF::Rule.new(:_pass, nil, [:seq, :PASS], kind: :pass).extend(EBNF::PEG::Rule),
88
+ ]
89
+ end
90
+
@@ -0,0 +1,229 @@
1
+ require_relative 'isoebnf/meta'
2
+ require 'logger'
3
+
4
+ # ISO EBNF parser
5
+ # Parses ISO EBNF into an array of {EBNF::Rule}.
6
+ module EBNF
7
+ class ISOEBNF
8
+ include EBNF::PEG::Parser
9
+
10
+ # The base for terminal-character, which omits "'", '"', and '?'.
11
+ # Could be more optimized, and one might quibble
12
+ # with the overly-strictly defined character set,
13
+ # but it is correct.
14
+ TERMINAL_CHARACTER_BASE = %r{
15
+ [a-zA-Z0-9] | # letter | decimal digit
16
+ , | # concatenate symbol
17
+ = | # defining symbol
18
+ [\|\/!] | # definition separator symbol
19
+ \*\) | # end comment symbol
20
+ \) | # end group symbol
21
+ \] | # end option symbol
22
+ \} | # end repeat symbol
23
+ \- | # except symbol
24
+ #\' | # first quote symbol
25
+ \* | # repetition symbol
26
+ #\" | # second quote symbol
27
+ #\? | # special sequence symbol
28
+ \(\* | # start comment symbol
29
+ \( | # start group symbol
30
+ \[ | # start option symbol
31
+ \{ | # start repeat symbol
32
+ [;\.] | # terminator symbol
33
+ [:+_%@&$<>^\x20\x23\\`~] # other character
34
+ }x
35
+
36
+ TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"\?]}
37
+ FIRST_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|["\?]}
38
+ SECOND_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['\?]}
39
+ SPECIAL_SEQUENCE_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"]}
40
+
41
+ # Abstract syntax tree from parse
42
+ #
43
+ # @return [Array<EBNF::Rule>]
44
+ attr_reader :ast
45
+
46
+ # `[14] integer ::= decimal_digit+`
47
+ terminal(:integer, /\d+/) do |value, prod|
48
+ value.to_i
49
+ end
50
+
51
+ # `[15] meta_identifier ::= letter meta_identifier_character*`
52
+ terminal(:meta_identifier, /[a-zA-Z][a-zA-Z0-9_]*/) do |value|
53
+ value.to_sym
54
+ end
55
+
56
+ # `[17] terminal_string ::= ("'" first_terminal_character+ "'")`
57
+ # ` | ('"' second_terminal_character+ '"')`
58
+ terminal(:terminal_string, /(?:'#{FIRST_TERMINAL_CHARACTER}+')|(?:"#{SECOND_TERMINAL_CHARACTER}+")/x) do |value|
59
+ value[1..-2]
60
+ end
61
+
62
+ # `[20] special_sequence ::= '?' special_sequence_character* '?'`
63
+ terminal(:special_sequence, /\?#{SPECIAL_SEQUENCE_CHARACTER}+\?/)
64
+
65
+ # `[22] terminal_character ::= [a-zA-Z0-9]`
66
+ # ` | [,=;*}#x2d?([{;]`
67
+ # ` | '*)'`
68
+ # ` | '(*'`
69
+ # ` | ']'`
70
+ # ` | other_character`
71
+ terminal(:terminal_character, TERMINAL_CHARACTER)
72
+
73
+ # `[25] empty ::= ''`
74
+ terminal(:empty, //)
75
+
76
+ # `[26] definition_separator_symbol ::= '|' | '/' | '!'`
77
+ terminal(:definition_separator_symbol, /[\|\/!]/)
78
+
79
+ # `[27] terminator_symbol ::= ';' | '.'`
80
+ terminal(:terminator_symbol, /[;\.]/)
81
+
82
+ # `[28] start_option_symbol ::= '[' | '(/'`
83
+ terminal(:start_option_symbol, /\[|(?:\(\/)/)
84
+
85
+ # `[29] end_option_symbol ::= ']'`
86
+ terminal(:end_option_symbol, /\]/)
87
+
88
+ # `[30] start_repeat_symbol ::= '{' | '(:'`
89
+ terminal(:start_repeat_symbol, /{|\(:/)
90
+
91
+ # `[31] end_repeat_symbol ::= '}' | ':)'`
92
+ terminal(:end_repeat_symbol, /}|:\)/)
93
+
94
+ # ## Non-terminal productions
95
+
96
+ # `[2] syntax_rule ::= meta_identifier '=' definitions_list terminator_symbol`
97
+ production(:syntax_rule, clear_packrat: true) do |value, data, callback|
98
+ # value contains an expression.
99
+ # Invoke callback
100
+ sym = value[0][:meta_identifier]
101
+ definitions_list = value[2][:definitions_list]
102
+ callback.call(:rule, EBNF::Rule.new(sym.to_sym, nil, definitions_list))
103
+ nil
104
+ end
105
+
106
+ # Setting `as_hash: true` in the start production makes the value of the form of a hash, rather than an array of hashes.
107
+ #
108
+ # `[3] definitions_list ::= single_definition (definition_separator_symbol definitions_list)*`
109
+ start_production(:definitions_list, as_hash: true)
110
+ production(:definitions_list) do |value|
111
+ if value[:_definitions_list_1].length > 0
112
+ [:alt, value[:single_definition]] + value[:_definitions_list_1]
113
+ else
114
+ value[:single_definition]
115
+ end
116
+ end
117
+ production(:_definitions_list_1) do |value|
118
+ Array(value.first)
119
+ end
120
+ start_production(:_definitions_list_2, as_hash: true)
121
+ production(:_definitions_list_2) do |value|
122
+ if Array(value[:definitions_list]).first == :alt
123
+ value[:definitions_list][1..-1]
124
+ else
125
+ [value[:definitions_list]]
126
+ end
127
+ end
128
+
129
+ # `[4] single_definition ::= term (',' term)*`
130
+ start_production(:single_definition, as_hash: true)
131
+ production(:single_definition) do |value|
132
+ if value[:_single_definition_1].length > 0
133
+ [:seq, value[:term]] + value[:_single_definition_1]
134
+ else
135
+ value[:term]
136
+ end
137
+ end
138
+ production(:_single_definition_1) do |value|
139
+ value.map {|a1| a1.last[:term]}.compact # Get rid of '|'
140
+ end
141
+
142
+ # `[5] term ::= factor ('-' exception)?`
143
+ start_production(:term, as_hash: true)
144
+ production(:term) do |value|
145
+ if value[:_term_1]
146
+ [:diff, value[:factor], value[:_term_1]]
147
+ else
148
+ value[:factor]
149
+ end
150
+ end
151
+ production(:_term_1) do |value|
152
+ value.last[:exception] if value
153
+ end
154
+
155
+ # `[6] exception ::= factor`
156
+ start_production(:exception, as_hash: true)
157
+ production(:exception) do |value|
158
+ value[:factor]
159
+ end
160
+
161
+ # `[7] factor ::= (integer '*')? primary`
162
+ start_production(:factor, as_hash: true)
163
+ production(:factor) do |value|
164
+ if value[:_factor_1]
165
+ [:rept, value[:_factor_1], value[:_factor_1], value[:primary]]
166
+ else
167
+ value[:primary]
168
+ end
169
+ end
170
+ production(:_factor_2) do |value|
171
+ value.first[:integer]
172
+ end
173
+
174
+ # `[9] optional_sequence ::= start_option_symbol definitions_list end_option_symbol`
175
+ production(:optional_sequence) do |value|
176
+ [:opt, value[1][:definitions_list]]
177
+ end
178
+
179
+ # `[10] repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol`
180
+ production(:repeated_sequence) do |value|
181
+ [:star, value[1][:definitions_list]]
182
+ end
183
+
184
+ # `[11] grouped_sequence ::= '(' definitions_list ')'`
185
+ production(:grouped_sequence) do |value|
186
+ [:seq, value[1][:definitions_list]]
187
+ end
188
+
189
+ # ## Parser invocation.
190
+ # On start, yield ourselves if a block is given, otherwise, return this parser instance
191
+ #
192
+ # @param [#read, #to_s] input
193
+ # @param [Hash{Symbol => Object}] options
194
+ # @option options [Integer] :level
195
+ # Trace level. 0(debug), 1(info), 2(warn), 3(error).
196
+ # @return [EBNF::ISOEBNF]
197
+ def initialize(input, **options, &block)
198
+ # If the `level` option is set, instantiate a logger for collecting trace information.
199
+ if options.has_key?(:level)
200
+ options[:logger] = Logger.new(STDERR)
201
+ options[:logger].level = options[:level]
202
+ options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
203
+ end
204
+
205
+ # Read input, if necessary, which will be used in a Scanner.
206
+ @input = input.respond_to?(:read) ? input.read : input.to_s
207
+
208
+ parsing_terminals = false
209
+ @ast = []
210
+ parse(@input,
211
+ :syntax,
212
+ ISOEBNFMeta::RULES,
213
+ whitespace: %r{([\x09-\x0d\x20]|(?:\(\*(?:(?:\*[^\)])|[^*])*\*\)))+},
214
+ **options
215
+ ) do |context, *data|
216
+ rule = case context
217
+ when :rule
218
+ # A rule which has already been turned into a `Rule` object.
219
+ rule = data.first
220
+ rule.kind = :terminal if parsing_terminals
221
+ rule
222
+ end
223
+ @ast << rule if rule
224
+ end
225
+ rescue EBNF::PEG::Parser::Error => e
226
+ raise SyntaxError, e.message
227
+ end
228
+ end
229
+ end