ebnf 1.2.0 → 2.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +223 -199
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +38 -19
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/doap.ttl +23 -18
- data/etc/ebnf.ebnf +21 -33
- data/etc/ebnf.html +76 -160
- data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
- data/etc/ebnf.ll1.sxp +182 -183
- data/etc/ebnf.peg.rb +90 -0
- data/etc/ebnf.peg.sxp +84 -0
- data/etc/ebnf.sxp +40 -41
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +363 -362
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +103 -101
- data/lib/ebnf.rb +6 -1
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +114 -69
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +131 -3
- data/lib/ebnf/ll1/lexer.rb +20 -22
- data/lib/ebnf/ll1/parser.rb +97 -64
- data/lib/ebnf/ll1/scanner.rb +82 -50
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +561 -0
- data/lib/ebnf/peg/rule.rb +250 -0
- data/lib/ebnf/rule.rb +442 -148
- data/lib/ebnf/terminals.rb +21 -0
- data/lib/ebnf/writer.rb +587 -82
- metadata +125 -18
- data/etc/sparql.rb +0 -45773
data/lib/ebnf/base.rb
CHANGED
@@ -2,7 +2,7 @@ require 'strscan'
|
|
2
2
|
|
3
3
|
# Extended Backus-Naur Form (EBNF), being the W3C variation is
|
4
4
|
# originally defined in the
|
5
|
-
# [W3C XML 1.0 Spec](
|
5
|
+
# [W3C XML 1.0 Spec](https://www.w3.org/TR/REC-xml/#sec-notation).
|
6
6
|
#
|
7
7
|
# This version attempts to be less strict than the strict definition
|
8
8
|
# to allow for colloquial variations (such as in the Turtle syntax).
|
@@ -12,8 +12,8 @@ require 'strscan'
|
|
12
12
|
#
|
13
13
|
# Comments include the content between '/*' and '*/'
|
14
14
|
#
|
15
|
-
# @see
|
16
|
-
# @see
|
15
|
+
# @see https://www.w3.org/2000/10/swap/grammar/ebnf2turtle.py
|
16
|
+
# @see https://www.w3.org/2000/10/swap/grammar/ebnf2bnf.n3
|
17
17
|
#
|
18
18
|
# Based on bnf2turtle by Dan Connolly.
|
19
19
|
#
|
@@ -36,7 +36,7 @@ require 'strscan'
|
|
36
36
|
# derived mechanically from the specification.
|
37
37
|
#
|
38
38
|
#
|
39
|
-
# [N3 design note]:
|
39
|
+
# [N3 design note]: https://www.w3.org/DesignIssues/Notation3
|
40
40
|
#
|
41
41
|
# Related Work
|
42
42
|
# ------------
|
@@ -59,31 +59,15 @@ require 'strscan'
|
|
59
59
|
# expression of the grammar in terms of the higher level EBNF
|
60
60
|
# constructs.
|
61
61
|
#
|
62
|
-
# [goal]:
|
63
|
-
# [n3p announcement]:
|
64
|
-
# [Yacker]:
|
65
|
-
# [SPARQL specification]:
|
66
|
-
# [Cwm Release 1.1.0rc1]:
|
67
|
-
# [bnf-rules.n3]:
|
62
|
+
# [goal]: https://www.w3.org/2002/02/mid/1086902566.21030.1479.camel@dirk;list=public-cwm-bugs
|
63
|
+
# [n3p announcement]: https://lists.w3.org/Archives/Public/public-cwm-talk/2004OctDec/0029.html
|
64
|
+
# [Yacker]: https://www.w3.org/1999/02/26-modules/User/Yacker
|
65
|
+
# [SPARQL specification]: https://www.w3.org/TR/rdf-sparql-query/
|
66
|
+
# [Cwm Release 1.1.0rc1]: https://lists.w3.org/Archives/Public/public-cwm-announce/2005JulSep/0000.html
|
67
|
+
# [bnf-rules.n3]: https://www.w3.org/2000/10/swap/grammar/bnf-rules.n3
|
68
68
|
#
|
69
|
-
#
|
70
|
-
#
|
71
|
-
#
|
72
|
-
# The yacker output also has the terminals compiled to elaborate regular
|
73
|
-
# expressions. The best strategy for dealing with lexical tokens is not
|
74
|
-
# yet clear. Many tokens in SPARQL are case insensitive; this is not yet
|
75
|
-
# captured formally.
|
76
|
-
#
|
77
|
-
# The schema for the EBNF vocabulary used here (``g:seq``, ``g:alt``, ...)
|
78
|
-
# is not yet published; it should be aligned with [swap/grammar/bnf][]
|
79
|
-
# and the [bnf2html.n3][] rules (and/or the style of linked XHTML grammar
|
80
|
-
# in the SPARQL and XML specificiations).
|
81
|
-
#
|
82
|
-
# It would be interesting to corroborate the claim in the SPARQL spec
|
83
|
-
# that the grammar is LL(1) with a mechanical proof based on N3 rules.
|
84
|
-
#
|
85
|
-
# [swap/grammar/bnf]: http://www.w3.org/2000/10/swap/grammar/bnf
|
86
|
-
# [bnf2html.n3]: http://www.w3.org/2000/10/swap/grammar/bnf2html.n3
|
69
|
+
# [swap/grammar/bnf]: https://www.w3.org/2000/10/swap/grammar/bnf
|
70
|
+
# [bnf2html.n3]: https://www.w3.org/2000/10/swap/grammar/bnf2html.n3
|
87
71
|
#
|
88
72
|
# Background
|
89
73
|
# ----------
|
@@ -93,14 +77,15 @@ require 'strscan'
|
|
93
77
|
# of N3 that maps directly to (and from) the standard XML syntax for
|
94
78
|
# RDF.
|
95
79
|
#
|
96
|
-
# [N3 Primer]:
|
80
|
+
# [N3 Primer]: https://www.w3.org/2000/10/swap/Primer.html
|
97
81
|
#
|
98
82
|
# @author Gregg Kellogg
|
99
83
|
module EBNF
|
100
84
|
class Base
|
101
85
|
include BNF
|
102
86
|
include LL1
|
103
|
-
include
|
87
|
+
include Native
|
88
|
+
include PEG
|
104
89
|
|
105
90
|
# Abstract syntax tree from parse
|
106
91
|
#
|
@@ -116,24 +101,33 @@ module EBNF
|
|
116
101
|
# in S-Expressions (similar to SPARQL SSE)
|
117
102
|
#
|
118
103
|
# @param [#read, #to_s] input
|
104
|
+
# @param [Symbol] format (:ebnf)
|
105
|
+
# Format of input, one of `:abnf`, `:ebnf`, `:isoebnf`, `:native`, or `:sxp`.
|
106
|
+
# Use `:native` for the native EBNF parser, rather than the PEG parser.
|
119
107
|
# @param [Hash{Symbol => Object}] options
|
120
|
-
# @param [Symbol] :format (:ebnf)
|
121
|
-
# Format of input, one of :ebnf, or :sxp
|
122
108
|
# @option options [Boolean, Array] :debug
|
123
109
|
# Output debug information to an array or $stdout.
|
110
|
+
# @option options [Boolean, Array] :validate
|
111
|
+
# Validate resulting grammar.
|
124
112
|
def initialize(input, format: :ebnf, **options)
|
125
113
|
@options = options.dup
|
126
114
|
@lineno, @depth, @errors = 1, 0, []
|
127
|
-
terminal = false
|
128
115
|
@ast = []
|
129
116
|
|
130
117
|
input = input.respond_to?(:read) ? input.read : input.to_s
|
131
118
|
|
132
119
|
case format
|
133
|
-
when :
|
134
|
-
|
135
|
-
@ast =
|
120
|
+
when :abnf
|
121
|
+
abnf = ABNF.new(input, **options)
|
122
|
+
@ast = abnf.ast
|
136
123
|
when :ebnf
|
124
|
+
ebnf = Parser.new(input, **options)
|
125
|
+
@ast = ebnf.ast
|
126
|
+
when :isoebnf
|
127
|
+
iso = ISOEBNF.new(input, **options)
|
128
|
+
@ast = iso.ast
|
129
|
+
when :native
|
130
|
+
terminals = false
|
137
131
|
scanner = StringScanner.new(input)
|
138
132
|
|
139
133
|
eachRule(scanner) do |r|
|
@@ -141,7 +135,9 @@ module EBNF
|
|
141
135
|
case r
|
142
136
|
when /^@terminals/
|
143
137
|
# Switch mode to parsing terminals
|
144
|
-
|
138
|
+
terminals = true
|
139
|
+
rule = Rule.new(nil, nil, nil, kind: :terminals, ebnf: self)
|
140
|
+
@ast << rule
|
145
141
|
when /^@pass\s*(.*)$/m
|
146
142
|
expr = expression($1).first
|
147
143
|
rule = Rule.new(nil, nil, expr, kind: :pass, ebnf: self)
|
@@ -150,14 +146,49 @@ module EBNF
|
|
150
146
|
else
|
151
147
|
rule = depth {ruleParts(r)}
|
152
148
|
|
153
|
-
rule.kind = :terminal if
|
149
|
+
rule.kind = :terminal if terminals # Override after we've parsed @terminals
|
154
150
|
rule.orig = r
|
155
151
|
@ast << rule
|
156
152
|
end
|
157
153
|
end
|
154
|
+
when :sxp
|
155
|
+
require 'sxp' unless defined?(SXP)
|
156
|
+
@ast = SXP::Reader::Basic.read(input).map {|e| Rule.from_sxp(e)}
|
158
157
|
else
|
159
158
|
raise "unknown input format #{format.inspect}"
|
160
159
|
end
|
160
|
+
|
161
|
+
validate! if @options[:validate]
|
162
|
+
end
|
163
|
+
|
164
|
+
##
|
165
|
+
# Validate the grammar.
|
166
|
+
#
|
167
|
+
# Makes sure that rules reference either strings or other defined rules.
|
168
|
+
#
|
169
|
+
# @raise [SyntaxError]
|
170
|
+
def validate!
|
171
|
+
ast.each do |rule|
|
172
|
+
begin
|
173
|
+
rule.validate!(@ast)
|
174
|
+
rescue SyntaxError => e
|
175
|
+
error("In rule #{rule.sym}: #{e.message}")
|
176
|
+
end
|
177
|
+
end
|
178
|
+
raise SyntaxError, errors.join("\n") unless errors.empty?
|
179
|
+
end
|
180
|
+
|
181
|
+
##
|
182
|
+
# Is the grammar valid?
|
183
|
+
#
|
184
|
+
# Uses `#validate!` and catches `SyntaxError`
|
185
|
+
#
|
186
|
+
# @return [Boolean]
|
187
|
+
def valid?
|
188
|
+
validate!
|
189
|
+
true
|
190
|
+
rescue SyntaxError
|
191
|
+
false
|
161
192
|
end
|
162
193
|
|
163
194
|
# Iterate over each rule or terminal, except empty
|
@@ -173,64 +204,63 @@ module EBNF
|
|
173
204
|
# @return [String]
|
174
205
|
def to_sxp
|
175
206
|
require 'sxp' unless defined?(SXP)
|
176
|
-
SXP::Generator.string(ast.
|
207
|
+
SXP::Generator.string(ast.map(&:for_sxp))
|
177
208
|
end
|
178
209
|
|
179
210
|
##
|
180
211
|
# Output formatted EBNF
|
212
|
+
#
|
213
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
181
214
|
# @return [String]
|
182
|
-
def to_s
|
183
|
-
Writer.string(*ast)
|
215
|
+
def to_s(format: :ebnf)
|
216
|
+
Writer.string(*ast, format: format)
|
184
217
|
end
|
185
218
|
|
186
219
|
##
|
187
220
|
# Output formatted EBNF as HTML
|
221
|
+
#
|
222
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
223
|
+
# @param [Boolean] validate (false) validate generated HTML.
|
188
224
|
# @return [String]
|
189
|
-
def to_html
|
190
|
-
Writer.html(*ast)
|
225
|
+
def to_html(format: :ebnf, validate: false)
|
226
|
+
Writer.html(*ast, format: format, validate: validate)
|
191
227
|
end
|
192
228
|
|
193
229
|
##
|
194
230
|
# Output Ruby parser files
|
195
231
|
#
|
196
232
|
# @param [IO, StringIO] output
|
197
|
-
# @param [String]
|
198
|
-
# @param [String]
|
199
|
-
def to_ruby(output = $stdout, grammarFile: nil, mod_name: '
|
233
|
+
# @param [String] grammarFile
|
234
|
+
# @param [String] mod_name ('Meta')
|
235
|
+
def to_ruby(output = $stdout, grammarFile: nil, mod_name: 'Meta', **options)
|
200
236
|
unless output == $stdout
|
201
|
-
output.puts "# This file is automatically generated by #{
|
202
|
-
output.puts "#
|
237
|
+
output.puts "# This file is automatically generated by ebnf version #{EBNF::VERSION}"
|
238
|
+
output.puts "# Derived from #{grammarFile}" if grammarFile
|
203
239
|
unless self.errors.empty?
|
204
|
-
output.puts "# Note,
|
240
|
+
output.puts "# Note, grammar has errors, may need to be resolved manually:"
|
205
241
|
#output.puts "# #{pp.conflicts.map{|c| c.join("\n# ")}.join("\n# ")}"
|
206
242
|
end
|
207
243
|
output.puts "module #{mod_name}"
|
208
|
-
output.puts " START = #{self.start.inspect}"
|
209
|
-
|
244
|
+
output.puts " START = #{self.start.inspect}\n" if self.start
|
245
|
+
end
|
246
|
+
|
247
|
+
# Either output LL(1) BRANCH tables or rules for PEG parsing
|
248
|
+
if ast.first.first
|
249
|
+
to_ruby_ll1(output)
|
250
|
+
else
|
251
|
+
to_ruby_peg(output)
|
210
252
|
end
|
211
|
-
self.outputTable(output, 'BRANCH', self.branch, 1)
|
212
|
-
self.outputTable(output, 'TERMINALS', self.terminals, 1)
|
213
|
-
self.outputTable(output, 'FIRST', self.first, 1)
|
214
|
-
self.outputTable(output, 'FOLLOW', self.follow, 1)
|
215
|
-
self.outputTable(output, 'CLEANUP', self.cleanup, 1)
|
216
|
-
self.outputTable(output, 'PASS', [self.pass], 1) if self.pass
|
217
253
|
unless output == $stdout
|
218
254
|
output.puts "end"
|
219
255
|
end
|
220
256
|
end
|
221
257
|
|
222
|
-
def dup
|
223
|
-
new_obj = super
|
224
|
-
new_obj.instance_variable_set(:@ast, @ast.dup)
|
225
|
-
new_obj
|
226
|
-
end
|
227
|
-
|
228
258
|
##
|
229
|
-
#
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
259
|
+
# Renumber rule identifiers
|
260
|
+
def renumber!
|
261
|
+
ast.each_with_index do |rule, index|
|
262
|
+
rule.id = (index + 1).to_s
|
263
|
+
end
|
234
264
|
end
|
235
265
|
|
236
266
|
##
|
@@ -241,6 +271,7 @@ module EBNF
|
|
241
271
|
def to_ttl(prefix = nil, ns = "http://example.org/")
|
242
272
|
unless ast.empty?
|
243
273
|
[
|
274
|
+
"@prefix dc: <http://purl.org/dc/terms/>.",
|
244
275
|
"@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.",
|
245
276
|
"@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.",
|
246
277
|
("@prefix #{prefix}: <#{ns}>." if prefix),
|
@@ -253,7 +284,21 @@ module EBNF
|
|
253
284
|
].compact
|
254
285
|
end.join("\n") +
|
255
286
|
|
256
|
-
ast.
|
287
|
+
ast.map(&:to_ttl).join("\n")
|
288
|
+
end
|
289
|
+
|
290
|
+
def dup
|
291
|
+
new_obj = super
|
292
|
+
new_obj.instance_variable_set(:@ast, @ast.dup)
|
293
|
+
new_obj
|
294
|
+
end
|
295
|
+
|
296
|
+
##
|
297
|
+
# Find a rule given a symbol
|
298
|
+
# @param [Symbol] sym
|
299
|
+
# @return [Rule]
|
300
|
+
def find_rule(sym)
|
301
|
+
(@find ||= {})[sym] ||= ast.detect {|r| r.sym == sym}
|
257
302
|
end
|
258
303
|
|
259
304
|
def depth
|
data/lib/ebnf/bnf.rb
CHANGED
@@ -17,32 +17,7 @@ module EBNF
|
|
17
17
|
new_ast += new_rules
|
18
18
|
end
|
19
19
|
|
20
|
-
|
21
|
-
to_rewrite = {}
|
22
|
-
new_ast.select {|r| r.terminal?}.each do |src_rule|
|
23
|
-
new_ast.select {|r| r.terminal?}.each do |dst_rule|
|
24
|
-
if src_rule.equivalent?(dst_rule) && src_rule != dst_rule
|
25
|
-
debug("make_bnf") {"equivalent rules: #{src_rule.inspect} and #{dst_rule.inspect}"}
|
26
|
-
(to_rewrite[src_rule] ||= []) << dst_rule
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
# Replace references to equivalent rules with canonical rule
|
32
|
-
to_rewrite.each do |src_rule, dst_rules|
|
33
|
-
dst_rules.each do |dst_rule|
|
34
|
-
new_ast.each do |mod_rule|
|
35
|
-
debug("make_bnf") {"rewrite #{mod_rule.inspect} from #{dst_rule.sym} to #{src_rule.sym}"}
|
36
|
-
mod_rule.rewrite(dst_rule, src_rule)
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
# AST now has just rewritten rules
|
42
|
-
compacted_ast = new_ast - to_rewrite.values.flatten.compact
|
43
|
-
|
44
|
-
# Sort AST by number
|
45
|
-
@ast = compacted_ast
|
20
|
+
@ast = new_ast
|
46
21
|
progress("make_bnf") {"End: #{@ast.length} rules"}
|
47
22
|
self
|
48
23
|
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# This file is automatically generated by ebnf version 2.0.0
|
2
|
+
# Derived from etc/ebnf.ebnf
|
3
|
+
module EBNFMeta
|
4
|
+
RULES = [
|
5
|
+
EBNF::Rule.new(:ebnf, "1", [:star, :_ebnf_1]).extend(EBNF::PEG::Rule),
|
6
|
+
EBNF::Rule.new(:_ebnf_1, "1.1", [:alt, :declaration, :rule]).extend(EBNF::PEG::Rule),
|
7
|
+
EBNF::Rule.new(:declaration, "2", [:alt, "@terminals", :pass]).extend(EBNF::PEG::Rule),
|
8
|
+
EBNF::Rule.new(:rule, "3", [:seq, :LHS, :expression]).extend(EBNF::PEG::Rule),
|
9
|
+
EBNF::Rule.new(:expression, "4", [:seq, :alt]).extend(EBNF::PEG::Rule),
|
10
|
+
EBNF::Rule.new(:alt, "5", [:seq, :seq, :_alt_1]).extend(EBNF::PEG::Rule),
|
11
|
+
EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2]).extend(EBNF::PEG::Rule),
|
12
|
+
EBNF::Rule.new(:_alt_2, "5.2", [:seq, "|", :seq]).extend(EBNF::PEG::Rule),
|
13
|
+
EBNF::Rule.new(:seq, "6", [:plus, :diff]).extend(EBNF::PEG::Rule),
|
14
|
+
EBNF::Rule.new(:diff, "7", [:seq, :postfix, :_diff_1]).extend(EBNF::PEG::Rule),
|
15
|
+
EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2]).extend(EBNF::PEG::Rule),
|
16
|
+
EBNF::Rule.new(:_diff_2, "7.2", [:seq, "-", :postfix]).extend(EBNF::PEG::Rule),
|
17
|
+
EBNF::Rule.new(:postfix, "8", [:seq, :primary, :_postfix_1]).extend(EBNF::PEG::Rule),
|
18
|
+
EBNF::Rule.new(:_postfix_1, "8.1", [:opt, :POSTFIX]).extend(EBNF::PEG::Rule),
|
19
|
+
EBNF::Rule.new(:primary, "9", [:alt, :HEX, :SYMBOL, :O_RANGE, :RANGE, :STRING1, :STRING2, :_primary_1]).extend(EBNF::PEG::Rule),
|
20
|
+
EBNF::Rule.new(:_primary_1, "9.1", [:seq, "(", :expression, ")"]).extend(EBNF::PEG::Rule),
|
21
|
+
EBNF::Rule.new(:pass, "10", [:seq, "@pass", :expression]).extend(EBNF::PEG::Rule),
|
22
|
+
EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule),
|
23
|
+
EBNF::Rule.new(:LHS, "11", [:seq, :_LHS_1, :SYMBOL, :_LHS_2, "::="], kind: :terminal).extend(EBNF::PEG::Rule),
|
24
|
+
EBNF::Rule.new(:_LHS_1, "11.1", [:opt, :_LHS_3], kind: :terminal).extend(EBNF::PEG::Rule),
|
25
|
+
EBNF::Rule.new(:_LHS_3, "11.3", [:seq, "[", :SYMBOL, "]", :_LHS_4], kind: :terminal).extend(EBNF::PEG::Rule),
|
26
|
+
EBNF::Rule.new(:_LHS_4, "11.4", [:plus, " "], kind: :terminal).extend(EBNF::PEG::Rule),
|
27
|
+
EBNF::Rule.new(:_LHS_2, "11.2", [:star, " "], kind: :terminal).extend(EBNF::PEG::Rule),
|
28
|
+
EBNF::Rule.new(:SYMBOL, "12", [:plus, :_SYMBOL_1], kind: :terminal).extend(EBNF::PEG::Rule),
|
29
|
+
EBNF::Rule.new(:_SYMBOL_1, "12.1", [:alt, :_SYMBOL_2, :_SYMBOL_3, :_SYMBOL_4, "_", "."], kind: :terminal).extend(EBNF::PEG::Rule),
|
30
|
+
EBNF::Rule.new(:_SYMBOL_2, "12.2", [:range, "a-z"], kind: :terminal).extend(EBNF::PEG::Rule),
|
31
|
+
EBNF::Rule.new(:_SYMBOL_3, "12.3", [:range, "A-Z"], kind: :terminal).extend(EBNF::PEG::Rule),
|
32
|
+
EBNF::Rule.new(:_SYMBOL_4, "12.4", [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
|
33
|
+
EBNF::Rule.new(:HEX, "13", [:seq, "#x", :_HEX_1], kind: :terminal).extend(EBNF::PEG::Rule),
|
34
|
+
EBNF::Rule.new(:_HEX_1, "13.1", [:plus, :_HEX_2], kind: :terminal).extend(EBNF::PEG::Rule),
|
35
|
+
EBNF::Rule.new(:_HEX_2, "13.2", [:alt, :_HEX_3, :_HEX_4, :_HEX_5], kind: :terminal).extend(EBNF::PEG::Rule),
|
36
|
+
EBNF::Rule.new(:_HEX_3, "13.3", [:range, "a-f"], kind: :terminal).extend(EBNF::PEG::Rule),
|
37
|
+
EBNF::Rule.new(:_HEX_4, "13.4", [:range, "A-F"], kind: :terminal).extend(EBNF::PEG::Rule),
|
38
|
+
EBNF::Rule.new(:_HEX_5, "13.5", [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
|
39
|
+
EBNF::Rule.new(:RANGE, "14", [:seq, "[", :_RANGE_1, :_RANGE_2, :_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule),
|
40
|
+
EBNF::Rule.new(:_RANGE_1, "14.1", [:plus, :_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule),
|
41
|
+
EBNF::Rule.new(:_RANGE_4, "14.4", [:alt, :_RANGE_5, :_RANGE_6, :R_CHAR, :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
|
42
|
+
EBNF::Rule.new(:_RANGE_5, "14.5", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule),
|
43
|
+
EBNF::Rule.new(:_RANGE_6, "14.6", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
|
44
|
+
EBNF::Rule.new(:_RANGE_2, "14.2", [:opt, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
|
45
|
+
EBNF::Rule.new(:_RANGE_3, "14.3", [:diff, "]", :LHS], kind: :terminal).extend(EBNF::PEG::Rule),
|
46
|
+
EBNF::Rule.new(:O_RANGE, "15", [:seq, "[^", :_O_RANGE_1, :_O_RANGE_2, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
|
47
|
+
EBNF::Rule.new(:_O_RANGE_1, "15.1", [:plus, :_O_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule),
|
48
|
+
EBNF::Rule.new(:_O_RANGE_3, "15.3", [:alt, :_O_RANGE_4, :_O_RANGE_5, :R_CHAR, :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
|
49
|
+
EBNF::Rule.new(:_O_RANGE_4, "15.4", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule),
|
50
|
+
EBNF::Rule.new(:_O_RANGE_5, "15.5", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
|
51
|
+
EBNF::Rule.new(:_O_RANGE_2, "15.2", [:opt, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
|
52
|
+
EBNF::Rule.new(:STRING1, "16", [:seq, "\"", :_STRING1_1, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
53
|
+
EBNF::Rule.new(:_STRING1_1, "16.1", [:star, :_STRING1_2], kind: :terminal).extend(EBNF::PEG::Rule),
|
54
|
+
EBNF::Rule.new(:_STRING1_2, "16.2", [:diff, :CHAR, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
55
|
+
EBNF::Rule.new(:STRING2, "17", [:seq, "'", :_STRING2_1, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
|
56
|
+
EBNF::Rule.new(:_STRING2_1, "17.1", [:star, :_STRING2_2], kind: :terminal).extend(EBNF::PEG::Rule),
|
57
|
+
EBNF::Rule.new(:_STRING2_2, "17.2", [:diff, :CHAR, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
|
58
|
+
EBNF::Rule.new(:CHAR, "18", [:alt, :_CHAR_1, :_CHAR_2, :_CHAR_3, :_CHAR_4], kind: :terminal).extend(EBNF::PEG::Rule),
|
59
|
+
EBNF::Rule.new(:_CHAR_1, "18.1", [:range, "#x9#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule),
|
60
|
+
EBNF::Rule.new(:_CHAR_2, "18.2", [:range, "#x20-#xD7FF"], kind: :terminal).extend(EBNF::PEG::Rule),
|
61
|
+
EBNF::Rule.new(:_CHAR_3, "18.3", [:range, "#xE000-#xFFFD"], kind: :terminal).extend(EBNF::PEG::Rule),
|
62
|
+
EBNF::Rule.new(:_CHAR_4, "18.4", [:range, "#x10000-#x10FFFF"], kind: :terminal).extend(EBNF::PEG::Rule),
|
63
|
+
EBNF::Rule.new(:R_CHAR, "19", [:diff, :CHAR, :_R_CHAR_1], kind: :terminal).extend(EBNF::PEG::Rule),
|
64
|
+
EBNF::Rule.new(:_R_CHAR_1, "19.1", [:alt, "]", "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
|
65
|
+
EBNF::Rule.new(:POSTFIX, "20", [:range, "?*+"], kind: :terminal).extend(EBNF::PEG::Rule),
|
66
|
+
EBNF::Rule.new(:PASS, "21", [:alt, :_PASS_1, :_PASS_2, :_PASS_3, :_PASS_4], kind: :terminal).extend(EBNF::PEG::Rule),
|
67
|
+
EBNF::Rule.new(:_PASS_1, "21.1", [:range, "#x9#xA#xD#x20"], kind: :terminal).extend(EBNF::PEG::Rule),
|
68
|
+
EBNF::Rule.new(:_PASS_2, "21.2", [:seq, :_PASS_5, :_PASS_6], kind: :terminal).extend(EBNF::PEG::Rule),
|
69
|
+
EBNF::Rule.new(:_PASS_5, "21.5", [:alt, :_PASS_7, "//"], kind: :terminal).extend(EBNF::PEG::Rule),
|
70
|
+
EBNF::Rule.new(:_PASS_7, "21.7", [:diff, "#", "#x"], kind: :terminal).extend(EBNF::PEG::Rule),
|
71
|
+
EBNF::Rule.new(:_PASS_6, "21.6", [:star, :_PASS_8], kind: :terminal).extend(EBNF::PEG::Rule),
|
72
|
+
EBNF::Rule.new(:_PASS_8, "21.8", [:range, "^#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule),
|
73
|
+
EBNF::Rule.new(:_PASS_3, "21.3", [:seq, "/*", :_PASS_9, "*/"], kind: :terminal).extend(EBNF::PEG::Rule),
|
74
|
+
EBNF::Rule.new(:_PASS_9, "21.9", [:star, :_PASS_10], kind: :terminal).extend(EBNF::PEG::Rule),
|
75
|
+
EBNF::Rule.new(:_PASS_10, "21.10", [:alt, :_PASS_11, :_PASS_12], kind: :terminal).extend(EBNF::PEG::Rule),
|
76
|
+
EBNF::Rule.new(:_PASS_11, "21.11", [:opt, :_PASS_13], kind: :terminal).extend(EBNF::PEG::Rule),
|
77
|
+
EBNF::Rule.new(:_PASS_13, "21.13", [:seq, "*", :_PASS_14], kind: :terminal).extend(EBNF::PEG::Rule),
|
78
|
+
EBNF::Rule.new(:_PASS_14, "21.14", [:range, "^/"], kind: :terminal).extend(EBNF::PEG::Rule),
|
79
|
+
EBNF::Rule.new(:_PASS_12, "21.12", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule),
|
80
|
+
EBNF::Rule.new(:_PASS_4, "21.4", [:seq, "(*", :_PASS_15, "*)"], kind: :terminal).extend(EBNF::PEG::Rule),
|
81
|
+
EBNF::Rule.new(:_PASS_15, "21.15", [:star, :_PASS_16], kind: :terminal).extend(EBNF::PEG::Rule),
|
82
|
+
EBNF::Rule.new(:_PASS_16, "21.16", [:alt, :_PASS_17, :_PASS_18], kind: :terminal).extend(EBNF::PEG::Rule),
|
83
|
+
EBNF::Rule.new(:_PASS_17, "21.17", [:opt, :_PASS_19], kind: :terminal).extend(EBNF::PEG::Rule),
|
84
|
+
EBNF::Rule.new(:_PASS_19, "21.19", [:seq, "*", :_PASS_20], kind: :terminal).extend(EBNF::PEG::Rule),
|
85
|
+
EBNF::Rule.new(:_PASS_20, "21.20", [:range, "^)"], kind: :terminal).extend(EBNF::PEG::Rule),
|
86
|
+
EBNF::Rule.new(:_PASS_18, "21.18", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule),
|
87
|
+
EBNF::Rule.new(:_pass, nil, [:seq, :PASS], kind: :pass).extend(EBNF::PEG::Rule),
|
88
|
+
]
|
89
|
+
end
|
90
|
+
|
data/lib/ebnf/isoebnf.rb
ADDED
@@ -0,0 +1,229 @@
|
|
1
|
+
require_relative 'isoebnf/meta'
|
2
|
+
require 'logger'
|
3
|
+
|
4
|
+
# ISO EBNF parser
|
5
|
+
# Parses ISO EBNF into an array of {EBNF::Rule}.
|
6
|
+
module EBNF
|
7
|
+
class ISOEBNF
|
8
|
+
include EBNF::PEG::Parser
|
9
|
+
|
10
|
+
# The base for terminal-character, which omits "'", '"', and '?'.
|
11
|
+
# Could be more optimized, and one might quibble
|
12
|
+
# with the overly-strictly defined character set,
|
13
|
+
# but it is correct.
|
14
|
+
TERMINAL_CHARACTER_BASE = %r{
|
15
|
+
[a-zA-Z0-9] | # letter | decimal digit
|
16
|
+
, | # concatenate symbol
|
17
|
+
= | # defining symbol
|
18
|
+
[\|\/!] | # definition separator symbol
|
19
|
+
\*\) | # end comment symbol
|
20
|
+
\) | # end group symbol
|
21
|
+
\] | # end option symbol
|
22
|
+
\} | # end repeat symbol
|
23
|
+
\- | # except symbol
|
24
|
+
#\' | # first quote symbol
|
25
|
+
\* | # repetition symbol
|
26
|
+
#\" | # second quote symbol
|
27
|
+
#\? | # special sequence symbol
|
28
|
+
\(\* | # start comment symbol
|
29
|
+
\( | # start group symbol
|
30
|
+
\[ | # start option symbol
|
31
|
+
\{ | # start repeat symbol
|
32
|
+
[;\.] | # terminator symbol
|
33
|
+
[:+_%@&$<>^\x20\x23\\`~] # other character
|
34
|
+
}x
|
35
|
+
|
36
|
+
TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"\?]}
|
37
|
+
FIRST_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|["\?]}
|
38
|
+
SECOND_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['\?]}
|
39
|
+
SPECIAL_SEQUENCE_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"]}
|
40
|
+
|
41
|
+
# Abstract syntax tree from parse
|
42
|
+
#
|
43
|
+
# @return [Array<EBNF::Rule>]
|
44
|
+
attr_reader :ast
|
45
|
+
|
46
|
+
# `[14] integer ::= decimal_digit+`
|
47
|
+
terminal(:integer, /\d+/) do |value, prod|
|
48
|
+
value.to_i
|
49
|
+
end
|
50
|
+
|
51
|
+
# `[15] meta_identifier ::= letter meta_identifier_character*`
|
52
|
+
terminal(:meta_identifier, /[a-zA-Z][a-zA-Z0-9_]*/) do |value|
|
53
|
+
value.to_sym
|
54
|
+
end
|
55
|
+
|
56
|
+
# `[17] terminal_string ::= ("'" first_terminal_character+ "'")`
|
57
|
+
# ` | ('"' second_terminal_character+ '"')`
|
58
|
+
terminal(:terminal_string, /(?:'#{FIRST_TERMINAL_CHARACTER}+')|(?:"#{SECOND_TERMINAL_CHARACTER}+")/x) do |value|
|
59
|
+
value[1..-2]
|
60
|
+
end
|
61
|
+
|
62
|
+
# `[20] special_sequence ::= '?' special_sequence_character* '?'`
|
63
|
+
terminal(:special_sequence, /\?#{SPECIAL_SEQUENCE_CHARACTER}+\?/)
|
64
|
+
|
65
|
+
# `[22] terminal_character ::= [a-zA-Z0-9]`
|
66
|
+
# ` | [,=;*}#x2d?([{;]`
|
67
|
+
# ` | '*)'`
|
68
|
+
# ` | '(*'`
|
69
|
+
# ` | ']'`
|
70
|
+
# ` | other_character`
|
71
|
+
terminal(:terminal_character, TERMINAL_CHARACTER)
|
72
|
+
|
73
|
+
# `[25] empty ::= ''`
|
74
|
+
terminal(:empty, //)
|
75
|
+
|
76
|
+
# `[26] definition_separator_symbol ::= '|' | '/' | '!'`
|
77
|
+
terminal(:definition_separator_symbol, /[\|\/!]/)
|
78
|
+
|
79
|
+
# `[27] terminator_symbol ::= ';' | '.'`
|
80
|
+
terminal(:terminator_symbol, /[;\.]/)
|
81
|
+
|
82
|
+
# `[28] start_option_symbol ::= '[' | '(/'`
|
83
|
+
terminal(:start_option_symbol, /\[|(?:\(\/)/)
|
84
|
+
|
85
|
+
# `[29] end_option_symbol ::= ']'`
|
86
|
+
terminal(:end_option_symbol, /\]/)
|
87
|
+
|
88
|
+
# `[30] start_repeat_symbol ::= '{' | '(:'`
|
89
|
+
terminal(:start_repeat_symbol, /{|\(:/)
|
90
|
+
|
91
|
+
# `[31] end_repeat_symbol ::= '}' | ':)'`
|
92
|
+
terminal(:end_repeat_symbol, /}|:\)/)
|
93
|
+
|
94
|
+
# ## Non-terminal productions
|
95
|
+
|
96
|
+
# `[2] syntax_rule ::= meta_identifier '=' definitions_list terminator_symbol`
|
97
|
+
production(:syntax_rule, clear_packrat: true) do |value, data, callback|
|
98
|
+
# value contains an expression.
|
99
|
+
# Invoke callback
|
100
|
+
sym = value[0][:meta_identifier]
|
101
|
+
definitions_list = value[2][:definitions_list]
|
102
|
+
callback.call(:rule, EBNF::Rule.new(sym.to_sym, nil, definitions_list))
|
103
|
+
nil
|
104
|
+
end
|
105
|
+
|
106
|
+
# Setting `as_hash: true` in the start production makes the value take the form of a single hash, rather than an array of hashes.
|
107
|
+
#
|
108
|
+
# `[3] definitions_list ::= single_definition (definition_separator_symbol definitions_list)*`
|
109
|
+
start_production(:definitions_list, as_hash: true)
|
110
|
+
production(:definitions_list) do |value|
|
111
|
+
if value[:_definitions_list_1].length > 0
|
112
|
+
[:alt, value[:single_definition]] + value[:_definitions_list_1]
|
113
|
+
else
|
114
|
+
value[:single_definition]
|
115
|
+
end
|
116
|
+
end
|
117
|
+
production(:_definitions_list_1) do |value|
|
118
|
+
Array(value.first)
|
119
|
+
end
|
120
|
+
start_production(:_definitions_list_2, as_hash: true)
|
121
|
+
production(:_definitions_list_2) do |value|
|
122
|
+
if Array(value[:definitions_list]).first == :alt
|
123
|
+
value[:definitions_list][1..-1]
|
124
|
+
else
|
125
|
+
[value[:definitions_list]]
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
# `[4] single_definition ::= term (',' term)*`
|
130
|
+
start_production(:single_definition, as_hash: true)
|
131
|
+
production(:single_definition) do |value|
|
132
|
+
if value[:_single_definition_1].length > 0
|
133
|
+
[:seq, value[:term]] + value[:_single_definition_1]
|
134
|
+
else
|
135
|
+
value[:term]
|
136
|
+
end
|
137
|
+
end
|
138
|
+
production(:_single_definition_1) do |value|
|
139
|
+
value.map {|a1| a1.last[:term]}.compact # Get rid of '|'
|
140
|
+
end
|
141
|
+
|
142
|
+
# `[5] term ::= factor ('-' exception)?`
|
143
|
+
start_production(:term, as_hash: true)
|
144
|
+
production(:term) do |value|
|
145
|
+
if value[:_term_1]
|
146
|
+
[:diff, value[:factor], value[:_term_1]]
|
147
|
+
else
|
148
|
+
value[:factor]
|
149
|
+
end
|
150
|
+
end
|
151
|
+
production(:_term_1) do |value|
|
152
|
+
value.last[:exception] if value
|
153
|
+
end
|
154
|
+
|
155
|
+
# `[6] exception ::= factor`
|
156
|
+
start_production(:exception, as_hash: true)
|
157
|
+
production(:exception) do |value|
|
158
|
+
value[:factor]
|
159
|
+
end
|
160
|
+
|
161
|
+
# `[7] factor ::= (integer '*')? primary`
|
162
|
+
start_production(:factor, as_hash: true)
|
163
|
+
production(:factor) do |value|
|
164
|
+
if value[:_factor_1]
|
165
|
+
[:rept, value[:_factor_1], value[:_factor_1], value[:primary]]
|
166
|
+
else
|
167
|
+
value[:primary]
|
168
|
+
end
|
169
|
+
end
|
170
|
+
production(:_factor_2) do |value|
|
171
|
+
value.first[:integer]
|
172
|
+
end
|
173
|
+
|
174
|
+
# `[9] optional_sequence ::= start_option_symbol definitions_list end_option_symbol`
|
175
|
+
production(:optional_sequence) do |value|
|
176
|
+
[:opt, value[1][:definitions_list]]
|
177
|
+
end
|
178
|
+
|
179
|
+
# `[10] repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol`
|
180
|
+
production(:repeated_sequence) do |value|
|
181
|
+
[:star, value[1][:definitions_list]]
|
182
|
+
end
|
183
|
+
|
184
|
+
# `[11] grouped_sequence ::= '(' definitions_list ')'`
|
185
|
+
production(:grouped_sequence) do |value|
|
186
|
+
[:seq, value[1][:definitions_list]]
|
187
|
+
end
|
188
|
+
|
189
|
+
# ## Parser invocation.
|
190
|
+
# On start, yield ourselves if a block is given, otherwise, return this parser instance
|
191
|
+
#
|
192
|
+
# @param [#read, #to_s] input
|
193
|
+
# @param [Hash{Symbol => Object}] options
|
194
|
+
# @option options [Integer] :level
|
195
|
+
# Trace level. 0(debug), 1(info), 2(warn), 3(error).
|
196
|
+
# @return [EBNF::ISOEBNF]
|
197
|
+
def initialize(input, **options, &block)
|
198
|
+
# If the `level` option is set, instantiate a logger for collecting trace information.
|
199
|
+
if options.has_key?(:level)
|
200
|
+
options[:logger] = Logger.new(STDERR)
|
201
|
+
options[:logger].level = options[:level]
|
202
|
+
options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
|
203
|
+
end
|
204
|
+
|
205
|
+
# Read input, if necessary, which will be used in a Scanner.
|
206
|
+
@input = input.respond_to?(:read) ? input.read : input.to_s
|
207
|
+
|
208
|
+
parsing_terminals = false
|
209
|
+
@ast = []
|
210
|
+
parse(@input,
|
211
|
+
:syntax,
|
212
|
+
ISOEBNFMeta::RULES,
|
213
|
+
whitespace: %r{([\x09-\x0d\x20]|(?:\(\*(?:(?:\*[^\)])|[^*])*\*\)))+},
|
214
|
+
**options
|
215
|
+
) do |context, *data|
|
216
|
+
rule = case context
|
217
|
+
when :rule
|
218
|
+
# A rule which has already been turned into a `Rule` object.
|
219
|
+
rule = data.first
|
220
|
+
rule.kind = :terminal if parsing_terminals
|
221
|
+
rule
|
222
|
+
end
|
223
|
+
@ast << rule if rule
|
224
|
+
end
|
225
|
+
rescue EBNF::PEG::Parser::Error => e
|
226
|
+
raise SyntaxError, e.message
|
227
|
+
end
|
228
|
+
end
|
229
|
+
end
|