ebnf 1.1.3 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +221 -198
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +40 -21
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/doap.ttl +23 -15
- data/etc/ebnf.ebnf +21 -33
- data/etc/ebnf.html +171 -160
- data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
- data/etc/ebnf.ll1.sxp +182 -183
- data/etc/ebnf.peg.rb +90 -0
- data/etc/ebnf.peg.sxp +84 -0
- data/etc/ebnf.sxp +40 -41
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +363 -362
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +103 -101
- data/lib/ebnf.rb +7 -2
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +113 -69
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +138 -6
- data/lib/ebnf/ll1/lexer.rb +37 -32
- data/lib/ebnf/ll1/parser.rb +113 -73
- data/lib/ebnf/ll1/scanner.rb +83 -51
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +561 -0
- data/lib/ebnf/peg/rule.rb +250 -0
- data/lib/ebnf/rule.rb +443 -148
- data/lib/ebnf/terminals.rb +21 -0
- data/lib/ebnf/writer.rb +565 -83
- metadata +107 -29
- data/etc/sparql.rb +0 -45773
data/lib/ebnf/base.rb
CHANGED
@@ -2,7 +2,7 @@ require 'strscan'
|
|
2
2
|
|
3
3
|
# Extended Backus-Naur Form (EBNF), being the W3C variation is
|
4
4
|
# originally defined in the
|
5
|
-
# [W3C XML 1.0 Spec](
|
5
|
+
# [W3C XML 1.0 Spec](https://www.w3.org/TR/REC-xml/#sec-notation).
|
6
6
|
#
|
7
7
|
# This version attempts to be less strict than the strict definition
|
8
8
|
# to allow for colloquial variations (such as in the Turtle syntax).
|
@@ -12,8 +12,8 @@ require 'strscan'
|
|
12
12
|
#
|
13
13
|
# Comments include the content between '/*' and '*/'
|
14
14
|
#
|
15
|
-
# @see
|
16
|
-
# @see
|
15
|
+
# @see https://www.w3.org/2000/10/swap/grammar/ebnf2turtle.py
|
16
|
+
# @see https://www.w3.org/2000/10/swap/grammar/ebnf2bnf.n3
|
17
17
|
#
|
18
18
|
# Based on bnf2turtle by Dan Connolly.
|
19
19
|
#
|
@@ -36,7 +36,7 @@ require 'strscan'
|
|
36
36
|
# derived mechanically from the specification.
|
37
37
|
#
|
38
38
|
#
|
39
|
-
# [N3 design note]:
|
39
|
+
# [N3 design note]: https://www.w3.org/DesignIssues/Notation3
|
40
40
|
#
|
41
41
|
# Related Work
|
42
42
|
# ------------
|
@@ -59,31 +59,15 @@ require 'strscan'
|
|
59
59
|
# expression of the grammar in terms of the higher level EBNF
|
60
60
|
# constructs.
|
61
61
|
#
|
62
|
-
# [goal]:
|
63
|
-
# [n3p announcement]:
|
64
|
-
# [Yacker]:
|
65
|
-
# [SPARQL specification]:
|
66
|
-
# [Cwm Release 1.1.0rc1]:
|
67
|
-
# [bnf-rules.n3]:
|
62
|
+
# [goal]: https://www.w3.org/2002/02/mid/1086902566.21030.1479.camel@dirk;list=public-cwm-bugs
|
63
|
+
# [n3p announcement]: https://lists.w3.org/Archives/Public/public-cwm-talk/2004OctDec/0029.html
|
64
|
+
# [Yacker]: https://www.w3.org/1999/02/26-modules/User/Yacker
|
65
|
+
# [SPARQL specification]: https://www.w3.org/TR/rdf-sparql-query/
|
66
|
+
# [Cwm Release 1.1.0rc1]: https://lists.w3.org/Archives/Public/public-cwm-announce/2005JulSep/0000.html
|
67
|
+
# [bnf-rules.n3]: https://www.w3.org/2000/10/swap/grammar/bnf-rules.n3
|
68
68
|
#
|
69
|
-
#
|
70
|
-
#
|
71
|
-
#
|
72
|
-
# The yacker output also has the terminals compiled to elaborate regular
|
73
|
-
# expressions. The best strategy for dealing with lexical tokens is not
|
74
|
-
# yet clear. Many tokens in SPARQL are case insensitive; this is not yet
|
75
|
-
# captured formally.
|
76
|
-
#
|
77
|
-
# The schema for the EBNF vocabulary used here (``g:seq``, ``g:alt``, ...)
|
78
|
-
# is not yet published; it should be aligned with [swap/grammar/bnf][]
|
79
|
-
# and the [bnf2html.n3][] rules (and/or the style of linked XHTML grammar
|
80
|
-
# in the SPARQL and XML specificiations).
|
81
|
-
#
|
82
|
-
# It would be interesting to corroborate the claim in the SPARQL spec
|
83
|
-
# that the grammar is LL(1) with a mechanical proof based on N3 rules.
|
84
|
-
#
|
85
|
-
# [swap/grammar/bnf]: http://www.w3.org/2000/10/swap/grammar/bnf
|
86
|
-
# [bnf2html.n3]: http://www.w3.org/2000/10/swap/grammar/bnf2html.n3
|
69
|
+
# [swap/grammar/bnf]: https://www.w3.org/2000/10/swap/grammar/bnf
|
70
|
+
# [bnf2html.n3]: https://www.w3.org/2000/10/swap/grammar/bnf2html.n3
|
87
71
|
#
|
88
72
|
# Background
|
89
73
|
# ----------
|
@@ -93,14 +77,15 @@ require 'strscan'
|
|
93
77
|
# of N3 that maps directly to (and from) the standard XML syntax for
|
94
78
|
# RDF.
|
95
79
|
#
|
96
|
-
# [N3 Primer]:
|
80
|
+
# [N3 Primer]: https://www.w3.org/2000/10/swap/Primer.html
|
97
81
|
#
|
98
82
|
# @author Gregg Kellogg
|
99
83
|
module EBNF
|
100
84
|
class Base
|
101
85
|
include BNF
|
102
86
|
include LL1
|
103
|
-
include
|
87
|
+
include Native
|
88
|
+
include PEG
|
104
89
|
|
105
90
|
# Abstract syntax tree from parse
|
106
91
|
#
|
@@ -116,24 +101,33 @@ module EBNF
|
|
116
101
|
# in S-Expressions (similar to SPARQL SSE)
|
117
102
|
#
|
118
103
|
# @param [#read, #to_s] input
|
104
|
+
# @param [Symbol] format (:ebnf)
|
105
|
+
# Format of input, one of `:abnf`, `:ebnf`, `:isoebnf`, `:native`, or `:sxp`.
|
106
|
+
# Use `:native` for the native EBNF parser, rather than the PEG parser.
|
119
107
|
# @param [Hash{Symbol => Object}] options
|
120
|
-
# @param [Symbol] :format (:ebnf)
|
121
|
-
# Format of input, one of :ebnf, or :sxp
|
122
108
|
# @option options [Boolean, Array] :debug
|
123
109
|
# Output debug information to an array or $stdout.
|
110
|
+
# @option options [Boolean] :validate
|
111
|
+
# Validate resulting grammar.
|
124
112
|
def initialize(input, format: :ebnf, **options)
|
125
113
|
@options = options.dup
|
126
114
|
@lineno, @depth, @errors = 1, 0, []
|
127
|
-
terminal = false
|
128
115
|
@ast = []
|
129
116
|
|
130
117
|
input = input.respond_to?(:read) ? input.read : input.to_s
|
131
118
|
|
132
119
|
case format
|
133
|
-
when :
|
134
|
-
|
135
|
-
@ast =
|
120
|
+
when :abnf
|
121
|
+
abnf = ABNF.new(input, **options)
|
122
|
+
@ast = abnf.ast
|
136
123
|
when :ebnf
|
124
|
+
ebnf = Parser.new(input, **options)
|
125
|
+
@ast = ebnf.ast
|
126
|
+
when :isoebnf
|
127
|
+
iso = ISOEBNF.new(input, **options)
|
128
|
+
@ast = iso.ast
|
129
|
+
when :native
|
130
|
+
terminals = false
|
137
131
|
scanner = StringScanner.new(input)
|
138
132
|
|
139
133
|
eachRule(scanner) do |r|
|
@@ -141,7 +135,9 @@ module EBNF
|
|
141
135
|
case r
|
142
136
|
when /^@terminals/
|
143
137
|
# Switch mode to parsing terminals
|
144
|
-
|
138
|
+
terminals = true
|
139
|
+
rule = Rule.new(nil, nil, nil, kind: :terminals, ebnf: self)
|
140
|
+
@ast << rule
|
145
141
|
when /^@pass\s*(.*)$/m
|
146
142
|
expr = expression($1).first
|
147
143
|
rule = Rule.new(nil, nil, expr, kind: :pass, ebnf: self)
|
@@ -150,14 +146,49 @@ module EBNF
|
|
150
146
|
else
|
151
147
|
rule = depth {ruleParts(r)}
|
152
148
|
|
153
|
-
rule.kind = :terminal if
|
149
|
+
rule.kind = :terminal if terminals # Override after we've parsed @terminals
|
154
150
|
rule.orig = r
|
155
151
|
@ast << rule
|
156
152
|
end
|
157
153
|
end
|
154
|
+
when :sxp
|
155
|
+
require 'sxp' unless defined?(SXP)
|
156
|
+
@ast = SXP::Reader::Basic.read(input).map {|e| Rule.from_sxp(e)}
|
158
157
|
else
|
159
158
|
raise "unknown input format #{format.inspect}"
|
160
159
|
end
|
160
|
+
|
161
|
+
validate! if @options[:validate]
|
162
|
+
end
|
163
|
+
|
164
|
+
##
|
165
|
+
# Validate the grammar.
|
166
|
+
#
|
167
|
+
# Makes sure that rules reference either strings or other defined rules.
|
168
|
+
#
|
169
|
+
# @raise [SyntaxError]
|
170
|
+
def validate!
|
171
|
+
ast.each do |rule|
|
172
|
+
begin
|
173
|
+
rule.validate!(@ast)
|
174
|
+
rescue SyntaxError => e
|
175
|
+
error("In rule #{rule.sym}: #{e.message}")
|
176
|
+
end
|
177
|
+
end
|
178
|
+
raise SyntaxError, errors.join("\n") unless errors.empty?
|
179
|
+
end
|
180
|
+
|
181
|
+
##
|
182
|
+
# Is the grammar valid?
|
183
|
+
#
|
184
|
+
# Uses `#validate!` and catches `SyntaxError`
|
185
|
+
#
|
186
|
+
# @return [Boolean]
|
187
|
+
def valid?
|
188
|
+
validate!
|
189
|
+
true
|
190
|
+
rescue SyntaxError
|
191
|
+
false
|
161
192
|
end
|
162
193
|
|
163
194
|
# Iterate over each rule or terminal, except empty
|
@@ -173,64 +204,62 @@ module EBNF
|
|
173
204
|
# @return [String]
|
174
205
|
def to_sxp
|
175
206
|
require 'sxp' unless defined?(SXP)
|
176
|
-
SXP::Generator.string(ast.
|
207
|
+
SXP::Generator.string(ast.map(&:for_sxp))
|
177
208
|
end
|
178
209
|
|
179
210
|
##
|
180
211
|
# Output formatted EBNF
|
212
|
+
#
|
213
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
181
214
|
# @return [String]
|
182
|
-
def to_s
|
183
|
-
Writer.string(*ast)
|
215
|
+
def to_s(format: :ebnf)
|
216
|
+
Writer.string(*ast, format: format)
|
184
217
|
end
|
185
218
|
|
186
219
|
##
|
187
220
|
# Output formatted EBNF as HTML
|
221
|
+
#
|
222
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
188
223
|
# @return [String]
|
189
|
-
def to_html
|
190
|
-
Writer.html(*ast)
|
224
|
+
def to_html(format: :ebnf)
|
225
|
+
Writer.html(*ast, format: format)
|
191
226
|
end
|
192
227
|
|
193
228
|
##
|
194
229
|
# Output Ruby parser files
|
195
230
|
#
|
196
231
|
# @param [IO, StringIO] output
|
197
|
-
# @param [String]
|
198
|
-
# @param [String]
|
199
|
-
def to_ruby(output = $stdout, grammarFile: nil, mod_name: '
|
232
|
+
# @param [String] grammarFile
|
233
|
+
# @param [String] mod_name ('Meta')
|
234
|
+
def to_ruby(output = $stdout, grammarFile: nil, mod_name: 'Meta', **options)
|
200
235
|
unless output == $stdout
|
201
|
-
output.puts "# This file is automatically generated by #{
|
202
|
-
output.puts "#
|
236
|
+
output.puts "# This file is automatically generated by ebnf version #{EBNF::VERSION}"
|
237
|
+
output.puts "# Derived from #{grammarFile}" if grammarFile
|
203
238
|
unless self.errors.empty?
|
204
|
-
output.puts "# Note,
|
239
|
+
output.puts "# Note, grammar has errors, may need to be resolved manually:"
|
205
240
|
#output.puts "# #{pp.conflicts.map{|c| c.join("\n# ")}.join("\n# ")}"
|
206
241
|
end
|
207
242
|
output.puts "module #{mod_name}"
|
208
|
-
output.puts " START = #{self.start.inspect}"
|
209
|
-
|
243
|
+
output.puts " START = #{self.start.inspect}\n" if self.start
|
244
|
+
end
|
245
|
+
|
246
|
+
# Either output LL(1) BRANCH tables or rules for PEG parsing
|
247
|
+
if ast.first.first
|
248
|
+
to_ruby_ll1(output)
|
249
|
+
else
|
250
|
+
to_ruby_peg(output)
|
210
251
|
end
|
211
|
-
self.outputTable(output, 'BRANCH', self.branch, 1)
|
212
|
-
self.outputTable(output, 'TERMINALS', self.terminals, 1)
|
213
|
-
self.outputTable(output, 'FIRST', self.first, 1)
|
214
|
-
self.outputTable(output, 'FOLLOW', self.follow, 1)
|
215
|
-
self.outputTable(output, 'CLEANUP', self.cleanup, 1)
|
216
|
-
self.outputTable(output, 'PASS', [self.pass], 1) if self.pass
|
217
252
|
unless output == $stdout
|
218
253
|
output.puts "end"
|
219
254
|
end
|
220
255
|
end
|
221
256
|
|
222
|
-
def dup
|
223
|
-
new_obj = super
|
224
|
-
new_obj.instance_variable_set(:@ast, @ast.dup)
|
225
|
-
new_obj
|
226
|
-
end
|
227
|
-
|
228
257
|
##
|
229
|
-
#
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
258
|
+
# Renumber rule identifiers
|
259
|
+
def renumber!
|
260
|
+
ast.each_with_index do |rule, index|
|
261
|
+
rule.id = (index + 1).to_s
|
262
|
+
end
|
234
263
|
end
|
235
264
|
|
236
265
|
##
|
@@ -241,6 +270,7 @@ module EBNF
|
|
241
270
|
def to_ttl(prefix = nil, ns = "http://example.org/")
|
242
271
|
unless ast.empty?
|
243
272
|
[
|
273
|
+
"@prefix dc: <http://purl.org/dc/terms/>.",
|
244
274
|
"@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.",
|
245
275
|
"@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.",
|
246
276
|
("@prefix #{prefix}: <#{ns}>." if prefix),
|
@@ -253,7 +283,21 @@ module EBNF
|
|
253
283
|
].compact
|
254
284
|
end.join("\n") +
|
255
285
|
|
256
|
-
ast.
|
286
|
+
ast.map(&:to_ttl).join("\n")
|
287
|
+
end
|
288
|
+
|
289
|
+
def dup
|
290
|
+
new_obj = super
|
291
|
+
new_obj.instance_variable_set(:@ast, @ast.dup)
|
292
|
+
new_obj
|
293
|
+
end
|
294
|
+
|
295
|
+
##
|
296
|
+
# Find a rule given a symbol
|
297
|
+
# @param [Symbol] sym
|
298
|
+
# @return [Rule]
|
299
|
+
def find_rule(sym)
|
300
|
+
(@find ||= {})[sym] ||= ast.detect {|r| r.sym == sym}
|
257
301
|
end
|
258
302
|
|
259
303
|
def depth
|
data/lib/ebnf/bnf.rb
CHANGED
@@ -17,32 +17,7 @@ module EBNF
|
|
17
17
|
new_ast += new_rules
|
18
18
|
end
|
19
19
|
|
20
|
-
|
21
|
-
to_rewrite = {}
|
22
|
-
new_ast.select {|r| r.terminal?}.each do |src_rule|
|
23
|
-
new_ast.select {|r| r.terminal?}.each do |dst_rule|
|
24
|
-
if src_rule.equivalent?(dst_rule) && src_rule != dst_rule
|
25
|
-
debug("make_bnf") {"equivalent rules: #{src_rule.inspect} and #{dst_rule.inspect}"}
|
26
|
-
(to_rewrite[src_rule] ||= []) << dst_rule
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
# Replace references to equivalent rules with canonical rule
|
32
|
-
to_rewrite.each do |src_rule, dst_rules|
|
33
|
-
dst_rules.each do |dst_rule|
|
34
|
-
new_ast.each do |mod_rule|
|
35
|
-
debug("make_bnf") {"rewrite #{mod_rule.inspect} from #{dst_rule.sym} to #{src_rule.sym}"}
|
36
|
-
mod_rule.rewrite(dst_rule, src_rule)
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
# AST now has just rewritten rules
|
42
|
-
compacted_ast = new_ast - to_rewrite.values.flatten.compact
|
43
|
-
|
44
|
-
# Sort AST by number
|
45
|
-
@ast = compacted_ast
|
20
|
+
@ast = new_ast
|
46
21
|
progress("make_bnf") {"End: #{@ast.length} rules"}
|
47
22
|
self
|
48
23
|
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# This file is automatically generated by ebnf version 2.0.0
|
2
|
+
# Derived from etc/ebnf.ebnf
|
3
|
+
module EBNFMeta
|
4
|
+
RULES = [
|
5
|
+
EBNF::Rule.new(:ebnf, "1", [:star, :_ebnf_1]).extend(EBNF::PEG::Rule),
|
6
|
+
EBNF::Rule.new(:_ebnf_1, "1.1", [:alt, :declaration, :rule]).extend(EBNF::PEG::Rule),
|
7
|
+
EBNF::Rule.new(:declaration, "2", [:alt, "@terminals", :pass]).extend(EBNF::PEG::Rule),
|
8
|
+
EBNF::Rule.new(:rule, "3", [:seq, :LHS, :expression]).extend(EBNF::PEG::Rule),
|
9
|
+
EBNF::Rule.new(:expression, "4", [:seq, :alt]).extend(EBNF::PEG::Rule),
|
10
|
+
EBNF::Rule.new(:alt, "5", [:seq, :seq, :_alt_1]).extend(EBNF::PEG::Rule),
|
11
|
+
EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2]).extend(EBNF::PEG::Rule),
|
12
|
+
EBNF::Rule.new(:_alt_2, "5.2", [:seq, "|", :seq]).extend(EBNF::PEG::Rule),
|
13
|
+
EBNF::Rule.new(:seq, "6", [:plus, :diff]).extend(EBNF::PEG::Rule),
|
14
|
+
EBNF::Rule.new(:diff, "7", [:seq, :postfix, :_diff_1]).extend(EBNF::PEG::Rule),
|
15
|
+
EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2]).extend(EBNF::PEG::Rule),
|
16
|
+
EBNF::Rule.new(:_diff_2, "7.2", [:seq, "-", :postfix]).extend(EBNF::PEG::Rule),
|
17
|
+
EBNF::Rule.new(:postfix, "8", [:seq, :primary, :_postfix_1]).extend(EBNF::PEG::Rule),
|
18
|
+
EBNF::Rule.new(:_postfix_1, "8.1", [:opt, :POSTFIX]).extend(EBNF::PEG::Rule),
|
19
|
+
EBNF::Rule.new(:primary, "9", [:alt, :HEX, :SYMBOL, :O_RANGE, :RANGE, :STRING1, :STRING2, :_primary_1]).extend(EBNF::PEG::Rule),
|
20
|
+
EBNF::Rule.new(:_primary_1, "9.1", [:seq, "(", :expression, ")"]).extend(EBNF::PEG::Rule),
|
21
|
+
EBNF::Rule.new(:pass, "10", [:seq, "@pass", :expression]).extend(EBNF::PEG::Rule),
|
22
|
+
EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule),
|
23
|
+
EBNF::Rule.new(:LHS, "11", [:seq, :_LHS_1, :SYMBOL, :_LHS_2, "::="], kind: :terminal).extend(EBNF::PEG::Rule),
|
24
|
+
EBNF::Rule.new(:_LHS_1, "11.1", [:opt, :_LHS_3], kind: :terminal).extend(EBNF::PEG::Rule),
|
25
|
+
EBNF::Rule.new(:_LHS_3, "11.3", [:seq, "[", :SYMBOL, "]", :_LHS_4], kind: :terminal).extend(EBNF::PEG::Rule),
|
26
|
+
EBNF::Rule.new(:_LHS_4, "11.4", [:plus, " "], kind: :terminal).extend(EBNF::PEG::Rule),
|
27
|
+
EBNF::Rule.new(:_LHS_2, "11.2", [:star, " "], kind: :terminal).extend(EBNF::PEG::Rule),
|
28
|
+
EBNF::Rule.new(:SYMBOL, "12", [:plus, :_SYMBOL_1], kind: :terminal).extend(EBNF::PEG::Rule),
|
29
|
+
EBNF::Rule.new(:_SYMBOL_1, "12.1", [:alt, :_SYMBOL_2, :_SYMBOL_3, :_SYMBOL_4, "_", "."], kind: :terminal).extend(EBNF::PEG::Rule),
|
30
|
+
EBNF::Rule.new(:_SYMBOL_2, "12.2", [:range, "a-z"], kind: :terminal).extend(EBNF::PEG::Rule),
|
31
|
+
EBNF::Rule.new(:_SYMBOL_3, "12.3", [:range, "A-Z"], kind: :terminal).extend(EBNF::PEG::Rule),
|
32
|
+
EBNF::Rule.new(:_SYMBOL_4, "12.4", [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
|
33
|
+
EBNF::Rule.new(:HEX, "13", [:seq, "#x", :_HEX_1], kind: :terminal).extend(EBNF::PEG::Rule),
|
34
|
+
EBNF::Rule.new(:_HEX_1, "13.1", [:plus, :_HEX_2], kind: :terminal).extend(EBNF::PEG::Rule),
|
35
|
+
EBNF::Rule.new(:_HEX_2, "13.2", [:alt, :_HEX_3, :_HEX_4, :_HEX_5], kind: :terminal).extend(EBNF::PEG::Rule),
|
36
|
+
EBNF::Rule.new(:_HEX_3, "13.3", [:range, "a-f"], kind: :terminal).extend(EBNF::PEG::Rule),
|
37
|
+
EBNF::Rule.new(:_HEX_4, "13.4", [:range, "A-F"], kind: :terminal).extend(EBNF::PEG::Rule),
|
38
|
+
EBNF::Rule.new(:_HEX_5, "13.5", [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
|
39
|
+
EBNF::Rule.new(:RANGE, "14", [:seq, "[", :_RANGE_1, :_RANGE_2, :_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule),
|
40
|
+
EBNF::Rule.new(:_RANGE_1, "14.1", [:plus, :_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule),
|
41
|
+
EBNF::Rule.new(:_RANGE_4, "14.4", [:alt, :_RANGE_5, :_RANGE_6, :R_CHAR, :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
|
42
|
+
EBNF::Rule.new(:_RANGE_5, "14.5", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule),
|
43
|
+
EBNF::Rule.new(:_RANGE_6, "14.6", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
|
44
|
+
EBNF::Rule.new(:_RANGE_2, "14.2", [:opt, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
|
45
|
+
EBNF::Rule.new(:_RANGE_3, "14.3", [:diff, "]", :LHS], kind: :terminal).extend(EBNF::PEG::Rule),
|
46
|
+
EBNF::Rule.new(:O_RANGE, "15", [:seq, "[^", :_O_RANGE_1, :_O_RANGE_2, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
|
47
|
+
EBNF::Rule.new(:_O_RANGE_1, "15.1", [:plus, :_O_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule),
|
48
|
+
EBNF::Rule.new(:_O_RANGE_3, "15.3", [:alt, :_O_RANGE_4, :_O_RANGE_5, :R_CHAR, :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
|
49
|
+
EBNF::Rule.new(:_O_RANGE_4, "15.4", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule),
|
50
|
+
EBNF::Rule.new(:_O_RANGE_5, "15.5", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
|
51
|
+
EBNF::Rule.new(:_O_RANGE_2, "15.2", [:opt, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
|
52
|
+
EBNF::Rule.new(:STRING1, "16", [:seq, "\"", :_STRING1_1, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
53
|
+
EBNF::Rule.new(:_STRING1_1, "16.1", [:star, :_STRING1_2], kind: :terminal).extend(EBNF::PEG::Rule),
|
54
|
+
EBNF::Rule.new(:_STRING1_2, "16.2", [:diff, :CHAR, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
55
|
+
EBNF::Rule.new(:STRING2, "17", [:seq, "'", :_STRING2_1, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
|
56
|
+
EBNF::Rule.new(:_STRING2_1, "17.1", [:star, :_STRING2_2], kind: :terminal).extend(EBNF::PEG::Rule),
|
57
|
+
EBNF::Rule.new(:_STRING2_2, "17.2", [:diff, :CHAR, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
|
58
|
+
EBNF::Rule.new(:CHAR, "18", [:alt, :_CHAR_1, :_CHAR_2, :_CHAR_3, :_CHAR_4], kind: :terminal).extend(EBNF::PEG::Rule),
|
59
|
+
EBNF::Rule.new(:_CHAR_1, "18.1", [:range, "#x9#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule),
|
60
|
+
EBNF::Rule.new(:_CHAR_2, "18.2", [:range, "#x20-#xD7FF"], kind: :terminal).extend(EBNF::PEG::Rule),
|
61
|
+
EBNF::Rule.new(:_CHAR_3, "18.3", [:range, "#xE000-#xFFFD"], kind: :terminal).extend(EBNF::PEG::Rule),
|
62
|
+
EBNF::Rule.new(:_CHAR_4, "18.4", [:range, "#x10000-#x10FFFF"], kind: :terminal).extend(EBNF::PEG::Rule),
|
63
|
+
EBNF::Rule.new(:R_CHAR, "19", [:diff, :CHAR, :_R_CHAR_1], kind: :terminal).extend(EBNF::PEG::Rule),
|
64
|
+
EBNF::Rule.new(:_R_CHAR_1, "19.1", [:alt, "]", "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
|
65
|
+
EBNF::Rule.new(:POSTFIX, "20", [:range, "?*+"], kind: :terminal).extend(EBNF::PEG::Rule),
|
66
|
+
EBNF::Rule.new(:PASS, "21", [:alt, :_PASS_1, :_PASS_2, :_PASS_3, :_PASS_4], kind: :terminal).extend(EBNF::PEG::Rule),
|
67
|
+
EBNF::Rule.new(:_PASS_1, "21.1", [:range, "#x9#xA#xD#x20"], kind: :terminal).extend(EBNF::PEG::Rule),
|
68
|
+
EBNF::Rule.new(:_PASS_2, "21.2", [:seq, :_PASS_5, :_PASS_6], kind: :terminal).extend(EBNF::PEG::Rule),
|
69
|
+
EBNF::Rule.new(:_PASS_5, "21.5", [:alt, :_PASS_7, "//"], kind: :terminal).extend(EBNF::PEG::Rule),
|
70
|
+
EBNF::Rule.new(:_PASS_7, "21.7", [:diff, "#", "#x"], kind: :terminal).extend(EBNF::PEG::Rule),
|
71
|
+
EBNF::Rule.new(:_PASS_6, "21.6", [:star, :_PASS_8], kind: :terminal).extend(EBNF::PEG::Rule),
|
72
|
+
EBNF::Rule.new(:_PASS_8, "21.8", [:range, "^#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule),
|
73
|
+
EBNF::Rule.new(:_PASS_3, "21.3", [:seq, "/*", :_PASS_9, "*/"], kind: :terminal).extend(EBNF::PEG::Rule),
|
74
|
+
EBNF::Rule.new(:_PASS_9, "21.9", [:star, :_PASS_10], kind: :terminal).extend(EBNF::PEG::Rule),
|
75
|
+
EBNF::Rule.new(:_PASS_10, "21.10", [:alt, :_PASS_11, :_PASS_12], kind: :terminal).extend(EBNF::PEG::Rule),
|
76
|
+
EBNF::Rule.new(:_PASS_11, "21.11", [:opt, :_PASS_13], kind: :terminal).extend(EBNF::PEG::Rule),
|
77
|
+
EBNF::Rule.new(:_PASS_13, "21.13", [:seq, "*", :_PASS_14], kind: :terminal).extend(EBNF::PEG::Rule),
|
78
|
+
EBNF::Rule.new(:_PASS_14, "21.14", [:range, "^/"], kind: :terminal).extend(EBNF::PEG::Rule),
|
79
|
+
EBNF::Rule.new(:_PASS_12, "21.12", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule),
|
80
|
+
EBNF::Rule.new(:_PASS_4, "21.4", [:seq, "(*", :_PASS_15, "*)"], kind: :terminal).extend(EBNF::PEG::Rule),
|
81
|
+
EBNF::Rule.new(:_PASS_15, "21.15", [:star, :_PASS_16], kind: :terminal).extend(EBNF::PEG::Rule),
|
82
|
+
EBNF::Rule.new(:_PASS_16, "21.16", [:alt, :_PASS_17, :_PASS_18], kind: :terminal).extend(EBNF::PEG::Rule),
|
83
|
+
EBNF::Rule.new(:_PASS_17, "21.17", [:opt, :_PASS_19], kind: :terminal).extend(EBNF::PEG::Rule),
|
84
|
+
EBNF::Rule.new(:_PASS_19, "21.19", [:seq, "*", :_PASS_20], kind: :terminal).extend(EBNF::PEG::Rule),
|
85
|
+
EBNF::Rule.new(:_PASS_20, "21.20", [:range, "^)"], kind: :terminal).extend(EBNF::PEG::Rule),
|
86
|
+
EBNF::Rule.new(:_PASS_18, "21.18", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule),
|
87
|
+
EBNF::Rule.new(:_pass, nil, [:seq, :PASS], kind: :pass).extend(EBNF::PEG::Rule),
|
88
|
+
]
|
89
|
+
end
|
90
|
+
|
data/lib/ebnf/isoebnf.rb
ADDED
@@ -0,0 +1,229 @@
|
|
1
|
+
require_relative 'isoebnf/meta'
|
2
|
+
require 'logger'
|
3
|
+
|
4
|
+
# ISO EBNF parser
|
5
|
+
# Parses ISO EBNF into an array of {EBNF::Rule}.
|
6
|
+
module EBNF
|
7
|
+
class ISOEBNF
|
8
|
+
include EBNF::PEG::Parser
|
9
|
+
|
10
|
+
# The base for terminal-character, which omits "'", '"', and '?'.
|
11
|
+
# Could be more optimized, and one might quibble
|
12
|
+
# with the overly-strictly defined character set,
|
13
|
+
# but it is correct.
|
14
|
+
TERMINAL_CHARACTER_BASE = %r{
|
15
|
+
[a-zA-Z0-9] | # letter | decimal digit
|
16
|
+
, | # concatenate symbol
|
17
|
+
= | # defining symbol
|
18
|
+
[\|\/!] | # definition separator symbol
|
19
|
+
\*\) | # end comment symbol
|
20
|
+
\) | # end group symbol
|
21
|
+
\] | # end option symbol
|
22
|
+
\} | # end repeat symbol
|
23
|
+
\- | # except symbol
|
24
|
+
#\' | # first quote symbol
|
25
|
+
\* | # repetition symbol
|
26
|
+
#\" | # second quote symbol
|
27
|
+
#\? | # special sequence symbol
|
28
|
+
\(\* | # start comment symbol
|
29
|
+
\( | # start group symbol
|
30
|
+
\[ | # start option symbol
|
31
|
+
\{ | # start repeat symbol
|
32
|
+
[;\.] | # terminator symbol
|
33
|
+
[:+_%@&$<>^\x20\x23\\`~] # other character
|
34
|
+
}x
|
35
|
+
|
36
|
+
TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"\?]}
|
37
|
+
FIRST_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|["\?]}
|
38
|
+
SECOND_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['\?]}
|
39
|
+
SPECIAL_SEQUENCE_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"]}
|
40
|
+
|
41
|
+
# Abstract syntax tree from parse
|
42
|
+
#
|
43
|
+
# @return [Array<EBNF::Rule>]
|
44
|
+
attr_reader :ast
|
45
|
+
|
46
|
+
# `[14] integer ::= decimal_digit+`
|
47
|
+
terminal(:integer, /\d+/) do |value, prod|
|
48
|
+
value.to_i
|
49
|
+
end
|
50
|
+
|
51
|
+
# `[15] meta_identifier ::= letter meta_identifier_character*`
|
52
|
+
terminal(:meta_identifier, /[a-zA-Z][a-zA-Z0-9_]*/) do |value|
|
53
|
+
value.to_sym
|
54
|
+
end
|
55
|
+
|
56
|
+
# `[17] terminal_string ::= ("'" first_terminal_character+ "'")`
|
57
|
+
# ` | ('"' second_terminal_character+ '"')`
|
58
|
+
terminal(:terminal_string, /(?:'#{FIRST_TERMINAL_CHARACTER}+')|(?:"#{SECOND_TERMINAL_CHARACTER}+")/x) do |value|
|
59
|
+
value[1..-2]
|
60
|
+
end
|
61
|
+
|
62
|
+
# `[20] special_sequence ::= '?' special_sequence_character* '?'`
|
63
|
+
terminal(:special_sequence, /\?#{SPECIAL_SEQUENCE_CHARACTER}+\?/)
|
64
|
+
|
65
|
+
# `[22] terminal_character ::= [a-zA-Z0-9]`
|
66
|
+
# ` | [,=;*}#x2d?([{;]`
|
67
|
+
# ` | '*)'`
|
68
|
+
# ` | '(*'`
|
69
|
+
# ` | ']'`
|
70
|
+
# ` | other_character`
|
71
|
+
terminal(:terminal_character, TERMINAL_CHARACTER)
|
72
|
+
|
73
|
+
# `[25] empty ::= ''`
|
74
|
+
terminal(:empty, //)
|
75
|
+
|
76
|
+
# `[26] definition_separator_symbol ::= '|' | '/' | '!'`
|
77
|
+
terminal(:definition_separator_symbol, /[\|\/!]/)
|
78
|
+
|
79
|
+
# `[27] terminator_symbol ::= ';' | '.'`
|
80
|
+
terminal(:terminator_symbol, /[;\.]/)
|
81
|
+
|
82
|
+
# `[28] start_option_symbol ::= '[' | '(/'`
|
83
|
+
terminal(:start_option_symbol, /\[|(?:\(\/)/)
|
84
|
+
|
85
|
+
# `[29] end_option_symbol ::= ']'`
|
86
|
+
terminal(:end_option_symbol, /\]/)
|
87
|
+
|
88
|
+
# `[30] start_repeat_symbol ::= '{' | '(:'`
|
89
|
+
terminal(:start_repeat_symbol, /{|\(:/)
|
90
|
+
|
91
|
+
# `[31] end_repeat_symbol ::= '}' | ':)'`
|
92
|
+
terminal(:end_repeat_symbol, /}|:\)/)
|
93
|
+
|
94
|
+
# ## Non-terminal productions
|
95
|
+
|
96
|
+
# `[2] syntax_rule ::= meta_identifier '=' definitions_list terminator_symbol`
|
97
|
+
production(:syntax_rule, clear_packrat: true) do |value, data, callback|
|
98
|
+
# value contains an expression.
|
99
|
+
# Invoke callback
|
100
|
+
sym = value[0][:meta_identifier]
|
101
|
+
definitions_list = value[2][:definitions_list]
|
102
|
+
callback.call(:rule, EBNF::Rule.new(sym.to_sym, nil, definitions_list))
|
103
|
+
nil
|
104
|
+
end
|
105
|
+
|
106
|
+
# Setting `as_hash: true` in the start production makes the value take the form of a hash, rather than an array of hashes.
|
107
|
+
#
|
108
|
+
# `[3] definitions_list ::= single_definition (definition_separator_symbol definitions_list)*`
|
109
|
+
start_production(:definitions_list, as_hash: true)
|
110
|
+
production(:definitions_list) do |value|
|
111
|
+
if value[:_definitions_list_1].length > 0
|
112
|
+
[:alt, value[:single_definition]] + value[:_definitions_list_1]
|
113
|
+
else
|
114
|
+
value[:single_definition]
|
115
|
+
end
|
116
|
+
end
|
117
|
+
production(:_definitions_list_1) do |value|
|
118
|
+
Array(value.first)
|
119
|
+
end
|
120
|
+
start_production(:_definitions_list_2, as_hash: true)
|
121
|
+
production(:_definitions_list_2) do |value|
|
122
|
+
if Array(value[:definitions_list]).first == :alt
|
123
|
+
value[:definitions_list][1..-1]
|
124
|
+
else
|
125
|
+
[value[:definitions_list]]
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
# `[4] single_definition ::= term (',' term)*`
|
130
|
+
start_production(:single_definition, as_hash: true)
|
131
|
+
production(:single_definition) do |value|
|
132
|
+
if value[:_single_definition_1].length > 0
|
133
|
+
[:seq, value[:term]] + value[:_single_definition_1]
|
134
|
+
else
|
135
|
+
value[:term]
|
136
|
+
end
|
137
|
+
end
|
138
|
+
production(:_single_definition_1) do |value|
|
139
|
+
value.map {|a1| a1.last[:term]}.compact # Get rid of ','
|
140
|
+
end
|
141
|
+
|
142
|
+
# `[5] term ::= factor ('-' exception)?`
|
143
|
+
start_production(:term, as_hash: true)
|
144
|
+
production(:term) do |value|
|
145
|
+
if value[:_term_1]
|
146
|
+
[:diff, value[:factor], value[:_term_1]]
|
147
|
+
else
|
148
|
+
value[:factor]
|
149
|
+
end
|
150
|
+
end
|
151
|
+
production(:_term_1) do |value|
|
152
|
+
value.last[:exception] if value
|
153
|
+
end
|
154
|
+
|
155
|
+
# `[6] exception ::= factor`
|
156
|
+
start_production(:exception, as_hash: true)
|
157
|
+
production(:exception) do |value|
|
158
|
+
value[:factor]
|
159
|
+
end
|
160
|
+
|
161
|
+
# `[7] factor ::= (integer '*')? primary`
|
162
|
+
start_production(:factor, as_hash: true)
|
163
|
+
production(:factor) do |value|
|
164
|
+
if value[:_factor_1]
|
165
|
+
[:rept, value[:_factor_1], value[:_factor_1], value[:primary]]
|
166
|
+
else
|
167
|
+
value[:primary]
|
168
|
+
end
|
169
|
+
end
|
170
|
+
production(:_factor_2) do |value|
|
171
|
+
value.first[:integer]
|
172
|
+
end
|
173
|
+
|
174
|
+
# `[9] optional_sequence ::= start_option_symbol definitions_list end_option_symbol`
|
175
|
+
production(:optional_sequence) do |value|
|
176
|
+
[:opt, value[1][:definitions_list]]
|
177
|
+
end
|
178
|
+
|
179
|
+
# `[10] repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol`
|
180
|
+
production(:repeated_sequence) do |value|
|
181
|
+
[:star, value[1][:definitions_list]]
|
182
|
+
end
|
183
|
+
|
184
|
+
# `[11] grouped_sequence ::= '(' definitions_list ')'`
|
185
|
+
production(:grouped_sequence) do |value|
|
186
|
+
[:seq, value[1][:definitions_list]]
|
187
|
+
end
|
188
|
+
|
189
|
+
# ## Parser invocation.
|
190
|
+
# On start, yield ourselves if a block is given, otherwise, return this parser instance
|
191
|
+
#
|
192
|
+
# @param [#read, #to_s] input
|
193
|
+
# @param [Hash{Symbol => Object}] options
|
194
|
+
# @option options [Integer] :level
|
195
|
+
# Trace level. 0(debug), 1(info), 2(warn), 3(error).
|
196
|
+
# @return [EBNF::ISOEBNF]
|
197
|
+
def initialize(input, **options, &block)
|
198
|
+
# If the `level` option is set, instantiate a logger for collecting trace information.
|
199
|
+
if options.has_key?(:level)
|
200
|
+
options[:logger] = Logger.new(STDERR)
|
201
|
+
options[:logger].level = options[:level]
|
202
|
+
options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
|
203
|
+
end
|
204
|
+
|
205
|
+
# Read input, if necessary, which will be used in a Scanner.
|
206
|
+
@input = input.respond_to?(:read) ? input.read : input.to_s
|
207
|
+
|
208
|
+
parsing_terminals = false
|
209
|
+
@ast = []
|
210
|
+
parse(@input,
|
211
|
+
:syntax,
|
212
|
+
ISOEBNFMeta::RULES,
|
213
|
+
whitespace: %r{([\x09-\x0d\x20]|(?:\(\*(?:(?:\*[^\)])|[^*])*\*\)))+},
|
214
|
+
**options
|
215
|
+
) do |context, *data|
|
216
|
+
rule = case context
|
217
|
+
when :rule
|
218
|
+
# A rule which has already been turned into a `Rule` object.
|
219
|
+
rule = data.first
|
220
|
+
rule.kind = :terminal if parsing_terminals
|
221
|
+
rule
|
222
|
+
end
|
223
|
+
@ast << rule if rule
|
224
|
+
end
|
225
|
+
rescue EBNF::PEG::Parser::Error => e
|
226
|
+
raise SyntaxError, e.message
|
227
|
+
end
|
228
|
+
end
|
229
|
+
end
|