ebnf 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +21 -7
- data/VERSION +1 -1
- data/bin/ebnf +73 -16
- data/etc/{ebnf.bnf → ebnf.ebnf} +2 -2
- data/etc/ebnf.ll1 +1010 -0
- data/etc/turtle.ebnf +70 -0
- data/etc/turtle.ll1 +1565 -0
- data/etc/turtle.rb +1375 -0
- data/lib/ebnf.rb +16 -1023
- data/lib/ebnf/base.rb +266 -0
- data/lib/ebnf/bnf.rb +50 -0
- data/lib/ebnf/ll1.rb +321 -0
- data/lib/ebnf/ll1/lexer.rb +11 -11
- data/lib/ebnf/ll1/parser.rb +28 -32
- data/lib/ebnf/ll1/scanner.rb +1 -1
- data/lib/ebnf/parser.rb +297 -0
- data/lib/ebnf/rule.rb +362 -0
- metadata +12 -3
data/lib/ebnf/rule.rb
ADDED
@@ -0,0 +1,362 @@
|
|
1
|
+
module EBNF
|
2
|
+
# Represent individual parsed rules
|
3
|
+
class Rule
|
4
|
+
# Operations which are flattened to separate rules in to_bnf
|
5
|
+
# Operators that may appear at the head of a :rule expression.
BNF_OPS = [:alt, :opt, :plus, :seq, :star].freeze
|
8
|
+
|
9
|
+
# Operators whose presence at the head of an expression marks a terminal.
TERM_OPS = [:diff, :hex, :range].freeze
|
12
|
+
|
13
|
+
# @!attribute [rw] sym for rule
|
14
|
+
# @return [Symbol]
|
15
|
+
attr_accessor :sym
|
16
|
+
|
17
|
+
# @!attribute [rw] id of rule
|
18
|
+
# @return [String]
|
19
|
+
attr_accessor :id
|
20
|
+
|
21
|
+
# A comprehension is a sequence which contains all elements but the first of the original rule.
|
22
|
+
# @!attribute [rw] comprehension of this rule
|
23
|
+
# @return [Rule]
|
24
|
+
attr_accessor :comp
|
25
|
+
|
26
|
+
# @!attribute [rw] kind of rule
|
27
|
+
# @return [:rule, :terminal, or :pass]
|
28
|
+
attr_accessor :kind
|
29
|
+
|
30
|
+
# @!attribute [rw] expr rule expression
|
31
|
+
# @return [Array]
|
32
|
+
attr_accessor :expr
|
33
|
+
|
34
|
+
# @!attribute [rw] orig original rule
|
35
|
+
# @return [String]
|
36
|
+
attr_accessor :orig
|
37
|
+
|
38
|
+
# @!attribute [r] first terminals that immediately precede this rule
|
39
|
+
# @return [Array<Rule>]
|
40
|
+
attr_reader :first
|
41
|
+
|
42
|
+
# @!attribute [r] follow terminals that immediately follow this rule
|
43
|
+
# @return [Array<Rule>]
|
44
|
+
attr_reader :follow
|
45
|
+
|
46
|
+
# @!attribute [rw] start indicates that this is a starting rule
|
47
|
+
# @return [Boolean]
|
48
|
+
attr_accessor :start
|
49
|
+
|
50
|
+
# @param [Integer] id
|
51
|
+
# @param [Symbol] sym
|
52
|
+
# @param [Array] expr
|
53
|
+
# @param [Hash{Symbol => Object}] options
|
54
|
+
# @option options [Symbol] :kind
|
55
|
+
# @option options [String] :ebnf
|
56
|
+
def initialize(sym, id, expr, options = {})
  @sym = sym
  @id = id
  # A bare (non-Array) expression is wrapped as a one-element sequence.
  @expr = expr.is_a?(Array) ? expr : [:seq, expr]
  @ebnf = options[:ebnf]
  # Unless a :top_rule is supplied, this rule is its own top rule.
  @top_rule = options.fetch(:top_rule, self)

  # An explicit :kind wins; otherwise an all-uppercase symbol, or an
  # expression not headed by one of BNF_OPS, makes this a terminal.
  name = sym.to_s
  @kind =
    if options[:kind]
      options[:kind]
    elsif name == name.upcase || !BNF_OPS.include?(@expr.first)
      :terminal
    else
      :rule
    end
end
|
68
|
+
|
69
|
+
# Build a new rule creating a symbol and numbering from the current rule
|
70
|
+
# Symbol and number creation is handled by the top-most rule in such a chain
|
71
|
+
#
|
72
|
+
# @param [Array] expr
|
73
|
+
# @param [Hash{Symbol => Object}] options
|
74
|
+
# @option options [Symbol] :kind
|
75
|
+
# @option options [String] :ebnf EBNF instance (used for messages)
|
76
|
+
def build(expr, options = {})
  # The top-most rule of the chain hands out symbols and ids.
  top = @top_rule || self
  new_sym, new_id = top.send(:make_sym_id)

  # Defaults first, so that anything in +options+ overrides them.
  defaults = {
    :kind => options[:kind],
    :ebnf => @ebnf,
    :top_rule => top,
  }
  rule_options = defaults.merge(options)
  Rule.new(new_sym, new_id, expr, rule_options)
end
|
84
|
+
|
85
|
+
# Serializes this rule to an S-Expression
|
86
|
+
# @return [String]
|
87
|
+
def to_sxp
  # Layout: (kind sym id [(start true)] [(first ...)] [(follow ...)] expr)
  parts = [kind, sym, id]
  parts.push([:start, true]) if start
  # first/follow are emitted sorted by string form; the stored arrays are not modified.
  parts.push([:first] + first.sort_by(&:to_s)) if first
  parts.push([:follow] + follow.sort_by(&:to_s)) if follow
  parts.push(expr)

  begin
    require 'sxp'
    SXP::Generator.string(parts)
  rescue LoadError
    # Without the sxp library, rely on whatever Array#to_sxp is available.
    parts.to_sxp
  end
end
|
100
|
+
# The string form of a rule is its S-Expression serialization.
def to_s
  to_sxp
end
|
101
|
+
|
102
|
+
# Serializes this rule to Turtle
|
103
|
+
# @return [String]
|
104
|
+
def to_ttl
  # Produces the Turtle statements for this rule: a label/value line, an
  # optional rdfs:comment carrying the original rule text, then the
  # expression as rendered by #ttl_expr. The result starts with a newline.
  #
  # @ebnf is only set when the rule was created with an :ebnf option, so
  # debugging is skipped when it is absent instead of failing on nil.
  @ebnf.debug("to_ttl") {inspect} if @ebnf

  statements = [
    %{:#{id} rdfs:label "#{id}"; rdf:value "#{sym}";},
  ]

  # #orig is never assigned by #initialize; emit the comment only when
  # the original rule text has actually been recorded.
  if orig
    # NOTE(review): gsub("\\", "\\\\") looks like a no-op, because a doubled
    # backslash in a gsub replacement string denotes one literal backslash.
    # Kept as-is so the output stays unchanged; the final escaping is done
    # by #inspect below.
    comment = orig.strip.
      gsub(/"""/, '\"\"\"').
      gsub("\\", "\\\\").
      sub(/^\"/, '\"').
      sub(/\"$/m, '\"')
    statements << %{ rdfs:comment #{comment.inspect};}
  end

  # Terminals are rendered with the "re" prefix, everything else with "g".
  statements += ttl_expr(expr, kind == :terminal ? "re" : "g", 1, false)
  "\n" + statements.join("\n")
end
|
119
|
+
|
120
|
+
##
|
121
|
+
# Transform EBNF rule to BNF rules:
|
122
|
+
#
|
123
|
+
# * Transform (a [n] rule (op1 (op2))) into two rules:
|
124
|
+
# (a [n] rule (op1 a.2))
|
125
|
+
# (_a_1 [n.1] rule (op2))
|
126
|
+
# * Transform (a rule (opt b)) into (a rule (alt _empty b))
|
127
|
+
# * Transform (a rule (star b)) into (a rule (alt _empty (seq b a)))
|
128
|
+
# * Transform (a rule (plus b)) into (a rule (seq b (star b)))
|
129
|
+
# @return [Array<Rule>]
|
130
|
+
def to_bnf
  # Rewrites this rule into one or more rules whose expressions are flat
  # (alt ...) / (seq ...) forms. Returns an Array of rules; the receiver
  # itself is returned when no rewriting is needed. Raises RuntimeError
  # for expressions it cannot transform.
  #
  # Only rules of kind :rule are rewritten; everything else passes through.
  return [self] unless kind == :rule
  new_rules = []
  nested_ops = BNF_OPS + TERM_OPS

  # Look for rules containing nested definitions and rewrite them to multiple
  # rules. Each element of `expr` that is an Array headed by a Symbol becomes
  # a new rule, and is replaced here by that rule's symbol.
  if expr.any? {|e| e.is_a?(Array) && nested_ops.include?(e.first)}
    # * Transform (a [n] rule (op1 (op2))) into two rules:
    #     (a.1 [n.1] rule (op1 a.2))
    #     (a.2 [n.2] rule (op2))
    # Duplicate ourselves for rewriting. #dup is shallow, so the copy gets
    # its own expression array; otherwise the index assignment below would
    # rewrite the receiver's expr as well.
    this = dup
    this.expr = expr.dup
    new_rules << this

    expr.each_with_index do |e, index|
      next unless e.is_a?(Array) && e.first.is_a?(Symbol)
      new_rule = build(e)
      this.expr[index] = new_rule.sym
      new_rules << new_rule
    end

    # Return new rules after recursively applying #to_bnf
    new_rules = new_rules.map {|r| r.to_bnf}.flatten
  elsif expr.first == :opt
    # * Transform (a rule (opt b)) into (a rule (alt _empty b))
    this = dup
    this.expr = [:alt, :_empty, expr.last]
    new_rules = this.to_bnf
  elsif expr.first == :star
    # * Transform (a rule (star b)) into (a rule (alt _empty (seq b a)))
    this = dup
    new_rule = this.build([:seq, expr.last, this.sym])
    this.expr = [:alt, :_empty, new_rule.sym]
    new_rules = [this] + new_rule.to_bnf
  elsif expr.first == :plus
    # * Transform (a rule (plus b)) into (a rule (seq b (star b)))
    this = dup
    this.expr = [:seq, expr.last, [:star, expr.last]]
    new_rules = this.to_bnf
  elsif [:alt, :seq].include?(expr.first)
    # Otherwise, no further transformation necessary
    new_rules << self
  elsif [:diff, :hex, :range].include?(expr.first)
    # These rules are fine, they just need to be terminals
    raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.kind == :terminal
    new_rules << self
  else
    # Some case we didn't think of
    raise "Error trying to transform #{expr.inspect} to BNF"
  end

  new_rules
end
|
182
|
+
|
183
|
+
# Does this rule start with a sym? It does if expr is that sym,
|
184
|
+
# expr starts with alt and contains that sym, or
|
185
|
+
# expr starts with seq and the next element is that sym
|
186
|
+
# @param [Symbol, class] sym
|
187
|
+
# Symbol matching any start element, or if it is String, any start element which is a String
|
188
|
+
# @return [Array<Symbol, String>] list of symbol (singular), or strings which are start symbol, or nil if there are none
|
189
|
+
def starts_with(sym)
  # For a sequence only the element right after the :seq operator counts.
  if seq?
    head = expr.fetch(1, nil)
    return [head] if sym === head
  end

  # For an alternation every matching element counts; nil when none match.
  return nil unless alt?
  matches = expr.select {|e| sym === e}
  matches.empty? ? nil : matches
end
|
198
|
+
|
199
|
+
# Add terminal as preceding this rule
|
200
|
+
# @param [Array<Rule>] terminals
|
201
|
+
# @return [Integer] if number of terminals added
|
202
|
+
def add_first(terminals)
  @first ||= []
  # Keep only the terminals not yet recorded, append them, and report how many.
  fresh = terminals - @first
  @first = @first + fresh
  fresh.length
end
|
208
|
+
|
209
|
+
# Add terminal as following this rule. Don't add _eps as a follow
|
210
|
+
#
|
211
|
+
# @param [Array<Rule>] terminals
|
212
|
+
# @return [Integer] if number of terminals added
|
213
|
+
def add_follow(terminals)
  # Drop terminals already in follow, and never record the empty string (:_eps).
  fresh = terminals - (@follow || []) - [:_eps]
  # @follow is only created once there is something to store.
  @follow = (@follow || []) + fresh unless fresh.empty?
  fresh.length
end
|
222
|
+
|
223
|
+
# Is this rule of the form (seq ...)?
|
224
|
+
def seq?
  # Only an Array expression headed by :seq qualifies.
  return false unless expr.is_a?(Array)
  expr.first == :seq
end
|
227
|
+
|
228
|
+
# Is this rule of the form (alt ...)?
|
229
|
+
def alt?
  # Only an Array expression headed by :alt qualifies.
  return false unless expr.is_a?(Array)
  expr.first == :alt
end
|
232
|
+
|
233
|
+
def inspect
  # Shows the object id followed by the identifying attributes as a Hash.
  attributes = {:sym => sym, :id => id, :kind => kind, :expr => expr}
  "#<EBNF::Rule:#{object_id} #{attributes.inspect}>"
end
|
238
|
+
|
239
|
+
# Two rules are equal if they have the same {#sym}, {#kind} and {#expr}
|
240
|
+
# @param [Rule] other
|
241
|
+
# @return [Boolean]
|
242
|
+
def ==(other)
  # Compared in order: symbol, kind, then expression; the id is not considered.
  return false unless sym == other.sym
  return false unless kind == other.kind
  expr == other.expr
end
|
247
|
+
|
248
|
+
# Two rules are equivalent if they have the same {#expr}
|
249
|
+
# @param [Rule] other
|
250
|
+
# @return [Boolean]
|
251
|
+
def equivalent?(other)
  # Symbol, id and kind are deliberately ignored; only the expressions matter.
  theirs = other.expr
  expr == theirs
end
|
254
|
+
|
255
|
+
# Rewrite the rule substituting src_rule for dst_rule wherever
|
256
|
+
# it is used in the production (first level only).
|
257
|
+
# @param [Rule] src_rule
|
258
|
+
# @param [Rule] dst_rule
|
259
|
+
# @return [Rule]
|
260
|
+
def rewrite(src_rule, dst_rule)
  if @expr.is_a?(Array)
    # Replace every top-level element equal to the source symbol.
    @expr = @expr.map do |element|
      if element == src_rule.sym then dst_rule.sym else element end
    end
  elsif @expr == src_rule.sym
    # A non-Array expression is replaced wholesale when it is the source symbol.
    @expr = dst_rule.sym
  end
  self
end
|
269
|
+
|
270
|
+
# Rules compare using their ids
|
271
|
+
def <=>(other)
  # Order by the integer value of the ids; when those tie, compare the raw ids.
  mine = id.to_i
  theirs = other.id.to_i
  mine == theirs ? (id <=> other.id) : (mine <=> theirs)
end
|
278
|
+
|
279
|
+
private
|
280
|
+
def ttl_expr(expr, pfx, depth, is_obj = true)
  # Renders +expr+ as an Array of Turtle source lines.
  #
  # expr   - an expression Array whose first element is the operator, or a
  #          single non-Array element.
  # pfx    - prefix put before operator names (#to_ttl passes "re" or "g").
  # depth  - nesting depth, used for indentation.
  # is_obj - when true the expression is wrapped in "[ ... ]"; when false
  #          no wrapper is used and the last line is terminated with " .".
  indent = ' ' * depth
  # @ebnf is nil for rules created without an :ebnf option; skip debugging then.
  @ebnf.debug("ttl_expr", :depth => depth) {expr.inspect} if @ebnf
  # Split the operator from its operands without touching the caller's
  # array: this method is handed the rule's own expression tree, which a
  # destructive shift would strip of its operators.
  op, *expr = expr if expr.is_a?(Array)
  statements = []

  if is_obj
    bra, ket = "[ ", " ]"
  else
    bra = ket = ''
  end

  case op
  when :seq, :alt, :diff
    # Operators with a list of operands: render each operand one level deeper.
    statements << %{#{indent}#{bra}#{pfx}:#{op} (}
    expr.each {|a| statements += ttl_expr(a, pfx, depth + 1)}
    statements << %{#{indent} )#{ket}}
  when :opt, :plus, :star
    # Operators with a single operand.
    statements << %{#{indent}#{bra}#{pfx}:#{op} }
    statements += ttl_expr(expr.first, pfx, depth + 1)
    statements << %{#{indent} #{ket}} unless ket.empty?
  when :_empty, :_eps
    statements << %{#{indent}"g:#{op.to_s[1..-1]}"}
  when :"'"
    # NOTE(review): esc is not defined in the visible part of this file.
    statements << %{#{indent}"#{esc(expr)}"}
  when :range
    statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
  when :hex
    raise "didn't expect \" in expr" if expr.include?(:'"')
    statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
  else
    # Anything else (including non-Array elements, for which op is nil) is
    # emitted through #inspect.
    if is_obj
      statements << %{#{indent}#{expr.inspect}}
    else
      statements << %{#{indent}g:seq ( #{expr.inspect} )}
    end
  end

  statements.last << " ." unless is_obj
  @ebnf.debug("statements", :depth => depth) {statements.join("\n")} if @ebnf
  statements
end
|
322
|
+
|
323
|
+
##
|
324
|
+
# turn an XML BNF character class into an N3 literal for that
|
325
|
+
# character class (less the outer quote marks)
|
326
|
+
#
|
327
|
+
# >>> cclass("^<>'{}|^`")
|
328
|
+
# "[^<>'{}|^`]"
|
329
|
+
# >>> cclass("#x0300-#x036F")
|
330
|
+
# "[\\u0300-\\u036F]"
|
331
|
+
# >>> cclass("#xC0-#xD6")
|
332
|
+
# "[\\u00C0-\\u00D6]"
|
333
|
+
# >>> cclass("#x370-#x37D")
|
334
|
+
# "[\\u0370-\\u037D]"
|
335
|
+
#
|
336
|
+
# as in: ECHAR ::= '\' [tbnrf\"']
|
337
|
+
# >>> cclass("tbnrf\\\"'")
|
338
|
+
# 'tbnrf\\\\\\"\''
|
339
|
+
#
|
340
|
+
# >>> cclass("^#x22#x5C#x0A#x0D")
|
341
|
+
# '^\\u0022\\\\\\u005C\\u000A\\u000D'
|
342
|
+
def cclass(txt)
|
343
|
+
'[' +
|
344
|
+
txt.gsub(/\#x[0-9a-fA-F]+/) do |hx|
|
345
|
+
hx = hx[2..-1]
|
346
|
+
if hx.length <= 4
|
347
|
+
"\\u#{'0' * (4 - hx.length)}#{hx}"
|
348
|
+
elsif hx.length <= 8
|
349
|
+
"\\U#{'0' * (8 - hx.length)}#{hx}"
|
350
|
+
end
|
351
|
+
end +
|
352
|
+
']'
|
353
|
+
end
|
354
|
+
|
355
|
+
# Make a new symbol/number combination
|
356
|
+
def make_sym_id
  # The counter starts at 1 and increases with every call.
  @id_seq = (@id_seq || 0) + 1
  derived_sym = "_#{@sym}_#{@id_seq}".to_sym
  derived_id = "#{@id}.#{@id_seq}"
  [derived_sym, derived_id]
end
|
361
|
+
end
|
362
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ebnf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-02-
|
12
|
+
date: 2013-02-26 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: sxp
|
@@ -88,13 +88,22 @@ files:
|
|
88
88
|
- README.md
|
89
89
|
- UNLICENSE
|
90
90
|
- VERSION
|
91
|
+
- lib/ebnf/base.rb
|
92
|
+
- lib/ebnf/bnf.rb
|
91
93
|
- lib/ebnf/ll1/lexer.rb
|
92
94
|
- lib/ebnf/ll1/parser.rb
|
93
95
|
- lib/ebnf/ll1/scanner.rb
|
96
|
+
- lib/ebnf/ll1.rb
|
97
|
+
- lib/ebnf/parser.rb
|
98
|
+
- lib/ebnf/rule.rb
|
94
99
|
- lib/ebnf/version.rb
|
95
100
|
- lib/ebnf.rb
|
96
101
|
- etc/doap.ttl
|
97
|
-
- etc/ebnf.
|
102
|
+
- etc/ebnf.ebnf
|
103
|
+
- etc/ebnf.ll1
|
104
|
+
- etc/turtle.ebnf
|
105
|
+
- etc/turtle.ll1
|
106
|
+
- etc/turtle.rb
|
98
107
|
- bin/ebnf
|
99
108
|
homepage: http://github.com/gkellogg/ebnf
|
100
109
|
licenses:
|