ebnf 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +21 -7
- data/VERSION +1 -1
- data/bin/ebnf +73 -16
- data/etc/{ebnf.bnf → ebnf.ebnf} +2 -2
- data/etc/ebnf.ll1 +1010 -0
- data/etc/turtle.ebnf +70 -0
- data/etc/turtle.ll1 +1565 -0
- data/etc/turtle.rb +1375 -0
- data/lib/ebnf.rb +16 -1023
- data/lib/ebnf/base.rb +266 -0
- data/lib/ebnf/bnf.rb +50 -0
- data/lib/ebnf/ll1.rb +321 -0
- data/lib/ebnf/ll1/lexer.rb +11 -11
- data/lib/ebnf/ll1/parser.rb +28 -32
- data/lib/ebnf/ll1/scanner.rb +1 -1
- data/lib/ebnf/parser.rb +297 -0
- data/lib/ebnf/rule.rb +362 -0
- metadata +12 -3
data/lib/ebnf/rule.rb
ADDED
@@ -0,0 +1,362 @@
|
|
1
|
+
module EBNF
  # Represents an individual parsed grammar rule.
  class Rule
    # Operations which are flattened to separate rules in {#to_bnf}
    BNF_OPS = %w{
      alt opt plus seq star
    }.map(&:to_sym).freeze

    # Operations which {#to_bnf} only accepts inside rules of kind :terminal
    TERM_OPS = %w{
      diff hex range
    }.map(&:to_sym).freeze

    # @!attribute [rw] sym for rule
    # @return [Symbol]
    attr_accessor :sym

    # @!attribute [rw] id of rule
    # @return [String]
    attr_accessor :id

    # A comprehension is a sequence which contains all elements but the first of the original rule.
    # @!attribute [rw] comprehension of this rule
    # @return [Rule]
    attr_accessor :comp

    # @!attribute [rw] kind of rule
    # @return [:rule, :terminal, or :pass]
    attr_accessor :kind

    # @!attribute [rw] expr rule expression
    # @return [Array]
    attr_accessor :expr

    # @!attribute [rw] orig original rule
    # @return [String]
    attr_accessor :orig

    # @!attribute [r] first terminals that immediately precede this rule
    # @return [Array<Rule>]
    attr_reader :first

    # @!attribute [r] follow terminals that immediately follow this rule
    # @return [Array<Rule>]
    attr_reader :follow

    # @!attribute [rw] start indicates that this is a starting rule
    # @return [Boolean]
    attr_accessor :start

    # Create a rule. A non-Array `expr` is wrapped as `[:seq, expr]`.
    #
    # Unless `:kind` is given, the kind is :terminal when `sym` is written
    # entirely in upper case or when the expression's operator is not one of
    # {BNF_OPS}; otherwise it is :rule.
    #
    # @param [Symbol] sym
    # @param [String] id
    # @param [Array] expr
    # @param [Hash{Symbol => Object}] options
    # @option options [Symbol] :kind
    # @option options [Object] :ebnf EBNF instance (used for debug messages)
    # @option options [Rule] :top_rule rule which allocates symbols/ids for
    #   rules created through {#build}; defaults to this rule
    def initialize(sym, id, expr, options = {})
      @sym, @id = sym, id
      @expr = expr.is_a?(Array) ? expr : [:seq, expr]
      @ebnf = options[:ebnf]
      @top_rule = options.fetch(:top_rule, self)
      @kind = case
      when options[:kind] then options[:kind]
      when sym.to_s == sym.to_s.upcase then :terminal
      when !BNF_OPS.include?(@expr.first) then :terminal
      else :rule
      end
    end

    # Build a new rule creating a symbol and numbering from the current rule
    # Symbol and number creation is handled by the top-most rule in such a chain
    #
    # @param [Array] expr
    # @param [Hash{Symbol => Object}] options
    # @option options [Symbol] :kind
    # @option options [String] :ebnf EBNF instance (used for messages)
    # @return [Rule]
    def build(expr, options = {})
      new_sym, new_id = (@top_rule || self).send(:make_sym_id)
      Rule.new(new_sym, new_id, expr, {
        :kind => options[:kind],
        :ebnf => @ebnf,
        :top_rule => @top_rule || self,
      }.merge(options))
    end

    # Serializes this rule to an S-Expression
    # @return [String]
    def to_sxp
      elements = [kind, sym, id]
      elements << [:start, true] if start
      elements << first.sort_by(&:to_s).unshift(:first) if first
      elements << follow.sort_by(&:to_s).unshift(:follow) if follow
      elements << expr
      begin
        # Loaded lazily so the rest of the class works without the sxp gem
        require 'sxp'
        SXP::Generator.string(elements)
      rescue LoadError
        elements.to_sxp
      end
    end
    def to_s; to_sxp; end

    # Serializes this rule to Turtle.
    #
    # Does not modify the rule; it may be called repeatedly.
    # @return [String]
    def to_ttl
      @ebnf.debug("to_ttl") {inspect} if @ebnf
      # NOTE(review): in a gsub replacement string "\\\\" (two backslash
      # characters) denotes a single literal backslash, so the second gsub
      # below appears to be a no-op, and #inspect escapes the text again
      # afterwards. Kept as-is so the emitted Turtle is unchanged -- confirm
      # the intended escaping.
      comment = orig.to_s.strip.
        gsub(/"""/, '\"\"\"').
        gsub("\\", "\\\\").
        sub(/^\"/, '\"').
        sub(/\"$/m, '\"')
      statements = [
        %{:#{id} rdfs:label "#{id}"; rdf:value "#{sym}";},
        %{ rdfs:comment #{comment.inspect};},
      ]

      statements += ttl_expr(expr, kind == :terminal ? "re" : "g", 1, false)
      "\n" + statements.join("\n")
    end

    ##
    # Transform EBNF rule to BNF rules:
    #
    #   * Transform (a [n] rule (op1 (op2))) into two rules:
    #     (a [n] rule (op1 _a_1))
    #     (_a_1 [n.1] rule (op2))
    #   * Transform (a rule (opt b)) into (a rule (alt _empty b))
    #   * Transform (a rule (star b)) into (a rule (alt _empty _a_1))
    #     and (_a_1 rule (seq b a))
    #   * Transform (a rule (plus b)) into (a rule (seq b (star b)))
    #
    # The receiver itself is left unmodified; rewritten rules are copies.
    # @return [Array<Rule>]
    # @raise [RuntimeError] if the expression cannot be transformed
    def to_bnf
      return [self] unless kind == :rule
      new_rules = []

      # Look for rules containing recursive definition and rewrite to multiple rules. If `expr` contains elements which are in array form, where the first element of that array is a symbol, create a new rule for it.
      if expr.any? {|e| e.is_a?(Array) && (BNF_OPS + TERM_OPS).include?(e.first)}
        # Duplicate ourselves for rewriting. #dup is shallow, so give the copy
        # its own array; otherwise the assignments below would rewrite our
        # own expression in place.
        this = dup
        this.expr = expr.dup
        new_rules << this

        expr.each_with_index do |e, index|
          next unless e.is_a?(Array) && e.first.is_a?(Symbol)
          new_rule = build(e)
          this.expr[index] = new_rule.sym
          new_rules << new_rule
        end

        # Return new rules after recursively applying #to_bnf
        new_rules = new_rules.map {|r| r.to_bnf}.flatten
      elsif expr.first == :opt
        this = dup
        #   * Transform (a rule (opt b)) into (a rule (alt _empty b))
        this.expr = [:alt, :_empty, expr.last]
        new_rules = this.to_bnf
      elsif expr.first == :star
        #   * Transform (a rule (star b)) into (a rule (alt _empty _a_1))
        #     plus (_a_1 rule (seq b a))
        this = dup
        new_rule = this.build([:seq, expr.last, this.sym])
        this.expr = [:alt, :_empty, new_rule.sym]
        new_rules = [this] + new_rule.to_bnf
      elsif expr.first == :plus
        #   * Transform (a rule (plus b)) into (a rule (seq b (star b)))
        this = dup
        this.expr = [:seq, expr.last, [:star, expr.last]]
        new_rules = this.to_bnf
      elsif [:alt, :seq].include?(expr.first)
        # Otherwise, no further transformation necessary
        new_rules << self
      elsif TERM_OPS.include?(expr.first)
        # These rules are fine, they just need to be terminals
        raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.kind == :terminal
        new_rules << self
      else
        # Some case we didn't think of
        raise "Error trying to transform #{expr.inspect} to BNF"
      end

      return new_rules
    end

    # Does this rule start with a sym? It does if expr is that sym,
    # expr starts with alt and contains that sym, or
    # expr starts with seq and the next element is that sym
    # @param [Symbol, class] sym
    #   Symbol matching any start element, or if it is String, any start element which is a String
    # @return [Array<Symbol, String>] list of symbol (singular), or strings which are start symbol, or nil if there are none
    def starts_with(sym)
      if seq? && sym === (v = expr.fetch(1, nil))
        [v]
      elsif alt? && expr.any? {|e| sym === e}
        expr.select {|e| sym === e}
      else
        nil
      end
    end

    # Add terminals as preceding this rule
    # @param [Array<Rule>] terminals
    # @return [Integer] the number of terminals added
    def add_first(terminals)
      @first ||= []
      terminals -= @first  # Remove those already in first
      @first += terminals
      terminals.length
    end

    # Add terminals as following this rule. Don't add _eps as a follow
    #
    # @param [Array<Rule>] terminals
    # @return [Integer] the number of terminals added
    def add_follow(terminals)
      terminals -= @follow || []  # Remove those already in follow
      terminals -= [:_eps]  # Special case, don't add empty string as a follow terminal
      unless terminals.empty?
        @follow ||= []
        @follow += terminals
      end
      terminals.length
    end

    # Is this rule of the form (seq ...)?
    def seq?
      expr.is_a?(Array) && expr.first == :seq
    end

    # Is this rule of the form (alt ...)?
    def alt?
      expr.is_a?(Array) && expr.first == :alt
    end

    def inspect
      "#<EBNF::Rule:#{object_id} " +
      {:sym => sym, :id => id, :kind => kind, :expr => expr}.inspect +
      ">"
    end

    # Two rules are equal if they have the same {#sym}, {#kind} and {#expr}.
    # Anything that is not a Rule is never equal.
    # @param [Rule] other
    # @return [Boolean]
    def ==(other)
      other.is_a?(Rule) &&
      sym == other.sym &&
      kind == other.kind &&
      expr == other.expr
    end

    # Two rules are equivalent if they have the same {#expr}.
    # Anything that is not a Rule is never equivalent.
    # @param [Rule] other
    # @return [Boolean]
    def equivalent?(other)
      other.is_a?(Rule) && expr == other.expr
    end

    # Rewrite the rule substituting src_rule for dst_rule wherever
    # it is used in the production (first level only).
    # Modifies this rule in place.
    # @param [Rule] src_rule
    # @param [Rule] dst_rule
    # @return [Rule] self
    def rewrite(src_rule, dst_rule)
      case @expr
      when Array
        @expr = @expr.map {|e| e == src_rule.sym ? dst_rule.sym : e}
      else
        @expr = dst_rule.sym if @expr == src_rule.sym
      end
      self
    end

    # Rules compare using their ids: first by the leading integer part
    # (String#to_i), then, when those are equal, by the full id.
    def <=>(other)
      if id.to_i == other.id.to_i
        id <=> other.id
      else
        id.to_i <=> other.id.to_i
      end
    end

    private
    # Generate the Turtle lines for an expression.
    #
    # @param [Array, Object] expr expression, or a single operand
    # @param [String] pfx prefix used for operator names ("g" or "re")
    # @param [Integer] depth nesting depth, used for indentation
    # @param [Boolean] is_obj false only for the top-level expression, which
    #   is emitted without brackets and terminated with " ."
    # @return [Array<String>]
    def ttl_expr(expr, pfx, depth, is_obj = true)
      indent = ' ' * depth
      @ebnf.debug("ttl_expr", :depth => depth) {expr.inspect} if @ebnf
      # Separate the operator from its operands without touching the array
      # we were handed: it is the rule's own (possibly nested) expression.
      if expr.is_a?(Array)
        op, expr = expr.first, expr.drop(1)
      else
        op = nil
      end
      statements = []

      if is_obj
        bra, ket = "[ ", " ]"
      else
        bra = ket = ''
      end

      case op
      when :seq, :alt, :diff
        statements << %{#{indent}#{bra}#{pfx}:#{op} (}
        expr.each {|a| statements += ttl_expr(a, pfx, depth + 1)}
        statements << %{#{indent} )#{ket}}
      when :opt, :plus, :star
        statements << %{#{indent}#{bra}#{pfx}:#{op} }
        statements += ttl_expr(expr.first, pfx, depth + 1)
        statements << %{#{indent} #{ket}} unless ket.empty?
      when :_empty, :_eps
        statements << %{#{indent}"g:#{op.to_s[1..-1]}"}
      when :"'"
        # NOTE(review): `esc` is not defined anywhere in this file -- confirm
        # it is provided elsewhere before relying on this branch.
        statements << %{#{indent}"#{esc(expr)}"}
      when :range
        statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
      when :hex
        raise "didn't expect \" in expr" if expr.include?(:'"')
        statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
      else
        if is_obj
          statements << %{#{indent}#{expr.inspect}}
        else
          statements << %{#{indent}g:seq ( #{expr.inspect} )}
        end
      end

      statements.last << " ." unless is_obj
      @ebnf.debug("statements", :depth => depth) {statements.join("\n")} if @ebnf
      statements
    end

    ##
    # Turn an XML BNF character class into a bracketed character class in
    # which every `#xH...` escape is rewritten as `\uHHHH` (up to 4 hex
    # digits) or `\UHHHHHHHH` (5 to 8 hex digits). Escapes with more than 8
    # hex digits are left untouched.
    #
    # The results below are the raw characters returned:
    #
    #     cclass("^<>'{}|^`")          => [^<>'{}|^`]
    #     cclass("#x0300-#x036F")      => [\u0300-\u036F]
    #     cclass("#xC0-#xD6")          => [\u00C0-\u00D6]
    #     cclass("#x370-#x37D")        => [\u0370-\u037D]
    #     cclass("^#x22#x5C#x0A#x0D")  => [^\u0022\u005C\u000A\u000D]
    #
    # @param [String] txt
    # @return [String]
    def cclass(txt)
      escaped = txt.gsub(/\#x[0-9a-fA-F]+/) do |hx|
        digits = hx[2..-1]
        if digits.length <= 4
          "\\u#{'0' * (4 - digits.length)}#{digits}"
        elsif digits.length <= 8
          "\\U#{'0' * (8 - digits.length)}#{digits}"
        else
          # Too long for either escape form; keep the text rather than drop it
          hx
        end
      end
      '[' + escaped + ']'
    end

    # Make a new symbol/number combination
    # @return [Array(Symbol, String)] e.g. [:_a_1, "n.1"] for rule `a` with id `n`
    def make_sym_id
      @id_seq ||= 0
      @id_seq += 1
      ["_#{@sym}_#{@id_seq}".to_sym, "#{@id}.#{@id_seq}"]
    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ebnf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-02-
|
12
|
+
date: 2013-02-26 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: sxp
|
@@ -88,13 +88,22 @@ files:
|
|
88
88
|
- README.md
|
89
89
|
- UNLICENSE
|
90
90
|
- VERSION
|
91
|
+
- lib/ebnf/base.rb
|
92
|
+
- lib/ebnf/bnf.rb
|
91
93
|
- lib/ebnf/ll1/lexer.rb
|
92
94
|
- lib/ebnf/ll1/parser.rb
|
93
95
|
- lib/ebnf/ll1/scanner.rb
|
96
|
+
- lib/ebnf/ll1.rb
|
97
|
+
- lib/ebnf/parser.rb
|
98
|
+
- lib/ebnf/rule.rb
|
94
99
|
- lib/ebnf/version.rb
|
95
100
|
- lib/ebnf.rb
|
96
101
|
- etc/doap.ttl
|
97
|
-
- etc/ebnf.
|
102
|
+
- etc/ebnf.ebnf
|
103
|
+
- etc/ebnf.ll1
|
104
|
+
- etc/turtle.ebnf
|
105
|
+
- etc/turtle.ll1
|
106
|
+
- etc/turtle.rb
|
98
107
|
- bin/ebnf
|
99
108
|
homepage: http://github.com/gkellogg/ebnf
|
100
109
|
licenses:
|