ebnf 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/ebnf/rule.rb ADDED
@@ -0,0 +1,362 @@
1
+ module EBNF
2
+ # Represent individual parsed rules
3
+ class Rule
4
+ # Operations which are flattened to separate rules in to_bnf
5
+ BNF_OPS = %w{
6
+ alt opt plus seq star
7
+ }.map(&:to_sym).freeze
8
+
9
+ TERM_OPS = %w{
10
+ diff hex range
11
+ }.map(&:to_sym).freeze
12
+
13
+ # @!attribute [rw] sym for rule
14
+ # @return [Symbol]
15
+ attr_accessor :sym
16
+
17
+ # @!attribute [rw] id of rule
18
+ # @return [String]
19
+ attr_accessor :id
20
+
21
+ # A comprehension is a sequence which contains all elements but the first of the original rule.
22
+ # @!attribute [rw] comprehension of this rule
23
+ # @return [Rule]
24
+ attr_accessor :comp
25
+
26
+ # @!attribute [rw] kind of rule
27
+ # @return [:rule, :terminal, or :pass]
28
+ attr_accessor :kind
29
+
30
+ # @!attribute [rw] expr rule expression
31
+ # @return [Array]
32
+ attr_accessor :expr
33
+
34
+ # @!attribute [rw] orig original rule
35
+ # @return [String]
36
+ attr_accessor :orig
37
+
38
+ # @!attribute [r] first terminals that immediately procede this rule
39
+ # @return [Array<Rule>]
40
+ attr_reader :first
41
+
42
+ # @!attribute [r] follow terminals that immediately follow this rule
43
+ # @return [Array<Rule>]
44
+ attr_reader :follow
45
+
46
+ # @!attribute [rw] start indicates that this is a starting rule
47
+ # @return [Boolean]
48
+ attr_accessor :start
49
+
50
+ # @param [Symbol] sym
51
+ # @param [Integer] id
52
+ # @param [Array] expr
53
+ # @param [Hash{Symbol => Object}] options
54
+ # @option options [Symbol] :kind
55
+ # @option options [String] :ebnf
56
+ def initialize(sym, id, expr, options = {})
57
+ @sym, @id = sym, id
58
+ @expr = expr.is_a?(Array) ? expr : [:seq, expr]
59
+ @ebnf = options[:ebnf]
60
+ @top_rule = options.fetch(:top_rule, self)
61
+ @kind = case
62
+ when options[:kind] then options[:kind]
63
+ when sym.to_s == sym.to_s.upcase then :terminal
64
+ when !BNF_OPS.include?(@expr.first) then :terminal
65
+ else :rule
66
+ end
67
+ end
68
+
69
+ # Build a new rule creating a symbol and numbering from the current rule
70
+ # Symbol and number creation is handled by the top-most rule in such a chain
71
+ #
72
+ # @param [Array] expr
73
+ # @param [Hash{Symbol => Object}] options
74
+ # @option options [Symbol] :kind
75
+ # @option options [String] :ebnf EBNF instance (used for messages)
76
+ def build(expr, options = {})
77
+ new_sym, new_id = (@top_rule ||self).send(:make_sym_id)
78
+ Rule.new(new_sym, new_id, expr, {
79
+ :kind => options[:kind],
80
+ :ebnf => @ebnf,
81
+ :top_rule => @top_rule || self,
82
+ }.merge(options))
83
+ end
84
+
85
+ # Serializes this rule to an S-Expression
86
+ # @return [String]
87
+ def to_sxp
88
+ elements = [kind, sym, id]
89
+ elements << [:start, true] if start
90
+ elements << first.sort_by(&:to_s).unshift(:first) if first
91
+ elements << follow.sort_by(&:to_s).unshift(:follow) if follow
92
+ elements << expr
93
+ begin
94
+ require 'sxp'
95
+ SXP::Generator.string(elements)
96
+ rescue LoadError
97
+ elements.to_sxp
98
+ end
99
+ end
100
+ def to_s; to_sxp; end
101
+
102
+ # Serializes this rule to Turtle
103
+ # @return [String]
104
+ def to_ttl
105
+ @ebnf.debug("to_ttl") {inspect}
106
+ comment = orig.strip.
107
+ gsub(/"""/, '\"\"\"').
108
+ gsub("\\", "\\\\").
109
+ sub(/^\"/, '\"').
110
+ sub(/\"$/m, '\"')
111
+ statements = [
112
+ %{:#{id} rdfs:label "#{id}"; rdf:value "#{sym}";},
113
+ %{ rdfs:comment #{comment.inspect};},
114
+ ]
115
+
116
+ statements += ttl_expr(expr, kind == :terminal ? "re" : "g", 1, false)
117
+ "\n" + statements.join("\n")
118
+ end
119
+
120
+ ##
121
+ # Transform EBNF rule to BNF rules:
122
+ #
123
+ # * Transform (a [n] rule (op1 (op2))) into two rules:
124
+ # (a [n] rule (op1 a.2))
125
+ # (_a_1 [n.1] rule (op2))
126
+ # * Transform (a rule (opt b)) into (a rule (alt _empty b))
127
+ # * Transform (a rule (star b)) into (a rule (alt _empty (seq b a)))
128
+ # * Transform (a rule (plus b)) into (a rule (seq b (star b)))
129
+ # @return [Array<Rule>]
130
+ def to_bnf
131
+ return [self] unless kind == :rule
132
+ new_rules = []
133
+
134
+ # Look for rules containing recursive definition and rewrite to multiple rules. If `expr` contains elements which are in array form, where the first element of that array is a symbol, create a new rule for it.
135
+ if expr.any? {|e| e.is_a?(Array) && (BNF_OPS + TERM_OPS).include?(e.first)}
136
+ # * Transform (a [n] rule (op1 (op2))) into two rules:
137
+ # (a.1 [n.1] rule (op1 a.2))
138
+ # (a.2 [n.2] rule (op2))
139
+ # duplicate ourselves for rewriting
140
+ this = dup
141
+ new_rules << this
142
+
143
+ expr.each_with_index do |e, index|
144
+ next unless e.is_a?(Array) && e.first.is_a?(Symbol)
145
+ new_rule = build(e)
146
+ this.expr[index] = new_rule.sym
147
+ new_rules << new_rule
148
+ end
149
+
150
+ # Return new rules after recursively applying #to_bnf
151
+ new_rules = new_rules.map {|r| r.to_bnf}.flatten
152
+ elsif expr.first == :opt
153
+ this = dup
154
+ # * Transform (a rule (opt b)) into (a rule (alt _empty b))
155
+ this.expr = [:alt, :_empty, expr.last]
156
+ new_rules = this.to_bnf
157
+ elsif expr.first == :star
158
+ # * Transform (a rule (star b)) into (a rule (alt _empty (seq b a)))
159
+ this = dup
160
+ new_rule = this.build([:seq, expr.last, this.sym])
161
+ this.expr = [:alt, :_empty, new_rule.sym]
162
+ new_rules = [this] + new_rule.to_bnf
163
+ elsif expr.first == :plus
164
+ # * Transform (a rule (plus b)) into (a rule (seq b (star b)))
165
+ this = dup
166
+ this.expr = [:seq, expr.last, [:star, expr.last]]
167
+ new_rules = this.to_bnf
168
+ elsif [:alt, :seq].include?(expr.first)
169
+ # Otherwise, no further transformation necessary
170
+ new_rules << self
171
+ elsif [:diff, :hex, :range].include?(expr.first)
172
+ # These rules are fine, they just need to be terminals
173
+ raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.kind == :terminal
174
+ new_rules << self
175
+ else
176
+ # Some case we didn't think of
177
+ raise "Error trying to transform #{expr.inspect} to BNF"
178
+ end
179
+
180
+ return new_rules
181
+ end
182
+
183
+ # Does this rule start with a sym? It does if expr is that sym,
184
+ # expr starts with alt and contains that sym, or
185
+ # expr starts with seq and the next element is that sym
186
+ # @param [Symbol, class] sym
187
+ # Symbol matching any start element, or if it is String, any start element which is a String
188
+ # @return [Array<Symbol, String>] list of symbol (singular), or strings which are start symbol, or nil if there are none
189
+ def starts_with(sym)
190
+ if seq? && sym === (v = expr.fetch(1, nil))
191
+ [v]
192
+ elsif alt? && expr.any? {|e| sym === e}
193
+ expr.select {|e| sym === e}
194
+ else
195
+ nil
196
+ end
197
+ end
198
+
199
+ # Add terminal as proceding this rule
200
+ # @param [Array<Rule>] terminals
201
+ # @return [Integer] number of terminals added
202
+ def add_first(terminals)
203
+ @first ||= []
204
+ terminals -= @first # Remove those already in first
205
+ @first += terminals
206
+ terminals.length
207
+ end
208
+
209
+ # Add terminal as following this rule. Don't add _eps as a follow
210
+ #
211
+ # @param [Array<Rule>] terminals
212
+ # @return [Integer] number of terminals added
213
+ def add_follow(terminals)
214
+ terminals -= @follow || [] # Remove those already in follow
215
+ terminals -= [:_eps] # Special case, don't add empty string as a follow terminal
216
+ unless terminals.empty?
217
+ @follow ||= []
218
+ @follow += terminals
219
+ end
220
+ terminals.length
221
+ end
222
+
223
+ # Is this rule of the form (seq ...)?
224
+ def seq?
225
+ expr.is_a?(Array) && expr.first == :seq
226
+ end
227
+
228
+ # Is this rule of the form (alt ...)?
229
+ def alt?
230
+ expr.is_a?(Array) && expr.first == :alt
231
+ end
232
+
233
+ def inspect
234
+ "#<EBNF::Rule:#{object_id} " +
235
+ {:sym => sym, :id => id, :kind => kind, :expr => expr}.inspect +
236
+ ">"
237
+ end
238
+
239
+ # Two rules are equal if they have the same {#sym}, {#kind} and {#expr}
240
+ # @param [Rule] other
241
+ # @return [Boolean]
242
+ def ==(other)
243
+ sym == other.sym &&
244
+ kind == other.kind &&
245
+ expr == other.expr
246
+ end
247
+
248
+ # Two rules are equivalent if they have the same {#expr}
249
+ # @param [Rule] other
250
+ # @return [Boolean]
251
+ def equivalent?(other)
252
+ expr == other.expr
253
+ end
254
+
255
+ # Rewrite the rule substituting dst_rule for src_rule wherever
256
+ # it is used in the production (first level only).
257
+ # @param [Rule] src_rule
258
+ # @param [Rule] dst_rule
259
+ # @return [Rule]
260
+ def rewrite(src_rule, dst_rule)
261
+ case @expr
262
+ when Array
263
+ @expr = @expr.map {|e| e == src_rule.sym ? dst_rule.sym : e}
264
+ else
265
+ @expr = dst_rule.sym if @expr == src_rule.sym
266
+ end
267
+ self
268
+ end
269
+
270
+ # Rules compare using their ids
271
+ def <=>(other)
272
+ if id.to_i == other.id.to_i
273
+ id <=> other.id
274
+ else
275
+ id.to_i <=> other.id.to_i
276
+ end
277
+ end
278
+
279
+ private
280
+ def ttl_expr(expr, pfx, depth, is_obj = true)
281
+ indent = ' ' * depth
282
+ @ebnf.debug("ttl_expr", :depth => depth) {expr.inspect}
283
+ op = expr.shift if expr.is_a?(Array)
284
+ statements = []
285
+
286
+ if is_obj
287
+ bra, ket = "[ ", " ]"
288
+ else
289
+ bra = ket = ''
290
+ end
291
+
292
+ case op
293
+ when :seq, :alt, :diff
294
+ statements << %{#{indent}#{bra}#{pfx}:#{op} (}
295
+ expr.each {|a| statements += ttl_expr(a, pfx, depth + 1)}
296
+ statements << %{#{indent} )#{ket}}
297
+ when :opt, :plus, :star
298
+ statements << %{#{indent}#{bra}#{pfx}:#{op} }
299
+ statements += ttl_expr(expr.first, pfx, depth + 1)
300
+ statements << %{#{indent} #{ket}} unless ket.empty?
301
+ when :_empty, :_eps, :_empty
302
+ statements << %{#{indent}"g:#{op.to_s[1..-1]}"}
303
+ when :"'"
304
+ statements << %{#{indent}"#{esc(expr)}"}
305
+ when :range
306
+ statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
307
+ when :hex
308
+ raise "didn't expect \" in expr" if expr.include?(:'"')
309
+ statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
310
+ else
311
+ if is_obj
312
+ statements << %{#{indent}#{expr.inspect}}
313
+ else
314
+ statements << %{#{indent}g:seq ( #{expr.inspect} )}
315
+ end
316
+ end
317
+
318
+ statements.last << " ." unless is_obj
319
+ @ebnf.debug("statements", :depth => depth) {statements.join("\n")}
320
+ statements
321
+ end
322
+
323
+ ##
324
+ # turn an XML BNF character class into an N3 literal for that
325
+ # character class (less the outer quote marks)
326
+ #
327
+ # >>> cclass("^<>'{}|^`")
328
+ # "[^<>'{}|^`]"
329
+ # >>> cclass("#x0300-#x036F")
330
+ # "[\\u0300-\\u036F]"
331
+ # >>> cclass("#xC0-#xD6")
332
+ # "[\\u00C0-\\u00D6]"
333
+ # >>> cclass("#x370-#x37D")
334
+ # "[\\u0370-\\u037D]"
335
+ #
336
+ # as in: ECHAR ::= '\' [tbnrf\"']
337
+ # >>> cclass("tbnrf\\\"'")
338
+ # 'tbnrf\\\\\\"\''
339
+ #
340
+ # >>> cclass("^#x22#x5C#x0A#x0D")
341
+ # '^\\u0022\\\\\\u005C\\u000A\\u000D'
342
+ def cclass(txt)
343
+ '[' +
344
+ txt.gsub(/\#x[0-9a-fA-F]+/) do |hx|
345
+ hx = hx[2..-1]
346
+ if hx.length <= 4
347
+ "\\u#{'0' * (4 - hx.length)}#{hx}"
348
+ elsif hx.length <= 8
349
+ "\\U#{'0' * (8 - hx.length)}#{hx}"
350
+ end
351
+ end +
352
+ ']'
353
+ end
354
+
355
+ # Make a new symbol/number combination
356
+ def make_sym_id
357
+ @id_seq ||= 0
358
+ @id_seq += 1
359
+ ["_#{@sym}_#{@id_seq}".to_sym, "#{@id}.#{@id_seq}"]
360
+ end
361
+ end
362
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ebnf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-24 00:00:00.000000000 Z
12
+ date: 2013-02-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: sxp
@@ -88,13 +88,22 @@ files:
88
88
  - README.md
89
89
  - UNLICENSE
90
90
  - VERSION
91
+ - lib/ebnf/base.rb
92
+ - lib/ebnf/bnf.rb
91
93
  - lib/ebnf/ll1/lexer.rb
92
94
  - lib/ebnf/ll1/parser.rb
93
95
  - lib/ebnf/ll1/scanner.rb
96
+ - lib/ebnf/ll1.rb
97
+ - lib/ebnf/parser.rb
98
+ - lib/ebnf/rule.rb
94
99
  - lib/ebnf/version.rb
95
100
  - lib/ebnf.rb
96
101
  - etc/doap.ttl
97
- - etc/ebnf.bnf
102
+ - etc/ebnf.ebnf
103
+ - etc/ebnf.ll1
104
+ - etc/turtle.ebnf
105
+ - etc/turtle.ll1
106
+ - etc/turtle.rb
98
107
  - bin/ebnf
99
108
  homepage: http://github.com/gkellogg/ebnf
100
109
  licenses: