ebnf 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/ebnf/rule.rb ADDED
@@ -0,0 +1,362 @@
1
+ module EBNF
2
+ # Represent individual parsed rules
3
+ class Rule
4
+ # Operations which are flattened to separate rules in to_bnf
5
+ BNF_OPS = %w{
6
+ alt opt plus seq star
7
+ }.map(&:to_sym).freeze
8
+
9
+ TERM_OPS = %w{
10
+ diff hex range
11
+ }.map(&:to_sym).freeze
12
+
13
+ # @!attribute [rw] sym for rule
14
+ # @return [Symbol]
15
+ attr_accessor :sym
16
+
17
+ # @!attribute [rw] id of rule
18
+ # @return [String]
19
+ attr_accessor :id
20
+
21
+ # A comprehension is a sequence which contains all elements but the first of the original rule.
22
+ # @!attribute [rw] comprehension of this rule
23
+ # @return [Rule]
24
+ attr_accessor :comp
25
+
26
+ # @!attribute [rw] kind of rule
27
+ # @return [:rule, :terminal, or :pass]
28
+ attr_accessor :kind
29
+
30
+ # @!attribute [rw] expr rule expression
31
+ # @return [Array]
32
+ attr_accessor :expr
33
+
34
+ # @!attribute [rw] orig original rule
35
+ # @return [String]
36
+ attr_accessor :orig
37
+
38
+ # @!attribute [r] first terminals that immediately precede this rule
39
+ # @return [Array<Rule>]
40
+ attr_reader :first
41
+
42
+ # @!attribute [r] follow terminals that immediately follow this rule
43
+ # @return [Array<Rule>]
44
+ attr_reader :follow
45
+
46
+ # @!attribute [rw] start indicates that this is a starting rule
47
+ # @return [Boolean]
48
+ attr_accessor :start
49
+
50
+ # @param [Symbol] sym
51
+ # @param [String] id
52
+ # @param [Array] expr
53
+ # @param [Hash{Symbol => Object}] options
54
+ # @option options [Symbol] :kind
55
+ # @option options [String] :ebnf
56
+ def initialize(sym, id, expr, options = {})
57
+ @sym, @id = sym, id
58
+ @expr = expr.is_a?(Array) ? expr : [:seq, expr]
59
+ @ebnf = options[:ebnf]
60
+ @top_rule = options.fetch(:top_rule, self)
61
+ @kind = case
62
+ when options[:kind] then options[:kind]
63
+ when sym.to_s == sym.to_s.upcase then :terminal
64
+ when !BNF_OPS.include?(@expr.first) then :terminal
65
+ else :rule
66
+ end
67
+ end
68
+
69
+ # Build a new rule creating a symbol and numbering from the current rule
70
+ # Symbol and number creation is handled by the top-most rule in such a chain
71
+ #
72
+ # @param [Array] expr
73
+ # @param [Hash{Symbol => Object}] options
74
+ # @option options [Symbol] :kind
75
+ # @option options [String] :ebnf EBNF instance (used for messages)
76
+ def build(expr, options = {})
77
+ new_sym, new_id = (@top_rule ||self).send(:make_sym_id)
78
+ Rule.new(new_sym, new_id, expr, {
79
+ :kind => options[:kind],
80
+ :ebnf => @ebnf,
81
+ :top_rule => @top_rule || self,
82
+ }.merge(options))
83
+ end
84
+
85
+ # Serializes this rule to an S-Expression
86
+ # @return [String]
87
+ def to_sxp
88
+ elements = [kind, sym, id]
89
+ elements << [:start, true] if start
90
+ elements << first.sort_by(&:to_s).unshift(:first) if first
91
+ elements << follow.sort_by(&:to_s).unshift(:follow) if follow
92
+ elements << expr
93
+ begin
94
+ require 'sxp'
95
+ SXP::Generator.string(elements)
96
+ rescue LoadError
97
+ elements.to_sxp
98
+ end
99
+ end
100
+ def to_s; to_sxp; end
101
+
102
+ # Serializes this rule to Turtle
103
+ # @return [String]
104
+ def to_ttl
105
+ @ebnf.debug("to_ttl") {inspect}
106
+ comment = orig.strip.
107
+ gsub(/"""/, '\"\"\"').
108
+ gsub("\\", "\\\\").
109
+ sub(/^\"/, '\"').
110
+ sub(/\"$/m, '\"')
111
+ statements = [
112
+ %{:#{id} rdfs:label "#{id}"; rdf:value "#{sym}";},
113
+ %{ rdfs:comment #{comment.inspect};},
114
+ ]
115
+
116
+ statements += ttl_expr(expr, kind == :terminal ? "re" : "g", 1, false)
117
+ "\n" + statements.join("\n")
118
+ end
119
+
120
+ ##
121
+ # Transform EBNF rule to BNF rules:
122
+ #
123
+ # * Transform (a [n] rule (op1 (op2))) into two rules:
124
+ # (a [n] rule (op1 _a_1))
125
+ # (_a_1 [n.1] rule (op2))
126
+ # * Transform (a rule (opt b)) into (a rule (alt _empty b))
127
+ # * Transform (a rule (star b)) into (a rule (alt _empty (seq b a)))
128
+ # * Transform (a rule (plus b)) into (a rule (seq b (star b)))
129
+ # @return [Array<Rule>]
130
+ def to_bnf
131
+ return [self] unless kind == :rule
132
+ new_rules = []
133
+
134
+ # Look for rules containing recursive definition and rewrite to multiple rules. If `expr` contains elements which are in array form, where the first element of that array is a symbol, create a new rule for it.
135
+ if expr.any? {|e| e.is_a?(Array) && (BNF_OPS + TERM_OPS).include?(e.first)}
136
+ # * Transform (a [n] rule (op1 (op2))) into two rules:
137
+ # (a [n] rule (op1 _a_1))
138
+ # (_a_1 [n.1] rule (op2))
139
+ # duplicate ourselves for rewriting
140
+ this = dup
141
+ new_rules << this
142
+
143
+ expr.each_with_index do |e, index|
144
+ next unless e.is_a?(Array) && e.first.is_a?(Symbol)
145
+ new_rule = build(e)
146
+ this.expr[index] = new_rule.sym
147
+ new_rules << new_rule
148
+ end
149
+
150
+ # Return new rules after recursively applying #to_bnf
151
+ new_rules = new_rules.map {|r| r.to_bnf}.flatten
152
+ elsif expr.first == :opt
153
+ this = dup
154
+ # * Transform (a rule (opt b)) into (a rule (alt _empty b))
155
+ this.expr = [:alt, :_empty, expr.last]
156
+ new_rules = this.to_bnf
157
+ elsif expr.first == :star
158
+ # * Transform (a rule (star b)) into (a rule (alt _empty (seq b a)))
159
+ this = dup
160
+ new_rule = this.build([:seq, expr.last, this.sym])
161
+ this.expr = [:alt, :_empty, new_rule.sym]
162
+ new_rules = [this] + new_rule.to_bnf
163
+ elsif expr.first == :plus
164
+ # * Transform (a rule (plus b)) into (a rule (seq b (star b)))
165
+ this = dup
166
+ this.expr = [:seq, expr.last, [:star, expr.last]]
167
+ new_rules = this.to_bnf
168
+ elsif [:alt, :seq].include?(expr.first)
169
+ # Otherwise, no further transformation necessary
170
+ new_rules << self
171
+ elsif [:diff, :hex, :range].include?(expr.first)
172
+ # These rules are fine, they just need to be terminals
173
+ raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.kind == :terminal
174
+ new_rules << self
175
+ else
176
+ # Some case we didn't think of
177
+ raise "Error trying to transform #{expr.inspect} to BNF"
178
+ end
179
+
180
+ return new_rules
181
+ end
182
+
183
+ # Does this rule start with a sym? It does if expr is that sym,
184
+ # expr starts with alt and contains that sym, or
185
+ # expr starts with seq and the next element is that sym
186
+ # @param [Symbol, class] sym
187
+ # Symbol matching any start element, or if it is String, any start element which is a String
188
+ # @return [Array<Symbol, String>] list of symbol (singular), or strings which are start symbol, or nil if there are none
189
+ def starts_with(sym)
190
+ if seq? && sym === (v = expr.fetch(1, nil))
191
+ [v]
192
+ elsif alt? && expr.any? {|e| sym === e}
193
+ expr.select {|e| sym === e}
194
+ else
195
+ nil
196
+ end
197
+ end
198
+
199
+ # Add terminals as preceding this rule
200
+ # @param [Array<Rule>] terminals
201
+ # @return [Integer] number of terminals added
202
+ def add_first(terminals)
203
+ @first ||= []
204
+ terminals -= @first # Remove those already in first
205
+ @first += terminals
206
+ terminals.length
207
+ end
208
+
209
+ # Add terminal as following this rule. Don't add _eps as a follow
210
+ #
211
+ # @param [Array<Rule>] terminals
212
+ # @return [Integer] number of terminals added
213
+ def add_follow(terminals)
214
+ terminals -= @follow || [] # Remove those already in follow
215
+ terminals -= [:_eps] # Special case, don't add empty string as a follow terminal
216
+ unless terminals.empty?
217
+ @follow ||= []
218
+ @follow += terminals
219
+ end
220
+ terminals.length
221
+ end
222
+
223
+ # Is this rule of the form (seq ...)?
224
+ def seq?
225
+ expr.is_a?(Array) && expr.first == :seq
226
+ end
227
+
228
+ # Is this rule of the form (alt ...)?
229
+ def alt?
230
+ expr.is_a?(Array) && expr.first == :alt
231
+ end
232
+
233
+ def inspect
234
+ "#<EBNF::Rule:#{object_id} " +
235
+ {:sym => sym, :id => id, :kind => kind, :expr => expr}.inspect +
236
+ ">"
237
+ end
238
+
239
+ # Two rules are equal if they have the same {#sym}, {#kind} and {#expr}
240
+ # @param [Rule] other
241
+ # @return [Boolean]
242
+ def ==(other)
243
+ sym == other.sym &&
244
+ kind == other.kind &&
245
+ expr == other.expr
246
+ end
247
+
248
+ # Two rules are equivalent if they have the same {#expr}
249
+ # @param [Rule] other
250
+ # @return [Boolean]
251
+ def equivalent?(other)
252
+ expr == other.expr
253
+ end
254
+
255
+ # Rewrite the rule substituting dst_rule for src_rule wherever
256
+ # it is used in the production (first level only).
257
+ # @param [Rule] src_rule
258
+ # @param [Rule] dst_rule
259
+ # @return [Rule]
260
+ def rewrite(src_rule, dst_rule)
261
+ case @expr
262
+ when Array
263
+ @expr = @expr.map {|e| e == src_rule.sym ? dst_rule.sym : e}
264
+ else
265
+ @expr = dst_rule.sym if @expr == src_rule.sym
266
+ end
267
+ self
268
+ end
269
+
270
+ # Rules compare using their ids
271
+ def <=>(other)
272
+ if id.to_i == other.id.to_i
273
+ id <=> other.id
274
+ else
275
+ id.to_i <=> other.id.to_i
276
+ end
277
+ end
278
+
279
+ private
280
+ def ttl_expr(expr, pfx, depth, is_obj = true)
281
+ indent = ' ' * depth
282
+ @ebnf.debug("ttl_expr", :depth => depth) {expr.inspect}
283
+ op = expr.shift if expr.is_a?(Array)
284
+ statements = []
285
+
286
+ if is_obj
287
+ bra, ket = "[ ", " ]"
288
+ else
289
+ bra = ket = ''
290
+ end
291
+
292
+ case op
293
+ when :seq, :alt, :diff
294
+ statements << %{#{indent}#{bra}#{pfx}:#{op} (}
295
+ expr.each {|a| statements += ttl_expr(a, pfx, depth + 1)}
296
+ statements << %{#{indent} )#{ket}}
297
+ when :opt, :plus, :star
298
+ statements << %{#{indent}#{bra}#{pfx}:#{op} }
299
+ statements += ttl_expr(expr.first, pfx, depth + 1)
300
+ statements << %{#{indent} #{ket}} unless ket.empty?
301
+ when :_empty, :_eps, :_empty
302
+ statements << %{#{indent}"g:#{op.to_s[1..-1]}"}
303
+ when :"'"
304
+ statements << %{#{indent}"#{esc(expr)}"}
305
+ when :range
306
+ statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
307
+ when :hex
308
+ raise "didn't expect \" in expr" if expr.include?(:'"')
309
+ statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
310
+ else
311
+ if is_obj
312
+ statements << %{#{indent}#{expr.inspect}}
313
+ else
314
+ statements << %{#{indent}g:seq ( #{expr.inspect} )}
315
+ end
316
+ end
317
+
318
+ statements.last << " ." unless is_obj
319
+ @ebnf.debug("statements", :depth => depth) {statements.join("\n")}
320
+ statements
321
+ end
322
+
323
+ ##
324
+ # turn an XML BNF character class into an N3 literal for that
325
+ # character class (less the outer quote marks)
326
+ #
327
+ # >>> cclass("^<>'{}|^`")
328
+ # "[^<>'{}|^`]"
329
+ # >>> cclass("#x0300-#x036F")
330
+ # "[\\u0300-\\u036F]"
331
+ # >>> cclass("#xC0-#xD6")
332
+ # "[\\u00C0-\\u00D6]"
333
+ # >>> cclass("#x370-#x37D")
334
+ # "[\\u0370-\\u037D]"
335
+ #
336
+ # as in: ECHAR ::= '\' [tbnrf\"']
337
+ # >>> cclass("tbnrf\\\"'")
338
+ # 'tbnrf\\\\\\"\''
339
+ #
340
+ # >>> cclass("^#x22#x5C#x0A#x0D")
341
+ # '^\\u0022\\\\\\u005C\\u000A\\u000D'
342
+ def cclass(txt)
343
+ '[' +
344
+ txt.gsub(/\#x[0-9a-fA-F]+/) do |hx|
345
+ hx = hx[2..-1]
346
+ if hx.length <= 4
347
+ "\\u#{'0' * (4 - hx.length)}#{hx}"
348
+ elsif hx.length <= 8
349
+ "\\U#{'0' * (8 - hx.length)}#{hx}"
350
+ end
351
+ end +
352
+ ']'
353
+ end
354
+
355
+ # Make a new symbol/number combination
356
+ def make_sym_id
357
+ @id_seq ||= 0
358
+ @id_seq += 1
359
+ ["_#{@sym}_#{@id_seq}".to_sym, "#{@id}.#{@id_seq}"]
360
+ end
361
+ end
362
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ebnf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-24 00:00:00.000000000 Z
12
+ date: 2013-02-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: sxp
@@ -88,13 +88,22 @@ files:
88
88
  - README.md
89
89
  - UNLICENSE
90
90
  - VERSION
91
+ - lib/ebnf/base.rb
92
+ - lib/ebnf/bnf.rb
91
93
  - lib/ebnf/ll1/lexer.rb
92
94
  - lib/ebnf/ll1/parser.rb
93
95
  - lib/ebnf/ll1/scanner.rb
96
+ - lib/ebnf/ll1.rb
97
+ - lib/ebnf/parser.rb
98
+ - lib/ebnf/rule.rb
94
99
  - lib/ebnf/version.rb
95
100
  - lib/ebnf.rb
96
101
  - etc/doap.ttl
97
- - etc/ebnf.bnf
102
+ - etc/ebnf.ebnf
103
+ - etc/ebnf.ll1
104
+ - etc/turtle.ebnf
105
+ - etc/turtle.ll1
106
+ - etc/turtle.rb
98
107
  - bin/ebnf
99
108
  homepage: http://github.com/gkellogg/ebnf
100
109
  licenses: