ebnf 0.0.1 → 0.1.0

data/lib/ebnf/base.rb ADDED
@@ -0,0 +1,266 @@
+ require 'strscan'
+
+ # Extended Backus-Naur Form (EBNF), in the W3C variation
+ # originally defined in the
+ # [W3C XML 1.0 Spec](http://www.w3.org/TR/REC-xml/#sec-notation).
+ #
+ # This version attempts to be less strict than that definition
+ # to allow for colloquial variations (such as in the Turtle syntax).
+ #
+ # A rule takes the following form:
+ #     \[1\] symbol ::= expression
+ #
+ # Comments include the content between '/*' and '*/'.
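+ #
+ # For example (an illustrative rule in the style of the Turtle grammar,
+ # not one defined by this gem):
+ #
+ #     /* a comment */
+ #     [2] statement ::= directive | triples "."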
+ #
+ # @see http://www.w3.org/2000/10/swap/grammar/ebnf2turtle.py
+ # @see http://www.w3.org/2000/10/swap/grammar/ebnf2bnf.n3
+ #
+ # Based on bnf2turtle by Dan Connolly.
+ #
+ # Motivation
+ # ----------
+ #
+ # Many specifications include grammars that look formal but are not
+ # actually checked, by machine, against test data sets. Debugging the
+ # grammar in the XML specification has been a long, tedious manual
+ # process. Only when the loop is closed between a fully formal grammar
+ # and a large test data set can we be confident that we have an accurate
+ # specification of a language (and even then, only of the syntax of the language).
+ #
+ # The grammar in the [N3 design note][] has evolved based on the original
+ # manual transcription into a Python recursive-descent parser and
+ # subsequent development of test cases. Rather than maintain the grammar
+ # and the parser independently, our [goal] is to formalize the language
+ # syntax sufficiently to replace the manual implementation with one
+ # derived mechanically from the specification.
+ #
+ # [N3 design note]: http://www.w3.org/DesignIssues/Notation3
+ #
+ # Related Work
+ # ------------
+ #
+ # Sean Palmer's [n3p announcement][] demonstrated the feasibility of the
+ # approach, though that work did not cover some aspects of N3.
+ #
+ # In development of the [SPARQL specification][], Eric Prud'hommeaux
+ # developed [Yacker][], which converts EBNF syntax to Perl, C, and C++
+ # yacc grammars. It includes an interactive facility for checking
+ # strings against the resulting grammars.
+ # Yosi Scharf used it in [cwm Release 1.1.0rc1][], which includes
+ # a SPARQL parser that is *almost* completely mechanically generated.
+ #
+ # The N3/Turtle output from Yacker is lower level than the EBNF notation
+ # from the XML specification; it has the ?, +, and * operators compiled
+ # down to pure context-free rules, obscuring the grammar
+ # structure. Since that transformation is straightforwardly expressed in
+ # semantic web rules (see [bnf-rules.n3][]), it seems best to keep the RDF
+ # expression of the grammar in terms of the higher-level EBNF
+ # constructs.
+ #
+ # [goal]: http://www.w3.org/2002/02/mid/1086902566.21030.1479.camel@dirk;list=public-cwm-bugs
+ # [n3p announcement]: http://lists.w3.org/Archives/Public/public-cwm-talk/2004OctDec/0029.html
+ # [Yacker]: http://www.w3.org/1999/02/26-modules/User/Yacker
+ # [SPARQL specification]: http://www.w3.org/TR/rdf-sparql-query/
+ # [cwm Release 1.1.0rc1]: http://lists.w3.org/Archives/Public/public-cwm-announce/2005JulSep/0000.html
+ # [bnf-rules.n3]: http://www.w3.org/2000/10/swap/grammar/bnf-rules.n3
+ #
+ # Open Issues and Future Work
+ # ---------------------------
+ #
+ # The Yacker output also has the terminals compiled to elaborate regular
+ # expressions. The best strategy for dealing with lexical tokens is not
+ # yet clear. Many tokens in SPARQL are case insensitive; this is not yet
+ # captured formally.
+ #
+ # The schema for the EBNF vocabulary used here (``g:seq``, ``g:alt``, ...)
+ # is not yet published; it should be aligned with [swap/grammar/bnf][]
+ # and the [bnf2html.n3][] rules (and/or the style of linked XHTML grammar
+ # in the SPARQL and XML specifications).
+ #
+ # It would be interesting to corroborate the claim in the SPARQL spec
+ # that the grammar is LL(1) with a mechanical proof based on N3 rules.
+ #
+ # [swap/grammar/bnf]: http://www.w3.org/2000/10/swap/grammar/bnf
+ # [bnf2html.n3]: http://www.w3.org/2000/10/swap/grammar/bnf2html.n3
+ #
+ # Background
+ # ----------
+ #
+ # The [N3 Primer] by Tim Berners-Lee introduces RDF and the Semantic
+ # Web using N3, a teaching and scribbling language. Turtle is a subset
+ # of N3 that maps directly to (and from) the standard XML syntax for
+ # RDF.
+ #
+ # [N3 Primer]: http://www.w3.org/2000/10/swap/Primer.html
+ #
+ # @author Gregg Kellogg
+ module EBNF
+   class Base
+     include BNF
+     include LL1
+     include Parser
+
+     # Abstract syntax tree from parse
+     # @!attribute [r] ast
+     # @return [Array<Rule>]
+     attr_reader :ast
+
+     # Grammar errors, or errors found generating parse tables
+     # @!attribute [rw] errors
+     # @return [Array<String>]
+     attr_accessor :errors
+
+     # Parse the string or file input, generating an abstract syntax tree
+     # in S-Expressions (similar to SPARQL SSE)
+     #
+     # @param [#read, #to_s] input
+     # @param [Hash{Symbol => Object}] options
+     # @option options [Boolean, Array] :debug
+     #   Output debug information to an array or STDOUT.
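+     #
+     # A minimal usage sketch (the grammar file name here is hypothetical):
+     # @example Parsing an EBNF grammar into an AST
+     #   ebnf = EBNF::Base.new(File.open("etc/turtle.ebnf"))
+     #   ebnf.ast  #=> [Rule, Rule, ...]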
+     def initialize(input, options = {})
+       @options = options
+       @lineno, @depth, @errors = 1, 0, []
+       terminal = false
+       @ast = []
+
+       input = input.respond_to?(:read) ? input.read : input.to_s
+       scanner = StringScanner.new(input)
+
+       eachRule(scanner) do |r|
+         debug("rule string") {r.inspect}
+         case r
+         when /^@terminals/
+           # Switch mode to parsing terminals
+           terminal = true
+         when /^@pass\s*(.*)$/m
+           rule = depth {ruleParts("[0] " + r)}
+           rule.kind = :pass
+           rule.orig = r
+           @ast << rule
+         else
+           rule = depth {ruleParts(r)}
+
+           rule.kind = :terminal if terminal # Override after we've parsed @terminals
+           rule.orig = r
+           @ast << rule
+         end
+       end
+     end
+
+     # Iterate over each rule or terminal
+     # @param [:terminal, :rule] kind
+     # @yield rule
+     # @yieldparam [Rule] rule
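+     #
+     # @example Enumerating rules (illustrative)
+     #   ebnf.each(:rule) {|rule| puts rule.sym}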
+     def each(kind, &block)
+       ast.each {|r| block.call(r) if r.kind == kind}
+     end
+
+     ##
+     # Write out parsed syntax string as an S-Expression
+     # @return [String]
+     def to_sxp
+       begin
+         require 'sxp'
+         SXP::Generator.string(ast.sort)
+       rescue LoadError
+         ast.to_sxp
+       end
+     end
+     def to_s; to_sxp; end
+
+     def dup
+       new_obj = super
+       new_obj.instance_variable_set(:@ast, @ast.dup)
+       new_obj
+     end
+
+     ##
+     # Find a rule given a symbol
+     # @param [Symbol] sym
+     # @return [Rule]
+     def find_rule(sym)
+       (@find ||= {})[sym] ||= ast.detect {|r| r.sym == sym}
+     end
+
+     ##
+     # Write out syntax tree as Turtle
+     # @param [String] prefix Prefix for the language namespace
+     # @param [String] ns URI for the language namespace
+     # @return [String]
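+     #
+     # @example Emitting Turtle with a hypothetical namespace
+     #   ebnf.to_ttl("ex", "http://example.org/grammar#")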
+     def to_ttl(prefix, ns)
+       # Prefix declarations and language header, emitted only when there are rules
+       header = ast.empty? ? [] : [
+         "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.",
+         "@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.",
+         "@prefix #{prefix}: <#{ns}>.",
+         "@prefix : <#{ns}>.",
+         "@prefix re: <http://www.w3.org/2000/10/swap/grammar/regex#>.",
+         "@prefix g: <http://www.w3.org/2000/10/swap/grammar/ebnf#>.",
+         "",
+         ":language rdfs:isDefinedBy <>; g:start :#{ast.first.id}.",
+         "",
+       ]
+
+       header.join("\n") +
+         ast.sort.
+           select {|a| [:rule, :terminal].include?(a.kind)}.
+           map(&:to_ttl).
+           join("\n")
+     end
+
+     def depth
+       @depth += 1
+       ret = yield
+       @depth -= 1
+       ret
+     end
+
+     # Progress output, less verbose than debugging
+     def progress(*args)
+       return unless @options[:progress]
+       options = args.last.is_a?(Hash) ? args.pop : {}
+       depth = options[:depth] || @depth
+       args << yield if block_given?
+       message = "#{args.join(': ')}"
+       str = "[#{@lineno}]#{' ' * depth}#{message}"
+       @options[:debug] << str if @options[:debug].is_a?(Array)
+       $stderr.puts(str)
+     end
+
+     # Error output
+     def error(*args)
+       options = args.last.is_a?(Hash) ? args.pop : {}
+       depth = options[:depth] || @depth
+       args << yield if block_given?
+       message = "#{args.join(': ')}"
+       @errors << message
+       str = "[#{@lineno}]#{' ' * depth}#{message}"
+       @options[:debug] << str if @options[:debug].is_a?(Array)
+       $stderr.puts(str)
+     end
+
+     ##
+     # Progress output when debugging
+     #
+     # @overload debug(node, message)
+     #   @param [String] node relative location in input
+     #   @param [String] message ("")
+     #
+     # @overload debug(message)
+     #   @param [String] message ("")
+     #
+     # @yieldreturn [String] added to message
+     def debug(*args)
+       return unless @options[:debug]
+       options = args.last.is_a?(Hash) ? args.pop : {}
+       depth = options[:depth] || @depth
+       args << yield if block_given?
+       message = "#{args.join(': ')}"
+       str = "[#{@lineno}]#{' ' * depth}#{message}"
+       @options[:debug] << str if @options[:debug].is_a?(Array)
+       $stderr.puts(str) if @options[:debug] == true
+     end
+   end
+ end
data/lib/ebnf/bnf.rb ADDED
@@ -0,0 +1,50 @@
+ module EBNF
+   module BNF
+     ##
+     # Transform EBNF rule set to BNF:
+     #
+     # * Add rule [0] (_empty rule (seq))
+     # * Transform each rule into a set of rules that are just BNF, using {Rule#to_bnf}.
+     # @return [EBNF] self
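+     #
+     # As a rough sketch (rule names here are schematic, not the exact
+     # symbols produced by {Rule#to_bnf}):
+     #
+     #     [1] a ::= b?
+     #
+     # becomes
+     #
+     #     a    ::= _a_1
+     #     _a_1 ::= _empty | b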
+     def make_bnf
+       progress("make_bnf") {"Start: #{@ast.length} rules"}
+       new_ast = [Rule.new(:_empty, "0", [:seq], :kind => :rule)]
+
+       ast.each do |rule|
+         debug("make_bnf") {"expand from: #{rule.inspect}"}
+         new_rules = rule.to_bnf
+         debug(" => ") {new_rules.map(&:sym).join(', ')}
+         new_ast += new_rules
+       end
+
+       # Consolidate equivalent terminal rules
+       to_rewrite = {}
+       new_ast.select {|r| r.kind == :terminal}.each do |src_rule|
+         new_ast.select {|r| r.kind == :terminal}.each do |dst_rule|
+           if src_rule.equivalent?(dst_rule) && src_rule != dst_rule
+             debug("make_bnf") {"equivalent rules: #{src_rule.inspect} and #{dst_rule.inspect}"}
+             (to_rewrite[src_rule] ||= []) << dst_rule
+           end
+         end
+       end
+
+       # Replace references to equivalent rules with the canonical rule
+       to_rewrite.each do |src_rule, dst_rules|
+         dst_rules.each do |dst_rule|
+           new_ast.each do |mod_rule|
+             debug("make_bnf") {"rewrite #{mod_rule.inspect} from #{dst_rule.sym} to #{src_rule.sym}"}
+             mod_rule.rewrite(dst_rule, src_rule)
+           end
+         end
+       end
+
+       # AST now has just the rewritten rules, with duplicates removed
+       @ast = new_ast - to_rewrite.values.flatten.compact
+       progress("make_bnf") {"End: #{@ast.length} rules"}
+       self
+     end
+   end
+ end
data/lib/ebnf/ll1.rb ADDED
@@ -0,0 +1,321 @@
+ module EBNF
+   module LL1
+     autoload :Lexer, "ebnf/ll1/lexer"
+     autoload :Parser, "ebnf/ll1/parser"
+     autoload :Scanner, "ebnf/ll1/scanner"
+
+     # Branch table, represented as a recursive hash.
+     # The table is indexed by rule symbol. Each entry is a hash indexed by
+     # terminal (the first terminals of the production), which in turn
+     # references the sequence of rules to follow, given that terminal as input.
+     # @!attribute [r] branch
+     # @return [Hash{Symbol => Hash{String, Symbol => Array<Symbol>}}]
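+     # @example A branch table entry (schematic)
+     #   {:ruleA => {"a" => [:ruleB, :ruleC], :TERMINAL => [:ruleD]}}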
+     attr_reader :branch
+
+     # First table
+     # @!attribute [r] first
+     # @return [Hash{Symbol, String => Symbol}]
+     attr_reader :first
+
+     # Follow table
+     # @!attribute [r] follow
+     # @return [Hash{Symbol, String => Symbol}]
+     attr_reader :follow
+
+     # Terminal table
+     # The list of terminals used in the grammar.
+     # @!attribute [r] terminals
+     # @return [Array<String, Symbol>]
+     attr_reader :terminals
+
+     # Start symbol
+     # The rule which starts the grammar
+     # @!attribute [r] start
+     # @return [Symbol]
+     attr_reader :start
+
+     ##
+     # Create first/follow for each rule using techniques defined for LL(1) parsers.
+     #
+     # @return [EBNF] self
+     # @see http://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
+     # @param [Symbol] start
+     #   Symbol of the grammar's start rule
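+     #
+     # As a quick illustration (a textbook grammar, not one from this gem),
+     # given
+     #     S ::= A "b"
+     #     A ::= "a" | _empty
+     # FIRST(A) = {"a", _eps}, FIRST(S) = {"a", "b"}, and FOLLOW(A) = {"b"}.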
+     def first_follow(start)
+       # Add _eof to follow all start rules
+       if @start = start
+         start_rule = find_rule(@start)
+         raise "No rule found for start symbol #{@start}" unless start_rule
+         start_rule.add_follow([:_eof])
+         start_rule.start = true
+       end
+
+       # Comprehension rule: create shorter versions of all non-terminal
+       # sequences (e.g., for (seq a b c), also create a rule for (seq b c))
+       comprehensions = []
+       begin
+         comprehensions = []
+         ast.select {|r| r.seq? && r.kind == :rule && r.expr.length > 2}.each do |rule|
+           new_expr = rule.expr[2..-1].unshift(:seq)
+           unless ast.any? {|r| r.expr == new_expr}
+             debug("first_follow") {"add comprehension rule for #{rule.sym} => #{new_expr.inspect}"}
+             new_rule = rule.build(new_expr)
+             rule.comp = new_rule
+             comprehensions << new_rule
+           end
+         end
+
+         @ast += comprehensions
+         progress("first_follow") {"comprehensions #{comprehensions.length}"}
+       end while !comprehensions.empty?
+
+       # Fi(a w') = { a } for every terminal a
+       # For each rule whose expr, as a seq, starts with a terminal, or, as
+       # an alt, has a terminal as any element, add that terminal to the
+       # first set for this rule
+       each(:rule) do |rule|
+         each(:terminal) do |terminal|
+           rule.add_first([terminal.sym]) if rule.starts_with(terminal.sym)
+         end
+
+         # Add strings to first for strings which are start elements
+         start_strs = rule.starts_with(String)
+         rule.add_first(start_strs) if start_strs
+       end
+
+       # Fi(ε) = { ε }
+       # Add _eps as a first of _empty
+       empty = ast.detect {|r| r.sym == :_empty}
+       empty.add_first([:_eps])
+
+       # Loop until no more first or follow elements are added
+       firsts, follows = 0, 0
+       begin
+         firsts, follows = 0, 0
+         each(:rule) do |rule|
+           each(:rule) do |first_rule|
+             next if first_rule == rule || first_rule.first.nil?
+
+             # Fi(A w') = Fi(A) for every nonterminal A with ε not in Fi(A)
+             # For each rule that starts with another rule having firsts, add
+             # the firsts of that rule to this rule, unless it already has
+             # those terminals in its first
+             if rule.starts_with(first_rule.sym)
+               depth {debug("FF.1") {"add first #{first_rule.first.inspect} to #{rule.sym}"}}
+               firsts += rule.add_first(first_rule.first)
+             end
+
+             # Fi(A w') = Fi(A) \ { ε } ∪ Fi(w') for every nonterminal A with ε in Fi(A)
+             # For each rule starting with eps, add the terminals for the comprehension of this rule
+             if rule.seq? &&
+                rule.expr.fetch(1, nil) == first_rule.sym &&
+                first_rule.first.include?(:_eps) &&
+                (comp = rule.comp)
+
+               depth {debug("FF.2") {"add first #{first_rule.first.inspect} to #{comp.sym}"}}
+               firsts += comp.add_first(first_rule.first)
+             end
+           end
+
+           # Only run these rules if the rule is a sequence having two or
+           # more elements, whose first element is also a sequence, and
+           # first_rule is the comprehension of rule
+           if rule.seq? && (comp = rule.comp)
+             # If there is a rule of the form Aj → w Ai w', then ...
+             if (ai = find_rule(rule.expr[1])) && ai.kind == :rule && comp.first
+               # * if the terminal a is in Fi(w'), then add a to Fo(Ai)
+               #
+               # Add follow terminals based on the first terminals of a
+               # comprehension of this rule (having the same sequence other
+               # than the first rule in the sequence)
+               #
+               # @example
+               #   rule: (seq a b c)
+               #   comp: (seq b c)
+               #   if comp.first == [T]
+               #   => a.follow += [T]
+               depth {debug("FF.3") {"add follow #{comp.first.inspect} to #{ai.sym}"}}
+               follows += ai.add_follow(comp.first)
+             end
+
+             # Follows of a rule are also follows of the comprehension of the rule.
+             if rule.follow
+               depth {debug("FF.4") {"add follow #{rule.follow.inspect} to #{comp.sym}"}}
+               follows += comp.add_follow(rule.follow)
+             end
+
+             # * if ε is in Fi(w'), then add Fo(Aj) to Fo(Ai)
+             #
+             # If the comprehension of a sequence has an _eps first, then the
+             # follows of the rule also become the follows of the first
+             # member of the rule
+             if comp.first && comp.first.include?(:_eps) && rule.follow &&
+                (member = find_rule(rule.expr.fetch(1, nil))) &&
+                member.kind == :rule
+
+               depth {debug("FF.5") {"add follow #{rule.follow.inspect} to #{member.sym}"}}
+               follows += member.add_follow(rule.follow)
+             end
+           end
+
+           # Follows of a rule are also follows of the last production in the rule
+           if rule.seq? && rule.follow &&
+              (member = find_rule(rule.expr.last)) &&
+              member.kind == :rule
+
+             depth {debug("FF.6") {"add follow #{rule.follow.inspect} to #{member.sym}"}}
+             follows += member.add_follow(rule.follow)
+           end
+
+           # For alts, anything that follows the rule follows each member of the rule
+           if rule.alt? && rule.follow
+             rule.expr[1..-1].map {|s| find_rule(s)}.each do |mem|
+               if mem && mem.kind == :rule
+                 depth {debug("FF.7") {"add follow #{rule.follow.inspect} to #{mem.sym}"}}
+                 follows += mem.add_follow(rule.follow)
+               end
+             end
+           end
+         end
+
+         progress("first_follow") {"firsts #{firsts}, follows #{follows}"}
+       end while (firsts + follows) > 0
+       self
+     end
+
+     ##
+     # Generate parser tables, {#branch}, {#first}, {#follow}, and {#terminals}
+     def build_tables
+       progress("build_tables") {
+         "Terminals: #{ast.count {|r| r.kind == :terminal}} " +
+         "Non-Terminals: #{ast.count {|r| r.kind == :rule}}"
+       }
+
+       @first = ast.
+         select(&:first).
+         inject({}) {|memo, r|
+           memo[r.sym] = r.first.reject {|t| t == :_eps}
+           memo
+         }
+       @follow = ast.
+         select(&:follow).
+         inject({}) {|memo, r|
+           memo[r.sym] = r.follow.reject {|t| t == :_eps}
+           memo
+         }
+       @terminals = ast.map do |r|
+         (r.first || []) + (r.follow || [])
+       end.flatten.uniq
+       @terminals = (@terminals - [:_eps, :_eof, :_empty]).sort_by(&:to_s)
+
+       @branch = {}
+       @already = []
+       @agenda = []
+       do_production(@start)
+       while !@agenda.empty?
+         x = @agenda.shift
+         do_production(x)
+       end
+
+       if !@errors.empty?
+         progress("###### FAILED with #{errors.length} errors.")
+         @errors.each {|s| progress(" #{s}")}
+         raise "Table creation failed with errors"
+       else
+         progress("Ok for predictive parsing")
+       end
+     end
+
+     # Generate an output table in Ruby format
+     # @param [IO, StringIO] io
+     # @param [String] name Name of the table constant
+     # @param [Hash, Array] table
+     #   to output, one of {#branch}, {#first}, {#follow}, or {#terminals}
+     # @param [Integer] indent = 0
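+     #
+     # @example Writing the branch table to STDOUT (illustrative)
+     #   outputTable($stdout, "BRANCH", branch, 1)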
+     def outputTable(io, name, table, indent = 0)
+       ind0 = ' ' * indent
+       ind1 = ind0 + ' '
+       ind2 = ind1 + ' '
+
+       if table.is_a?(Hash)
+         io.puts "#{ind0}#{name} = {"
+         table.keys.sort_by(&:to_s).each do |prod|
+           case table[prod]
+           when Array
+             list = table[prod].map(&:inspect).join(",\n#{ind2}")
+             io.puts "#{ind1}#{prod.inspect} => [\n#{ind2}#{list}],"
+           when Hash
+             io.puts "#{ind1}#{prod.inspect} => {"
+             table[prod].keys.sort_by(&:to_s).each do |term|
+               list = table[prod][term].map(&:inspect).join(", ")
+               io.puts "#{ind2}#{term.inspect} => [#{list}],"
+             end
+             io.puts "#{ind1}},"
+           else
+             raise "Unknown table entry type: #{table[prod].class}"
+           end
+         end
+         io.puts "#{ind0}}.freeze\n"
+       else
+         io.puts "#{ind0}#{name} = [\n#{ind1}" +
+           table.sort_by(&:to_s).map(&:inspect).join(",\n#{ind1}") +
+           "\n#{ind0}].freeze\n"
+       end
+     end
+
+     private
+     def do_production(lhs)
+       rule = find_rule(lhs)
+       if rule.nil? || rule.kind != :rule || rule.sym == :_empty
+         progress("prod") {"Skip: #{lhs.inspect}"}
+         return
+       end
+       @already << lhs
+
+       branchDict = {}
+
+       progress("prod") {"Production #{lhs.inspect}"}
+
+       if rule.expr.first == :matches
+         debug("prod") {"Rule is regexp: #{rule}"}
+
+         error("No record of what token #{lhs} can start with") unless rule.first
+         return
+       end
+
+       if rule.alt?
+         # Add entries for each alternative, based on the alternative's first/seq
+         rule.expr[1..-1].each do |prod|
+           prod_rule = find_rule(prod)
+           debug(" Alt", prod)
+           @agenda << prod unless @already.include?(prod) || @agenda.include?(prod)
+           if prod == :_empty
+             debug(" empty")
+             branchDict[prod] = []
+           elsif prod_rule.nil? || prod_rule.first.nil?
+             debug(" no first =>", prod)
+             branchDict[prod] = [prod]
+           else
+             prod_rule.first.each do |f|
+               if branchDict.has_key?(f)
+                 error("First/First Conflict: #{f} is also the condition for #{branchDict[f]}")
+               end
+               debug(" alt") {"[#{f}] => #{prod}"}
+               branchDict[f] = [prod]
+             end
+           end
+         end
+       else
+         error("prod") {"Expected lhs to be alt or seq, was: #{rule}"} unless rule.seq?
+         debug(" Seq", rule)
+         # Entries for each first element referencing the sequence
+         (rule.first || []).each do |f|
+           debug(" seq") {"[#{f}] => #{rule.expr[1..-1].inspect}"}
+           branchDict[f] = rule.expr[1..-1]
+         end
+
+         # Add each production to the agenda
+         rule.expr[1..-1].each do |prod|
+           @agenda << prod unless @already.include?(prod) || @agenda.include?(prod)
+         end
+       end
+
+       # Add follow rules
+       (rule.follow || []).each do |f|
+         debug(" Follow") {f.inspect}
+         branchDict[f] ||= []
+       end
+
+       @branch[lhs] = branchDict
+     end
+   end
+ end