ebnf 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +21 -7
- data/VERSION +1 -1
- data/bin/ebnf +73 -16
- data/etc/{ebnf.bnf → ebnf.ebnf} +2 -2
- data/etc/ebnf.ll1 +1010 -0
- data/etc/turtle.ebnf +70 -0
- data/etc/turtle.ll1 +1565 -0
- data/etc/turtle.rb +1375 -0
- data/lib/ebnf.rb +16 -1023
- data/lib/ebnf/base.rb +266 -0
- data/lib/ebnf/bnf.rb +50 -0
- data/lib/ebnf/ll1.rb +321 -0
- data/lib/ebnf/ll1/lexer.rb +11 -11
- data/lib/ebnf/ll1/parser.rb +28 -32
- data/lib/ebnf/ll1/scanner.rb +1 -1
- data/lib/ebnf/parser.rb +297 -0
- data/lib/ebnf/rule.rb +362 -0
- metadata +12 -3
data/lib/ebnf/base.rb
ADDED
@@ -0,0 +1,266 @@
|
|
1
|
+
require 'strscan'

# Extended Backus-Naur Form (EBNF), being the W3C variation as
# originally defined in the
# [W3C XML 1.0 Spec](http://www.w3.org/TR/REC-xml/#sec-notation).
#
# This version attempts to be less strict than the strict definition
# to allow for colloquial variations (such as in the Turtle syntax).
#
# A rule takes the following form:
#     \[1\]  symbol ::= expression
#
# Comments include the content between '/*' and '*/'
#
# @see http://www.w3.org/2000/10/swap/grammar/ebnf2turtle.py
# @see http://www.w3.org/2000/10/swap/grammar/ebnf2bnf.n3
#
# Based on bnf2turtle by Dan Connolly.
#
# Motivation
# ----------
#
# Many specifications include grammars that look formal but are not
# actually checked, by machine, against test data sets. Debugging the
# grammar in the XML specification has been a long, tedious manual
# process. Only when the loop is closed between a fully formal grammar
# and a large test data set can we be confident that we have an accurate
# specification of a language (and even then, only the syntax of the language).
#
# The grammar in the [N3 design note][] has evolved based on the original
# manual transcription into a python recursive-descent parser and
# subsequent development of test cases. Rather than maintain the grammar
# and the parser independently, our [goal] is to formalize the language
# syntax sufficiently to replace the manual implementation with one
# derived mechanically from the specification.
#
# [N3 design note]: http://www.w3.org/DesignIssues/Notation3
#
# Related Work
# ------------
#
# Sean Palmer's [n3p announcement][] demonstrated the feasibility of the
# approach, though that work did not cover some aspects of N3.
#
# In development of the [SPARQL specification][], Eric Prud'hommeaux
# developed [Yacker][], which converts EBNF syntax to perl and C and C++
# yacc grammars. It includes an interactive facility for checking
# strings against the resulting grammars.
# Yosi Scharf used it in [cwm Release 1.1.0rc1][], which includes
# a SPARQL parser that is *almost* completely mechanically generated.
#
# The N3/turtle output from yacker is lower level than the EBNF notation
# from the XML specification; it has the ?, +, and * operators compiled
# down to pure context-free rules, obscuring the grammar
# structure. Since that transformation is straightforwardly expressed in
# semantic web rules (see [bnf-rules.n3][]), it seems best to keep the RDF
# expression of the grammar in terms of the higher level EBNF
# constructs.
#
# [goal]: http://www.w3.org/2002/02/mid/1086902566.21030.1479.camel@dirk;list=public-cwm-bugs
# [n3p announcement]: http://lists.w3.org/Archives/Public/public-cwm-talk/2004OctDec/0029.html
# [Yacker]: http://www.w3.org/1999/02/26-modules/User/Yacker
# [SPARQL specification]: http://www.w3.org/TR/rdf-sparql-query/
# [Cwm Release 1.1.0rc1]: http://lists.w3.org/Archives/Public/public-cwm-announce/2005JulSep/0000.html
# [bnf-rules.n3]: http://www.w3.org/2000/10/swap/grammar/bnf-rules.n3
#
# Open Issues and Future Work
# ---------------------------
#
# The yacker output also has the terminals compiled to elaborate regular
# expressions. The best strategy for dealing with lexical tokens is not
# yet clear. Many tokens in SPARQL are case insensitive; this is not yet
# captured formally.
#
# The schema for the EBNF vocabulary used here (``g:seq``, ``g:alt``, ...)
# is not yet published; it should be aligned with [swap/grammar/bnf][]
# and the [bnf2html.n3][] rules (and/or the style of linked XHTML grammar
# in the SPARQL and XML specifications).
#
# It would be interesting to corroborate the claim in the SPARQL spec
# that the grammar is LL(1) with a mechanical proof based on N3 rules.
#
# [swap/grammar/bnf]: http://www.w3.org/2000/10/swap/grammar/bnf
# [bnf2html.n3]: http://www.w3.org/2000/10/swap/grammar/bnf2html.n3
#
# Background
# ----------
#
# The [N3 Primer] by Tim Berners-Lee introduces RDF and the Semantic
# web using N3, a teaching and scribbling language. Turtle is a subset
# of N3 that maps directly to (and from) the standard XML syntax for
# RDF.
#
# [N3 Primer]: http://www.w3.org/2000/10/swap/Primer.html
#
# @author Gregg Kellogg
module EBNF
  class Base
    include BNF
    include LL1
    include Parser

    # Abstract syntax tree from parse
    # @!attribute [r] ast
    # @return [Array<Rule>]
    attr_reader :ast

    # Grammar errors, or errors found generating parse tables
    # @!attribute [rw] errors
    # @return [Array<String>]
    attr_accessor :errors

    # Parse the string or file input generating an abstract syntax tree
    # in S-Expressions (similar to SPARQL SSE)
    #
    # @param [#read, #to_s] input
    # @param [Hash{Symbol => Object}] options
    # @option options [Boolean, Array] :debug
    #   Output debug information to an array or STDOUT.
    def initialize(input, options = {})
      @options = options
      @lineno, @depth, @errors = 1, 0, []
      terminal = false
      @ast = []

      input = input.respond_to?(:read) ? input.read : input.to_s
      scanner = StringScanner.new(input)

      eachRule(scanner) do |r|
        debug("rule string") {r.inspect}
        case r
        when /^@terminals/
          # Switch mode to parsing terminals
          terminal = true
        when /^@pass\s*(.*)$/m
          rule = depth {ruleParts("[0] " + r)}
          rule.kind = :pass
          rule.orig = r
          @ast << rule
        else
          rule = depth {ruleParts(r)}

          rule.kind = :terminal if terminal # Override after we've parsed @terminals
          rule.orig = r
          @ast << rule
        end
      end
    end

    # Iterate over each rule or terminal
    # @param [:terminal, :rule] kind
    # @yield rule
    # @yieldparam [Rule] rule
    def each(kind, &block)
      ast.each {|r| block.call(r) if r.kind == kind}
    end

    ##
    # Write out parsed syntax string as an S-Expression
    # @return [String]
    def to_sxp
      begin
        require 'sxp'
        SXP::Generator.string(ast.sort)
      rescue LoadError
        # Fall back on the AST's own #to_sxp when the sxp gem is unavailable
        ast.to_sxp
      end
    end
    def to_s; to_sxp; end

    def dup
      new_obj = super
      new_obj.instance_variable_set(:@ast, @ast.dup)
      new_obj
    end

    ##
    # Find a rule given a symbol
    # @param [Symbol] sym
    # @return [Rule]
    def find_rule(sym)
      (@find ||= {})[sym] ||= ast.detect {|r| r.sym == sym}
    end

    ##
    # Write out syntax tree as Turtle
    # @param [String] prefix for language
    # @param [String] ns URI for language
    # @return [String]
    def to_ttl(prefix, ns)
      # FIX: the original built the header with `unless ast.empty? [...] end`,
      # which evaluates to nil when the AST is empty and then crashed on
      # `nil.join`. An empty grammar now yields an empty document.
      return "" if ast.empty?

      [
        "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.",
        "@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.",
        "@prefix #{prefix}: <#{ns}>.",
        "@prefix : <#{ns}>.",
        "@prefix re: <http://www.w3.org/2000/10/swap/grammar/regex#>.",
        "@prefix g: <http://www.w3.org/2000/10/swap/grammar/ebnf#>.",
        "",
        ":language rdfs:isDefinedBy <>; g:start :#{ast.first.id}.",
        "",
      ].join("\n") +

      ast.sort.
        select {|a| [:rule, :terminal].include?(a.kind)}.
        map(&:to_ttl).
        join("\n")
    end

    # Run the block with the debug depth incremented; restores depth afterward.
    def depth
      @depth += 1
      ret = yield
      @depth -= 1
      ret
    end

    # Progress output, less than debugging.
    # Always writes to $stderr when :progress is enabled; also appends to
    # the :debug array when one was supplied.
    def progress(*args)
      return unless @options[:progress]
      options = args.last.is_a?(Hash) ? args.pop : {}
      depth = options[:depth] || @depth
      args << yield if block_given?
      message = "#{args.join(': ')}"
      str = "[#{@lineno}]#{' ' * depth}#{message}"
      @options[:debug] << str if @options[:debug].is_a?(Array)
      $stderr.puts(str)
    end

    # Error output; records the message in {#errors} and echoes it.
    def error(*args)
      options = args.last.is_a?(Hash) ? args.pop : {}
      depth = options[:depth] || @depth
      args << yield if block_given?
      message = "#{args.join(': ')}"
      @errors << message
      str = "[#{@lineno}]#{' ' * depth}#{message}"
      @options[:debug] << str if @options[:debug].is_a?(Array)
      $stderr.puts(str)
    end

    ##
    # Progress output when debugging
    #
    # @overload debug(node, message)
    #   @param [String] node relative location in input
    #   @param [String] message ("")
    #
    # @overload debug(message)
    #   @param [String] message ("")
    #
    # @yieldreturn [String] added to message
    def debug(*args)
      return unless @options[:debug]
      options = args.last.is_a?(Hash) ? args.pop : {}
      depth = options[:depth] || @depth
      args << yield if block_given?
      message = "#{args.join(': ')}"
      str = "[#{@lineno}]#{' ' * depth}#{message}"
      @options[:debug] << str if @options[:debug].is_a?(Array)
      # Unlike #progress, only echo to $stderr when :debug is literally true
      $stderr.puts(str) if @options[:debug] == true
    end
  end
end
|
data/lib/ebnf/bnf.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
module EBNF
  module BNF
    ##
    # Transform EBNF Rule set to BNF:
    #
    # * Add rule [0] (_empty rule (seq))
    # * Transform each rule into a set of rules that are just BNF, using {Rule#to_bnf}.
    # @return [EBNF] self
    def make_bnf
      progress("make_bnf") {"Start: #{@ast.length} rules"}
      new_ast = [Rule.new(:_empty, "0", [:seq], :kind => :rule)]

      ast.each do |rule|
        debug("make_bnf") {"expand from: #{rule.inspect}"}
        new_rules = rule.to_bnf
        debug(" => ") {new_rules.map(&:sym).join(', ')}
        new_ast += new_rules
      end

      # Consolidate equivalent terminal rules.
      # The terminal subset is loop-invariant, so select it once up front
      # (the original re-ran the select on every outer iteration).
      terminal_rules = new_ast.select {|r| r.kind == :terminal}
      to_rewrite = {}
      terminal_rules.each do |src_rule|
        terminal_rules.each do |dst_rule|
          if src_rule.equivalent?(dst_rule) && src_rule != dst_rule
            debug("make_bnf") {"equivalent rules: #{src_rule.inspect} and #{dst_rule.inspect}"}
            (to_rewrite[src_rule] ||= []) << dst_rule
          end
        end
      end

      # Replace references to equivalent rules with canonical rule
      to_rewrite.each do |src_rule, dst_rules|
        dst_rules.each do |dst_rule|
          new_ast.each do |mod_rule|
            debug("make_bnf") {"rewrite #{mod_rule.inspect} from #{dst_rule.sym} to #{src_rule.sym}"}
            mod_rule.rewrite(dst_rule, src_rule)
          end
        end
      end

      # Drop the now-redundant duplicate rules; AST has just rewritten rules
      compacted_ast = new_ast - to_rewrite.values.flatten.compact

      @ast = compacted_ast
      progress("make_bnf") {"End: #{@ast.length} rules"}
      self
    end
  end
end
|
data/lib/ebnf/ll1.rb
ADDED
@@ -0,0 +1,321 @@
|
|
1
|
+
module EBNF
  module LL1
    autoload :Lexer,   "ebnf/ll1/lexer"
    autoload :Parser,  "ebnf/ll1/parser"
    autoload :Scanner, "ebnf/ll1/scanner"

    # Branch table, represented as a recursive hash.
    # The table is indexed by rule symbol, which in turn references a hash of
    # terminals (the first terminals of the production), which in turn
    # reference the sequence of rules that follow, given that terminal as input.
    # @!attribute [r] branch
    # @return [Hash{Symbol => Hash{String, Symbol => Array<Symbol>}}]
    attr_reader :branch

    # First table
    # @!attribute [r] first
    # @return [Hash{Symbol, String => Symbol}]
    attr_reader :first

    # Follow table
    # @!attribute [r] follow
    # @return [Hash{Symbol, String => Symbol}]
    attr_reader :follow

    # Terminal table
    # The list of terminals used in the grammar.
    # @!attribute [r] terminals
    # @return [Array<String, Symbol>]
    attr_reader :terminals

    # Start symbol
    # The rule which starts the grammar
    # @!attribute [r] start
    # @return [Symbol]
    attr_reader :start

    ##
    # Create first/follow for each rule using techniques defined for LL(1) parsers.
    #
    # @param [Symbol] start
    #   Symbol of the rule at which the grammar starts
    # @return [EBNF] self
    # @see http://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
    def first_follow(start)
      # Add _eof to follow all start rules
      if @start = start
        start_rule = find_rule(@start)
        raise "No rule found for start symbol #{@start}" unless start_rule
        start_rule.add_follow([:_eof])
        start_rule.start = true
      end

      # Comprehension rules: create shorter versions of all non-terminal
      # sequences, e.g. (seq a b c) gains a companion rule for (seq b c).
      # Repeat until no new comprehensions are generated.
      comprehensions = []
      begin
        comprehensions = []
        ast.select {|r| r.seq? && r.kind == :rule && r.expr.length > 2}.each do |rule|
          new_expr = rule.expr[2..-1].unshift(:seq)
          unless ast.any? {|r| r.expr == new_expr}
            debug("first_follow") {"add comprehension rule for #{rule.sym} => #{new_expr.inspect}"}
            new_rule = rule.build(new_expr)
            rule.comp = new_rule
            comprehensions << new_rule
          end
        end

        @ast += comprehensions
        progress("first_follow") {"comprehensions #{comprehensions.length}"}
      end while !comprehensions.empty?

      # Fi(a w') = { a } for every terminal a
      # For each rule whose seq starts with a terminal, or whose alt contains
      # a terminal, add that terminal to the rule's first set.
      each(:rule) do |rule|
        each(:terminal) do |terminal|
          rule.add_first([terminal.sym]) if rule.starts_with(terminal.sym)
        end

        # Add strings to first for strings which are start elements
        start_strs = rule.starts_with(String)
        rule.add_first(start_strs) if start_strs
      end

      # Fi(ε) = { ε }
      # Add _eps as a first of _empty
      empty = ast.detect {|r| r.sym == :_empty}
      empty.add_first([:_eps])

      # Fixed-point iteration: loop until no first or follow element is added
      firsts, follows = 0, 0
      begin
        firsts, follows = 0, 0
        each(:rule) do |rule|
          each(:rule) do |first_rule|
            next if first_rule == rule || first_rule.first.nil?

            # Fi(A w') = Fi(A) for every nonterminal A with ε not in Fi(A):
            # propagate firsts of a leading nonterminal to this rule.
            if rule.starts_with(first_rule.sym)
              depth {debug("FF.1") {"add first #{first_rule.first.inspect} to #{rule.sym}"}}
              firsts += rule.add_first(first_rule.first)
            end

            # Fi(A w') = Fi(A) \ { ε } ∪ Fi(w') for every nonterminal A with ε in Fi(A)
            # NOTE(review): rule.expr elements are symbols, but this compares
            # against the Rule object itself — presumably Rule#== matches on
            # its symbol; confirm, otherwise this should be first_rule.sym.
            if rule.seq? &&
               rule.expr.fetch(1, nil) == first_rule &&
               first_rule.first.include?(:_eps) &&
               (comp = rule.comp)

              depth {debug("FF.2") {"add first #{first_rule.first.inspect} to #{comp.sym}"}}
              firsts += comp.add_first(first_rule.first)
            end
          end

          # Only run these rules if the rule is a sequence having two or more
          # elements and has a comprehension (the same sequence minus its
          # first member).
          if rule.seq? && (comp = rule.comp)
            # If there is a rule of the form Aj → w Ai w', then
            # * if the terminal a is in Fi(w'), then add a to Fo(Ai)
            #
            # @example
            #   rule: (seq a b c)
            #   comp: (seq b c)
            #   if comp.first == [T] => a.follow += [T]
            if (ai = find_rule(rule.expr[1])) && ai.kind == :rule && comp.first
              depth {debug("FF.3") {"add follow #{comp.first.inspect} to #{ai.sym}"}}
              follows += ai.add_follow(comp.first)
            end

            # Follows of a rule are also follows of the comprehension of the rule.
            if rule.follow
              depth {debug("FF.4") {"add follow #{rule.follow.inspect} to #{comp.sym}"}}
              follows += comp.add_follow(rule.follow)
            end

            # * if ε is in Fi(w'), then add Fo(Aj) to Fo(Ai)
            # NOTE(review): the LL(1) construction calls for rule.follow here,
            # but the original adds rule.first (and guards on rule.first);
            # kept as-is to preserve behavior — verify against a known grammar.
            # (The debug message previously claimed rule.follow; it now
            # reports what is actually added.)
            if comp.first && comp.first.include?(:_eps) && rule.first &&
               (member = find_rule(rule.expr.fetch(1, nil))) &&
               member.kind == :rule

              depth {debug("FF.5") {"add follow #{rule.first.inspect} to #{member.sym}"}}
              follows += member.add_follow(rule.first)
            end
          end

          # Follows of a rule are also follows of the last production in the rule
          if rule.seq? && rule.follow &&
             (member = find_rule(rule.expr.last)) &&
             member.kind == :rule

            depth {debug("FF.6") {"add follow #{rule.follow.inspect} to #{member.sym}"}}
            follows += member.add_follow(rule.follow)
          end

          # For alts, anything that follows the rule follows each member of the rule
          if rule.alt? && rule.follow
            rule.expr[1..-1].map {|s| find_rule(s)}.each do |mem|
              if mem && mem.kind == :rule
                depth {debug("FF.7") {"add follow #{rule.follow.inspect} to #{mem.sym}"}}
                follows += mem.add_follow(rule.follow)
              end
            end
          end
        end

        progress("first_follow") {"firsts #{firsts}, follows #{follows}"}
      end while (firsts + follows) > 0
    end

    ##
    # Generate parser tables, {#branch}, {#first}, {#follow}, and {#terminals}
    # @raise [RuntimeError] if any errors were recorded while building tables
    def build_tables
      progress("build_tables") {
        "Terminals: #{ast.count {|r| r.kind == :terminal}} " +
        "Non-Terminals: #{ast.count {|r| r.kind == :rule}}"
      }

      @first = ast.
        select(&:first).
        inject({}) {|memo, r|
          memo[r.sym] = r.first.reject {|t| t == :_eps};
          memo
        }
      @follow = ast.
        select(&:follow).
        inject({}) {|memo, r|
          # FIX: the original built the follow table from r.first (copy-paste
          # from the @first construction above); use the rule's follow set.
          memo[r.sym] = r.follow.reject {|t| t == :_eps};
          memo
        }
      @terminals = ast.map do |r|
        (r.first || []) + (r.follow || [])
      end.flatten.uniq
      @terminals = (@terminals - [:_eps, :_eof, :_empty]).sort_by(&:to_s)

      @branch = {}
      @already = []
      @agenda = []
      do_production(@start)
      while !@agenda.empty?
        x = @agenda.shift
        do_production(x)
      end

      if !@errors.empty?
        progress("###### FAILED with #{errors.length} errors.")
        @errors.each {|s| progress("  #{s}")}
        raise "Table creation failed with errors"
      else
        progress("Ok for predictive parsing")
      end
    end

    # Generate an output table in Ruby format
    # @param [IO, StringIO] io
    # @param [String] name of the table constant
    # @param [String] table
    #   to output, one of {#branch}, {#first}, {#follow}, or {#terminals}
    # @param [Integer] indent = 0
    def outputTable(io, name, table, indent = 0)
      ind0 = '  ' * indent
      ind1 = ind0 + '  '
      ind2 = ind1 + '  '

      if table.is_a?(Hash)
        io.puts "#{ind0}#{name} = {"
        table.keys.sort_by(&:to_s).each do |prod|
          case table[prod]
          when Array
            list = table[prod].map(&:inspect).join(",\n#{ind2}")
            io.puts "#{ind1}#{prod.inspect} => [\n#{ind2}#{list}],"
          when Hash
            io.puts "#{ind1}#{prod.inspect} => {"
            table[prod].keys.sort_by(&:to_s).each do |term|
              list = table[prod][term].map(&:inspect).join(", ")
              io.puts "#{ind2}#{term.inspect} => [#{list}],"
            end
            io.puts "#{ind1}},"
          else
            "Unknown table entry type: #{table[prod].class}"
          end
        end
        io.puts "#{ind0}}.freeze\n"
      else
        io.puts "#{ind0}#{name} = [\n#{ind1}" +
          table.sort_by(&:to_s).map(&:inspect).join(",\n#{ind1}") +
          "\n#{ind0}].freeze\n"
      end
    end

    private
    # Compute the branch-table entry for a single production (lhs) and add any
    # productions it references to the agenda. Records First/First conflicts
    # via #error.
    def do_production(lhs)
      rule = find_rule(lhs)
      if rule.nil? || rule.kind != :rule || rule.sym == :_empty
        progress("prod") {"Skip: #{lhs.inspect}"}
        return
      end
      @already << lhs

      branchDict = {}

      progress("prod") {"Production #{lhs.inspect}"}

      if rule.expr.first == :matches
        debug("prod") {"Rule is regexp: #{rule}"}

        error("No record of what token #{lhs} can start with") unless rule.first
        return
      end

      if rule.alt?
        # Add entries for each alternative, based on the alternative's first/seq
        rule.expr[1..-1].each do |prod|
          prod_rule = find_rule(prod)
          debug("  Alt", prod)
          @agenda << prod unless @already.include?(prod) || @agenda.include?(prod)
          if prod == :_empty
            debug("    empty")
            branchDict[prod] = []
          elsif prod_rule.nil? || prod_rule.first.nil?
            debug("    no first =>", prod)
            branchDict[prod] = [prod]
          else
            prod_rule.first.each do |f|
              if branchDict.has_key?(f)
                error("First/First Conflict: #{f} is also the condition for #{branchDict[f]}")
              end
              debug("    alt") {"[#{f}] => #{prod}"}
              branchDict[f] = [prod]
            end
          end
        end
      else
        error("prod") {"Expected lhs to be alt or seq, was: #{rule}"} unless rule.seq?
        debug("  Seq", rule)
        # Entries for each first element referencing the sequence
        (rule.first || []).each do |f|
          debug("    seq") {"[#{f}] => #{rule.expr[1..-1].inspect}"}
          branchDict[f] = rule.expr[1..-1]
        end

        # Add each production to the agenda
        rule.expr[1..-1].each do |prod|
          @agenda << prod unless @already.include?(prod) || @agenda.include?(prod)
        end
      end

      # Add follow rules
      (rule.follow || []).each do |f|
        debug("  Follow") {f.inspect}
        branchDict[f] ||= []
      end

      @branch[lhs] = branchDict
    end
  end
end
|