antelope 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +25 -23
- data/.rspec +3 -3
- data/.travis.yml +10 -9
- data/.yardopts +7 -7
- data/CONTRIBUTING.md +38 -38
- data/GENERATORS.md +124 -124
- data/Gemfile +7 -7
- data/LICENSE.txt +22 -22
- data/README.md +104 -104
- data/Rakefile +2 -2
- data/TODO.md +58 -58
- data/antelope.gemspec +28 -28
- data/bin/antelope +7 -7
- data/examples/deterministic.ace +35 -35
- data/examples/example.ace +51 -50
- data/examples/example.err +192 -0
- data/examples/{example.output → example.inf} +384 -385
- data/examples/liquidscript.ace +233 -162
- data/examples/simple.ace +22 -22
- data/lib/antelope/ace/compiler.rb +334 -334
- data/lib/antelope/ace/errors.rb +48 -48
- data/lib/antelope/ace/grammar/generation.rb +80 -80
- data/lib/antelope/ace/grammar/loading.rb +53 -53
- data/lib/antelope/ace/grammar/precedences.rb +68 -65
- data/lib/antelope/ace/grammar/productions.rb +156 -150
- data/lib/antelope/ace/grammar/symbols.rb +66 -66
- data/lib/antelope/ace/grammar.rb +69 -69
- data/lib/antelope/ace/precedence.rb +61 -61
- data/lib/antelope/ace/production.rb +57 -57
- data/lib/antelope/ace/scanner/argument.rb +57 -57
- data/lib/antelope/ace/scanner/first.rb +89 -89
- data/lib/antelope/ace/scanner/second.rb +177 -177
- data/lib/antelope/ace/scanner/third.rb +27 -27
- data/lib/antelope/ace/scanner.rb +134 -134
- data/lib/antelope/ace/token/epsilon.rb +24 -24
- data/lib/antelope/ace/token/error.rb +26 -26
- data/lib/antelope/ace/token/nonterminal.rb +17 -17
- data/lib/antelope/ace/token/terminal.rb +17 -17
- data/lib/antelope/ace/token.rb +238 -238
- data/lib/antelope/ace.rb +53 -53
- data/lib/antelope/cli.rb +55 -55
- data/lib/antelope/errors.rb +8 -8
- data/lib/antelope/generation/constructor/first.rb +88 -88
- data/lib/antelope/generation/constructor/follow.rb +103 -103
- data/lib/antelope/generation/constructor/nullable.rb +64 -64
- data/lib/antelope/generation/constructor.rb +126 -126
- data/lib/antelope/generation/errors.rb +17 -17
- data/lib/antelope/generation/null.rb +13 -13
- data/lib/antelope/generation/recognizer/rule.rb +216 -216
- data/lib/antelope/generation/recognizer/state.rb +130 -130
- data/lib/antelope/generation/recognizer.rb +180 -180
- data/lib/antelope/generation/tableizer.rb +175 -154
- data/lib/antelope/generation.rb +15 -15
- data/lib/antelope/generator/base.rb +264 -264
- data/lib/antelope/generator/c.rb +11 -11
- data/lib/antelope/generator/c_header.rb +105 -105
- data/lib/antelope/generator/c_source.rb +39 -39
- data/lib/antelope/generator/error.rb +34 -0
- data/lib/antelope/generator/group.rb +57 -57
- data/lib/antelope/generator/html.rb +51 -0
- data/lib/antelope/generator/info.rb +47 -0
- data/lib/antelope/generator/null.rb +18 -18
- data/lib/antelope/generator/output.rb +17 -49
- data/lib/antelope/generator/ruby.rb +79 -79
- data/lib/antelope/generator/templates/c_header.ant +36 -36
- data/lib/antelope/generator/templates/c_source.ant +202 -202
- data/lib/antelope/generator/templates/error.ant +33 -0
- data/lib/antelope/generator/templates/html/antelope.css +1 -0
- data/lib/antelope/generator/templates/html/antelope.html +1 -0
- data/lib/antelope/generator/templates/html/antelope.js +1 -0
- data/lib/antelope/generator/templates/html/css.ant +53 -0
- data/lib/antelope/generator/templates/html/html.ant +82 -0
- data/lib/antelope/generator/templates/html/js.ant +9 -0
- data/lib/antelope/generator/templates/info.ant +53 -0
- data/lib/antelope/generator/templates/ruby.ant +178 -146
- data/lib/antelope/generator.rb +66 -63
- data/lib/antelope/template/compiler.rb +78 -78
- data/lib/antelope/template/errors.rb +9 -9
- data/lib/antelope/template/scanner.rb +109 -109
- data/lib/antelope/template.rb +65 -60
- data/lib/antelope/version.rb +6 -6
- data/lib/antelope.rb +13 -13
- data/optimizations.txt +42 -0
- data/spec/antelope/ace/compiler_spec.rb +60 -60
- data/spec/antelope/ace/scanner_spec.rb +27 -27
- data/spec/antelope/constructor_spec.rb +133 -136
- data/spec/antelope/template_spec.rb +50 -49
- data/spec/fixtures/simple.ace +22 -22
- data/spec/spec_helper.rb +39 -39
- data/spec/support/benchmark_helper.rb +5 -5
- data/spec/support/grammar_helper.rb +15 -15
- data/subl/Ace (Ruby).JSON-tmLanguage +94 -94
- data/subl/Ace (Ruby).tmLanguage +153 -153
- metadata +17 -6
- data/lib/antelope/generator/templates/output.ant +0 -68
data/lib/antelope/ace.rb
CHANGED
@@ -1,53 +1,53 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
require "antelope/ace/errors"
|
4
|
-
require "antelope/ace/scanner"
|
5
|
-
require "antelope/ace/compiler"
|
6
|
-
require "antelope/ace/token"
|
7
|
-
require "antelope/ace/precedence"
|
8
|
-
require "antelope/ace/production"
|
9
|
-
require "antelope/ace/grammar"
|
10
|
-
|
11
|
-
module Antelope
|
12
|
-
|
13
|
-
# Defines the Ace file. The Ace file format works similarly to
|
14
|
-
# bison's y file format. The Ace file is separated into three
|
15
|
-
# parts:
|
16
|
-
#
|
17
|
-
# <first>
|
18
|
-
# %%
|
19
|
-
# <second>
|
20
|
-
# %%
|
21
|
-
# <third>
|
22
|
-
#
|
23
|
-
# All parts may be empty; thus, the minimal file that Ace will
|
24
|
-
# accept would be
|
25
|
-
#
|
26
|
-
# %%
|
27
|
-
# %%
|
28
|
-
#
|
29
|
-
# The first part consists of _directives_ and _blocks_; directives
|
30
|
-
# look something like `"%" <directive>[ <argument>]*\n`, with
|
31
|
-
# `<directive>` being any alphanumerical character, including
|
32
|
-
# underscores and dashes, and `<argument>` being any word character
|
33
|
-
# or a quote-delimited string. Blocks consist of
|
34
|
-
# `"%{" <content> "\n" "\s"* "%}"`, with `<content>` being any
|
35
|
-
# characters. The content is copied directly into the body of the
|
36
|
-
# output.
|
37
|
-
#
|
38
|
-
# The second part consists of rules. Rules look something like
|
39
|
-
# this:
|
40
|
-
#
|
41
|
-
# <nonterminal>: (<nonterminal> | <terminal>)* ["{" <content> "}"] ["|" (<nonterminal> | <terminal>)* ["{" <content> "}"]]* [;]
|
42
|
-
#
|
43
|
-
# Where `<nonterminal>` is any lowercase alphabetical character,
|
44
|
-
# `<terminal>` is any uppercase alphabetical character, and
|
45
|
-
# `<content>` is code to be used in the output file upon matching
|
46
|
-
# the specific rule.
|
47
|
-
#
|
48
|
-
# The third part consists of a body, which is copied directly into
|
49
|
-
# the output.
|
50
|
-
module Ace
|
51
|
-
|
52
|
-
end
|
53
|
-
end
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require "antelope/ace/errors"
|
4
|
+
require "antelope/ace/scanner"
|
5
|
+
require "antelope/ace/compiler"
|
6
|
+
require "antelope/ace/token"
|
7
|
+
require "antelope/ace/precedence"
|
8
|
+
require "antelope/ace/production"
|
9
|
+
require "antelope/ace/grammar"
|
10
|
+
|
11
|
+
module Antelope
|
12
|
+
|
13
|
+
# Defines the Ace file. The Ace file format works similarly to
|
14
|
+
# bison's y file format. The Ace file is separated into three
|
15
|
+
# parts:
|
16
|
+
#
|
17
|
+
# <first>
|
18
|
+
# %%
|
19
|
+
# <second>
|
20
|
+
# %%
|
21
|
+
# <third>
|
22
|
+
#
|
23
|
+
# All parts may be empty; thus, the minimal file that Ace will
|
24
|
+
# accept would be
|
25
|
+
#
|
26
|
+
# %%
|
27
|
+
# %%
|
28
|
+
#
|
29
|
+
# The first part consists of _directives_ and _blocks_; directives
|
30
|
+
# look something like `"%" <directive>[ <argument>]*\n`, with
|
31
|
+
# `<directive>` being any alphanumerical character, including
|
32
|
+
# underscores and dashes, and `<argument>` being any word character
|
33
|
+
# or a quote-delimited string. Blocks consist of
|
34
|
+
# `"%{" <content> "\n" "\s"* "%}"`, with `<content>` being any
|
35
|
+
# characters. The content is copied directly into the body of the
|
36
|
+
# output.
|
37
|
+
#
|
38
|
+
# The second part consists of rules. Rules look something like
|
39
|
+
# this:
|
40
|
+
#
|
41
|
+
# <nonterminal>: (<nonterminal> | <terminal>)* ["{" <content> "}"] ["|" (<nonterminal> | <terminal>)* ["{" <content> "}"]]* [;]
|
42
|
+
#
|
43
|
+
# Where `<nonterminal>` is any lowercase alphabetical character,
|
44
|
+
# `<terminal>` is any uppercase alphabetical character, and
|
45
|
+
# `<content>` is code to be used in the output file upon matching
|
46
|
+
# the specific rule.
|
47
|
+
#
|
48
|
+
# The third part consists of a body, which is copied directly into
|
49
|
+
# the output.
|
50
|
+
module Ace
|
51
|
+
|
52
|
+
end
|
53
|
+
end
|
data/lib/antelope/cli.rb
CHANGED
@@ -1,55 +1,55 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
require "thor"
|
4
|
-
|
5
|
-
module Antelope
|
6
|
-
|
7
|
-
# Handles the command line interface.
|
8
|
-
class CLI < Thor
|
9
|
-
|
10
|
-
class_option :verbose, default: false, type: :boolean
|
11
|
-
|
12
|
-
option :type, default: nil, type: :string,
|
13
|
-
desc: "The type of generator to use"
|
14
|
-
desc "compile FILE [FILE]*", "compile the given files"
|
15
|
-
|
16
|
-
# Compile.
|
17
|
-
def compile(*files)
|
18
|
-
files.each do |file|
|
19
|
-
compile_file(file)
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
desc "check FILE [FILE]*", "check the syntax of the given files"
|
24
|
-
|
25
|
-
# Check.
|
26
|
-
def check(*files)
|
27
|
-
files.each do |file|
|
28
|
-
compile_file(file, [Generator::Null])
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
private
|
33
|
-
|
34
|
-
# Compiles the given file, and then generates. If an error
|
35
|
-
# occurs, it prints it out to stderr, along with a backtrace if
|
36
|
-
# the verbose flag was set.
|
37
|
-
#
|
38
|
-
# @param file [String] the file to compile.
|
39
|
-
# @param gen [Array, Symbol] the generator to use.
|
40
|
-
# @return [void]
|
41
|
-
def compile_file(file, gen = :guess)
|
42
|
-
puts "Compiling #{file}... "
|
43
|
-
|
44
|
-
grammar = Ace::Grammar.from_file(file)
|
45
|
-
grammar.generate(options, gen)
|
46
|
-
|
47
|
-
rescue => e
|
48
|
-
$stderr.puts "Error while compiling: #{e.class}: #{e.message}"
|
49
|
-
|
50
|
-
if options[:verbose]
|
51
|
-
$stderr.puts e.backtrace[0..10].map { |_| "\t#{_}" }
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require "thor"
|
4
|
+
|
5
|
+
module Antelope
|
6
|
+
|
7
|
+
# Handles the command line interface.
|
8
|
+
class CLI < Thor
|
9
|
+
|
10
|
+
class_option :verbose, default: false, type: :boolean
|
11
|
+
|
12
|
+
option :type, default: nil, type: :string,
|
13
|
+
desc: "The type of generator to use"
|
14
|
+
desc "compile FILE [FILE]*", "compile the given files"
|
15
|
+
|
16
|
+
# Compile.
|
17
|
+
def compile(*files)
|
18
|
+
files.each do |file|
|
19
|
+
compile_file(file)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
desc "check FILE [FILE]*", "check the syntax of the given files"
|
24
|
+
|
25
|
+
# Check.
|
26
|
+
def check(*files)
|
27
|
+
files.each do |file|
|
28
|
+
compile_file(file, [Generator::Null])
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
# Compiles the given file, and then generates. If an error
|
35
|
+
# occurs, it prints it out to stderr, along with a backtrace if
|
36
|
+
# the verbose flag was set.
|
37
|
+
#
|
38
|
+
# @param file [String] the file to compile.
|
39
|
+
# @param gen [Array, Symbol] the generator to use.
|
40
|
+
# @return [void]
|
41
|
+
def compile_file(file, gen = :guess)
|
42
|
+
puts "Compiling #{file}... "
|
43
|
+
|
44
|
+
grammar = Ace::Grammar.from_file(file)
|
45
|
+
grammar.generate(options, gen)
|
46
|
+
|
47
|
+
rescue => e
|
48
|
+
$stderr.puts "Error while compiling: #{e.class}: #{e.message}"
|
49
|
+
|
50
|
+
if options[:verbose]
|
51
|
+
$stderr.puts e.backtrace[0..10].map { |_| "\t#{_}" }
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
data/lib/antelope/errors.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module Antelope
|
4
|
-
|
5
|
-
# Every error in antelope inherits this error class.
|
6
|
-
class Error < StandardError
|
7
|
-
end
|
8
|
-
end
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Antelope
|
4
|
+
|
5
|
+
# Every error in antelope inherits this error class.
|
6
|
+
class Error < StandardError
|
7
|
+
end
|
8
|
+
end
|
@@ -1,88 +1,88 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module Antelope
|
4
|
-
module Generation
|
5
|
-
class Constructor
|
6
|
-
|
7
|
-
# Contains the methods to construct first sets for tokens.
|
8
|
-
module First
|
9
|
-
|
10
|
-
# Initialize.
|
11
|
-
def initialize
|
12
|
-
@firstifying = []
|
13
|
-
super
|
14
|
-
end
|
15
|
-
|
16
|
-
# Constructs the first set for a given token. This is how
|
17
|
-
# the method should behave:
|
18
|
-
#
|
19
|
-
# FIRST(ε) == [] # if ε is the epsilon token
|
20
|
-
# FIRST(x) == [x] # if x is a terminal
|
21
|
-
# FIRST(αβ) == if nullable?(α)
|
22
|
-
# FIRST(α) U FIRST(β)
|
23
|
-
# else
|
24
|
-
# FIRST(α)
|
25
|
-
# end
|
26
|
-
# FIRST(A) == FIRST(a_1) U FIRST(a_2) U ... U FIRST(a_n)
|
27
|
-
# # if A is a nonterminal and a_1, a_2, ..., a_n are all
|
28
|
-
# # of the right-hand sides of its productions.
|
29
|
-
#
|
30
|
-
# @param token [Ace::Token, Array<Ace::Token>]
|
31
|
-
# @return [Set<Ace::Token::Terminal>]
|
32
|
-
# @see #first_array
|
33
|
-
def first(token)
|
34
|
-
case token
|
35
|
-
when Ace::Token::Nonterminal
|
36
|
-
firstifying(token) do
|
37
|
-
productions = grammar.productions[token.name]
|
38
|
-
productions.map { |prod|
|
39
|
-
first(prod[:items]) }.inject(Set.new, :+)
|
40
|
-
end
|
41
|
-
when Array
|
42
|
-
first_array(token)
|
43
|
-
when Ace::Token::Epsilon
|
44
|
-
Set.new
|
45
|
-
when Ace::Token::Terminal
|
46
|
-
Set.new([token])
|
47
|
-
else
|
48
|
-
incorrect_argument! token, Ace::Token, Array
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
private
|
53
|
-
|
54
|
-
# Determines the FIRST set of an array of tokens. First, it
|
55
|
-
# removes any nonterminals we are finding the FIRST set for;
|
56
|
-
# then, it determines which tokens we have to find the FIRST
|
57
|
-
# sets for (since some tokens may be nullable). We then add
|
58
|
-
# those sets to our set.
|
59
|
-
#
|
60
|
-
# @param tokens [Array<Ace::Token>]
|
61
|
-
# @return [Set<Ace::Token>]
|
62
|
-
def first_array(tokens)
|
63
|
-
tokens.dup.delete_if { |_| @firstifying.include?(_) }.
|
64
|
-
each_with_index.take_while do |token, i|
|
65
|
-
if i.zero?
|
66
|
-
true
|
67
|
-
else
|
68
|
-
nullable?(tokens[i - 1])
|
69
|
-
end
|
70
|
-
end.map(&:first).map { |_| first(_) }.inject(Set.new, :+)
|
71
|
-
end
|
72
|
-
|
73
|
-
# Helps keep track of the nonterminals we're finding FIRST
|
74
|
-
# sets for. This helps prevent recursion.
|
75
|
-
#
|
76
|
-
# @param tok [Ace::Token::Nonterminal]
|
77
|
-
# @yield once.
|
78
|
-
# @return [Set<Ace::Token>]
|
79
|
-
def firstifying(tok)
|
80
|
-
@firstifying << tok
|
81
|
-
out = yield
|
82
|
-
@firstifying.delete tok
|
83
|
-
out
|
84
|
-
end
|
85
|
-
end
|
86
|
-
end
|
87
|
-
end
|
88
|
-
end
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Antelope
|
4
|
+
module Generation
|
5
|
+
class Constructor
|
6
|
+
|
7
|
+
# Contains the methods to construct first sets for tokens.
|
8
|
+
module First
|
9
|
+
|
10
|
+
# Initialize.
|
11
|
+
def initialize
|
12
|
+
@firstifying = []
|
13
|
+
super
|
14
|
+
end
|
15
|
+
|
16
|
+
# Constructs the first set for a given token. This is how
|
17
|
+
# the method should behave:
|
18
|
+
#
|
19
|
+
# FIRST(ε) == [] # if ε is the epsilon token
|
20
|
+
# FIRST(x) == [x] # if x is a terminal
|
21
|
+
# FIRST(αβ) == if nullable?(α)
|
22
|
+
# FIRST(α) U FIRST(β)
|
23
|
+
# else
|
24
|
+
# FIRST(α)
|
25
|
+
# end
|
26
|
+
# FIRST(A) == FIRST(a_1) U FIRST(a_2) U ... U FIRST(a_n)
|
27
|
+
# # if A is a nonterminal and a_1, a_2, ..., a_n are all
|
28
|
+
# # of the right-hand sides of its productions.
|
29
|
+
#
|
30
|
+
# @param token [Ace::Token, Array<Ace::Token>]
|
31
|
+
# @return [Set<Ace::Token::Terminal>]
|
32
|
+
# @see #first_array
|
33
|
+
def first(token)
|
34
|
+
case token
|
35
|
+
when Ace::Token::Nonterminal
|
36
|
+
firstifying(token) do
|
37
|
+
productions = grammar.productions[token.name]
|
38
|
+
productions.map { |prod|
|
39
|
+
first(prod[:items]) }.inject(Set.new, :+)
|
40
|
+
end
|
41
|
+
when Array
|
42
|
+
first_array(token)
|
43
|
+
when Ace::Token::Epsilon
|
44
|
+
Set.new
|
45
|
+
when Ace::Token::Terminal
|
46
|
+
Set.new([token])
|
47
|
+
else
|
48
|
+
incorrect_argument! token, Ace::Token, Array
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
private
|
53
|
+
|
54
|
+
# Determines the FIRST set of an array of tokens. First, it
|
55
|
+
# removes any nonterminals we are finding the FIRST set for;
|
56
|
+
# then, it determines which tokens we have to find the FIRST
|
57
|
+
# sets for (since some tokens may be nullable). We then add
|
58
|
+
# those sets to our set.
|
59
|
+
#
|
60
|
+
# @param tokens [Array<Ace::Token>]
|
61
|
+
# @return [Set<Ace::Token>]
|
62
|
+
def first_array(tokens)
|
63
|
+
tokens.dup.delete_if { |_| @firstifying.include?(_) }.
|
64
|
+
each_with_index.take_while do |token, i|
|
65
|
+
if i.zero?
|
66
|
+
true
|
67
|
+
else
|
68
|
+
nullable?(tokens[i - 1])
|
69
|
+
end
|
70
|
+
end.map(&:first).map { |_| first(_) }.inject(Set.new, :+)
|
71
|
+
end
|
72
|
+
|
73
|
+
# Helps keep track of the nonterminals we're finding FIRST
|
74
|
+
# sets for. This helps prevent recursion.
|
75
|
+
#
|
76
|
+
# @param tok [Ace::Token::Nonterminal]
|
77
|
+
# @yield once.
|
78
|
+
# @return [Set<Ace::Token>]
|
79
|
+
def firstifying(tok)
|
80
|
+
@firstifying << tok
|
81
|
+
out = yield
|
82
|
+
@firstifying.delete tok
|
83
|
+
out
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
@@ -1,103 +1,103 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module Antelope
|
4
|
-
module Generation
|
5
|
-
class Constructor
|
6
|
-
|
7
|
-
# Contains the methods to find the FOLLOW sets of nonterminals.
|
8
|
-
module Follow
|
9
|
-
|
10
|
-
# Initialize.
|
11
|
-
def initialize
|
12
|
-
@follows = {}
|
13
|
-
super
|
14
|
-
end
|
15
|
-
|
16
|
-
# Returns the FOLLOW set of the given token. If the given
|
17
|
-
# token isn't a nonterminal, it raises an error. It then
|
18
|
-
# generates the FOLLOW set for the given token, and then
|
19
|
-
# caches it.
|
20
|
-
#
|
21
|
-
# @return [Set<Ace::Token>]
|
22
|
-
# @see Constructor#incorrect_argument!
|
23
|
-
# @see #generate_follow_set
|
24
|
-
def follow(token)
|
25
|
-
unless token.is_a? Ace::Token::Nonterminal
|
26
|
-
incorrect_argument! token, Ace::Token::Nonterminal
|
27
|
-
end
|
28
|
-
|
29
|
-
@follows.fetch(token) { generate_follow_set(token) }
|
30
|
-
end
|
31
|
-
|
32
|
-
private
|
33
|
-
|
34
|
-
# Generates the FOLLOW set for the given token. It finds the
|
35
|
-
# positions at which the token appears in the grammar, and
|
36
|
-
# sees what could possibly follow it. For example, given the
|
37
|
-
# following production:
|
38
|
-
#
|
39
|
-
# A -> aBz
|
40
|
-
#
|
41
|
-
# With `a` and `z` being any combination of terminals and
|
42
|
-
# nonterminals, and we're trying to find the FOLLOW set of
|
43
|
-
# `B` we add the FIRST set of `z` to the FOLLOW set of `B`:
|
44
|
-
#
|
45
|
-
# FOLLOW(B) = FOLLOW(B) ∪ FIRST(z)
|
46
|
-
#
|
47
|
-
# In the case that `B` is at the end of a production, like so:
|
48
|
-
#
|
49
|
-
# A -> aB
|
50
|
-
#
|
51
|
-
# or
|
52
|
-
#
|
53
|
-
# A -> aBw
|
54
|
-
#
|
55
|
-
# (with `w` being nullable) We also add the FOLLOW set of `A`
|
56
|
-
# to `B`:
|
57
|
-
#
|
58
|
-
# FOLLOW(B) = FOLLOW(B) ∪ FOLLOW(A)
|
59
|
-
#
|
60
|
-
# In case this operation is potentially recursive, we make
|
61
|
-
# sure to set the FOLLOW set of `B` to an empty set (since we
|
62
|
-
# cache the result of a FOLLOW set, the empty set will be
|
63
|
-
# returned).
|
64
|
-
#
|
65
|
-
# @param token [Ace::Token::Nonterminal]
|
66
|
-
# @return [Set<Ace::Token>]
|
67
|
-
# @see First#first
|
68
|
-
# @see Nullable#nullable?
|
69
|
-
def generate_follow_set(token)
|
70
|
-
# Set it to the empty set so we don't end up recursing.
|
71
|
-
set = @follows[token] = Set.new
|
72
|
-
|
73
|
-
productions.each do |rule|
|
74
|
-
items = rule.items
|
75
|
-
|
76
|
-
# Find all of the positions within the rule that our token
|
77
|
-
# occurs, and then increment that position by one.
|
78
|
-
positions = items.each_with_index.
|
79
|
-
find_all { |t, _| t == token }.
|
80
|
-
map(&:last).map(&:succ)
|
81
|
-
|
82
|
-
# Find the FIRST set of every item after our token, and
|
83
|
-
# put that in our set.
|
84
|
-
positions.map { |pos| first(items[pos..-1]) }.
|
85
|
-
inject(set, :merge)
|
86
|
-
|
87
|
-
positions.each do |pos|
|
88
|
-
# If we're at the end of the rule...
|
89
|
-
if pos == items.size || nullable?(items[pos..-1])
|
90
|
-
# Then add the FOLLOW set of the left-hand side to our
|
91
|
-
# set.
|
92
|
-
set.merge follow(rule.label)
|
93
|
-
end
|
94
|
-
end
|
95
|
-
end
|
96
|
-
|
97
|
-
# Replace the cached empty set with our filled set.
|
98
|
-
@follows[token] = set
|
99
|
-
end
|
100
|
-
end
|
101
|
-
end
|
102
|
-
end
|
103
|
-
end
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Antelope
|
4
|
+
module Generation
|
5
|
+
class Constructor
|
6
|
+
|
7
|
+
# Contains the methods to find the FOLLOW sets of nonterminals.
|
8
|
+
module Follow
|
9
|
+
|
10
|
+
# Initialize.
|
11
|
+
def initialize
|
12
|
+
@follows = {}
|
13
|
+
super
|
14
|
+
end
|
15
|
+
|
16
|
+
# Returns the FOLLOW set of the given token. If the given
|
17
|
+
# token isn't a nonterminal, it raises an error. It then
|
18
|
+
# generates the FOLLOW set for the given token, and then
|
19
|
+
# caches it.
|
20
|
+
#
|
21
|
+
# @return [Set<Ace::Token>]
|
22
|
+
# @see Constructor#incorrect_argument!
|
23
|
+
# @see #generate_follow_set
|
24
|
+
def follow(token)
|
25
|
+
unless token.is_a? Ace::Token::Nonterminal
|
26
|
+
incorrect_argument! token, Ace::Token::Nonterminal
|
27
|
+
end
|
28
|
+
|
29
|
+
@follows.fetch(token) { generate_follow_set(token) }
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
# Generates the FOLLOW set for the given token. It finds the
|
35
|
+
# positions at which the token appears in the grammar, and
|
36
|
+
# sees what could possibly follow it. For example, given the
|
37
|
+
# following production:
|
38
|
+
#
|
39
|
+
# A -> aBz
|
40
|
+
#
|
41
|
+
# With `a` and `z` being any combination of terminals and
|
42
|
+
# nonterminals, and we're trying to find the FOLLOW set of
|
43
|
+
# `B` we add the FIRST set of `z` to the FOLLOW set of `B`:
|
44
|
+
#
|
45
|
+
# FOLLOW(B) = FOLLOW(B) ∪ FIRST(z)
|
46
|
+
#
|
47
|
+
# In the case that `B` is at the end of a production, like so:
|
48
|
+
#
|
49
|
+
# A -> aB
|
50
|
+
#
|
51
|
+
# or
|
52
|
+
#
|
53
|
+
# A -> aBw
|
54
|
+
#
|
55
|
+
# (with `w` being nullable) We also add the FOLLOW set of `A`
|
56
|
+
# to `B`:
|
57
|
+
#
|
58
|
+
# FOLLOW(B) = FOLLOW(B) ∪ FOLLOW(A)
|
59
|
+
#
|
60
|
+
# In case this operation is potentially recursive, we make
|
61
|
+
# sure to set the FOLLOW set of `B` to an empty set (since we
|
62
|
+
# cache the result of a FOLLOW set, the empty set will be
|
63
|
+
# returned).
|
64
|
+
#
|
65
|
+
# @param token [Ace::Token::Nonterminal]
|
66
|
+
# @return [Set<Ace::Token>]
|
67
|
+
# @see First#first
|
68
|
+
# @see Nullable#nullable?
|
69
|
+
def generate_follow_set(token)
|
70
|
+
# Set it to the empty set so we don't end up recursing.
|
71
|
+
set = @follows[token] = Set.new
|
72
|
+
|
73
|
+
productions.each do |rule|
|
74
|
+
items = rule.items
|
75
|
+
|
76
|
+
# Find all of the positions within the rule that our token
|
77
|
+
# occurs, and then increment that position by one.
|
78
|
+
positions = items.each_with_index.
|
79
|
+
find_all { |t, _| t == token }.
|
80
|
+
map(&:last).map(&:succ)
|
81
|
+
|
82
|
+
# Find the FIRST set of every item after our token, and
|
83
|
+
# put that in our set.
|
84
|
+
positions.map { |pos| first(items[pos..-1]) }.
|
85
|
+
inject(set, :merge)
|
86
|
+
|
87
|
+
positions.each do |pos|
|
88
|
+
# If we're at the end of the rule...
|
89
|
+
if pos == items.size || nullable?(items[pos..-1])
|
90
|
+
# Then add the FOLLOW set of the left-hand side to our
|
91
|
+
# set.
|
92
|
+
set.merge follow(rule.label)
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
# Replace the cached empty set with our filled set.
|
98
|
+
@follows[token] = set
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|