antelope 0.2.4 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +11 -0
- data/bin/antelope +3 -3
- data/examples/{example.err → example.ace.err} +9 -9
- data/examples/{example.inf → example.ace.inf} +57 -9
- data/examples/example.ate +70 -0
- data/examples/example.ate.err +192 -0
- data/examples/example.ate.inf +432 -0
- data/lib/antelope/ace/compiler.rb +4 -4
- data/lib/antelope/ace/errors.rb +0 -18
- data/lib/antelope/ace.rb +6 -12
- data/lib/antelope/cli.rb +1 -1
- data/lib/antelope/dsl/compiler.rb +117 -0
- data/lib/antelope/dsl/contexts/base.rb +29 -0
- data/lib/antelope/dsl/contexts/main.rb +63 -0
- data/lib/antelope/dsl/contexts/match.rb +24 -0
- data/lib/antelope/dsl/contexts/precedence.rb +20 -0
- data/lib/antelope/dsl/contexts/production.rb +24 -0
- data/lib/antelope/dsl/contexts/terminal.rb +28 -0
- data/lib/antelope/dsl/contexts.rb +16 -0
- data/lib/antelope/dsl.rb +9 -0
- data/lib/antelope/errors.rb +18 -1
- data/lib/antelope/generation/constructor/first.rb +10 -12
- data/lib/antelope/generation/constructor/follow.rb +6 -6
- data/lib/antelope/generation/constructor/nullable.rb +6 -6
- data/lib/antelope/generation/constructor.rb +4 -4
- data/lib/antelope/generation/recognizer/rule.rb +17 -17
- data/lib/antelope/generation/recognizer/state.rb +9 -10
- data/lib/antelope/generation/recognizer.rb +8 -11
- data/lib/antelope/generation/tableizer.rb +2 -2
- data/lib/antelope/generator/base.rb +7 -7
- data/lib/antelope/generator/ruby.rb +1 -1
- data/lib/antelope/grammar/generation.rb +77 -0
- data/lib/antelope/grammar/loading.rb +84 -0
- data/lib/antelope/{ace → grammar}/precedence.rb +2 -4
- data/lib/antelope/grammar/precedences.rb +64 -0
- data/lib/antelope/{ace → grammar}/production.rb +11 -12
- data/lib/antelope/grammar/productions.rb +154 -0
- data/lib/antelope/grammar/symbols.rb +64 -0
- data/lib/antelope/{ace → grammar}/token/epsilon.rb +1 -2
- data/lib/antelope/{ace → grammar}/token/error.rb +1 -3
- data/lib/antelope/{ace → grammar}/token/nonterminal.rb +1 -3
- data/lib/antelope/{ace → grammar}/token/terminal.rb +1 -3
- data/lib/antelope/{ace → grammar}/token.rb +12 -15
- data/lib/antelope/grammar.rb +68 -0
- data/lib/antelope/version.rb +1 -1
- data/lib/antelope.rb +12 -6
- data/spec/antelope/ace/compiler_spec.rb +6 -6
- data/spec/antelope/ace/scanner_spec.rb +7 -7
- data/spec/antelope/generation/constructor_spec.rb +131 -0
- data/spec/support/grammar_helper.rb +2 -3
- metadata +32 -19
- data/lib/antelope/ace/grammar/generation.rb +0 -80
- data/lib/antelope/ace/grammar/loading.rb +0 -53
- data/lib/antelope/ace/grammar/precedences.rb +0 -68
- data/lib/antelope/ace/grammar/productions.rb +0 -156
- data/lib/antelope/ace/grammar/symbols.rb +0 -66
- data/lib/antelope/ace/grammar.rb +0 -69
- data/spec/antelope/constructor_spec.rb +0 -133
@@ -1,156 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module Antelope
|
4
|
-
module Ace
|
5
|
-
class Grammar
|
6
|
-
|
7
|
-
# Manages the productions of the grammar.
|
8
|
-
module Productions
|
9
|
-
|
10
|
-
# Returns a hash of all of the productions. The result is
|
11
|
-
# cached.
|
12
|
-
#
|
13
|
-
# @return [Hash<(Symbol, Array<Production>)>]
|
14
|
-
def productions
|
15
|
-
@_productions || generate_productions
|
16
|
-
end
|
17
|
-
|
18
|
-
# Returns all productions for all nonterminals, sorted by id.
|
19
|
-
#
|
20
|
-
# @return [Array<Production>]
|
21
|
-
def all_productions
|
22
|
-
productions.values.flatten.sort_by(&:id)
|
23
|
-
end
|
24
|
-
|
25
|
-
# Finds a token based on its corresponding symbol. First
|
26
|
-
# checks the productions, to see if it's a nonterminal; then,
|
27
|
-
# tries to find it in the terminals; otherwise, if the symbol
|
28
|
-
# is `error`, it returns a {Token::Error}; if the symbol is
|
29
|
-
# `nothing`, `ε`, or `%empty`, it returns a {Token::Epsilon}; if it's
|
30
|
-
# none of those, it raises an {UndefinedTokenError}.
|
31
|
-
#
|
32
|
-
# @raise [UndefinedTokenError] if the token doesn't exist.
|
33
|
-
# @param value [String, Symbol, #intern] the token's symbol to
|
34
|
-
# check.
|
35
|
-
# @return [Token]
|
36
|
-
def find_token(value)
|
37
|
-
value = value.intern
|
38
|
-
|
39
|
-
if productions.key?(value)
|
40
|
-
typed_nonterminals.find { |term| term.name == value } ||
|
41
|
-
Token::Nonterminal.new(value)
|
42
|
-
elsif terminal = terminals.
|
43
|
-
find { |term| term.name == value }
|
44
|
-
terminal
|
45
|
-
elsif value == :error
|
46
|
-
Token::Error.new
|
47
|
-
elsif [:nothing, :ε, :"%empty"].include?(value)
|
48
|
-
Token::Epsilon.new
|
49
|
-
else
|
50
|
-
raise UndefinedTokenError, "Could not find a token " \
|
51
|
-
"named #{value.inspect}"
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
private
|
56
|
-
|
57
|
-
# Actually generates the productions. Uses the rules from the
|
58
|
-
# compiler to construct the productions. Makes two loops over
|
59
|
-
# the compiler's rules; the first to tell the grammar that the
|
60
|
-
# nonterminal does exist, and the second to actually construct
|
61
|
-
# the productions. The first loop is for {#find_token},
|
62
|
-
# because otherwise it wouldn't be able to return a
|
63
|
-
# nonterminal properly.
|
64
|
-
#
|
65
|
-
# @return [Hash<(Symbol, Array<Production>)>]
|
66
|
-
def generate_productions
|
67
|
-
@_productions = {}
|
68
|
-
index = 0
|
69
|
-
|
70
|
-
rules = @compiler.rules.each do |rule|
|
71
|
-
productions[rule[:label]] = []
|
72
|
-
end
|
73
|
-
|
74
|
-
while index < rules.size
|
75
|
-
rule = rules[index]
|
76
|
-
productions[rule[:label]] <<
|
77
|
-
generate_production_for(rule, index)
|
78
|
-
index += 1
|
79
|
-
end
|
80
|
-
|
81
|
-
productions[:$start] = [default_production]
|
82
|
-
|
83
|
-
productions
|
84
|
-
end
|
85
|
-
|
86
|
-
# Generates a production for a given compiler rule. Converts
|
87
|
-
# the tokens in the set to their {Token} counterparts,
|
88
|
-
# and then sets the precedence for the production. If the
|
89
|
-
# precedence declaration from the compiler rule is empty,
|
90
|
-
# then it'll use the first terminal from the set to check for
|
91
|
-
# precedence; otherwise, it'll use the precedence declaration.
|
92
|
-
# This is to make sure that every production has a precedence
|
93
|
-
# declaration.
|
94
|
-
#
|
95
|
-
# @param rule [Hash] the compiler's rule.
|
96
|
-
# @param id [Numeric] the id for the production.
|
97
|
-
# @return [Production]
|
98
|
-
def generate_production_for(rule, id)
|
99
|
-
left = Token::Nonterminal.new(rule[:label])
|
100
|
-
items = rule[:set].map { |_| find_token(_[0]) }
|
101
|
-
prec = if rule[:prec].empty?
|
102
|
-
items.select(&:terminal?).first
|
103
|
-
else
|
104
|
-
rule[:prec].intern
|
105
|
-
end
|
106
|
-
|
107
|
-
prec = precedence_for(prec)
|
108
|
-
left.type = type_for(rule[:label])
|
109
|
-
left.id = rule[:label_id]
|
110
|
-
|
111
|
-
rule[:set].each_with_index do |tok, i|
|
112
|
-
items[i] = items[i].dup
|
113
|
-
items[i].id = tok[1]
|
114
|
-
end
|
115
|
-
items.delete_if(&:epsilon?)
|
116
|
-
|
117
|
-
Production.new(left, items, rule[:block], prec, id + 1)
|
118
|
-
end
|
119
|
-
|
120
|
-
# Returns the defined type for the given token name.
|
121
|
-
# Uses the `%type` directive to infer the corresponding types.
|
122
|
-
#
|
123
|
-
# @param token [Token, Symbol] the token (or token name) to check for
|
124
|
-
# types.
|
125
|
-
def type_for(token)
|
126
|
-
token = find_token(token) unless token.is_a?(Token)
|
127
|
-
|
128
|
-
case token
|
129
|
-
when Token::Nonterminal
|
130
|
-
token.type
|
131
|
-
when Token::Terminal
|
132
|
-
token.type
|
133
|
-
when Token::Epsilon
|
134
|
-
""
|
135
|
-
when Token::Error
|
136
|
-
""
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
# Creates the default production for the grammar. The left
|
141
|
-
# hand side of the production is the `:$start` symbol, with
|
142
|
-
# the right hand side being the first rule's left-hand side
|
143
|
-
# and the terminal `$end`. This production is automagically
|
144
|
-
# given the last precedence, and an id of 0.
|
145
|
-
#
|
146
|
-
# @return [Production]
|
147
|
-
def default_production
|
148
|
-
Production.new(Token::Nonterminal.new(:$start), [
|
149
|
-
Token::Nonterminal.new(@compiler.rules.first[:label]),
|
150
|
-
Token::Terminal.new(:$end)
|
151
|
-
], "", precedence.last, 0)
|
152
|
-
end
|
153
|
-
end
|
154
|
-
end
|
155
|
-
end
|
156
|
-
end
|
@@ -1,66 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module Antelope
|
4
|
-
module Ace
|
5
|
-
class Grammar
|
6
|
-
|
7
|
-
# Manages a list of the symbols in the grammar.
|
8
|
-
module Symbols
|
9
|
-
|
10
|
-
# A list of all terminals in the grammar. Checks the compiler
|
11
|
-
# options for terminals, and then returns an array of
|
12
|
-
# terminals. Caches the result.
|
13
|
-
#
|
14
|
-
# @return [Array<Token::Terminal>]
|
15
|
-
def terminals
|
16
|
-
@_terminals ||= begin
|
17
|
-
@compiler.options.fetch(:terminals) { [] }.map do |v|
|
18
|
-
Token::Terminal.new(*v)
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
# A list of all nonterminals in the grammar.
|
24
|
-
#
|
25
|
-
# @return [Array<Symbol>]
|
26
|
-
# @see #productions
|
27
|
-
def nonterminals
|
28
|
-
@_nonterminals ||= productions.keys
|
29
|
-
end
|
30
|
-
|
31
|
-
# A list of all nonterminals, with types.
|
32
|
-
#
|
33
|
-
# @return [Array<Token::Nonterminal>]
|
34
|
-
def typed_nonterminals
|
35
|
-
@_typed_nonterminals ||= begin
|
36
|
-
typed = []
|
37
|
-
compiler.options[:nonterminals].each do |data|
|
38
|
-
data[1].each do |nonterm|
|
39
|
-
typed << Token::Nonterminal.new(nonterm, data[0])
|
40
|
-
end
|
41
|
-
end
|
42
|
-
typed
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
# A list of all symbols in the grammar; includes both
|
47
|
-
# terminals and nonterminals.
|
48
|
-
#
|
49
|
-
# @return [Array<Token::Terminal, Symbol>]
|
50
|
-
# @see #terminals
|
51
|
-
# @see #nonterminals
|
52
|
-
def symbols
|
53
|
-
@_symbols ||= terminals + nonterminals
|
54
|
-
end
|
55
|
-
|
56
|
-
# Checks to see if the grammar uses the `error` terminal
|
57
|
-
# anywhere.
|
58
|
-
#
|
59
|
-
# @return [Boolean]
|
60
|
-
def contains_error_token?
|
61
|
-
all_productions.any? { |_| _.items.any?(&:error?) }
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
65
|
-
end
|
66
|
-
end
|
data/lib/antelope/ace/grammar.rb
DELETED
@@ -1,69 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
require "hashie"
|
4
|
-
require "antelope/ace/grammar/symbols"
|
5
|
-
require "antelope/ace/grammar/productions"
|
6
|
-
require "antelope/ace/grammar/precedences"
|
7
|
-
require "antelope/ace/grammar/loading"
|
8
|
-
require "antelope/ace/grammar/generation"
|
9
|
-
|
10
|
-
module Antelope
|
11
|
-
module Ace
|
12
|
-
|
13
|
-
# Defines a grammar from an Ace file. This handles setting up
|
14
|
-
# productions, loading from files, symbols, precedence, and
|
15
|
-
# generation.
|
16
|
-
class Grammar
|
17
|
-
|
18
|
-
include Symbols
|
19
|
-
include Productions
|
20
|
-
include Precedences
|
21
|
-
include Loading
|
22
|
-
include Grammar::Generation
|
23
|
-
|
24
|
-
# Used by a generation class; this is all the generated states
|
25
|
-
# of the grammar.
|
26
|
-
#
|
27
|
-
# @return [Set<Generation::Recognizer::State>]
|
28
|
-
# @see Generation::Recognizer
|
29
|
-
attr_accessor :states
|
30
|
-
|
31
|
-
# The name of the grammar. This is normally assumed from a file
|
32
|
-
# name.
|
33
|
-
#
|
34
|
-
# @return [String]
|
35
|
-
attr_accessor :name
|
36
|
-
|
37
|
-
# The output directory for the grammar. This is normally the
|
38
|
-
# same directory as the Ace file.
|
39
|
-
#
|
40
|
-
# @return [Pathname]
|
41
|
-
attr_accessor :output
|
42
|
-
|
43
|
-
# The compiler for the Ace file.
|
44
|
-
#
|
45
|
-
# @return [Compiler]
|
46
|
-
attr_reader :compiler
|
47
|
-
|
48
|
-
# Initialize.
|
49
|
-
#
|
50
|
-
# @param name [String]
|
51
|
-
# @param output [String] the output directory. Automagically
|
52
|
-
# turned into a Pathname.
|
53
|
-
# @param compiler [Compiler]
|
54
|
-
def initialize(name, output, compiler)
|
55
|
-
@name = name
|
56
|
-
@output = Pathname.new(output)
|
57
|
-
@compiler = compiler
|
58
|
-
end
|
59
|
-
|
60
|
-
# Extra options from the compiler. This can be used by
|
61
|
-
# generators for output information.
|
62
|
-
#
|
63
|
-
# @return [Hash]
|
64
|
-
def options
|
65
|
-
compiler.options[:extra]
|
66
|
-
end
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
@@ -1,133 +0,0 @@
|
|
1
|
-
describe Generation::Constructor do
|
2
|
-
let(:grammar) { double("grammar") }
|
3
|
-
let(:terminal) { token(:TERMINAL) }
|
4
|
-
let(:epsilon) { token(:epsilon) }
|
5
|
-
|
6
|
-
subject { described_class.new(grammar) }
|
7
|
-
|
8
|
-
context "#nullable?" do
|
9
|
-
context "when given an epsilon token" do
|
10
|
-
it "returns true" do
|
11
|
-
expect(subject.nullable?(epsilon)).to be true
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
context "when given a terminal" do
|
16
|
-
it "returns false" do
|
17
|
-
expect(subject.nullable?(terminal)).to be false
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
context "when given an array" do
|
22
|
-
context "with one of the elements not nullable" do
|
23
|
-
it "returns false" do
|
24
|
-
expect(subject.nullable?([terminal, epsilon])).to be false
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
context "with all of the elements nullable" do
|
29
|
-
it "returns true" do
|
30
|
-
expect(subject.nullable?([epsilon, epsilon])).to be true
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
context "when given a nonterminal" do
|
36
|
-
let(:grammar) { with_recognizer }
|
37
|
-
|
38
|
-
context "with no nullable productions" do
|
39
|
-
let(:nonterminal) { Ace::Token::Nonterminal.new(:l) }
|
40
|
-
|
41
|
-
it "returns false" do
|
42
|
-
expect(subject.nullable?(nonterminal)).to be false
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
context "with a nullable production" do
|
47
|
-
let(:nonterminal) { Ace::Token::Nonterminal.new(:e) }
|
48
|
-
|
49
|
-
it "returns true" do
|
50
|
-
expect(subject.nullable?(nonterminal)).to be true
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
context "when given a bad argument" do
|
56
|
-
it "raises an error" do
|
57
|
-
expect { subject.nullable?(nil) }.to raise_error(ArgumentError)
|
58
|
-
end
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
context "#first" do
|
63
|
-
context "when given an epsilon token" do
|
64
|
-
it "generates an empty set" do
|
65
|
-
expect(subject.first(epsilon)).to eq Set.new
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
context "when given a terminal" do
|
70
|
-
it "generates a set" do
|
71
|
-
expect(subject.first(terminal)).to eq [terminal].to_set
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
context "when given an array" do
|
76
|
-
let(:terminal2) { token(:terminal, :TERMINAL2) }
|
77
|
-
|
78
|
-
it "generates a set" do
|
79
|
-
expect(subject.first([epsilon, terminal])).
|
80
|
-
to eq [terminal].to_set
|
81
|
-
expect(subject.first([terminal, terminal2])).
|
82
|
-
to eq [terminal].to_set
|
83
|
-
end
|
84
|
-
end
|
85
|
-
|
86
|
-
context "when given a nonterminal" do
|
87
|
-
let(:grammar) { with_recognizer }
|
88
|
-
let(:nonterminal) { token(:nonterminal, :e) }
|
89
|
-
|
90
|
-
it "generates a set" do
|
91
|
-
expect(subject.first(nonterminal)).
|
92
|
-
to eq [token(:terminal, :IDENT), token(:terminal, :STAR, "*")].to_set
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
context "when given a bad argument" do
|
97
|
-
it "raises an error" do
|
98
|
-
expect { subject.first(nil) }.to raise_error(ArgumentError)
|
99
|
-
end
|
100
|
-
end
|
101
|
-
end
|
102
|
-
|
103
|
-
context "#follow" do
|
104
|
-
context "when given a bad argument" do
|
105
|
-
it "raises an error" do
|
106
|
-
expect { subject.follow(nil) }.to raise_error(ArgumentError)
|
107
|
-
end
|
108
|
-
end
|
109
|
-
|
110
|
-
context "when given a nonterminal" do
|
111
|
-
let(:grammar) { with_recognizer }
|
112
|
-
let(:nonterminal) { token(:nonterminal, :l) }
|
113
|
-
|
114
|
-
before do
|
115
|
-
subject.productions.merge grammar.productions.values.flatten
|
116
|
-
end
|
117
|
-
|
118
|
-
it "generates a set" do
|
119
|
-
expect(subject.follow(nonterminal)).to eq [
|
120
|
-
token(:terminal, :EQUALS, "="),
|
121
|
-
token(:terminal, :"$end")
|
122
|
-
].to_set
|
123
|
-
end
|
124
|
-
end
|
125
|
-
end
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
def token(type, name = nil, value = nil, ttype = nil, id = nil)
|
130
|
-
type = Ace::Token.const_get(type.to_s.capitalize)
|
131
|
-
type.new(name, ttype, id, value)
|
132
|
-
end
|
133
|
-
end
|