rly 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +2 -2
- data/assets/ply_dump.erb +15 -0
- data/lib/rly.rb +2 -0
- data/lib/rly/lex.rb +54 -25
- data/lib/rly/lex_token.rb +8 -0
- data/lib/rly/parse/grammar.rb +211 -0
- data/lib/rly/parse/lr_item.rb +32 -0
- data/lib/rly/parse/lr_table.rb +529 -0
- data/lib/rly/parse/ply_dump.rb +52 -0
- data/lib/rly/parse/production.rb +38 -0
- data/lib/rly/parse/rule_parser.rb +68 -0
- data/lib/rly/parse/yacc_production.rb +11 -0
- data/lib/rly/parse/yacc_symbol.rb +6 -0
- data/lib/rly/version.rb +2 -1
- data/lib/rly/yacc.rb +355 -0
- data/spec/lex/{lexer_spec.rb → lex_spec.rb} +45 -24
- data/spec/parse/calc_spec.rb +95 -0
- data/spec/parse/grammar_spec.rb +239 -0
- data/spec/parse/lr_table_spec.rb +212 -0
- data/spec/parse/production_spec.rb +18 -0
- data/spec/parse/rule_parser_spec.rb +20 -0
- data/spec/parse/yacc_spec.rb +57 -0
- data/spec/spec_helper.rb +5 -0
- metadata +26 -4
@@ -1,19 +1,19 @@
|
|
1
1
|
require "rly"
|
2
2
|
|
3
3
|
describe Rly::Lex do
|
4
|
-
context "
|
4
|
+
context "Basic lexer" do
|
5
5
|
testLexer = Class.new(Rly::Lex) do
|
6
6
|
token :FIRST, /[a-z]+/
|
7
7
|
token :SECOND, /[A-Z]+/
|
8
8
|
end
|
9
9
|
|
10
|
-
it "
|
10
|
+
it "has a list of defined tokens" do
|
11
11
|
testLexer.tokens.map { |t, r, b| t }.should == [:FIRST, :SECOND]
|
12
12
|
end
|
13
13
|
|
14
|
-
it "
|
14
|
+
it "outputs tokens one by one" do
|
15
15
|
test = 'qweASDzxc'
|
16
|
-
l = testLexer.new(test)
|
16
|
+
l = testLexer.new(test)
|
17
17
|
|
18
18
|
tok = l.next
|
19
19
|
tok.type.should == :FIRST
|
@@ -27,40 +27,48 @@ describe Rly::Lex do
|
|
27
27
|
tok.type.should == :FIRST
|
28
28
|
tok.value.should == 'zxc'
|
29
29
|
|
30
|
-
|
30
|
+
l.next.should be_nil
|
31
|
+
end
|
32
|
+
|
33
|
+
it "provides tokens in terminals list" do
|
34
|
+
testLexer.terminals.should == [:FIRST, :SECOND]
|
31
35
|
end
|
32
36
|
end
|
33
37
|
|
34
|
-
context "
|
38
|
+
context "Lexer with literals defined" do
|
35
39
|
testLexer = Class.new(Rly::Lex) do
|
36
40
|
literals "+-*/"
|
37
41
|
end
|
38
42
|
|
39
|
-
it "
|
43
|
+
it "outputs literal tokens" do
|
40
44
|
test = '++--'
|
41
|
-
l = testLexer.new(test)
|
45
|
+
l = testLexer.new(test)
|
42
46
|
|
43
47
|
l.next.value.should == '+'
|
44
48
|
l.next.value.should == '+'
|
45
49
|
l.next.value.should == '-'
|
46
50
|
l.next.value.should == '-'
|
47
51
|
end
|
52
|
+
|
53
|
+
it "provides literals in terminals list" do
|
54
|
+
testLexer.terminals.should == ['+', '-', '*', '/']
|
55
|
+
end
|
48
56
|
end
|
49
57
|
|
50
|
-
context "
|
58
|
+
context "Lexer with ignores defined" do
|
51
59
|
testLexer = Class.new(Rly::Lex) do
|
52
60
|
ignore " \t"
|
53
61
|
end
|
54
62
|
|
55
|
-
it "
|
63
|
+
it "honours ignores list" do
|
56
64
|
test = " \t\t \t \t"
|
57
|
-
l = testLexer.new(test)
|
65
|
+
l = testLexer.new(test)
|
58
66
|
|
59
|
-
|
67
|
+
l.next.should be_nil
|
60
68
|
end
|
61
69
|
end
|
62
70
|
|
63
|
-
context "
|
71
|
+
context "Lexer with token that has a block given" do
|
64
72
|
testLexer = Class.new(Rly::Lex) do
|
65
73
|
token :TEST, /\d+/ do |t|
|
66
74
|
t.value = t.value.to_i
|
@@ -70,44 +78,46 @@ describe Rly::Lex do
|
|
70
78
|
|
71
79
|
it "calls a block to further process a token" do
|
72
80
|
test = "42"
|
73
|
-
l = testLexer.new(test)
|
81
|
+
l = testLexer.new(test)
|
74
82
|
|
75
|
-
l.next.value == 42
|
83
|
+
l.next.value.should == 42
|
76
84
|
end
|
77
85
|
end
|
78
86
|
|
79
|
-
context "
|
87
|
+
context "Lexer with unnamed token and block given" do
|
80
88
|
testLexer = Class.new(Rly::Lex) do
|
81
89
|
token /\n+/ do |t| t.lexer.lineno = t.value.count("\n"); t end
|
82
90
|
end
|
83
91
|
|
84
|
-
it "
|
92
|
+
it "processes but don't output tokens without a name" do
|
85
93
|
test = "\n\n\n"
|
86
94
|
l = testLexer.new(test)
|
87
95
|
|
88
|
-
|
96
|
+
l.next.should be_nil
|
89
97
|
|
90
98
|
l.lineno.should == 3
|
91
99
|
end
|
92
100
|
end
|
93
101
|
|
94
|
-
context "
|
102
|
+
context "Lexer with no error handler" do
|
95
103
|
it "raises an error, if there are no suitable tokens" do
|
96
104
|
testLexer = Class.new(Rly::Lex) do
|
97
105
|
token :NUM, /\d+/
|
98
106
|
end
|
99
107
|
l = testLexer.new("test")
|
100
108
|
|
101
|
-
expect { l.
|
109
|
+
expect { l.next } .to raise_error(Rly::LexError)
|
102
110
|
end
|
103
111
|
|
104
112
|
it "raises an error, if there is no possible tokens defined" do
|
105
113
|
testLexer = Class.new(Rly::Lex) do ; end
|
106
114
|
l = testLexer.new("test")
|
107
115
|
|
108
|
-
expect { l.
|
116
|
+
expect { l.next } .to raise_error(Rly::LexError)
|
109
117
|
end
|
118
|
+
end
|
110
119
|
|
120
|
+
context "Lexer with error handler" do
|
111
121
|
it "calls an error function if it is available, which returns a fixed token" do
|
112
122
|
testLexer = Class.new(Rly::Lex) do
|
113
123
|
token :NUM, /\d+/
|
@@ -119,11 +129,11 @@ describe Rly::Lex do
|
|
119
129
|
end
|
120
130
|
l = testLexer.new("test")
|
121
131
|
|
122
|
-
tok = l.
|
132
|
+
tok = l.next
|
123
133
|
tok.value.should == "BAD t"
|
124
134
|
tok.type.should == :error
|
125
135
|
|
126
|
-
tok = l.
|
136
|
+
tok = l.next
|
127
137
|
tok.value.should == "BAD e"
|
128
138
|
tok.type.should == :error
|
129
139
|
end
|
@@ -138,7 +148,18 @@ describe Rly::Lex do
|
|
138
148
|
end
|
139
149
|
l = testLexer.new("test1")
|
140
150
|
|
141
|
-
l.
|
151
|
+
l.next.value.should == '1'
|
142
152
|
end
|
143
153
|
end
|
154
|
+
|
155
|
+
it "doesn't try to skip chars over" do
|
156
|
+
testLexer = Class.new(Rly::Lex) do
|
157
|
+
token :NUM, /\d+/
|
158
|
+
literals ","
|
159
|
+
end
|
160
|
+
l = testLexer.new(",10")
|
161
|
+
|
162
|
+
l.next.type.should == ','
|
163
|
+
l.next.type.should == :NUM
|
164
|
+
end
|
144
165
|
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require "rly"
|
2
|
+
|
3
|
+
module CalcSpecExample
|
4
|
+
class CalcLex < Rly::Lex
|
5
|
+
literals '=+-*/()'
|
6
|
+
ignore " \t"
|
7
|
+
|
8
|
+
token :NAME, /[a-zA-Z_][a-zA-Z0-9_]*/
|
9
|
+
|
10
|
+
token :NUMBER, /\d+/ do |t|
|
11
|
+
t.value = t.value.to_i
|
12
|
+
t
|
13
|
+
end
|
14
|
+
|
15
|
+
token(/\n+/) { |t| t.lexer.lineno += t.value.count("\n"); t } # hand the token back, like the other token blocks, instead of the Integer from +=
|
16
|
+
|
17
|
+
on_error do |t|
|
18
|
+
puts "Illegal character #{t.value}"
|
19
|
+
t.lexer.pos += 1
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
class CalcParse < Rly::Yacc
|
24
|
+
def names
|
25
|
+
@names ||= {}
|
26
|
+
end
|
27
|
+
|
28
|
+
precedence :left, '+', '-'
|
29
|
+
precedence :left, '*', '/'
|
30
|
+
precedence :right, :UMINUS
|
31
|
+
|
32
|
+
rule 'statement : NAME "=" expression' do |st, n, _, e|
|
33
|
+
self.names[n.value] = e.value
|
34
|
+
end
|
35
|
+
|
36
|
+
rule 'statement : expression' do |st, e|
|
37
|
+
st.value = e.value
|
38
|
+
end
|
39
|
+
|
40
|
+
rule 'expression : expression "+" expression
|
41
|
+
| expression "-" expression
|
42
|
+
| expression "*" expression
|
43
|
+
| expression "/" expression' do |ex, e1, op, e2|
|
44
|
+
ex.value = e1.value.send(op.value, e2.value)
|
45
|
+
end
|
46
|
+
|
47
|
+
# rule 'expression : "-" expression %prec UMINUS' do |ex, _, e|
|
48
|
+
# ex.value = - e.value
|
49
|
+
# end
|
50
|
+
|
51
|
+
rule 'expression : "(" expression ")"' do |ex, _, e, _|
|
52
|
+
ex.value = e.value
|
53
|
+
end
|
54
|
+
|
55
|
+
rule 'expression : NUMBER' do |ex, n|
|
56
|
+
ex.value = n.value
|
57
|
+
end
|
58
|
+
|
59
|
+
rule 'expression : NAME' do |ex, n|
|
60
|
+
nval = self.names[n.value]
|
61
|
+
unless nval
|
62
|
+
puts "Undefined name '#{n.value}'"
|
63
|
+
nval = 0
|
64
|
+
end
|
65
|
+
ex.value = nval
|
66
|
+
end
|
67
|
+
|
68
|
+
# rule_error do |p|
|
69
|
+
# if p
|
70
|
+
# puts "Syntax error at '#{p.value}'"
|
71
|
+
# else
|
72
|
+
# puts "Syntax error at EOF"
|
73
|
+
# end
|
74
|
+
# end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
describe 'Calculator' do
|
79
|
+
before :each do
|
80
|
+
@calc = CalcSpecExample::CalcParse.new(CalcSpecExample::CalcLex.new)
|
81
|
+
end
|
82
|
+
|
83
|
+
it "calculates simple expressions" do
|
84
|
+
@calc.parse('2 + 2').should == 4
|
85
|
+
end
|
86
|
+
|
87
|
+
it "calculates complex expressions" do
|
88
|
+
@calc.parse('(3-1)*6/(3+1)').should == 3
|
89
|
+
end
|
90
|
+
|
91
|
+
it "keeps state between parses" do
|
92
|
+
@calc.parse('magic = 42')
|
93
|
+
@calc.parse('2 * magic').should == 84
|
94
|
+
end
|
95
|
+
end
|
@@ -0,0 +1,239 @@
|
|
1
|
+
require "rly"
|
2
|
+
require "rly/parse/grammar"
|
3
|
+
require "rly/parse/ply_dump"
|
4
|
+
|
5
|
+
describe Rly::Grammar do
|
6
|
+
it "requires a list of terminals to be initialized" do
|
7
|
+
g = Rly::Grammar.new([:NUMBER])
|
8
|
+
g.terminals[:NUMBER].should_not be_nil
|
9
|
+
end
|
10
|
+
|
11
|
+
it "rejects terminals named in lowercase" do
|
12
|
+
expect { Rly::Grammar.new([:test]) } .to raise_error(ArgumentError)
|
13
|
+
end
|
14
|
+
|
15
|
+
it "has a default terminal -- error" do
|
16
|
+
g = Rly::Grammar.new([])
|
17
|
+
g.terminals[:error].should_not be_nil
|
18
|
+
end
|
19
|
+
|
20
|
+
context "Precedence specs" do
|
21
|
+
it "allows to set precedence" do
|
22
|
+
g = Rly::Grammar.new([])
|
23
|
+
g.set_precedence('+', :left, 1)
|
24
|
+
end
|
25
|
+
|
26
|
+
it "does not allow to set precedence after any productions have been added" do
|
27
|
+
g = Rly::Grammar.new([])
|
28
|
+
g.add_production(:expression, [:expression, '+', :expression])
|
29
|
+
expect { g.set_precedence('+', :left, 1) } .to raise_error(RuntimeError)
|
30
|
+
end
|
31
|
+
|
32
|
+
it "does not allow setting precedence several times for same terminal" do
|
33
|
+
g = Rly::Grammar.new([])
|
34
|
+
g.set_precedence('+', :left, 1)
|
35
|
+
expect { g.set_precedence('+', :left, 1) } .to raise_error(ArgumentError)
|
36
|
+
end
|
37
|
+
|
38
|
+
it "allows setting only :left, :right or :noassoc precedence associations" do
|
39
|
+
g = Rly::Grammar.new([])
|
40
|
+
expect { g.set_precedence('+', :bad, 1) } .to raise_error(ArgumentError)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
context "Production specs" do
|
45
|
+
it "returns a Production object when adding production" do
|
46
|
+
g = Rly::Grammar.new([])
|
47
|
+
p = g.add_production(:expression, [:expression, '+', :expression])
|
48
|
+
p.should be_a(Rly::Production)
|
49
|
+
end
|
50
|
+
|
51
|
+
it "rejects productions not named in lowercase" do
|
52
|
+
g = Rly::Grammar.new([])
|
53
|
+
expect { g.add_production(:BAD, []) } .to raise_error(ArgumentError)
|
54
|
+
end
|
55
|
+
|
56
|
+
it "rejects production named :error" do
|
57
|
+
g = Rly::Grammar.new([])
|
58
|
+
expect { g.add_production(:error, []) } .to raise_error(ArgumentError)
|
59
|
+
end
|
60
|
+
|
61
|
+
it "registers one-char terminals" do
|
62
|
+
g = Rly::Grammar.new([])
|
63
|
+
g.add_production(:expression, [:expression, '+', :expression])
|
64
|
+
g.terminals['+'].should_not be_nil
|
65
|
+
end
|
66
|
+
|
67
|
+
it "raises ArgumentError if one-char terminal is not actually an one char" do
|
68
|
+
g = Rly::Grammar.new([])
|
69
|
+
expect { g.add_production(:expression, [:expression, 'lulz', :expression]) } .to raise_error(ArgumentError)
|
70
|
+
end
|
71
|
+
|
72
|
+
it "calculates production precedence based on rightmost terminal" do
|
73
|
+
g = Rly::Grammar.new([])
|
74
|
+
g.set_precedence('+', :left, 1)
|
75
|
+
p = g.add_production(:expression, [:expression, '+', :expression])
|
76
|
+
p.precedence.should == [:left, 1]
|
77
|
+
end
|
78
|
+
|
79
|
+
it "defaults precedence to [:right, 0]" do
|
80
|
+
g = Rly::Grammar.new([])
|
81
|
+
p = g.add_production(:expression, [:expression, '+', :expression])
|
82
|
+
p.precedence.should == [:right, 0]
|
83
|
+
end
|
84
|
+
|
85
|
+
it "adds production to the list of productions" do
|
86
|
+
g = Rly::Grammar.new([])
|
87
|
+
p = g.add_production(:expression, [:expression, '+', :expression])
|
88
|
+
g.productions.count.should == 2
|
89
|
+
g.productions.last.should == p # a bare `==` is evaluated and discarded; `.should` makes it an actual expectation
|
90
|
+
end
|
91
|
+
|
92
|
+
it "adds production to the list of productions referenced by names" do
|
93
|
+
g = Rly::Grammar.new([])
|
94
|
+
p = g.add_production(:expression, [:expression, '+', :expression])
|
95
|
+
g.prodnames.count.should == 1
|
96
|
+
g.prodnames[:expression].should == [p]
|
97
|
+
end
|
98
|
+
|
99
|
+
it "adds production to the list of non-terminals" do
|
100
|
+
g = Rly::Grammar.new([])
|
101
|
+
p = g.add_production(:expression, [:expression, '+', :expression])
|
102
|
+
g.nonterminals[:expression].should_not be_nil
|
103
|
+
end
|
104
|
+
|
105
|
+
it "adds production number to referenced terminals" do
|
106
|
+
g = Rly::Grammar.new([])
|
107
|
+
p = g.add_production(:expression, [:expression, '+', :expression])
|
108
|
+
g.terminals['+'].should == [p.index]
|
109
|
+
end
|
110
|
+
|
111
|
+
it "adds production number to referenced non-terminals" do
|
112
|
+
g = Rly::Grammar.new([])
|
113
|
+
p = g.add_production(:expression, [:expression, '+', :expression])
|
114
|
+
g.nonterminals[:expression].should == [p.index, p.index]
|
115
|
+
end
|
116
|
+
|
117
|
+
it "does not allow duplicate rules" do
|
118
|
+
g = Rly::Grammar.new([])
|
119
|
+
g.add_production(:expression, [:expression, '+', :expression])
|
120
|
+
expect { g.add_production(:expression, [:expression, '+', :expression]) } .to raise_error(ArgumentError)
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
context "Start symbol specs" do
|
125
|
+
before :each do
|
126
|
+
@g = Rly::Grammar.new([])
|
127
|
+
p = @g.add_production(:expression, [:expression, '+', :expression])
|
128
|
+
@g.set_start()
|
129
|
+
end
|
130
|
+
|
131
|
+
it "sets start symbol if it is specified explicitly" do
|
132
|
+
@g.start.should == :expression
|
133
|
+
end
|
134
|
+
|
135
|
+
it "sets start symbol based on first production if it is not specified explicitly" do
|
136
|
+
@g.start.should == :expression
|
137
|
+
end
|
138
|
+
|
139
|
+
it "accepts only existing non-terminal as a start" do
|
140
|
+
g = Rly::Grammar.new([:NUMBER])
|
141
|
+
p = g.add_production(:expression, [:expression, '+', :expression])
|
142
|
+
expect { g.set_start(:NUMBER) } .to raise_error(ArgumentError)
|
143
|
+
expect { g.set_start(:new_sym) } .to raise_error(ArgumentError)
|
144
|
+
end
|
145
|
+
|
146
|
+
it "sets zero rule to :S' -> :start" do
|
147
|
+
prod_0 = @g.productions[0]
|
148
|
+
prod_0.index.should == 0
|
149
|
+
prod_0.name.should == :"S'"
|
150
|
+
prod_0.prod.should == [:expression]
|
151
|
+
end
|
152
|
+
|
153
|
+
it "adds 0 to start rule nonterminals" do
|
154
|
+
@g.nonterminals[:expression][-1].should == 0
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
context "LR table generation specs" do
|
159
|
+
before :each do
|
160
|
+
@g = Rly::Grammar.new([:NUMBER])
|
161
|
+
|
162
|
+
@g.set_precedence('+', :left, 1)
|
163
|
+
@g.set_precedence('-', :left, 1)
|
164
|
+
|
165
|
+
@g.add_production(:statement, [:expression])
|
166
|
+
@g.add_production(:expression, [:expression, '+', :expression])
|
167
|
+
@g.add_production(:expression, [:expression, '-', :expression])
|
168
|
+
@g.add_production(:expression, [:NUMBER])
|
169
|
+
|
170
|
+
@g.set_start
|
171
|
+
|
172
|
+
@g.build_lritems
|
173
|
+
end
|
174
|
+
|
175
|
+
it "builds LR items for grammar" do
|
176
|
+
@g.productions.length.should == 5
|
177
|
+
items = [2, 2, 4, 4, 2]
|
178
|
+
@g.productions.each_with_index do |p, i|
|
179
|
+
p.lr_items.count.should == items[i]
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
it "sets LR items to correct default values" do
|
184
|
+
i = @g.productions[0].lr_items[0]
|
185
|
+
i.lr_after.should == [@g.productions[1]]
|
186
|
+
i.prod.should == [:'.', :statement]
|
187
|
+
|
188
|
+
i = @g.productions[0].lr_items[1]
|
189
|
+
i.lr_after.should == []
|
190
|
+
i.prod.should == [:statement, :'.']
|
191
|
+
|
192
|
+
i = @g.productions[2].lr_items[0]
|
193
|
+
i.lr_after.should == @g.productions[2..4]
|
194
|
+
i.prod.should == [:'.', :expression, '+', :expression]
|
195
|
+
end
|
196
|
+
|
197
|
+
it "builds correct FIRST table" do
|
198
|
+
first = @g.compute_first
|
199
|
+
first.should == {
|
200
|
+
:'$end' => [:'$end'],
|
201
|
+
'+' => ['+'],
|
202
|
+
'-' => ['-'],
|
203
|
+
:NUMBER => [:NUMBER],
|
204
|
+
:error => [:error],
|
205
|
+
:expression => [:NUMBER],
|
206
|
+
:statement => [:NUMBER]
|
207
|
+
}
|
208
|
+
end
|
209
|
+
|
210
|
+
it "builds correct FOLLOW table" do
|
211
|
+
@g.compute_first
|
212
|
+
follow = @g.compute_follow
|
213
|
+
follow.should == { :expression => [:'$end', '+', '-'], :statement => [:'$end'] }
|
214
|
+
end
|
215
|
+
end
|
216
|
+
|
217
|
+
it "should generate parser.out same as Ply does" do
|
218
|
+
pending "thx to python dicts we have a different order of states. ideas?"
|
219
|
+
g = Rly::Grammar.new([:NUMBER])
|
220
|
+
|
221
|
+
g.set_precedence('+', :left, 1)
|
222
|
+
g.set_precedence('-', :left, 1)
|
223
|
+
|
224
|
+
g.add_production(:statement, [:expression])
|
225
|
+
g.add_production(:expression, [:expression, '+', :expression])
|
226
|
+
g.add_production(:expression, [:expression, '-', :expression])
|
227
|
+
g.add_production(:expression, [:NUMBER])
|
228
|
+
|
229
|
+
g.set_start
|
230
|
+
|
231
|
+
d = Rly::PlyDump.new(g)
|
232
|
+
orig = File.join(File.dirname(__FILE__), '..', 'fixtures', 'minicalc_ply_parser.out')
|
233
|
+
dst = File.join(File.dirname(__FILE__), '..', 'fixtures', 'minicalc_ply_parser.out.new')
|
234
|
+
|
235
|
+
File.open(dst, 'w') { |f| f.write(d.to_s) } # File.open rather than Kernel#open: the argument is always a plain file path
|
236
|
+
|
237
|
+
d.to_s.should == File.read(orig) # File.read closes the file; open(orig).read left the handle open
|
238
|
+
end
|
239
|
+
end
|