rly 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +2 -2
- data/assets/ply_dump.erb +15 -0
- data/lib/rly.rb +2 -0
- data/lib/rly/lex.rb +54 -25
- data/lib/rly/lex_token.rb +8 -0
- data/lib/rly/parse/grammar.rb +211 -0
- data/lib/rly/parse/lr_item.rb +32 -0
- data/lib/rly/parse/lr_table.rb +529 -0
- data/lib/rly/parse/ply_dump.rb +52 -0
- data/lib/rly/parse/production.rb +38 -0
- data/lib/rly/parse/rule_parser.rb +68 -0
- data/lib/rly/parse/yacc_production.rb +11 -0
- data/lib/rly/parse/yacc_symbol.rb +6 -0
- data/lib/rly/version.rb +2 -1
- data/lib/rly/yacc.rb +355 -0
- data/spec/lex/{lexer_spec.rb → lex_spec.rb} +45 -24
- data/spec/parse/calc_spec.rb +95 -0
- data/spec/parse/grammar_spec.rb +239 -0
- data/spec/parse/lr_table_spec.rb +212 -0
- data/spec/parse/production_spec.rb +18 -0
- data/spec/parse/rule_parser_spec.rb +20 -0
- data/spec/parse/yacc_spec.rb +57 -0
- data/spec/spec_helper.rb +5 -0
- metadata +26 -4
@@ -1,19 +1,19 @@
|
|
1
1
|
require "rly"
|
2
2
|
|
3
3
|
describe Rly::Lex do
|
4
|
-
context "
|
4
|
+
context "Basic lexer" do
|
5
5
|
testLexer = Class.new(Rly::Lex) do
|
6
6
|
token :FIRST, /[a-z]+/
|
7
7
|
token :SECOND, /[A-Z]+/
|
8
8
|
end
|
9
9
|
|
10
|
-
it "
|
10
|
+
it "has a list of defined tokens" do
|
11
11
|
testLexer.tokens.map { |t, r, b| t }.should == [:FIRST, :SECOND]
|
12
12
|
end
|
13
13
|
|
14
|
-
it "
|
14
|
+
it "outputs tokens one by one" do
|
15
15
|
test = 'qweASDzxc'
|
16
|
-
l = testLexer.new(test)
|
16
|
+
l = testLexer.new(test)
|
17
17
|
|
18
18
|
tok = l.next
|
19
19
|
tok.type.should == :FIRST
|
@@ -27,40 +27,48 @@ describe Rly::Lex do
|
|
27
27
|
tok.type.should == :FIRST
|
28
28
|
tok.value.should == 'zxc'
|
29
29
|
|
30
|
-
|
30
|
+
l.next.should be_nil
|
31
|
+
end
|
32
|
+
|
33
|
+
it "provides tokens in terminals list" do
|
34
|
+
testLexer.terminals.should == [:FIRST, :SECOND]
|
31
35
|
end
|
32
36
|
end
|
33
37
|
|
34
|
-
context "
|
38
|
+
context "Lexer with literals defined" do
|
35
39
|
testLexer = Class.new(Rly::Lex) do
|
36
40
|
literals "+-*/"
|
37
41
|
end
|
38
42
|
|
39
|
-
it "
|
43
|
+
it "outputs literal tokens" do
|
40
44
|
test = '++--'
|
41
|
-
l = testLexer.new(test)
|
45
|
+
l = testLexer.new(test)
|
42
46
|
|
43
47
|
l.next.value.should == '+'
|
44
48
|
l.next.value.should == '+'
|
45
49
|
l.next.value.should == '-'
|
46
50
|
l.next.value.should == '-'
|
47
51
|
end
|
52
|
+
|
53
|
+
it "provides literals in terminals list" do
|
54
|
+
testLexer.terminals.should == ['+', '-', '*', '/']
|
55
|
+
end
|
48
56
|
end
|
49
57
|
|
50
|
-
context "
|
58
|
+
context "Lexer with ignores defined" do
|
51
59
|
testLexer = Class.new(Rly::Lex) do
|
52
60
|
ignore " \t"
|
53
61
|
end
|
54
62
|
|
55
|
-
it "
|
63
|
+
it "honours ignores list" do
|
56
64
|
test = " \t\t \t \t"
|
57
|
-
l = testLexer.new(test)
|
65
|
+
l = testLexer.new(test)
|
58
66
|
|
59
|
-
|
67
|
+
l.next.should be_nil
|
60
68
|
end
|
61
69
|
end
|
62
70
|
|
63
|
-
context "
|
71
|
+
context "Lexer with token that has a block given" do
|
64
72
|
testLexer = Class.new(Rly::Lex) do
|
65
73
|
token :TEST, /\d+/ do |t|
|
66
74
|
t.value = t.value.to_i
|
@@ -70,44 +78,46 @@ describe Rly::Lex do
|
|
70
78
|
|
71
79
|
it "calls a block to further process a token" do
|
72
80
|
test = "42"
|
73
|
-
l = testLexer.new(test)
|
81
|
+
l = testLexer.new(test)
|
74
82
|
|
75
|
-
l.next.value == 42
|
83
|
+
l.next.value.should == 42
|
76
84
|
end
|
77
85
|
end
|
78
86
|
|
79
|
-
context "
|
87
|
+
context "Lexer with unnamed token and block given" do
|
80
88
|
testLexer = Class.new(Rly::Lex) do
|
81
89
|
token /\n+/ do |t| t.lexer.lineno = t.value.count("\n"); t end
|
82
90
|
end
|
83
91
|
|
84
|
-
it "
|
92
|
+
it "processes but don't output tokens without a name" do
|
85
93
|
test = "\n\n\n"
|
86
94
|
l = testLexer.new(test)
|
87
95
|
|
88
|
-
|
96
|
+
l.next.should be_nil
|
89
97
|
|
90
98
|
l.lineno.should == 3
|
91
99
|
end
|
92
100
|
end
|
93
101
|
|
94
|
-
context "
|
102
|
+
context "Lexer with no error handler" do
|
95
103
|
it "raises an error, if there are no suitable tokens" do
|
96
104
|
testLexer = Class.new(Rly::Lex) do
|
97
105
|
token :NUM, /\d+/
|
98
106
|
end
|
99
107
|
l = testLexer.new("test")
|
100
108
|
|
101
|
-
expect { l.
|
109
|
+
expect { l.next } .to raise_error(Rly::LexError)
|
102
110
|
end
|
103
111
|
|
104
112
|
it "raises an error, if there is no possible tokens defined" do
|
105
113
|
testLexer = Class.new(Rly::Lex) do ; end
|
106
114
|
l = testLexer.new("test")
|
107
115
|
|
108
|
-
expect { l.
|
116
|
+
expect { l.next } .to raise_error(Rly::LexError)
|
109
117
|
end
|
118
|
+
end
|
110
119
|
|
120
|
+
context "Lexer with error handler" do
|
111
121
|
it "calls an error function if it is available, which returns a fixed token" do
|
112
122
|
testLexer = Class.new(Rly::Lex) do
|
113
123
|
token :NUM, /\d+/
|
@@ -119,11 +129,11 @@ describe Rly::Lex do
|
|
119
129
|
end
|
120
130
|
l = testLexer.new("test")
|
121
131
|
|
122
|
-
tok = l.
|
132
|
+
tok = l.next
|
123
133
|
tok.value.should == "BAD t"
|
124
134
|
tok.type.should == :error
|
125
135
|
|
126
|
-
tok = l.
|
136
|
+
tok = l.next
|
127
137
|
tok.value.should == "BAD e"
|
128
138
|
tok.type.should == :error
|
129
139
|
end
|
@@ -138,7 +148,18 @@ describe Rly::Lex do
|
|
138
148
|
end
|
139
149
|
l = testLexer.new("test1")
|
140
150
|
|
141
|
-
l.
|
151
|
+
l.next.value.should == '1'
|
142
152
|
end
|
143
153
|
end
|
154
|
+
|
155
|
+
it "doesn't try to skip chars over" do
|
156
|
+
testLexer = Class.new(Rly::Lex) do
|
157
|
+
token :NUM, /\d+/
|
158
|
+
literals ","
|
159
|
+
end
|
160
|
+
l = testLexer.new(",10")
|
161
|
+
|
162
|
+
l.next.type.should == ','
|
163
|
+
l.next.type.should == :NUM
|
164
|
+
end
|
144
165
|
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require "rly"
|
2
|
+
|
3
|
+
module CalcSpecExample
|
4
|
+
class CalcLex < Rly::Lex
|
5
|
+
literals '=+-*/()'
|
6
|
+
ignore " \t"
|
7
|
+
|
8
|
+
token :NAME, /[a-zA-Z_][a-zA-Z0-9_]*/
|
9
|
+
|
10
|
+
token :NUMBER, /\d+/ do |t|
|
11
|
+
t.value = t.value.to_i
|
12
|
+
t
|
13
|
+
end
|
14
|
+
|
15
|
+
token(/\n+/) { |t| t.lexer.lineno += t.value.count("\n") }
|
16
|
+
|
17
|
+
on_error do |t|
|
18
|
+
puts "Illegal character #{t.value}"
|
19
|
+
t.lexer.pos += 1
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
class CalcParse < Rly::Yacc
|
24
|
+
def names
|
25
|
+
@names ||= {}
|
26
|
+
end
|
27
|
+
|
28
|
+
precedence :left, '+', '-'
|
29
|
+
precedence :left, '*', '/'
|
30
|
+
precedence :right, :UMINUS
|
31
|
+
|
32
|
+
rule 'statement : NAME "=" expression' do |st, n, _, e|
|
33
|
+
self.names[n.value] = e.value
|
34
|
+
end
|
35
|
+
|
36
|
+
rule 'statement : expression' do |st, e|
|
37
|
+
st.value = e.value
|
38
|
+
end
|
39
|
+
|
40
|
+
rule 'expression : expression "+" expression
|
41
|
+
| expression "-" expression
|
42
|
+
| expression "*" expression
|
43
|
+
| expression "/" expression' do |ex, e1, op, e2|
|
44
|
+
ex.value = e1.value.send(op.value, e2.value)
|
45
|
+
end
|
46
|
+
|
47
|
+
# rule 'expression : "-" expression %prec UMINUS' do |ex, _, e|
|
48
|
+
# ex.value = - e.value
|
49
|
+
# end
|
50
|
+
|
51
|
+
rule 'expression : "(" expression ")"' do |ex, _, e, _|
|
52
|
+
ex.value = e.value
|
53
|
+
end
|
54
|
+
|
55
|
+
rule 'expression : NUMBER' do |ex, n|
|
56
|
+
ex.value = n.value
|
57
|
+
end
|
58
|
+
|
59
|
+
rule 'expression : NAME' do |ex, n|
|
60
|
+
nval = self.names[n.value]
|
61
|
+
unless nval
|
62
|
+
puts "Undefined name '#{n.value}'"
|
63
|
+
nval = 0
|
64
|
+
end
|
65
|
+
ex.value = nval
|
66
|
+
end
|
67
|
+
|
68
|
+
# rule_error do |p|
|
69
|
+
# if p
|
70
|
+
# puts "Syntax error at '#{p.value}'"
|
71
|
+
# else
|
72
|
+
# puts "Syntax error at EOF"
|
73
|
+
# end
|
74
|
+
# end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
describe 'Calculator' do
|
79
|
+
before :each do
|
80
|
+
@calc = CalcSpecExample::CalcParse.new(CalcSpecExample::CalcLex.new)
|
81
|
+
end
|
82
|
+
|
83
|
+
it "calculates simple expressions" do
|
84
|
+
@calc.parse('2 + 2').should == 4
|
85
|
+
end
|
86
|
+
|
87
|
+
it "calculates complex expressions" do
|
88
|
+
@calc.parse('(3-1)*6/(3+1)').should == 3
|
89
|
+
end
|
90
|
+
|
91
|
+
it "keeps state between parses" do
|
92
|
+
@calc.parse('magic = 42')
|
93
|
+
@calc.parse('2 * magic').should == 84
|
94
|
+
end
|
95
|
+
end
|
@@ -0,0 +1,239 @@
|
|
1
|
+
require "rly"
|
2
|
+
require "rly/parse/grammar"
|
3
|
+
require "rly/parse/ply_dump"
|
4
|
+
|
5
|
+
describe Rly::Grammar do
|
6
|
+
it "requires a list of terminals to be initialized" do
|
7
|
+
g = Rly::Grammar.new([:NUMBER])
|
8
|
+
g.terminals[:NUMBER].should_not be_nil
|
9
|
+
end
|
10
|
+
|
11
|
+
it "rejects terminals named in lowercase" do
|
12
|
+
expect { Rly::Grammar.new([:test]) } .to raise_error(ArgumentError)
|
13
|
+
end
|
14
|
+
|
15
|
+
it "has a default terminal -- error" do
|
16
|
+
g = Rly::Grammar.new([])
|
17
|
+
g.terminals[:error].should_not be_nil
|
18
|
+
end
|
19
|
+
|
20
|
+
context "Precedence specs" do
|
21
|
+
it "allows to set precedence" do
|
22
|
+
g = Rly::Grammar.new([])
|
23
|
+
g.set_precedence('+', :left, 1)
|
24
|
+
end
|
25
|
+
|
26
|
+
it "does not allow to set precedence after any productions have been added" do
|
27
|
+
g = Rly::Grammar.new([])
|
28
|
+
g.add_production(:expression, [:expression, '+', :expression])
|
29
|
+
expect { g.set_precedence('+', :left, 1) } .to raise_error(RuntimeError)
|
30
|
+
end
|
31
|
+
|
32
|
+
it "does not allow setting precedence several times for same terminal" do
|
33
|
+
g = Rly::Grammar.new([])
|
34
|
+
g.set_precedence('+', :left, 1)
|
35
|
+
expect { g.set_precedence('+', :left, 1) } .to raise_error(ArgumentError)
|
36
|
+
end
|
37
|
+
|
38
|
+
it "allows setting only :left, :right or :noassoc precedence associations" do
|
39
|
+
g = Rly::Grammar.new([])
|
40
|
+
expect { g.set_precedence('+', :bad, 1) } .to raise_error(ArgumentError)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
context "Production specs" do
|
45
|
+
it "returns a Production object when adding production" do
|
46
|
+
g = Rly::Grammar.new([])
|
47
|
+
p = g.add_production(:expression, [:expression, '+', :expression])
|
48
|
+
p.should be_a(Rly::Production)
|
49
|
+
end
|
50
|
+
|
51
|
+
it "rejects productions not named in lowercase" do
|
52
|
+
g = Rly::Grammar.new([])
|
53
|
+
expect { g.add_production(:BAD, []) } .to raise_error(ArgumentError)
|
54
|
+
end
|
55
|
+
|
56
|
+
it "rejects production named :error" do
|
57
|
+
g = Rly::Grammar.new([])
|
58
|
+
expect { g.add_production(:error, []) } .to raise_error(ArgumentError)
|
59
|
+
end
|
60
|
+
|
61
|
+
it "registers one-char terminals" do
|
62
|
+
g = Rly::Grammar.new([])
|
63
|
+
g.add_production(:expression, [:expression, '+', :expression])
|
64
|
+
g.terminals['+'].should_not be_nil
|
65
|
+
end
|
66
|
+
|
67
|
+
it "raises ArgumentError if one-char terminal is not actually an one char" do
|
68
|
+
g = Rly::Grammar.new([])
|
69
|
+
expect { g.add_production(:expression, [:expression, 'lulz', :expression]) } .to raise_error(ArgumentError)
|
70
|
+
end
|
71
|
+
|
72
|
+
it "calculates production precedence based on rightmost terminal" do
|
73
|
+
g = Rly::Grammar.new([])
|
74
|
+
g.set_precedence('+', :left, 1)
|
75
|
+
p = g.add_production(:expression, [:expression, '+', :expression])
|
76
|
+
p.precedence.should == [:left, 1]
|
77
|
+
end
|
78
|
+
|
79
|
+
it "defaults precedence to [:right, 0]" do
|
80
|
+
g = Rly::Grammar.new([])
|
81
|
+
p = g.add_production(:expression, [:expression, '+', :expression])
|
82
|
+
p.precedence.should == [:right, 0]
|
83
|
+
end
|
84
|
+
|
85
|
+
it "adds production to the list of productions" do
|
86
|
+
g = Rly::Grammar.new([])
|
87
|
+
p = g.add_production(:expression, [:expression, '+', :expression])
|
88
|
+
g.productions.count.should == 2
|
89
|
+
g.productions.last == p
|
90
|
+
end
|
91
|
+
|
92
|
+
it "adds production to the list of productions referenced by names" do
|
93
|
+
g = Rly::Grammar.new([])
|
94
|
+
p = g.add_production(:expression, [:expression, '+', :expression])
|
95
|
+
g.prodnames.count.should == 1
|
96
|
+
g.prodnames[:expression].should == [p]
|
97
|
+
end
|
98
|
+
|
99
|
+
it "adds production to the list of non-terminals" do
|
100
|
+
g = Rly::Grammar.new([])
|
101
|
+
p = g.add_production(:expression, [:expression, '+', :expression])
|
102
|
+
g.nonterminals[:expression].should_not be_nil
|
103
|
+
end
|
104
|
+
|
105
|
+
it "adds production number to referenced terminals" do
|
106
|
+
g = Rly::Grammar.new([])
|
107
|
+
p = g.add_production(:expression, [:expression, '+', :expression])
|
108
|
+
g.terminals['+'].should == [p.index]
|
109
|
+
end
|
110
|
+
|
111
|
+
it "adds production number to referenced non-terminals" do
|
112
|
+
g = Rly::Grammar.new([])
|
113
|
+
p = g.add_production(:expression, [:expression, '+', :expression])
|
114
|
+
g.nonterminals[:expression].should == [p.index, p.index]
|
115
|
+
end
|
116
|
+
|
117
|
+
it "does not allow duplicate rules" do
|
118
|
+
g = Rly::Grammar.new([])
|
119
|
+
g.add_production(:expression, [:expression, '+', :expression])
|
120
|
+
expect { g.add_production(:expression, [:expression, '+', :expression]) } .to raise_error(ArgumentError)
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
context "Start symbol specs" do
|
125
|
+
before :each do
|
126
|
+
@g = Rly::Grammar.new([])
|
127
|
+
p = @g.add_production(:expression, [:expression, '+', :expression])
|
128
|
+
@g.set_start()
|
129
|
+
end
|
130
|
+
|
131
|
+
it "sets start symbol if it is specified explicitly" do
|
132
|
+
@g.start.should == :expression
|
133
|
+
end
|
134
|
+
|
135
|
+
it "sets start symbol based on first production if it is not specified explicitly" do
|
136
|
+
@g.start.should == :expression
|
137
|
+
end
|
138
|
+
|
139
|
+
it "accepts only existing non-terminal as a start" do
|
140
|
+
g = Rly::Grammar.new([:NUMBER])
|
141
|
+
p = g.add_production(:expression, [:expression, '+', :expression])
|
142
|
+
expect { g.set_start(:NUMBER) } .to raise_error(ArgumentError)
|
143
|
+
expect { g.set_start(:new_sym) } .to raise_error(ArgumentError)
|
144
|
+
end
|
145
|
+
|
146
|
+
it "sets zero rule to :S' -> :start" do
|
147
|
+
prod_0 = @g.productions[0]
|
148
|
+
prod_0.index.should == 0
|
149
|
+
prod_0.name.should == :"S'"
|
150
|
+
prod_0.prod.should == [:expression]
|
151
|
+
end
|
152
|
+
|
153
|
+
it "adds 0 to start rule nonterminals" do
|
154
|
+
@g.nonterminals[:expression][-1].should == 0
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
context "LR table generation specs" do
|
159
|
+
before :each do
|
160
|
+
@g = Rly::Grammar.new([:NUMBER])
|
161
|
+
|
162
|
+
@g.set_precedence('+', :left, 1)
|
163
|
+
@g.set_precedence('-', :left, 1)
|
164
|
+
|
165
|
+
@g.add_production(:statement, [:expression])
|
166
|
+
@g.add_production(:expression, [:expression, '+', :expression])
|
167
|
+
@g.add_production(:expression, [:expression, '-', :expression])
|
168
|
+
@g.add_production(:expression, [:NUMBER])
|
169
|
+
|
170
|
+
@g.set_start
|
171
|
+
|
172
|
+
@g.build_lritems
|
173
|
+
end
|
174
|
+
|
175
|
+
it "builds LR items for grammar" do
|
176
|
+
@g.productions.length.should == 5
|
177
|
+
items = [2, 2, 4, 4, 2]
|
178
|
+
@g.productions.each_with_index do |p, i|
|
179
|
+
p.lr_items.count.should == items[i]
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
it "sets LR items to correct default values" do
|
184
|
+
i = @g.productions[0].lr_items[0]
|
185
|
+
i.lr_after.should == [@g.productions[1]]
|
186
|
+
i.prod.should == [:'.', :statement]
|
187
|
+
|
188
|
+
i = @g.productions[0].lr_items[1]
|
189
|
+
i.lr_after.should == []
|
190
|
+
i.prod.should == [:statement, :'.']
|
191
|
+
|
192
|
+
i = @g.productions[2].lr_items[0]
|
193
|
+
i.lr_after.should == @g.productions[2..4]
|
194
|
+
i.prod.should == [:'.', :expression, '+', :expression]
|
195
|
+
end
|
196
|
+
|
197
|
+
it "builds correct FIRST table" do
|
198
|
+
first = @g.compute_first
|
199
|
+
first.should == {
|
200
|
+
:'$end' => [:'$end'],
|
201
|
+
'+' => ['+'],
|
202
|
+
'-' => ['-'],
|
203
|
+
:NUMBER => [:NUMBER],
|
204
|
+
:error => [:error],
|
205
|
+
:expression => [:NUMBER],
|
206
|
+
:statement => [:NUMBER]
|
207
|
+
}
|
208
|
+
end
|
209
|
+
|
210
|
+
it "builds correct FOLLOW table" do
|
211
|
+
@g.compute_first
|
212
|
+
follow = @g.compute_follow
|
213
|
+
follow.should == { :expression => [:'$end', '+', '-'], :statement => [:'$end'] }
|
214
|
+
end
|
215
|
+
end
|
216
|
+
|
217
|
+
it "should generate parser.out same as Ply does" do
|
218
|
+
pending "thx to python dicts we have a different order of states. ideas?"
|
219
|
+
g = Rly::Grammar.new([:NUMBER])
|
220
|
+
|
221
|
+
g.set_precedence('+', :left, 1)
|
222
|
+
g.set_precedence('-', :left, 1)
|
223
|
+
|
224
|
+
g.add_production(:statement, [:expression])
|
225
|
+
g.add_production(:expression, [:expression, '+', :expression])
|
226
|
+
g.add_production(:expression, [:expression, '-', :expression])
|
227
|
+
g.add_production(:expression, [:NUMBER])
|
228
|
+
|
229
|
+
g.set_start
|
230
|
+
|
231
|
+
d = Rly::PlyDump.new(g)
|
232
|
+
orig = File.join(File.dirname(__FILE__), '..', 'fixtures', 'minicalc_ply_parser.out')
|
233
|
+
dst = File.join(File.dirname(__FILE__), '..', 'fixtures', 'minicalc_ply_parser.out.new')
|
234
|
+
|
235
|
+
open(dst, 'w') { |f| f.write(d.to_s) }
|
236
|
+
|
237
|
+
d.to_s.should == open(orig).read
|
238
|
+
end
|
239
|
+
end
|