rly 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,19 +1,19 @@
1
1
  require "rly"
2
2
 
3
3
  describe Rly::Lex do
4
- context "Simple Lexer" do
4
+ context "Basic lexer" do
5
5
  testLexer = Class.new(Rly::Lex) do
6
6
  token :FIRST, /[a-z]+/
7
7
  token :SECOND, /[A-Z]+/
8
8
  end
9
9
 
10
- it "should have a list of defined tokens" do
10
+ it "has a list of defined tokens" do
11
11
  testLexer.tokens.map { |t, r, b| t }.should == [:FIRST, :SECOND]
12
12
  end
13
13
 
14
- it "should output tokens one by one" do
14
+ it "outputs tokens one by one" do
15
15
  test = 'qweASDzxc'
16
- l = testLexer.new(test).to_enum
16
+ l = testLexer.new(test)
17
17
 
18
18
  tok = l.next
19
19
  tok.type.should == :FIRST
@@ -27,40 +27,48 @@ describe Rly::Lex do
27
27
  tok.type.should == :FIRST
28
28
  tok.value.should == 'zxc'
29
29
 
30
- expect { l.next } .to raise_error(StopIteration)
30
+ l.next.should be_nil
31
+ end
32
+
33
+ it "provides tokens in terminals list" do
34
+ testLexer.terminals.should == [:FIRST, :SECOND]
31
35
  end
32
36
  end
33
37
 
34
- context "Literals Lexer" do
38
+ context "Lexer with literals defined" do
35
39
  testLexer = Class.new(Rly::Lex) do
36
40
  literals "+-*/"
37
41
  end
38
42
 
39
- it "should output literal tokens" do
43
+ it "outputs literal tokens" do
40
44
  test = '++--'
41
- l = testLexer.new(test).to_enum
45
+ l = testLexer.new(test)
42
46
 
43
47
  l.next.value.should == '+'
44
48
  l.next.value.should == '+'
45
49
  l.next.value.should == '-'
46
50
  l.next.value.should == '-'
47
51
  end
52
+
53
+ it "provides literals in terminals list" do
54
+ testLexer.terminals.should == ['+', '-', '*', '/']
55
+ end
48
56
  end
49
57
 
50
- context "Ignores Lexer" do
58
+ context "Lexer with ignores defined" do
51
59
  testLexer = Class.new(Rly::Lex) do
52
60
  ignore " \t"
53
61
  end
54
62
 
55
- it "should honour ignores list" do
63
+ it "honours ignores list" do
56
64
  test = " \t\t \t \t"
57
- l = testLexer.new(test).to_enum
65
+ l = testLexer.new(test)
58
66
 
59
- expect { l.next } .to raise_error(StopIteration)
67
+ l.next.should be_nil
60
68
  end
61
69
  end
62
70
 
63
- context "Block-based Token Lexer" do
71
+ context "Lexer with token that has a block given" do
64
72
  testLexer = Class.new(Rly::Lex) do
65
73
  token :TEST, /\d+/ do |t|
66
74
  t.value = t.value.to_i
@@ -70,44 +78,46 @@ describe Rly::Lex do
70
78
 
71
79
  it "calls a block to further process a token" do
72
80
  test = "42"
73
- l = testLexer.new(test).to_enum
81
+ l = testLexer.new(test)
74
82
 
75
- l.next.value == 42
83
+ l.next.value.should == 42
76
84
  end
77
85
  end
78
86
 
79
- context "Non-outputtable tokens Lexer" do
87
+ context "Lexer with unnamed token and block given" do
80
88
  testLexer = Class.new(Rly::Lex) do
81
89
  token /\n+/ do |t| t.lexer.lineno = t.value.count("\n"); t end
82
90
  end
83
91
 
84
- it "process but don't output tokens without a name" do
92
+ it "processes but don't output tokens without a name" do
85
93
  test = "\n\n\n"
86
94
  l = testLexer.new(test)
87
95
 
88
- expect { l.to_enum.next } .to raise_error(StopIteration)
96
+ l.next.should be_nil
89
97
 
90
98
  l.lineno.should == 3
91
99
  end
92
100
  end
93
101
 
94
- context "Error handling" do
102
+ context "Lexer with no error handler" do
95
103
  it "raises an error, if there are no suitable tokens" do
96
104
  testLexer = Class.new(Rly::Lex) do
97
105
  token :NUM, /\d+/
98
106
  end
99
107
  l = testLexer.new("test")
100
108
 
101
- expect { l.to_enum.next } .to raise_error(Rly::LexError)
109
+ expect { l.next } .to raise_error(Rly::LexError)
102
110
  end
103
111
 
104
112
  it "raises an error, if there is no possible tokens defined" do
105
113
  testLexer = Class.new(Rly::Lex) do ; end
106
114
  l = testLexer.new("test")
107
115
 
108
- expect { l.to_enum.next } .to raise_error(Rly::LexError)
116
+ expect { l.next } .to raise_error(Rly::LexError)
109
117
  end
118
+ end
110
119
 
120
+ context "Lexer with error handler" do
111
121
  it "calls an error function if it is available, which returns a fixed token" do
112
122
  testLexer = Class.new(Rly::Lex) do
113
123
  token :NUM, /\d+/
@@ -119,11 +129,11 @@ describe Rly::Lex do
119
129
  end
120
130
  l = testLexer.new("test")
121
131
 
122
- tok = l.to_enum.next
132
+ tok = l.next
123
133
  tok.value.should == "BAD t"
124
134
  tok.type.should == :error
125
135
 
126
- tok = l.to_enum.next
136
+ tok = l.next
127
137
  tok.value.should == "BAD e"
128
138
  tok.type.should == :error
129
139
  end
@@ -138,7 +148,18 @@ describe Rly::Lex do
138
148
  end
139
149
  l = testLexer.new("test1")
140
150
 
141
- l.to_enum.next.value.should == '1'
151
+ l.next.value.should == '1'
142
152
  end
143
153
  end
154
+
155
+ it "doesn't try to skip chars over" do
156
+ testLexer = Class.new(Rly::Lex) do
157
+ token :NUM, /\d+/
158
+ literals ","
159
+ end
160
+ l = testLexer.new(",10")
161
+
162
+ l.next.type.should == ','
163
+ l.next.type.should == :NUM
164
+ end
144
165
  end
@@ -0,0 +1,95 @@
1
+ require "rly"
2
+
3
+ module CalcSpecExample
4
+ class CalcLex < Rly::Lex
5
+ literals '=+-*/()'
6
+ ignore " \t"
7
+
8
+ token :NAME, /[a-zA-Z_][a-zA-Z0-9_]*/
9
+
10
+ token :NUMBER, /\d+/ do |t|
11
+ t.value = t.value.to_i
12
+ t
13
+ end
14
+
15
+ token(/\n+/) { |t| t.lexer.lineno += t.value.count("\n") }
16
+
17
+ on_error do |t|
18
+ puts "Illegal character #{t.value}"
19
+ t.lexer.pos += 1
20
+ end
21
+ end
22
+
23
+ class CalcParse < Rly::Yacc
24
+ def names
25
+ @names ||= {}
26
+ end
27
+
28
+ precedence :left, '+', '-'
29
+ precedence :left, '*', '/'
30
+ precedence :right, :UMINUS
31
+
32
+ rule 'statement : NAME "=" expression' do |st, n, _, e|
33
+ self.names[n.value] = e.value
34
+ end
35
+
36
+ rule 'statement : expression' do |st, e|
37
+ st.value = e.value
38
+ end
39
+
40
+ rule 'expression : expression "+" expression
41
+ | expression "-" expression
42
+ | expression "*" expression
43
+ | expression "/" expression' do |ex, e1, op, e2|
44
+ ex.value = e1.value.send(op.value, e2.value)
45
+ end
46
+
47
+ # rule 'expression : "-" expression %prec UMINUS' do |ex, _, e|
48
+ # ex.value = - e.value
49
+ # end
50
+
51
+ rule 'expression : "(" expression ")"' do |ex, _, e, _|
52
+ ex.value = e.value
53
+ end
54
+
55
+ rule 'expression : NUMBER' do |ex, n|
56
+ ex.value = n.value
57
+ end
58
+
59
+ rule 'expression : NAME' do |ex, n|
60
+ nval = self.names[n.value]
61
+ unless nval
62
+ puts "Undefined name '#{n.value}'"
63
+ nval = 0
64
+ end
65
+ ex.value = nval
66
+ end
67
+
68
+ # rule_error do |p|
69
+ # if p
70
+ # puts "Syntax error at '#{p.value}'"
71
+ # else
72
+ # puts "Syntax error at EOF"
73
+ # end
74
+ # end
75
+ end
76
+ end
77
+
78
+ describe 'Calculator' do
79
+ before :each do
80
+ @calc = CalcSpecExample::CalcParse.new(CalcSpecExample::CalcLex.new)
81
+ end
82
+
83
+ it "calculates simple expressions" do
84
+ @calc.parse('2 + 2').should == 4
85
+ end
86
+
87
+ it "calculates complex expressions" do
88
+ @calc.parse('(3-1)*6/(3+1)').should == 3
89
+ end
90
+
91
+ it "keeps state between parses" do
92
+ @calc.parse('magic = 42')
93
+ @calc.parse('2 * magic').should == 84
94
+ end
95
+ end
@@ -0,0 +1,239 @@
1
+ require "rly"
2
+ require "rly/parse/grammar"
3
+ require "rly/parse/ply_dump"
4
+
5
+ describe Rly::Grammar do
6
+ it "requires a list of terminals to be initialized" do
7
+ g = Rly::Grammar.new([:NUMBER])
8
+ g.terminals[:NUMBER].should_not be_nil
9
+ end
10
+
11
+ it "rejects terminals named in lowercase" do
12
+ expect { Rly::Grammar.new([:test]) } .to raise_error(ArgumentError)
13
+ end
14
+
15
+ it "has a default terminal -- error" do
16
+ g = Rly::Grammar.new([])
17
+ g.terminals[:error].should_not be_nil
18
+ end
19
+
20
+ context "Precedence specs" do
21
+ it "allows to set precedence" do
22
+ g = Rly::Grammar.new([])
23
+ g.set_precedence('+', :left, 1)
24
+ end
25
+
26
+ it "does not allow to set precedence after any productions have been added" do
27
+ g = Rly::Grammar.new([])
28
+ g.add_production(:expression, [:expression, '+', :expression])
29
+ expect { g.set_precedence('+', :left, 1) } .to raise_error(RuntimeError)
30
+ end
31
+
32
+ it "does not allow setting precedence several times for same terminal" do
33
+ g = Rly::Grammar.new([])
34
+ g.set_precedence('+', :left, 1)
35
+ expect { g.set_precedence('+', :left, 1) } .to raise_error(ArgumentError)
36
+ end
37
+
38
+ it "allows setting only :left, :right or :noassoc precedence associations" do
39
+ g = Rly::Grammar.new([])
40
+ expect { g.set_precedence('+', :bad, 1) } .to raise_error(ArgumentError)
41
+ end
42
+ end
43
+
44
+ context "Production specs" do
45
+ it "returns a Production object when adding production" do
46
+ g = Rly::Grammar.new([])
47
+ p = g.add_production(:expression, [:expression, '+', :expression])
48
+ p.should be_a(Rly::Production)
49
+ end
50
+
51
+ it "rejects productions not named in lowercase" do
52
+ g = Rly::Grammar.new([])
53
+ expect { g.add_production(:BAD, []) } .to raise_error(ArgumentError)
54
+ end
55
+
56
+ it "rejects production named :error" do
57
+ g = Rly::Grammar.new([])
58
+ expect { g.add_production(:error, []) } .to raise_error(ArgumentError)
59
+ end
60
+
61
+ it "registers one-char terminals" do
62
+ g = Rly::Grammar.new([])
63
+ g.add_production(:expression, [:expression, '+', :expression])
64
+ g.terminals['+'].should_not be_nil
65
+ end
66
+
67
+ it "raises ArgumentError if one-char terminal is not actually an one char" do
68
+ g = Rly::Grammar.new([])
69
+ expect { g.add_production(:expression, [:expression, 'lulz', :expression]) } .to raise_error(ArgumentError)
70
+ end
71
+
72
+ it "calculates production precedence based on rightmost terminal" do
73
+ g = Rly::Grammar.new([])
74
+ g.set_precedence('+', :left, 1)
75
+ p = g.add_production(:expression, [:expression, '+', :expression])
76
+ p.precedence.should == [:left, 1]
77
+ end
78
+
79
+ it "defaults precedence to [:right, 0]" do
80
+ g = Rly::Grammar.new([])
81
+ p = g.add_production(:expression, [:expression, '+', :expression])
82
+ p.precedence.should == [:right, 0]
83
+ end
84
+
85
+ it "adds production to the list of productions" do
86
+ g = Rly::Grammar.new([])
87
+ p = g.add_production(:expression, [:expression, '+', :expression])
88
+ g.productions.count.should == 2
89
+ g.productions.last == p
90
+ end
91
+
92
+ it "adds production to the list of productions referenced by names" do
93
+ g = Rly::Grammar.new([])
94
+ p = g.add_production(:expression, [:expression, '+', :expression])
95
+ g.prodnames.count.should == 1
96
+ g.prodnames[:expression].should == [p]
97
+ end
98
+
99
+ it "adds production to the list of non-terminals" do
100
+ g = Rly::Grammar.new([])
101
+ p = g.add_production(:expression, [:expression, '+', :expression])
102
+ g.nonterminals[:expression].should_not be_nil
103
+ end
104
+
105
+ it "adds production number to referenced terminals" do
106
+ g = Rly::Grammar.new([])
107
+ p = g.add_production(:expression, [:expression, '+', :expression])
108
+ g.terminals['+'].should == [p.index]
109
+ end
110
+
111
+ it "adds production number to referenced non-terminals" do
112
+ g = Rly::Grammar.new([])
113
+ p = g.add_production(:expression, [:expression, '+', :expression])
114
+ g.nonterminals[:expression].should == [p.index, p.index]
115
+ end
116
+
117
+ it "does not allow duplicate rules" do
118
+ g = Rly::Grammar.new([])
119
+ g.add_production(:expression, [:expression, '+', :expression])
120
+ expect { g.add_production(:expression, [:expression, '+', :expression]) } .to raise_error(ArgumentError)
121
+ end
122
+ end
123
+
124
+ context "Start symbol specs" do
125
+ before :each do
126
+ @g = Rly::Grammar.new([])
127
+ p = @g.add_production(:expression, [:expression, '+', :expression])
128
+ @g.set_start()
129
+ end
130
+
131
+ it "sets start symbol if it is specified explicitly" do
132
+ @g.start.should == :expression
133
+ end
134
+
135
+ it "sets start symbol based on first production if it is not specified explicitly" do
136
+ @g.start.should == :expression
137
+ end
138
+
139
+ it "accepts only existing non-terminal as a start" do
140
+ g = Rly::Grammar.new([:NUMBER])
141
+ p = g.add_production(:expression, [:expression, '+', :expression])
142
+ expect { g.set_start(:NUMBER) } .to raise_error(ArgumentError)
143
+ expect { g.set_start(:new_sym) } .to raise_error(ArgumentError)
144
+ end
145
+
146
+ it "sets zero rule to :S' -> :start" do
147
+ prod_0 = @g.productions[0]
148
+ prod_0.index.should == 0
149
+ prod_0.name.should == :"S'"
150
+ prod_0.prod.should == [:expression]
151
+ end
152
+
153
+ it "adds 0 to start rule nonterminals" do
154
+ @g.nonterminals[:expression][-1].should == 0
155
+ end
156
+ end
157
+
158
+ context "LR table generation specs" do
159
+ before :each do
160
+ @g = Rly::Grammar.new([:NUMBER])
161
+
162
+ @g.set_precedence('+', :left, 1)
163
+ @g.set_precedence('-', :left, 1)
164
+
165
+ @g.add_production(:statement, [:expression])
166
+ @g.add_production(:expression, [:expression, '+', :expression])
167
+ @g.add_production(:expression, [:expression, '-', :expression])
168
+ @g.add_production(:expression, [:NUMBER])
169
+
170
+ @g.set_start
171
+
172
+ @g.build_lritems
173
+ end
174
+
175
+ it "builds LR items for grammar" do
176
+ @g.productions.length.should == 5
177
+ items = [2, 2, 4, 4, 2]
178
+ @g.productions.each_with_index do |p, i|
179
+ p.lr_items.count.should == items[i]
180
+ end
181
+ end
182
+
183
+ it "sets LR items to correct default values" do
184
+ i = @g.productions[0].lr_items[0]
185
+ i.lr_after.should == [@g.productions[1]]
186
+ i.prod.should == [:'.', :statement]
187
+
188
+ i = @g.productions[0].lr_items[1]
189
+ i.lr_after.should == []
190
+ i.prod.should == [:statement, :'.']
191
+
192
+ i = @g.productions[2].lr_items[0]
193
+ i.lr_after.should == @g.productions[2..4]
194
+ i.prod.should == [:'.', :expression, '+', :expression]
195
+ end
196
+
197
+ it "builds correct FIRST table" do
198
+ first = @g.compute_first
199
+ first.should == {
200
+ :'$end' => [:'$end'],
201
+ '+' => ['+'],
202
+ '-' => ['-'],
203
+ :NUMBER => [:NUMBER],
204
+ :error => [:error],
205
+ :expression => [:NUMBER],
206
+ :statement => [:NUMBER]
207
+ }
208
+ end
209
+
210
+ it "builds correct FOLLOW table" do
211
+ @g.compute_first
212
+ follow = @g.compute_follow
213
+ follow.should == { :expression => [:'$end', '+', '-'], :statement => [:'$end'] }
214
+ end
215
+ end
216
+
217
+ it "should generate parser.out same as Ply does" do
218
+ pending "thx to python dicts we have a different order of states. ideas?"
219
+ g = Rly::Grammar.new([:NUMBER])
220
+
221
+ g.set_precedence('+', :left, 1)
222
+ g.set_precedence('-', :left, 1)
223
+
224
+ g.add_production(:statement, [:expression])
225
+ g.add_production(:expression, [:expression, '+', :expression])
226
+ g.add_production(:expression, [:expression, '-', :expression])
227
+ g.add_production(:expression, [:NUMBER])
228
+
229
+ g.set_start
230
+
231
+ d = Rly::PlyDump.new(g)
232
+ orig = File.join(File.dirname(__FILE__), '..', 'fixtures', 'minicalc_ply_parser.out')
233
+ dst = File.join(File.dirname(__FILE__), '..', 'fixtures', 'minicalc_ply_parser.out.new')
234
+
235
+ open(dst, 'w') { |f| f.write(d.to_s) }
236
+
237
+ d.to_s.should == open(orig).read
238
+ end
239
+ end