rly 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,19 +1,19 @@
1
1
  require "rly"
2
2
 
3
3
  describe Rly::Lex do
4
- context "Simple Lexer" do
4
+ context "Basic lexer" do
5
5
  testLexer = Class.new(Rly::Lex) do
6
6
  token :FIRST, /[a-z]+/
7
7
  token :SECOND, /[A-Z]+/
8
8
  end
9
9
 
10
- it "should have a list of defined tokens" do
10
+ it "has a list of defined tokens" do
11
11
  testLexer.tokens.map { |t, r, b| t }.should == [:FIRST, :SECOND]
12
12
  end
13
13
 
14
- it "should output tokens one by one" do
14
+ it "outputs tokens one by one" do
15
15
  test = 'qweASDzxc'
16
- l = testLexer.new(test).to_enum
16
+ l = testLexer.new(test)
17
17
 
18
18
  tok = l.next
19
19
  tok.type.should == :FIRST
@@ -27,40 +27,48 @@ describe Rly::Lex do
27
27
  tok.type.should == :FIRST
28
28
  tok.value.should == 'zxc'
29
29
 
30
- expect { l.next } .to raise_error(StopIteration)
30
+ l.next.should be_nil
31
+ end
32
+
33
+ it "provides tokens in terminals list" do
34
+ testLexer.terminals.should == [:FIRST, :SECOND]
31
35
  end
32
36
  end
33
37
 
34
- context "Literals Lexer" do
38
+ context "Lexer with literals defined" do
35
39
  testLexer = Class.new(Rly::Lex) do
36
40
  literals "+-*/"
37
41
  end
38
42
 
39
- it "should output literal tokens" do
43
+ it "outputs literal tokens" do
40
44
  test = '++--'
41
- l = testLexer.new(test).to_enum
45
+ l = testLexer.new(test)
42
46
 
43
47
  l.next.value.should == '+'
44
48
  l.next.value.should == '+'
45
49
  l.next.value.should == '-'
46
50
  l.next.value.should == '-'
47
51
  end
52
+
53
+ it "provides literals in terminals list" do
54
+ testLexer.terminals.should == ['+', '-', '*', '/']
55
+ end
48
56
  end
49
57
 
50
- context "Ignores Lexer" do
58
+ context "Lexer with ignores defined" do
51
59
  testLexer = Class.new(Rly::Lex) do
52
60
  ignore " \t"
53
61
  end
54
62
 
55
- it "should honour ignores list" do
63
+ it "honours ignores list" do
56
64
  test = " \t\t \t \t"
57
- l = testLexer.new(test).to_enum
65
+ l = testLexer.new(test)
58
66
 
59
- expect { l.next } .to raise_error(StopIteration)
67
+ l.next.should be_nil
60
68
  end
61
69
  end
62
70
 
63
- context "Block-based Token Lexer" do
71
+ context "Lexer with token that has a block given" do
64
72
  testLexer = Class.new(Rly::Lex) do
65
73
  token :TEST, /\d+/ do |t|
66
74
  t.value = t.value.to_i
@@ -70,44 +78,46 @@ describe Rly::Lex do
70
78
 
71
79
  it "calls a block to further process a token" do
72
80
  test = "42"
73
- l = testLexer.new(test).to_enum
81
+ l = testLexer.new(test)
74
82
 
75
- l.next.value == 42
83
+ l.next.value.should == 42
76
84
  end
77
85
  end
78
86
 
79
- context "Non-outputtable tokens Lexer" do
87
+ context "Lexer with unnamed token and block given" do
80
88
  testLexer = Class.new(Rly::Lex) do
81
89
  token /\n+/ do |t| t.lexer.lineno = t.value.count("\n"); t end
82
90
  end
83
91
 
84
- it "process but don't output tokens without a name" do
92
+ it "processes but don't output tokens without a name" do
85
93
  test = "\n\n\n"
86
94
  l = testLexer.new(test)
87
95
 
88
- expect { l.to_enum.next } .to raise_error(StopIteration)
96
+ l.next.should be_nil
89
97
 
90
98
  l.lineno.should == 3
91
99
  end
92
100
  end
93
101
 
94
- context "Error handling" do
102
+ context "Lexer with no error handler" do
95
103
  it "raises an error, if there are no suitable tokens" do
96
104
  testLexer = Class.new(Rly::Lex) do
97
105
  token :NUM, /\d+/
98
106
  end
99
107
  l = testLexer.new("test")
100
108
 
101
- expect { l.to_enum.next } .to raise_error(Rly::LexError)
109
+ expect { l.next } .to raise_error(Rly::LexError)
102
110
  end
103
111
 
104
112
  it "raises an error, if there is no possible tokens defined" do
105
113
  testLexer = Class.new(Rly::Lex) do ; end
106
114
  l = testLexer.new("test")
107
115
 
108
- expect { l.to_enum.next } .to raise_error(Rly::LexError)
116
+ expect { l.next } .to raise_error(Rly::LexError)
109
117
  end
118
+ end
110
119
 
120
+ context "Lexer with error handler" do
111
121
  it "calls an error function if it is available, which returns a fixed token" do
112
122
  testLexer = Class.new(Rly::Lex) do
113
123
  token :NUM, /\d+/
@@ -119,11 +129,11 @@ describe Rly::Lex do
119
129
  end
120
130
  l = testLexer.new("test")
121
131
 
122
- tok = l.to_enum.next
132
+ tok = l.next
123
133
  tok.value.should == "BAD t"
124
134
  tok.type.should == :error
125
135
 
126
- tok = l.to_enum.next
136
+ tok = l.next
127
137
  tok.value.should == "BAD e"
128
138
  tok.type.should == :error
129
139
  end
@@ -138,7 +148,18 @@ describe Rly::Lex do
138
148
  end
139
149
  l = testLexer.new("test1")
140
150
 
141
- l.to_enum.next.value.should == '1'
151
+ l.next.value.should == '1'
142
152
  end
143
153
  end
154
+
155
+ it "doesn't try to skip chars over" do
156
+ testLexer = Class.new(Rly::Lex) do
157
+ token :NUM, /\d+/
158
+ literals ","
159
+ end
160
+ l = testLexer.new(",10")
161
+
162
+ l.next.type.should == ','
163
+ l.next.type.should == :NUM
164
+ end
144
165
  end
@@ -0,0 +1,95 @@
1
+ require "rly"
2
+
3
+ module CalcSpecExample
4
+ class CalcLex < Rly::Lex # Lexer for the calculator example: names, integers and one-character operators.
5
+ literals '=+-*/()' # each character listed here is declared as a literal
6
+ ignore " \t" # spaces and tabs are declared as ignored characters
7
+
8
+ token :NAME, /[a-zA-Z_][a-zA-Z0-9_]*/
9
+
10
+ token :NUMBER, /\d+/ do |t|
11
+ t.value = t.value.to_i # replace the matched text with its Integer value
12
+ t
13
+ end
14
+
15
+ token(/\n+/) { |t| t.lexer.lineno += t.value.count("\n") } # token declared without a name; the block only bumps the lexer's line counter
16
+
17
+ on_error do |t|
18
+ puts "Illegal character #{t.value}"
19
+ t.lexer.pos += 1 # advance the lexer position by one after reporting (presumably to step past the bad character — confirm)
20
+ end
21
+ end
22
+
23
+ class CalcParse < Rly::Yacc # Parser for the calculator example: assignments and arithmetic expressions.
24
+ def names
25
+ @names ||= {} # lazily created Hash of variable name => value, kept on the parser instance
26
+ end
27
+
28
+ precedence :left, '+', '-'
29
+ precedence :left, '*', '/'
30
+ precedence :right, :UMINUS # NOTE(review): the only rule referencing UMINUS is commented out below
31
+
32
+ rule 'statement : NAME "=" expression' do |st, n, _, e|
33
+ self.names[n.value] = e.value # remember the value under the variable name; st.value is not assigned in this rule
34
+ end
35
+
36
+ rule 'statement : expression' do |st, e|
37
+ st.value = e.value
38
+ end
39
+
40
+ rule 'expression : expression "+" expression
41
+ | expression "-" expression
42
+ | expression "*" expression
43
+ | expression "/" expression' do |ex, e1, op, e2|
44
+ ex.value = e1.value.send(op.value, e2.value) # the operator token's value is used as the method name to call on the left operand
45
+ end
46
+
47
+ # rule 'expression : "-" expression %prec UMINUS' do |ex, _, e|
48
+ # ex.value = - e.value
49
+ # end
50
+
51
+ rule 'expression : "(" expression ")"' do |ex, _, e, _|
52
+ ex.value = e.value
53
+ end
54
+
55
+ rule 'expression : NUMBER' do |ex, n|
56
+ ex.value = n.value
57
+ end
58
+
59
+ rule 'expression : NAME' do |ex, n|
60
+ nval = self.names[n.value]
61
+ unless nval
62
+ puts "Undefined name '#{n.value}'"
63
+ nval = 0 # names with no stored value evaluate to 0 after the warning
64
+ end
65
+ ex.value = nval
66
+ end
67
+
68
+ # rule_error do |p|
69
+ # if p
70
+ # puts "Syntax error at '#{p.value}'"
71
+ # else
72
+ # puts "Syntax error at EOF"
73
+ # end
74
+ # end
75
+ end
76
+ end
77
+
78
+ describe 'Calculator' do
79
+ before :each do
80
+ @calc = CalcSpecExample::CalcParse.new(CalcSpecExample::CalcLex.new)
81
+ end
82
+
83
+ it "calculates simple expressions" do
84
+ @calc.parse('2 + 2').should == 4
85
+ end
86
+
87
+ it "calculates complex expressions" do
88
+ @calc.parse('(3-1)*6/(3+1)').should == 3
89
+ end
90
+
91
+ it "keeps state between parses" do
92
+ @calc.parse('magic = 42')
93
+ @calc.parse('2 * magic').should == 84
94
+ end
95
+ end
@@ -0,0 +1,239 @@
1
+ require "rly"
2
+ require "rly/parse/grammar"
3
+ require "rly/parse/ply_dump"
4
+
5
+ describe Rly::Grammar do
6
+ it "requires a list of terminals to be initialized" do
7
+ g = Rly::Grammar.new([:NUMBER])
8
+ g.terminals[:NUMBER].should_not be_nil
9
+ end
10
+
11
+ it "rejects terminals named in lowercase" do
12
+ expect { Rly::Grammar.new([:test]) } .to raise_error(ArgumentError)
13
+ end
14
+
15
+ it "has a default terminal -- error" do
16
+ g = Rly::Grammar.new([])
17
+ g.terminals[:error].should_not be_nil
18
+ end
19
+
20
+ context "Precedence specs" do
21
+ it "allows to set precedence" do
22
+ g = Rly::Grammar.new([])
23
+ g.set_precedence('+', :left, 1)
24
+ end
25
+
26
+ it "does not allow to set precedence after any productions have been added" do
27
+ g = Rly::Grammar.new([])
28
+ g.add_production(:expression, [:expression, '+', :expression])
29
+ expect { g.set_precedence('+', :left, 1) } .to raise_error(RuntimeError)
30
+ end
31
+
32
+ it "does not allow setting precedence several times for same terminal" do
33
+ g = Rly::Grammar.new([])
34
+ g.set_precedence('+', :left, 1)
35
+ expect { g.set_precedence('+', :left, 1) } .to raise_error(ArgumentError)
36
+ end
37
+
38
+ it "allows setting only :left, :right or :noassoc precedence associations" do
39
+ g = Rly::Grammar.new([])
40
+ expect { g.set_precedence('+', :bad, 1) } .to raise_error(ArgumentError)
41
+ end
42
+ end
43
+
44
+ context "Production specs" do
45
+ it "returns a Production object when adding production" do
46
+ g = Rly::Grammar.new([])
47
+ p = g.add_production(:expression, [:expression, '+', :expression])
48
+ p.should be_a(Rly::Production)
49
+ end
50
+
51
+ it "rejects productions not named in lowercase" do
52
+ g = Rly::Grammar.new([])
53
+ expect { g.add_production(:BAD, []) } .to raise_error(ArgumentError)
54
+ end
55
+
56
+ it "rejects production named :error" do
57
+ g = Rly::Grammar.new([])
58
+ expect { g.add_production(:error, []) } .to raise_error(ArgumentError)
59
+ end
60
+
61
+ it "registers one-char terminals" do
62
+ g = Rly::Grammar.new([])
63
+ g.add_production(:expression, [:expression, '+', :expression])
64
+ g.terminals['+'].should_not be_nil
65
+ end
66
+
67
+ it "raises ArgumentError if one-char terminal is not actually an one char" do
68
+ g = Rly::Grammar.new([])
69
+ expect { g.add_production(:expression, [:expression, 'lulz', :expression]) } .to raise_error(ArgumentError)
70
+ end
71
+
72
+ it "calculates production precedence based on rightmost terminal" do
73
+ g = Rly::Grammar.new([])
74
+ g.set_precedence('+', :left, 1)
75
+ p = g.add_production(:expression, [:expression, '+', :expression])
76
+ p.precedence.should == [:left, 1]
77
+ end
78
+
79
+ it "defaults precedence to [:right, 0]" do
80
+ g = Rly::Grammar.new([])
81
+ p = g.add_production(:expression, [:expression, '+', :expression])
82
+ p.precedence.should == [:right, 0]
83
+ end
84
+
85
+ it "adds production to the list of productions" do
86
+ g = Rly::Grammar.new([])
87
+ p = g.add_production(:expression, [:expression, '+', :expression])
88
+ g.productions.count.should == 2
89
+ g.productions.last == p
90
+ end
91
+
92
+ it "adds production to the list of productions referenced by names" do
93
+ g = Rly::Grammar.new([])
94
+ p = g.add_production(:expression, [:expression, '+', :expression])
95
+ g.prodnames.count.should == 1
96
+ g.prodnames[:expression].should == [p]
97
+ end
98
+
99
+ it "adds production to the list of non-terminals" do
100
+ g = Rly::Grammar.new([])
101
+ p = g.add_production(:expression, [:expression, '+', :expression])
102
+ g.nonterminals[:expression].should_not be_nil
103
+ end
104
+
105
+ it "adds production number to referenced terminals" do
106
+ g = Rly::Grammar.new([])
107
+ p = g.add_production(:expression, [:expression, '+', :expression])
108
+ g.terminals['+'].should == [p.index]
109
+ end
110
+
111
+ it "adds production number to referenced non-terminals" do
112
+ g = Rly::Grammar.new([])
113
+ p = g.add_production(:expression, [:expression, '+', :expression])
114
+ g.nonterminals[:expression].should == [p.index, p.index]
115
+ end
116
+
117
+ it "does not allow duplicate rules" do
118
+ g = Rly::Grammar.new([])
119
+ g.add_production(:expression, [:expression, '+', :expression])
120
+ expect { g.add_production(:expression, [:expression, '+', :expression]) } .to raise_error(ArgumentError)
121
+ end
122
+ end
123
+
124
+ context "Start symbol specs" do
125
+ before :each do
126
+ @g = Rly::Grammar.new([])
127
+ p = @g.add_production(:expression, [:expression, '+', :expression])
128
+ @g.set_start()
129
+ end
130
+
131
+ it "sets start symbol if it is specified explicitly" do
132
+ @g.start.should == :expression
133
+ end
134
+
135
+ it "sets start symbol based on first production if it is not specified explicitly" do
136
+ @g.start.should == :expression
137
+ end
138
+
139
+ it "accepts only existing non-terminal as a start" do
140
+ g = Rly::Grammar.new([:NUMBER])
141
+ p = g.add_production(:expression, [:expression, '+', :expression])
142
+ expect { g.set_start(:NUMBER) } .to raise_error(ArgumentError)
143
+ expect { g.set_start(:new_sym) } .to raise_error(ArgumentError)
144
+ end
145
+
146
+ it "sets zero rule to :S' -> :start" do
147
+ prod_0 = @g.productions[0]
148
+ prod_0.index.should == 0
149
+ prod_0.name.should == :"S'"
150
+ prod_0.prod.should == [:expression]
151
+ end
152
+
153
+ it "adds 0 to start rule nonterminals" do
154
+ @g.nonterminals[:expression][-1].should == 0
155
+ end
156
+ end
157
+
158
+ context "LR table generation specs" do
159
+ before :each do
160
+ @g = Rly::Grammar.new([:NUMBER])
161
+
162
+ @g.set_precedence('+', :left, 1)
163
+ @g.set_precedence('-', :left, 1)
164
+
165
+ @g.add_production(:statement, [:expression])
166
+ @g.add_production(:expression, [:expression, '+', :expression])
167
+ @g.add_production(:expression, [:expression, '-', :expression])
168
+ @g.add_production(:expression, [:NUMBER])
169
+
170
+ @g.set_start
171
+
172
+ @g.build_lritems
173
+ end
174
+
175
+ it "builds LR items for grammar" do
176
+ @g.productions.length.should == 5
177
+ items = [2, 2, 4, 4, 2]
178
+ @g.productions.each_with_index do |p, i|
179
+ p.lr_items.count.should == items[i]
180
+ end
181
+ end
182
+
183
+ it "sets LR items to correct default values" do
184
+ i = @g.productions[0].lr_items[0]
185
+ i.lr_after.should == [@g.productions[1]]
186
+ i.prod.should == [:'.', :statement]
187
+
188
+ i = @g.productions[0].lr_items[1]
189
+ i.lr_after.should == []
190
+ i.prod.should == [:statement, :'.']
191
+
192
+ i = @g.productions[2].lr_items[0]
193
+ i.lr_after.should == @g.productions[2..4]
194
+ i.prod.should == [:'.', :expression, '+', :expression]
195
+ end
196
+
197
+ it "builds correct FIRST table" do
198
+ first = @g.compute_first
199
+ first.should == {
200
+ :'$end' => [:'$end'],
201
+ '+' => ['+'],
202
+ '-' => ['-'],
203
+ :NUMBER => [:NUMBER],
204
+ :error => [:error],
205
+ :expression => [:NUMBER],
206
+ :statement => [:NUMBER]
207
+ }
208
+ end
209
+
210
+ it "builds correct FOLLOW table" do
211
+ @g.compute_first
212
+ follow = @g.compute_follow
213
+ follow.should == { :expression => [:'$end', '+', '-'], :statement => [:'$end'] }
214
+ end
215
+ end
216
+
217
+ it "should generate parser.out same as Ply does" do
218
+ pending "thx to python dicts we have a different order of states. ideas?"
219
+ g = Rly::Grammar.new([:NUMBER])
220
+
221
+ g.set_precedence('+', :left, 1)
222
+ g.set_precedence('-', :left, 1)
223
+
224
+ g.add_production(:statement, [:expression])
225
+ g.add_production(:expression, [:expression, '+', :expression])
226
+ g.add_production(:expression, [:expression, '-', :expression])
227
+ g.add_production(:expression, [:NUMBER])
228
+
229
+ g.set_start
230
+
231
+ d = Rly::PlyDump.new(g)
232
+ orig = File.join(File.dirname(__FILE__), '..', 'fixtures', 'minicalc_ply_parser.out')
233
+ dst = File.join(File.dirname(__FILE__), '..', 'fixtures', 'minicalc_ply_parser.out.new')
234
+
235
+ open(dst, 'w') { |f| f.write(d.to_s) }
236
+
237
+ d.to_s.should == open(orig).read
238
+ end
239
+ end