ast_ast 0.0.0 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +1 -1
- data/README.md +103 -33
- data/Rakefile +5 -48
- data/lib/ast_ast.rb +6 -6
- data/lib/ast_ast/ast.rb +162 -1
- data/lib/ast_ast/bnf.rb +187 -0
- data/lib/ast_ast/token.rb +51 -8
- data/lib/ast_ast/tokeniser.rb +35 -13
- data/lib/ast_ast/tokens.rb +308 -6
- data/lib/ast_ast/tree.rb +74 -2
- data/lib/ast_ast/version.rb +3 -0
- data/spec/ast_ast/token_spec.rb +62 -0
- data/spec/ast_ast/tokeniser_spec.rb +101 -0
- data/spec/ast_ast/tokens_spec.rb +329 -0
- data/spec/spec_helper.rb +11 -0
- metadata +27 -45
- data/.document +0 -5
- data/.gitignore +0 -24
- data/VERSION +0 -1
- data/lib/ast_tokens.rb +0 -8
- data/test/helper.rb +0 -10
- data/test/test_ast_ast.rb +0 -5
- data/test/test_tokeniser.rb +0 -13
data/lib/ast_ast/tree.rb
CHANGED
@@ -1,5 +1,77 @@
|
|
1
1
|
module Ast
  # Trees are similar to tokens, in that they have a pointer, but trees
  # are meant to be traversed. They can have branches (Trees within Trees).
  #
  # A Tree is an Array plus an integer cursor (+pos+) and a small set of
  # scanner-style helpers for reading the item under the cursor and
  # moving past it.
  #
  # NOTE(review): +Error+ is raised by #check and #skip but is not defined
  # in this file; it is resolved by normal constant lookup at call time --
  # confirm which Error class is intended.
  class Tree < Array
    # The reader is defined explicitly below so it can default lazily;
    # only the writer is generated here (avoids a redefinition warning).
    attr_writer :pos

    # Accepts the same arguments as Array.new and starts the cursor at 0.
    def initialize(*args)
      @pos = 0
      super
    end

    # @return [String] the array representation wrapped in braces
    #   instead of square brackets
    def inspect
      "{ #{to_s[1..-2]} }"
    end


    # @group Scanning/Checking/Skipping

    # @return [Integer] the cursor position, defaulting to 0 when it has
    #   not been set (e.g. on an instance whose #initialize did not run)
    def pos
      @pos ||= 0
    end

    # Moves the cursor forward by one unless it is already past the
    # last item.
    #
    # @return [Integer, nil] the new position, or nil if nothing changed
    def inc
      @pos = pos + 1 unless eot?
    end

    # Moves the cursor back by one unless it is already at the first
    # item, so the position never becomes negative.
    #
    # @return [Integer, nil] the new position, or nil if nothing changed
    def dec
      @pos = pos - 1 unless pos.zero?
    end

    # @return [Object, nil] the item currently pointed at, or nil when
    #   the cursor is past the end
    def pointer
      self[pos]
    end
    alias_method :curr_item, :pointer

    # @return [Boolean] whether the cursor is at or beyond the end
    def eot?
      pos >= size
    end

    # Reads the current item (see #check) and then advances the cursor.
    #
    # @param type [Object, nil] expected type of the current item
    # @return [Object] the item that was under the cursor
    def scan(type=nil)
      item = check(type)
      inc
      item
    end

    # @return [Array] every item from the cursor to the end
    def rest
      self[pos..-1]
    end

    # Returns the current item without moving the cursor.
    #
    # @param type [Object, nil] when given, the current item's +type+
    #   must equal it
    # @return [Object] the current item
    # @raise [Error] if +type+ is given and does not match
    def check(type=nil)
      expect_type!(type) unless type.nil?
      pointer
    end

    # Advances the cursor past the current item.
    #
    # @param type [Object, nil] when given, the current item's +type+
    #   must equal it
    # @return [Integer, nil] the result of #inc
    # @raise [Error] if +type+ is given and does not match
    def skip(type=nil)
      expect_type!(type) unless type.nil?
      inc
    end

    private

    # Raises unless the current item's +type+ equals the expected one.
    def expect_type!(type)
      return if pointer.type == type

      raise Error, "wrong type: #{type} for #{pointer}"
    end

  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for Ast::Token: the class-level validity check and the string/array
# conversions of tokens with and without a value.
describe Ast::Token do

  describe ".valid?" do
    # The class itself is the subject here, not an instance.
    subject { Ast::Token }

    it "returns false when given 3 item array" do
      result = subject.valid?([:a, 'b', 1])
      result.should be_false
    end

    it "returns false when 1st item is not symbol" do
      result = subject.valid?(['a', 'b'])
      result.should be_false
    end

    it "returns false when given empty array" do
      result = subject.valid?([])
      result.should be_false
    end

    it "returns true when given [symbol, object]" do
      result = subject.valid?([:a, 'b'])
      result.should be_true
    end

    it "returns true when given a Token" do
      token = Ast::Token.new(:a, 'b')
      subject.valid?(token).should be_true
    end
  end


  context "when token has value" do
    subject do
      Ast::Token.new(:a, 'b')
    end

    describe "#to_s" do
      it "shows type and value" do
        subject.to_s.should == '<:a, "b">'
      end
    end

    describe "#to_a" do
      it "returns array with type and value" do
        pair = subject.to_a
        pair.should == [:a, 'b']
      end
    end
  end

  context "when token has no value" do
    subject do
      Ast::Token.new(:a, nil)
    end

    describe "#to_s" do
      it "shows only type" do
        subject.to_s.should == '<:a>'
      end
    end

    describe "#to_a" do
      it "returns array with only type" do
        pair = subject.to_a
        pair.should == [:a]
      end
    end
  end

end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for Ast::Tokeniser::Rule: attribute types, the default block, and
# running a rule's block against matched text.
describe Ast::Tokeniser::Rule do
  subject do
    Ast::Tokeniser::Rule.new(:test, /test/)
  end

  describe "#name" do
    specify { subject.name.should be_kind_of(Symbol) }
  end

  describe "#regex" do
    specify { subject.regex.should be_kind_of(Regexp) }
  end

  describe "#block" do
    specify { subject.block.should be_kind_of(Proc) }

    context "when no block is given" do
      it "use default proc which returns argument" do
        default_block = subject.block
        default_block.call(1).should == 1
      end
    end
  end

  describe "#run" do

    context "when returning a string" do
      subject do
        Ast::Tokeniser::Rule.new(:rword, /[a-z]+/) { |word| word.reverse }
      end

      it "runs the block" do
        output = subject.run("hello")
        output.should == "olleh"
      end
    end

    context "when returning an array" do
      subject do
        Ast::Tokeniser::Rule.new(:letter, /[a-z]+/) { |word| word.split('') }
      end

      it "runs the block" do
        output = subject.run("hello")
        output.should == ["h", "e", "l", "l", "o"]
      end
    end
  end
end
|
42
|
+
|
43
|
+
# Specs for the Ast::Tokeniser class-level DSL: registering rules and
# turning an input string into tokens.
describe Ast::Tokeniser do

  describe ".rule" do

    # Tokeniser subclass that starts out with a single :over rule.
    class Klass1 < Ast::Tokeniser
      rule(:over, /b/)
    end

    it "adds a new rule to list" do
      Klass1.rule(:test, /c/)
      rule_names = Klass1.rules.map { |rule| rule.name }
      rule_names.should include(:test)
    end

    it "overwrites existing rules with same name" do
      Klass1.rule(:over, /a/)
      matching = Klass1.rules.find_all { |rule| rule.name == :over }
      matching.size.should == 1
    end
  end

  describe ".tokenise" do

    # Tokeniser for command-line style input: long flags, bundled short
    # flags (split into one item per character) and bare words.
    class Klass2 < Ast::Tokeniser
      rule(:long, /--([a-zA-Z0-9]+)/) { |match| match[1] }

      rule(:short, /-([a-zA-Z0-9]+)/) { |match| match[1].split('') }

      rule(:word, /[a-zA-Z0-9]+/)
    end

    specify { Klass2.tokenise("").should be_kind_of(Ast::Tokens) }

    it "retuns the correct tokens" do
      expected = [
        [:long, "along"],
        [:short, "s"],
        [:short, "h"],
        [:word, "aword"]
      ]
      Klass2.tokenise("--along -sh aword").to_a.should == expected
    end

    it "runs example in Readme" do
      input = "an example String, lorem!"

      class StringTokens < Ast::Tokeniser
        rule(:article, /an|a|the/)
        rule(:word, /[a-z]+/)
        rule(:punct, /,|\.|!/)

        rule(:pronoun, /[A-Z][a-z]+/) { |text| text.downcase }
      end

      expected = [
        [:article, "an"],
        [:word, "example"],
        [:pronoun, "string"],
        [:punct, ","],
        [:word, "lorem"],
        [:punct, "!"]
      ]
      StringTokens.tokenise(input).to_a.should == expected
    end
  end

end
|
@@ -0,0 +1,329 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Ast::Tokens do
|
4
|
+
|
5
|
+
# Appending accepts either a ready-made Token or a [type, value] array.
describe "#<<" do
  it "adds token to self" do
    new_token = Ast::Token.new(:a, 'b')
    subject << new_token
    subject.include?(new_token).should be_true
  end

  it "converts an array to a token and adds to self" do
    pair = [:a, 'b']
    subject << pair
    subject.to_a.include?([:a, 'b']).should be_true
  end
end
|
17
|
+
|
18
|
+
# #to_a is expected to give an Array whose elements are themselves Arrays.
describe "#to_a" do
  subject do
    Ast::Tokens.new([[:a, 'b'], [:c, 'd']])
  end

  it "returns an array" do
    converted = subject.to_a
    converted.should be_kind_of(Array)
  end

  it "contains arrays" do
    subject.to_a.each { |element| element.should be_kind_of(Array) }
  end
end
|
30
|
+
|
31
|
+
# Pointer-based API of Ast::Tokens, exercised against a fixed list of five
# tokens of types :a, :c, :e, :g and :i.
context "When scanning tokens" do
  subject do
    Ast::Tokens.new([[:a, 'b'], [:c, 'd'], [:e, 'f'], [:g, 'h'], [:i, 'j']])
  end


  describe "#pointer" do
    it "returns current token" do
      current = subject.pointer
      current.to_a.should == [:a, 'b']
    end
  end

  describe "#inc" do
    it "returns an integer" do
      subject.pos = 1
      subject.inc.should be_kind_of(Integer)
    end

    it "increments pointer position" do
      expect { subject.inc }.to change { subject.pos }.by(1)
    end

    it "doesn't increment pointer when at end of tokens" do
      subject.pos = 4
      expect { subject.inc }.to change { subject.pos }.by(0)
    end
  end

  describe "#dec" do
    it "returns an integer" do
      subject.pos = 3
      subject.dec.should be_kind_of(Integer)
    end

    it "decrements pointer position" do
      subject.pos = 4
      expect { subject.dec }.to change { subject.pos }.by(-1)
    end

    it "doesn't decrement pointer when at start of tokens" do
      subject.pos = 0
      expect { subject.dec }.to change { subject.pos }.by(0)
    end
  end

  describe "#pointing_at?" do
    it "returns true if type matches token type" do
      matched = subject.pointing_at?(:a)
      matched.should be_true
    end

    it "returns false if type doesn't match token type" do
      matched = subject.pointing_at?(:z)
      matched.should be_false
    end
  end

  describe "#pointing_at" do
    it "returns type of current token" do
      current_type = subject.pointing_at
      current_type.should == :a
    end
  end

  describe "#peek" do
    it "returns +len+ tokens from current token" do
      upcoming = subject.peek(2)
      upcoming.to_a.should == [[:a, 'b'], [:c, 'd']]
    end

    it "returns all tokens if length given is too big" do
      upcoming = subject.peek(5)
      upcoming.should == subject
    end
  end

  describe "#scan" do
    it "returns current token" do
      scanned = subject.scan
      scanned.to_a.should == [:a, 'b']
    end

    it "increments the pointer" do
      expect { subject.scan }.to change { subject.pos }.by(1)
    end

    context "when given type" do
      it "returns current token if types match" do
        scanned = subject.scan(:a)
        scanned.to_a.should == [:a, 'b']
      end

      it "raises error if types doesn't match" do
        expect { subject.scan(:z) }.to raise_error(Ast::Tokens::Error)
      end
    end
  end

  describe "#check" do
    it "returns current token" do
      checked = subject.check
      checked.to_a.should == [:a, 'b']
    end

    it "doesn't increment pointer" do
      expect { subject.check }.to change { subject.pos }.by(0)
    end

    context "when given type" do
      it "returns current token if types match" do
        checked = subject.check(:a)
        checked.to_a.should == [:a, 'b']
      end

      it "raises error if types doesn't match" do
        expect { subject.check(:z) }.to raise_error(Ast::Tokens::Error)
      end
    end
  end

  describe "#skip" do
    it "increments pointer" do
      expect { subject.skip }.to change { subject.pos }.by(1)
    end

    context "when given type" do
      it "increments pointer if type matches current token" do
        expect { subject.skip(:a) }.to change { subject.pos }.by(1)
      end

      it "raises error if types don't match" do
        expect { subject.skip(:z) }.to raise_error(Ast::Tokens::Error)
      end
    end
  end

  describe "#eot?" do
    it "returns false if not at end of tokens" do
      subject.pos = 1
      subject.eot?.should be_false
    end

    it "returns true if at end of tokens" do
      subject.pos = 4
      subject.eot?.should be_true
    end
  end

  describe "#scan_until" do
    specify { subject.scan_until(:d).should be_kind_of(Ast::Tokens) }

    it "contains last matched item" do
      scanned = subject.scan_until(:c)
      scanned.last.type.should == :c
    end

    it "return rest of tokens if no match found" do
      scanned = subject.scan_until(:z)
      scanned.should == subject
    end

    it "increments pointer" do
      expect { subject.scan_until(:c) }.to change { subject.pos }.by(2)
    end
  end

  describe "#check_until" do
    specify { subject.check_until(:d).should be_kind_of(Ast::Tokens) }

    it "contains last matched item" do
      checked = subject.check_until(:c)
      checked.last.type.should == :c
    end

    it "returns rest of tokens if no match found" do
      checked = subject.check_until(:z)
      checked.should == subject
    end

    it "doesn't change pointer" do
      expect { subject.check_until(:c) }.to change { subject.pos }.by(0)
    end
  end

  describe "#skip_until" do
    specify { subject.skip_until(:d).should be_kind_of(Integer) }

    it "counts last matched item" do
      skipped = subject.skip_until(:c)
      skipped.should == 2
    end

    it "counts to end of tokens if no match found" do
      skipped = subject.skip_until(:z)
      skipped.should == subject.length
    end

    it "increments pointer" do
      expect { subject.skip_until(:c) }.to change { subject.pos }.by(2)
    end
  end

  describe "#rest" do
    it "returns all tokens after and including current token" do
      subject.pos = 3
      remaining = subject.rest
      remaining.to_a.should == [[:g, 'h'], [:i, 'j']]
    end
  end

  describe "#clear" do
    it "sets pointer to end of tokens" do
      subject.clear
      subject.pos.should == 4
    end
  end

  describe "#unscan" do
    it "sets pointer to last position" do
      subject.scan
      subject.unscan
      subject.pos.should == 0
    end

    it "sets previous position to nil" do
      subject.scan
      subject.unscan
      subject.prev_pos.should be_nil
    end
  end

end
|
275
|
+
|
276
|
+
# Iteration helpers of Ast::Tokens, exercised against three identical
# [:a, 'b'] tokens so every yielded item can be checked the same way.
context "when enumerating" do

  subject do
    Ast::Tokens.new([[:a, 'b'], [:a, 'b'], [:a, 'b']])
  end

  describe "#each" do
    it "passes type and value to block" do
      subject.each do |type, value|
        type.should == :a
        value.should == 'b'
      end
    end
  end

  describe "#each_type" do
    it "passes type to block" do
      subject.each_type { |type| type.should == :a }
    end

    it "doesn't pass value to block" do
      # The second block parameter is kept on purpose: the example checks
      # what, if anything, arrives in it.
      subject.each_type { |type, value| value.should_not == 'b' }
    end
  end

  describe "#each_value" do
    it "passes value to block" do
      subject.each_value { |value| value.should == 'b' }
    end

    it "doesn't pass type to block" do
      # The second block parameter is kept on purpose: the example checks
      # what, if anything, arrives in it.
      subject.each_value { |value, type| type.should_not == :a }
    end
  end

  describe "#each_token" do
    it "passes tokens to block" do
      subject.each_token { |token| token.should be_kind_of(Ast::Token) }
    end
  end

end
|
328
|
+
|
329
|
+
end
|