ast_ast 0.0.0 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +1 -1
- data/README.md +103 -33
- data/Rakefile +5 -48
- data/lib/ast_ast.rb +6 -6
- data/lib/ast_ast/ast.rb +162 -1
- data/lib/ast_ast/bnf.rb +187 -0
- data/lib/ast_ast/token.rb +51 -8
- data/lib/ast_ast/tokeniser.rb +35 -13
- data/lib/ast_ast/tokens.rb +308 -6
- data/lib/ast_ast/tree.rb +74 -2
- data/lib/ast_ast/version.rb +3 -0
- data/spec/ast_ast/token_spec.rb +62 -0
- data/spec/ast_ast/tokeniser_spec.rb +101 -0
- data/spec/ast_ast/tokens_spec.rb +329 -0
- data/spec/spec_helper.rb +11 -0
- metadata +27 -45
- data/.document +0 -5
- data/.gitignore +0 -24
- data/VERSION +0 -1
- data/lib/ast_tokens.rb +0 -8
- data/test/helper.rb +0 -10
- data/test/test_ast_ast.rb +0 -5
- data/test/test_tokeniser.rb +0 -13
data/lib/ast_ast/token.rb
CHANGED
@@ -8,28 +8,71 @@ module Ast
|
|
8
8
|
end
|
9
9
|
|
10
10
|
# Check whether the given array is valid, i.e. it has a symbol
|
11
|
-
# then
|
11
|
+
# then one or no objects only.
|
12
12
|
#
|
13
|
+
# @param arr [Array, Token]
|
13
14
|
# @example
|
14
15
|
#
|
15
16
|
# Ast::Token.valid? [:type, 'val'] #=> true
|
16
17
|
# Ast::Token.valid? ['wrong', 'val'] #=> false
|
17
18
|
# Ast::Token.valid? ['too', 'long', 1] #=> false
|
19
|
+
# Ast::Token.valid? [:single] #=> true
|
18
20
|
#
|
19
21
|
def self.valid?(arr)
|
20
22
|
if arr.is_a? Array
|
21
|
-
if arr.nil? || arr.size
|
22
|
-
|
23
|
+
if arr.nil? || arr.size > 2 || arr.size == 0
|
24
|
+
false
|
23
25
|
elsif !arr[0].is_a?(Symbol)
|
24
|
-
|
26
|
+
false
|
25
27
|
else
|
26
|
-
|
28
|
+
true
|
27
29
|
end
|
28
|
-
elsif arr.is_a?
|
29
|
-
|
30
|
+
elsif arr.is_a? Token
|
31
|
+
true
|
30
32
|
else
|
31
|
-
|
33
|
+
false
|
32
34
|
end
|
33
35
|
end
|
36
|
+
|
37
|
+
# Turn the Token to a String, similar to an array.
|
38
|
+
#
|
39
|
+
# @example
|
40
|
+
#
|
41
|
+
# Ast::Token.new(:test, "str").to_s
|
42
|
+
# #=> <:test, "str">
|
43
|
+
#
|
44
|
+
# @return [String]
|
45
|
+
#
|
46
|
+
def to_s
|
47
|
+
if @value.nil?
|
48
|
+
"<:#{@type}>"
|
49
|
+
else
|
50
|
+
"<:#{@type}, #{@value.inspect}>"
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
# Turn the Token to an Array.
|
55
|
+
#
|
56
|
+
# @example
|
57
|
+
#
|
58
|
+
# Ast::Token.new(:test, "str").to_a
|
59
|
+
# #=> [:test, "str"]
|
60
|
+
#
|
61
|
+
# @return [Array]
|
62
|
+
#
|
63
|
+
def to_a
|
64
|
+
if @value.nil?
|
65
|
+
[@type]
|
66
|
+
else
|
67
|
+
[@type, @value]
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
# Make #inspect show something a bit prettier
|
72
|
+
def inspect
|
73
|
+
self.to_s
|
74
|
+
end
|
75
|
+
|
34
76
|
end
|
77
|
+
|
35
78
|
end
|
data/lib/ast_ast/tokeniser.rb
CHANGED
@@ -1,17 +1,19 @@
|
|
1
1
|
# @abstract
|
2
2
|
module Ast
|
3
3
|
class Tokeniser
|
4
|
-
attr_accessor :rules, :scanner
|
5
4
|
|
6
|
-
# Describes a single rule created within the Ast::Tokeniser subclass
|
7
5
|
class Rule
|
8
6
|
attr_accessor :name, :regex, :block
|
9
7
|
|
10
8
|
# Creates a new Rule instance
|
11
9
|
#
|
12
|
-
# @param [Symbol]
|
13
|
-
#
|
14
|
-
# @param [
|
10
|
+
# @param name [Symbol]
|
11
|
+
# Name of the token to be created.
|
12
|
+
# @param regex [Regexp]
|
13
|
+
# Regular expression to be matched
|
14
|
+
# @param block [Proc]
|
15
|
+
# Optional block to be executed with match(es)
|
16
|
+
#
|
15
17
|
def initialize(name, regex, &block)
|
16
18
|
@name = name
|
17
19
|
@regex = regex
|
@@ -45,36 +47,54 @@ module Ast
|
|
45
47
|
# end
|
46
48
|
#
|
47
49
|
# Klass.tokenise("split up")
|
48
|
-
# #=> [[:letter, "s"], [:letter, "p"], [:letter, "l"], [:letter, "i"],
|
50
|
+
# #=> [[:letter, "s"], [:letter, "p"], [:letter, "l"], [:letter, "i"],
|
51
|
+
# # [:letter, "t"], [:letter, "u"], [:letter, "p"]]
|
49
52
|
#
|
50
53
|
#
|
51
54
|
def run(val)
|
52
55
|
arr = val.match(@regex).to_a
|
53
56
|
val = arr unless arr.empty?
|
54
57
|
val = arr[0] if arr.size == 1
|
58
|
+
val = arr[0] if arr[0] == arr[1] # this happens with /(a|b|c)/ regexs
|
55
59
|
@block.call val
|
56
60
|
end
|
57
61
|
end
|
58
62
|
|
59
63
|
# Creates a new Rule and adds to the +@rules+ list.
|
60
|
-
# @see
|
64
|
+
# @see Rule#initialize
|
65
|
+
#
|
66
|
+
# @param name [Symbol]
|
67
|
+
# @param regex [Regexp]
|
68
|
+
#
|
61
69
|
def self.rule(name, regex, &block)
|
62
70
|
@rules ||= []
|
71
|
+
# make rules with same name overwrite first rule
|
72
|
+
@rules.delete_if {|i| i.name == name}
|
63
73
|
@rules << Rule.new(name, regex, &block)
|
64
74
|
end
|
65
75
|
|
76
|
+
# @return [Array]
|
77
|
+
# Rules that have been defined.
|
78
|
+
#
|
79
|
+
def self.rules; @rules; end
|
80
|
+
|
66
81
|
# Takes the input and uses the rules that were created to scan it.
|
67
82
|
#
|
68
|
-
# @param [String]
|
69
|
-
#
|
83
|
+
# @param [String]
|
84
|
+
# Input string to scan.
|
85
|
+
#
|
86
|
+
# @return [Tokens]
|
87
|
+
#
|
70
88
|
def self.tokenise(input)
|
71
89
|
@scanner = StringScanner.new(input)
|
72
90
|
|
73
|
-
result =
|
91
|
+
result = Tokens.new
|
74
92
|
until @scanner.eos?
|
93
|
+
m = false # keep track of matches
|
75
94
|
@rules.each do |i|
|
76
95
|
a = @scanner.scan(i.regex)
|
77
96
|
unless a.nil?
|
97
|
+
m = true # match happened
|
78
98
|
ran = i.run(a)
|
79
99
|
# split array into separate tokens, *not* values
|
80
100
|
if ran.is_a? Array
|
@@ -84,9 +104,11 @@ module Ast
|
|
84
104
|
end
|
85
105
|
end
|
86
106
|
end
|
87
|
-
#
|
88
|
-
|
89
|
-
|
107
|
+
unless m # if no match happened
|
108
|
+
# obviously no rule matches this so ignore it
|
109
|
+
# could add verbose mode?
|
110
|
+
@scanner.pos += 1 unless @scanner.eos?
|
111
|
+
end
|
90
112
|
end
|
91
113
|
result
|
92
114
|
end
|
data/lib/ast_ast/tokens.rb
CHANGED
@@ -1,22 +1,273 @@
|
|
1
1
|
module Ast
|
2
|
+
|
3
|
+
# An Array of Token instances basically, but with added methods
|
4
|
+
# which add StringScanner type capabilities.
|
2
5
|
class Tokens < Array
|
6
|
+
attr_accessor :prev_pos, :pos
|
7
|
+
|
8
|
+
class Error < StandardError; end
|
3
9
|
|
10
|
+
# Creates tokens for each item given if not already and sets
|
11
|
+
# pointer.
|
12
|
+
def initialize(args=[])
|
13
|
+
@pos = 0
|
14
|
+
return self if args == []
|
15
|
+
if args[0].is_a? Token
|
16
|
+
args.each_token do |i|
|
17
|
+
self << i
|
18
|
+
end
|
19
|
+
else
|
20
|
+
args.each do |i|
|
21
|
+
if i.size > 0
|
22
|
+
self << Token.new(i[0], i[1])
|
23
|
+
else
|
24
|
+
self << Token.new(i[0], nil)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
self
|
29
|
+
end
|
30
|
+
|
31
|
+
# Adds +val+ to self, if a Token is given it is added as expected.
|
32
|
+
# If an Array is given and it is valid, it will be converted to a
|
33
|
+
# Token and added, if invalid an error is raised.
|
34
|
+
#
|
35
|
+
# @param val [Token, .valid?]
|
36
|
+
# @return [Tokens]
|
37
|
+
#
|
4
38
|
def <<(val)
|
5
|
-
raise "value given #{val} is invalid" unless
|
39
|
+
raise "value given #{val} is invalid" unless Token.valid?(val)
|
6
40
|
if val.is_a? Array
|
7
|
-
|
41
|
+
if val.size > 0
|
42
|
+
self << Token.new(val[0], val[1])
|
43
|
+
else
|
44
|
+
self << Token.new(val[0], nil)
|
45
|
+
end
|
8
46
|
else
|
9
47
|
super
|
10
48
|
end
|
11
49
|
end
|
12
50
|
|
51
|
+
# Turns the Tokens, and Token instances inside into arrays.
|
52
|
+
#
|
53
|
+
# @return [Array]
|
54
|
+
#
|
55
|
+
def to_a
|
56
|
+
self.collect {|i| i.to_a }
|
57
|
+
end
|
58
|
+
|
59
|
+
def inspect
|
60
|
+
"#< #{@pos}/#{self.size-1} #{self.to_s[1..-2]} >"
|
61
|
+
end
|
62
|
+
|
63
|
+
# @group Scanning Tokens
|
64
|
+
|
65
|
+
# @return [Token] the current token being 'pointed' to
|
66
|
+
def pointer
|
67
|
+
self[@pos]
|
68
|
+
end
|
69
|
+
alias_method :curr_item, :pointer
|
70
|
+
|
71
|
+
# Increment the pointer unless at end of tokens.
|
72
|
+
#
|
73
|
+
# @return [Integer, nil]
|
74
|
+
# New position
|
75
|
+
#
|
76
|
+
def inc
|
77
|
+
@pos += 1 unless eot?
|
78
|
+
end
|
79
|
+
|
80
|
+
# Decrement the pointer unless at first token.
|
81
|
+
#
|
82
|
+
# @return [Integer, nil]
|
83
|
+
# New position
|
84
|
+
#
|
85
|
+
def dec
|
86
|
+
@pos -= 1 unless @pos == 0
|
87
|
+
end
|
88
|
+
|
89
|
+
# Checks whether the pointer is at a token with type +type+
|
90
|
+
#
|
91
|
+
# @return [true, false]
|
92
|
+
#
|
93
|
+
def pointing_at?(type)
|
94
|
+
pointing_at == type
|
95
|
+
end
|
96
|
+
|
97
|
+
# Gets the type of the current token.
|
98
|
+
#
|
99
|
+
# @return [Symbol]
|
100
|
+
#
|
101
|
+
def pointing_at
|
102
|
+
pointer.type
|
103
|
+
end
|
104
|
+
|
105
|
+
# Gets a list of tokens +len+ from current position, without
|
106
|
+
# advancing pointer.
|
107
|
+
#
|
108
|
+
# @param len [Integer]
|
109
|
+
# @return [Tokens]
|
110
|
+
#
|
111
|
+
def peek(len)
|
112
|
+
self[@pos..(@pos+len-1)]
|
113
|
+
end
|
114
|
+
|
115
|
+
# Reads the current token and advances the pointer. If a type is
|
116
|
+
# given it will throw an error if types do not match.
|
117
|
+
#
|
118
|
+
# @param type [Symbol]
|
119
|
+
# @return [Token]
|
120
|
+
#
|
121
|
+
# @raise [Error]
|
122
|
+
#
|
123
|
+
def scan(type=nil)
|
124
|
+
@prev_pos = @pos
|
125
|
+
a = check(type)
|
126
|
+
inc
|
127
|
+
a
|
128
|
+
end
|
129
|
+
|
130
|
+
# Reads the current token, but does not advance pointer. If a type
|
131
|
+
# is given it will throw an error if types do not match.
|
132
|
+
#
|
133
|
+
# @param type [Symbol]
|
134
|
+
# @return [Token]
|
135
|
+
#
|
136
|
+
# @raise [Error]
|
137
|
+
#
|
138
|
+
def check(type=nil)
|
139
|
+
if type.nil?
|
140
|
+
pointer
|
141
|
+
else
|
142
|
+
if pointing_at?(type)
|
143
|
+
pointer
|
144
|
+
else
|
145
|
+
raise Error, "wrong type: #{type} for #{self.pointer}"
|
146
|
+
end
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
# Attempts to skip the current token. If type is given will only skip
|
151
|
+
# a token of that type, will raise error for anything else.
|
152
|
+
#
|
153
|
+
# @param type [Symbol]
|
154
|
+
# @return [Integer]
|
155
|
+
# The new pointer position
|
156
|
+
#
|
157
|
+
# @raise [Error] if type of next token does not match +type+
|
158
|
+
#
|
159
|
+
def skip(type=nil)
|
160
|
+
@prev_pos = @pos
|
161
|
+
if type.nil?
|
162
|
+
inc
|
163
|
+
else
|
164
|
+
if pointing_at?(type)
|
165
|
+
inc
|
166
|
+
else
|
167
|
+
raise Error, "wrong type: #{type} for #{self.pointer}"
|
168
|
+
end
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
172
|
+
# @return [boolean] whether at end of tokens
|
173
|
+
def eot?
|
174
|
+
@pos >= self.size-1
|
175
|
+
end
|
176
|
+
|
177
|
+
# Scans the tokens until a token of +type+ is found. Returns tokens
|
178
|
+
# up to and including the matched token.
|
179
|
+
|
180
|
+
# Reads the tokens until a token of +type+ is found. Return tokens
|
181
|
+
# up to and including the matched token, also advances pointer.
|
182
|
+
#
|
183
|
+
# @see #scan
|
184
|
+
#
|
185
|
+
# @param type [Symbol]
|
186
|
+
# @return [Tokens]
|
187
|
+
#
|
188
|
+
def scan_until(type)
|
189
|
+
@prev_pos = @pos
|
190
|
+
r = Tokens.new
|
191
|
+
until pointing_at?(type) || self.eot?
|
192
|
+
r << scan
|
193
|
+
end
|
194
|
+
r << scan
|
195
|
+
r
|
196
|
+
end
|
197
|
+
|
198
|
+
# Reads the tokens until a token of +type+ is found. Returns tokens
|
199
|
+
# up to and including the matched token, but does not advance the
|
200
|
+
# pointer.
|
201
|
+
#
|
202
|
+
# @see #check
|
203
|
+
#
|
204
|
+
# @param type [Symbol]
|
205
|
+
# @return [Tokens]
|
206
|
+
#
|
207
|
+
def check_until(type)
|
208
|
+
r = Tokens.new
|
209
|
+
a = 0
|
210
|
+
until pointing_at?(type) || self.eot?
|
211
|
+
r << scan
|
212
|
+
a += 1
|
213
|
+
end
|
214
|
+
r << scan
|
215
|
+
@pos -= a + 1
|
216
|
+
r
|
217
|
+
end
|
218
|
+
|
219
|
+
# Advances the pointer until token of +type+ is found.
|
220
|
+
#
|
221
|
+
# @param type [Symbol]
|
222
|
+
# @return [Integer]
|
223
|
+
# Number of tokens advanced, including match
|
224
|
+
#
|
225
|
+
def skip_until(type)
|
226
|
+
@prev_pos = @pos
|
227
|
+
r = 0
|
228
|
+
until pointing_at?(type) || self.eot?
|
229
|
+
inc
|
230
|
+
r += 1
|
231
|
+
end
|
232
|
+
inc
|
233
|
+
r += 1
|
234
|
+
r
|
235
|
+
end
|
236
|
+
|
237
|
+
# @return [Tokens]
|
238
|
+
# All tokens after the current token.
|
239
|
+
#
|
240
|
+
def rest
|
241
|
+
self[pos..-1]
|
242
|
+
end
|
243
|
+
|
244
|
+
# Set the scan pointer to the end of the tokens.
|
245
|
+
#
|
246
|
+
def clear
|
247
|
+
@pos = self.size-1
|
248
|
+
end
|
249
|
+
|
250
|
+
# Sets the pointer to the previous remembered position. Only one
|
251
|
+
# previous position is remembered, which is updated every scan or
|
252
|
+
# skip.
|
253
|
+
#
|
254
|
+
def unscan
|
255
|
+
if @prev_pos
|
256
|
+
@pos = @prev_pos
|
257
|
+
@prev_pos = nil
|
258
|
+
end
|
259
|
+
end
|
260
|
+
alias_method :unskip, :unscan
|
261
|
+
|
262
|
+
# @endgroup
|
263
|
+
|
13
264
|
# @group Enumeration
|
14
265
|
|
15
266
|
alias_method :_each, :each
|
267
|
+
|
16
268
|
# Loops through the types and contents of each tag separately, passing them
|
17
269
|
# to the block given.
|
18
270
|
#
|
19
|
-
# @return [Ast::Tokens] returns self
|
20
271
|
# @yield [Symbol, Object] gives the type and content of each block in turn
|
21
272
|
#
|
22
273
|
# @example
|
@@ -39,16 +290,48 @@ module Ast
|
|
39
290
|
self
|
40
291
|
end
|
41
292
|
|
42
|
-
#
|
293
|
+
# Loops through the types of each tag, passing them to block given.
|
294
|
+
#
|
295
|
+
# @yield [Symbol]
|
43
296
|
# @see #each
|
297
|
+
#
|
298
|
+
# @example
|
299
|
+
#
|
300
|
+
# tokens = Ast::Tokens.new
|
301
|
+
# tokens << [:a, 1] << [:b, 2] << [:c, 3] << [:d, 4]
|
302
|
+
#
|
303
|
+
# tokens.each_type do |t|
|
304
|
+
# puts t
|
305
|
+
# end
|
306
|
+
# #=> a
|
307
|
+
# #=> b
|
308
|
+
# #=> c
|
309
|
+
# #=> d
|
310
|
+
#
|
44
311
|
def each_type(&blck)
|
45
312
|
self._each do |i|
|
46
313
|
yield(i.type)
|
47
314
|
end
|
48
315
|
end
|
49
316
|
|
50
|
-
#
|
51
|
-
#
|
317
|
+
# Loops through the values of each tag, passing them to block given.
|
318
|
+
#
|
319
|
+
# @yield [Object]
|
320
|
+
# @see #each
|
321
|
+
#
|
322
|
+
# @example
|
323
|
+
#
|
324
|
+
# tokens = Ast::Tokens.new
|
325
|
+
# tokens << [:a, 1] << [:b, 2] << [:c, 3] << [:d, 4]
|
326
|
+
#
|
327
|
+
# tokens.each_value do |v|
|
328
|
+
# puts v
|
329
|
+
# end
|
330
|
+
# #=> 1
|
331
|
+
# #=> 2
|
332
|
+
# #=> 3
|
333
|
+
# #=> 4
|
334
|
+
#
|
52
335
|
def each_value(&blck)
|
53
336
|
self._each do |i|
|
54
337
|
yield(i.value)
|
@@ -57,6 +340,25 @@ module Ast
|
|
57
340
|
|
58
341
|
# Evaluate block given for each token instance
|
59
342
|
# @see each
|
343
|
+
|
344
|
+
# Loops through the tokens, passing them to block given.
|
345
|
+
#
|
346
|
+
# @yield [Token]
|
347
|
+
# @see #each
|
348
|
+
#
|
349
|
+
# @example
|
350
|
+
#
|
351
|
+
# tokens = Ast::Tokens.new
|
352
|
+
# tokens << [:a, 1] << [:b, 2] << [:c, 3] << [:d, 4]
|
353
|
+
#
|
354
|
+
# tokens.each_token do |token|
|
355
|
+
# puts token
|
356
|
+
# end
|
357
|
+
# #=> <:a, 1>
|
358
|
+
# #=> <:b, 2>
|
359
|
+
# #=> <:c, 3>
|
360
|
+
# #=> <:d, 4>
|
361
|
+
#
|
60
362
|
def each_token(&blck)
|
61
363
|
self._each do |i|
|
62
364
|
yield(i)
|