ast_ast 0.0.0 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/ast_ast/token.rb CHANGED
@@ -8,28 +8,71 @@ module Ast
8
8
  end
9
9
 
10
10
  # Check whether an array given is valid, ie. it has a symbol
11
- # then an object only.
11
+ # then one or no objects only.
12
12
  #
13
+ # @param arr [Array, Token]
13
14
  # @example
14
15
  #
15
16
  # Ast::Token.valid? [:type, 'val'] #=> true
16
17
  # Ast::Token.valid? ['wrong', 'val'] #=> false
17
18
  # Ast::Token.valid? ['too', 'long', 1] #=> false
19
+ # Ast::Token.valid? [:single] #=> true
18
20
  #
19
21
  def self.valid?(arr)
20
22
  if arr.is_a? Array
21
- if arr.nil? || arr.size != 2
22
- return false
23
+ if arr.nil? || arr.size > 2 || arr.size == 0
24
+ false
23
25
  elsif !arr[0].is_a?(Symbol)
24
- return false
26
+ false
25
27
  else
26
- return true
28
+ true
27
29
  end
28
- elsif arr.is_a? Ast::Token
29
- return true
30
+ elsif arr.is_a? Token
31
+ true
30
32
  else
31
- return false
33
+ false
32
34
  end
33
35
  end
36
+
37
+ # Turn the Token to a String, similar to an array.
38
+ #
39
+ # @example
40
+ #
41
+ # Ast::Token.new(:test, "str").to_s
42
+ # #=> <:test, "str">
43
+ #
44
+ # @return [String]
45
+ #
46
+ def to_s
47
+ if @value.nil?
48
+ "<:#{@type}>"
49
+ else
50
+ "<:#{@type}, #{@value.inspect}>"
51
+ end
52
+ end
53
+
54
+ # Turn the Token to an Array.
55
+ #
56
+ # @example
57
+ #
58
+ # Ast::Token.new(:test, "str").to_a
59
+ # #=> [:test, "str"]
60
+ #
61
+ # @return [Array]
62
+ #
63
+ def to_a
64
+ if @value.nil?
65
+ [@type]
66
+ else
67
+ [@type, @value]
68
+ end
69
+ end
70
+
71
+ # Make #inspect show something a bit prettier
72
+ def inspect
73
+ self.to_s
74
+ end
75
+
34
76
  end
77
+
35
78
  end
@@ -1,17 +1,19 @@
1
1
  # @abstract
2
2
  module Ast
3
3
  class Tokeniser
4
- attr_accessor :rules, :scanner
5
4
 
6
- # Describes a single rule created within the Ast::Tokeniser subclass
7
5
  class Rule
8
6
  attr_accessor :name, :regex, :block
9
7
 
10
8
  # Creates a new Rule instance
11
9
  #
12
- # @param [Symbol] name name of the token to be created
13
- # @param [Regexp] regex regular expression to be matched
14
- # @param [Proc] block optional block to be executed with match(es)
10
+ # @param name [Symbol]
11
+ # Name of the token to be created.
12
+ # @param regex [Regexp]
13
+ # Regular expression to be matched
14
+ # @param block [Proc]
15
+ # Optional block to be executed with match(es)
16
+ #
15
17
  def initialize(name, regex, &block)
16
18
  @name = name
17
19
  @regex = regex
@@ -45,36 +47,54 @@ module Ast
45
47
  # end
46
48
  #
47
49
  # Klass.tokenise("split up")
48
- # #=> [[:letter, "s"], [:letter, "p"], [:letter, "l"], [:letter, "i"], [:letter, "t"], [:letter, "u"], [:letter, "p"]]
50
+ # #=> [[:letter, "s"], [:letter, "p"], [:letter, "l"], [:letter, "i"],
51
+ # # [:letter, "t"], [:letter, "u"], [:letter, "p"]]
49
52
  #
50
53
  #
51
54
  def run(val)
52
55
  arr = val.match(@regex).to_a
53
56
  val = arr unless arr.empty?
54
57
  val = arr[0] if arr.size == 1
58
+ val = arr[0] if arr[0] == arr[1] # this happens with /(a|b|c)/ regexs
55
59
  @block.call val
56
60
  end
57
61
  end
58
62
 
59
63
  # Creates a new Rule and adds to the +@rules+ list.
60
- # @see Ast::Tokeniser::Rule#initialize
64
+ # @see Rule#initialize
65
+ #
66
+ # @param name [Symbol]
67
+ # @param regex [Regexp]
68
+ #
61
69
  def self.rule(name, regex, &block)
62
70
  @rules ||= []
71
+ # make rules with same name overwrite first rule
72
+ @rules.delete_if {|i| i.name == name}
63
73
  @rules << Rule.new(name, regex, &block)
64
74
  end
65
75
 
76
+ # @return [Array]
77
+ # Rules that have been defined.
78
+ #
79
+ def self.rules; @rules; end
80
+
66
81
  # Takes the input and uses the rules that were created to scan it.
67
82
  #
68
- # @param [String] input string to scan
69
- # @return [Array]
83
+ # @param input [String]
84
+ # Input string to scan.
85
+ #
86
+ # @return [Tokens]
87
+ #
70
88
  def self.tokenise(input)
71
89
  @scanner = StringScanner.new(input)
72
90
 
73
- result = []
91
+ result = Tokens.new
74
92
  until @scanner.eos?
93
+ m = false # keep track of matches
75
94
  @rules.each do |i|
76
95
  a = @scanner.scan(i.regex)
77
96
  unless a.nil?
97
+ m = true # match happened
78
98
  ran = i.run(a)
79
99
  # split array into separate tokens, *not* values
80
100
  if ran.is_a? Array
@@ -84,9 +104,11 @@ module Ast
84
104
  end
85
105
  end
86
106
  end
87
- # obviously no rule matches this so ignore it
88
- # could add verbose mode where this throws an exception!
89
- @scanner.pos += 1 unless @scanner.eos?
107
+ unless m # if no match happened
108
+ # obviously no rule matches this so ignore it
109
+ # could add verbose mode?
110
+ @scanner.pos += 1 unless @scanner.eos?
111
+ end
90
112
  end
91
113
  result
92
114
  end
@@ -1,22 +1,273 @@
1
1
  module Ast
2
+
3
+ # An Array of Token instances basically, but with added methods
4
+ # which add StringScanner type capabilities.
2
5
  class Tokens < Array
6
+ attr_accessor :prev_pos, :pos
7
+
8
+ class Error < StandardError; end
3
9
 
10
+ # Creates tokens for each item given if not already and sets
11
+ # pointer.
12
+ def initialize(args=[])
13
+ @pos = 0
14
+ return self if args == []
15
+ if args[0].is_a? Token
16
+ args.each_token do |i|
17
+ self << i
18
+ end
19
+ else
20
+ args.each do |i|
21
+ if i.size > 0
22
+ self << Token.new(i[0], i[1])
23
+ else
24
+ self << Token.new(i[0], nil)
25
+ end
26
+ end
27
+ end
28
+ self
29
+ end
30
+
31
+ # Adds +val+ to self, if a Token is given it is added as expected.
32
+ # If an Array is given and it is valid, it will be converted to a
33
+ # Token and added, if invalid an error is raised.
34
+ #
35
+ # @param val [Token, .valid?]
36
+ # @return [Tokens]
37
+ #
4
38
  def <<(val)
5
- raise "value given #{val} is invalid" unless Ast::Token.valid?(val)
39
+ raise "value given #{val} is invalid" unless Token.valid?(val)
6
40
  if val.is_a? Array
7
- self << Ast::Token.new(val[0], val[1])
41
+ if val.size > 0
42
+ self << Token.new(val[0], val[1])
43
+ else
44
+ self << Token.new(val[0], nil)
45
+ end
8
46
  else
9
47
  super
10
48
  end
11
49
  end
12
50
 
51
+ # Turns the Tokens, and Token instances inside into arrays.
52
+ #
53
+ # @return [Array]
54
+ #
55
+ def to_a
56
+ self.collect {|i| i.to_a }
57
+ end
58
+
59
+ def inspect
60
+ "#< #{@pos}/#{self.size-1} #{self.to_s[1..-2]} >"
61
+ end
62
+
63
+ # @group Scanning Tokens
64
+
65
+ # @return [Token] the current token being 'pointed' to
66
+ def pointer
67
+ self[@pos]
68
+ end
69
+ alias_method :curr_item, :pointer
70
+
71
+ # Increment the pointer unless at end of tokens.
72
+ #
73
+ # @return [Integer, nil]
74
+ # New position
75
+ #
76
+ def inc
77
+ @pos += 1 unless eot?
78
+ end
79
+
80
+ # Decrement the pointer unless at first token.
81
+ #
82
+ # @return [Integer, nil]
83
+ # New position
84
+ #
85
+ def dec
86
+ @pos -= 1 unless @pos == 0
87
+ end
88
+
89
+ # Checks whether the pointer is at a token with type +type+
90
+ #
91
+ # @return [true, false]
92
+ #
93
+ def pointing_at?(type)
94
+ pointing_at == type
95
+ end
96
+
97
+ # Gets the type of the current token.
98
+ #
99
+ # @return [Symbol]
100
+ #
101
+ def pointing_at
102
+ pointer.type
103
+ end
104
+
105
+ # Gets a list of tokens +len+ from current position, without
106
+ # advancing pointer.
107
+ #
108
+ # @param len [Integer]
109
+ # @return [Tokens]
110
+ #
111
+ def peek(len)
112
+ self[@pos..(@pos+len-1)]
113
+ end
114
+
115
+ # Reads the current token and advances the pointer. If a type is
116
+ # given it will throw an error if types do not match.
117
+ #
118
+ # @param type [Symbol]
119
+ # @return [Token]
120
+ #
121
+ # @raise [Error]
122
+ #
123
+ def scan(type=nil)
124
+ @prev_pos = @pos
125
+ a = check(type)
126
+ inc
127
+ a
128
+ end
129
+
130
+ # Reads the current token, but does not advance pointer. If a type
131
+ # is given it will throw an error if types do not match.
132
+ #
133
+ # @param type [Symbol]
134
+ # @return [Token]
135
+ #
136
+ # @raise [Error]
137
+ #
138
+ def check(type=nil)
139
+ if type.nil?
140
+ pointer
141
+ else
142
+ if pointing_at?(type)
143
+ pointer
144
+ else
145
+ raise Error, "wrong type: #{type} for #{self.pointer}"
146
+ end
147
+ end
148
+ end
149
+
150
+ # Attempts to skip the current token. If type is given will only skip
151
+ # a token of that type, will raise error for anything else.
152
+ #
153
+ # @param type [Symbol]
154
+ # @return [Integer]
155
+ # The new pointer position
156
+ #
157
+ # @raise [Error] if type of next token does not match +type+
158
+ #
159
+ def skip(type=nil)
160
+ @prev_pos = @pos
161
+ if type.nil?
162
+ inc
163
+ else
164
+ if pointing_at?(type)
165
+ inc
166
+ else
167
+ raise Error, "wrong type: #{type} for #{self.pointer}"
168
+ end
169
+ end
170
+ end
171
+
172
+ # @return [true, false] whether the pointer is at (or past) the last token
173
+ def eot?
174
+ @pos >= self.size-1
175
+ end
176
+
177
+ # Scans the tokens until a token of +type+ is found. Returns tokens
178
+ # up to and including the matched token.
179
+
180
+ # Reads the tokens until a token of +type+ is found. Returns tokens
181
+ # up to and including the matched token, also advances pointer.
182
+ #
183
+ # @see #scan
184
+ #
185
+ # @param type [Symbol]
186
+ # @return [Tokens]
187
+ #
188
+ def scan_until(type)
189
+ @prev_pos = @pos
190
+ r = Tokens.new
191
+ until pointing_at?(type) || self.eot?
192
+ r << scan
193
+ end
194
+ r << scan
195
+ r
196
+ end
197
+
198
+ # Reads the tokens until a token of +type+ is found. Returns tokens
199
+ # up to and including the matched token, but does not advance the
200
+ # pointer.
201
+ #
202
+ # @see #check
203
+ #
204
+ # @param type [Symbol]
205
+ # @return [Tokens]
206
+ #
207
+ def check_until(type)
208
+ r = Tokens.new
209
+ a = 0
210
+ until pointing_at?(type) || self.eot?
211
+ r << scan
212
+ a += 1
213
+ end
214
+ r << scan
215
+ @pos -= a + 1
216
+ r
217
+ end
218
+
219
+ # Advances the pointer until token of +type+ is found.
220
+ #
221
+ # @param type [Symbol]
222
+ # @return [Integer]
223
+ # Number of tokens advanced, including match
224
+ #
225
+ def skip_until(type)
226
+ @prev_pos = @pos
227
+ r = 0
228
+ until pointing_at?(type) || self.eot?
229
+ inc
230
+ r += 1
231
+ end
232
+ inc
233
+ r += 1
234
+ r
235
+ end
236
+
237
+ # @return [Tokens]
238
+ # All tokens from the current token (inclusive) to the end.
239
+ #
240
+ def rest
241
+ self[pos..-1]
242
+ end
243
+
244
+ # Set the scan pointer to the end of the tokens.
245
+ #
246
+ def clear
247
+ @pos = self.size-1
248
+ end
249
+
250
+ # Sets the pointer to the previous remembered position. Only one
251
+ # previous position is remembered, which is updated every scan or
252
+ # skip.
253
+ #
254
+ def unscan
255
+ if @prev_pos
256
+ @pos = @prev_pos
257
+ @prev_pos = nil
258
+ end
259
+ end
260
+ alias_method :unskip, :unscan
261
+
262
+ # @endgroup
263
+
13
264
  # @group Enumeration
14
265
 
15
266
  alias_method :_each, :each
267
+
16
268
  # Loops through the types and contents of each tag separately, passing them
17
269
  # to the block given.
18
270
  #
19
- # @return [Ast::Tokens] returns self
20
271
  # @yield [Symbol, Object] gives the type and content of each block in turn
21
272
  #
22
273
  # @example
@@ -39,16 +290,48 @@ module Ast
39
290
  self
40
291
  end
41
292
 
42
- # Evalute block given for the type of each token
293
+ # Loops through the types of each tag, passing them to block given.
294
+ #
295
+ # @yield [Symbol]
43
296
  # @see #each
297
+ #
298
+ # @example
299
+ #
300
+ # tokens = Ast::Tokens.new
301
+ # tokens << [:a, 1] << [:b, 2] << [:c, 3] << [:d, 4]
302
+ #
303
+ # tokens.each_type do |t|
304
+ # puts t
305
+ # end
306
+ # #=> a
307
+ # #=> b
308
+ # #=> c
309
+ # #=> d
310
+ #
44
311
  def each_type(&blck)
45
312
  self._each do |i|
46
313
  yield(i.type)
47
314
  end
48
315
  end
49
316
 
50
- # Evaluate block given for the value of each token
51
- # @see each
317
+ # Loops through the values of each tag, passing them to block given.
318
+ #
319
+ # @yield [Object]
320
+ # @see #each
321
+ #
322
+ # @example
323
+ #
324
+ # tokens = Ast::Tokens.new
325
+ # tokens << [:a, 1] << [:b, 2] << [:c, 3] << [:d, 4]
326
+ #
327
+ # tokens.each_value do |v|
328
+ # puts v
329
+ # end
330
+ # #=> 1
331
+ # #=> 2
332
+ # #=> 3
333
+ # #=> 4
334
+ #
52
335
  def each_value(&blck)
53
336
  self._each do |i|
54
337
  yield(i.value)
@@ -57,6 +340,25 @@ module Ast
57
340
 
58
341
  # Evaluate block given for each token instance
59
342
  # @see each
343
+
344
+ # Loops through the tokens, passing them to block given.
345
+ #
346
+ # @yield [Token]
347
+ # @see #each
348
+ #
349
+ # @example
350
+ #
351
+ # tokens = Ast::Tokens.new
352
+ # tokens << [:a, 1] << [:b, 2] << [:c, 3] << [:d, 4]
353
+ #
354
+ # tokens.each_token do |token|
355
+ # puts token
356
+ # end
357
+ # #=> <:a, 1>
358
+ # #=> <:b, 2>
359
+ # #=> <:c, 3>
360
+ # #=> <:d, 4>
361
+ #
60
362
  def each_token(&blck)
61
363
  self._each do |i|
62
364
  yield(i)