syntax 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,108 @@
1
+ require 'syntax'
2
+
3
+ module Syntax
4
+
5
+ # A simple implementation of an XML lexer. It handles most cases. It is
6
+ # not a validating lexer, meaning it will happily process invalid XML without
7
+ # complaining.
8
+ class XML < Tokenizer
9
+
10
+ # Initialize the lexer state: scanning starts outside of any tag.
11
+ def setup
12
+ @in_tag = false
13
+ end
14
+
15
+ # Step through a single iteration of the tokenization process. This will
16
+ # yield (potentially) many tokens, and possibly zero tokens.
17
+ def step
18
+ start_group :normal, matched if scan( /\s+/ )
19
+ if @in_tag
20
+ case
21
+ when scan( /([-\w]+):([-\w]+)/ )
22
+ start_group :namespace, subgroup(1)
23
+ start_group :punct, ":"
24
+ start_group :attribute, subgroup(2)
25
+ when scan( /\d+/ )
26
+ start_group :number, matched
27
+ when scan( /[-\w]+/ )
28
+ start_group :attribute, matched
29
+ when scan( %r{[/?]?>} )
30
+ @in_tag = false
31
+ start_group :punct, matched
32
+ when scan( /=/ )
33
+ start_group :punct, matched
34
+ when scan( /["']/ )
35
+ scan_string matched
36
+ else
37
+ append getch
38
+ end
39
+ elsif ( text = scan_until( /(?=[<&])/ ) )
40
+ start_group :normal, text unless text.empty?
41
+ if scan(/<!--.*?(-->|\Z)/m)
42
+ start_group :comment, matched
43
+ else
44
+ case peek(1)
45
+ when "<"
46
+ start_group :punct, getch
47
+ case peek(1)
48
+ when "?"
49
+ append getch
50
+ when "/"
51
+ append getch
52
+ when "!"
53
+ append getch
54
+ end
55
+ start_group :normal, matched if scan( /\s+/ )
56
+ if scan( /([-\w]+):([-\w]+)/ )
57
+ start_group :namespace, subgroup(1)
58
+ start_group :punct, ":"
59
+ start_group :tag, subgroup(2)
60
+ elsif scan( /[-\w]+/ )
61
+ start_group :tag, matched
62
+ end
63
+ @in_tag = true
64
+ when "&"
65
+ if scan( /&\S{1,10};/ )
66
+ start_group :entity, matched
67
+ else
68
+ start_group :normal, scan( /&/ )
69
+ end
70
+ end
71
+ end
72
+ else
73
+ append scan_until( /\Z/ )
74
+ end
75
+ end
76
+
77
+ private
78
+
79
# Scan the string starting at the current position, with the given
# delimiter character.
#
# The opening delimiter (already consumed by the caller) is emitted as
# :punct. The body is then emitted as :string pieces, with entities such
# as "&amp;" emitted as :entity, until the closing delimiter is emitted as
# :punct. If no closing delimiter, "&" or backslash remains in the input,
# scanning stops and the rest is left for the caller.
#
# delim:: the delimiter character that opened the string.
def scan_string( delim )
  start_group :punct, delim

  # Escape the delimiter so that it is always matched literally; an
  # unescaped regex metacharacter here would produce a pattern that matches
  # the empty string everywhere, and the loop below would never advance.
  boundary = /(?=[&\\]|#{Regexp.escape( delim )})/

  while ( text = scan_until( boundary ) )
    start_group :string, text unless text.empty?

    case peek(1)
    when "&"
      if scan( /&\S{1,10};/ )
        start_group :entity, matched
      else
        # A bare ampersand is kept as ordinary string content.
        start_group :string, getch
      end
    when "\\"
      # Emit the backslash together with the character it precedes; the
      # backslash may be the last character of the input.
      start_group :string, getch
      append( getch || "" )
    when delim
      start_group :punct, getch
      break
    end
  end
end
103
+
104
+ end
105
+
106
+ SYNTAX["xml"] = XML
107
+
108
+ end
@@ -0,0 +1,105 @@
1
+ require 'syntax'
2
+
3
+ module Syntax
4
+
5
+ # A simple implementation of a YAML lexer. It handles most cases. It is
6
+ # not a validating lexer.
7
+ class YAML < Tokenizer
8
+
9
# Step through a single iteration of the tokenization process. This will
# yield (potentially) many tokens, and possibly zero tokens.
#
# At the beginning of a line the lexer looks for a document marker
# ("---"), a mapping key followed by ":", a sequence dash, a blank line or
# a comment. Anywhere else it recognises line breaks, blanks, type tags
# ("!..."), anchors ("&..."), references ("*..."), times, timestamps,
# quoted strings (delegated to scan_string), symbols, ":", comments and
# folded block scalars (">" or ">-"); anything else is emitted as :normal
# up to the end of the line or the next "#".
def step
  if bol?
    case
    when scan(/---(\s*.+)?$/)
      start_group :document, matched
    when scan(/(\s*)([a-zA-Z][-\w]*)(\s*):/)
      start_group :normal, subgroup(1)
      start_group :key, subgroup(2)
      start_group :normal, subgroup(3)
      start_group :punct, ":"
    when scan(/(\s*)-/)
      start_group :normal, subgroup(1)
      start_group :punct, "-"
    when scan(/\s*$/)
      start_group :normal, matched
      # The pattern can succeed without consuming anything when the
      # scanner sits on a bare line terminator (for instance input that
      # starts with an empty line followed by a comment). Consume that
      # terminator so that repeated calls always make progress.
      append getch if matched.empty? && !eos?
    when scan(/#.*$/)
      start_group :comment, matched
    else
      append getch
    end
  else
    case
    when scan(/[\n\r]+/)
      start_group :normal, matched
    when scan(/[ \t]+/)
      start_group :normal, matched
    when scan(/!+(.*?^)?\S+/)
      start_group :type, matched
    when scan(/&\S+/)
      start_group :anchor, matched
    when scan(/\*\S+/)
      start_group :ref, matched
    when scan(/\d\d:\d\d:\d\d/)
      start_group :time, matched
    when scan(/\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d(\.\d+)? [-+]\d\d:\d\d/)
      start_group :date, matched
    when scan(/['"]/)
      start_group :punct, matched
      scan_string matched
    when scan(/:\w+/)
      start_group :symbol, matched
    when scan(/[:]/)
      start_group :punct, matched
    when scan(/#.*$/)
      start_group :comment, matched
    when scan(/>-?/)
      start_group :punct, matched
      # The rest of the indicator line, including its line terminator, is
      # plain text.
      start_group :normal, scan(/.*$/)
      append getch until eos? || bol?
      return if eos?

      # The indentation of the first body line sets the minimum indent for
      # the block; the block ends at the first non-blank line indented less.
      indent = check(/ */)
      start_group :string
      loop do
        # At end of input the pattern below still matches (as an empty
        # string), so stop explicitly instead of looping forever.
        break if eos?
        line = check_until(/[\n\r]|\Z/)
        break if line.nil?
        if line.chomp.length > 0
          this_indent = line.chomp.match( /^\s*/ )[0]
          break if this_indent.length < indent.length
        end
        append scan_until(/[\n\r]|\Z/)
      end
    else
      start_group :normal, scan_until(/(?=$|#)/)
    end
  end
end
77
+
78
+ private
79
+
80
# Scan the body of a quoted scalar, starting just after the opening
# delimiter (which the caller has already emitted).
#
# Plain content is emitted as :string. Inside double quotes a backslash and
# the character after it are emitted together as :expr; inside single
# quotes a backslash is ordinary content. The closing delimiter is emitted
# as :punct. If the input ends before a closing delimiter is found, the
# remainder is emitted as :string.
#
# delim:: the quote character that opened the scalar.
def scan_string( delim )
  regex = /(?=[#{delim=="'" ? "" : "\\\\"}#{delim}])/

  loop do
    text = scan_until( regex )
    if text.nil?
      # Unterminated scalar: everything that is left belongs to it.
      start_group :string, scan_until( /\Z/ )
      break
    end
    start_group :string, text unless text.empty?

    if peek(1) == "\\"
      # Take the backslash plus the following character when there is one.
      # The /m flag lets that character be a line terminator, and the
      # optional second character covers a backslash at the very end of the
      # input; a plain /../ matches neither, leaving the scanner where it
      # was and this loop spinning forever.
      start_group :expr, scan( /..?/m )
    else
      start_group :punct, getch
      break
    end
  end
end
100
+
101
+ end
102
+
103
+ SYNTAX["yaml"] = YAML
104
+
105
+ end
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby
# Test-suite entry point: loads every tc_*.rb file found beneath the
# directory that contains this script.

# Anchor the library path to this script's own location rather than to the
# directory the process happened to be started from, so the entry stays
# valid even if the working directory changes later.
base_dir = File.expand_path( File.dirname(__FILE__) )
$:.unshift File.join( base_dir, "..", "lib" )

Dir.chdir base_dir
Dir["**/tc_*.rb"].each { |file| load file }
@@ -0,0 +1,518 @@
1
+ $:.unshift "../../lib"
2
+
3
+ require 'test/unit'
4
+ require 'syntax/ruby'
5
+
6
# Test case for Syntax::Ruby. Each test feeds a small piece of Ruby source
# to the tokenizer and checks the group and text of the tokens it yields.
# The repeated "collect tokens, then compare group and text" steps live in
# the private helpers at the bottom of the class.
class TC_Syntax_Ruby < Test::Unit::TestCase

  def setup
    @ruby = Syntax::Ruby.new
  end

  def test_empty
    assert tokens_for( "" ).empty?
  end

  def test_constant
    assert_sole_group "Foo", :constant
  end

  def test_ident
    assert_sole_group "foo", :ident
  end

  def test_comment_eol
    assert_sole_group "# a comment", :comment
  end

  def test_comment_block
    assert_sole_group "=begin\nthis is a comment\n=end", :comment
  end

  def test_keyword
    Syntax::Ruby::KEYWORDS.each do |word|
      tok = tokens_for( word )
      assert_equal [ :keyword, word ], [ tok.first.group, tok.first ]
    end
    # After a dot a keyword is just a method name.
    Syntax::Ruby::KEYWORDS.each do |word|
      tok = tokens_for( "foo.#{word}" )
      tok.shift
      tok.shift
      assert_equal [ :ident, word ], [ tok.first.group, tok.first ]
    end
  end

  def test__END__
    assert_sole_group "__END__\n\nblah blah blah", :comment
  end

  def test_def_paren
    assert_named_definition "def foo(bar)", "def ", "foo", :method
  end

  def test_def_space
    assert_named_definition "def foo bar", "def ", "foo", :method
  end

  def test_def_semicolon
    assert_named_definition "def foo;", "def ", "foo", :method
  end

  def test_class_space
    assert_named_definition "class Foo\n", "class ", "Foo", :class
  end

  def test_class_semicolon
    assert_named_definition "class Foo;", "class ", "Foo", :class
  end

  def test_class_extend
    assert_named_definition "class Foo< Bang", "class ", "Foo", :class
  end

  def test_module_space
    assert_named_definition "module Foo\n", "module ", "Foo", :module
  end

  def test_module_semicolon
    assert_named_definition "module Foo;", "module ", "Foo", :module
  end

  def test_module_other
    assert_named_definition "module Foo!\n", "module ", "Foo!", :module
  end

  def test_scope_operator
    tok = tokens_for( "Foo::Bar" )
    assert_equal "Foo", tok.shift
    assert_equal "::", tok.first
    assert_equal :punct, tok.shift.group
    assert_equal "Bar", tok.first
  end

  def test_symbol_dquote
    assert_first_token tokens_for( ':"foo"' ), :symbol, ':"foo"'
  end

  def test_symbol_squote
    assert_first_token tokens_for( ":'foo'" ), :symbol, ":'foo'"
  end

  def test_symbol
    assert_first_token tokens_for( ":foo_bar?" ), :symbol, ":foo_bar?"
  end

  def test_char
    assert_first_token tokens_for( "?." ), :char, "?."
    assert_first_token tokens_for( '?\n' ), :char, '?\n'
  end

  def test_specials
    specials = %w{__FILE__ __LINE__ true false nil self}

    specials.each do |word|
      assert_first_token tokens_for( word ), :constant, word
    end

    specials.each do |word|
      assert_first_token tokens_for( "#{word}?" ), :ident, "#{word}?"
    end

    specials.each do |word|
      assert_first_token tokens_for( "#{word}!" ), :ident, "#{word}!"
    end

    # After a dot the word is a method name, not a special value.
    specials.each do |word|
      tok = tokens_for( "x.#{word}" )
      tok.shift
      tok.shift
      assert_first_token tok, :ident, word
    end
  end

  def test_pct_r
    tok = tokens_for( '%r{foo#{x}bar}' )
    assert_next_token tok, :punct, "%r{"
    assert_next_token tok, :regex, "foo"
    assert_next_token tok, :expr, '#{x}'
    assert_next_token tok, :regex, "bar"
    assert_next_token tok, :punct, "}"

    tok = tokens_for( '%r-foo#{x}bar-' )
    assert_next_token tok, :punct, "%r-"
    assert_next_token tok, :regex, "foo"
    assert_next_token tok, :expr, '#{x}'
    assert_next_token tok, :regex, "bar"
    assert_next_token tok, :punct, "-"
  end

  def test_pct_w
    tok = tokens_for( '%w-foo#{x} bar baz-' )
    assert_next_token tok, :punct, "%w-"
    assert_next_token tok, :string, 'foo#{x} bar baz'
    assert_next_token tok, :punct, "-"
  end

  def test_pct_q
    tok = tokens_for( '%q-hello #{world}-' )
    assert_next_token tok, :punct, "%q-"
    assert_next_token tok, :string, 'hello #{world}'
    assert_next_token tok, :punct, "-"
  end

  def test_pct_s
    tok = tokens_for( '%s-hello #{world}-' )
    assert_next_token tok, :punct, "%s-"
    assert_next_token tok, :symbol, 'hello #{world}'
    assert_next_token tok, :punct, "-"
  end

  def test_pct_W
    tok = tokens_for( '%W-foo#{x} bar baz-' )
    assert_next_token tok, :punct, "%W-"
    assert_next_token tok, :string, 'foo'
    assert_next_token tok, :expr, '#{x}'
    assert_next_token tok, :string, ' bar baz'
    assert_next_token tok, :punct, "-"
  end

  def test_pct_Q
    tok = tokens_for( '%Q-hello #{world}-' )
    assert_next_token tok, :punct, "%Q-"
    assert_next_token tok, :string, 'hello '
    assert_next_token tok, :expr, '#{world}'
    assert_next_token tok, :punct, "-"
  end

  def test_pct_x
    tok = tokens_for( '%x-ls /blah/#{foo}-' )
    assert_next_token tok, :punct, "%x-"
    assert_next_token tok, :string, 'ls /blah/'
    assert_next_token tok, :expr, '#{foo}'
    assert_next_token tok, :punct, "-"
  end

  def test_pct_string
    tok = tokens_for( '%-hello #{world}-' )
    assert_next_token tok, :punct, "%-"
    assert_next_token tok, :string, 'hello '
    assert_next_token tok, :expr, '#{world}'
    assert_next_token tok, :punct, "-"
  end

  def test_bad_pct_string
    tok = tokens_for( '%0hello #{world}0' )
    assert_next_token tok, :punct, "%"
    assert_next_token tok, :number, '0'
    assert_next_token tok, :ident, 'hello'
    assert_next_token tok, :normal, ' '
    assert_next_token tok, :comment, '#{world}0'
  end

  def test_shift_left
    tok = tokens_for( 'foo << 5' )
    assert_next_token tok, :ident, "foo"
    assert_next_token tok, :normal, " "
    assert_next_token tok, :punct, "<<"
    assert_next_token tok, :normal, " "
    assert_next_token tok, :number, "5"
  end

  def test_here_doc_no_opts
    tok = tokens_for( "foo <<EOF\n foo\n bar\n baz\nEOF" )
    assert_next_token tok, :ident, "foo"
    assert_next_token tok, :normal, " "
    assert_next_token tok, :punct, "<<"
    assert_next_token tok, :constant, "EOF"
    assert_next_token tok, :string, "\n foo\n bar\n baz\n"
    assert_next_token tok, :constant, "EOF"
  end

  def test_here_doc_no_opts_missing_end
    tok = tokens_for( "foo <<EOF\n foo\n bar\n baz\n EOF" )
    assert_next_token tok, :ident, "foo"
    assert_next_token tok, :normal, " "
    assert_next_token tok, :punct, "<<"
    assert_next_token tok, :constant, "EOF"
    assert_next_token tok, :string, "\n foo\n bar\n baz\n EOF"
  end

  def test_here_doc_float_right
    tok = tokens_for( "foo <<-EOF\n foo\n bar\n baz\n EOF" )
    assert_next_token tok, :ident, "foo"
    assert_next_token tok, :normal, " "
    assert_next_token tok, :punct, "<<-"
    assert_next_token tok, :constant, "EOF"
    assert_next_token tok, :string, "\n foo\n bar\n baz\n"
    assert_next_token tok, :constant, " EOF"
  end

  def test_here_doc_single_quotes
    tok = tokens_for( "foo <<'EOF'\n foo\#{x}\n bar\n baz\nEOF" )
    assert_next_token tok, :ident, "foo"
    assert_next_token tok, :normal, " "
    assert_next_token tok, :punct, "<<'"
    assert_next_token tok, :constant, "EOF"
    assert_next_token tok, :punct, "'"
    assert_next_token tok, :string, "\n foo\#{x}\n bar\n baz\n"
    assert_next_token tok, :constant, "EOF"
  end

  def test_here_doc_double_quotes
    tok = tokens_for( "foo <<\"EOF\"\n foo\#{x}\n bar\n baz\nEOF" )
    assert_next_token tok, :ident, "foo"
    assert_next_token tok, :normal, " "
    assert_next_token tok, :punct, "<<\""
    assert_next_token tok, :constant, "EOF"
    assert_next_token tok, :punct, "\""
    assert_next_token tok, :string, "\n foo"
    assert_next_token tok, :expr, '#{x}'
    assert_next_token tok, :string, "\n bar\n baz\n"
    assert_next_token tok, :constant, "EOF"
  end

  def test_space
    tok = tokens_for( "\n \t\t\n\n\r\n" )
    assert_next_token tok, :normal, "\n \t\t\n\n\r\n"
  end

  def test_number
    tok = tokens_for( "1 1.0 1e5 1.0e5 1_2.5 1_2.5_2 1_2.5_2e3_2" )
    # Every number except the last is followed by a separator token that is
    # discarded without being checked.
    assert_next_token tok, :number, "1"
    tok.shift
    assert_next_token tok, :number, "1.0"
    tok.shift
    assert_next_token tok, :number, "1e5"
    tok.shift
    assert_next_token tok, :number, "1.0e5"
    tok.shift
    assert_next_token tok, :number, "1_2.5"
    tok.shift
    assert_next_token tok, :number, "1_2.5_2"
    tok.shift
    assert_next_token tok, :number, "1_2.5_2e3_2"
  end

  def test_dquoted_string
    tok = tokens_for( '"foo #{x} bar\"\n\tbaz\xA5b\5\1234"' )
    assert_next_token tok, :punct, '"'
    assert_next_token tok, :string, 'foo '
    assert_next_token tok, :expr, '#{x}'
    assert_next_token tok, :string, ' bar'
    assert_next_token tok, :expr, '\"\n\t'
    assert_next_token tok, :string, 'baz'
    assert_next_token tok, :expr, '\xA5'
    assert_next_token tok, :string, 'b'
    assert_next_token tok, :expr, '\5\123'
    assert_next_token tok, :string, '4'
    assert_next_token tok, :punct, '"'
  end

  def test_squoted_string
    tok = tokens_for( '\'foo #{x} bar\\\'\n\tbaz\\\\\xA5b\5\1234\'' )
    assert_next_token tok, :punct, "'"
    assert_next_token tok, :string, 'foo #{x} bar'
    assert_next_token tok, :expr, '\\\''
    assert_next_token tok, :string, '\n\tbaz'
    assert_next_token tok, :expr, '\\\\'
    assert_next_token tok, :string, '\xA5b\5\1234'
    assert_next_token tok, :punct, "'"
  end

  def test_dot_selector
    tok = tokens_for( 'foo.nil' )
    tok.shift
    assert_next_token tok, :punct, "."
    assert_next_token tok, :ident, "nil"
  end

  def test_dot_range_inclusive
    tok = tokens_for( 'foo..nil' )
    tok.shift
    assert_next_token tok, :punct, ".."
    assert_next_token tok, :constant, "nil"
  end

  def test_dot_range_exclusive
    tok = tokens_for( 'foo...nil' )
    tok.shift
    assert_next_token tok, :punct, "..."
    assert_next_token tok, :constant, "nil"
  end

  def test_dot_range_many
    tok = tokens_for( 'foo.....nil' )
    tok.shift
    assert_next_token tok, :punct, "....."
    assert_next_token tok, :constant, "nil"
  end

  def test_attribute
    assert_next_token tokens_for( '@var_foo' ), :attribute, "@var_foo"
  end

  def test_global
    assert_next_token tokens_for( '$var_foo' ), :global, "$var_foo"
    assert_next_token tokens_for( '$12' ), :global, "$12"
    assert_next_token tokens_for( '$/f' ), :global, "$/"
    assert_next_token tokens_for( "$\n" ), :global, "$"
  end

  private

  # Tokenize +source+ in a single pass and return every yielded token, in
  # the order it was yielded.
  def tokens_for( source )
    collected = []
    @ruby.tokenize( source ) { |t| collected << t }
    collected
  end

  # Assert that the first token in +tokens+ has the given group and text,
  # and remove it from the list so the next call checks the following one.
  def assert_next_token( tokens, group, text )
    assert_equal [ group, text ], [ tokens.first.group, tokens.shift ]
  end

  # Assert the text and then the group of the first token in +tokens+,
  # leaving the list untouched.
  def assert_first_token( tokens, group, text )
    assert_equal text, tokens.first
    assert_equal group, tokens.first.group
  end

  # Tokenize +source+ and assert that at least one token is yielded, and
  # that every yielded token belongs to +group+ and equals the whole source.
  def assert_sole_group( source, group )
    called = false
    @ruby.tokenize( source ) do |tok|
      called = true
      assert_equal group, tok.group
      assert_equal source, tok
    end
    assert called
  end

  # Step the tokenizer twice over +source+. After the first step the most
  # recently yielded token must be +keyword+ in the :keyword group; after
  # the second it must be +name+ in +group+.
  def assert_named_definition( source, keyword, name, group )
    tok = nil
    @ruby.start( source ) { |t| tok = t }
    @ruby.step
    assert_equal keyword, tok
    assert_equal :keyword, tok.group
    @ruby.step
    assert_equal name, tok
    assert_equal group, tok.group
  end

end