syntax 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,108 @@
1
+ require 'syntax'
2
+
3
module Syntax

  # A simple implementation of an XML lexer. It handles most cases. It is
  # not a validating lexer, meaning it will happily process invalid XML without
  # complaining.
  #
  # The only state the lexer keeps is <tt>@in_tag</tt>: it is set once the
  # opening "<" (and optional tag name) of a tag has been consumed and cleared
  # again when the closing ">" is seen. The scanning and grouping primitives
  # used here (+scan+, +scan_until+, +getch+, +peek+, +matched+, +subgroup+,
  # +start_group+, +append+) are expected to come from Tokenizer.
  class XML < Tokenizer

    # Initialize the lexer. Lexing always starts outside of a tag.
    def setup
      @in_tag = false
    end

    # Step through a single iteration of the tokenization process. This will
    # yield (potentially) many tokens, and possibly zero tokens.
    def step
      # Leading whitespace is always plain text, inside or outside of a tag.
      start_group :normal, matched if scan( /\s+/ )
      if @in_tag
        case
          when scan( /([-\w]+):([-\w]+)/ )
            # A namespaced attribute name, e.g. xml:lang
            start_group :namespace, subgroup(1)
            start_group :punct, ":"
            start_group :attribute, subgroup(2)
          when scan( /\d+/ )
            start_group :number, matched
          when scan( /[-\w]+/ )
            start_group :attribute, matched
          when scan( %r{[/?]?>} )
            # ">", "/>" or "?>" closes the tag.
            @in_tag = false
            start_group :punct, matched
          when scan( /=/ )
            start_group :punct, matched
          when scan( /["']/ )
            scan_string matched
          else
            # Anything unrecognised is folded into the current group. getch
            # can come back nil when the input is exhausted -- which happens
            # here if the text ends with whitespace inside an unterminated
            # tag (e.g. "<a ") -- so fall back to an empty string, exactly as
            # scan_string does, instead of handing nil to append.
            append getch || ""
        end
      elsif ( text = scan_until( /(?=[<&])/ ) )
        # Everything up to (but not including) the next "<" or "&" is text.
        start_group :normal, text unless text.empty?
        if scan(/<!--.*?(-->|\Z)/m)
          # A comment runs to its terminator, or to the end of the input if
          # it is never closed.
          start_group :comment, matched
        else
          case peek(1)
            when "<"
              start_group :punct, getch
              # "<?", "</" and "<!" are kept together with the "<".
              case peek(1)
                when "?"
                  append getch
                when "/"
                  append getch
                when "!"
                  append getch
              end
              start_group :normal, matched if scan( /\s+/ )
              if scan( /([-\w]+):([-\w]+)/ )
                start_group :namespace, subgroup(1)
                start_group :punct, ":"
                start_group :tag, subgroup(2)
              elsif scan( /[-\w]+/ )
                start_group :tag, matched
              end
              @in_tag = true
            when "&"
              if scan( /&\S{1,10};/ )
                start_group :entity, matched
              else
                # A bare ampersand that does not start an entity.
                start_group :normal, scan( /&/ )
              end
          end
        end
      else
        # No further markup: the remainder of the input joins the current
        # group.
        append scan_until( /\Z/ )
      end
    end

    private

      # Scan the string starting at the current position, with the given
      # delimiter character. The opening delimiter has already been consumed
      # by the caller; it is emitted here as punctuation. Scanning stops at
      # the matching delimiter, or silently when the input offers no further
      # "&", backslash or delimiter.
      def scan_string( delim )
        start_group :punct, delim
        match = /(?=[&\\]|#{delim})/
        loop do
          break unless ( text = scan_until( match ) )
          start_group :string, text unless text.empty?
          case peek(1)
            when "&"
              if scan( /&\S{1,10};/ )
                start_group :entity, matched
              else
                start_group :string, getch
              end
            when "\\"
              # Keep the backslash and the character it escapes (if there is
              # one) inside the string.
              start_group :string, getch
              append getch || ""
            when delim
              start_group :punct, getch
              break
          end
        end
      end

  end

  SYNTAX["xml"] = XML

end
@@ -0,0 +1,105 @@
1
+ require 'syntax'
2
+
3
module Syntax

  # A simple implementation of a YAML lexer. It handles most cases. It is
  # not a validating lexer.
  #
  # The scanning and grouping primitives used here (+scan+, +scan_until+,
  # +check+, +check_until+, +getch+, +peek+, +bol?+, +eos?+, +matched+,
  # +subgroup+, +start_group+, +append+) are expected to come from Tokenizer.
  class YAML < Tokenizer

    # Step through a single iteration of the tokenization process. This will
    # yield (potentially) many tokens, and possibly zero tokens.
    #
    # At the beginning of a line the input is matched against line-level
    # constructs (document marker, key, sequence dash, blank line, comment);
    # everywhere else it is matched against value-level constructs.
    def step
      if bol?
        case
          when scan(/---(\s*.+)?$/)
            start_group :document, matched
          when scan(/(\s*)([a-zA-Z][-\w]*)(\s*):/)
            start_group :normal, subgroup(1)
            start_group :key, subgroup(2)
            start_group :normal, subgroup(3)
            start_group :punct, ":"
          when scan(/(\s*)-/)
            start_group :normal, subgroup(1)
            start_group :punct, "-"
          when scan(/\s*$/)
            start_group :normal, matched
          when scan(/#.*$/)
            start_group :comment, matched
          else
            append getch
        end
      else
        case
          when scan(/[\n\r]+/)
            start_group :normal, matched
          when scan(/[ \t]+/)
            start_group :normal, matched
          when scan(/!+(.*?^)?\S+/)
            start_group :type, matched
          when scan(/&\S+/)
            start_group :anchor, matched
          when scan(/\*\S+/)
            start_group :ref, matched
          when scan(/\d\d:\d\d:\d\d/)
            start_group :time, matched
          when scan(/\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d(\.\d+)? [-+]\d\d:\d\d/)
            start_group :date, matched
          when scan(/['"]/)
            start_group :punct, matched
            scan_string matched
          when scan(/:\w+/)
            start_group :symbol, matched
          when scan(/[:]/)
            start_group :punct, matched
          when scan(/#.*$/)
            start_group :comment, matched
          when scan(/>-?/)
            # Folded block scalar: the rest of the indicator line is plain
            # text, and the following lines form one string for as long as
            # they are indented at least as far as the first of them.
            start_group :punct, matched
            start_group :normal, scan(/.*$/)
            append getch until eos? || bol?
            return if eos?
            indent = check(/ */)
            start_group :string
            loop do
              # At the end of the input check_until(/...|\Z/) matches the
              # empty string instead of returning nil, so without this guard
              # a scalar that runs to the end of the document would keep
              # appending "" forever.
              break if eos?
              line = check_until(/[\n\r]|\Z/)
              break if line.nil?
              # Blank lines never terminate the scalar; only a non-blank line
              # with a smaller indent does.
              if line.chomp.length > 0
                this_indent = line.chomp.match( /^\s*/ )[0]
                break if this_indent.length < indent.length
              end
              append scan_until(/[\n\r]|\Z/)
            end
          else
            start_group :normal, scan_until(/(?=$|#)/)
        end
      end
    end

    private

      # Scan a quoted string whose opening delimiter (+delim+, either ' or ")
      # has already been consumed and emitted by the caller. Backslash escapes
      # are only recognised inside double-quoted strings. Scanning stops after
      # the closing delimiter, or at the end of the input when the string is
      # never closed.
      def scan_string( delim )
        regex = /(?=[#{delim=="'" ? "" : "\\\\"}#{delim}])/
        loop do
          text = scan_until( regex )
          if text.nil?
            # Unterminated string: everything that is left belongs to it.
            start_group :string, scan_until( /\Z/ )
            break
          else
            start_group :string, text unless text.empty?
          end

          case peek(1)
            when "\\"
              # Consume the backslash together with the character it escapes.
              # The /m flag lets "." match a newline (a backslash at the end
              # of a line), and getch covers a backslash that is the very
              # last character of the input. Without these the scanner would
              # not advance and this loop would never terminate.
              start_group :expr, scan(/../m) || getch
            else
              start_group :punct, getch
              break
          end
        end
      end

  end

  SYNTAX["yaml"] = YAML

end
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby
# Runs every test case file (tc_*.rb) found beneath the directory that
# contains this script.

# Make the library under test reachable from the test files.
$LOAD_PATH.unshift "../lib"

# Work relative to this script's own directory, wherever it was started from.
Dir.chdir File.dirname(__FILE__)

Dir.glob("**/tc_*.rb").each do |test_file|
  load test_file
end
@@ -0,0 +1,518 @@
1
+ $:.unshift "../../lib"
2
+
3
+ require 'test/unit'
4
+ require 'syntax/ruby'
5
+
6
# Unit tests for the Ruby tokenizer (Syntax::Ruby). Each test feeds a small
# snippet to the tokenizer and checks the group and text of the tokens that
# it yields.
class TC_Syntax_Ruby < Test::Unit::TestCase

  def setup
    @ruby = Syntax::Ruby.new
  end

  # Empty input must not yield any token at all.
  def test_empty
    assert tokens_of( "" ).empty?
  end

  def test_constant
    assert_every_token "Foo", :constant, "Foo"
  end

  def test_ident
    assert_every_token "foo", :ident, "foo"
  end

  def test_comment_eol
    assert_every_token "# a comment", :comment, "# a comment"
  end

  def test_comment_block
    assert_every_token "=begin\nthis is a comment\n=end",
      :comment, "=begin\nthis is a comment\n=end"
  end

  # A keyword on its own is a keyword; used as a method name after a dot it
  # is a plain identifier.
  def test_keyword
    Syntax::Ruby::KEYWORDS.each do |word|
      assert_token :keyword, word, tokens_of( word ).first
    end
    Syntax::Ruby::KEYWORDS.each do |word|
      assert_token :ident, word, tokens_of( "foo.#{word}" )[2]
    end
  end

  def test__END__
    assert_every_token "__END__\n\nblah blah blah",
      :comment, "__END__\n\nblah blah blah"
  end

  def test_def_paren
    assert_stepwise "def foo(bar)", [ "def ", :keyword ], [ "foo", :method ]
  end

  def test_def_space
    assert_stepwise "def foo bar", [ "def ", :keyword ], [ "foo", :method ]
  end

  def test_def_semicolon
    assert_stepwise "def foo;", [ "def ", :keyword ], [ "foo", :method ]
  end

  def test_class_space
    assert_stepwise "class Foo\n", [ "class ", :keyword ], [ "Foo", :class ]
  end

  def test_class_semicolon
    assert_stepwise "class Foo;", [ "class ", :keyword ], [ "Foo", :class ]
  end

  def test_class_extend
    assert_stepwise "class Foo< Bang", [ "class ", :keyword ], [ "Foo", :class ]
  end

  def test_module_space
    assert_stepwise "module Foo\n", [ "module ", :keyword ], [ "Foo", :module ]
  end

  def test_module_semicolon
    assert_stepwise "module Foo;", [ "module ", :keyword ], [ "Foo", :module ]
  end

  def test_module_other
    assert_stepwise "module Foo!\n", [ "module ", :keyword ], [ "Foo!", :module ]
  end

  def test_scope_operator
    left, operator, right = tokens_of( "Foo::Bar" )
    assert_equal "Foo", left
    assert_equal "::", operator
    assert_equal :punct, operator.group
    assert_equal "Bar", right
  end

  def test_symbol_dquote
    assert_text_then_group ':"foo"', :symbol, tokens_of( ':"foo"' ).first
  end

  def test_symbol_squote
    assert_text_then_group ":'foo'", :symbol, tokens_of( ":'foo'" ).first
  end

  def test_symbol
    assert_text_then_group ":foo_bar?", :symbol, tokens_of( ":foo_bar?" ).first
  end

  def test_char
    assert_text_then_group "?.", :char, tokens_of( "?." ).first
    assert_text_then_group '?\n', :char, tokens_of( '?\n' ).first
  end

  # The special words are constants when bare, but identifiers when they
  # carry a "?" or "!" suffix or are used as a method name after a dot.
  def test_specials
    specials = %w{__FILE__ __LINE__ true false nil self}

    specials.each do |word|
      assert_text_then_group word, :constant, tokens_of( word ).first
    end

    specials.each do |word|
      assert_text_then_group "#{word}?", :ident, tokens_of( "#{word}?" ).first
    end

    specials.each do |word|
      assert_text_then_group "#{word}!", :ident, tokens_of( "#{word}!" ).first
    end

    specials.each do |word|
      assert_text_then_group word, :ident, tokens_of( "x.#{word}" )[2]
    end
  end

  def test_pct_r
    assert_token_run( '%r{foo#{x}bar}', [
      [ :punct, "%r{" ],
      [ :regex, "foo" ],
      [ :expr, '#{x}' ],
      [ :regex, "bar" ],
      [ :punct, "}" ]
    ] )

    assert_token_run( '%r-foo#{x}bar-', [
      [ :punct, "%r-" ],
      [ :regex, "foo" ],
      [ :expr, '#{x}' ],
      [ :regex, "bar" ],
      [ :punct, "-" ]
    ] )
  end

  def test_pct_w
    assert_token_run( '%w-foo#{x} bar baz-', [
      [ :punct, "%w-" ],
      [ :string, 'foo#{x} bar baz' ],
      [ :punct, "-" ]
    ] )
  end

  def test_pct_q
    assert_token_run( '%q-hello #{world}-', [
      [ :punct, "%q-" ],
      [ :string, 'hello #{world}' ],
      [ :punct, "-" ]
    ] )
  end

  def test_pct_s
    assert_token_run( '%s-hello #{world}-', [
      [ :punct, "%s-" ],
      [ :symbol, 'hello #{world}' ],
      [ :punct, "-" ]
    ] )
  end

  def test_pct_W
    assert_token_run( '%W-foo#{x} bar baz-', [
      [ :punct, "%W-" ],
      [ :string, 'foo' ],
      [ :expr, '#{x}' ],
      [ :string, ' bar baz' ],
      [ :punct, "-" ]
    ] )
  end

  def test_pct_Q
    assert_token_run( '%Q-hello #{world}-', [
      [ :punct, "%Q-" ],
      [ :string, 'hello ' ],
      [ :expr, '#{world}' ],
      [ :punct, "-" ]
    ] )
  end

  def test_pct_x
    assert_token_run( '%x-ls /blah/#{foo}-', [
      [ :punct, "%x-" ],
      [ :string, 'ls /blah/' ],
      [ :expr, '#{foo}' ],
      [ :punct, "-" ]
    ] )
  end

  def test_pct_string
    assert_token_run( '%-hello #{world}-', [
      [ :punct, "%-" ],
      [ :string, 'hello ' ],
      [ :expr, '#{world}' ],
      [ :punct, "-" ]
    ] )
  end

  # "%0" does not open a percent literal, so the "#" starts a comment.
  def test_bad_pct_string
    assert_token_run( '%0hello #{world}0', [
      [ :punct, "%" ],
      [ :number, '0' ],
      [ :ident, 'hello' ],
      [ :normal, ' ' ],
      [ :comment, '#{world}0' ]
    ] )
  end

  def test_shift_left
    assert_token_run( 'foo << 5', [
      [ :ident, "foo" ],
      [ :normal, " " ],
      [ :punct, "<<" ],
      [ :normal, " " ],
      [ :number, "5" ]
    ] )
  end

  def test_here_doc_no_opts
    assert_token_run( "foo <<EOF\n foo\n bar\n baz\nEOF", [
      [ :ident, "foo" ],
      [ :normal, " " ],
      [ :punct, "<<" ],
      [ :constant, "EOF" ],
      [ :string, "\n foo\n bar\n baz\n" ],
      [ :constant, "EOF" ]
    ] )
  end

  # Without the "-" option an indented terminator does not end the here-doc.
  def test_here_doc_no_opts_missing_end
    assert_token_run( "foo <<EOF\n foo\n bar\n baz\n EOF", [
      [ :ident, "foo" ],
      [ :normal, " " ],
      [ :punct, "<<" ],
      [ :constant, "EOF" ],
      [ :string, "\n foo\n bar\n baz\n EOF" ]
    ] )
  end

  def test_here_doc_float_right
    assert_token_run( "foo <<-EOF\n foo\n bar\n baz\n EOF", [
      [ :ident, "foo" ],
      [ :normal, " " ],
      [ :punct, "<<-" ],
      [ :constant, "EOF" ],
      [ :string, "\n foo\n bar\n baz\n" ],
      [ :constant, " EOF" ]
    ] )
  end

  # A single-quoted terminator disables interpolation in the body.
  def test_here_doc_single_quotes
    assert_token_run( "foo <<'EOF'\n foo\#{x}\n bar\n baz\nEOF", [
      [ :ident, "foo" ],
      [ :normal, " " ],
      [ :punct, "<<'" ],
      [ :constant, "EOF" ],
      [ :punct, "'" ],
      [ :string, "\n foo\#{x}\n bar\n baz\n" ],
      [ :constant, "EOF" ]
    ] )
  end

  def test_here_doc_double_quotes
    assert_token_run( "foo <<\"EOF\"\n foo\#{x}\n bar\n baz\nEOF", [
      [ :ident, "foo" ],
      [ :normal, " " ],
      [ :punct, "<<\"" ],
      [ :constant, "EOF" ],
      [ :punct, "\"" ],
      [ :string, "\n foo" ],
      [ :expr, '#{x}' ],
      [ :string, "\n bar\n baz\n" ],
      [ :constant, "EOF" ]
    ] )
  end

  def test_space
    assert_token_run( "\n \t\t\n\n\r\n", [
      [ :normal, "\n \t\t\n\n\r\n" ]
    ] )
  end

  # The numbers are separated by single spaces, so they sit at every other
  # position of the token stream.
  def test_number
    tokens = tokens_of( "1 1.0 1e5 1.0e5 1_2.5 1_2.5_2 1_2.5_2e3_2" )
    %w{1 1.0 1e5 1.0e5 1_2.5 1_2.5_2 1_2.5_2e3_2}.each_with_index do |literal, index|
      assert_token :number, literal, tokens[ index * 2 ]
    end
  end

  def test_dquoted_string
    assert_token_run( '"foo #{x} bar\"\n\tbaz\xA5b\5\1234"', [
      [ :punct, '"' ],
      [ :string, 'foo ' ],
      [ :expr, '#{x}' ],
      [ :string, ' bar' ],
      [ :expr, '\"\n\t' ],
      [ :string, 'baz' ],
      [ :expr, '\xA5' ],
      [ :string, 'b' ],
      [ :expr, '\5\123' ],
      [ :string, '4' ],
      [ :punct, '"' ]
    ] )
  end

  def test_squoted_string
    assert_token_run( '\'foo #{x} bar\\\'\n\tbaz\\\\\xA5b\5\1234\'', [
      [ :punct, "'" ],
      [ :string, 'foo #{x} bar' ],
      [ :expr, '\\\'' ],
      [ :string, '\n\tbaz' ],
      [ :expr, '\\\\' ],
      [ :string, '\xA5b\5\1234' ],
      [ :punct, "'" ]
    ] )
  end

  def test_dot_selector
    assert_token_run( 'foo.nil', [
      [ :punct, "." ],
      [ :ident, "nil" ]
    ], 1 )
  end

  def test_dot_range_inclusive
    assert_token_run( 'foo..nil', [
      [ :punct, ".." ],
      [ :constant, "nil" ]
    ], 1 )
  end

  def test_dot_range_exclusive
    assert_token_run( 'foo...nil', [
      [ :punct, "..." ],
      [ :constant, "nil" ]
    ], 1 )
  end

  def test_dot_range_many
    assert_token_run( 'foo.....nil', [
      [ :punct, "....." ],
      [ :constant, "nil" ]
    ], 1 )
  end

  def test_attribute
    assert_token_run( '@var_foo', [ [ :attribute, "@var_foo" ] ] )
  end

  def test_global
    assert_token_run( '$var_foo', [ [ :global, "$var_foo" ] ] )
    assert_token_run( '$12', [ [ :global, "$12" ] ] )
    assert_token_run( '$/f', [ [ :global, "$/" ] ] )
    assert_token_run( "$\n", [ [ :global, "$" ] ] )
  end

  private

    # Tokenizes +source+ in one go and returns the yielded tokens, in order.
    def tokens_of( source )
      collected = []
      @ruby.tokenize( source ) { |token| collected << token }
      collected
    end

    # Asserts the group and the text of +token+ with one combined comparison.
    def assert_token( group, text, token )
      assert_equal [ group, text ], [ token.group, token ]
    end

    # Asserts the text of +token+ first and its group second, as two
    # separate comparisons.
    def assert_text_then_group( text, group, token )
      assert_equal text, token
      assert_equal group, token.group
    end

    # Asserts that tokenizing +source+ yields at least one token and that
    # every yielded token has the given group and text.
    def assert_every_token( source, group, text )
      seen = tokens_of( source )
      seen.each do |token|
        assert_equal group, token.group
        assert_equal text, token
      end
      assert !seen.empty?
    end

    # Drives the tokenizer one step at a time over +source+. After each step
    # the most recently yielded token must match the next [ text, group ]
    # pair of +expected+.
    def assert_stepwise( source, *expected )
      latest = nil
      @ruby.start( source ) { |token| latest = token }
      expected.each do |text, group|
        @ruby.step
        assert_equal text, latest
        assert_equal group, latest.group
      end
    end

    # Tokenizes +source+, discards the first +skip+ tokens, and asserts that
    # the following tokens match the [ group, text ] pairs of +expected+,
    # in order. Tokens beyond the expected ones are not examined.
    def assert_token_run( source, expected, skip = 0 )
      remaining = tokens_of( source )
      skip.times { remaining.shift }
      expected.each do |group, text|
        assert_token group, text, remaining.shift
      end
    end

end