syntax 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/syntax.rb +31 -0
- data/lib/syntax/common.rb +118 -0
- data/lib/syntax/convertors/html.rb +50 -0
- data/lib/syntax/ruby.rb +239 -0
- data/lib/syntax/version.rb +9 -0
- data/lib/syntax/xml.rb +108 -0
- data/lib/syntax/yaml.rb +105 -0
- data/test/ALL-TESTS.rb +5 -0
- data/test/syntax/tc_ruby.rb +518 -0
- data/test/syntax/tc_xml.rb +202 -0
- data/test/syntax/tc_yaml.rb +228 -0
- metadata +51 -0
data/lib/syntax/xml.rb
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
require 'syntax'
|
2
|
+
|
3
|
+
module Syntax
|
4
|
+
|
5
|
+
# A simple implementation of an XML lexer. It handles most cases. It is
|
6
|
+
# not a validating lexer, meaning it will happily process invalid XML without
|
7
|
+
# complaining.
|
8
|
+
class XML < Tokenizer
|
9
|
+
|
10
|
+
# Initialize the lexer. Scanning starts outside of any tag, so the flag
# that #step consults to choose between tag and text handling is cleared.
def setup
  @in_tag = false
end
|
14
|
+
|
15
|
+
# Step through a single iteration of the tokenization process. This will
# yield (potentially) many tokens, and possibly zero tokens.
#
# Any leading whitespace is emitted as :normal first. What happens next
# depends on @in_tag:
#
# * inside a tag, one piece of the tag is consumed: a namespace:attribute
#   pair, a number, an attribute name, the closing punctuation (which
#   clears @in_tag), an "=", or a quoted string (handed to scan_string).
#   Any other character is appended to the current group;
# * outside a tag, the text up to the next "<" or "&" is emitted as
#   :normal, followed by a comment, the opening of a tag (which sets
#   @in_tag), or an entity;
# * when no "<" or "&" remains, the rest of the input is appended.
def step
  start_group :normal, matched if scan(/\s+/)

  if @in_tag
    case
      when scan(/([-\w]+):([-\w]+)/)
        start_group :namespace, subgroup(1)
        start_group :punct, ":"
        start_group :attribute, subgroup(2)
      when scan(/\d+/)
        start_group :number, matched
      when scan(/[-\w]+/)
        start_group :attribute, matched
      when scan(%r{[/?]?>})
        @in_tag = false
        start_group :punct, matched
      when scan(/=/)
        start_group :punct, matched
      when scan(/["']/)
        scan_string matched
      else
        # getch yields nil once the input is exhausted, which happens when an
        # unterminated tag ends in whitespace (the whitespace scan above has
        # already consumed everything). Fall back to "" as scan_string does.
        append(getch || "")
    end
  elsif (text = scan_until(/(?=[<&])/))
    start_group :normal, text unless text.empty?

    if scan(/<!--.*?(-->|\Z)/m)
      # An unterminated comment runs to the end of the input.
      start_group :comment, matched
    else
      case peek(1)
        when "<"
          start_group :punct, getch
          # "<?", "</" and "<!" are kept together as one punctuation token.
          case peek(1)
            when "?", "/", "!"
              append getch
          end
          start_group :normal, matched if scan(/\s+/)
          if scan(/([-\w]+):([-\w]+)/)
            start_group :namespace, subgroup(1)
            start_group :punct, ":"
            start_group :tag, subgroup(2)
          elsif scan(/[-\w]+/)
            start_group :tag, matched
          end
          @in_tag = true
        when "&"
          if scan(/&\S{1,10};/)
            start_group :entity, matched
          else
            # A bare ampersand that does not form an entity is plain text.
            start_group :normal, scan(/&/)
          end
      end
    end
  else
    # NOTE(review): the trailing text is appended to whatever group is
    # current rather than started as :normal -- confirm this is intended.
    append scan_until(/\Z/)
  end
end
|
76
|
+
|
77
|
+
private
|
78
|
+
|
79
|
+
# Scan the string starting at the current position, with the given
# delimiter character.
#
# +delim+ is emitted first as :punct. The body is then emitted as :string
# pieces, with entities split out as :entity and a backslash kept together
# with the character that follows it. The closing delimiter is emitted as
# :punct. If no closing delimiter is found the method returns, leaving the
# remaining input unconsumed.
def scan_string(delim)
  start_group :punct, delim
  # Stop in front of anything that needs special handling. The delimiter is
  # escaped so that it is always matched literally.
  match = /(?=[&\\]|#{Regexp.escape(delim)})/

  loop do
    break unless (text = scan_until(match))
    start_group :string, text unless text.empty?

    case peek(1)
      when "&"
        if scan(/&\S{1,10};/)
          start_group :entity, matched
        else
          start_group :string, getch
        end
      when "\\"
        start_group :string, getch
        # The backslash may be the last character of the input.
        append(getch || "")
      when delim
        start_group :punct, getch
        break
    end
  end
end
|
103
|
+
|
104
|
+
end
|
105
|
+
|
106
|
+
SYNTAX["xml"] = XML
|
107
|
+
|
108
|
+
end
|
data/lib/syntax/yaml.rb
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
require 'syntax'
|
2
|
+
|
3
|
+
module Syntax
|
4
|
+
|
5
|
+
# A simple implementation of a YAML lexer. It handles most cases. It is
|
6
|
+
# not a validating lexer.
|
7
|
+
class YAML < Tokenizer
|
8
|
+
|
9
|
+
# Step through a single iteration of the tokenization process. This will
# yield (potentially) many tokens, and possibly zero tokens.
#
# At the beginning of a line the lexer looks for a document marker, a
# "key:" pair, a list dash, trailing whitespace, or a comment; any other
# character is appended to the current group. Elsewhere it recognises
# whitespace, type tags, anchors, references, times, dates, quoted strings
# (handed to scan_string), symbols, colons, comments and ">" block
# scalars; everything else up to the end of the line or the next "#" is
# emitted as :normal.
def step
  if bol?
    case
      when scan(/---(\s*.+)?$/)
        start_group :document, matched
      when scan(/(\s*)([a-zA-Z][-\w]*)(\s*):/)
        start_group :normal, subgroup(1)
        start_group :key, subgroup(2)
        start_group :normal, subgroup(3)
        start_group :punct, ":"
      when scan(/(\s*)-/)
        start_group :normal, subgroup(1)
        start_group :punct, "-"
      when scan(/\s+$/)
        # \s+ rather than \s*: a zero-width match would leave the scan
        # position where it was, so calling step again would repeat the same
        # match without ever advancing.
        start_group :normal, matched
      when scan(/#.*$/)
        start_group :comment, matched
      else
        append getch
    end
  else
    case
      when scan(/[\n\r]+/)
        start_group :normal, matched
      when scan(/[ \t]+/)
        start_group :normal, matched
      when scan(/!+(.*?^)?\S+/)
        start_group :type, matched
      when scan(/&\S+/)
        start_group :anchor, matched
      when scan(/\*\S+/)
        start_group :ref, matched
      when scan(/\d\d:\d\d:\d\d/)
        start_group :time, matched
      when scan(/\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d(\.\d+)? [-+]\d\d:\d\d/)
        start_group :date, matched
      when scan(/['"]/)
        start_group :punct, matched
        scan_string matched
      when scan(/:\w+/)
        start_group :symbol, matched
      when scan(/[:]/)
        start_group :punct, matched
      when scan(/#.*$/)
        start_group :comment, matched
      when scan(/>-?/)
        start_group :punct, matched
        start_group :normal, scan(/.*$/)
        # Consume the line break that ends the ">" line.
        append getch until eos? || bol?
        return if eos?

        # The indentation of the first body line is the reference: the
        # block continues until a non-blank line is indented less than this.
        indent = check(/ */)
        start_group :string
        loop do
          # At end of input check_until still succeeds with an empty match,
          # so the loop has to be left explicitly.
          break if eos?
          line = check_until(/[\n\r]|\Z/)
          break if line.nil?
          if line.chomp.length > 0
            this_indent = line.chomp.match(/^\s*/)[0]
            break if this_indent.length < indent.length
          end
          append scan_until(/[\n\r]|\Z/)
        end
      else
        start_group :normal, scan_until(/(?=$|#)/)
    end
  end
end
|
77
|
+
|
78
|
+
private
|
79
|
+
|
80
|
+
# Scan the remainder of a quoted string whose closing delimiter is
# +delim+, starting at the current position.
#
# The body is emitted as :string pieces and the closing delimiter as
# :punct. For any delimiter other than a single quote, a backslash and the
# character after it are emitted together as :expr. When no closing
# delimiter is found, the input up to /\Z/ is emitted as :string.
def scan_string(delim)
  # A single-quoted string only stops at the quote; otherwise a backslash
  # stops the scan as well so the escape can be split out.
  stops = delim == "'" ? delim : "\\\\#{delim}"
  regex = /(?=[#{stops}])/

  loop do
    text = scan_until(regex)
    if text.nil?
      start_group :string, scan_until(/\Z/)
      break
    end
    start_group :string, text unless text.empty?

    if peek(1) == "\\"
      # /m lets the escaped character be a line break, and getch covers a
      # backslash that is the very last character. Without these the scan
      # position would not move and the loop would never finish.
      start_group(:expr, scan(/../m) || getch)
    else
      start_group :punct, getch
      break
    end
  end
end
|
100
|
+
|
101
|
+
end
|
102
|
+
|
103
|
+
SYNTAX["yaml"] = YAML
|
104
|
+
|
105
|
+
end
|
data/test/syntax/tc_ruby.rb
ADDED
@@ -0,0 +1,518 @@
|
|
1
|
+
$:.unshift "../../lib"
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'syntax/ruby'
|
5
|
+
|
6
|
+
# Unit tests for Syntax::Ruby. Each test feeds a small piece of Ruby source
# to the tokenizer and checks the text and the group of the tokens it
# yields (tokens compare equal to their text and respond to #group).
class TC_Syntax_Ruby < Test::Unit::TestCase

  # Words that are expected to come back as :constant when they stand alone.
  SPECIALS = %w{__FILE__ __LINE__ true false nil self}

  def setup
    @ruby = Syntax::Ruby.new
  end

  def test_empty
    called = false
    @ruby.tokenize( "" ) { called = true }
    assert !called
  end

  def test_constant
    check_every_token :constant, "Foo"
  end

  def test_ident
    check_every_token :ident, "foo"
  end

  def test_comment_eol
    check_every_token :comment, "# a comment"
  end

  def test_comment_block
    check_every_token :comment, "=begin\nthis is a comment\n=end"
  end

  def test_keyword
    Syntax::Ruby::KEYWORDS.each do |word|
      tok = tokens_of( word )
      assert_equal [ :keyword, word ], [ tok.first.group, tok.first ]
    end
    # After a dot the same word is a method name, not a keyword.
    Syntax::Ruby::KEYWORDS.each do |word|
      tok = tokens_of( "foo.#{word}" )
      tok.shift
      tok.shift
      assert_equal [ :ident, word ], [ tok.first.group, tok.first ]
    end
  end

  def test__END__
    check_every_token :comment, "__END__\n\nblah blah blah"
  end

  def test_def_paren
    check_first_two_steps "def foo(bar)", "def ", "foo", :method
  end

  def test_def_space
    check_first_two_steps "def foo bar", "def ", "foo", :method
  end

  def test_def_semicolon
    check_first_two_steps "def foo;", "def ", "foo", :method
  end

  def test_class_space
    check_first_two_steps "class Foo\n", "class ", "Foo", :class
  end

  def test_class_semicolon
    check_first_two_steps "class Foo;", "class ", "Foo", :class
  end

  def test_class_extend
    check_first_two_steps "class Foo< Bang", "class ", "Foo", :class
  end

  def test_module_space
    check_first_two_steps "module Foo\n", "module ", "Foo", :module
  end

  def test_module_semicolon
    check_first_two_steps "module Foo;", "module ", "Foo", :module
  end

  def test_module_other
    check_first_two_steps "module Foo!\n", "module ", "Foo!", :module
  end

  def test_scope_operator
    tok = tokens_of( "Foo::Bar" )
    assert_equal "Foo", tok.shift
    assert_equal "::", tok.first
    assert_equal :punct, tok.shift.group
    assert_equal "Bar", tok.first
  end

  def test_symbol_dquote
    check_first_token ':"foo"', ':"foo"', :symbol
  end

  def test_symbol_squote
    check_first_token ":'foo'", ":'foo'", :symbol
  end

  def test_symbol
    check_first_token ":foo_bar?", ":foo_bar?", :symbol
  end

  def test_char
    check_first_token "?.", "?.", :char
    check_first_token '?\n', '?\n', :char
  end

  def test_specials
    SPECIALS.each do |word|
      check_first_token word, word, :constant
    end

    SPECIALS.each do |word|
      check_first_token "#{word}?", "#{word}?", :ident
    end

    SPECIALS.each do |word|
      check_first_token "#{word}!", "#{word}!", :ident
    end

    SPECIALS.each do |word|
      tok = tokens_of( "x.#{word}" )
      tok.shift
      tok.shift
      assert_equal word, tok.first
      assert_equal :ident, tok.first.group
    end
  end

  def test_pct_r
    check_leading_tokens '%r{foo#{x}bar}',
      [ :punct, "%r{" ], [ :regex, "foo" ], [ :expr, '#{x}' ],
      [ :regex, "bar" ], [ :punct, "}" ]

    check_leading_tokens '%r-foo#{x}bar-',
      [ :punct, "%r-" ], [ :regex, "foo" ], [ :expr, '#{x}' ],
      [ :regex, "bar" ], [ :punct, "-" ]
  end

  def test_pct_w
    check_leading_tokens '%w-foo#{x} bar baz-',
      [ :punct, "%w-" ], [ :string, 'foo#{x} bar baz' ], [ :punct, "-" ]
  end

  def test_pct_q
    check_leading_tokens '%q-hello #{world}-',
      [ :punct, "%q-" ], [ :string, 'hello #{world}' ], [ :punct, "-" ]
  end

  def test_pct_s
    check_leading_tokens '%s-hello #{world}-',
      [ :punct, "%s-" ], [ :symbol, 'hello #{world}' ], [ :punct, "-" ]
  end

  def test_pct_W
    check_leading_tokens '%W-foo#{x} bar baz-',
      [ :punct, "%W-" ], [ :string, 'foo' ], [ :expr, '#{x}' ],
      [ :string, ' bar baz' ], [ :punct, "-" ]
  end

  def test_pct_Q
    check_leading_tokens '%Q-hello #{world}-',
      [ :punct, "%Q-" ], [ :string, 'hello ' ], [ :expr, '#{world}' ],
      [ :punct, "-" ]
  end

  def test_pct_x
    check_leading_tokens '%x-ls /blah/#{foo}-',
      [ :punct, "%x-" ], [ :string, 'ls /blah/' ], [ :expr, '#{foo}' ],
      [ :punct, "-" ]
  end

  def test_pct_string
    check_leading_tokens '%-hello #{world}-',
      [ :punct, "%-" ], [ :string, 'hello ' ], [ :expr, '#{world}' ],
      [ :punct, "-" ]
  end

  def test_bad_pct_string
    check_leading_tokens '%0hello #{world}0',
      [ :punct, "%" ], [ :number, '0' ], [ :ident, 'hello' ],
      [ :normal, ' ' ], [ :comment, '#{world}0' ]
  end

  def test_shift_left
    check_leading_tokens 'foo << 5',
      [ :ident, "foo" ], [ :normal, " " ], [ :punct, "<<" ],
      [ :normal, " " ], [ :number, "5" ]
  end

  def test_here_doc_no_opts
    check_leading_tokens "foo <<EOF\n foo\n bar\n baz\nEOF",
      [ :ident, "foo" ], [ :normal, " " ], [ :punct, "<<" ],
      [ :constant, "EOF" ], [ :string, "\n foo\n bar\n baz\n" ],
      [ :constant, "EOF" ]
  end

  def test_here_doc_no_opts_missing_end
    check_leading_tokens "foo <<EOF\n foo\n bar\n baz\n EOF",
      [ :ident, "foo" ], [ :normal, " " ], [ :punct, "<<" ],
      [ :constant, "EOF" ], [ :string, "\n foo\n bar\n baz\n EOF" ]
  end

  def test_here_doc_float_right
    check_leading_tokens "foo <<-EOF\n foo\n bar\n baz\n EOF",
      [ :ident, "foo" ], [ :normal, " " ], [ :punct, "<<-" ],
      [ :constant, "EOF" ], [ :string, "\n foo\n bar\n baz\n" ],
      [ :constant, " EOF" ]
  end

  def test_here_doc_single_quotes
    check_leading_tokens "foo <<'EOF'\n foo\#{x}\n bar\n baz\nEOF",
      [ :ident, "foo" ], [ :normal, " " ], [ :punct, "<<'" ],
      [ :constant, "EOF" ], [ :punct, "'" ],
      [ :string, "\n foo\#{x}\n bar\n baz\n" ], [ :constant, "EOF" ]
  end

  def test_here_doc_double_quotes
    check_leading_tokens "foo <<\"EOF\"\n foo\#{x}\n bar\n baz\nEOF",
      [ :ident, "foo" ], [ :normal, " " ], [ :punct, "<<\"" ],
      [ :constant, "EOF" ], [ :punct, "\"" ], [ :string, "\n foo" ],
      [ :expr, '#{x}' ], [ :string, "\n bar\n baz\n" ], [ :constant, "EOF" ]
  end

  def test_space
    check_leading_tokens "\n \t\t\n\n\r\n", [ :normal, "\n \t\t\n\n\r\n" ]
  end

  def test_number
    numbers = %w{1 1.0 1e5 1.0e5 1_2.5 1_2.5_2 1_2.5_2e3_2}
    tok = tokens_of( numbers.join( " " ) )
    numbers.each_with_index do |number, index|
      tok.shift unless index.zero? # drop the token between two numbers
      check_next_token tok, :number, number
    end
  end

  def test_dquoted_string
    check_leading_tokens '"foo #{x} bar\"\n\tbaz\xA5b\5\1234"',
      [ :punct, '"' ], [ :string, 'foo ' ], [ :expr, '#{x}' ],
      [ :string, ' bar' ], [ :expr, '\"\n\t' ], [ :string, 'baz' ],
      [ :expr, '\xA5' ], [ :string, 'b' ], [ :expr, '\5\123' ],
      [ :string, '4' ], [ :punct, '"' ]
  end

  def test_squoted_string
    check_leading_tokens '\'foo #{x} bar\\\'\n\tbaz\\\\\xA5b\5\1234\'',
      [ :punct, "'" ], [ :string, 'foo #{x} bar' ], [ :expr, '\\\'' ],
      [ :string, '\n\tbaz' ], [ :expr, '\\\\' ],
      [ :string, '\xA5b\5\1234' ], [ :punct, "'" ]
  end

  def test_dot_selector
    tok = tokens_of( 'foo.nil' )
    tok.shift
    check_next_token tok, :punct, "."
    check_next_token tok, :ident, "nil"
  end

  def test_dot_range_inclusive
    tok = tokens_of( 'foo..nil' )
    tok.shift
    check_next_token tok, :punct, ".."
    check_next_token tok, :constant, "nil"
  end

  def test_dot_range_exclusive
    tok = tokens_of( 'foo...nil' )
    tok.shift
    check_next_token tok, :punct, "..."
    check_next_token tok, :constant, "nil"
  end

  def test_dot_range_many
    tok = tokens_of( 'foo.....nil' )
    tok.shift
    check_next_token tok, :punct, "....."
    check_next_token tok, :constant, "nil"
  end

  def test_attribute
    check_leading_tokens '@var_foo', [ :attribute, "@var_foo" ]
  end

  def test_global
    check_leading_tokens '$var_foo', [ :global, "$var_foo" ]
    check_leading_tokens '$12', [ :global, "$12" ]
    check_leading_tokens '$/f', [ :global, "$/" ]
    check_leading_tokens "$\n", [ :global, "$" ]
  end

  private

    # Tokenize +source+ and return every yielded token, in order.
    def tokens_of( source )
      tok = []
      @ruby.tokenize( source ) { |t| tok << t }
      tok
    end

    # Assert that the next token in +tok+ has the given group and text, and
    # remove it from the list.
    def check_next_token( tok, group, text )
      assert_equal [ group, text ], [ tok.first.group, tok.shift ]
    end

    # Tokenize +source+ and assert that its first tokens are exactly the
    # given [ group, text ] pairs. Tokens after those are not examined.
    def check_leading_tokens( source, *expected )
      tok = tokens_of( source )
      expected.each { |group, text| check_next_token tok, group, text }
    end

    # Tokenize +source+ and assert the text, then the group, of the first
    # token only.
    def check_first_token( source, text, group )
      first = tokens_of( source ).first
      assert_equal text, first
      assert_equal group, first.group
    end

    # Assert that tokenizing +source+ yields at least one token and that
    # every yielded token is the whole of +source+ in the given group.
    def check_every_token( group, source )
      called = false
      @ruby.tokenize( source ) do |tok|
        called = true
        assert_equal group, tok.group
        assert_equal source, tok
      end
      assert called
    end

    # Start tokenizing +source+ and take two steps: the first must leave
    # +keyword+ (group :keyword) as the most recent token, the second must
    # leave +name+ in +name_group+.
    def check_first_two_steps( source, keyword, name, name_group )
      tok = nil
      @ruby.start( source ) { |t| tok = t }
      @ruby.step
      assert_equal keyword, tok
      assert_equal :keyword, tok.group
      @ruby.step
      assert_equal name, tok
      assert_equal name_group, tok.group
    end

end
|
+
assert_equal :keyword, tok.group
|
111
|
+
@ruby.step
|
112
|
+
assert_equal "foo", tok
|
113
|
+
assert_equal :method, tok.group
|
114
|
+
end
|
115
|
+
|
116
|
+
def test_class_space
|
117
|
+
tok = nil
|
118
|
+
@ruby.start( "class Foo\n" ) { |t| tok = t }
|
119
|
+
@ruby.step
|
120
|
+
assert_equal "class ", tok
|
121
|
+
assert_equal :keyword, tok.group
|
122
|
+
@ruby.step
|
123
|
+
assert_equal "Foo", tok
|
124
|
+
assert_equal :class, tok.group
|
125
|
+
end
|
126
|
+
|
127
|
+
def test_class_semicolon
|
128
|
+
tok = nil
|
129
|
+
@ruby.start( "class Foo;" ) { |t| tok = t }
|
130
|
+
@ruby.step
|
131
|
+
assert_equal "class ", tok
|
132
|
+
assert_equal :keyword, tok.group
|
133
|
+
@ruby.step
|
134
|
+
assert_equal "Foo", tok
|
135
|
+
assert_equal :class, tok.group
|
136
|
+
end
|
137
|
+
|
138
|
+
def test_class_extend
|
139
|
+
tok = nil
|
140
|
+
@ruby.start( "class Foo< Bang" ) { |t| tok = t }
|
141
|
+
@ruby.step
|
142
|
+
assert_equal "class ", tok
|
143
|
+
assert_equal :keyword, tok.group
|
144
|
+
@ruby.step
|
145
|
+
assert_equal "Foo", tok
|
146
|
+
assert_equal :class, tok.group
|
147
|
+
end
|
148
|
+
|
149
|
+
def test_module_space
|
150
|
+
tok = nil
|
151
|
+
@ruby.start( "module Foo\n" ) { |t| tok = t }
|
152
|
+
@ruby.step
|
153
|
+
assert_equal "module ", tok
|
154
|
+
assert_equal :keyword, tok.group
|
155
|
+
@ruby.step
|
156
|
+
assert_equal "Foo", tok
|
157
|
+
assert_equal :module, tok.group
|
158
|
+
end
|
159
|
+
|
160
|
+
def test_module_semicolon
|
161
|
+
tok = nil
|
162
|
+
@ruby.start( "module Foo;" ) { |t| tok = t }
|
163
|
+
@ruby.step
|
164
|
+
assert_equal "module ", tok
|
165
|
+
assert_equal :keyword, tok.group
|
166
|
+
@ruby.step
|
167
|
+
assert_equal "Foo", tok
|
168
|
+
assert_equal :module, tok.group
|
169
|
+
end
|
170
|
+
|
171
|
+
def test_module_other
|
172
|
+
tok = nil
|
173
|
+
@ruby.start( "module Foo!\n" ) { |t| tok = t }
|
174
|
+
@ruby.step
|
175
|
+
assert_equal "module ", tok
|
176
|
+
assert_equal :keyword, tok.group
|
177
|
+
@ruby.step
|
178
|
+
assert_equal "Foo!", tok
|
179
|
+
assert_equal :module, tok.group
|
180
|
+
end
|
181
|
+
|
182
|
+
def test_scope_operator
|
183
|
+
tok = []
|
184
|
+
@ruby.tokenize( "Foo::Bar" ) { |t| tok << t }
|
185
|
+
assert_equal "Foo", tok.shift
|
186
|
+
assert_equal "::", tok.first
|
187
|
+
assert_equal :punct, tok.shift.group
|
188
|
+
assert_equal "Bar", tok.first
|
189
|
+
end
|
190
|
+
|
191
|
+
def test_symbol_dquote
|
192
|
+
tok = []
|
193
|
+
@ruby.tokenize( ':"foo"' ) { |t| tok << t }
|
194
|
+
assert_equal ':"foo"', tok.first
|
195
|
+
assert_equal :symbol, tok.first.group
|
196
|
+
end
|
197
|
+
|
198
|
+
def test_symbol_squote
|
199
|
+
tok = []
|
200
|
+
@ruby.tokenize( ":'foo'" ) { |t| tok << t }
|
201
|
+
assert_equal ":'foo'", tok.first
|
202
|
+
assert_equal :symbol, tok.first.group
|
203
|
+
end
|
204
|
+
|
205
|
+
def test_symbol
|
206
|
+
tok = []
|
207
|
+
@ruby.tokenize( ":foo_bar?" ) { |t| tok << t }
|
208
|
+
assert_equal ":foo_bar?", tok.first
|
209
|
+
assert_equal :symbol, tok.first.group
|
210
|
+
end
|
211
|
+
|
212
|
+
def test_char
|
213
|
+
tok = []
|
214
|
+
@ruby.tokenize( "?." ) { |t| tok << t }
|
215
|
+
assert_equal "?.", tok.first
|
216
|
+
assert_equal :char, tok.first.group
|
217
|
+
|
218
|
+
tok.clear
|
219
|
+
@ruby.tokenize( '?\n' ) { |t| tok << t }
|
220
|
+
assert_equal '?\n', tok.first
|
221
|
+
assert_equal :char, tok.first.group
|
222
|
+
end
|
223
|
+
|
224
|
+
def test_specials
|
225
|
+
%w{__FILE__ __LINE__ true false nil self}.each do |word|
|
226
|
+
tok = []
|
227
|
+
@ruby.tokenize( word ) { |t| tok << t }
|
228
|
+
assert_equal word, tok.first
|
229
|
+
assert_equal :constant, tok.first.group
|
230
|
+
end
|
231
|
+
|
232
|
+
%w{__FILE__ __LINE__ true false nil self}.each do |word|
|
233
|
+
tok = []
|
234
|
+
@ruby.tokenize( "#{word}?" ) { |t| tok << t }
|
235
|
+
assert_equal "#{word}?", tok.first
|
236
|
+
assert_equal :ident, tok.first.group
|
237
|
+
end
|
238
|
+
|
239
|
+
%w{__FILE__ __LINE__ true false nil self}.each do |word|
|
240
|
+
tok = []
|
241
|
+
@ruby.tokenize( "#{word}!" ) { |t| tok << t }
|
242
|
+
assert_equal "#{word}!", tok.first
|
243
|
+
assert_equal :ident, tok.first.group
|
244
|
+
end
|
245
|
+
|
246
|
+
%w{__FILE__ __LINE__ true false nil self}.each do |word|
|
247
|
+
tok = []
|
248
|
+
@ruby.tokenize( "x.#{word}" ) { |t| tok << t }
|
249
|
+
tok.shift
|
250
|
+
tok.shift
|
251
|
+
assert_equal word, tok.first
|
252
|
+
assert_equal :ident, tok.first.group
|
253
|
+
end
|
254
|
+
end
|
255
|
+
|
256
|
+
def test_pct_r
|
257
|
+
tok = []
|
258
|
+
@ruby.tokenize( '%r{foo#{x}bar}' ) { |t| tok << t }
|
259
|
+
assert_equal [ :punct, "%r{" ], [ tok.first.group, tok.shift ]
|
260
|
+
assert_equal [ :regex, "foo" ], [ tok.first.group, tok.shift ]
|
261
|
+
assert_equal [ :expr, '#{x}' ], [ tok.first.group, tok.shift ]
|
262
|
+
assert_equal [ :regex, "bar" ], [ tok.first.group, tok.shift ]
|
263
|
+
assert_equal [ :punct, "}" ], [ tok.first.group, tok.shift ]
|
264
|
+
|
265
|
+
tok = []
|
266
|
+
@ruby.tokenize( '%r-foo#{x}bar-' ) { |t| tok << t }
|
267
|
+
assert_equal [ :punct, "%r-" ], [ tok.first.group, tok.shift ]
|
268
|
+
assert_equal [ :regex, "foo" ], [ tok.first.group, tok.shift ]
|
269
|
+
assert_equal [ :expr, '#{x}' ], [ tok.first.group, tok.shift ]
|
270
|
+
assert_equal [ :regex, "bar" ], [ tok.first.group, tok.shift ]
|
271
|
+
assert_equal [ :punct, "-" ], [ tok.first.group, tok.shift ]
|
272
|
+
end
|
273
|
+
|
274
|
+
def test_pct_w
|
275
|
+
tok = []
|
276
|
+
@ruby.tokenize( '%w-foo#{x} bar baz-' ) { |t| tok << t }
|
277
|
+
assert_equal [ :punct, "%w-" ], [ tok.first.group, tok.shift ]
|
278
|
+
assert_equal [ :string, 'foo#{x} bar baz' ], [ tok.first.group, tok.shift ]
|
279
|
+
assert_equal [ :punct, "-" ], [ tok.first.group, tok.shift ]
|
280
|
+
end
|
281
|
+
|
282
|
+
def test_pct_q
|
283
|
+
tok = []
|
284
|
+
@ruby.tokenize( '%q-hello #{world}-' ) { |t| tok << t }
|
285
|
+
assert_equal [ :punct, "%q-" ], [ tok.first.group, tok.shift ]
|
286
|
+
assert_equal [ :string, 'hello #{world}' ], [ tok.first.group, tok.shift ]
|
287
|
+
assert_equal [ :punct, "-" ], [ tok.first.group, tok.shift ]
|
288
|
+
end
|
289
|
+
|
290
|
+
def test_pct_s
|
291
|
+
tok = []
|
292
|
+
@ruby.tokenize( '%s-hello #{world}-' ) { |t| tok << t }
|
293
|
+
assert_equal [ :punct, "%s-" ], [ tok.first.group, tok.shift ]
|
294
|
+
assert_equal [ :symbol, 'hello #{world}' ], [ tok.first.group, tok.shift ]
|
295
|
+
assert_equal [ :punct, "-" ], [ tok.first.group, tok.shift ]
|
296
|
+
end
|
297
|
+
|
298
|
+
def test_pct_W
|
299
|
+
tok = []
|
300
|
+
@ruby.tokenize( '%W-foo#{x} bar baz-' ) { |t| tok << t }
|
301
|
+
assert_equal [ :punct, "%W-" ], [ tok.first.group, tok.shift ]
|
302
|
+
assert_equal [ :string, 'foo' ], [ tok.first.group, tok.shift ]
|
303
|
+
assert_equal [ :expr, '#{x}' ], [ tok.first.group, tok.shift ]
|
304
|
+
assert_equal [ :string, ' bar baz' ], [ tok.first.group, tok.shift ]
|
305
|
+
assert_equal [ :punct, "-" ], [ tok.first.group, tok.shift ]
|
306
|
+
end
|
307
|
+
|
308
|
+
def test_pct_Q
|
309
|
+
tok = []
|
310
|
+
@ruby.tokenize( '%Q-hello #{world}-' ) { |t| tok << t }
|
311
|
+
assert_equal [ :punct, "%Q-" ], [ tok.first.group, tok.shift ]
|
312
|
+
assert_equal [ :string, 'hello ' ], [ tok.first.group, tok.shift ]
|
313
|
+
assert_equal [ :expr, '#{world}' ], [ tok.first.group, tok.shift ]
|
314
|
+
assert_equal [ :punct, "-" ], [ tok.first.group, tok.shift ]
|
315
|
+
end
|
316
|
+
|
317
|
+
def test_pct_x
|
318
|
+
tok = []
|
319
|
+
@ruby.tokenize( '%x-ls /blah/#{foo}-' ) { |t| tok << t }
|
320
|
+
assert_equal [ :punct, "%x-" ], [ tok.first.group, tok.shift ]
|
321
|
+
assert_equal [ :string, 'ls /blah/' ], [ tok.first.group, tok.shift ]
|
322
|
+
assert_equal [ :expr, '#{foo}' ], [ tok.first.group, tok.shift ]
|
323
|
+
assert_equal [ :punct, "-" ], [ tok.first.group, tok.shift ]
|
324
|
+
end
|
325
|
+
|
326
|
+
def test_pct_string
|
327
|
+
tok = []
|
328
|
+
@ruby.tokenize( '%-hello #{world}-' ) { |t| tok << t }
|
329
|
+
assert_equal [ :punct, "%-" ], [ tok.first.group, tok.shift ]
|
330
|
+
assert_equal [ :string, 'hello ' ], [ tok.first.group, tok.shift ]
|
331
|
+
assert_equal [ :expr, '#{world}' ], [ tok.first.group, tok.shift ]
|
332
|
+
assert_equal [ :punct, "-" ], [ tok.first.group, tok.shift ]
|
333
|
+
end
|
334
|
+
|
335
|
+
def test_bad_pct_string
|
336
|
+
tok = []
|
337
|
+
@ruby.tokenize( '%0hello #{world}0' ) { |t| tok << t }
|
338
|
+
assert_equal [ :punct, "%" ], [ tok.first.group, tok.shift ]
|
339
|
+
assert_equal [ :number, '0' ], [ tok.first.group, tok.shift ]
|
340
|
+
assert_equal [ :ident, 'hello' ], [ tok.first.group, tok.shift ]
|
341
|
+
assert_equal [ :normal, ' ' ], [ tok.first.group, tok.shift ]
|
342
|
+
assert_equal [ :comment, '#{world}0' ], [ tok.first.group, tok.shift ]
|
343
|
+
end
|
344
|
+
|
345
|
+
def test_shift_left
|
346
|
+
tok = []
|
347
|
+
@ruby.tokenize( 'foo << 5' ) { |t| tok << t }
|
348
|
+
assert_equal [ :ident, "foo" ], [ tok.first.group, tok.shift ]
|
349
|
+
assert_equal [ :normal, " " ], [ tok.first.group, tok.shift ]
|
350
|
+
assert_equal [ :punct, "<<" ], [ tok.first.group, tok.shift ]
|
351
|
+
assert_equal [ :normal, " " ], [ tok.first.group, tok.shift ]
|
352
|
+
assert_equal [ :number, "5" ], [ tok.first.group, tok.shift ]
|
353
|
+
end
|
354
|
+
|
355
|
+
def test_here_doc_no_opts
|
356
|
+
tok = []
|
357
|
+
@ruby.tokenize( "foo <<EOF\n foo\n bar\n baz\nEOF" ) { |t| tok << t }
|
358
|
+
assert_equal [ :ident, "foo" ], [ tok.first.group, tok.shift ]
|
359
|
+
assert_equal [ :normal, " " ], [ tok.first.group, tok.shift ]
|
360
|
+
assert_equal [ :punct, "<<" ], [ tok.first.group, tok.shift ]
|
361
|
+
assert_equal [ :constant, "EOF" ], [ tok.first.group, tok.shift ]
|
362
|
+
assert_equal [ :string, "\n foo\n bar\n baz\n" ], [ tok.first.group, tok.shift ]
|
363
|
+
assert_equal [ :constant, "EOF" ], [ tok.first.group, tok.shift ]
|
364
|
+
end
|
365
|
+
|
366
|
+
def test_here_doc_no_opts_missing_end
|
367
|
+
tok = []
|
368
|
+
@ruby.tokenize( "foo <<EOF\n foo\n bar\n baz\n EOF" ) { |t| tok << t }
|
369
|
+
assert_equal [ :ident, "foo" ], [ tok.first.group, tok.shift ]
|
370
|
+
assert_equal [ :normal, " " ], [ tok.first.group, tok.shift ]
|
371
|
+
assert_equal [ :punct, "<<" ], [ tok.first.group, tok.shift ]
|
372
|
+
assert_equal [ :constant, "EOF" ], [ tok.first.group, tok.shift ]
|
373
|
+
assert_equal [ :string, "\n foo\n bar\n baz\n EOF" ], [ tok.first.group, tok.shift ]
|
374
|
+
end
|
375
|
+
|
376
|
+
def test_here_doc_float_right
|
377
|
+
tok = []
|
378
|
+
@ruby.tokenize( "foo <<-EOF\n foo\n bar\n baz\n EOF" ) { |t| tok << t }
|
379
|
+
assert_equal [ :ident, "foo" ], [ tok.first.group, tok.shift ]
|
380
|
+
assert_equal [ :normal, " " ], [ tok.first.group, tok.shift ]
|
381
|
+
assert_equal [ :punct, "<<-" ], [ tok.first.group, tok.shift ]
|
382
|
+
assert_equal [ :constant, "EOF" ], [ tok.first.group, tok.shift ]
|
383
|
+
assert_equal [ :string, "\n foo\n bar\n baz\n" ], [ tok.first.group, tok.shift ]
|
384
|
+
assert_equal [ :constant, " EOF" ], [ tok.first.group, tok.shift ]
|
385
|
+
end
|
386
|
+
|
387
|
+
def test_here_doc_single_quotes
|
388
|
+
tok = []
|
389
|
+
@ruby.tokenize( "foo <<'EOF'\n foo\#{x}\n bar\n baz\nEOF" ) { |t| tok << t }
|
390
|
+
assert_equal [ :ident, "foo" ], [ tok.first.group, tok.shift ]
|
391
|
+
assert_equal [ :normal, " " ], [ tok.first.group, tok.shift ]
|
392
|
+
assert_equal [ :punct, "<<'" ], [ tok.first.group, tok.shift ]
|
393
|
+
assert_equal [ :constant, "EOF" ], [ tok.first.group, tok.shift ]
|
394
|
+
assert_equal [ :punct, "'" ], [ tok.first.group, tok.shift ]
|
395
|
+
assert_equal [ :string, "\n foo\#{x}\n bar\n baz\n" ], [ tok.first.group, tok.shift ]
|
396
|
+
assert_equal [ :constant, "EOF" ], [ tok.first.group, tok.shift ]
|
397
|
+
end
|
398
|
+
|
399
|
+
def test_here_doc_double_quotes
|
400
|
+
tok = []
|
401
|
+
@ruby.tokenize( "foo <<\"EOF\"\n foo\#{x}\n bar\n baz\nEOF" ) { |t| tok << t }
|
402
|
+
assert_equal [ :ident, "foo" ], [ tok.first.group, tok.shift ]
|
403
|
+
assert_equal [ :normal, " " ], [ tok.first.group, tok.shift ]
|
404
|
+
assert_equal [ :punct, "<<\"" ], [ tok.first.group, tok.shift ]
|
405
|
+
assert_equal [ :constant, "EOF" ], [ tok.first.group, tok.shift ]
|
406
|
+
assert_equal [ :punct, "\"" ], [ tok.first.group, tok.shift ]
|
407
|
+
assert_equal [ :string, "\n foo" ], [ tok.first.group, tok.shift ]
|
408
|
+
assert_equal [ :expr, '#{x}' ], [ tok.first.group, tok.shift ]
|
409
|
+
assert_equal [ :string, "\n bar\n baz\n" ], [ tok.first.group, tok.shift ]
|
410
|
+
assert_equal [ :constant, "EOF" ], [ tok.first.group, tok.shift ]
|
411
|
+
end
|
412
|
+
|
413
|
+
def test_space
|
414
|
+
tok = []
|
415
|
+
@ruby.tokenize( "\n \t\t\n\n\r\n" ) { |t| tok << t }
|
416
|
+
assert_equal [ :normal, "\n \t\t\n\n\r\n" ], [ tok.first.group, tok.shift ]
|
417
|
+
end
|
418
|
+
|
419
|
+
def test_number
|
420
|
+
tok = []
|
421
|
+
@ruby.tokenize( "1 1.0 1e5 1.0e5 1_2.5 1_2.5_2 1_2.5_2e3_2" ) { |t| tok << t }
|
422
|
+
assert_equal [ :number, "1" ], [ tok.first.group, tok.shift ]
|
423
|
+
tok.shift
|
424
|
+
assert_equal [ :number, "1.0" ], [ tok.first.group, tok.shift ]
|
425
|
+
tok.shift
|
426
|
+
assert_equal [ :number, "1e5" ], [ tok.first.group, tok.shift ]
|
427
|
+
tok.shift
|
428
|
+
assert_equal [ :number, "1.0e5" ], [ tok.first.group, tok.shift ]
|
429
|
+
tok.shift
|
430
|
+
assert_equal [ :number, "1_2.5" ], [ tok.first.group, tok.shift ]
|
431
|
+
tok.shift
|
432
|
+
assert_equal [ :number, "1_2.5_2" ], [ tok.first.group, tok.shift ]
|
433
|
+
tok.shift
|
434
|
+
assert_equal [ :number, "1_2.5_2e3_2" ], [ tok.first.group, tok.shift ]
|
435
|
+
end
|
436
|
+
|
437
|
+
def test_dquoted_string
|
438
|
+
tok = []
|
439
|
+
@ruby.tokenize( '"foo #{x} bar\"\n\tbaz\xA5b\5\1234"' ) { |t| tok << t }
|
440
|
+
assert_equal [ :punct, '"' ], [ tok.first.group, tok.shift ]
|
441
|
+
assert_equal [ :string, 'foo ' ], [ tok.first.group, tok.shift ]
|
442
|
+
assert_equal [ :expr, '#{x}' ], [ tok.first.group, tok.shift ]
|
443
|
+
assert_equal [ :string, ' bar' ], [ tok.first.group, tok.shift ]
|
444
|
+
assert_equal [ :expr, '\"\n\t' ], [ tok.first.group, tok.shift ]
|
445
|
+
assert_equal [ :string, 'baz' ], [ tok.first.group, tok.shift ]
|
446
|
+
assert_equal [ :expr, '\xA5' ], [ tok.first.group, tok.shift ]
|
447
|
+
assert_equal [ :string, 'b' ], [ tok.first.group, tok.shift ]
|
448
|
+
assert_equal [ :expr, '\5\123' ], [ tok.first.group, tok.shift ]
|
449
|
+
assert_equal [ :string, '4' ], [ tok.first.group, tok.shift ]
|
450
|
+
assert_equal [ :punct, '"' ], [ tok.first.group, tok.shift ]
|
451
|
+
end
|
452
|
+
|
453
|
+
def test_squoted_string
|
454
|
+
tok = []
|
455
|
+
@ruby.tokenize( '\'foo #{x} bar\\\'\n\tbaz\\\\\xA5b\5\1234\'' ) { |t| tok << t }
|
456
|
+
assert_equal [ :punct, "'" ], [ tok.first.group, tok.shift ]
|
457
|
+
assert_equal [ :string, 'foo #{x} bar' ], [ tok.first.group, tok.shift ]
|
458
|
+
assert_equal [ :expr, '\\\'' ], [ tok.first.group, tok.shift ]
|
459
|
+
assert_equal [ :string, '\n\tbaz' ], [ tok.first.group, tok.shift ]
|
460
|
+
assert_equal [ :expr, '\\\\' ], [ tok.first.group, tok.shift ]
|
461
|
+
assert_equal [ :string, '\xA5b\5\1234' ], [ tok.first.group, tok.shift ]
|
462
|
+
assert_equal [ :punct, "'" ], [ tok.first.group, tok.shift ]
|
463
|
+
end
|
464
|
+
|
465
|
+
def test_dot_selector
|
466
|
+
tok = []
|
467
|
+
@ruby.tokenize( 'foo.nil' ) { |t| tok << t }
|
468
|
+
tok.shift
|
469
|
+
assert_equal [ :punct, "." ], [ tok.first.group, tok.shift ]
|
470
|
+
assert_equal [ :ident, "nil" ], [ tok.first.group, tok.shift ]
|
471
|
+
end
|
472
|
+
|
473
|
+
def test_dot_range_inclusive
|
474
|
+
tok = []
|
475
|
+
@ruby.tokenize( 'foo..nil' ) { |t| tok << t }
|
476
|
+
tok.shift
|
477
|
+
assert_equal [ :punct, ".." ], [ tok.first.group, tok.shift ]
|
478
|
+
assert_equal [ :constant, "nil" ], [ tok.first.group, tok.shift ]
|
479
|
+
end
|
480
|
+
|
481
|
+
def test_dot_range_exclusive
|
482
|
+
tok = []
|
483
|
+
@ruby.tokenize( 'foo...nil' ) { |t| tok << t }
|
484
|
+
tok.shift
|
485
|
+
assert_equal [ :punct, "..." ], [ tok.first.group, tok.shift ]
|
486
|
+
assert_equal [ :constant, "nil" ], [ tok.first.group, tok.shift ]
|
487
|
+
end
|
488
|
+
|
489
|
+
def test_dot_range_many
|
490
|
+
tok = []
|
491
|
+
@ruby.tokenize( 'foo.....nil' ) { |t| tok << t }
|
492
|
+
tok.shift
|
493
|
+
assert_equal [ :punct, "....." ], [ tok.first.group, tok.shift ]
|
494
|
+
assert_equal [ :constant, "nil" ], [ tok.first.group, tok.shift ]
|
495
|
+
end
|
496
|
+
|
497
|
+
def test_attribute
|
498
|
+
tok = []
|
499
|
+
@ruby.tokenize( '@var_foo' ) { |t| tok << t }
|
500
|
+
assert_equal [ :attribute, "@var_foo" ], [ tok.first.group, tok.shift ]
|
501
|
+
end
|
502
|
+
|
503
|
+
def test_global
|
504
|
+
tok = []
|
505
|
+
@ruby.tokenize( '$var_foo' ) { |t| tok << t }
|
506
|
+
assert_equal [ :global, "$var_foo" ], [ tok.first.group, tok.shift ]
|
507
|
+
tok = []
|
508
|
+
@ruby.tokenize( '$12' ) { |t| tok << t }
|
509
|
+
assert_equal [ :global, "$12" ], [ tok.first.group, tok.shift ]
|
510
|
+
tok = []
|
511
|
+
@ruby.tokenize( '$/f' ) { |t| tok << t }
|
512
|
+
assert_equal [ :global, "$/" ], [ tok.first.group, tok.shift ]
|
513
|
+
tok = []
|
514
|
+
@ruby.tokenize( "$\n" ) { |t| tok << t }
|
515
|
+
assert_equal [ :global, "$" ], [ tok.first.group, tok.shift ]
|
516
|
+
end
|
517
|
+
|
518
|
+
end
|