syntax 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/syntax.rb +31 -0
- data/lib/syntax/common.rb +118 -0
- data/lib/syntax/convertors/html.rb +50 -0
- data/lib/syntax/ruby.rb +239 -0
- data/lib/syntax/version.rb +9 -0
- data/lib/syntax/xml.rb +108 -0
- data/lib/syntax/yaml.rb +105 -0
- data/test/ALL-TESTS.rb +5 -0
- data/test/syntax/tc_ruby.rb +518 -0
- data/test/syntax/tc_xml.rb +202 -0
- data/test/syntax/tc_yaml.rb +228 -0
- metadata +51 -0
data/lib/syntax/xml.rb
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
require 'syntax'
|
2
|
+
|
3
|
+
module Syntax

  # A simple implementation of an XML lexer. It handles most cases. It is
  # not a validating lexer, meaning it will happily process invalid XML without
  # complaining.
  #
  # The scanning primitives used below (scan, scan_until, peek, getch, matched,
  # subgroup, start_group, append) are not defined in this file; they are
  # presumably inherited from Tokenizer -- confirm against syntax/common.rb.
  class XML < Tokenizer

    # Initialize the lexer. Scanning always begins outside of any tag.
    def setup
      @in_tag = false
    end

    # Step through a single iteration of the tokenization process. This will
    # yield (potentially) many tokens, and possibly zero tokens.
    #
    # Leading whitespace is always emitted as :normal. What follows depends on
    # @in_tag:
    #
    # * inside a tag: emits :namespace/:punct/:attribute for "ns:name", :number
    #   for digits, :attribute for a bare name, :punct for "=", ">", "/>" and
    #   "?>" (the closing forms also clear @in_tag), and hands quoted values to
    #   scan_string. Anything else is appended to the current group one
    #   character at a time.
    # * outside a tag: text up to the next "<" or "&" is :normal; "<!-- ... -->"
    #   (or an unterminated comment running to the end of input) is :comment;
    #   "<" opens a tag and emits its optional namespace and :tag name;
    #   "&...;" is an :entity and a bare "&" is :normal.
    # * no further "<" or "&": the remainder of the input is appended.
    def step
      start_group :normal, matched if scan( /\s+/ )
      if @in_tag
        case
          when scan( /([-\w]+):([-\w]+)/ )
            start_group :namespace, subgroup(1)
            start_group :punct, ":"
            start_group :attribute, subgroup(2)
          when scan( /\d+/ )
            start_group :number, matched
          when scan( /[-\w]+/ )
            start_group :attribute, matched
          when scan( %r{[/?]?>} )
            @in_tag = false
            start_group :punct, matched
          when scan( /=/ )
            start_group :punct, matched
          when scan( /["']/ )
            scan_string matched
          else
            # The whitespace scan above may already have consumed the rest of
            # the input (e.g. a truncated "<foo "), in which case getch has
            # nothing to return. Guard it the same way scan_string does so
            # that nil is never appended.
            append getch || ""
        end
      elsif ( text = scan_until( /(?=[<&])/ ) )
        start_group :normal, text unless text.empty?
        if scan(/<!--.*?(-->|\Z)/m)
          start_group :comment, matched
        else
          case peek(1)
            when "<"
              start_group :punct, getch
              # Keep "?", "/" and "!" in the same :punct token as the "<".
              case peek(1)
                when "?"
                  append getch
                when "/"
                  append getch
                when "!"
                  append getch
              end
              start_group :normal, matched if scan( /\s+/ )
              if scan( /([-\w]+):([-\w]+)/ )
                start_group :namespace, subgroup(1)
                start_group :punct, ":"
                start_group :tag, subgroup(2)
              elsif scan( /[-\w]+/ )
                start_group :tag, matched
              end
              @in_tag = true
            when "&"
              if scan( /&\S{1,10};/ )
                start_group :entity, matched
              else
                start_group :normal, scan( /&/ )
              end
          end
        end
      else
        append scan_until( /\Z/ )
      end
    end

    private

      # Scan the string starting at the current position, with the given
      # delimiter character.
      #
      # The opening delimiter has already been consumed by the caller and is
      # emitted here as :punct. The body is emitted as :string, with "&...;"
      # sequences emitted as :entity. A backslash and the character after it
      # are kept together as :string. Scanning stops after the closing
      # delimiter (emitted as :punct), or as soon as no further "&", backslash
      # or delimiter can be found -- in which case the unterminated remainder
      # is left for the following step calls.
      def scan_string( delim )
        start_group :punct, delim
        match = /(?=[&\\]|#{delim})/
        loop do
          break unless ( text = scan_until( match ) )
          start_group :string, text unless text.empty?
          case peek(1)
            when "&"
              if scan( /&\S{1,10};/ )
                start_group :entity, matched
              else
                start_group :string, getch
              end
            when "\\"
              start_group :string, getch
              # The backslash may be the very last character of the input.
              append getch || ""
            when delim
              start_group :punct, getch
              break
          end
        end
      end

  end

  SYNTAX["xml"] = XML

end
|
data/lib/syntax/yaml.rb
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
require 'syntax'
|
2
|
+
|
3
|
+
module Syntax

  # A simple implementation of a YAML lexer. It handles most cases. It is
  # not a validating lexer.
  #
  # The scanning primitives used below (bol?, eos?, scan, scan_until, check,
  # check_until, peek, getch, matched, subgroup, start_group, append) are not
  # defined in this file; they are presumably inherited from Tokenizer and
  # behave like their StringScanner namesakes -- confirm against
  # syntax/common.rb.
  class YAML < Tokenizer

    # Step through a single iteration of the tokenization process. This will
    # yield (potentially) many tokens, and possibly zero tokens.
    #
    # At the beginning of a line this recognizes document markers ("---"),
    # "key:" pairs, sequence dashes, blank lines and comments. Elsewhere it
    # recognizes whitespace, type tags ("!..."), anchors ("&..."), references
    # ("*..."), times, timestamps, quoted strings, symbols (":name"), a bare
    # ":", comments and folded scalars (">" / ">-"); any other text up to the
    # end of the line or the next "#" is emitted as :normal.
    def step
      if bol?
        case
          when scan(/---(\s*.+)?$/)
            start_group :document, matched
          when scan(/(\s*)([a-zA-Z][-\w]*)(\s*):/)
            start_group :normal, subgroup(1)
            start_group :key, subgroup(2)
            start_group :normal, subgroup(3)
            start_group :punct, ":"
          when scan(/(\s*)-/)
            start_group :normal, subgroup(1)
            start_group :punct, "-"
          when scan(/\s*$/)
            blank = matched
            # /\s*$/ can succeed without consuming anything when the scanner
            # sits on a bare newline that is followed by a non-blank line.
            # Nothing would then change between steps, so consume the line
            # break(s) explicitly to guarantee forward progress.
            blank = scan(/[\n\r]+/) || "" if blank.empty?
            start_group :normal, blank
          when scan(/#.*$/)
            start_group :comment, matched
          else
            append getch
        end
      else
        case
          when scan(/[\n\r]+/)
            start_group :normal, matched
          when scan(/[ \t]+/)
            start_group :normal, matched
          when scan(/!+(.*?^)?\S+/)
            start_group :type, matched
          when scan(/&\S+/)
            start_group :anchor, matched
          when scan(/\*\S+/)
            start_group :ref, matched
          when scan(/\d\d:\d\d:\d\d/)
            start_group :time, matched
          when scan(/\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d(\.\d+)? [-+]\d\d:\d\d/)
            start_group :date, matched
          when scan(/['"]/)
            start_group :punct, matched
            scan_string matched
          when scan(/:\w+/)
            start_group :symbol, matched
          when scan(/[:]/)
            start_group :punct, matched
          when scan(/#.*$/)
            start_group :comment, matched
          when scan(/>-?/)
            # Folded scalar: the rest of the indicator line is :normal, then
            # every following line that is blank or indented at least as far
            # as the first content line belongs to one :string token.
            start_group :punct, matched
            start_group :normal, scan(/.*$/)
            append getch until eos? || bol?
            return if eos?
            indent = check(/ */)
            start_group :string
            loop do
              # At end of input the /\Z/ alternative below still matches (with
              # an empty result), so without this guard a folded scalar that
              # runs to the end of the document would never leave the loop.
              break if eos?
              line = check_until(/[\n\r]|\Z/)
              break if line.nil?
              if line.chomp.length > 0
                this_indent = line.chomp.match( /^\s*/ )[0]
                break if this_indent.length < indent.length
              end
              append scan_until(/[\n\r]|\Z/)
            end
          else
            start_group :normal, scan_until(/(?=$|#)/)
        end
      end
    end

    private

      # Scan the body of a quoted string whose opening +delim+ (' or ") has
      # already been consumed and emitted by the caller.
      #
      # Text is emitted as :string and the closing delimiter as :punct. Inside
      # double-quoted strings a backslash and the character following it are
      # emitted as :expr; single-quoted strings do not treat the backslash
      # specially. An unterminated string consumes the rest of the input as
      # :string.
      def scan_string( delim )
        regex = /(?=[#{delim=="'" ? "" : "\\\\"}#{delim}])/
        loop do
          text = scan_until( regex )
          if text.nil?
            start_group :string, scan_until( /\Z/ )
            break
          else
            start_group :string, text unless text.empty?
          end

          case peek(1)
            when "\\"
              # Take the backslash together with whatever follows it, line
              # breaks included (hence /m). If the backslash is the last
              # character of the input, take it alone. Either way the scanner
              # advances; a failed match here would otherwise leave it parked
              # on the same backslash for every following iteration.
              start_group :expr, scan(/../m) || getch
            else
              start_group :punct, getch
              break
          end
        end
      end

  end

  SYNTAX["yaml"] = YAML

end
|
data/test/syntax/tc_ruby.rb
ADDED
@@ -0,0 +1,518 @@
|
|
1
|
+
$:.unshift "../../lib"
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'syntax/ruby'
|
5
|
+
|
6
|
+
class TC_Syntax_Ruby < Test::Unit::TestCase

  # Every test starts with a fresh Ruby tokenizer.
  def setup
    @ruby = Syntax::Ruby.new
  end

  # Empty input must not yield any token at all.
  def test_empty
    yielded = false
    @ruby.tokenize( "" ) { |_token| yielded = true }
    assert !yielded
  end

  def test_constant
    assert_whole_input "Foo", :constant
  end

  def test_ident
    assert_whole_input "foo", :ident
  end

  def test_comment_eol
    assert_whole_input "# a comment", :comment
  end

  def test_comment_block
    assert_whole_input "=begin\nthis is a comment\n=end", :comment
  end

  # A keyword on its own is a :keyword; after a dot it is a plain :ident.
  def test_keyword
    Syntax::Ruby::KEYWORDS.each do |word|
      tokens = lex( word )
      assert_equal [ :keyword, word ], [ tokens.first.group, tokens.first ]
    end
    Syntax::Ruby::KEYWORDS.each do |word|
      tokens = lex( "foo.#{word}" )
      2.times { tokens.shift }
      assert_equal [ :ident, word ], [ tokens.first.group, tokens.first ]
    end
  end

  def test__END__
    assert_whole_input "__END__\n\nblah blah blah", :comment
  end

  def test_def_paren
    assert_definition "def foo(bar)", "def ", "foo", :method
  end

  def test_def_space
    assert_definition "def foo bar", "def ", "foo", :method
  end

  def test_def_semicolon
    assert_definition "def foo;", "def ", "foo", :method
  end

  def test_class_space
    assert_definition "class Foo\n", "class ", "Foo", :class
  end

  def test_class_semicolon
    assert_definition "class Foo;", "class ", "Foo", :class
  end

  def test_class_extend
    assert_definition "class Foo< Bang", "class ", "Foo", :class
  end

  def test_module_space
    assert_definition "module Foo\n", "module ", "Foo", :module
  end

  def test_module_semicolon
    assert_definition "module Foo;", "module ", "Foo", :module
  end

  def test_module_other
    assert_definition "module Foo!\n", "module ", "Foo!", :module
  end

  def test_scope_operator
    tokens = lex( "Foo::Bar" )
    assert_equal "Foo", tokens.shift
    assert_equal "::", tokens.first
    assert_equal :punct, tokens.shift.group
    assert_equal "Bar", tokens.first
  end

  def test_symbol_dquote
    assert_leading_token ':"foo"', :symbol
  end

  def test_symbol_squote
    assert_leading_token ":'foo'", :symbol
  end

  def test_symbol
    assert_leading_token ":foo_bar?", :symbol
  end

  def test_char
    assert_leading_token "?.", :char
    assert_leading_token '?\n', :char
  end

  # The special words are :constant on their own, but :ident when they carry
  # a "?" or "!" suffix or when they follow a dot.
  def test_specials
    specials = %w{__FILE__ __LINE__ true false nil self}

    specials.each do |word|
      assert_leading_token word, :constant
    end

    specials.each do |word|
      assert_leading_token "#{word}?", :ident
    end

    specials.each do |word|
      assert_leading_token "#{word}!", :ident
    end

    specials.each do |word|
      tokens = lex( "x.#{word}" )
      2.times { tokens.shift }
      assert_equal word, tokens.first
      assert_equal :ident, tokens.first.group
    end
  end

  def test_pct_r
    assert_stream( lex( '%r{foo#{x}bar}' ), [
      [ :punct, "%r{" ],
      [ :regex, "foo" ],
      [ :expr, '#{x}' ],
      [ :regex, "bar" ],
      [ :punct, "}" ]
    ] )

    assert_stream( lex( '%r-foo#{x}bar-' ), [
      [ :punct, "%r-" ],
      [ :regex, "foo" ],
      [ :expr, '#{x}' ],
      [ :regex, "bar" ],
      [ :punct, "-" ]
    ] )
  end

  def test_pct_w
    assert_stream( lex( '%w-foo#{x} bar baz-' ), [
      [ :punct, "%w-" ],
      [ :string, 'foo#{x} bar baz' ],
      [ :punct, "-" ]
    ] )
  end

  def test_pct_q
    assert_stream( lex( '%q-hello #{world}-' ), [
      [ :punct, "%q-" ],
      [ :string, 'hello #{world}' ],
      [ :punct, "-" ]
    ] )
  end

  def test_pct_s
    assert_stream( lex( '%s-hello #{world}-' ), [
      [ :punct, "%s-" ],
      [ :symbol, 'hello #{world}' ],
      [ :punct, "-" ]
    ] )
  end

  def test_pct_W
    assert_stream( lex( '%W-foo#{x} bar baz-' ), [
      [ :punct, "%W-" ],
      [ :string, 'foo' ],
      [ :expr, '#{x}' ],
      [ :string, ' bar baz' ],
      [ :punct, "-" ]
    ] )
  end

  def test_pct_Q
    assert_stream( lex( '%Q-hello #{world}-' ), [
      [ :punct, "%Q-" ],
      [ :string, 'hello ' ],
      [ :expr, '#{world}' ],
      [ :punct, "-" ]
    ] )
  end

  def test_pct_x
    assert_stream( lex( '%x-ls /blah/#{foo}-' ), [
      [ :punct, "%x-" ],
      [ :string, 'ls /blah/' ],
      [ :expr, '#{foo}' ],
      [ :punct, "-" ]
    ] )
  end

  def test_pct_string
    assert_stream( lex( '%-hello #{world}-' ), [
      [ :punct, "%-" ],
      [ :string, 'hello ' ],
      [ :expr, '#{world}' ],
      [ :punct, "-" ]
    ] )
  end

  def test_bad_pct_string
    assert_stream( lex( '%0hello #{world}0' ), [
      [ :punct, "%" ],
      [ :number, '0' ],
      [ :ident, 'hello' ],
      [ :normal, ' ' ],
      [ :comment, '#{world}0' ]
    ] )
  end

  def test_shift_left
    assert_stream( lex( 'foo << 5' ), [
      [ :ident, "foo" ],
      [ :normal, " " ],
      [ :punct, "<<" ],
      [ :normal, " " ],
      [ :number, "5" ]
    ] )
  end

  def test_here_doc_no_opts
    assert_stream( lex( "foo <<EOF\n foo\n bar\n baz\nEOF" ), [
      [ :ident, "foo" ],
      [ :normal, " " ],
      [ :punct, "<<" ],
      [ :constant, "EOF" ],
      [ :string, "\n foo\n bar\n baz\n" ],
      [ :constant, "EOF" ]
    ] )
  end

  def test_here_doc_no_opts_missing_end
    assert_stream( lex( "foo <<EOF\n foo\n bar\n baz\n EOF" ), [
      [ :ident, "foo" ],
      [ :normal, " " ],
      [ :punct, "<<" ],
      [ :constant, "EOF" ],
      [ :string, "\n foo\n bar\n baz\n EOF" ]
    ] )
  end

  def test_here_doc_float_right
    assert_stream( lex( "foo <<-EOF\n foo\n bar\n baz\n EOF" ), [
      [ :ident, "foo" ],
      [ :normal, " " ],
      [ :punct, "<<-" ],
      [ :constant, "EOF" ],
      [ :string, "\n foo\n bar\n baz\n" ],
      [ :constant, " EOF" ]
    ] )
  end

  def test_here_doc_single_quotes
    assert_stream( lex( "foo <<'EOF'\n foo\#{x}\n bar\n baz\nEOF" ), [
      [ :ident, "foo" ],
      [ :normal, " " ],
      [ :punct, "<<'" ],
      [ :constant, "EOF" ],
      [ :punct, "'" ],
      [ :string, "\n foo\#{x}\n bar\n baz\n" ],
      [ :constant, "EOF" ]
    ] )
  end

  def test_here_doc_double_quotes
    assert_stream( lex( "foo <<\"EOF\"\n foo\#{x}\n bar\n baz\nEOF" ), [
      [ :ident, "foo" ],
      [ :normal, " " ],
      [ :punct, "<<\"" ],
      [ :constant, "EOF" ],
      [ :punct, "\"" ],
      [ :string, "\n foo" ],
      [ :expr, '#{x}' ],
      [ :string, "\n bar\n baz\n" ],
      [ :constant, "EOF" ]
    ] )
  end

  def test_space
    assert_stream( lex( "\n \t\t\n\n\r\n" ), [
      [ :normal, "\n \t\t\n\n\r\n" ]
    ] )
  end

  def test_number
    tokens = lex( "1 1.0 1e5 1.0e5 1_2.5 1_2.5_2 1_2.5_2e3_2" )
    %w{1 1.0 1e5 1.0e5 1_2.5 1_2.5_2}.each do |literal|
      assert_stream tokens, [ [ :number, literal ] ]
      tokens.shift # drop the token that separates two numbers
    end
    assert_stream tokens, [ [ :number, "1_2.5_2e3_2" ] ]
  end

  def test_dquoted_string
    assert_stream( lex( '"foo #{x} bar\"\n\tbaz\xA5b\5\1234"' ), [
      [ :punct, '"' ],
      [ :string, 'foo ' ],
      [ :expr, '#{x}' ],
      [ :string, ' bar' ],
      [ :expr, '\"\n\t' ],
      [ :string, 'baz' ],
      [ :expr, '\xA5' ],
      [ :string, 'b' ],
      [ :expr, '\5\123' ],
      [ :string, '4' ],
      [ :punct, '"' ]
    ] )
  end

  def test_squoted_string
    assert_stream( lex( '\'foo #{x} bar\\\'\n\tbaz\\\\\xA5b\5\1234\'' ), [
      [ :punct, "'" ],
      [ :string, 'foo #{x} bar' ],
      [ :expr, '\\\'' ],
      [ :string, '\n\tbaz' ],
      [ :expr, '\\\\' ],
      [ :string, '\xA5b\5\1234' ],
      [ :punct, "'" ]
    ] )
  end

  def test_dot_selector
    tokens = lex( 'foo.nil' )
    tokens.shift
    assert_stream tokens, [ [ :punct, "." ], [ :ident, "nil" ] ]
  end

  def test_dot_range_inclusive
    tokens = lex( 'foo..nil' )
    tokens.shift
    assert_stream tokens, [ [ :punct, ".." ], [ :constant, "nil" ] ]
  end

  def test_dot_range_exclusive
    tokens = lex( 'foo...nil' )
    tokens.shift
    assert_stream tokens, [ [ :punct, "..." ], [ :constant, "nil" ] ]
  end

  def test_dot_range_many
    tokens = lex( 'foo.....nil' )
    tokens.shift
    assert_stream tokens, [ [ :punct, "....." ], [ :constant, "nil" ] ]
  end

  def test_attribute
    assert_stream lex( '@var_foo' ), [ [ :attribute, "@var_foo" ] ]
  end

  def test_global
    assert_stream lex( '$var_foo' ), [ [ :global, "$var_foo" ] ]
    assert_stream lex( '$12' ), [ [ :global, "$12" ] ]
    assert_stream lex( '$/f' ), [ [ :global, "$/" ] ]
    assert_stream lex( "$\n" ), [ [ :global, "$" ] ]
  end

  private

    # Runs the tokenizer over +source+ and returns every yielded token, in
    # the order it was yielded.
    def lex( source )
      collected = []
      @ruby.tokenize( source ) { |token| collected << token }
      collected
    end

    # Checks, while tokenizing, that every token yielded for +source+ belongs
    # to +group+ and equals +source+ itself, and that at least one token was
    # yielded.
    def assert_whole_input( source, group )
      yielded = false
      @ruby.tokenize( source ) do |token|
        yielded = true
        assert_equal group, token.group
        assert_equal source, token
      end
      assert yielded
    end

    # Checks that the first token produced for +source+ equals +source+ and
    # belongs to +group+.
    def assert_leading_token( source, group )
      tokens = lex( source )
      assert_equal source, tokens.first
      assert_equal group, tokens.first.group
    end

    # Drives the tokenizer by hand: after the first step the latest token must
    # be the :keyword +keyword+, and after the second step it must be +name+
    # in +group+.
    def assert_definition( source, keyword, name, group )
      latest = nil
      @ruby.start( source ) { |token| latest = token }
      @ruby.step
      assert_equal keyword, latest
      assert_equal :keyword, latest.group
      @ruby.step
      assert_equal name, latest
      assert_equal group, latest.group
    end

    # Consumes one token from the front of +tokens+ for each [group, text]
    # pair in +expected+, asserting that the two match.
    def assert_stream( tokens, expected )
      expected.each do |group, text|
        assert_equal [ group, text ], [ tokens.first.group, tokens.shift ]
      end
    end

end
|