syntax 0.7.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/syntax/common.rb +21 -0
- data/lib/syntax/convertors/abstract.rb +3 -0
- data/lib/syntax/lang/ruby.rb +28 -15
- data/lib/syntax/version.rb +2 -2
- data/test/syntax/tc_ruby.rb +297 -92
- data/test/syntax/tokenizer_testcase.rb +40 -0
- metadata +5 -4
data/lib/syntax/common.rb
CHANGED
@@ -78,8 +78,22 @@ module Syntax
|
|
78
78
|
finish
|
79
79
|
end
|
80
80
|
|
81
|
+
# Specify a set of tokenizer-specific options. Each tokenizer may (or may
|
82
|
+
# not) publish any options, but if a tokenizer does, those options may be
|
83
|
+
# used to specify optional behavior.
|
84
|
+
def set( opts={} )
|
85
|
+
( @options ||= Hash.new ).update opts
|
86
|
+
end
|
87
|
+
|
88
|
+
# Get the value of the specified option.
|
89
|
+
def option(opt)
|
90
|
+
@options ? @options[opt] : nil
|
91
|
+
end
|
92
|
+
|
81
93
|
private
|
82
94
|
|
95
|
+
EOL = /(?=\r\n?|\n|$)/
|
96
|
+
|
83
97
|
# A convenience for delegating method calls to the scanner.
|
84
98
|
def self.delegate( sym )
|
85
99
|
define_method( sym ) { |*a| @text.__send__( sym, *a ) }
|
@@ -137,6 +151,13 @@ module Syntax
|
|
137
151
|
@chunk = ""
|
138
152
|
end
|
139
153
|
|
154
|
+
def subtokenize( syntax, text )
|
155
|
+
tokenizer = Syntax.load( syntax )
|
156
|
+
tokenizer.set @options if @options
|
157
|
+
flush_chunk
|
158
|
+
tokenizer.tokenize( text, &@callback )
|
159
|
+
end
|
160
|
+
|
140
161
|
end
|
141
162
|
|
142
163
|
end
|
@@ -7,6 +7,9 @@ module Syntax
|
|
7
7
|
# convenience methods to provide a common interface for all convertors.
|
8
8
|
class Abstract
|
9
9
|
|
10
|
+
# A reference to the tokenizer used by this convertor.
|
11
|
+
attr_reader :tokenizer
|
12
|
+
|
10
13
|
# A convenience method for instantiating a new convertor for a
|
11
14
|
# specific syntax.
|
12
15
|
def self.for_syntax( syntax )
|
data/lib/syntax/lang/ruby.rb
CHANGED
@@ -25,20 +25,20 @@ module Syntax
|
|
25
25
|
def step
|
26
26
|
case
|
27
27
|
when bol? && check( /=begin/ )
|
28
|
-
start_group( :comment, scan_until( /^=end
|
29
|
-
when bol? && check( /__END__
|
28
|
+
start_group( :comment, scan_until( /^=end#{EOL}/ ) )
|
29
|
+
when bol? && check( /__END__#{EOL}/ )
|
30
30
|
start_group( :comment, scan_until( /\Z/ ) )
|
31
31
|
else
|
32
32
|
case
|
33
33
|
when check( /def\s+/ )
|
34
34
|
start_group :keyword, scan( /def\s+/ )
|
35
|
-
start_group :method, scan_until(
|
35
|
+
start_group :method, scan_until( /(?=[;(\s]|#{EOL})/ )
|
36
36
|
when check( /class\s+/ )
|
37
37
|
start_group :keyword, scan( /class\s+/ )
|
38
|
-
start_group :class, scan_until(
|
38
|
+
start_group :class, scan_until( /(?=[;\s<]|#{EOL})/ )
|
39
39
|
when check( /module\s+/ )
|
40
40
|
start_group :keyword, scan( /module\s+/ )
|
41
|
-
start_group :module, scan_until(
|
41
|
+
start_group :module, scan_until( /(?=[;\s]|#{EOL})/ )
|
42
42
|
when check( /::/ )
|
43
43
|
start_group :punct, scan(/::/)
|
44
44
|
when check( /:"/ )
|
@@ -49,11 +49,11 @@ module Syntax
|
|
49
49
|
start_group :symbol, scan(/:/)
|
50
50
|
scan_delimited_region :symbol, :symbol, "", false
|
51
51
|
@allow_operator = true
|
52
|
-
when
|
53
|
-
start_group :symbol,
|
52
|
+
when scan( /:[_a-zA-Z@$][$@\w]*[=!?]?/ )
|
53
|
+
start_group :symbol, matched
|
54
54
|
@allow_operator = true
|
55
|
-
when
|
56
|
-
start_group :char,
|
55
|
+
when scan( /\?(\\[^\n\r]|[^\\\n\r\s])/ )
|
56
|
+
start_group :char, matched
|
57
57
|
@allow_operator = true
|
58
58
|
when check( /(__FILE__|__LINE__|true|false|nil|self)[?!]?/ )
|
59
59
|
if @selector || matched[-1] == ?? || matched[-1] == ?!
|
@@ -65,6 +65,9 @@ module Syntax
|
|
65
65
|
end
|
66
66
|
@selector = false
|
67
67
|
@allow_operator = true
|
68
|
+
when scan(/0([bB][01]+|[oO][0-7]+|[dD][0-9]+|[xX][0-9a-fA-F]+)/)
|
69
|
+
start_group :number, matched
|
70
|
+
@allow_operator = true
|
68
71
|
else
|
69
72
|
case peek(2)
|
70
73
|
when "%r"
|
@@ -120,7 +123,7 @@ module Syntax
|
|
120
123
|
when "#"
|
121
124
|
start_group :comment, scan( /#[^\n\r]*/ )
|
122
125
|
when /[A-Z]/
|
123
|
-
start_group :constant, scan( /\w+/ )
|
126
|
+
start_group @selector ? :ident : :constant, scan( /\w+/ )
|
124
127
|
@allow_operator = true
|
125
128
|
when /[a-z_]/
|
126
129
|
word = scan( /\w+[?!]?/ )
|
@@ -218,11 +221,11 @@ module Syntax
|
|
218
221
|
if heredoc
|
219
222
|
items << "(^"
|
220
223
|
items << '\s*' if heredoc == :float
|
221
|
-
items << "#{Regexp.escape(delim)}\s
|
224
|
+
items << "#{Regexp.escape(delim)}\s*?)#{EOL}"
|
222
225
|
else
|
223
226
|
items << "#{Regexp.escape(delim)}"
|
224
227
|
end
|
225
|
-
items << "|#(
|
228
|
+
items << "|#(\\$|@@?|\\{)" if exprs
|
226
229
|
items = Regexp.new( items )
|
227
230
|
|
228
231
|
loop do
|
@@ -263,25 +266,35 @@ module Syntax
|
|
263
266
|
start_group delim_group, matched
|
264
267
|
break
|
265
268
|
when /^#/
|
269
|
+
do_highlight = (option(:expressions) == :highlight)
|
270
|
+
start_region :expr if do_highlight
|
266
271
|
start_group :expr, matched
|
267
272
|
case matched[1]
|
268
273
|
when ?{
|
269
274
|
depth = 1
|
275
|
+
content = ""
|
270
276
|
while depth > 0
|
271
277
|
p = pos
|
272
278
|
c = scan_until( /[\{}]/ )
|
273
279
|
if c.nil?
|
274
|
-
|
280
|
+
content << scan_until( /\Z/ )
|
275
281
|
break
|
276
282
|
else
|
277
283
|
depth += ( matched == "{" ? 1 : -1 )
|
278
|
-
|
279
|
-
|
284
|
+
content << pre_match[p..-1]
|
285
|
+
content << matched if depth > 0
|
280
286
|
end
|
281
287
|
end
|
288
|
+
if do_highlight
|
289
|
+
subtokenize "ruby", content
|
290
|
+
start_group :expr, "}"
|
291
|
+
else
|
292
|
+
append content + "}"
|
293
|
+
end
|
282
294
|
when ?$, ?@
|
283
295
|
append scan( /\w+/ )
|
284
296
|
end
|
297
|
+
end_region :expr if do_highlight
|
285
298
|
else raise "unexpected match on #{matched}"
|
286
299
|
end
|
287
300
|
end
|
data/lib/syntax/version.rb
CHANGED
data/test/syntax/tc_ruby.rb
CHANGED
@@ -1,81 +1,69 @@
|
|
1
|
-
|
1
|
+
require File.dirname(__FILE__) + "/tokenizer_testcase"
|
2
2
|
|
3
|
-
|
4
|
-
require 'syntax/lang/ruby'
|
3
|
+
class TC_Syntax_Ruby < TokenizerTestCase
|
5
4
|
|
6
|
-
|
7
|
-
|
8
|
-
def tokenize( string )
|
9
|
-
@tokens = []
|
10
|
-
@ruby.tokenize( string ) { |tok| @tokens << tok }
|
11
|
-
end
|
12
|
-
|
13
|
-
def assert_next_token(group, lexeme, instruction=:none)
|
14
|
-
assert false, "no tokens in stack" if @tokens.nil? or @tokens.empty?
|
15
|
-
assert_equal [group, lexeme, instruction],
|
16
|
-
[@tokens.first.group, @tokens.first, @tokens.shift.instruction]
|
17
|
-
end
|
18
|
-
|
19
|
-
def assert_no_next_token
|
20
|
-
assert @tokens.empty?
|
21
|
-
end
|
22
|
-
|
23
|
-
def skip_token( n=1 )
|
24
|
-
n.times { @tokens.shift } unless @tokens.nil? || @tokens.empty?
|
25
|
-
end
|
26
|
-
|
27
|
-
def setup
|
28
|
-
@ruby = Syntax::Ruby.new
|
29
|
-
end
|
5
|
+
syntax "ruby"
|
30
6
|
|
31
7
|
def test_empty
|
32
|
-
tokenize
|
8
|
+
tokenize ""
|
33
9
|
assert_no_next_token
|
34
10
|
end
|
35
11
|
|
36
12
|
def test_constant
|
37
|
-
tokenize
|
13
|
+
tokenize "Foo"
|
38
14
|
assert_next_token :constant, "Foo"
|
39
15
|
end
|
40
16
|
|
41
17
|
def test_ident
|
42
|
-
tokenize
|
18
|
+
tokenize "foo"
|
43
19
|
assert_next_token :ident, "foo"
|
44
20
|
end
|
45
21
|
|
46
22
|
def test_comment_eol
|
47
|
-
tokenize
|
23
|
+
tokenize "# a comment\nfoo"
|
48
24
|
assert_next_token :comment, "# a comment"
|
49
25
|
assert_next_token :normal, "\n"
|
50
26
|
assert_next_token :ident, "foo"
|
51
27
|
end
|
52
28
|
|
53
29
|
def test_comment_block
|
54
|
-
tokenize
|
30
|
+
tokenize "=begin\nthis is a comment\n=end\nnoncomment"
|
55
31
|
assert_next_token :comment, "=begin\nthis is a comment\n=end"
|
56
32
|
assert_next_token :normal, "\n"
|
57
33
|
assert_next_token :ident, "noncomment"
|
58
34
|
end
|
59
35
|
|
36
|
+
def test_comment_block_with_CRNL
|
37
|
+
tokenize "=begin\r\nthis is a comment\r\n=end\r\nnoncomment"
|
38
|
+
assert_next_token :comment, "=begin\r\nthis is a comment\r\n=end"
|
39
|
+
assert_next_token :normal, "\r\n"
|
40
|
+
assert_next_token :ident, "noncomment"
|
41
|
+
end
|
42
|
+
|
60
43
|
def test_keyword
|
61
44
|
Syntax::Ruby::KEYWORDS.each do |word|
|
62
|
-
tokenize
|
45
|
+
tokenize word
|
63
46
|
assert_next_token :keyword, word
|
64
47
|
end
|
65
48
|
Syntax::Ruby::KEYWORDS.each do |word|
|
66
|
-
tokenize
|
49
|
+
tokenize "foo.#{word}"
|
67
50
|
skip_token 2
|
68
51
|
assert_next_token :ident, word
|
69
52
|
end
|
70
53
|
end
|
71
54
|
|
72
55
|
def test__END__
|
73
|
-
tokenize
|
56
|
+
tokenize "__END__\n\nblah blah blah"
|
74
57
|
assert_next_token :comment, "__END__\n\nblah blah blah"
|
75
58
|
end
|
76
59
|
|
60
|
+
def test__END__with_CRNL
|
61
|
+
tokenize "__END__\r\nblah blah blah"
|
62
|
+
assert_next_token :comment, "__END__\r\nblah blah blah"
|
63
|
+
end
|
64
|
+
|
77
65
|
def test_def_paren
|
78
|
-
tokenize
|
66
|
+
tokenize "def foo(bar)"
|
79
67
|
assert_next_token :keyword, "def "
|
80
68
|
assert_next_token :method, "foo"
|
81
69
|
assert_next_token :punct, "("
|
@@ -84,7 +72,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
84
72
|
end
|
85
73
|
|
86
74
|
def test_def_space
|
87
|
-
tokenize
|
75
|
+
tokenize "def foo bar"
|
88
76
|
assert_next_token :keyword, "def "
|
89
77
|
assert_next_token :method, "foo"
|
90
78
|
assert_next_token :normal, " "
|
@@ -92,28 +80,34 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
92
80
|
end
|
93
81
|
|
94
82
|
def test_def_semicolon
|
95
|
-
tokenize
|
83
|
+
tokenize "def foo;"
|
96
84
|
assert_next_token :keyword, "def "
|
97
85
|
assert_next_token :method, "foo"
|
98
86
|
assert_next_token :punct, ";"
|
99
87
|
end
|
100
88
|
|
89
|
+
def test_def_eol
|
90
|
+
tokenize "def foo"
|
91
|
+
assert_next_token :keyword, "def "
|
92
|
+
assert_next_token :method, "foo"
|
93
|
+
end
|
94
|
+
|
101
95
|
def test_class_space
|
102
|
-
tokenize
|
96
|
+
tokenize "class Foo\n"
|
103
97
|
assert_next_token :keyword, "class "
|
104
98
|
assert_next_token :class, "Foo"
|
105
99
|
assert_next_token :normal, "\n"
|
106
100
|
end
|
107
101
|
|
108
102
|
def test_class_semicolon
|
109
|
-
tokenize
|
103
|
+
tokenize "class Foo;"
|
110
104
|
assert_next_token :keyword, "class "
|
111
105
|
assert_next_token :class, "Foo"
|
112
106
|
assert_next_token :punct, ";"
|
113
107
|
end
|
114
108
|
|
115
109
|
def test_class_extend
|
116
|
-
tokenize
|
110
|
+
tokenize "class Foo< Bang"
|
117
111
|
assert_next_token :keyword, "class "
|
118
112
|
assert_next_token :class, "Foo"
|
119
113
|
assert_next_token :punct, "<"
|
@@ -122,34 +116,34 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
122
116
|
end
|
123
117
|
|
124
118
|
def test_module_space
|
125
|
-
tokenize
|
119
|
+
tokenize "module Foo\n"
|
126
120
|
assert_next_token :keyword, "module "
|
127
121
|
assert_next_token :module, "Foo"
|
128
122
|
assert_next_token :normal, "\n"
|
129
123
|
end
|
130
124
|
|
131
125
|
def test_module_semicolon
|
132
|
-
tokenize
|
126
|
+
tokenize "module Foo;"
|
133
127
|
assert_next_token :keyword, "module "
|
134
128
|
assert_next_token :module, "Foo"
|
135
129
|
assert_next_token :punct, ";"
|
136
130
|
end
|
137
131
|
|
138
132
|
def test_module_other
|
139
|
-
tokenize
|
133
|
+
tokenize "module Foo!\n"
|
140
134
|
assert_next_token :keyword, "module "
|
141
135
|
assert_next_token :module, "Foo!"
|
142
136
|
end
|
143
137
|
|
144
138
|
def test_scope_operator
|
145
|
-
tokenize
|
139
|
+
tokenize "Foo::Bar"
|
146
140
|
assert_next_token :constant, "Foo"
|
147
141
|
assert_next_token :punct, "::"
|
148
142
|
assert_next_token :constant, "Bar"
|
149
143
|
end
|
150
144
|
|
151
145
|
def test_symbol_dquote
|
152
|
-
tokenize
|
146
|
+
tokenize ':"foo"'
|
153
147
|
assert_next_token :symbol, ':"'
|
154
148
|
assert_next_token :symbol, '', :region_open
|
155
149
|
assert_next_token :symbol, 'foo'
|
@@ -159,7 +153,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
159
153
|
end
|
160
154
|
|
161
155
|
def test_symbol_squote
|
162
|
-
tokenize
|
156
|
+
tokenize ":'foo'"
|
163
157
|
assert_next_token :symbol, ":'"
|
164
158
|
assert_next_token :symbol, "", :region_open
|
165
159
|
assert_next_token :symbol, "foo"
|
@@ -169,43 +163,56 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
169
163
|
end
|
170
164
|
|
171
165
|
def test_symbol
|
172
|
-
tokenize
|
173
|
-
assert_next_token :symbol, ":
|
166
|
+
tokenize ":foo_123"
|
167
|
+
assert_next_token :symbol, ":foo_123"
|
168
|
+
|
169
|
+
tokenize ":123"
|
170
|
+
assert_next_token :punct, ":"
|
171
|
+
assert_next_token :number, "123"
|
172
|
+
|
173
|
+
tokenize ":foo="
|
174
|
+
assert_next_token :symbol, ":foo="
|
175
|
+
|
176
|
+
tokenize ":foo!"
|
177
|
+
assert_next_token :symbol, ":foo!"
|
178
|
+
|
179
|
+
tokenize ":foo?"
|
180
|
+
assert_next_token :symbol, ":foo?"
|
174
181
|
end
|
175
182
|
|
176
183
|
def test_char
|
177
|
-
tokenize
|
184
|
+
tokenize "?."
|
178
185
|
assert_next_token :char, "?."
|
179
186
|
|
180
|
-
tokenize
|
187
|
+
tokenize '?\n'
|
181
188
|
assert_next_token :char, '?\n'
|
182
189
|
end
|
183
190
|
|
184
191
|
def test_specials
|
185
192
|
%w{__FILE__ __LINE__ true false nil self}.each do |word|
|
186
|
-
tokenize
|
193
|
+
tokenize word
|
187
194
|
assert_next_token :constant, word
|
188
195
|
end
|
189
196
|
|
190
197
|
%w{__FILE__ __LINE__ true false nil self}.each do |word|
|
191
|
-
tokenize
|
198
|
+
tokenize "#{word}?"
|
192
199
|
assert_next_token :ident, "#{word}?"
|
193
200
|
end
|
194
201
|
|
195
202
|
%w{__FILE__ __LINE__ true false nil self}.each do |word|
|
196
|
-
tokenize
|
203
|
+
tokenize "#{word}!"
|
197
204
|
assert_next_token :ident, "#{word}!"
|
198
205
|
end
|
199
206
|
|
200
207
|
%w{__FILE__ __LINE__ true false nil self}.each do |word|
|
201
|
-
tokenize
|
208
|
+
tokenize "x.#{word}"
|
202
209
|
skip_token 2
|
203
210
|
assert_next_token :ident, word
|
204
211
|
end
|
205
212
|
end
|
206
213
|
|
207
214
|
def test_pct_r
|
208
|
-
tokenize
|
215
|
+
tokenize '%r{foo#{x}bar}'
|
209
216
|
assert_next_token :punct, "%r{"
|
210
217
|
assert_next_token :regex, "", :region_open
|
211
218
|
assert_next_token :regex, "foo"
|
@@ -214,7 +221,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
214
221
|
assert_next_token :regex, "", :region_close
|
215
222
|
assert_next_token :punct, "}"
|
216
223
|
|
217
|
-
tokenize
|
224
|
+
tokenize '%r-foo#{x}bar-'
|
218
225
|
assert_next_token :punct, "%r-"
|
219
226
|
assert_next_token :regex, "", :region_open
|
220
227
|
assert_next_token :regex, "foo"
|
@@ -238,7 +245,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
238
245
|
end
|
239
246
|
|
240
247
|
def test_pct_w_brace
|
241
|
-
tokenize
|
248
|
+
tokenize '%w{foo bar baz}'
|
242
249
|
assert_next_token :punct, "%w{"
|
243
250
|
assert_next_token :string, '', :region_open
|
244
251
|
assert_next_token :string, 'foo bar baz'
|
@@ -247,7 +254,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
247
254
|
end
|
248
255
|
|
249
256
|
def test_pct_w
|
250
|
-
tokenize
|
257
|
+
tokenize '%w-foo#{x} bar baz-'
|
251
258
|
assert_next_token :punct, "%w-"
|
252
259
|
assert_next_token :string, '', :region_open
|
253
260
|
assert_next_token :string, 'foo#{x} bar baz'
|
@@ -256,7 +263,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
256
263
|
end
|
257
264
|
|
258
265
|
def test_pct_q
|
259
|
-
tokenize
|
266
|
+
tokenize '%q-hello #{world}-'
|
260
267
|
assert_next_token :punct, "%q-"
|
261
268
|
assert_next_token :string, '', :region_open
|
262
269
|
assert_next_token :string, 'hello #{world}'
|
@@ -265,7 +272,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
265
272
|
end
|
266
273
|
|
267
274
|
def test_pct_s
|
268
|
-
tokenize
|
275
|
+
tokenize '%s-hello #{world}-'
|
269
276
|
assert_next_token :punct, "%s-"
|
270
277
|
assert_next_token :symbol, '', :region_open
|
271
278
|
assert_next_token :symbol, 'hello #{world}'
|
@@ -274,7 +281,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
274
281
|
end
|
275
282
|
|
276
283
|
def test_pct_W
|
277
|
-
tokenize
|
284
|
+
tokenize '%W-foo#{x} bar baz-'
|
278
285
|
assert_next_token :punct, "%W-"
|
279
286
|
assert_next_token :string, '', :region_open
|
280
287
|
assert_next_token :string, 'foo'
|
@@ -285,7 +292,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
285
292
|
end
|
286
293
|
|
287
294
|
def test_pct_Q
|
288
|
-
tokenize
|
295
|
+
tokenize '%Q-hello #{world}-'
|
289
296
|
assert_next_token :punct, "%Q-"
|
290
297
|
assert_next_token :string, '', :region_open
|
291
298
|
assert_next_token :string, 'hello '
|
@@ -295,7 +302,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
295
302
|
end
|
296
303
|
|
297
304
|
def test_pct_x
|
298
|
-
tokenize
|
305
|
+
tokenize '%x-ls /blah/#{foo}-'
|
299
306
|
assert_next_token :punct, "%x-"
|
300
307
|
assert_next_token :string, '', :region_open
|
301
308
|
assert_next_token :string, 'ls /blah/'
|
@@ -305,7 +312,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
305
312
|
end
|
306
313
|
|
307
314
|
def test_pct_string
|
308
|
-
tokenize
|
315
|
+
tokenize '%-hello #{world}-'
|
309
316
|
assert_next_token :punct, "%-"
|
310
317
|
assert_next_token :string, '', :region_open
|
311
318
|
assert_next_token :string, 'hello '
|
@@ -315,7 +322,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
315
322
|
end
|
316
323
|
|
317
324
|
def test_bad_pct_string
|
318
|
-
tokenize
|
325
|
+
tokenize '%0hello #{world}0'
|
319
326
|
assert_next_token :punct, "%"
|
320
327
|
assert_next_token :number, '0'
|
321
328
|
assert_next_token :ident, 'hello'
|
@@ -324,7 +331,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
324
331
|
end
|
325
332
|
|
326
333
|
def test_shift_left
|
327
|
-
tokenize
|
334
|
+
tokenize 'foo << 5'
|
328
335
|
assert_next_token :ident, "foo"
|
329
336
|
assert_next_token :normal, " "
|
330
337
|
assert_next_token :punct, "<<"
|
@@ -333,14 +340,14 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
333
340
|
end
|
334
341
|
|
335
342
|
def test_shift_left_no_white
|
336
|
-
tokenize
|
343
|
+
tokenize 'foo<<5'
|
337
344
|
assert_next_token :ident, "foo"
|
338
345
|
assert_next_token :punct, "<<"
|
339
346
|
assert_next_token :number, "5"
|
340
347
|
end
|
341
348
|
|
342
349
|
def test_here_doc_no_opts
|
343
|
-
tokenize
|
350
|
+
tokenize "foo <<EOF\n foo\n bar\n baz\nEOF"
|
344
351
|
assert_next_token :ident, "foo"
|
345
352
|
assert_next_token :normal, " "
|
346
353
|
assert_next_token :punct, "<<"
|
@@ -352,7 +359,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
352
359
|
end
|
353
360
|
|
354
361
|
def test_here_doc_no_opts_missing_end
|
355
|
-
tokenize
|
362
|
+
tokenize "foo <<EOF\n foo\n bar\n baz\n EOF"
|
356
363
|
assert_next_token :ident, "foo"
|
357
364
|
assert_next_token :normal, " "
|
358
365
|
assert_next_token :punct, "<<"
|
@@ -363,7 +370,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
363
370
|
end
|
364
371
|
|
365
372
|
def test_here_doc_float_right
|
366
|
-
tokenize
|
373
|
+
tokenize "foo <<-EOF\n foo\n bar\n baz\n EOF"
|
367
374
|
assert_next_token :ident, "foo"
|
368
375
|
assert_next_token :normal, " "
|
369
376
|
assert_next_token :punct, "<<-"
|
@@ -375,7 +382,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
375
382
|
end
|
376
383
|
|
377
384
|
def test_here_doc_single_quotes
|
378
|
-
tokenize
|
385
|
+
tokenize "foo <<'EOF'\n foo\#{x}\n bar\n baz\nEOF"
|
379
386
|
assert_next_token :ident, "foo"
|
380
387
|
assert_next_token :normal, " "
|
381
388
|
assert_next_token :punct, "<<'"
|
@@ -388,7 +395,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
388
395
|
end
|
389
396
|
|
390
397
|
def test_here_doc_double_quotes
|
391
|
-
tokenize
|
398
|
+
tokenize "foo <<\"EOF\"\n foo\#{x}\n bar\n baz\nEOF"
|
392
399
|
assert_next_token :ident, "foo"
|
393
400
|
assert_next_token :normal, " "
|
394
401
|
assert_next_token :punct, "<<\""
|
@@ -403,12 +410,12 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
403
410
|
end
|
404
411
|
|
405
412
|
def test_space
|
406
|
-
tokenize
|
413
|
+
tokenize "\n \t\t\n\n\r\n"
|
407
414
|
assert_next_token :normal, "\n \t\t\n\n\r\n"
|
408
415
|
end
|
409
416
|
|
410
417
|
def test_number
|
411
|
-
tokenize
|
418
|
+
tokenize "1 1.0 1e5 1.0e5 1_2.5 1_2.5_2 1_2.5_2e3_2"
|
412
419
|
assert_next_token :number, "1"
|
413
420
|
skip_token
|
414
421
|
assert_next_token :number, "1.0"
|
@@ -425,7 +432,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
425
432
|
end
|
426
433
|
|
427
434
|
def test_dquoted_string
|
428
|
-
tokenize
|
435
|
+
tokenize '"foo #{x} bar\"\n\tbaz\xA5b\5\1234"'
|
429
436
|
assert_next_token :punct, '"'
|
430
437
|
assert_next_token :string, '', :region_open
|
431
438
|
assert_next_token :string, 'foo '
|
@@ -442,7 +449,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
442
449
|
end
|
443
450
|
|
444
451
|
def test_squoted_string
|
445
|
-
tokenize
|
452
|
+
tokenize '\'foo #{x} bar\\\'\n\tbaz\\\\\xA5b\5\1234\''
|
446
453
|
assert_next_token :punct, "'"
|
447
454
|
assert_next_token :string, "", :region_open
|
448
455
|
assert_next_token :string, 'foo #{x} bar'
|
@@ -455,51 +462,51 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
455
462
|
end
|
456
463
|
|
457
464
|
def test_dot_selector
|
458
|
-
tokenize
|
465
|
+
tokenize 'foo.nil'
|
459
466
|
skip_token
|
460
467
|
assert_next_token :punct, "."
|
461
468
|
assert_next_token :ident, "nil"
|
462
469
|
end
|
463
470
|
|
464
471
|
def test_dot_range_inclusive
|
465
|
-
tokenize
|
472
|
+
tokenize 'foo..nil'
|
466
473
|
skip_token
|
467
474
|
assert_next_token :punct, ".."
|
468
475
|
assert_next_token :constant, "nil"
|
469
476
|
end
|
470
477
|
|
471
478
|
def test_dot_range_exclusive
|
472
|
-
tokenize
|
479
|
+
tokenize 'foo...nil'
|
473
480
|
skip_token
|
474
481
|
assert_next_token :punct, "..."
|
475
482
|
assert_next_token :constant, "nil"
|
476
483
|
end
|
477
484
|
|
478
485
|
def test_dot_range_many
|
479
|
-
tokenize
|
486
|
+
tokenize 'foo.....nil'
|
480
487
|
skip_token
|
481
488
|
assert_next_token :punct, "....."
|
482
489
|
assert_next_token :constant, "nil"
|
483
490
|
end
|
484
491
|
|
485
492
|
def test_attribute
|
486
|
-
tokenize
|
493
|
+
tokenize '@var_foo'
|
487
494
|
assert_next_token :attribute, "@var_foo"
|
488
495
|
end
|
489
496
|
|
490
497
|
def test_global
|
491
|
-
tokenize
|
498
|
+
tokenize '$var_foo'
|
492
499
|
assert_next_token :global, "$var_foo"
|
493
|
-
tokenize
|
500
|
+
tokenize '$12'
|
494
501
|
assert_next_token :global, "$12"
|
495
|
-
tokenize
|
502
|
+
tokenize '$/f'
|
496
503
|
assert_next_token :global, "$/"
|
497
|
-
tokenize
|
504
|
+
tokenize "$\n"
|
498
505
|
assert_next_token :global, "$"
|
499
506
|
end
|
500
507
|
|
501
508
|
def test_paren_delimiter
|
502
|
-
tokenize
|
509
|
+
tokenize '%w(a)'
|
503
510
|
assert_next_token :punct, "%w("
|
504
511
|
assert_next_token :string, "", :region_open
|
505
512
|
assert_next_token :string, "a"
|
@@ -508,7 +515,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
508
515
|
end
|
509
516
|
|
510
517
|
def test_division
|
511
|
-
tokenize
|
518
|
+
tokenize 'm / 3'
|
512
519
|
assert_next_token :ident, "m"
|
513
520
|
assert_next_token :normal, " "
|
514
521
|
assert_next_token :punct, "/"
|
@@ -517,7 +524,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
517
524
|
end
|
518
525
|
|
519
526
|
def test_regex
|
520
|
-
tokenize
|
527
|
+
tokenize 'm =~ /3/'
|
521
528
|
assert_next_token :ident, "m"
|
522
529
|
assert_next_token :normal, " "
|
523
530
|
assert_next_token :punct, "=~"
|
@@ -530,7 +537,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
530
537
|
end
|
531
538
|
|
532
539
|
def test_heredoc_with_trailing_text
|
533
|
-
tokenize
|
540
|
+
tokenize "foo('here', <<EOF)\n A heredoc.\nEOF\nfoo"
|
534
541
|
assert_next_token :ident, "foo"
|
535
542
|
assert_next_token :punct, "('"
|
536
543
|
assert_next_token :string, '', :region_open
|
@@ -550,7 +557,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
|
|
550
557
|
end
|
551
558
|
|
552
559
|
def test_multiple_heredocs
|
553
|
-
tokenize
|
560
|
+
tokenize <<'TEST'
|
554
561
|
foo('here', <<EOF, 'there', <<-'FOO', 'blah')
|
555
562
|
First heredoc, right here.
|
556
563
|
Expressions are #{allowed}
|
@@ -598,7 +605,7 @@ TEST
|
|
598
605
|
end
|
599
606
|
|
600
607
|
def test_carldr_bad_heredoc_001
|
601
|
-
tokenize
|
608
|
+
tokenize <<'TEST'
|
602
609
|
str = <<END
|
603
610
|
here document #{1 + 1}
|
604
611
|
END
|
@@ -663,4 +670,202 @@ TEST
|
|
663
670
|
assert_next_token :punct, ")/"
|
664
671
|
assert_next_token :number, "2"
|
665
672
|
end
|
673
|
+
|
674
|
+
def test_heredoc_with_CRNL
|
675
|
+
tokenize "foo <<SRC\r\nSome text\r\nSRC\r\nfoo"
|
676
|
+
assert_next_token :ident, "foo"
|
677
|
+
assert_next_token :normal, " "
|
678
|
+
assert_next_token :punct, "<<"
|
679
|
+
assert_next_token :constant, "SRC"
|
680
|
+
assert_next_token :string, "", :region_open
|
681
|
+
assert_next_token :string, "\r\nSome text\r\n"
|
682
|
+
assert_next_token :string, "", :region_close
|
683
|
+
assert_next_token :constant, "SRC"
|
684
|
+
assert_next_token :normal, "\r\n"
|
685
|
+
assert_next_token :ident, "foo"
|
686
|
+
end
|
687
|
+
|
688
|
+
def test_question_mark_at_newline
|
689
|
+
tokenize "foo ?\n 'bar': 'baz'"
|
690
|
+
assert_next_token :ident, "foo"
|
691
|
+
assert_next_token :normal, " "
|
692
|
+
assert_next_token :punct, "?"
|
693
|
+
assert_next_token :normal, "\n "
|
694
|
+
assert_next_token :punct, "'"
|
695
|
+
assert_next_token :string, "", :region_open
|
696
|
+
assert_next_token :string, "bar"
|
697
|
+
assert_next_token :string, "", :region_close
|
698
|
+
assert_next_token :punct, "':"
|
699
|
+
assert_next_token :normal, " "
|
700
|
+
assert_next_token :punct, "'"
|
701
|
+
assert_next_token :string, "", :region_open
|
702
|
+
assert_next_token :string, "baz"
|
703
|
+
assert_next_token :string, "", :region_close
|
704
|
+
assert_next_token :punct, "'"
|
705
|
+
end
|
706
|
+
|
707
|
+
def test_question_mark_and_escaped_newline
|
708
|
+
tokenize "foo ?\\\n 'bar': 'baz'"
|
709
|
+
assert_next_token :ident, "foo"
|
710
|
+
assert_next_token :normal, " "
|
711
|
+
assert_next_token :punct, "?\\"
|
712
|
+
assert_next_token :normal, "\n "
|
713
|
+
assert_next_token :punct, "'"
|
714
|
+
assert_next_token :string, "", :region_open
|
715
|
+
assert_next_token :string, "bar"
|
716
|
+
assert_next_token :string, "", :region_close
|
717
|
+
assert_next_token :punct, "':"
|
718
|
+
assert_next_token :normal, " "
|
719
|
+
assert_next_token :punct, "'"
|
720
|
+
assert_next_token :string, "", :region_open
|
721
|
+
assert_next_token :string, "baz"
|
722
|
+
assert_next_token :string, "", :region_close
|
723
|
+
assert_next_token :punct, "'"
|
724
|
+
end
|
725
|
+
|
726
|
+
def test_highlighted_subexpression
|
727
|
+
tokenizer.set :expressions => :highlight
|
728
|
+
tokenize '"la la #{["hello", "world"].each { |f| puts "string #{f}" }}"'
|
729
|
+
assert_next_token :punct, '"'
|
730
|
+
assert_next_token :string, "", :region_open
|
731
|
+
assert_next_token :string, "la la "
|
732
|
+
assert_next_token :expr, "", :region_open
|
733
|
+
assert_next_token :expr, '#{'
|
734
|
+
assert_next_token :punct, '["'
|
735
|
+
assert_next_token :string, "", :region_open
|
736
|
+
assert_next_token :string, 'hello'
|
737
|
+
assert_next_token :string, "", :region_close
|
738
|
+
assert_next_token :punct, '",'
|
739
|
+
assert_next_token :normal, ' '
|
740
|
+
assert_next_token :punct, '"'
|
741
|
+
assert_next_token :string, "", :region_open
|
742
|
+
assert_next_token :string, "world"
|
743
|
+
assert_next_token :string, "", :region_close
|
744
|
+
assert_next_token :punct, '"].'
|
745
|
+
assert_next_token :ident, 'each'
|
746
|
+
assert_next_token :normal, ' '
|
747
|
+
assert_next_token :punct, '{'
|
748
|
+
assert_next_token :normal, ' '
|
749
|
+
assert_next_token :punct, '|'
|
750
|
+
assert_next_token :ident, 'f'
|
751
|
+
assert_next_token :punct, '|'
|
752
|
+
assert_next_token :normal, ' '
|
753
|
+
assert_next_token :ident, 'puts'
|
754
|
+
assert_next_token :normal, ' '
|
755
|
+
assert_next_token :punct, '"'
|
756
|
+
assert_next_token :string, "", :region_open
|
757
|
+
assert_next_token :string, "string "
|
758
|
+
assert_next_token :expr, "", :region_open
|
759
|
+
assert_next_token :expr, '#{'
|
760
|
+
assert_next_token :ident, 'f'
|
761
|
+
assert_next_token :expr, '}'
|
762
|
+
assert_next_token :expr, "", :region_close
|
763
|
+
assert_next_token :string, "", :region_close
|
764
|
+
assert_next_token :punct, '"'
|
765
|
+
assert_next_token :normal, ' '
|
766
|
+
assert_next_token :punct, '}'
|
767
|
+
assert_next_token :expr, '}'
|
768
|
+
assert_next_token :expr, "", :region_close
|
769
|
+
assert_next_token :string, "", :region_close
|
770
|
+
assert_next_token :punct, '"'
|
771
|
+
end
|
772
|
+
|
773
|
+
def test_expr_in_braces
|
774
|
+
tokenize '"#{f}"'
|
775
|
+
assert_next_token :punct, '"'
|
776
|
+
assert_next_token :string, "", :region_open
|
777
|
+
assert_next_token :expr, '#{f}'
|
778
|
+
assert_next_token :string, "", :region_close
|
779
|
+
assert_next_token :punct, '"'
|
780
|
+
end
|
781
|
+
|
782
|
+
def test_expr_in_braces_with_nested_braces
|
783
|
+
tokenize '"#{loop{break}}"'
|
784
|
+
assert_next_token :punct, '"'
|
785
|
+
assert_next_token :string, "", :region_open
|
786
|
+
assert_next_token :expr, '#{loop{break}}'
|
787
|
+
assert_next_token :string, "", :region_close
|
788
|
+
assert_next_token :punct, '"'
|
789
|
+
end
|
790
|
+
|
791
|
+
def test_expr_with_global_var
|
792
|
+
tokenize '"#$f"'
|
793
|
+
assert_next_token :punct, '"'
|
794
|
+
assert_next_token :string, "", :region_open
|
795
|
+
assert_next_token :expr, '#$f'
|
796
|
+
assert_next_token :string, "", :region_close
|
797
|
+
assert_next_token :punct, '"'
|
798
|
+
end
|
799
|
+
|
800
|
+
def test_expr_with_instance_var
|
801
|
+
tokenize '"#@f"'
|
802
|
+
assert_next_token :punct, '"'
|
803
|
+
assert_next_token :string, "", :region_open
|
804
|
+
assert_next_token :expr, '#@f'
|
805
|
+
assert_next_token :string, "", :region_close
|
806
|
+
assert_next_token :punct, '"'
|
807
|
+
end
|
808
|
+
|
809
|
+
def test_expr_with_class_var
|
810
|
+
tokenize '"#@@f"'
|
811
|
+
assert_next_token :punct, '"'
|
812
|
+
assert_next_token :string, "", :region_open
|
813
|
+
assert_next_token :expr, '#@@f'
|
814
|
+
assert_next_token :string, "", :region_close
|
815
|
+
assert_next_token :punct, '"'
|
816
|
+
end
|
817
|
+
|
818
|
+
def test_qmark_space
|
819
|
+
tokenize "? "
|
820
|
+
assert_next_token :punct, "?"
|
821
|
+
assert_next_token :normal, " "
|
822
|
+
end
|
823
|
+
|
824
|
+
def test_capitalized_method
|
825
|
+
tokenize "obj.Foo"
|
826
|
+
skip_token 2
|
827
|
+
assert_next_token :ident, "Foo"
|
828
|
+
end
|
829
|
+
|
830
|
+
def test_hexadecimal_literal
|
831
|
+
tokenize "0xDEADbeef 0X1234567890ABCDEFG"
|
832
|
+
assert_next_token :number, "0xDEADbeef"
|
833
|
+
skip_token
|
834
|
+
assert_next_token :number, "0X1234567890ABCDEF"
|
835
|
+
assert_next_token :constant, "G"
|
836
|
+
end
|
837
|
+
|
838
|
+
def test_binary_literal
|
839
|
+
tokenize "0b2 0b0 0b101 0B123"
|
840
|
+
assert_next_token :number, "0"
|
841
|
+
assert_next_token :ident, "b2"
|
842
|
+
skip_token
|
843
|
+
assert_next_token :number, "0b0"
|
844
|
+
skip_token
|
845
|
+
assert_next_token :number, "0b101"
|
846
|
+
skip_token
|
847
|
+
assert_next_token :number, "0B123"
|
848
|
+
end
|
849
|
+
|
850
|
+
def test_octal_literal
|
851
|
+
tokenize "0o9 0o12345670abc 0O12345678"
|
852
|
+
assert_next_token :number, "0"
|
853
|
+
assert_next_token :ident, "o9"
|
854
|
+
skip_token
|
855
|
+
assert_next_token :number, "0o12345670"
|
856
|
+
assert_next_token :ident, "abc"
|
857
|
+
skip_token
|
858
|
+
assert_next_token :number, "0O12345678"
|
859
|
+
end
|
860
|
+
|
861
|
+
def test_decimal_literal
|
862
|
+
tokenize "0dA 0d1234567890abc 0D1234567890"
|
863
|
+
assert_next_token :number, "0"
|
864
|
+
assert_next_token :ident, "dA"
|
865
|
+
skip_token
|
866
|
+
assert_next_token :number, "0d1234567890"
|
867
|
+
assert_next_token :ident, "abc"
|
868
|
+
skip_token
|
869
|
+
assert_next_token :number, "0D1234567890"
|
870
|
+
end
|
666
871
|
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__) + "/../../lib"
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'syntax'
|
5
|
+
|
6
|
+
class TokenizerTestCase < Test::Unit::TestCase
|
7
|
+
def self.syntax( type )
|
8
|
+
class_eval <<-EOF
|
9
|
+
def setup
|
10
|
+
@tokenizer = Syntax.load(#{type.inspect})
|
11
|
+
end
|
12
|
+
EOF
|
13
|
+
end
|
14
|
+
|
15
|
+
def default_test
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
attr_reader :tokenizer
|
21
|
+
|
22
|
+
def tokenize( string )
|
23
|
+
@tokens = []
|
24
|
+
@tokenizer.tokenize( string ) { |tok| @tokens << tok }
|
25
|
+
end
|
26
|
+
|
27
|
+
def assert_next_token(group, lexeme, instruction=:none)
|
28
|
+
assert false, "no tokens in stack" if @tokens.nil? or @tokens.empty?
|
29
|
+
assert_equal [group, lexeme, instruction],
|
30
|
+
[@tokens.first.group, @tokens.first, @tokens.shift.instruction]
|
31
|
+
end
|
32
|
+
|
33
|
+
def assert_no_next_token
|
34
|
+
assert @tokens.empty?
|
35
|
+
end
|
36
|
+
|
37
|
+
def skip_token( n=1 )
|
38
|
+
n.times { @tokens.shift } unless @tokens.nil? || @tokens.empty?
|
39
|
+
end
|
40
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.8.
|
2
|
+
rubygems_version: 0.8.10
|
3
3
|
specification_version: 1
|
4
4
|
name: syntax
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.7.0
|
7
|
-
date: 2005-
|
6
|
+
version: 1.0.0
|
7
|
+
date: 2005-06-18
|
8
8
|
summary: Syntax is a Ruby library for performing simple syntax highlighting.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
11
|
-
email:
|
11
|
+
email: jamis@jamisbuck.org
|
12
12
|
homepage:
|
13
13
|
rubyforge_project:
|
14
14
|
description:
|
@@ -47,6 +47,7 @@ files:
|
|
47
47
|
- test/syntax/tc_ruby.rb
|
48
48
|
- test/syntax/tc_xml.rb
|
49
49
|
- test/syntax/tc_yaml.rb
|
50
|
+
- test/syntax/tokenizer_testcase.rb
|
50
51
|
test_files:
|
51
52
|
- test/ALL-TESTS.rb
|
52
53
|
rdoc_options: []
|