syntax 0.7.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/syntax/common.rb +21 -0
- data/lib/syntax/convertors/abstract.rb +3 -0
- data/lib/syntax/lang/ruby.rb +28 -15
- data/lib/syntax/version.rb +2 -2
- data/test/syntax/tc_ruby.rb +297 -92
- data/test/syntax/tokenizer_testcase.rb +40 -0
- metadata +5 -4
data/lib/syntax/common.rb
CHANGED
@@ -78,8 +78,22 @@ module Syntax
       finish
     end
 
+    # Specify a set of tokenizer-specific options. Each tokenizer may (or may
+    # not) publish any options, but if a tokenizer does those options may be
+    # used to specify optional behavior.
+    def set( opts={} )
+      ( @options ||= Hash.new ).update opts
+    end
+
+    # Get the value of the specified option.
+    def option(opt)
+      @options ? @options[opt] : nil
+    end
+
     private
 
+      EOL = /(?=\r\n?|\n|$)/
+
       # A convenience for delegating method calls to the scanner.
       def self.delegate( sym )
         define_method( sym ) { |*a| @text.__send__( sym, *a ) }
@@ -137,6 +151,13 @@ module Syntax
         @chunk = ""
       end
 
+      def subtokenize( syntax, text )
+        tokenizer = Syntax.load( syntax )
+        tokenizer.set @options if @options
+        flush_chunk
+        tokenizer.tokenize( text, &@callback )
+      end
+
   end
 
 end
data/lib/syntax/convertors/abstract.rb
CHANGED
@@ -7,6 +7,9 @@ module Syntax
     # convenience methods to provide a common interface for all convertors.
     class Abstract
 
+      # A reference to the tokenizer used by this convertor.
+      attr_reader :tokenizer
+
       # A convenience method for instantiating a new convertor for a
       # specific syntax.
       def self.for_syntax( syntax )
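Taken together, the new set/option methods on the tokenizer and the tokenizer reader exposed on the Abstract convertor let callers pass tokenizer-specific options through a convertor. A minimal sketch of how this might look (the HTML convertor and its convert method are the gem's existing public API; the option value comes from the Ruby tokenizer changes below):

    require 'syntax/convertors/html'

    convertor = Syntax::Convertors::HTML.for_syntax "ruby"
    # reach the underlying tokenizer through the new reader and set an option
    convertor.tokenizer.set :expressions => :highlight
    puts convertor.convert('greeting = "hello #{name}"')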
data/lib/syntax/lang/ruby.rb
CHANGED
@@ -25,20 +25,20 @@ module Syntax
     def step
       case
       when bol? && check( /=begin/ )
-        start_group( :comment, scan_until( /^=end
-      when bol? && check( /__END__
+        start_group( :comment, scan_until( /^=end#{EOL}/ ) )
+      when bol? && check( /__END__#{EOL}/ )
         start_group( :comment, scan_until( /\Z/ ) )
       else
         case
         when check( /def\s+/ )
           start_group :keyword, scan( /def\s+/ )
-          start_group :method, scan_until(
+          start_group :method, scan_until( /(?=[;(\s]|#{EOL})/ )
         when check( /class\s+/ )
           start_group :keyword, scan( /class\s+/ )
-          start_group :class, scan_until(
+          start_group :class, scan_until( /(?=[;\s<]|#{EOL})/ )
         when check( /module\s+/ )
           start_group :keyword, scan( /module\s+/ )
-          start_group :module, scan_until(
+          start_group :module, scan_until( /(?=[;\s]|#{EOL})/ )
         when check( /::/ )
          start_group :punct, scan(/::/)
         when check( /:"/ )
@@ -49,11 +49,11 @@ module Syntax
          start_group :symbol, scan(/:/)
          scan_delimited_region :symbol, :symbol, "", false
          @allow_operator = true
-        when
-          start_group :symbol,
+        when scan( /:[_a-zA-Z@$][$@\w]*[=!?]?/ )
+          start_group :symbol, matched
          @allow_operator = true
-        when
-          start_group :char,
+        when scan( /\?(\\[^\n\r]|[^\\\n\r\s])/ )
+          start_group :char, matched
          @allow_operator = true
         when check( /(__FILE__|__LINE__|true|false|nil|self)[?!]?/ )
          if @selector || matched[-1] == ?? || matched[-1] == ?!
@@ -65,6 +65,9 @@ module Syntax
          end
          @selector = false
          @allow_operator = true
+        when scan(/0([bB][01]+|[oO][0-7]+|[dD][0-9]+|[xX][0-9a-fA-F]+)/)
+          start_group :number, matched
+          @allow_operator = true
         else
          case peek(2)
          when "%r"
@@ -120,7 +123,7 @@ module Syntax
          when "#"
            start_group :comment, scan( /#[^\n\r]*/ )
          when /[A-Z]/
-            start_group :constant, scan( /\w+/ )
+            start_group @selector ? :ident : :constant, scan( /\w+/ )
            @allow_operator = true
          when /[a-z_]/
            word = scan( /\w+[?!]?/ )
@@ -218,11 +221,11 @@ module Syntax
       if heredoc
         items << "(^"
         items << '\s*' if heredoc == :float
-        items << "#{Regexp.escape(delim)}\s
+        items << "#{Regexp.escape(delim)}\s*?)#{EOL}"
       else
         items << "#{Regexp.escape(delim)}"
       end
-      items << "|#(
+      items << "|#(\\$|@@?|\\{)" if exprs
       items = Regexp.new( items )
 
       loop do
@@ -263,25 +266,35 @@ module Syntax
          start_group delim_group, matched
          break
         when /^#/
+          do_highlight = (option(:expressions) == :highlight)
+          start_region :expr if do_highlight
          start_group :expr, matched
          case matched[1]
          when ?{
            depth = 1
+            content = ""
            while depth > 0
              p = pos
              c = scan_until( /[\{}]/ )
              if c.nil?
-
+                content << scan_until( /\Z/ )
                break
              else
                depth += ( matched == "{" ? 1 : -1 )
-
-
+                content << pre_match[p..-1]
+                content << matched if depth > 0
              end
            end
+            if do_highlight
+              subtokenize "ruby", content
+              start_group :expr, "}"
+            else
+              append content + "}"
+            end
          when ?$, ?@
            append scan( /\w+/ )
          end
+          end_region :expr if do_highlight
         else raise "unexpected match on #{matched}"
         end
       end
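The subtokenize helper and the scan_delimited_region changes above add an optional behavior for string interpolations: by default the whole #{...} expression is still emitted as a single :expr token, but when the tokenizer is configured with :expressions => :highlight the braces open an :expr region and their contents are re-tokenized as Ruby. A rough sketch of the difference, based on the tests later in this diff:

    require 'syntax'

    tokenizer = Syntax.load "ruby"

    # default: the interpolation is one :expr token inside the string region
    tokenizer.tokenize('"#{f}"') { |t| puts "#{t.group} #{t.inspect}" }

    # opt in to highlighted subexpressions; the contents of #{...} are now
    # tokenized recursively as Ruby inside an :expr region
    tokenizer.set :expressions => :highlight
    tokenizer.tokenize('"#{f}"') { |t| puts "#{t.group} #{t.inspect}" }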
data/lib/syntax/version.rb
CHANGED
data/test/syntax/tc_ruby.rb
CHANGED
@@ -1,81 +1,69 @@
-
+require File.dirname(__FILE__) + "/tokenizer_testcase"
 
-
-require 'syntax/lang/ruby'
+class TC_Syntax_Ruby < TokenizerTestCase
 
-
-
-  def tokenize( string )
-    @tokens = []
-    @ruby.tokenize( string ) { |tok| @tokens << tok }
-  end
-
-  def assert_next_token(group, lexeme, instruction=:none)
-    assert false, "no tokens in stack" if @tokens.nil? or @tokens.empty?
-    assert_equal [group, lexeme, instruction],
-      [@tokens.first.group, @tokens.first, @tokens.shift.instruction]
-  end
-
-  def assert_no_next_token
-    assert @tokens.empty?
-  end
-
-  def skip_token( n=1 )
-    n.times { @tokens.shift } unless @tokens.nil? || @tokens.empty?
-  end
-
-  def setup
-    @ruby = Syntax::Ruby.new
-  end
+  syntax "ruby"
 
   def test_empty
-    tokenize
+    tokenize ""
     assert_no_next_token
   end
 
   def test_constant
-    tokenize
+    tokenize "Foo"
     assert_next_token :constant, "Foo"
   end
 
   def test_ident
-    tokenize
+    tokenize "foo"
     assert_next_token :ident, "foo"
   end
 
   def test_comment_eol
-    tokenize
+    tokenize "# a comment\nfoo"
     assert_next_token :comment, "# a comment"
     assert_next_token :normal, "\n"
     assert_next_token :ident, "foo"
   end
 
   def test_comment_block
-    tokenize
+    tokenize "=begin\nthis is a comment\n=end\nnoncomment"
     assert_next_token :comment, "=begin\nthis is a comment\n=end"
     assert_next_token :normal, "\n"
     assert_next_token :ident, "noncomment"
   end
 
+  def test_comment_block_with_CRNL
+    tokenize "=begin\r\nthis is a comment\r\n=end\r\nnoncomment"
+    assert_next_token :comment, "=begin\r\nthis is a comment\r\n=end"
+    assert_next_token :normal, "\r\n"
+    assert_next_token :ident, "noncomment"
+  end
+
   def test_keyword
     Syntax::Ruby::KEYWORDS.each do |word|
-      tokenize
+      tokenize word
       assert_next_token :keyword, word
     end
     Syntax::Ruby::KEYWORDS.each do |word|
-      tokenize
+      tokenize "foo.#{word}"
       skip_token 2
       assert_next_token :ident, word
     end
   end
 
   def test__END__
-    tokenize
+    tokenize "__END__\n\nblah blah blah"
     assert_next_token :comment, "__END__\n\nblah blah blah"
   end
 
+  def test__END__with_CRNL
+    tokenize "__END__\r\nblah blah blah"
+    assert_next_token :comment, "__END__\r\nblah blah blah"
+  end
+
   def test_def_paren
-    tokenize
+    tokenize "def foo(bar)"
     assert_next_token :keyword, "def "
     assert_next_token :method, "foo"
     assert_next_token :punct, "("
@@ -84,7 +72,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_def_space
-    tokenize
+    tokenize "def foo bar"
     assert_next_token :keyword, "def "
     assert_next_token :method, "foo"
     assert_next_token :normal, " "
@@ -92,28 +80,34 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_def_semicolon
-    tokenize
+    tokenize "def foo;"
     assert_next_token :keyword, "def "
     assert_next_token :method, "foo"
     assert_next_token :punct, ";"
   end
 
+  def test_def_eol
+    tokenize "def foo"
+    assert_next_token :keyword, "def "
+    assert_next_token :method, "foo"
+  end
+
   def test_class_space
-    tokenize
+    tokenize "class Foo\n"
     assert_next_token :keyword, "class "
     assert_next_token :class, "Foo"
     assert_next_token :normal, "\n"
   end
 
   def test_class_semicolon
-    tokenize
+    tokenize "class Foo;"
     assert_next_token :keyword, "class "
     assert_next_token :class, "Foo"
     assert_next_token :punct, ";"
   end
 
   def test_class_extend
-    tokenize
+    tokenize "class Foo< Bang"
     assert_next_token :keyword, "class "
     assert_next_token :class, "Foo"
     assert_next_token :punct, "<"
@@ -122,34 +116,34 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_module_space
-    tokenize
+    tokenize "module Foo\n"
     assert_next_token :keyword, "module "
     assert_next_token :module, "Foo"
     assert_next_token :normal, "\n"
   end
 
   def test_module_semicolon
-    tokenize
+    tokenize "module Foo;"
     assert_next_token :keyword, "module "
     assert_next_token :module, "Foo"
     assert_next_token :punct, ";"
   end
 
   def test_module_other
-    tokenize
+    tokenize "module Foo!\n"
     assert_next_token :keyword, "module "
     assert_next_token :module, "Foo!"
   end
 
   def test_scope_operator
-    tokenize
+    tokenize "Foo::Bar"
     assert_next_token :constant, "Foo"
     assert_next_token :punct, "::"
     assert_next_token :constant, "Bar"
   end
 
   def test_symbol_dquote
-    tokenize
+    tokenize ':"foo"'
     assert_next_token :symbol, ':"'
     assert_next_token :symbol, '', :region_open
     assert_next_token :symbol, 'foo'
@@ -159,7 +153,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_symbol_squote
-    tokenize
+    tokenize ":'foo'"
     assert_next_token :symbol, ":'"
     assert_next_token :symbol, "", :region_open
     assert_next_token :symbol, "foo"
@@ -169,43 +163,56 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_symbol
-    tokenize
-    assert_next_token :symbol, ":
+    tokenize ":foo_123"
+    assert_next_token :symbol, ":foo_123"
+
+    tokenize ":123"
+    assert_next_token :punct, ":"
+    assert_next_token :number, "123"
+
+    tokenize ":foo="
+    assert_next_token :symbol, ":foo="
+
+    tokenize ":foo!"
+    assert_next_token :symbol, ":foo!"
+
+    tokenize ":foo?"
+    assert_next_token :symbol, ":foo?"
   end
 
   def test_char
-    tokenize
+    tokenize "?."
     assert_next_token :char, "?."
 
-    tokenize
+    tokenize '?\n'
     assert_next_token :char, '?\n'
   end
 
   def test_specials
     %w{__FILE__ __LINE__ true false nil self}.each do |word|
-      tokenize
+      tokenize word
      assert_next_token :constant, word
     end
 
     %w{__FILE__ __LINE__ true false nil self}.each do |word|
-      tokenize
+      tokenize "#{word}?"
      assert_next_token :ident, "#{word}?"
     end
 
     %w{__FILE__ __LINE__ true false nil self}.each do |word|
-      tokenize
+      tokenize "#{word}!"
      assert_next_token :ident, "#{word}!"
     end
 
     %w{__FILE__ __LINE__ true false nil self}.each do |word|
-      tokenize
+      tokenize "x.#{word}"
      skip_token 2
      assert_next_token :ident, word
     end
   end
 
   def test_pct_r
-    tokenize
+    tokenize '%r{foo#{x}bar}'
     assert_next_token :punct, "%r{"
     assert_next_token :regex, "", :region_open
     assert_next_token :regex, "foo"
@@ -214,7 +221,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
     assert_next_token :regex, "", :region_close
     assert_next_token :punct, "}"
 
-    tokenize
+    tokenize '%r-foo#{x}bar-'
     assert_next_token :punct, "%r-"
     assert_next_token :regex, "", :region_open
     assert_next_token :regex, "foo"
@@ -238,7 +245,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_pct_w_brace
-    tokenize
+    tokenize '%w{foo bar baz}'
     assert_next_token :punct, "%w{"
     assert_next_token :string, '', :region_open
     assert_next_token :string, 'foo bar baz'
@@ -247,7 +254,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_pct_w
-    tokenize
+    tokenize '%w-foo#{x} bar baz-'
     assert_next_token :punct, "%w-"
     assert_next_token :string, '', :region_open
     assert_next_token :string, 'foo#{x} bar baz'
@@ -256,7 +263,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_pct_q
-    tokenize
+    tokenize '%q-hello #{world}-'
     assert_next_token :punct, "%q-"
     assert_next_token :string, '', :region_open
     assert_next_token :string, 'hello #{world}'
@@ -265,7 +272,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_pct_s
-    tokenize
+    tokenize '%s-hello #{world}-'
     assert_next_token :punct, "%s-"
     assert_next_token :symbol, '', :region_open
     assert_next_token :symbol, 'hello #{world}'
@@ -274,7 +281,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_pct_W
-    tokenize
+    tokenize '%W-foo#{x} bar baz-'
     assert_next_token :punct, "%W-"
     assert_next_token :string, '', :region_open
     assert_next_token :string, 'foo'
@@ -285,7 +292,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_pct_Q
-    tokenize
+    tokenize '%Q-hello #{world}-'
     assert_next_token :punct, "%Q-"
     assert_next_token :string, '', :region_open
     assert_next_token :string, 'hello '
@@ -295,7 +302,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_pct_x
-    tokenize
+    tokenize '%x-ls /blah/#{foo}-'
     assert_next_token :punct, "%x-"
     assert_next_token :string, '', :region_open
     assert_next_token :string, 'ls /blah/'
@@ -305,7 +312,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_pct_string
-    tokenize
+    tokenize '%-hello #{world}-'
     assert_next_token :punct, "%-"
     assert_next_token :string, '', :region_open
     assert_next_token :string, 'hello '
@@ -315,7 +322,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_bad_pct_string
-    tokenize
+    tokenize '%0hello #{world}0'
     assert_next_token :punct, "%"
     assert_next_token :number, '0'
     assert_next_token :ident, 'hello'
@@ -324,7 +331,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_shift_left
-    tokenize
+    tokenize 'foo << 5'
     assert_next_token :ident, "foo"
     assert_next_token :normal, " "
     assert_next_token :punct, "<<"
@@ -333,14 +340,14 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_shift_left_no_white
-    tokenize
+    tokenize 'foo<<5'
     assert_next_token :ident, "foo"
     assert_next_token :punct, "<<"
     assert_next_token :number, "5"
   end
 
   def test_here_doc_no_opts
-    tokenize
+    tokenize "foo <<EOF\n foo\n bar\n baz\nEOF"
     assert_next_token :ident, "foo"
     assert_next_token :normal, " "
     assert_next_token :punct, "<<"
@@ -352,7 +359,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_here_doc_no_opts_missing_end
-    tokenize
+    tokenize "foo <<EOF\n foo\n bar\n baz\n EOF"
     assert_next_token :ident, "foo"
     assert_next_token :normal, " "
     assert_next_token :punct, "<<"
@@ -363,7 +370,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_here_doc_float_right
-    tokenize
+    tokenize "foo <<-EOF\n foo\n bar\n baz\n EOF"
     assert_next_token :ident, "foo"
     assert_next_token :normal, " "
     assert_next_token :punct, "<<-"
@@ -375,7 +382,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_here_doc_single_quotes
-    tokenize
+    tokenize "foo <<'EOF'\n foo\#{x}\n bar\n baz\nEOF"
     assert_next_token :ident, "foo"
     assert_next_token :normal, " "
     assert_next_token :punct, "<<'"
@@ -388,7 +395,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_here_doc_double_quotes
-    tokenize
+    tokenize "foo <<\"EOF\"\n foo\#{x}\n bar\n baz\nEOF"
     assert_next_token :ident, "foo"
     assert_next_token :normal, " "
     assert_next_token :punct, "<<\""
@@ -403,12 +410,12 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_space
-    tokenize
+    tokenize "\n \t\t\n\n\r\n"
     assert_next_token :normal, "\n \t\t\n\n\r\n"
   end
 
   def test_number
-    tokenize
+    tokenize "1 1.0 1e5 1.0e5 1_2.5 1_2.5_2 1_2.5_2e3_2"
     assert_next_token :number, "1"
     skip_token
     assert_next_token :number, "1.0"
@@ -425,7 +432,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_dquoted_string
-    tokenize
+    tokenize '"foo #{x} bar\"\n\tbaz\xA5b\5\1234"'
     assert_next_token :punct, '"'
     assert_next_token :string, '', :region_open
     assert_next_token :string, 'foo '
@@ -442,7 +449,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_squoted_string
-    tokenize
+    tokenize '\'foo #{x} bar\\\'\n\tbaz\\\\\xA5b\5\1234\''
     assert_next_token :punct, "'"
     assert_next_token :string, "", :region_open
     assert_next_token :string, 'foo #{x} bar'
@@ -455,51 +462,51 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_dot_selector
-    tokenize
+    tokenize 'foo.nil'
     skip_token
     assert_next_token :punct, "."
     assert_next_token :ident, "nil"
   end
 
   def test_dot_range_inclusive
-    tokenize
+    tokenize 'foo..nil'
     skip_token
     assert_next_token :punct, ".."
     assert_next_token :constant, "nil"
   end
 
   def test_dot_range_exclusive
-    tokenize
+    tokenize 'foo...nil'
     skip_token
     assert_next_token :punct, "..."
     assert_next_token :constant, "nil"
   end
 
   def test_dot_range_many
-    tokenize
+    tokenize 'foo.....nil'
     skip_token
     assert_next_token :punct, "....."
     assert_next_token :constant, "nil"
   end
 
   def test_attribute
-    tokenize
+    tokenize '@var_foo'
     assert_next_token :attribute, "@var_foo"
   end
 
   def test_global
-    tokenize
+    tokenize '$var_foo'
     assert_next_token :global, "$var_foo"
-    tokenize
+    tokenize '$12'
     assert_next_token :global, "$12"
-    tokenize
+    tokenize '$/f'
     assert_next_token :global, "$/"
-    tokenize
+    tokenize "$\n"
     assert_next_token :global, "$"
   end
 
   def test_paren_delimiter
-    tokenize
+    tokenize '%w(a)'
     assert_next_token :punct, "%w("
     assert_next_token :string, "", :region_open
     assert_next_token :string, "a"
@@ -508,7 +515,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_division
-    tokenize
+    tokenize 'm / 3'
     assert_next_token :ident, "m"
     assert_next_token :normal, " "
     assert_next_token :punct, "/"
@@ -517,7 +524,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_regex
-    tokenize
+    tokenize 'm =~ /3/'
     assert_next_token :ident, "m"
     assert_next_token :normal, " "
     assert_next_token :punct, "=~"
@@ -530,7 +537,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_heredoc_with_trailing_text
-    tokenize
+    tokenize "foo('here', <<EOF)\n A heredoc.\nEOF\nfoo"
     assert_next_token :ident, "foo"
     assert_next_token :punct, "('"
     assert_next_token :string, '', :region_open
@@ -550,7 +557,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_multiple_heredocs
-    tokenize
+    tokenize <<'TEST'
 foo('here', <<EOF, 'there', <<-'FOO', 'blah')
 First heredoc, right here.
 Expressions are #{allowed}
@@ -598,7 +605,7 @@ TEST
   end
 
   def test_carldr_bad_heredoc_001
-    tokenize
+    tokenize <<'TEST'
 str = <<END
 here document #{1 + 1}
 END
@@ -663,4 +670,202 @@ TEST
     assert_next_token :punct, ")/"
     assert_next_token :number, "2"
   end
+
+  def test_heredoc_with_CRNL
+    tokenize "foo <<SRC\r\nSome text\r\nSRC\r\nfoo"
+    assert_next_token :ident, "foo"
+    assert_next_token :normal, " "
+    assert_next_token :punct, "<<"
+    assert_next_token :constant, "SRC"
+    assert_next_token :string, "", :region_open
+    assert_next_token :string, "\r\nSome text\r\n"
+    assert_next_token :string, "", :region_close
+    assert_next_token :constant, "SRC"
+    assert_next_token :normal, "\r\n"
+    assert_next_token :ident, "foo"
+  end
+
+  def test_question_mark_at_newline
+    tokenize "foo ?\n 'bar': 'baz'"
+    assert_next_token :ident, "foo"
+    assert_next_token :normal, " "
+    assert_next_token :punct, "?"
+    assert_next_token :normal, "\n "
+    assert_next_token :punct, "'"
+    assert_next_token :string, "", :region_open
+    assert_next_token :string, "bar"
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, "':"
+    assert_next_token :normal, " "
+    assert_next_token :punct, "'"
+    assert_next_token :string, "", :region_open
+    assert_next_token :string, "baz"
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, "'"
+  end
+
+  def test_question_mark_and_escaped_newline
+    tokenize "foo ?\\\n 'bar': 'baz'"
+    assert_next_token :ident, "foo"
+    assert_next_token :normal, " "
+    assert_next_token :punct, "?\\"
+    assert_next_token :normal, "\n "
+    assert_next_token :punct, "'"
+    assert_next_token :string, "", :region_open
+    assert_next_token :string, "bar"
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, "':"
+    assert_next_token :normal, " "
+    assert_next_token :punct, "'"
+    assert_next_token :string, "", :region_open
+    assert_next_token :string, "baz"
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, "'"
+  end
+
+  def test_highlighted_subexpression
+    tokenizer.set :expressions => :highlight
+    tokenize '"la la #{["hello", "world"].each { |f| puts "string #{f}" }}"'
+    assert_next_token :punct, '"'
+    assert_next_token :string, "", :region_open
+    assert_next_token :string, "la la "
+    assert_next_token :expr, "", :region_open
+    assert_next_token :expr, '#{'
+    assert_next_token :punct, '["'
+    assert_next_token :string, "", :region_open
+    assert_next_token :string, 'hello'
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, '",'
+    assert_next_token :normal, ' '
+    assert_next_token :punct, '"'
+    assert_next_token :string, "", :region_open
+    assert_next_token :string, "world"
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, '"].'
+    assert_next_token :ident, 'each'
+    assert_next_token :normal, ' '
+    assert_next_token :punct, '{'
+    assert_next_token :normal, ' '
+    assert_next_token :punct, '|'
+    assert_next_token :ident, 'f'
+    assert_next_token :punct, '|'
+    assert_next_token :normal, ' '
+    assert_next_token :ident, 'puts'
+    assert_next_token :normal, ' '
+    assert_next_token :punct, '"'
+    assert_next_token :string, "", :region_open
+    assert_next_token :string, "string "
+    assert_next_token :expr, "", :region_open
+    assert_next_token :expr, '#{'
+    assert_next_token :ident, 'f'
+    assert_next_token :expr, '}'
+    assert_next_token :expr, "", :region_close
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, '"'
+    assert_next_token :normal, ' '
+    assert_next_token :punct, '}'
+    assert_next_token :expr, '}'
+    assert_next_token :expr, "", :region_close
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, '"'
+  end
+
+  def test_expr_in_braces
+    tokenize '"#{f}"'
+    assert_next_token :punct, '"'
+    assert_next_token :string, "", :region_open
+    assert_next_token :expr, '#{f}'
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, '"'
+  end
+
+  def test_expr_in_braces_with_nested_braces
+    tokenize '"#{loop{break}}"'
+    assert_next_token :punct, '"'
+    assert_next_token :string, "", :region_open
+    assert_next_token :expr, '#{loop{break}}'
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, '"'
+  end
+
+  def test_expr_with_global_var
+    tokenize '"#$f"'
+    assert_next_token :punct, '"'
+    assert_next_token :string, "", :region_open
+    assert_next_token :expr, '#$f'
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, '"'
+  end
+
+  def test_expr_with_instance_var
+    tokenize '"#@f"'
+    assert_next_token :punct, '"'
+    assert_next_token :string, "", :region_open
+    assert_next_token :expr, '#@f'
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, '"'
+  end
+
+  def test_expr_with_class_var
+    tokenize '"#@@f"'
+    assert_next_token :punct, '"'
+    assert_next_token :string, "", :region_open
+    assert_next_token :expr, '#@@f'
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, '"'
+  end
+
+  def test_qmark_space
+    tokenize "? "
+    assert_next_token :punct, "?"
+    assert_next_token :normal, " "
+  end
+
+  def test_capitalized_method
+    tokenize "obj.Foo"
+    skip_token 2
+    assert_next_token :ident, "Foo"
+  end
+
+  def test_hexadecimal_literal
+    tokenize "0xDEADbeef 0X1234567890ABCDEFG"
+    assert_next_token :number, "0xDEADbeef"
+    skip_token
+    assert_next_token :number, "0X1234567890ABCDEF"
+    assert_next_token :constant, "G"
+  end
+
+  def test_binary_literal
+    tokenize "0b2 0b0 0b101 0B123"
+    assert_next_token :number, "0"
+    assert_next_token :ident, "b2"
+    skip_token
+    assert_next_token :number, "0b0"
+    skip_token
+    assert_next_token :number, "0b101"
+    skip_token
+    assert_next_token :number, "0B123"
+  end
+
+  def test_octal_literal
+    tokenize "0o9 0o12345670abc 0O12345678"
+    assert_next_token :number, "0"
+    assert_next_token :ident, "o9"
+    skip_token
+    assert_next_token :number, "0o12345670"
+    assert_next_token :ident, "abc"
+    skip_token
+    assert_next_token :number, "0O12345678"
+  end
+
+  def test_decimal_literal
+    tokenize "0dA 0d1234567890abc 0D1234567890"
+    assert_next_token :number, "0"
+    assert_next_token :ident, "dA"
+    skip_token
+    assert_next_token :number, "0d1234567890"
+    assert_next_token :ident, "abc"
+    skip_token
+    assert_next_token :number, "0D1234567890"
+  end
 end
data/test/syntax/tokenizer_testcase.rb
ADDED
@@ -0,0 +1,40 @@
+$:.unshift File.dirname(__FILE__) + "/../../lib"
+
+require 'test/unit'
+require 'syntax'
+
+class TokenizerTestCase < Test::Unit::TestCase
+  def self.syntax( type )
+    class_eval <<-EOF
+      def setup
+        @tokenizer = Syntax.load(#{type.inspect})
+      end
+    EOF
+  end
+
+  def default_test
+  end
+
+  private
+
+    attr_reader :tokenizer
+
+    def tokenize( string )
+      @tokens = []
+      @tokenizer.tokenize( string ) { |tok| @tokens << tok }
+    end
+
+    def assert_next_token(group, lexeme, instruction=:none)
+      assert false, "no tokens in stack" if @tokens.nil? or @tokens.empty?
+      assert_equal [group, lexeme, instruction],
+        [@tokens.first.group, @tokens.first, @tokens.shift.instruction]
+    end
+
+    def assert_no_next_token
+      assert @tokens.empty?
+    end
+
+    def skip_token( n=1 )
+      n.times { @tokens.shift } unless @tokens.nil? || @tokens.empty?
+    end
+end
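With this shared base class, tc_ruby.rb no longer carries its own setup and assertion helpers: a tokenizer test subclasses TokenizerTestCase, declares its syntax, and uses the tokenize, assert_next_token, assert_no_next_token, and skip_token helpers. A hypothetical new test following the same pattern (class and method names here are illustrative only):

    require File.dirname(__FILE__) + "/tokenizer_testcase"

    class TC_Syntax_Example < TokenizerTestCase

      syntax "ruby"

      def test_instance_variable
        tokenize "@ivar"
        assert_next_token :attribute, "@ivar"
      end
    end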
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
-rubygems_version: 0.8.
+rubygems_version: 0.8.10
 specification_version: 1
 name: syntax
 version: !ruby/object:Gem::Version
-  version: 0.
-date: 2005-
+  version: 1.0.0
+date: 2005-06-18
 summary: Syntax is Ruby library for performing simple syntax highlighting.
 require_paths:
 - lib
-email:
+email: jamis@jamisbuck.org
 homepage:
 rubyforge_project:
 description:
@@ -47,6 +47,7 @@ files:
 - test/syntax/tc_ruby.rb
 - test/syntax/tc_xml.rb
 - test/syntax/tc_yaml.rb
+- test/syntax/tokenizer_testcase.rb
 test_files:
 - test/ALL-TESTS.rb
 rdoc_options: []