syntax 0.7.0 → 1.0.0

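At a glance: the tokenizer base class gains a set/option API for tokenizer-specific options, a subtokenize helper for handing embedded code to another tokenizer, and a CRLF-safe EOL pattern; the Ruby tokenizer adds radix integer literals (0b/0o/0d/0x), tighter symbol and character-literal rules, and optional highlighting of interpolated expressions; the tests are rebuilt around a shared TokenizerTestCase; and the gem metadata is updated for the 1.0.0 release.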
@@ -78,8 +78,22 @@ module Syntax
       finish
     end
 
+    # Specify a set of tokenizer-specific options. Each tokenizer may (or may
+    # not) publish any options, but if a tokenizer does those options may be
+    # used to specify optional behavior.
+    def set( opts={} )
+      ( @options ||= Hash.new ).update opts
+    end
+
+    # Get the value of the specified option.
+    def option(opt)
+      @options ? @options[opt] : nil
+    end
+
     private
 
+    EOL = /(?=\r\n?|\n|$)/
+
     # A convenience for delegating method calls to the scanner.
     def self.delegate( sym )
       define_method( sym ) { |*a| @text.__send__( sym, *a ) }
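The new set/option pair is the general mechanism for passing tokenizer-specific flags: set merges options into a lazily created hash, and option reads one back (nil when unset). A minimal usage sketch; Syntax.load is the same loader the new test harness below uses, and :expressions is the one option the Ruby tokenizer consults:

    require 'syntax'

    tokenizer = Syntax.load("ruby")
    tokenizer.set :expressions => :highlight   # merge tokenizer-specific options
    tokenizer.option(:expressions)             # => :highlight
    tokenizer.option(:something_else)          # => nil until explicitly set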
@@ -137,6 +151,13 @@ module Syntax
       @chunk = ""
     end
 
+    def subtokenize( syntax, text )
+      tokenizer = Syntax.load( syntax )
+      tokenizer.set @options if @options
+      flush_chunk
+      tokenizer.tokenize( text, &@callback )
+    end
+
   end
 
 end
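subtokenize hands a slice of the input to another tokenizer while reusing this tokenizer's options and callback; the expression-highlighting change further down uses it to re-enter the Ruby tokenizer for interpolated code. Purely as an illustration, a hypothetical tokenizer for a template language could delegate embedded Ruby the same way (EmbeddedTemplate, its scanning rules, and the Syntax::Tokenizer base-class name are assumptions of this sketch):

    class EmbeddedTemplate < Syntax::Tokenizer   # hypothetical subclass
      def step
        if scan(/<%/)
          start_group :punct, "<%"
          code = scan_until(/(?=%>)/)        # everything up to the closing tag
          subtokenize "ruby", code if code   # emit Ruby tokens via the callback
          start_group :punct, scan(/%>/) || ""
        else
          start_group :normal, scan(/./m)    # pass other text through untouched
        end
      end
    end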
@@ -7,6 +7,9 @@ module Syntax
   # convenience methods to provide a common interface for all convertors.
   class Abstract
 
+    # A reference to the tokenizer used by this convertor.
+    attr_reader :tokenizer
+
     # A convenience method for instantiating a new convertor for a
     # specific syntax.
     def self.for_syntax( syntax )
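Because the tokenizer is now readable from the convertor, options can be set without building a tokenizer by hand. A sketch using the bundled HTML convertor (assuming its convert method, which takes the source text and returns markup):

    require 'syntax/convertors/html'

    convertor = Syntax::Convertors::HTML.for_syntax "ruby"
    convertor.tokenizer.set :expressions => :highlight
    html = convertor.convert('puts "2 + 2 = #{2 + 2}"')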
@@ -25,20 +25,20 @@ module Syntax
     def step
       case
       when bol? && check( /=begin/ )
-        start_group( :comment, scan_until( /^=end$/ ) )
-      when bol? && check( /__END__$/ )
+        start_group( :comment, scan_until( /^=end#{EOL}/ ) )
+      when bol? && check( /__END__#{EOL}/ )
         start_group( :comment, scan_until( /\Z/ ) )
       else
         case
         when check( /def\s+/ )
           start_group :keyword, scan( /def\s+/ )
-          start_group :method, scan_until( /$|(?=[;(\s])/ )
+          start_group :method, scan_until( /(?=[;(\s]|#{EOL})/ )
         when check( /class\s+/ )
           start_group :keyword, scan( /class\s+/ )
-          start_group :class, scan_until( /$|(?=[;\s<])/ )
+          start_group :class, scan_until( /(?=[;\s<]|#{EOL})/ )
         when check( /module\s+/ )
           start_group :keyword, scan( /module\s+/ )
-          start_group :module, scan_until( /$|(?=[;\s])/ )
+          start_group :module, scan_until( /(?=[;\s]|#{EOL})/ )
         when check( /::/ )
           start_group :punct, scan(/::/)
         when check( /:"/ )
@@ -49,11 +49,11 @@ module Syntax
         start_group :symbol, scan(/:/)
         scan_delimited_region :symbol, :symbol, "", false
         @allow_operator = true
-      when check( /:\w/ )
-        start_group :symbol, scan(/:\w+[!?]?/)
+      when scan( /:[_a-zA-Z@$][$@\w]*[=!?]?/ )
+        start_group :symbol, matched
         @allow_operator = true
-      when check( /\?\\?./ )
-        start_group :char, scan(/\?\\?./)
+      when scan( /\?(\\[^\n\r]|[^\\\n\r\s])/ )
+        start_group :char, matched
         @allow_operator = true
       when check( /(__FILE__|__LINE__|true|false|nil|self)[?!]?/ )
         if @selector || matched[-1] == ?? || matched[-1] == ?!
@@ -65,6 +65,9 @@ module Syntax
         end
         @selector = false
         @allow_operator = true
+      when scan(/0([bB][01]+|[oO][0-7]+|[dD][0-9]+|[xX][0-9a-fA-F]+)/)
+        start_group :number, matched
+        @allow_operator = true
       else
         case peek(2)
         when "%r"
@@ -120,7 +123,7 @@ module Syntax
         when "#"
           start_group :comment, scan( /#[^\n\r]*/ )
         when /[A-Z]/
-          start_group :constant, scan( /\w+/ )
+          start_group @selector ? :ident : :constant, scan( /\w+/ )
           @allow_operator = true
         when /[a-z_]/
           word = scan( /\w+[?!]?/ )
@@ -218,11 +221,11 @@ module Syntax
       if heredoc
         items << "(^"
         items << '\s*' if heredoc == :float
-        items << "#{Regexp.escape(delim)}\s*)$"
+        items << "#{Regexp.escape(delim)}\s*?)#{EOL}"
       else
         items << "#{Regexp.escape(delim)}"
       end
-      items << "|#(\\$|@|\\{)" if exprs
+      items << "|#(\\$|@@?|\\{)" if exprs
       items = Regexp.new( items )
 
       loop do
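Two fixes land here: the heredoc-terminator pattern now ends in the EOL lookahead (with a non-greedy \s*?) so terminators on CRLF lines are found, and bare interpolation accepts #@@class_var alongside #$global and #@instance_var. Per the new test_expr_with_class_var below, a class-variable interpolation now comes through as an :expr token:

    tokenize '"#@@f"'
    # => :punct '"', :string "" (region open), :expr '#@@f',
    #    :string "" (region close), :punct '"'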
@@ -263,25 +266,35 @@ module Syntax
           start_group delim_group, matched
           break
         when /^#/
+          do_highlight = (option(:expressions) == :highlight)
+          start_region :expr if do_highlight
           start_group :expr, matched
           case matched[1]
           when ?{
             depth = 1
+            content = ""
             while depth > 0
               p = pos
               c = scan_until( /[\{}]/ )
               if c.nil?
-                append scan_until( /\Z/ )
+                content << scan_until( /\Z/ )
                 break
               else
                 depth += ( matched == "{" ? 1 : -1 )
-                append pre_match[p..-1]
-                append matched
+                content << pre_match[p..-1]
+                content << matched if depth > 0
               end
             end
+            if do_highlight
+              subtokenize "ruby", content
+              start_group :expr, "}"
+            else
+              append content + "}"
+            end
           when ?$, ?@
             append scan( /\w+/ )
           end
+          end_region :expr if do_highlight
         else raise "unexpected match on #{matched}"
         end
       end
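The upshot: a braced interpolation is either emitted as one opaque :expr token (the default) or, when the tokenizer was given :expressions => :highlight, opened as an :expr region whose body is re-tokenized as Ruby via subtokenize. A sketch of the difference, with token groups as in test_expr_in_braces and test_highlighted_subexpression:

    tokenizer = Syntax.load("ruby")
    tokenizer.tokenize('"#{f}"') { |tok| p [tok.group, tok.instruction, tok.to_s] }
    # default: the interpolation is a single [:expr, :none, '#{f}'] token

    tokenizer.set :expressions => :highlight
    tokenizer.tokenize('"#{f}"') { |tok| p [tok.group, tok.instruction, tok.to_s] }
    # now an :expr region opens, '#{' and '}' arrive as :expr tokens, and
    # the inner f is tokenized as ordinary Ruby (an :ident token)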
@@ -1,7 +1,7 @@
 module Syntax
   module Version
-    MAJOR=0
-    MINOR=7
+    MAJOR=1
+    MINOR=0
     TINY=0
 
     STRING=[MAJOR,MINOR,TINY].join('.')
@@ -1,81 +1,69 @@
-$:.unshift File.dirname(__FILE__) +"/../../lib"
+require File.dirname(__FILE__) + "/tokenizer_testcase"
 
-require 'test/unit'
-require 'syntax/lang/ruby'
+class TC_Syntax_Ruby < TokenizerTestCase
 
-class TC_Syntax_Ruby < Test::Unit::TestCase
-
-  def tokenize( string )
-    @tokens = []
-    @ruby.tokenize( string ) { |tok| @tokens << tok }
-  end
-
-  def assert_next_token(group, lexeme, instruction=:none)
-    assert false, "no tokens in stack" if @tokens.nil? or @tokens.empty?
-    assert_equal [group, lexeme, instruction],
-                 [@tokens.first.group, @tokens.first, @tokens.shift.instruction]
-  end
-
-  def assert_no_next_token
-    assert @tokens.empty?
-  end
-
-  def skip_token( n=1 )
-    n.times { @tokens.shift } unless @tokens.nil? || @tokens.empty?
-  end
-
-  def setup
-    @ruby = Syntax::Ruby.new
-  end
+  syntax "ruby"
 
   def test_empty
-    tokenize( "" )
+    tokenize ""
     assert_no_next_token
   end
 
   def test_constant
-    tokenize( "Foo" )
+    tokenize "Foo"
     assert_next_token :constant, "Foo"
   end
 
   def test_ident
-    tokenize( "foo" )
+    tokenize "foo"
     assert_next_token :ident, "foo"
   end
 
   def test_comment_eol
-    tokenize( "# a comment\nfoo" )
+    tokenize "# a comment\nfoo"
     assert_next_token :comment, "# a comment"
     assert_next_token :normal, "\n"
     assert_next_token :ident, "foo"
   end
 
   def test_comment_block
-    tokenize( "=begin\nthis is a comment\n=end\nnoncomment" )
+    tokenize "=begin\nthis is a comment\n=end\nnoncomment"
     assert_next_token :comment, "=begin\nthis is a comment\n=end"
     assert_next_token :normal, "\n"
     assert_next_token :ident, "noncomment"
   end
 
+  def test_comment_block_with_CRNL
+    tokenize "=begin\r\nthis is a comment\r\n=end\r\nnoncomment"
+    assert_next_token :comment, "=begin\r\nthis is a comment\r\n=end"
+    assert_next_token :normal, "\r\n"
+    assert_next_token :ident, "noncomment"
+  end
+
   def test_keyword
     Syntax::Ruby::KEYWORDS.each do |word|
-      tokenize( word )
+      tokenize word
       assert_next_token :keyword, word
     end
     Syntax::Ruby::KEYWORDS.each do |word|
-      tokenize( "foo.#{word}" )
+      tokenize "foo.#{word}"
       skip_token 2
       assert_next_token :ident, word
     end
   end
 
   def test__END__
-    tokenize( "__END__\n\nblah blah blah" )
+    tokenize "__END__\n\nblah blah blah"
     assert_next_token :comment, "__END__\n\nblah blah blah"
   end
 
+  def test__END__with_CRNL
+    tokenize "__END__\r\nblah blah blah"
+    assert_next_token :comment, "__END__\r\nblah blah blah"
+  end
+
   def test_def_paren
-    tokenize( "def foo(bar)" )
+    tokenize "def foo(bar)"
     assert_next_token :keyword, "def "
     assert_next_token :method, "foo"
     assert_next_token :punct, "("
@@ -84,7 +72,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_def_space
-    tokenize( "def foo bar" )
+    tokenize "def foo bar"
     assert_next_token :keyword, "def "
     assert_next_token :method, "foo"
     assert_next_token :normal, " "
@@ -92,28 +80,34 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_def_semicolon
-    tokenize( "def foo;" )
+    tokenize "def foo;"
     assert_next_token :keyword, "def "
     assert_next_token :method, "foo"
     assert_next_token :punct, ";"
   end
 
+  def test_def_eol
+    tokenize "def foo"
+    assert_next_token :keyword, "def "
+    assert_next_token :method, "foo"
+  end
+
   def test_class_space
-    tokenize( "class Foo\n" )
+    tokenize "class Foo\n"
     assert_next_token :keyword, "class "
     assert_next_token :class, "Foo"
     assert_next_token :normal, "\n"
   end
 
   def test_class_semicolon
-    tokenize( "class Foo;" )
+    tokenize "class Foo;"
     assert_next_token :keyword, "class "
     assert_next_token :class, "Foo"
     assert_next_token :punct, ";"
   end
 
   def test_class_extend
-    tokenize( "class Foo< Bang" )
+    tokenize "class Foo< Bang"
     assert_next_token :keyword, "class "
     assert_next_token :class, "Foo"
     assert_next_token :punct, "<"
@@ -122,34 +116,34 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_module_space
-    tokenize( "module Foo\n" )
+    tokenize "module Foo\n"
     assert_next_token :keyword, "module "
     assert_next_token :module, "Foo"
     assert_next_token :normal, "\n"
   end
 
   def test_module_semicolon
-    tokenize( "module Foo;" )
+    tokenize "module Foo;"
     assert_next_token :keyword, "module "
     assert_next_token :module, "Foo"
     assert_next_token :punct, ";"
   end
 
   def test_module_other
-    tokenize( "module Foo!\n" )
+    tokenize "module Foo!\n"
     assert_next_token :keyword, "module "
     assert_next_token :module, "Foo!"
   end
   def test_scope_operator
-    tokenize( "Foo::Bar" )
+    tokenize "Foo::Bar"
     assert_next_token :constant, "Foo"
     assert_next_token :punct, "::"
     assert_next_token :constant, "Bar"
   end
 
   def test_symbol_dquote
-    tokenize( ':"foo"' )
+    tokenize ':"foo"'
     assert_next_token :symbol, ':"'
     assert_next_token :symbol, '', :region_open
     assert_next_token :symbol, 'foo'
@@ -159,7 +153,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_symbol_squote
-    tokenize( ":'foo'" )
+    tokenize ":'foo'"
     assert_next_token :symbol, ":'"
     assert_next_token :symbol, "", :region_open
     assert_next_token :symbol, "foo"
@@ -169,43 +163,56 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_symbol
-    tokenize( ":foo_bar?" )
-    assert_next_token :symbol, ":foo_bar?"
+    tokenize ":foo_123"
+    assert_next_token :symbol, ":foo_123"
+
+    tokenize ":123"
+    assert_next_token :punct, ":"
+    assert_next_token :number, "123"
+
+    tokenize ":foo="
+    assert_next_token :symbol, ":foo="
+
+    tokenize ":foo!"
+    assert_next_token :symbol, ":foo!"
+
+    tokenize ":foo?"
+    assert_next_token :symbol, ":foo?"
   end
 
   def test_char
-    tokenize( "?." )
+    tokenize "?."
     assert_next_token :char, "?."
 
-    tokenize( '?\n' )
+    tokenize '?\n'
     assert_next_token :char, '?\n'
   end
 
   def test_specials
     %w{__FILE__ __LINE__ true false nil self}.each do |word|
-      tokenize( word )
+      tokenize word
       assert_next_token :constant, word
     end
 
     %w{__FILE__ __LINE__ true false nil self}.each do |word|
-      tokenize( "#{word}?" )
+      tokenize "#{word}?"
       assert_next_token :ident, "#{word}?"
     end
 
     %w{__FILE__ __LINE__ true false nil self}.each do |word|
-      tokenize( "#{word}!" )
+      tokenize "#{word}!"
      assert_next_token :ident, "#{word}!"
     end
 
     %w{__FILE__ __LINE__ true false nil self}.each do |word|
-      tokenize( "x.#{word}" )
+      tokenize "x.#{word}"
       skip_token 2
       assert_next_token :ident, word
     end
   end
 
   def test_pct_r
-    tokenize( '%r{foo#{x}bar}' )
+    tokenize '%r{foo#{x}bar}'
     assert_next_token :punct, "%r{"
     assert_next_token :regex, "", :region_open
     assert_next_token :regex, "foo"
@@ -214,7 +221,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
     assert_next_token :regex, "", :region_close
     assert_next_token :punct, "}"
 
-    tokenize( '%r-foo#{x}bar-' )
+    tokenize '%r-foo#{x}bar-'
     assert_next_token :punct, "%r-"
     assert_next_token :regex, "", :region_open
     assert_next_token :regex, "foo"
@@ -238,7 +245,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_pct_w_brace
-    tokenize( '%w{foo bar baz}' )
+    tokenize '%w{foo bar baz}'
     assert_next_token :punct, "%w{"
     assert_next_token :string, '', :region_open
     assert_next_token :string, 'foo bar baz'
@@ -247,7 +254,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_pct_w
-    tokenize( '%w-foo#{x} bar baz-' )
+    tokenize '%w-foo#{x} bar baz-'
     assert_next_token :punct, "%w-"
     assert_next_token :string, '', :region_open
     assert_next_token :string, 'foo#{x} bar baz'
@@ -256,7 +263,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_pct_q
-    tokenize( '%q-hello #{world}-' )
+    tokenize '%q-hello #{world}-'
     assert_next_token :punct, "%q-"
     assert_next_token :string, '', :region_open
     assert_next_token :string, 'hello #{world}'
@@ -265,7 +272,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_pct_s
-    tokenize( '%s-hello #{world}-' )
+    tokenize '%s-hello #{world}-'
     assert_next_token :punct, "%s-"
     assert_next_token :symbol, '', :region_open
     assert_next_token :symbol, 'hello #{world}'
@@ -274,7 +281,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_pct_W
-    tokenize( '%W-foo#{x} bar baz-' )
+    tokenize '%W-foo#{x} bar baz-'
     assert_next_token :punct, "%W-"
     assert_next_token :string, '', :region_open
     assert_next_token :string, 'foo'
@@ -285,7 +292,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_pct_Q
-    tokenize( '%Q-hello #{world}-' )
+    tokenize '%Q-hello #{world}-'
     assert_next_token :punct, "%Q-"
     assert_next_token :string, '', :region_open
     assert_next_token :string, 'hello '
@@ -295,7 +302,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_pct_x
-    tokenize( '%x-ls /blah/#{foo}-' )
+    tokenize '%x-ls /blah/#{foo}-'
     assert_next_token :punct, "%x-"
     assert_next_token :string, '', :region_open
     assert_next_token :string, 'ls /blah/'
@@ -305,7 +312,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_pct_string
-    tokenize( '%-hello #{world}-' )
+    tokenize '%-hello #{world}-'
     assert_next_token :punct, "%-"
     assert_next_token :string, '', :region_open
     assert_next_token :string, 'hello '
@@ -315,7 +322,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_bad_pct_string
-    tokenize( '%0hello #{world}0' )
+    tokenize '%0hello #{world}0'
     assert_next_token :punct, "%"
     assert_next_token :number, '0'
     assert_next_token :ident, 'hello'
@@ -324,7 +331,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_shift_left
-    tokenize( 'foo << 5' )
+    tokenize 'foo << 5'
     assert_next_token :ident, "foo"
     assert_next_token :normal, " "
     assert_next_token :punct, "<<"
@@ -333,14 +340,14 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_shift_left_no_white
-    tokenize( 'foo<<5' )
+    tokenize 'foo<<5'
     assert_next_token :ident, "foo"
     assert_next_token :punct, "<<"
     assert_next_token :number, "5"
   end
 
   def test_here_doc_no_opts
-    tokenize( "foo <<EOF\n foo\n bar\n baz\nEOF" )
+    tokenize "foo <<EOF\n foo\n bar\n baz\nEOF"
     assert_next_token :ident, "foo"
     assert_next_token :normal, " "
     assert_next_token :punct, "<<"
@@ -352,7 +359,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_here_doc_no_opts_missing_end
-    tokenize( "foo <<EOF\n foo\n bar\n baz\n EOF" )
+    tokenize "foo <<EOF\n foo\n bar\n baz\n EOF"
     assert_next_token :ident, "foo"
     assert_next_token :normal, " "
     assert_next_token :punct, "<<"
@@ -363,7 +370,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_here_doc_float_right
-    tokenize( "foo <<-EOF\n foo\n bar\n baz\n EOF" )
+    tokenize "foo <<-EOF\n foo\n bar\n baz\n EOF"
     assert_next_token :ident, "foo"
     assert_next_token :normal, " "
     assert_next_token :punct, "<<-"
@@ -375,7 +382,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_here_doc_single_quotes
-    tokenize( "foo <<'EOF'\n foo\#{x}\n bar\n baz\nEOF" )
+    tokenize "foo <<'EOF'\n foo\#{x}\n bar\n baz\nEOF"
     assert_next_token :ident, "foo"
     assert_next_token :normal, " "
     assert_next_token :punct, "<<'"
@@ -388,7 +395,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_here_doc_double_quotes
-    tokenize( "foo <<\"EOF\"\n foo\#{x}\n bar\n baz\nEOF" )
+    tokenize "foo <<\"EOF\"\n foo\#{x}\n bar\n baz\nEOF"
     assert_next_token :ident, "foo"
     assert_next_token :normal, " "
     assert_next_token :punct, "<<\""
@@ -403,12 +410,12 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_space
-    tokenize( "\n \t\t\n\n\r\n" )
+    tokenize "\n \t\t\n\n\r\n"
     assert_next_token :normal, "\n \t\t\n\n\r\n"
   end
 
   def test_number
-    tokenize( "1 1.0 1e5 1.0e5 1_2.5 1_2.5_2 1_2.5_2e3_2" )
+    tokenize "1 1.0 1e5 1.0e5 1_2.5 1_2.5_2 1_2.5_2e3_2"
     assert_next_token :number, "1"
     skip_token
     assert_next_token :number, "1.0"
@@ -425,7 +432,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_dquoted_string
-    tokenize( '"foo #{x} bar\"\n\tbaz\xA5b\5\1234"' )
+    tokenize '"foo #{x} bar\"\n\tbaz\xA5b\5\1234"'
     assert_next_token :punct, '"'
     assert_next_token :string, '', :region_open
     assert_next_token :string, 'foo '
@@ -442,7 +449,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_squoted_string
-    tokenize( '\'foo #{x} bar\\\'\n\tbaz\\\\\xA5b\5\1234\'' )
+    tokenize '\'foo #{x} bar\\\'\n\tbaz\\\\\xA5b\5\1234\''
     assert_next_token :punct, "'"
     assert_next_token :string, "", :region_open
     assert_next_token :string, 'foo #{x} bar'
@@ -455,51 +462,51 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_dot_selector
-    tokenize( 'foo.nil' )
+    tokenize 'foo.nil'
     skip_token
     assert_next_token :punct, "."
     assert_next_token :ident, "nil"
   end
 
   def test_dot_range_inclusive
-    tokenize( 'foo..nil' )
+    tokenize 'foo..nil'
     skip_token
     assert_next_token :punct, ".."
     assert_next_token :constant, "nil"
   end
 
   def test_dot_range_exclusive
-    tokenize( 'foo...nil' )
+    tokenize 'foo...nil'
     skip_token
     assert_next_token :punct, "..."
     assert_next_token :constant, "nil"
   end
 
   def test_dot_range_many
-    tokenize( 'foo.....nil' )
+    tokenize 'foo.....nil'
     skip_token
     assert_next_token :punct, "....."
     assert_next_token :constant, "nil"
   end
 
   def test_attribute
-    tokenize( '@var_foo' )
+    tokenize '@var_foo'
     assert_next_token :attribute, "@var_foo"
   end
 
   def test_global
-    tokenize( '$var_foo' )
+    tokenize '$var_foo'
     assert_next_token :global, "$var_foo"
-    tokenize( '$12' )
+    tokenize '$12'
     assert_next_token :global, "$12"
-    tokenize( '$/f' )
+    tokenize '$/f'
     assert_next_token :global, "$/"
-    tokenize( "$\n" )
+    tokenize "$\n"
     assert_next_token :global, "$"
   end
 
   def test_paren_delimiter
-    tokenize( '%w(a)' )
+    tokenize '%w(a)'
     assert_next_token :punct, "%w("
     assert_next_token :string, "", :region_open
     assert_next_token :string, "a"
@@ -508,7 +515,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_division
-    tokenize( 'm / 3' )
+    tokenize 'm / 3'
     assert_next_token :ident, "m"
     assert_next_token :normal, " "
     assert_next_token :punct, "/"
@@ -517,7 +524,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_regex
-    tokenize( 'm =~ /3/' )
+    tokenize 'm =~ /3/'
     assert_next_token :ident, "m"
     assert_next_token :normal, " "
     assert_next_token :punct, "=~"
@@ -530,7 +537,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_heredoc_with_trailing_text
-    tokenize( "foo('here', <<EOF)\n A heredoc.\nEOF\nfoo" )
+    tokenize "foo('here', <<EOF)\n A heredoc.\nEOF\nfoo"
     assert_next_token :ident, "foo"
     assert_next_token :punct, "('"
     assert_next_token :string, '', :region_open
@@ -550,7 +557,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
   end
 
   def test_multiple_heredocs
-    tokenize( <<'TEST' )
+    tokenize <<'TEST'
 foo('here', <<EOF, 'there', <<-'FOO', 'blah')
 First heredoc, right here.
 Expressions are #{allowed}
@@ -598,7 +605,7 @@ TEST
   end
 
   def test_carldr_bad_heredoc_001
-    tokenize( <<'TEST' )
+    tokenize <<'TEST'
 str = <<END
 here document #{1 + 1}
 END
@@ -663,4 +670,202 @@ TEST
     assert_next_token :punct, ")/"
     assert_next_token :number, "2"
   end
+
+  def test_heredoc_with_CRNL
+    tokenize "foo <<SRC\r\nSome text\r\nSRC\r\nfoo"
+    assert_next_token :ident, "foo"
+    assert_next_token :normal, " "
+    assert_next_token :punct, "<<"
+    assert_next_token :constant, "SRC"
+    assert_next_token :string, "", :region_open
+    assert_next_token :string, "\r\nSome text\r\n"
+    assert_next_token :string, "", :region_close
+    assert_next_token :constant, "SRC"
+    assert_next_token :normal, "\r\n"
+    assert_next_token :ident, "foo"
+  end
+
+  def test_question_mark_at_newline
+    tokenize "foo ?\n 'bar': 'baz'"
+    assert_next_token :ident, "foo"
+    assert_next_token :normal, " "
+    assert_next_token :punct, "?"
+    assert_next_token :normal, "\n "
+    assert_next_token :punct, "'"
+    assert_next_token :string, "", :region_open
+    assert_next_token :string, "bar"
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, "':"
+    assert_next_token :normal, " "
+    assert_next_token :punct, "'"
+    assert_next_token :string, "", :region_open
+    assert_next_token :string, "baz"
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, "'"
+  end
+
+  def test_question_mark_and_escaped_newline
+    tokenize "foo ?\\\n 'bar': 'baz'"
+    assert_next_token :ident, "foo"
+    assert_next_token :normal, " "
+    assert_next_token :punct, "?\\"
+    assert_next_token :normal, "\n "
+    assert_next_token :punct, "'"
+    assert_next_token :string, "", :region_open
+    assert_next_token :string, "bar"
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, "':"
+    assert_next_token :normal, " "
+    assert_next_token :punct, "'"
+    assert_next_token :string, "", :region_open
+    assert_next_token :string, "baz"
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, "'"
+  end
+
+  def test_highlighted_subexpression
+    tokenizer.set :expressions => :highlight
+    tokenize '"la la #{["hello", "world"].each { |f| puts "string #{f}" }}"'
+    assert_next_token :punct, '"'
+    assert_next_token :string, "", :region_open
+    assert_next_token :string, "la la "
+    assert_next_token :expr, "", :region_open
+    assert_next_token :expr, '#{'
+    assert_next_token :punct, '["'
+    assert_next_token :string, "", :region_open
+    assert_next_token :string, 'hello'
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, '",'
+    assert_next_token :normal, ' '
+    assert_next_token :punct, '"'
+    assert_next_token :string, "", :region_open
+    assert_next_token :string, "world"
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, '"].'
+    assert_next_token :ident, 'each'
+    assert_next_token :normal, ' '
+    assert_next_token :punct, '{'
+    assert_next_token :normal, ' '
+    assert_next_token :punct, '|'
+    assert_next_token :ident, 'f'
+    assert_next_token :punct, '|'
+    assert_next_token :normal, ' '
+    assert_next_token :ident, 'puts'
+    assert_next_token :normal, ' '
+    assert_next_token :punct, '"'
+    assert_next_token :string, "", :region_open
+    assert_next_token :string, "string "
+    assert_next_token :expr, "", :region_open
+    assert_next_token :expr, '#{'
+    assert_next_token :ident, 'f'
+    assert_next_token :expr, '}'
+    assert_next_token :expr, "", :region_close
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, '"'
+    assert_next_token :normal, ' '
+    assert_next_token :punct, '}'
+    assert_next_token :expr, '}'
+    assert_next_token :expr, "", :region_close
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, '"'
+  end
+
+  def test_expr_in_braces
+    tokenize '"#{f}"'
+    assert_next_token :punct, '"'
+    assert_next_token :string, "", :region_open
+    assert_next_token :expr, '#{f}'
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, '"'
+  end
+
+  def test_expr_in_braces_with_nested_braces
+    tokenize '"#{loop{break}}"'
+    assert_next_token :punct, '"'
+    assert_next_token :string, "", :region_open
+    assert_next_token :expr, '#{loop{break}}'
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, '"'
+  end
+
+  def test_expr_with_global_var
+    tokenize '"#$f"'
+    assert_next_token :punct, '"'
+    assert_next_token :string, "", :region_open
+    assert_next_token :expr, '#$f'
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, '"'
+  end
+
+  def test_expr_with_instance_var
+    tokenize '"#@f"'
+    assert_next_token :punct, '"'
+    assert_next_token :string, "", :region_open
+    assert_next_token :expr, '#@f'
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, '"'
+  end
+
+  def test_expr_with_class_var
+    tokenize '"#@@f"'
+    assert_next_token :punct, '"'
+    assert_next_token :string, "", :region_open
+    assert_next_token :expr, '#@@f'
+    assert_next_token :string, "", :region_close
+    assert_next_token :punct, '"'
+  end
+
+  def test_qmark_space
+    tokenize "? "
+    assert_next_token :punct, "?"
+    assert_next_token :normal, " "
+  end
+
+  def test_capitalized_method
+    tokenize "obj.Foo"
+    skip_token 2
+    assert_next_token :ident, "Foo"
+  end
+
+  def test_hexadecimal_literal
+    tokenize "0xDEADbeef 0X1234567890ABCDEFG"
+    assert_next_token :number, "0xDEADbeef"
+    skip_token
+    assert_next_token :number, "0X1234567890ABCDEF"
+    assert_next_token :constant, "G"
+  end
+
+  def test_binary_literal
+    tokenize "0b2 0b0 0b101 0B123"
+    assert_next_token :number, "0"
+    assert_next_token :ident, "b2"
+    skip_token
+    assert_next_token :number, "0b0"
+    skip_token
+    assert_next_token :number, "0b101"
+    skip_token
+    assert_next_token :number, "0B123"
+  end
+
+  def test_octal_literal
+    tokenize "0o9 0o12345670abc 0O12345678"
+    assert_next_token :number, "0"
+    assert_next_token :ident, "o9"
+    skip_token
+    assert_next_token :number, "0o12345670"
+    assert_next_token :ident, "abc"
+    skip_token
+    assert_next_token :number, "0O12345678"
+  end
+
+  def test_decimal_literal
+    tokenize "0dA 0d1234567890abc 0D1234567890"
+    assert_next_token :number, "0"
+    assert_next_token :ident, "dA"
+    skip_token
+    assert_next_token :number, "0d1234567890"
+    assert_next_token :ident, "abc"
+    skip_token
+    assert_next_token :number, "0D1234567890"
+  end
 end
@@ -0,0 +1,40 @@
+$:.unshift File.dirname(__FILE__) + "/../../lib"
+
+require 'test/unit'
+require 'syntax'
+
+class TokenizerTestCase < Test::Unit::TestCase
+  def self.syntax( type )
+    class_eval <<-EOF
+      def setup
+        @tokenizer = Syntax.load(#{type.inspect})
+      end
+    EOF
+  end
+
+  def default_test
+  end
+
+  private
+
+    attr_reader :tokenizer
+
+    def tokenize( string )
+      @tokens = []
+      @tokenizer.tokenize( string ) { |tok| @tokens << tok }
+    end
+
+    def assert_next_token(group, lexeme, instruction=:none)
+      assert false, "no tokens in stack" if @tokens.nil? or @tokens.empty?
+      assert_equal [group, lexeme, instruction],
+                   [@tokens.first.group, @tokens.first, @tokens.shift.instruction]
+    end
+
+    def assert_no_next_token
+      assert @tokens.empty?
+    end
+
+    def skip_token( n=1 )
+      n.times { @tokens.shift } unless @tokens.nil? || @tokens.empty?
+    end
+end
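The new base class pulls the tokenize/assert_next_token helpers out of tc_ruby.rb, so a tokenizer test reduces to declaring its syntax and asserting tokens. An illustrative sketch for another bundled syntax (the yaml assertions here are invented for the example):

    require File.dirname(__FILE__) + "/tokenizer_testcase"

    class TC_Syntax_Yaml < TokenizerTestCase
      syntax "yaml"

      def test_comment
        tokenize "# a comment"
        assert_next_token :comment, "# a comment"
      end
    end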
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
-rubygems_version: 0.8.8
+rubygems_version: 0.8.10
 specification_version: 1
 name: syntax
 version: !ruby/object:Gem::Version
-  version: 0.7.0
-date: 2005-03-23
+  version: 1.0.0
+date: 2005-06-18
 summary: Syntax is Ruby library for performing simple syntax highlighting.
 require_paths:
 - lib
-email: jgb3@email.byu.edu
+email: jamis@jamisbuck.org
 homepage:
 rubyforge_project:
 description:
@@ -47,6 +47,7 @@ files:
 - test/syntax/tc_ruby.rb
 - test/syntax/tc_xml.rb
 - test/syntax/tc_yaml.rb
+- test/syntax/tokenizer_testcase.rb
 test_files:
 - test/ALL-TESTS.rb
 rdoc_options: []