syntax 0.7.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -78,8 +78,22 @@ module Syntax
  finish
  end

+ # Specify a set of tokenizer-specific options. Each tokenizer may (or may
+ # not) publish options; if a tokenizer does, those options may be used to
+ # select optional behavior.
+ def set( opts={} )
+ ( @options ||= Hash.new ).update opts
+ end
+
+ # Get the value of the specified option.
+ def option(opt)
+ @options ? @options[opt] : nil
+ end
+
  private

+ EOL = /(?=\r\n?|\n|$)/
+
  # A convenience for delegating method calls to the scanner.
  def self.delegate( sym )
  define_method( sym ) { |*a| @text.__send__( sym, *a ) }
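
The new set/option pair is the public knob for tokenizer behavior, and EOL is a zero-width anchor the scanners below use in place of $ so that CRLF line endings are handled. A minimal sketch of exercising them, assuming only the API shown in this hunk:

    tokenizer = Syntax.load( "ruby" )
    tokenizer.set :expressions => :highlight   # store an option
    tokenizer.option(:expressions)             # => :highlight

    # EOL is a lookahead, so it matches without consuming the terminator:
    "=end\r\n" =~ /^=end(?=\r\n?|\n|$)/        # => 0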
@@ -137,6 +151,13 @@ module Syntax
  @chunk = ""
  end

+ def subtokenize( syntax, text )
+ tokenizer = Syntax.load( syntax )
+ tokenizer.set @options if @options
+ flush_chunk
+ tokenizer.tokenize( text, &@callback )
+ end
+
  end

  end
@@ -7,6 +7,9 @@ module Syntax
  # convenience methods to provide a common interface for all convertors.
  class Abstract

+ # A reference to the tokenizer used by this convertor.
+ attr_reader :tokenizer
+
  # A convenience method for instantiating a new convertor for a
  # specific syntax.
  def self.for_syntax( syntax )
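
With the tokenizer now exposed on the convertor, callers can set tokenizer options without building the tokenizer by hand. A hedged sketch, assuming the gem's conventional HTML convertor entry point (the require path and convert call are taken from the gem's usual usage, not from this diff):

    require 'syntax/convertors/html'

    convertor = Syntax::Convertors::HTML.for_syntax "ruby"
    convertor.tokenizer.set :expressions => :highlight
    html = convertor.convert( 'puts "hi #{name}"' )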
@@ -25,20 +25,20 @@ module Syntax
  def step
  case
  when bol? && check( /=begin/ )
- start_group( :comment, scan_until( /^=end$/ ) )
- when bol? && check( /__END__$/ )
+ start_group( :comment, scan_until( /^=end#{EOL}/ ) )
+ when bol? && check( /__END__#{EOL}/ )
  start_group( :comment, scan_until( /\Z/ ) )
  else
  case
  when check( /def\s+/ )
  start_group :keyword, scan( /def\s+/ )
- start_group :method, scan_until( /$|(?=[;(\s])/ )
+ start_group :method, scan_until( /(?=[;(\s]|#{EOL})/ )
  when check( /class\s+/ )
  start_group :keyword, scan( /class\s+/ )
- start_group :class, scan_until( /$|(?=[;\s<])/ )
+ start_group :class, scan_until( /(?=[;\s<]|#{EOL})/ )
  when check( /module\s+/ )
  start_group :keyword, scan( /module\s+/ )
- start_group :module, scan_until( /$|(?=[;\s])/ )
+ start_group :module, scan_until( /(?=[;\s]|#{EOL})/ )
  when check( /::/ )
  start_group :punct, scan(/::/)
  when check( /:"/ )
@@ -49,11 +49,11 @@ module Syntax
  start_group :symbol, scan(/:/)
  scan_delimited_region :symbol, :symbol, "", false
  @allow_operator = true
- when check( /:\w/ )
- start_group :symbol, scan(/:\w+[!?]?/)
+ when scan( /:[_a-zA-Z@$][$@\w]*[=!?]?/ )
+ start_group :symbol, matched
  @allow_operator = true
- when check( /\?\\?./ )
- start_group :char, scan(/\?\\?./)
+ when scan( /\?(\\[^\n\r]|[^\\\n\r\s])/ )
+ start_group :char, matched
  @allow_operator = true
  when check( /(__FILE__|__LINE__|true|false|nil|self)[?!]?/ )
  if @selector || matched[-1] == ?? || matched[-1] == ?!
@@ -65,6 +65,9 @@ module Syntax
  end
  @selector = false
  @allow_operator = true
+ when scan(/0([bB][01]+|[oO][0-7]+|[dD][0-9]+|[xX][0-9a-fA-F]+)/)
+ start_group :number, matched
+ @allow_operator = true
  else
  case peek(2)
  when "%r"
@@ -120,7 +123,7 @@ module Syntax
  when "#"
  start_group :comment, scan( /#[^\n\r]*/ )
  when /[A-Z]/
- start_group :constant, scan( /\w+/ )
+ start_group @selector ? :ident : :constant, scan( /\w+/ )
  @allow_operator = true
  when /[a-z_]/
  word = scan( /\w+[?!]?/ )
@@ -218,11 +221,11 @@ module Syntax
  if heredoc
  items << "(^"
  items << '\s*' if heredoc == :float
- items << "#{Regexp.escape(delim)}\s*)$"
+ items << "#{Regexp.escape(delim)}\s*?)#{EOL}"
  else
  items << "#{Regexp.escape(delim)}"
  end
- items << "|#(\\$|@|\\{)" if exprs
+ items << "|#(\\$|@@?|\\{)" if exprs
  items = Regexp.new( items )

  loop do
@@ -263,25 +266,35 @@ module Syntax
  start_group delim_group, matched
  break
  when /^#/
+ do_highlight = (option(:expressions) == :highlight)
+ start_region :expr if do_highlight
  start_group :expr, matched
  case matched[1]
  when ?{
  depth = 1
+ content = ""
  while depth > 0
  p = pos
  c = scan_until( /[\{}]/ )
  if c.nil?
- append scan_until( /\Z/ )
+ content << scan_until( /\Z/ )
  break
  else
  depth += ( matched == "{" ? 1 : -1 )
- append pre_match[p..-1]
- append matched
+ content << pre_match[p..-1]
+ content << matched if depth > 0
  end
  end
+ if do_highlight
+ subtokenize "ruby", content
+ start_group :expr, "}"
+ else
+ append content + "}"
+ end
  when ?$, ?@
  append scan( /\w+/ )
  end
+ end_region :expr if do_highlight
  else raise "unexpected match on #{matched}"
  end
  end
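
This is the feature the earlier plumbing exists for: with :expressions set to :highlight, interpolated code inside a string is re-tokenized as Ruby via subtokenize instead of being emitted as one opaque :expr lexeme (the default, covered by test_expr_in_braces below). A sketch of driving it, using only API shown in this diff:

    tokenizer = Syntax.load( "ruby" )
    tokenizer.set :expressions => :highlight
    tokenizer.tokenize( '"a #{b} c"' ) do |tok|
      puts "#{tok.group} #{tok.inspect}"   # e.g. :string "a ", :expr "\#{", :ident "b", ...
    end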
@@ -1,7 +1,7 @@
  module Syntax
  module Version
- MAJOR=0
- MINOR=7
+ MAJOR=1
+ MINOR=0
  TINY=0

  STRING=[MAJOR,MINOR,TINY].join('.')
@@ -1,81 +1,69 @@
- $:.unshift File.dirname(__FILE__) +"/../../lib"
+ require File.dirname(__FILE__) + "/tokenizer_testcase"

- require 'test/unit'
- require 'syntax/lang/ruby'
+ class TC_Syntax_Ruby < TokenizerTestCase

- class TC_Syntax_Ruby < Test::Unit::TestCase
-
- def tokenize( string )
- @tokens = []
- @ruby.tokenize( string ) { |tok| @tokens << tok }
- end
-
- def assert_next_token(group, lexeme, instruction=:none)
- assert false, "no tokens in stack" if @tokens.nil? or @tokens.empty?
- assert_equal [group, lexeme, instruction],
- [@tokens.first.group, @tokens.first, @tokens.shift.instruction]
- end
-
- def assert_no_next_token
- assert @tokens.empty?
- end
-
- def skip_token( n=1 )
- n.times { @tokens.shift } unless @tokens.nil? || @tokens.empty?
- end
-
- def setup
- @ruby = Syntax::Ruby.new
- end
+ syntax "ruby"

  def test_empty
- tokenize( "" )
+ tokenize ""
  assert_no_next_token
  end

  def test_constant
- tokenize( "Foo" )
+ tokenize "Foo"
  assert_next_token :constant, "Foo"
  end

  def test_ident
- tokenize( "foo" )
+ tokenize "foo"
  assert_next_token :ident, "foo"
  end

  def test_comment_eol
- tokenize( "# a comment\nfoo" )
+ tokenize "# a comment\nfoo"
  assert_next_token :comment, "# a comment"
  assert_next_token :normal, "\n"
  assert_next_token :ident, "foo"
  end

  def test_comment_block
- tokenize( "=begin\nthis is a comment\n=end\nnoncomment" )
+ tokenize "=begin\nthis is a comment\n=end\nnoncomment"
  assert_next_token :comment, "=begin\nthis is a comment\n=end"
  assert_next_token :normal, "\n"
  assert_next_token :ident, "noncomment"
  end

+ def test_comment_block_with_CRNL
+ tokenize "=begin\r\nthis is a comment\r\n=end\r\nnoncomment"
+ assert_next_token :comment, "=begin\r\nthis is a comment\r\n=end"
+ assert_next_token :normal, "\r\n"
+ assert_next_token :ident, "noncomment"
+ end
+
  def test_keyword
  Syntax::Ruby::KEYWORDS.each do |word|
- tokenize( word )
+ tokenize word
  assert_next_token :keyword, word
  end
  Syntax::Ruby::KEYWORDS.each do |word|
- tokenize( "foo.#{word}" )
+ tokenize "foo.#{word}"
  skip_token 2
  assert_next_token :ident, word
  end
  end

  def test__END__
- tokenize( "__END__\n\nblah blah blah" )
+ tokenize "__END__\n\nblah blah blah"
  assert_next_token :comment, "__END__\n\nblah blah blah"
  end

+ def test__END__with_CRNL
+ tokenize "__END__\r\nblah blah blah"
+ assert_next_token :comment, "__END__\r\nblah blah blah"
+ end
+
  def test_def_paren
- tokenize( "def foo(bar)" )
+ tokenize "def foo(bar)"
  assert_next_token :keyword, "def "
  assert_next_token :method, "foo"
  assert_next_token :punct, "("
@@ -84,7 +72,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_def_space
- tokenize( "def foo bar" )
+ tokenize "def foo bar"
  assert_next_token :keyword, "def "
  assert_next_token :method, "foo"
  assert_next_token :normal, " "
@@ -92,28 +80,34 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_def_semicolon
- tokenize( "def foo;" )
+ tokenize "def foo;"
  assert_next_token :keyword, "def "
  assert_next_token :method, "foo"
  assert_next_token :punct, ";"
  end

+ def test_def_eol
+ tokenize "def foo"
+ assert_next_token :keyword, "def "
+ assert_next_token :method, "foo"
+ end
+
  def test_class_space
- tokenize( "class Foo\n" )
+ tokenize "class Foo\n"
  assert_next_token :keyword, "class "
  assert_next_token :class, "Foo"
  assert_next_token :normal, "\n"
  end

  def test_class_semicolon
- tokenize( "class Foo;" )
+ tokenize "class Foo;"
  assert_next_token :keyword, "class "
  assert_next_token :class, "Foo"
  assert_next_token :punct, ";"
  end

  def test_class_extend
- tokenize( "class Foo< Bang" )
+ tokenize "class Foo< Bang"
  assert_next_token :keyword, "class "
  assert_next_token :class, "Foo"
  assert_next_token :punct, "<"
@@ -122,34 +116,34 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_module_space
- tokenize( "module Foo\n" )
+ tokenize "module Foo\n"
  assert_next_token :keyword, "module "
  assert_next_token :module, "Foo"
  assert_next_token :normal, "\n"
  end

  def test_module_semicolon
- tokenize( "module Foo;" )
+ tokenize "module Foo;"
  assert_next_token :keyword, "module "
  assert_next_token :module, "Foo"
  assert_next_token :punct, ";"
  end

  def test_module_other
- tokenize( "module Foo!\n" )
+ tokenize "module Foo!\n"
  assert_next_token :keyword, "module "
  assert_next_token :module, "Foo!"
  end

  def test_scope_operator
- tokenize( "Foo::Bar" )
+ tokenize "Foo::Bar"
  assert_next_token :constant, "Foo"
  assert_next_token :punct, "::"
  assert_next_token :constant, "Bar"
  end

  def test_symbol_dquote
- tokenize( ':"foo"' )
+ tokenize ':"foo"'
  assert_next_token :symbol, ':"'
  assert_next_token :symbol, '', :region_open
  assert_next_token :symbol, 'foo'
@@ -159,7 +153,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_symbol_squote
- tokenize( ":'foo'" )
+ tokenize ":'foo'"
  assert_next_token :symbol, ":'"
  assert_next_token :symbol, "", :region_open
  assert_next_token :symbol, "foo"
@@ -169,43 +163,56 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_symbol
- tokenize( ":foo_bar?" )
- assert_next_token :symbol, ":foo_bar?"
+ tokenize ":foo_123"
+ assert_next_token :symbol, ":foo_123"
+
+ tokenize ":123"
+ assert_next_token :punct, ":"
+ assert_next_token :number, "123"
+
+ tokenize ":foo="
+ assert_next_token :symbol, ":foo="
+
+ tokenize ":foo!"
+ assert_next_token :symbol, ":foo!"
+
+ tokenize ":foo?"
+ assert_next_token :symbol, ":foo?"
  end

  def test_char
- tokenize( "?." )
+ tokenize "?."
  assert_next_token :char, "?."

- tokenize( '?\n' )
+ tokenize '?\n'
  assert_next_token :char, '?\n'
  end

  def test_specials
  %w{__FILE__ __LINE__ true false nil self}.each do |word|
- tokenize( word )
+ tokenize word
  assert_next_token :constant, word
  end

  %w{__FILE__ __LINE__ true false nil self}.each do |word|
- tokenize( "#{word}?" )
+ tokenize "#{word}?"
  assert_next_token :ident, "#{word}?"
  end

  %w{__FILE__ __LINE__ true false nil self}.each do |word|
- tokenize( "#{word}!" )
+ tokenize "#{word}!"
  assert_next_token :ident, "#{word}!"
  end

  %w{__FILE__ __LINE__ true false nil self}.each do |word|
- tokenize( "x.#{word}" )
+ tokenize "x.#{word}"
  skip_token 2
  assert_next_token :ident, word
  end
  end

  def test_pct_r
- tokenize( '%r{foo#{x}bar}' )
+ tokenize '%r{foo#{x}bar}'
  assert_next_token :punct, "%r{"
  assert_next_token :regex, "", :region_open
  assert_next_token :regex, "foo"
@@ -214,7 +221,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  assert_next_token :regex, "", :region_close
  assert_next_token :punct, "}"

- tokenize( '%r-foo#{x}bar-' )
+ tokenize '%r-foo#{x}bar-'
  assert_next_token :punct, "%r-"
  assert_next_token :regex, "", :region_open
  assert_next_token :regex, "foo"
@@ -238,7 +245,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_pct_w_brace
- tokenize( '%w{foo bar baz}' )
+ tokenize '%w{foo bar baz}'
  assert_next_token :punct, "%w{"
  assert_next_token :string, '', :region_open
  assert_next_token :string, 'foo bar baz'
@@ -247,7 +254,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_pct_w
- tokenize( '%w-foo#{x} bar baz-' )
+ tokenize '%w-foo#{x} bar baz-'
  assert_next_token :punct, "%w-"
  assert_next_token :string, '', :region_open
  assert_next_token :string, 'foo#{x} bar baz'
@@ -256,7 +263,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_pct_q
- tokenize( '%q-hello #{world}-' )
+ tokenize '%q-hello #{world}-'
  assert_next_token :punct, "%q-"
  assert_next_token :string, '', :region_open
  assert_next_token :string, 'hello #{world}'
@@ -265,7 +272,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_pct_s
- tokenize( '%s-hello #{world}-' )
+ tokenize '%s-hello #{world}-'
  assert_next_token :punct, "%s-"
  assert_next_token :symbol, '', :region_open
  assert_next_token :symbol, 'hello #{world}'
@@ -274,7 +281,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_pct_W
- tokenize( '%W-foo#{x} bar baz-' )
+ tokenize '%W-foo#{x} bar baz-'
  assert_next_token :punct, "%W-"
  assert_next_token :string, '', :region_open
  assert_next_token :string, 'foo'
@@ -285,7 +292,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_pct_Q
- tokenize( '%Q-hello #{world}-' )
+ tokenize '%Q-hello #{world}-'
  assert_next_token :punct, "%Q-"
  assert_next_token :string, '', :region_open
  assert_next_token :string, 'hello '
@@ -295,7 +302,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_pct_x
- tokenize( '%x-ls /blah/#{foo}-' )
+ tokenize '%x-ls /blah/#{foo}-'
  assert_next_token :punct, "%x-"
  assert_next_token :string, '', :region_open
  assert_next_token :string, 'ls /blah/'
@@ -305,7 +312,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_pct_string
- tokenize( '%-hello #{world}-' )
+ tokenize '%-hello #{world}-'
  assert_next_token :punct, "%-"
  assert_next_token :string, '', :region_open
  assert_next_token :string, 'hello '
@@ -315,7 +322,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_bad_pct_string
- tokenize( '%0hello #{world}0' )
+ tokenize '%0hello #{world}0'
  assert_next_token :punct, "%"
  assert_next_token :number, '0'
  assert_next_token :ident, 'hello'
@@ -324,7 +331,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_shift_left
- tokenize( 'foo << 5' )
+ tokenize 'foo << 5'
  assert_next_token :ident, "foo"
  assert_next_token :normal, " "
  assert_next_token :punct, "<<"
@@ -333,14 +340,14 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_shift_left_no_white
- tokenize( 'foo<<5' )
+ tokenize 'foo<<5'
  assert_next_token :ident, "foo"
  assert_next_token :punct, "<<"
  assert_next_token :number, "5"
  end

  def test_here_doc_no_opts
- tokenize( "foo <<EOF\n foo\n bar\n baz\nEOF" )
+ tokenize "foo <<EOF\n foo\n bar\n baz\nEOF"
  assert_next_token :ident, "foo"
  assert_next_token :normal, " "
  assert_next_token :punct, "<<"
@@ -352,7 +359,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_here_doc_no_opts_missing_end
- tokenize( "foo <<EOF\n foo\n bar\n baz\n EOF" )
+ tokenize "foo <<EOF\n foo\n bar\n baz\n EOF"
  assert_next_token :ident, "foo"
  assert_next_token :normal, " "
  assert_next_token :punct, "<<"
@@ -363,7 +370,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_here_doc_float_right
- tokenize( "foo <<-EOF\n foo\n bar\n baz\n EOF" )
+ tokenize "foo <<-EOF\n foo\n bar\n baz\n EOF"
  assert_next_token :ident, "foo"
  assert_next_token :normal, " "
  assert_next_token :punct, "<<-"
@@ -375,7 +382,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_here_doc_single_quotes
- tokenize( "foo <<'EOF'\n foo\#{x}\n bar\n baz\nEOF" )
+ tokenize "foo <<'EOF'\n foo\#{x}\n bar\n baz\nEOF"
  assert_next_token :ident, "foo"
  assert_next_token :normal, " "
  assert_next_token :punct, "<<'"
@@ -388,7 +395,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_here_doc_double_quotes
- tokenize( "foo <<\"EOF\"\n foo\#{x}\n bar\n baz\nEOF" )
+ tokenize "foo <<\"EOF\"\n foo\#{x}\n bar\n baz\nEOF"
  assert_next_token :ident, "foo"
  assert_next_token :normal, " "
  assert_next_token :punct, "<<\""
@@ -403,12 +410,12 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_space
- tokenize( "\n \t\t\n\n\r\n" )
+ tokenize "\n \t\t\n\n\r\n"
  assert_next_token :normal, "\n \t\t\n\n\r\n"
  end

  def test_number
- tokenize( "1 1.0 1e5 1.0e5 1_2.5 1_2.5_2 1_2.5_2e3_2" )
+ tokenize "1 1.0 1e5 1.0e5 1_2.5 1_2.5_2 1_2.5_2e3_2"
  assert_next_token :number, "1"
  skip_token
  assert_next_token :number, "1.0"
@@ -425,7 +432,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_dquoted_string
- tokenize( '"foo #{x} bar\"\n\tbaz\xA5b\5\1234"' )
+ tokenize '"foo #{x} bar\"\n\tbaz\xA5b\5\1234"'
  assert_next_token :punct, '"'
  assert_next_token :string, '', :region_open
  assert_next_token :string, 'foo '
@@ -442,7 +449,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_squoted_string
- tokenize( '\'foo #{x} bar\\\'\n\tbaz\\\\\xA5b\5\1234\'' )
+ tokenize '\'foo #{x} bar\\\'\n\tbaz\\\\\xA5b\5\1234\''
  assert_next_token :punct, "'"
  assert_next_token :string, "", :region_open
  assert_next_token :string, 'foo #{x} bar'
@@ -455,51 +462,51 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_dot_selector
- tokenize( 'foo.nil' )
+ tokenize 'foo.nil'
  skip_token
  assert_next_token :punct, "."
  assert_next_token :ident, "nil"
  end

  def test_dot_range_inclusive
- tokenize( 'foo..nil' )
+ tokenize 'foo..nil'
  skip_token
  assert_next_token :punct, ".."
  assert_next_token :constant, "nil"
  end

  def test_dot_range_exclusive
- tokenize( 'foo...nil' )
+ tokenize 'foo...nil'
  skip_token
  assert_next_token :punct, "..."
  assert_next_token :constant, "nil"
  end

  def test_dot_range_many
- tokenize( 'foo.....nil' )
+ tokenize 'foo.....nil'
  skip_token
  assert_next_token :punct, "....."
  assert_next_token :constant, "nil"
  end

  def test_attribute
- tokenize( '@var_foo' )
+ tokenize '@var_foo'
  assert_next_token :attribute, "@var_foo"
  end

  def test_global
- tokenize( '$var_foo' )
+ tokenize '$var_foo'
  assert_next_token :global, "$var_foo"
- tokenize( '$12' )
+ tokenize '$12'
  assert_next_token :global, "$12"
- tokenize( '$/f' )
+ tokenize '$/f'
  assert_next_token :global, "$/"
- tokenize( "$\n" )
+ tokenize "$\n"
  assert_next_token :global, "$"
  end

  def test_paren_delimiter
- tokenize( '%w(a)' )
+ tokenize '%w(a)'
  assert_next_token :punct, "%w("
  assert_next_token :string, "", :region_open
  assert_next_token :string, "a"
@@ -508,7 +515,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_division
- tokenize( 'm / 3' )
+ tokenize 'm / 3'
  assert_next_token :ident, "m"
  assert_next_token :normal, " "
  assert_next_token :punct, "/"
@@ -517,7 +524,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_regex
- tokenize( 'm =~ /3/' )
+ tokenize 'm =~ /3/'
  assert_next_token :ident, "m"
  assert_next_token :normal, " "
  assert_next_token :punct, "=~"
@@ -530,7 +537,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_heredoc_with_trailing_text
- tokenize( "foo('here', <<EOF)\n A heredoc.\nEOF\nfoo" )
+ tokenize "foo('here', <<EOF)\n A heredoc.\nEOF\nfoo"
  assert_next_token :ident, "foo"
  assert_next_token :punct, "('"
  assert_next_token :string, '', :region_open
@@ -550,7 +557,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_multiple_heredocs
- tokenize( <<'TEST' )
+ tokenize <<'TEST'
  foo('here', <<EOF, 'there', <<-'FOO', 'blah')
  First heredoc, right here.
  Expressions are #{allowed}
@@ -598,7 +605,7 @@ TEST
  end

  def test_carldr_bad_heredoc_001
- tokenize( <<'TEST' )
+ tokenize <<'TEST'
  str = <<END
  here document #{1 + 1}
  END
@@ -663,4 +670,202 @@ TEST
  assert_next_token :punct, ")/"
  assert_next_token :number, "2"
  end
+
+ def test_heredoc_with_CRNL
+ tokenize "foo <<SRC\r\nSome text\r\nSRC\r\nfoo"
+ assert_next_token :ident, "foo"
+ assert_next_token :normal, " "
+ assert_next_token :punct, "<<"
+ assert_next_token :constant, "SRC"
+ assert_next_token :string, "", :region_open
+ assert_next_token :string, "\r\nSome text\r\n"
+ assert_next_token :string, "", :region_close
+ assert_next_token :constant, "SRC"
+ assert_next_token :normal, "\r\n"
+ assert_next_token :ident, "foo"
+ end
+
+ def test_question_mark_at_newline
+ tokenize "foo ?\n 'bar': 'baz'"
+ assert_next_token :ident, "foo"
+ assert_next_token :normal, " "
+ assert_next_token :punct, "?"
+ assert_next_token :normal, "\n "
+ assert_next_token :punct, "'"
+ assert_next_token :string, "", :region_open
+ assert_next_token :string, "bar"
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, "':"
+ assert_next_token :normal, " "
+ assert_next_token :punct, "'"
+ assert_next_token :string, "", :region_open
+ assert_next_token :string, "baz"
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, "'"
+ end
+
+ def test_question_mark_and_escaped_newline
+ tokenize "foo ?\\\n 'bar': 'baz'"
+ assert_next_token :ident, "foo"
+ assert_next_token :normal, " "
+ assert_next_token :punct, "?\\"
+ assert_next_token :normal, "\n "
+ assert_next_token :punct, "'"
+ assert_next_token :string, "", :region_open
+ assert_next_token :string, "bar"
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, "':"
+ assert_next_token :normal, " "
+ assert_next_token :punct, "'"
+ assert_next_token :string, "", :region_open
+ assert_next_token :string, "baz"
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, "'"
+ end
+
+ def test_highlighted_subexpression
+ tokenizer.set :expressions => :highlight
+ tokenize '"la la #{["hello", "world"].each { |f| puts "string #{f}" }}"'
+ assert_next_token :punct, '"'
+ assert_next_token :string, "", :region_open
+ assert_next_token :string, "la la "
+ assert_next_token :expr, "", :region_open
+ assert_next_token :expr, '#{'
+ assert_next_token :punct, '["'
+ assert_next_token :string, "", :region_open
+ assert_next_token :string, 'hello'
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, '",'
+ assert_next_token :normal, ' '
+ assert_next_token :punct, '"'
+ assert_next_token :string, "", :region_open
+ assert_next_token :string, "world"
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, '"].'
+ assert_next_token :ident, 'each'
+ assert_next_token :normal, ' '
+ assert_next_token :punct, '{'
+ assert_next_token :normal, ' '
+ assert_next_token :punct, '|'
+ assert_next_token :ident, 'f'
+ assert_next_token :punct, '|'
+ assert_next_token :normal, ' '
+ assert_next_token :ident, 'puts'
+ assert_next_token :normal, ' '
+ assert_next_token :punct, '"'
+ assert_next_token :string, "", :region_open
+ assert_next_token :string, "string "
+ assert_next_token :expr, "", :region_open
+ assert_next_token :expr, '#{'
+ assert_next_token :ident, 'f'
+ assert_next_token :expr, '}'
+ assert_next_token :expr, "", :region_close
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, '"'
+ assert_next_token :normal, ' '
+ assert_next_token :punct, '}'
+ assert_next_token :expr, '}'
+ assert_next_token :expr, "", :region_close
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, '"'
+ end
+
+ def test_expr_in_braces
+ tokenize '"#{f}"'
+ assert_next_token :punct, '"'
+ assert_next_token :string, "", :region_open
+ assert_next_token :expr, '#{f}'
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, '"'
+ end
+
+ def test_expr_in_braces_with_nested_braces
+ tokenize '"#{loop{break}}"'
+ assert_next_token :punct, '"'
+ assert_next_token :string, "", :region_open
+ assert_next_token :expr, '#{loop{break}}'
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, '"'
+ end
+
+ def test_expr_with_global_var
+ tokenize '"#$f"'
+ assert_next_token :punct, '"'
+ assert_next_token :string, "", :region_open
+ assert_next_token :expr, '#$f'
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, '"'
+ end
+
+ def test_expr_with_instance_var
+ tokenize '"#@f"'
+ assert_next_token :punct, '"'
+ assert_next_token :string, "", :region_open
+ assert_next_token :expr, '#@f'
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, '"'
+ end
+
+ def test_expr_with_class_var
+ tokenize '"#@@f"'
+ assert_next_token :punct, '"'
+ assert_next_token :string, "", :region_open
+ assert_next_token :expr, '#@@f'
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, '"'
+ end
+
+ def test_qmark_space
+ tokenize "? "
+ assert_next_token :punct, "?"
+ assert_next_token :normal, " "
+ end
+
+ def test_capitalized_method
+ tokenize "obj.Foo"
+ skip_token 2
+ assert_next_token :ident, "Foo"
+ end
+
+ def test_hexadecimal_literal
+ tokenize "0xDEADbeef 0X1234567890ABCDEFG"
+ assert_next_token :number, "0xDEADbeef"
+ skip_token
+ assert_next_token :number, "0X1234567890ABCDEF"
+ assert_next_token :constant, "G"
+ end
+
+ def test_binary_literal
+ tokenize "0b2 0b0 0b101 0B123"
+ assert_next_token :number, "0"
+ assert_next_token :ident, "b2"
+ skip_token
+ assert_next_token :number, "0b0"
+ skip_token
+ assert_next_token :number, "0b101"
+ skip_token
+ assert_next_token :number, "0B123"
+ end
+
+ def test_octal_literal
+ tokenize "0o9 0o12345670abc 0O12345678"
+ assert_next_token :number, "0"
+ assert_next_token :ident, "o9"
+ skip_token
+ assert_next_token :number, "0o12345670"
+ assert_next_token :ident, "abc"
+ skip_token
+ assert_next_token :number, "0O12345678"
+ end
+
+ def test_decimal_literal
+ tokenize "0dA 0d1234567890abc 0D1234567890"
+ assert_next_token :number, "0"
+ assert_next_token :ident, "dA"
+ skip_token
+ assert_next_token :number, "0d1234567890"
+ assert_next_token :ident, "abc"
+ skip_token
+ assert_next_token :number, "0D1234567890"
+ end
  end
@@ -0,0 +1,40 @@
+ $:.unshift File.dirname(__FILE__) + "/../../lib"
+
+ require 'test/unit'
+ require 'syntax'
+
+ class TokenizerTestCase < Test::Unit::TestCase
+ def self.syntax( type )
+ class_eval <<-EOF
+ def setup
+ @tokenizer = Syntax.load(#{type.inspect})
+ end
+ EOF
+ end
+
+ def default_test
+ end
+
+ private
+
+ attr_reader :tokenizer
+
+ def tokenize( string )
+ @tokens = []
+ @tokenizer.tokenize( string ) { |tok| @tokens << tok }
+ end
+
+ def assert_next_token(group, lexeme, instruction=:none)
+ assert false, "no tokens in stack" if @tokens.nil? or @tokens.empty?
+ assert_equal [group, lexeme, instruction],
+ [@tokens.first.group, @tokens.first, @tokens.shift.instruction]
+ end
+
+ def assert_no_next_token
+ assert @tokens.empty?
+ end
+
+ def skip_token( n=1 )
+ n.times { @tokens.shift } unless @tokens.nil? || @tokens.empty?
+ end
+ end
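
The new TokenizerTestCase centralizes the tokenize/assert helpers that tc_ruby.rb previously defined inline, and the class-level syntax declaration generates the setup method. A hypothetical subclass for another bundled tokenizer would look like this (the test body is illustrative, mirroring test_empty above, not taken from this diff):

    class TC_Syntax_Yaml < TokenizerTestCase
      syntax "yaml"   # setup becomes: @tokenizer = Syntax.load("yaml")

      def test_empty
        tokenize ""
        assert_no_next_token
      end
    end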
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
- rubygems_version: 0.8.8
+ rubygems_version: 0.8.10
  specification_version: 1
  name: syntax
  version: !ruby/object:Gem::Version
- version: 0.7.0
- date: 2005-03-23
+ version: 1.0.0
+ date: 2005-06-18
  summary: Syntax is a Ruby library for performing simple syntax highlighting.
  require_paths:
  - lib
- email: jgb3@email.byu.edu
+ email: jamis@jamisbuck.org
  homepage:
  rubyforge_project:
  description:
@@ -47,6 +47,7 @@ files:
  - test/syntax/tc_ruby.rb
  - test/syntax/tc_xml.rb
  - test/syntax/tc_yaml.rb
+ - test/syntax/tokenizer_testcase.rb
  test_files:
  - test/ALL-TESTS.rb
  rdoc_options: []