coderay 0.7.1.147 → 0.7.2.165

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. data/bin/coderay +54 -56
  2. data/demo/suite.rb +54 -54
  3. data/lib/coderay.rb +187 -187
  4. data/lib/coderay/duo.rb +29 -29
  5. data/lib/coderay/encoder.rb +173 -173
  6. data/lib/coderay/encoders/_map.rb +8 -8
  7. data/lib/coderay/encoders/count.rb +21 -21
  8. data/lib/coderay/encoders/debug.rb +46 -46
  9. data/lib/coderay/encoders/div.rb +20 -20
  10. data/lib/coderay/encoders/html.rb +249 -245
  11. data/lib/coderay/encoders/html/classes.rb +73 -73
  12. data/lib/coderay/encoders/html/css.rb +65 -65
  13. data/lib/coderay/encoders/html/numerization.rb +122 -122
  14. data/lib/coderay/encoders/html/output.rb +195 -195
  15. data/lib/coderay/encoders/null.rb +26 -26
  16. data/lib/coderay/encoders/page.rb +21 -21
  17. data/lib/coderay/encoders/span.rb +20 -20
  18. data/lib/coderay/encoders/statistic.rb +81 -81
  19. data/lib/coderay/encoders/text.rb +33 -33
  20. data/lib/coderay/encoders/tokens.rb +44 -44
  21. data/lib/coderay/encoders/xml.rb +71 -71
  22. data/lib/coderay/encoders/yaml.rb +22 -22
  23. data/lib/coderay/helpers/filetype.rb +152 -153
  24. data/lib/coderay/helpers/gzip_simple.rb +67 -68
  25. data/lib/coderay/helpers/plugin.rb +297 -297
  26. data/lib/coderay/helpers/word_list.rb +46 -47
  27. data/lib/coderay/scanner.rb +238 -238
  28. data/lib/coderay/scanners/_map.rb +15 -14
  29. data/lib/coderay/scanners/c.rb +163 -155
  30. data/lib/coderay/scanners/delphi.rb +131 -129
  31. data/lib/coderay/scanners/html.rb +174 -167
  32. data/lib/coderay/scanners/nitro_xhtml.rb +130 -0
  33. data/lib/coderay/scanners/plaintext.rb +15 -15
  34. data/lib/coderay/scanners/rhtml.rb +73 -65
  35. data/lib/coderay/scanners/ruby.rb +404 -397
  36. data/lib/coderay/scanners/ruby/patterns.rb +216 -216
  37. data/lib/coderay/scanners/xml.rb +18 -18
  38. data/lib/coderay/style.rb +20 -20
  39. data/lib/coderay/styles/_map.rb +3 -3
  40. data/lib/coderay/styles/cycnus.rb +18 -18
  41. data/lib/coderay/styles/murphy.rb +18 -18
  42. data/lib/coderay/tokens.rb +322 -322
  43. metadata +86 -86
  44. data/lib/coderay/scanners/nitro_html.rb +0 -125
  45. data/lib/coderay/scanners/yaml.rb +0 -85
@@ -1,15 +1,15 @@
1
- module CodeRay
2
- module Scanners
3
-
4
- class Plaintext < Scanner
5
-
6
- register_for :plaintext, :plain
7
-
8
- def scan_tokens tokens, options
9
- tokens << [scan_until(/\z/), :plain]
10
- end
11
-
12
- end
13
-
14
- end
15
- end
1
+ module CodeRay
2
+ module Scanners
3
+
4
+ class Plaintext < Scanner
5
+
6
+ register_for :plaintext, :plain
7
+
8
+ def scan_tokens tokens, options
9
+ tokens << [scan_until(/\z/), :plain]
10
+ end
11
+
12
+ end
13
+
14
+ end
15
+ end
@@ -1,65 +1,73 @@
1
- module CodeRay
2
- module Scanners
3
-
4
- load :html
5
- load :ruby
6
-
7
- # RHTML Scanner
8
- #
9
- # $Id$
10
- class RHTML < Scanner
11
-
12
- include Streamable
13
- register_for :rhtml
14
-
15
- ERB_RUBY_BLOCK = /
16
- <%(?!%)[=-]?
17
- (?>
18
- [^%]*
19
- (?> %(?!>) [^%]* )*
20
- )
21
- (?: %> )?
22
- /x
23
-
24
- START_OF_ERB = /
25
- <%(?!%)
26
- /x
27
-
28
- private
29
-
30
- def setup
31
- @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
32
- @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
33
- end
34
-
35
- def scan_tokens tokens, options
36
-
37
- until eos?
38
-
39
- if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty?
40
- @html_scanner.tokenize match
41
-
42
- elsif match = scan(/#{ERB_RUBY_BLOCK}/o)
43
- start_tag = match[/\A<%[-=]?/]
44
- end_tag = match[/%?>?\z/]
45
- tokens << [:open, :inline]
46
- tokens << [start_tag, :delimiter]
47
- code = match[start_tag.size .. -1 - end_tag.size]
48
- @ruby_scanner.tokenize code
49
- tokens << [end_tag, :delimiter] unless end_tag.empty?
50
- tokens << [:close, :inline]
51
-
52
- else
53
- raise_inspect 'else-case reached!', tokens
54
- end
55
-
56
- end
57
-
58
- tokens
59
-
60
- end
61
-
62
- end
63
-
64
- end
65
- end
1
+ module CodeRay
2
+ module Scanners
3
+
4
+ load :html
5
+ load :ruby
6
+
7
+ # RHTML Scanner
8
+ #
9
+ # $Id$
10
+ class RHTML < Scanner
11
+
12
+ include Streamable
13
+ register_for :rhtml
14
+
15
+ ERB_RUBY_BLOCK = /
16
+ <%(?!%)[=-]?
17
+ (?>
18
+ [^\-%]* # normal*
19
+ (?> # special
20
+ (?: %(?!>) | -(?!%>) )
21
+ [^\-%]* # normal*
22
+ )*
23
+ )
24
+ (?: -?%> )?
25
+ /x
26
+
27
+ START_OF_ERB = /
28
+ <%(?!%)
29
+ /x
30
+
31
+ private
32
+
33
+ def setup
34
+ @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
35
+ @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
36
+ end
37
+
38
+ def reset_instance
39
+ super
40
+ @html_scanner.reset
41
+ end
42
+
43
+ def scan_tokens tokens, options
44
+
45
+ until eos?
46
+
47
+ if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty?
48
+ @html_scanner.tokenize match
49
+
50
+ elsif match = scan(/#{ERB_RUBY_BLOCK}/o)
51
+ start_tag = match[/\A<%[-=]?/]
52
+ end_tag = match[/-?%?>?\z/]
53
+ tokens << [:open, :inline]
54
+ tokens << [start_tag, :delimiter]
55
+ code = match[start_tag.size .. -1 - end_tag.size]
56
+ @ruby_scanner.tokenize code
57
+ tokens << [end_tag, :delimiter] unless end_tag.empty?
58
+ tokens << [:close, :inline]
59
+
60
+ else
61
+ raise_inspect 'else-case reached!', tokens
62
+ end
63
+
64
+ end
65
+
66
+ tokens
67
+
68
+ end
69
+
70
+ end
71
+
72
+ end
73
+ end
@@ -1,397 +1,404 @@
1
- module CodeRay
2
- module Scanners
3
-
4
- # This scanner is really complex, since Ruby _is_ a complex language!
5
- #
6
- # It tries to highlight 100% of all common code,
7
- # and 90% of strange codes.
8
- #
9
- # It is optimized for HTML highlighting, and is not very useful for
10
- # parsing or pretty printing.
11
- #
12
- # For now, I think it's better than the scanners in VIM or Syntax, or
13
- # any highlighter I was able to find, except Caleb's RubyLexer.
14
- #
15
- # I hope it's also better than the rdoc/irb lexer.
16
- class Ruby < Scanner
17
-
18
- include Streamable
19
-
20
- register_for :ruby
21
-
22
- helper :patterns
23
-
24
- DEFAULT_OPTIONS = {
25
- :parse_regexps => true,
26
- }
27
-
28
- private
29
- def scan_tokens tokens, options
30
- parse_regexp = false # options[:parse_regexps]
31
- first_bake = saved_tokens = nil
32
- last_token_dot = false
33
- fancy_allowed = regexp_allowed = true
34
- heredocs = nil
35
- last_state = nil
36
- state = :initial
37
- depth = nil
38
- states = []
39
-
40
- patterns = Patterns # avoid constant lookup
41
-
42
- until eos?
43
- type = :error
44
- match = nil
45
- kind = nil
46
-
47
- if state.instance_of? patterns::StringState
48
- # {{{
49
- match = scan_until(state.pattern) || scan_until(/\z/)
50
- tokens << [match, :content] unless match.empty?
51
- break if eos?
52
-
53
- if state.heredoc and self[1]
54
- match = getch + scan_until(/$/)
55
- tokens << [match, :delimiter]
56
- tokens << [:close, state.type]
57
- state = state.next_state
58
- next
59
- end
60
-
61
- case match = getch
62
-
63
- when state.delim
64
- if state.paren
65
- state.paren_depth -= 1
66
- if state.paren_depth > 0
67
- tokens << [match, :nesting_delimiter]
68
- next
69
- end
70
- end
71
- tokens << [match, :delimiter]
72
- if state.type == :regexp and not eos?
73
- modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
74
- tokens << [modifiers, :modifier] unless modifiers.empty?
75
- if parse_regexp
76
- extended = modifiers.index ?x
77
- tokens = saved_tokens
78
- regexp = tokens
79
- for text, type in regexp
80
- if text.is_a? ::String
81
- case type
82
- when :content
83
- text.scan(/([^#]+)|(#.*)/) do |plain, comment|
84
- if plain
85
- tokens << [plain, :content]
86
- else
87
- tokens << [comment, :comment]
88
- end
89
- end
90
- when :character
91
- if text[/\\(?:[swdSWDAzZbB]|\d+)/]
92
- tokens << [text, :modifier]
93
- else
94
- tokens << [text, type]
95
- end
96
- else
97
- tokens << [text, type]
98
- end
99
- else
100
- tokens << [text, type]
101
- end
102
- end
103
- first_bake = saved_tokens = nil
104
- end
105
- end
106
- tokens << [:close, state.type]
107
- fancy_allowed = regexp_allowed = false
108
- state = state.next_state
109
-
110
- when '\\'
111
- if state.interpreted
112
- if esc = scan(/ #{patterns::ESCAPE} /ox)
113
- tokens << [match + esc, :char]
114
- else
115
- tokens << [match, :error]
116
- end
117
- else
118
- case m = getch
119
- when state.delim, '\\'
120
- tokens << [match + m, :char]
121
- else
122
- tokens << [match + m, :content]
123
- end
124
- end
125
-
126
- when '#'
127
- case peek(1)[0]
128
- when ?{
129
- states.push [state, depth, heredocs]
130
- fancy_allowed = regexp_allowed = true
131
- state = :initial
132
- depth = 1
133
- tokens << [:open, :inline]
134
- tokens << [match + getch, :delimiter]
135
- when ?$, ?@
136
- tokens << [match, :escape]
137
- last_state = state # scan one token as normal code, then return here
138
- state = :initial
139
- else
140
- raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
141
- end
142
-
143
- when state.paren
144
- state.paren_depth += 1
145
- tokens << [match, :nesting_delimiter]
146
-
147
- when /#{patterns::REGEXP_SYMBOLS}/ox
148
- tokens << [match, :function]
149
-
150
- else
151
- raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens
152
-
153
- end
154
- next
155
- # }}}
156
- else
157
- # {{{
158
- if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or
159
- ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
160
- fancy_allowed = true
161
- case m = match[0]
162
- when ?\s, ?\t, ?\f
163
- match << scan(/\s*/) unless eos? or heredocs
164
- type = :space
165
- when ?\n, ?\\
166
- type = :space
167
- if m == ?\n
168
- regexp_allowed = true
169
- state = :initial if state == :undef_comma_expected
170
- end
171
- if heredocs
172
- unscan # heredoc scanning needs \n at start
173
- state = heredocs.shift
174
- tokens << [:open, state.type]
175
- heredocs = nil if heredocs.empty?
176
- next
177
- else
178
- match << scan(/\s*/) unless eos?
179
- end
180
- when ?#, ?=, ?_
181
- type = :comment
182
- regexp_allowed = true
183
- else
184
- raise_inspect 'else-case _ reached, because case %p was not handled' % [matched[0].chr], tokens
185
- end
186
- tokens << [match, type]
187
- next
188
-
189
- elsif state == :initial
190
-
191
- # IDENTS #
192
- if match = scan(/#{patterns::METHOD_NAME}/o)
193
- if last_token_dot
194
- type = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
195
- else
196
- type = patterns::IDENT_KIND[match]
197
- if type == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
198
- type = :constant
199
- elsif type == :reserved
200
- state = patterns::DEF_NEW_STATE[match]
201
- end
202
- end
203
- ## experimental!
204
- fancy_allowed = regexp_allowed = :set if patterns::REGEXP_ALLOWED[match] or check(/\s+(?:%\S|\/\S)/)
205
-
206
- # OPERATORS #
207
- elsif (not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)) or
208
- (last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o))
209
- if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
210
- regexp_allowed = fancy_allowed = :set
211
- end
212
- last_token_dot = :set if match == '.' or match == '::'
213
- type = :operator
214
- unless states.empty?
215
- case match
216
- when '{'
217
- depth += 1
218
- when '}'
219
- depth -= 1
220
- if depth == 0
221
- state, depth, heredocs = states.pop
222
- tokens << [match, :delimiter]
223
- type = :inline
224
- match = :close
225
- end
226
- end
227
- end
228
-
229
- elsif match = scan(/ ['"] /mx)
230
- tokens << [:open, :string]
231
- type = :delimiter
232
- state = patterns::StringState.new :string, match == '"', match # important for streaming
233
-
234
- elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o)
235
- type = :instance_variable
236
-
237
- elsif regexp_allowed and match = scan(/\//)
238
- tokens << [:open, :regexp]
239
- type = :delimiter
240
- interpreted = true
241
- state = patterns::StringState.new :regexp, interpreted, match
242
- if parse_regexp
243
- tokens = []
244
- saved_tokens = tokens
245
- end
246
-
247
- elsif match = scan(/#{patterns::NUMERIC}/o)
248
- type = if self[1] then :float else :integer end
249
-
250
- elsif match = scan(/#{patterns::SYMBOL}/o)
251
- case delim = match[1]
252
- when ?', ?"
253
- tokens << [:open, :symbol]
254
- tokens << [':', :symbol]
255
- match = delim.chr
256
- type = :delimiter
257
- state = patterns::StringState.new :symbol, delim == ?", match
258
- else
259
- type = :symbol
260
- end
261
-
262
- elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
263
- regexp_allowed = fancy_allowed = :set
264
- type = :operator
265
-
266
- elsif fancy_allowed and match = scan(/#{patterns::HEREDOC_OPEN}/o)
267
- indented = self[1] == '-'
268
- quote = self[3]
269
- delim = self[quote ? 4 : 2]
270
- type = patterns::QUOTE_TO_TYPE[quote]
271
- tokens << [:open, type]
272
- tokens << [match, :delimiter]
273
- match = :close
274
- heredoc = patterns::StringState.new type, quote != '\'', delim, (indented ? :indented : :linestart )
275
- heredocs ||= [] # create heredocs if empty
276
- heredocs << heredoc
277
-
278
- elsif fancy_allowed and match = scan(/#{patterns::FANCY_START_SAVE}/o)
279
- type, interpreted = *patterns::FancyStringType.fetch(self[1]) do
280
- raise_inspect 'Unknown fancy string: %%%p' % k, tokens
281
- end
282
- tokens << [:open, type]
283
- state = patterns::StringState.new type, interpreted, self[2]
284
- type = :delimiter
285
-
286
- elsif fancy_allowed and match = scan(/#{patterns::CHARACTER}/o)
287
- type = :integer
288
-
289
- elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
290
- regexp_allowed = fancy_allowed = :set
291
- type = :operator
292
-
293
- elsif match = scan(/`/)
294
- if last_token_dot
295
- type = :operator
296
- else
297
- tokens << [:open, :shell]
298
- type = :delimiter
299
- state = patterns::StringState.new :shell, true, match
300
- end
301
-
302
- elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o)
303
- type = :global_variable
304
-
305
- elsif match = scan(/#{patterns::CLASS_VARIABLE}/o)
306
- type = :class_variable
307
-
308
- else
309
- match = getch
310
-
311
- end
312
-
313
- elsif state == :def_expected
314
- state = :initial
315
- if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
316
- type = :method
317
- else
318
- next
319
- end
320
-
321
- elsif state == :undef_expected
322
- state = :undef_comma_expected
323
- if match = scan(/#{patterns::METHOD_NAME_EX}/o)
324
- type = :method
325
- elsif match = scan(/#{patterns::SYMBOL}/o)
326
- case delim = match[1]
327
- when ?', ?"
328
- tokens << [:open, :symbol]
329
- tokens << [':', :symbol]
330
- match = delim.chr
331
- type = :delimiter
332
- state = patterns::StringState.new :symbol, delim == ?", match
333
- state.next_state = :undef_comma_expected
334
- else
335
- type = :symbol
336
- end
337
- else
338
- state = :initial
339
- next
340
- end
341
-
342
- elsif state == :undef_comma_expected
343
- if match = scan(/,/)
344
- type = :operator
345
- state = :undef_expected
346
- else
347
- state = :initial
348
- next
349
- end
350
-
351
- elsif state == :module_expected
352
- if match = scan(/<</)
353
- type = :operator
354
- else
355
- state = :initial
356
- if match = scan(/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
357
- type = :class
358
- else
359
- next
360
- end
361
- end
362
-
363
- end
364
- # }}}
365
-
366
- regexp_allowed = regexp_allowed == :set
367
- fancy_allowed = fancy_allowed == :set
368
- last_token_dot = last_token_dot == :set
369
-
370
- if $DEBUG and (not kind or kind == :error)
371
- raise_inspect 'Error token %p in line %d' %
372
- [[match, kind], line], tokens
373
- end
374
- raise_inspect 'Empty token', tokens unless match
375
-
376
- tokens << [match, type]
377
-
378
- if last_state
379
- state = last_state
380
- last_state = nil
381
- end
382
- end
383
- end
384
-
385
- states << state if state.is_a? patterns::StringState
386
- until states.empty?
387
- tokens << [:close, states.pop.type]
388
- end
389
-
390
- tokens
391
- end
392
- end
393
-
394
- end
395
- end
396
-
397
- # vim:fdm=marker
1
+ module CodeRay
2
+ module Scanners
3
+
4
+ # This scanner is really complex, since Ruby _is_ a complex language!
5
+ #
6
+ # It tries to highlight 100% of all common code,
7
+ # and 90% of strange codes.
8
+ #
9
+ # It is optimized for HTML highlighting, and is not very useful for
10
+ # parsing or pretty printing.
11
+ #
12
+ # For now, I think it's better than the scanners in VIM or Syntax, or
13
+ # any highlighter I was able to find, except Caleb's RubyLexer.
14
+ #
15
+ # I hope it's also better than the rdoc/irb lexer.
16
+ class Ruby < Scanner
17
+
18
+ include Streamable
19
+
20
+ register_for :ruby
21
+
22
+ helper :patterns
23
+
24
+ DEFAULT_OPTIONS = {
25
+ :parse_regexps => true,
26
+ }
27
+
28
+ private
29
+ def scan_tokens tokens, options
30
+ parse_regexp = false # options[:parse_regexps]
31
+ first_bake = saved_tokens = nil
32
+ last_token_dot = false
33
+ fancy_allowed = regexp_allowed = true
34
+ heredocs = nil
35
+ last_state = nil
36
+ state = :initial
37
+ depth = nil
38
+ inline_block_stack = []
39
+
40
+ patterns = Patterns # avoid constant lookup
41
+
42
+ until eos?
43
+ match = nil
44
+ kind = nil
45
+
46
+ if state.instance_of? patterns::StringState
47
+ # {{{
48
+ match = scan_until(state.pattern) || scan_until(/\z/)
49
+ tokens << [match, :content] unless match.empty?
50
+ break if eos?
51
+
52
+ if state.heredoc and self[1] # end of heredoc
53
+ match = getch.to_s
54
+ match << scan_until(/$/) unless eos?
55
+ tokens << [match, :delimiter]
56
+ tokens << [:close, state.type]
57
+ state = state.next_state
58
+ next
59
+ end
60
+
61
+ case match = getch
62
+
63
+ when state.delim
64
+ if state.paren
65
+ state.paren_depth -= 1
66
+ if state.paren_depth > 0
67
+ tokens << [match, :nesting_delimiter]
68
+ next
69
+ end
70
+ end
71
+ tokens << [match, :delimiter]
72
+ if state.type == :regexp and not eos?
73
+ modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
74
+ tokens << [modifiers, :modifier] unless modifiers.empty?
75
+ if parse_regexp
76
+ extended = modifiers.index ?x
77
+ tokens = saved_tokens
78
+ regexp = tokens
79
+ for text, kind in regexp
80
+ if text.is_a? ::String
81
+ case kind
82
+ when :content
83
+ text.scan(/([^#]+)|(#.*)/) do |plain, comment|
84
+ if plain
85
+ tokens << [plain, :content]
86
+ else
87
+ tokens << [comment, :comment]
88
+ end
89
+ end
90
+ when :character
91
+ if text[/\\(?:[swdSWDAzZbB]|\d+)/]
92
+ tokens << [text, :modifier]
93
+ else
94
+ tokens << [text, kind]
95
+ end
96
+ else
97
+ tokens << [text, kind]
98
+ end
99
+ else
100
+ tokens << [text, kind]
101
+ end
102
+ end
103
+ first_bake = saved_tokens = nil
104
+ end
105
+ end
106
+ tokens << [:close, state.type]
107
+ fancy_allowed = regexp_allowed = false
108
+ state = state.next_state
109
+
110
+ when '\\'
111
+ if state.interpreted
112
+ if esc = scan(/ #{patterns::ESCAPE} /ox)
113
+ tokens << [match + esc, :char]
114
+ else
115
+ tokens << [match, :error]
116
+ end
117
+ else
118
+ case m = getch
119
+ when state.delim, '\\'
120
+ tokens << [match + m, :char]
121
+ when nil
122
+ tokens << [match, :error]
123
+ else
124
+ tokens << [match + m, :content]
125
+ end
126
+ end
127
+
128
+ when '#'
129
+ case peek(1)[0]
130
+ when ?{
131
+ inline_block_stack << [state, depth, heredocs]
132
+ fancy_allowed = regexp_allowed = true
133
+ state = :initial
134
+ depth = 1
135
+ tokens << [:open, :inline]
136
+ tokens << [match + getch, :delimiter]
137
+ when ?$, ?@
138
+ tokens << [match, :escape]
139
+ last_state = state # scan one token as normal code, then return here
140
+ state = :initial
141
+ else
142
+ raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
143
+ end
144
+
145
+ when state.paren
146
+ state.paren_depth += 1
147
+ tokens << [match, :nesting_delimiter]
148
+
149
+ when /#{patterns::REGEXP_SYMBOLS}/ox
150
+ tokens << [match, :function]
151
+
152
+ else
153
+ raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens
154
+
155
+ end
156
+ next
157
+ # }}}
158
+ else
159
+ # {{{
160
+ if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or
161
+ ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
162
+ fancy_allowed = true
163
+ case m = match[0]
164
+ when ?\s, ?\t, ?\f
165
+ match << scan(/\s*/) unless eos? or heredocs
166
+ kind = :space
167
+ when ?\n, ?\\
168
+ kind = :space
169
+ if m == ?\n
170
+ regexp_allowed = true
171
+ state = :initial if state == :undef_comma_expected
172
+ end
173
+ if heredocs
174
+ unscan # heredoc scanning needs \n at start
175
+ state = heredocs.shift
176
+ tokens << [:open, state.type]
177
+ heredocs = nil if heredocs.empty?
178
+ next
179
+ else
180
+ match << scan(/\s*/) unless eos?
181
+ end
182
+ when ?#, ?=, ?_
183
+ kind = :comment
184
+ regexp_allowed = true
185
+ else
186
+ raise_inspect 'else-case _ reached, because case %p was not handled' % [matched[0].chr], tokens
187
+ end
188
+ tokens << [match, kind]
189
+ next
190
+
191
+ elsif state == :initial
192
+
193
+ # IDENTS #
194
+ if match = scan(/#{patterns::METHOD_NAME}/o)
195
+ if last_token_dot
196
+ kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
197
+ else
198
+ kind = patterns::IDENT_KIND[match]
199
+ if kind == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
200
+ kind = :constant
201
+ elsif kind == :reserved
202
+ state = patterns::DEF_NEW_STATE[match]
203
+ end
204
+ end
205
+ ## experimental!
206
+ fancy_allowed = regexp_allowed = :set if patterns::REGEXP_ALLOWED[match] or check(/\s+(?:%\S|\/\S)/)
207
+
208
+ # OPERATORS #
209
+ elsif (not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)) or
210
+ (last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o))
211
+ if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
212
+ regexp_allowed = fancy_allowed = :set
213
+ end
214
+ last_token_dot = :set if match == '.' or match == '::'
215
+ kind = :operator
216
+ unless inline_block_stack.empty?
217
+ case match
218
+ when '{'
219
+ depth += 1
220
+ when '}'
221
+ depth -= 1
222
+ if depth == 0 # closing brace of inline block reached
223
+ state, depth, heredocs = inline_block_stack.pop
224
+ tokens << [match, :delimiter]
225
+ kind = :inline
226
+ match = :close
227
+ end
228
+ end
229
+ end
230
+
231
+ elsif match = scan(/ ['"] /mx)
232
+ tokens << [:open, :string]
233
+ kind = :delimiter
234
+ state = patterns::StringState.new :string, match == '"', match # important for streaming
235
+
236
+ elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o)
237
+ kind = :instance_variable
238
+
239
+ elsif regexp_allowed and match = scan(/\//)
240
+ tokens << [:open, :regexp]
241
+ kind = :delimiter
242
+ interpreted = true
243
+ state = patterns::StringState.new :regexp, interpreted, match
244
+ if parse_regexp
245
+ tokens = []
246
+ saved_tokens = tokens
247
+ end
248
+
249
+ elsif match = scan(/#{patterns::NUMERIC}/o)
250
+ kind = if self[1] then :float else :integer end
251
+
252
+ elsif match = scan(/#{patterns::SYMBOL}/o)
253
+ case delim = match[1]
254
+ when ?', ?"
255
+ tokens << [:open, :symbol]
256
+ tokens << [':', :symbol]
257
+ match = delim.chr
258
+ kind = :delimiter
259
+ state = patterns::StringState.new :symbol, delim == ?", match
260
+ else
261
+ kind = :symbol
262
+ end
263
+
264
+ elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
265
+ regexp_allowed = fancy_allowed = :set
266
+ kind = :operator
267
+
268
+ elsif fancy_allowed and match = scan(/#{patterns::HEREDOC_OPEN}/o)
269
+ indented = self[1] == '-'
270
+ quote = self[3]
271
+ delim = self[quote ? 4 : 2]
272
+ kind = patterns::QUOTE_TO_TYPE[quote]
273
+ tokens << [:open, kind]
274
+ tokens << [match, :delimiter]
275
+ match = :close
276
+ heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart )
277
+ heredocs ||= [] # create heredocs if empty
278
+ heredocs << heredoc
279
+
280
+ elsif fancy_allowed and match = scan(/#{patterns::FANCY_START_SAVE}/o)
281
+ kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do
282
+ raise_inspect 'Unknown fancy string: %%%p' % k, tokens
283
+ end
284
+ tokens << [:open, kind]
285
+ state = patterns::StringState.new kind, interpreted, self[2]
286
+ kind = :delimiter
287
+
288
+ elsif fancy_allowed and match = scan(/#{patterns::CHARACTER}/o)
289
+ kind = :integer
290
+
291
+ elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
292
+ regexp_allowed = fancy_allowed = :set
293
+ kind = :operator
294
+
295
+ elsif match = scan(/`/)
296
+ if last_token_dot
297
+ kind = :operator
298
+ else
299
+ tokens << [:open, :shell]
300
+ kind = :delimiter
301
+ state = patterns::StringState.new :shell, true, match
302
+ end
303
+
304
+ elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o)
305
+ kind = :global_variable
306
+
307
+ elsif match = scan(/#{patterns::CLASS_VARIABLE}/o)
308
+ kind = :class_variable
309
+
310
+ else
311
+ kind = :error
312
+ match = getch
313
+
314
+ end
315
+
316
+ elsif state == :def_expected
317
+ state = :initial
318
+ if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
319
+ kind = :method
320
+ else
321
+ next
322
+ end
323
+
324
+ elsif state == :undef_expected
325
+ state = :undef_comma_expected
326
+ if match = scan(/#{patterns::METHOD_NAME_EX}/o)
327
+ kind = :method
328
+ elsif match = scan(/#{patterns::SYMBOL}/o)
329
+ case delim = match[1]
330
+ when ?', ?"
331
+ tokens << [:open, :symbol]
332
+ tokens << [':', :symbol]
333
+ match = delim.chr
334
+ kind = :delimiter
335
+ state = patterns::StringState.new :symbol, delim == ?", match
336
+ state.next_state = :undef_comma_expected
337
+ else
338
+ kind = :symbol
339
+ end
340
+ else
341
+ state = :initial
342
+ next
343
+ end
344
+
345
+ elsif state == :undef_comma_expected
346
+ if match = scan(/,/)
347
+ kind = :operator
348
+ state = :undef_expected
349
+ else
350
+ state = :initial
351
+ next
352
+ end
353
+
354
+ elsif state == :module_expected
355
+ if match = scan(/<</)
356
+ kind = :operator
357
+ else
358
+ state = :initial
359
+ if match = scan(/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
360
+ kind = :class
361
+ else
362
+ next
363
+ end
364
+ end
365
+
366
+ end
367
+ # }}}
368
+
369
+ regexp_allowed = regexp_allowed == :set
370
+ fancy_allowed = fancy_allowed == :set
371
+ last_token_dot = last_token_dot == :set
372
+
373
+ if $DEBUG and not kind
374
+ raise_inspect 'Error token %p in line %d' %
375
+ [[match, kind], line], tokens, state
376
+ end
377
+ raise_inspect 'Empty token', tokens unless match
378
+
379
+ tokens << [match, kind]
380
+
381
+ if last_state
382
+ state = last_state
383
+ last_state = nil
384
+ end
385
+ end
386
+ end
387
+
388
+ inline_block_stack << [state] if state.is_a? patterns::StringState
389
+ until inline_block_stack.empty?
390
+ this_block = inline_block_stack.pop
391
+ tokens << [:close, :inline] if this_block.size > 1
392
+ state = this_block.first
393
+ tokens << [:close, state.type]
394
+ end
395
+
396
+ tokens
397
+ end
398
+
399
+ end
400
+
401
+ end
402
+ end
403
+
404
+ # vim:fdm=marker