coderay 0.7.1.147 → 0.7.2.165

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. data/bin/coderay +54 -56
  2. data/demo/suite.rb +54 -54
  3. data/lib/coderay.rb +187 -187
  4. data/lib/coderay/duo.rb +29 -29
  5. data/lib/coderay/encoder.rb +173 -173
  6. data/lib/coderay/encoders/_map.rb +8 -8
  7. data/lib/coderay/encoders/count.rb +21 -21
  8. data/lib/coderay/encoders/debug.rb +46 -46
  9. data/lib/coderay/encoders/div.rb +20 -20
  10. data/lib/coderay/encoders/html.rb +249 -245
  11. data/lib/coderay/encoders/html/classes.rb +73 -73
  12. data/lib/coderay/encoders/html/css.rb +65 -65
  13. data/lib/coderay/encoders/html/numerization.rb +122 -122
  14. data/lib/coderay/encoders/html/output.rb +195 -195
  15. data/lib/coderay/encoders/null.rb +26 -26
  16. data/lib/coderay/encoders/page.rb +21 -21
  17. data/lib/coderay/encoders/span.rb +20 -20
  18. data/lib/coderay/encoders/statistic.rb +81 -81
  19. data/lib/coderay/encoders/text.rb +33 -33
  20. data/lib/coderay/encoders/tokens.rb +44 -44
  21. data/lib/coderay/encoders/xml.rb +71 -71
  22. data/lib/coderay/encoders/yaml.rb +22 -22
  23. data/lib/coderay/helpers/filetype.rb +152 -153
  24. data/lib/coderay/helpers/gzip_simple.rb +67 -68
  25. data/lib/coderay/helpers/plugin.rb +297 -297
  26. data/lib/coderay/helpers/word_list.rb +46 -47
  27. data/lib/coderay/scanner.rb +238 -238
  28. data/lib/coderay/scanners/_map.rb +15 -14
  29. data/lib/coderay/scanners/c.rb +163 -155
  30. data/lib/coderay/scanners/delphi.rb +131 -129
  31. data/lib/coderay/scanners/html.rb +174 -167
  32. data/lib/coderay/scanners/nitro_xhtml.rb +130 -0
  33. data/lib/coderay/scanners/plaintext.rb +15 -15
  34. data/lib/coderay/scanners/rhtml.rb +73 -65
  35. data/lib/coderay/scanners/ruby.rb +404 -397
  36. data/lib/coderay/scanners/ruby/patterns.rb +216 -216
  37. data/lib/coderay/scanners/xml.rb +18 -18
  38. data/lib/coderay/style.rb +20 -20
  39. data/lib/coderay/styles/_map.rb +3 -3
  40. data/lib/coderay/styles/cycnus.rb +18 -18
  41. data/lib/coderay/styles/murphy.rb +18 -18
  42. data/lib/coderay/tokens.rb +322 -322
  43. metadata +86 -86
  44. data/lib/coderay/scanners/nitro_html.rb +0 -125
  45. data/lib/coderay/scanners/yaml.rb +0 -85
@@ -1,15 +1,15 @@
1
- module CodeRay
2
- module Scanners
3
-
4
- class Plaintext < Scanner
5
-
6
- register_for :plaintext, :plain
7
-
8
- def scan_tokens tokens, options
9
- tokens << [scan_until(/\z/), :plain]
10
- end
11
-
12
- end
13
-
14
- end
15
- end
1
+ module CodeRay
2
+ module Scanners
3
+
4
+ class Plaintext < Scanner
5
+
6
+ register_for :plaintext, :plain
7
+
8
+ def scan_tokens tokens, options
9
+ tokens << [scan_until(/\z/), :plain]
10
+ end
11
+
12
+ end
13
+
14
+ end
15
+ end
@@ -1,65 +1,73 @@
1
- module CodeRay
2
- module Scanners
3
-
4
- load :html
5
- load :ruby
6
-
7
- # RHTML Scanner
8
- #
9
- # $Id$
10
- class RHTML < Scanner
11
-
12
- include Streamable
13
- register_for :rhtml
14
-
15
- ERB_RUBY_BLOCK = /
16
- <%(?!%)[=-]?
17
- (?>
18
- [^%]*
19
- (?> %(?!>) [^%]* )*
20
- )
21
- (?: %> )?
22
- /x
23
-
24
- START_OF_ERB = /
25
- <%(?!%)
26
- /x
27
-
28
- private
29
-
30
- def setup
31
- @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
32
- @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
33
- end
34
-
35
- def scan_tokens tokens, options
36
-
37
- until eos?
38
-
39
- if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty?
40
- @html_scanner.tokenize match
41
-
42
- elsif match = scan(/#{ERB_RUBY_BLOCK}/o)
43
- start_tag = match[/\A<%[-=]?/]
44
- end_tag = match[/%?>?\z/]
45
- tokens << [:open, :inline]
46
- tokens << [start_tag, :delimiter]
47
- code = match[start_tag.size .. -1 - end_tag.size]
48
- @ruby_scanner.tokenize code
49
- tokens << [end_tag, :delimiter] unless end_tag.empty?
50
- tokens << [:close, :inline]
51
-
52
- else
53
- raise_inspect 'else-case reached!', tokens
54
- end
55
-
56
- end
57
-
58
- tokens
59
-
60
- end
61
-
62
- end
63
-
64
- end
65
- end
1
+ module CodeRay
2
+ module Scanners
3
+
4
+ load :html
5
+ load :ruby
6
+
7
+ # RHTML Scanner
8
+ #
9
+ # $Id$
10
+ class RHTML < Scanner
11
+
12
+ include Streamable
13
+ register_for :rhtml
14
+
15
+ ERB_RUBY_BLOCK = /
16
+ <%(?!%)[=-]?
17
+ (?>
18
+ [^\-%]* # normal*
19
+ (?> # special
20
+ (?: %(?!>) | -(?!%>) )
21
+ [^\-%]* # normal*
22
+ )*
23
+ )
24
+ (?: -?%> )?
25
+ /x
26
+
27
+ START_OF_ERB = /
28
+ <%(?!%)
29
+ /x
30
+
31
+ private
32
+
33
+ def setup
34
+ @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
35
+ @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
36
+ end
37
+
38
+ def reset_instance
39
+ super
40
+ @html_scanner.reset
41
+ end
42
+
43
+ def scan_tokens tokens, options
44
+
45
+ until eos?
46
+
47
+ if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty?
48
+ @html_scanner.tokenize match
49
+
50
+ elsif match = scan(/#{ERB_RUBY_BLOCK}/o)
51
+ start_tag = match[/\A<%[-=]?/]
52
+ end_tag = match[/-?%?>?\z/]
53
+ tokens << [:open, :inline]
54
+ tokens << [start_tag, :delimiter]
55
+ code = match[start_tag.size .. -1 - end_tag.size]
56
+ @ruby_scanner.tokenize code
57
+ tokens << [end_tag, :delimiter] unless end_tag.empty?
58
+ tokens << [:close, :inline]
59
+
60
+ else
61
+ raise_inspect 'else-case reached!', tokens
62
+ end
63
+
64
+ end
65
+
66
+ tokens
67
+
68
+ end
69
+
70
+ end
71
+
72
+ end
73
+ end
@@ -1,397 +1,404 @@
1
- module CodeRay
2
- module Scanners
3
-
4
- # This scanner is really complex, since Ruby _is_ a complex language!
5
- #
6
- # It tries to highlight 100% of all common code,
7
- # and 90% of strange codes.
8
- #
9
- # It is optimized for HTML highlighting, and is not very useful for
10
- # parsing or pretty printing.
11
- #
12
- # For now, I think it's better than the scanners in VIM or Syntax, or
13
- # any highlighter I was able to find, except Caleb's RubyLexer.
14
- #
15
- # I hope it's also better than the rdoc/irb lexer.
16
- class Ruby < Scanner
17
-
18
- include Streamable
19
-
20
- register_for :ruby
21
-
22
- helper :patterns
23
-
24
- DEFAULT_OPTIONS = {
25
- :parse_regexps => true,
26
- }
27
-
28
- private
29
- def scan_tokens tokens, options
30
- parse_regexp = false # options[:parse_regexps]
31
- first_bake = saved_tokens = nil
32
- last_token_dot = false
33
- fancy_allowed = regexp_allowed = true
34
- heredocs = nil
35
- last_state = nil
36
- state = :initial
37
- depth = nil
38
- states = []
39
-
40
- patterns = Patterns # avoid constant lookup
41
-
42
- until eos?
43
- type = :error
44
- match = nil
45
- kind = nil
46
-
47
- if state.instance_of? patterns::StringState
48
- # {{{
49
- match = scan_until(state.pattern) || scan_until(/\z/)
50
- tokens << [match, :content] unless match.empty?
51
- break if eos?
52
-
53
- if state.heredoc and self[1]
54
- match = getch + scan_until(/$/)
55
- tokens << [match, :delimiter]
56
- tokens << [:close, state.type]
57
- state = state.next_state
58
- next
59
- end
60
-
61
- case match = getch
62
-
63
- when state.delim
64
- if state.paren
65
- state.paren_depth -= 1
66
- if state.paren_depth > 0
67
- tokens << [match, :nesting_delimiter]
68
- next
69
- end
70
- end
71
- tokens << [match, :delimiter]
72
- if state.type == :regexp and not eos?
73
- modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
74
- tokens << [modifiers, :modifier] unless modifiers.empty?
75
- if parse_regexp
76
- extended = modifiers.index ?x
77
- tokens = saved_tokens
78
- regexp = tokens
79
- for text, type in regexp
80
- if text.is_a? ::String
81
- case type
82
- when :content
83
- text.scan(/([^#]+)|(#.*)/) do |plain, comment|
84
- if plain
85
- tokens << [plain, :content]
86
- else
87
- tokens << [comment, :comment]
88
- end
89
- end
90
- when :character
91
- if text[/\\(?:[swdSWDAzZbB]|\d+)/]
92
- tokens << [text, :modifier]
93
- else
94
- tokens << [text, type]
95
- end
96
- else
97
- tokens << [text, type]
98
- end
99
- else
100
- tokens << [text, type]
101
- end
102
- end
103
- first_bake = saved_tokens = nil
104
- end
105
- end
106
- tokens << [:close, state.type]
107
- fancy_allowed = regexp_allowed = false
108
- state = state.next_state
109
-
110
- when '\\'
111
- if state.interpreted
112
- if esc = scan(/ #{patterns::ESCAPE} /ox)
113
- tokens << [match + esc, :char]
114
- else
115
- tokens << [match, :error]
116
- end
117
- else
118
- case m = getch
119
- when state.delim, '\\'
120
- tokens << [match + m, :char]
121
- else
122
- tokens << [match + m, :content]
123
- end
124
- end
125
-
126
- when '#'
127
- case peek(1)[0]
128
- when ?{
129
- states.push [state, depth, heredocs]
130
- fancy_allowed = regexp_allowed = true
131
- state = :initial
132
- depth = 1
133
- tokens << [:open, :inline]
134
- tokens << [match + getch, :delimiter]
135
- when ?$, ?@
136
- tokens << [match, :escape]
137
- last_state = state # scan one token as normal code, then return here
138
- state = :initial
139
- else
140
- raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
141
- end
142
-
143
- when state.paren
144
- state.paren_depth += 1
145
- tokens << [match, :nesting_delimiter]
146
-
147
- when /#{patterns::REGEXP_SYMBOLS}/ox
148
- tokens << [match, :function]
149
-
150
- else
151
- raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens
152
-
153
- end
154
- next
155
- # }}}
156
- else
157
- # {{{
158
- if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or
159
- ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
160
- fancy_allowed = true
161
- case m = match[0]
162
- when ?\s, ?\t, ?\f
163
- match << scan(/\s*/) unless eos? or heredocs
164
- type = :space
165
- when ?\n, ?\\
166
- type = :space
167
- if m == ?\n
168
- regexp_allowed = true
169
- state = :initial if state == :undef_comma_expected
170
- end
171
- if heredocs
172
- unscan # heredoc scanning needs \n at start
173
- state = heredocs.shift
174
- tokens << [:open, state.type]
175
- heredocs = nil if heredocs.empty?
176
- next
177
- else
178
- match << scan(/\s*/) unless eos?
179
- end
180
- when ?#, ?=, ?_
181
- type = :comment
182
- regexp_allowed = true
183
- else
184
- raise_inspect 'else-case _ reached, because case %p was not handled' % [matched[0].chr], tokens
185
- end
186
- tokens << [match, type]
187
- next
188
-
189
- elsif state == :initial
190
-
191
- # IDENTS #
192
- if match = scan(/#{patterns::METHOD_NAME}/o)
193
- if last_token_dot
194
- type = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
195
- else
196
- type = patterns::IDENT_KIND[match]
197
- if type == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
198
- type = :constant
199
- elsif type == :reserved
200
- state = patterns::DEF_NEW_STATE[match]
201
- end
202
- end
203
- ## experimental!
204
- fancy_allowed = regexp_allowed = :set if patterns::REGEXP_ALLOWED[match] or check(/\s+(?:%\S|\/\S)/)
205
-
206
- # OPERATORS #
207
- elsif (not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)) or
208
- (last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o))
209
- if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
210
- regexp_allowed = fancy_allowed = :set
211
- end
212
- last_token_dot = :set if match == '.' or match == '::'
213
- type = :operator
214
- unless states.empty?
215
- case match
216
- when '{'
217
- depth += 1
218
- when '}'
219
- depth -= 1
220
- if depth == 0
221
- state, depth, heredocs = states.pop
222
- tokens << [match, :delimiter]
223
- type = :inline
224
- match = :close
225
- end
226
- end
227
- end
228
-
229
- elsif match = scan(/ ['"] /mx)
230
- tokens << [:open, :string]
231
- type = :delimiter
232
- state = patterns::StringState.new :string, match == '"', match # important for streaming
233
-
234
- elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o)
235
- type = :instance_variable
236
-
237
- elsif regexp_allowed and match = scan(/\//)
238
- tokens << [:open, :regexp]
239
- type = :delimiter
240
- interpreted = true
241
- state = patterns::StringState.new :regexp, interpreted, match
242
- if parse_regexp
243
- tokens = []
244
- saved_tokens = tokens
245
- end
246
-
247
- elsif match = scan(/#{patterns::NUMERIC}/o)
248
- type = if self[1] then :float else :integer end
249
-
250
- elsif match = scan(/#{patterns::SYMBOL}/o)
251
- case delim = match[1]
252
- when ?', ?"
253
- tokens << [:open, :symbol]
254
- tokens << [':', :symbol]
255
- match = delim.chr
256
- type = :delimiter
257
- state = patterns::StringState.new :symbol, delim == ?", match
258
- else
259
- type = :symbol
260
- end
261
-
262
- elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
263
- regexp_allowed = fancy_allowed = :set
264
- type = :operator
265
-
266
- elsif fancy_allowed and match = scan(/#{patterns::HEREDOC_OPEN}/o)
267
- indented = self[1] == '-'
268
- quote = self[3]
269
- delim = self[quote ? 4 : 2]
270
- type = patterns::QUOTE_TO_TYPE[quote]
271
- tokens << [:open, type]
272
- tokens << [match, :delimiter]
273
- match = :close
274
- heredoc = patterns::StringState.new type, quote != '\'', delim, (indented ? :indented : :linestart )
275
- heredocs ||= [] # create heredocs if empty
276
- heredocs << heredoc
277
-
278
- elsif fancy_allowed and match = scan(/#{patterns::FANCY_START_SAVE}/o)
279
- type, interpreted = *patterns::FancyStringType.fetch(self[1]) do
280
- raise_inspect 'Unknown fancy string: %%%p' % k, tokens
281
- end
282
- tokens << [:open, type]
283
- state = patterns::StringState.new type, interpreted, self[2]
284
- type = :delimiter
285
-
286
- elsif fancy_allowed and match = scan(/#{patterns::CHARACTER}/o)
287
- type = :integer
288
-
289
- elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
290
- regexp_allowed = fancy_allowed = :set
291
- type = :operator
292
-
293
- elsif match = scan(/`/)
294
- if last_token_dot
295
- type = :operator
296
- else
297
- tokens << [:open, :shell]
298
- type = :delimiter
299
- state = patterns::StringState.new :shell, true, match
300
- end
301
-
302
- elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o)
303
- type = :global_variable
304
-
305
- elsif match = scan(/#{patterns::CLASS_VARIABLE}/o)
306
- type = :class_variable
307
-
308
- else
309
- match = getch
310
-
311
- end
312
-
313
- elsif state == :def_expected
314
- state = :initial
315
- if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
316
- type = :method
317
- else
318
- next
319
- end
320
-
321
- elsif state == :undef_expected
322
- state = :undef_comma_expected
323
- if match = scan(/#{patterns::METHOD_NAME_EX}/o)
324
- type = :method
325
- elsif match = scan(/#{patterns::SYMBOL}/o)
326
- case delim = match[1]
327
- when ?', ?"
328
- tokens << [:open, :symbol]
329
- tokens << [':', :symbol]
330
- match = delim.chr
331
- type = :delimiter
332
- state = patterns::StringState.new :symbol, delim == ?", match
333
- state.next_state = :undef_comma_expected
334
- else
335
- type = :symbol
336
- end
337
- else
338
- state = :initial
339
- next
340
- end
341
-
342
- elsif state == :undef_comma_expected
343
- if match = scan(/,/)
344
- type = :operator
345
- state = :undef_expected
346
- else
347
- state = :initial
348
- next
349
- end
350
-
351
- elsif state == :module_expected
352
- if match = scan(/<</)
353
- type = :operator
354
- else
355
- state = :initial
356
- if match = scan(/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
357
- type = :class
358
- else
359
- next
360
- end
361
- end
362
-
363
- end
364
- # }}}
365
-
366
- regexp_allowed = regexp_allowed == :set
367
- fancy_allowed = fancy_allowed == :set
368
- last_token_dot = last_token_dot == :set
369
-
370
- if $DEBUG and (not kind or kind == :error)
371
- raise_inspect 'Error token %p in line %d' %
372
- [[match, kind], line], tokens
373
- end
374
- raise_inspect 'Empty token', tokens unless match
375
-
376
- tokens << [match, type]
377
-
378
- if last_state
379
- state = last_state
380
- last_state = nil
381
- end
382
- end
383
- end
384
-
385
- states << state if state.is_a? patterns::StringState
386
- until states.empty?
387
- tokens << [:close, states.pop.type]
388
- end
389
-
390
- tokens
391
- end
392
- end
393
-
394
- end
395
- end
396
-
397
- # vim:fdm=marker
1
+ module CodeRay
2
+ module Scanners
3
+
4
+ # This scanner is really complex, since Ruby _is_ a complex language!
5
+ #
6
+ # It tries to highlight 100% of all common code,
7
+ # and 90% of strange codes.
8
+ #
9
+ # It is optimized for HTML highlighting, and is not very useful for
10
+ # parsing or pretty printing.
11
+ #
12
+ # For now, I think it's better than the scanners in VIM or Syntax, or
13
+ # any highlighter I was able to find, except Caleb's RubyLexer.
14
+ #
15
+ # I hope it's also better than the rdoc/irb lexer.
16
+ class Ruby < Scanner
17
+
18
+ include Streamable
19
+
20
+ register_for :ruby
21
+
22
+ helper :patterns
23
+
24
+ DEFAULT_OPTIONS = {
25
+ :parse_regexps => true,
26
+ }
27
+
28
+ private
29
+ def scan_tokens tokens, options
30
+ parse_regexp = false # options[:parse_regexps]
31
+ first_bake = saved_tokens = nil
32
+ last_token_dot = false
33
+ fancy_allowed = regexp_allowed = true
34
+ heredocs = nil
35
+ last_state = nil
36
+ state = :initial
37
+ depth = nil
38
+ inline_block_stack = []
39
+
40
+ patterns = Patterns # avoid constant lookup
41
+
42
+ until eos?
43
+ match = nil
44
+ kind = nil
45
+
46
+ if state.instance_of? patterns::StringState
47
+ # {{{
48
+ match = scan_until(state.pattern) || scan_until(/\z/)
49
+ tokens << [match, :content] unless match.empty?
50
+ break if eos?
51
+
52
+ if state.heredoc and self[1] # end of heredoc
53
+ match = getch.to_s
54
+ match << scan_until(/$/) unless eos?
55
+ tokens << [match, :delimiter]
56
+ tokens << [:close, state.type]
57
+ state = state.next_state
58
+ next
59
+ end
60
+
61
+ case match = getch
62
+
63
+ when state.delim
64
+ if state.paren
65
+ state.paren_depth -= 1
66
+ if state.paren_depth > 0
67
+ tokens << [match, :nesting_delimiter]
68
+ next
69
+ end
70
+ end
71
+ tokens << [match, :delimiter]
72
+ if state.type == :regexp and not eos?
73
+ modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
74
+ tokens << [modifiers, :modifier] unless modifiers.empty?
75
+ if parse_regexp
76
+ extended = modifiers.index ?x
77
+ tokens = saved_tokens
78
+ regexp = tokens
79
+ for text, kind in regexp
80
+ if text.is_a? ::String
81
+ case kind
82
+ when :content
83
+ text.scan(/([^#]+)|(#.*)/) do |plain, comment|
84
+ if plain
85
+ tokens << [plain, :content]
86
+ else
87
+ tokens << [comment, :comment]
88
+ end
89
+ end
90
+ when :character
91
+ if text[/\\(?:[swdSWDAzZbB]|\d+)/]
92
+ tokens << [text, :modifier]
93
+ else
94
+ tokens << [text, kind]
95
+ end
96
+ else
97
+ tokens << [text, kind]
98
+ end
99
+ else
100
+ tokens << [text, kind]
101
+ end
102
+ end
103
+ first_bake = saved_tokens = nil
104
+ end
105
+ end
106
+ tokens << [:close, state.type]
107
+ fancy_allowed = regexp_allowed = false
108
+ state = state.next_state
109
+
110
+ when '\\'
111
+ if state.interpreted
112
+ if esc = scan(/ #{patterns::ESCAPE} /ox)
113
+ tokens << [match + esc, :char]
114
+ else
115
+ tokens << [match, :error]
116
+ end
117
+ else
118
+ case m = getch
119
+ when state.delim, '\\'
120
+ tokens << [match + m, :char]
121
+ when nil
122
+ tokens << [match, :error]
123
+ else
124
+ tokens << [match + m, :content]
125
+ end
126
+ end
127
+
128
+ when '#'
129
+ case peek(1)[0]
130
+ when ?{
131
+ inline_block_stack << [state, depth, heredocs]
132
+ fancy_allowed = regexp_allowed = true
133
+ state = :initial
134
+ depth = 1
135
+ tokens << [:open, :inline]
136
+ tokens << [match + getch, :delimiter]
137
+ when ?$, ?@
138
+ tokens << [match, :escape]
139
+ last_state = state # scan one token as normal code, then return here
140
+ state = :initial
141
+ else
142
+ raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
143
+ end
144
+
145
+ when state.paren
146
+ state.paren_depth += 1
147
+ tokens << [match, :nesting_delimiter]
148
+
149
+ when /#{patterns::REGEXP_SYMBOLS}/ox
150
+ tokens << [match, :function]
151
+
152
+ else
153
+ raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens
154
+
155
+ end
156
+ next
157
+ # }}}
158
+ else
159
+ # {{{
160
+ if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or
161
+ ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
162
+ fancy_allowed = true
163
+ case m = match[0]
164
+ when ?\s, ?\t, ?\f
165
+ match << scan(/\s*/) unless eos? or heredocs
166
+ kind = :space
167
+ when ?\n, ?\\
168
+ kind = :space
169
+ if m == ?\n
170
+ regexp_allowed = true
171
+ state = :initial if state == :undef_comma_expected
172
+ end
173
+ if heredocs
174
+ unscan # heredoc scanning needs \n at start
175
+ state = heredocs.shift
176
+ tokens << [:open, state.type]
177
+ heredocs = nil if heredocs.empty?
178
+ next
179
+ else
180
+ match << scan(/\s*/) unless eos?
181
+ end
182
+ when ?#, ?=, ?_
183
+ kind = :comment
184
+ regexp_allowed = true
185
+ else
186
+ raise_inspect 'else-case _ reached, because case %p was not handled' % [matched[0].chr], tokens
187
+ end
188
+ tokens << [match, kind]
189
+ next
190
+
191
+ elsif state == :initial
192
+
193
+ # IDENTS #
194
+ if match = scan(/#{patterns::METHOD_NAME}/o)
195
+ if last_token_dot
196
+ kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
197
+ else
198
+ kind = patterns::IDENT_KIND[match]
199
+ if kind == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
200
+ kind = :constant
201
+ elsif kind == :reserved
202
+ state = patterns::DEF_NEW_STATE[match]
203
+ end
204
+ end
205
+ ## experimental!
206
+ fancy_allowed = regexp_allowed = :set if patterns::REGEXP_ALLOWED[match] or check(/\s+(?:%\S|\/\S)/)
207
+
208
+ # OPERATORS #
209
+ elsif (not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)) or
210
+ (last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o))
211
+ if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
212
+ regexp_allowed = fancy_allowed = :set
213
+ end
214
+ last_token_dot = :set if match == '.' or match == '::'
215
+ kind = :operator
216
+ unless inline_block_stack.empty?
217
+ case match
218
+ when '{'
219
+ depth += 1
220
+ when '}'
221
+ depth -= 1
222
+ if depth == 0 # closing brace of inline block reached
223
+ state, depth, heredocs = inline_block_stack.pop
224
+ tokens << [match, :delimiter]
225
+ kind = :inline
226
+ match = :close
227
+ end
228
+ end
229
+ end
230
+
231
+ elsif match = scan(/ ['"] /mx)
232
+ tokens << [:open, :string]
233
+ kind = :delimiter
234
+ state = patterns::StringState.new :string, match == '"', match # important for streaming
235
+
236
+ elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o)
237
+ kind = :instance_variable
238
+
239
+ elsif regexp_allowed and match = scan(/\//)
240
+ tokens << [:open, :regexp]
241
+ kind = :delimiter
242
+ interpreted = true
243
+ state = patterns::StringState.new :regexp, interpreted, match
244
+ if parse_regexp
245
+ tokens = []
246
+ saved_tokens = tokens
247
+ end
248
+
249
+ elsif match = scan(/#{patterns::NUMERIC}/o)
250
+ kind = if self[1] then :float else :integer end
251
+
252
+ elsif match = scan(/#{patterns::SYMBOL}/o)
253
+ case delim = match[1]
254
+ when ?', ?"
255
+ tokens << [:open, :symbol]
256
+ tokens << [':', :symbol]
257
+ match = delim.chr
258
+ kind = :delimiter
259
+ state = patterns::StringState.new :symbol, delim == ?", match
260
+ else
261
+ kind = :symbol
262
+ end
263
+
264
+ elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
265
+ regexp_allowed = fancy_allowed = :set
266
+ kind = :operator
267
+
268
+ elsif fancy_allowed and match = scan(/#{patterns::HEREDOC_OPEN}/o)
269
+ indented = self[1] == '-'
270
+ quote = self[3]
271
+ delim = self[quote ? 4 : 2]
272
+ kind = patterns::QUOTE_TO_TYPE[quote]
273
+ tokens << [:open, kind]
274
+ tokens << [match, :delimiter]
275
+ match = :close
276
+ heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart )
277
+ heredocs ||= [] # create heredocs if empty
278
+ heredocs << heredoc
279
+
280
+ elsif fancy_allowed and match = scan(/#{patterns::FANCY_START_SAVE}/o)
281
+ kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do
282
+ raise_inspect 'Unknown fancy string: %%%p' % k, tokens
283
+ end
284
+ tokens << [:open, kind]
285
+ state = patterns::StringState.new kind, interpreted, self[2]
286
+ kind = :delimiter
287
+
288
+ elsif fancy_allowed and match = scan(/#{patterns::CHARACTER}/o)
289
+ kind = :integer
290
+
291
+ elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
292
+ regexp_allowed = fancy_allowed = :set
293
+ kind = :operator
294
+
295
+ elsif match = scan(/`/)
296
+ if last_token_dot
297
+ kind = :operator
298
+ else
299
+ tokens << [:open, :shell]
300
+ kind = :delimiter
301
+ state = patterns::StringState.new :shell, true, match
302
+ end
303
+
304
+ elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o)
305
+ kind = :global_variable
306
+
307
+ elsif match = scan(/#{patterns::CLASS_VARIABLE}/o)
308
+ kind = :class_variable
309
+
310
+ else
311
+ kind = :error
312
+ match = getch
313
+
314
+ end
315
+
316
+ elsif state == :def_expected
317
+ state = :initial
318
+ if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
319
+ kind = :method
320
+ else
321
+ next
322
+ end
323
+
324
+ elsif state == :undef_expected
325
+ state = :undef_comma_expected
326
+ if match = scan(/#{patterns::METHOD_NAME_EX}/o)
327
+ kind = :method
328
+ elsif match = scan(/#{patterns::SYMBOL}/o)
329
+ case delim = match[1]
330
+ when ?', ?"
331
+ tokens << [:open, :symbol]
332
+ tokens << [':', :symbol]
333
+ match = delim.chr
334
+ kind = :delimiter
335
+ state = patterns::StringState.new :symbol, delim == ?", match
336
+ state.next_state = :undef_comma_expected
337
+ else
338
+ kind = :symbol
339
+ end
340
+ else
341
+ state = :initial
342
+ next
343
+ end
344
+
345
+ elsif state == :undef_comma_expected
346
+ if match = scan(/,/)
347
+ kind = :operator
348
+ state = :undef_expected
349
+ else
350
+ state = :initial
351
+ next
352
+ end
353
+
354
+ elsif state == :module_expected
355
+ if match = scan(/<</)
356
+ kind = :operator
357
+ else
358
+ state = :initial
359
+ if match = scan(/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
360
+ kind = :class
361
+ else
362
+ next
363
+ end
364
+ end
365
+
366
+ end
367
+ # }}}
368
+
369
+ regexp_allowed = regexp_allowed == :set
370
+ fancy_allowed = fancy_allowed == :set
371
+ last_token_dot = last_token_dot == :set
372
+
373
+ if $DEBUG and not kind
374
+ raise_inspect 'Error token %p in line %d' %
375
+ [[match, kind], line], tokens, state
376
+ end
377
+ raise_inspect 'Empty token', tokens unless match
378
+
379
+ tokens << [match, kind]
380
+
381
+ if last_state
382
+ state = last_state
383
+ last_state = nil
384
+ end
385
+ end
386
+ end
387
+
388
+ inline_block_stack << [state] if state.is_a? patterns::StringState
389
+ until inline_block_stack.empty?
390
+ this_block = inline_block_stack.pop
391
+ tokens << [:close, :inline] if this_block.size > 1
392
+ state = this_block.first
393
+ tokens << [:close, state.type]
394
+ end
395
+
396
+ tokens
397
+ end
398
+
399
+ end
400
+
401
+ end
402
+ end
403
+
404
+ # vim:fdm=marker