coderay 0.9.8 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. data/{lib/README → README_INDEX.rdoc} +10 -21
  2. data/Rakefile +6 -6
  3. data/bin/coderay +193 -64
  4. data/lib/coderay.rb +61 -105
  5. data/lib/coderay/duo.rb +17 -21
  6. data/lib/coderay/encoder.rb +100 -112
  7. data/lib/coderay/encoders/_map.rb +12 -7
  8. data/lib/coderay/encoders/comment_filter.rb +12 -30
  9. data/lib/coderay/encoders/count.rb +29 -11
  10. data/lib/coderay/encoders/debug.rb +32 -20
  11. data/lib/coderay/encoders/div.rb +13 -9
  12. data/lib/coderay/encoders/filter.rb +34 -51
  13. data/lib/coderay/encoders/html.rb +155 -161
  14. data/lib/coderay/encoders/html/css.rb +4 -9
  15. data/lib/coderay/encoders/html/numbering.rb +115 -0
  16. data/lib/coderay/encoders/html/output.rb +22 -70
  17. data/lib/coderay/encoders/json.rb +59 -45
  18. data/lib/coderay/encoders/lines_of_code.rb +12 -57
  19. data/lib/coderay/encoders/null.rb +6 -14
  20. data/lib/coderay/encoders/page.rb +13 -9
  21. data/lib/coderay/encoders/span.rb +13 -9
  22. data/lib/coderay/encoders/statistic.rb +58 -39
  23. data/lib/coderay/encoders/terminal.rb +179 -0
  24. data/lib/coderay/encoders/text.rb +31 -17
  25. data/lib/coderay/encoders/token_kind_filter.rb +111 -0
  26. data/lib/coderay/encoders/xml.rb +19 -18
  27. data/lib/coderay/encoders/yaml.rb +37 -9
  28. data/lib/coderay/for_redcloth.rb +4 -4
  29. data/lib/coderay/helpers/file_type.rb +127 -246
  30. data/lib/coderay/helpers/gzip.rb +41 -0
  31. data/lib/coderay/helpers/plugin.rb +241 -306
  32. data/lib/coderay/helpers/word_list.rb +65 -126
  33. data/lib/coderay/scanner.rb +173 -156
  34. data/lib/coderay/scanners/_map.rb +18 -17
  35. data/lib/coderay/scanners/c.rb +63 -77
  36. data/lib/coderay/scanners/clojure.rb +217 -0
  37. data/lib/coderay/scanners/cpp.rb +71 -84
  38. data/lib/coderay/scanners/css.rb +103 -120
  39. data/lib/coderay/scanners/debug.rb +47 -44
  40. data/lib/coderay/scanners/delphi.rb +70 -76
  41. data/lib/coderay/scanners/diff.rb +141 -50
  42. data/lib/coderay/scanners/erb.rb +81 -0
  43. data/lib/coderay/scanners/groovy.rb +104 -113
  44. data/lib/coderay/scanners/haml.rb +168 -0
  45. data/lib/coderay/scanners/html.rb +181 -110
  46. data/lib/coderay/scanners/java.rb +73 -75
  47. data/lib/coderay/scanners/java/builtin_types.rb +2 -0
  48. data/lib/coderay/scanners/java_script.rb +90 -101
  49. data/lib/coderay/scanners/json.rb +40 -53
  50. data/lib/coderay/scanners/php.rb +123 -147
  51. data/lib/coderay/scanners/python.rb +93 -91
  52. data/lib/coderay/scanners/raydebug.rb +66 -0
  53. data/lib/coderay/scanners/ruby.rb +343 -326
  54. data/lib/coderay/scanners/ruby/patterns.rb +40 -106
  55. data/lib/coderay/scanners/ruby/string_state.rb +71 -0
  56. data/lib/coderay/scanners/sql.rb +80 -66
  57. data/lib/coderay/scanners/text.rb +26 -0
  58. data/lib/coderay/scanners/xml.rb +1 -1
  59. data/lib/coderay/scanners/yaml.rb +74 -73
  60. data/lib/coderay/style.rb +10 -7
  61. data/lib/coderay/styles/_map.rb +3 -3
  62. data/lib/coderay/styles/alpha.rb +143 -0
  63. data/lib/coderay/token_kinds.rb +90 -0
  64. data/lib/coderay/tokens.rb +102 -277
  65. data/lib/coderay/tokens_proxy.rb +55 -0
  66. data/lib/coderay/version.rb +3 -0
  67. data/test/functional/basic.rb +200 -18
  68. data/test/functional/examples.rb +130 -0
  69. data/test/functional/for_redcloth.rb +15 -8
  70. data/test/functional/suite.rb +9 -6
  71. metadata +103 -123
  72. data/FOLDERS +0 -53
  73. data/bin/coderay_stylesheet +0 -4
  74. data/lib/coderay/encoders/html/numerization.rb +0 -133
  75. data/lib/coderay/encoders/term.rb +0 -158
  76. data/lib/coderay/encoders/token_class_filter.rb +0 -84
  77. data/lib/coderay/helpers/gzip_simple.rb +0 -123
  78. data/lib/coderay/scanners/nitro_xhtml.rb +0 -136
  79. data/lib/coderay/scanners/plaintext.rb +0 -20
  80. data/lib/coderay/scanners/rhtml.rb +0 -78
  81. data/lib/coderay/scanners/scheme.rb +0 -145
  82. data/lib/coderay/styles/cycnus.rb +0 -152
  83. data/lib/coderay/styles/murphy.rb +0 -134
  84. data/lib/coderay/token_classes.rb +0 -86
  85. data/test/functional/load_plugin_scanner.rb +0 -11
  86. data/test/functional/vhdl.rb +0 -126
  87. data/test/functional/word_list.rb +0 -79
@@ -0,0 +1,66 @@
1
+ module CodeRay
2
+ module Scanners
3
+
4
+ # = Debug Scanner
5
+ #
6
+ # Parses the output of the Encoders::Debug encoder.
7
+ class Raydebug < Scanner
8
+
9
+ register_for :raydebug
10
+ file_extension 'raydebug'
11
+ title 'CodeRay Token Dump'
12
+
13
+ protected
14
+
15
+ def scan_tokens encoder, options
16
+
17
+ opened_tokens = []
18
+
19
+ until eos?
20
+
21
+ if match = scan(/\s+/)
22
+ encoder.text_token match, :space
23
+
24
+ elsif match = scan(/ (\w+) \( ( [^\)\\]* ( \\. [^\)\\]* )* ) /x)
25
+ kind = self[1]
26
+ encoder.text_token kind, :class
27
+ encoder.text_token '(', :operator
28
+ match = self[2]
29
+ encoder.text_token match, kind.to_sym
30
+ encoder.text_token match, :operator if match = scan(/\)/)
31
+
32
+ elsif match = scan(/ (\w+) ([<\[]) /x)
33
+ kind = self[1]
34
+ case self[2]
35
+ when '<'
36
+ encoder.text_token kind, :class
37
+ when '['
38
+ encoder.text_token kind, :class
39
+ else
40
+ raise 'CodeRay bug: This case should not be reached.'
41
+ end
42
+ kind = kind.to_sym
43
+ opened_tokens << kind
44
+ encoder.begin_group kind
45
+ encoder.text_token self[2], :operator
46
+
47
+ elsif !opened_tokens.empty? && match = scan(/ [>\]] /x)
48
+ encoder.text_token match, :operator
49
+ encoder.end_group opened_tokens.pop
50
+
51
+ else
52
+ encoder.text_token getch, :space
53
+
54
+ end
55
+
56
+ end
57
+
58
+ encoder.end_group opened_tokens.pop until opened_tokens.empty?
59
+
60
+ encoder
61
+ end
62
+
63
+ end
64
+
65
+ end
66
+ end
@@ -1,7 +1,6 @@
1
- # encoding: utf-8
2
1
  module CodeRay
3
2
  module Scanners
4
-
3
+
5
4
  # This scanner is really complex, since Ruby _is_ a complex language!
6
5
  #
7
6
  # It tries to highlight 100% of all common code,
@@ -9,310 +8,240 @@ module Scanners
9
8
  #
10
9
  # It is optimized for HTML highlighting, and is not very useful for
11
10
  # parsing or pretty printing.
12
- #
13
- # For now, I think it's better than the scanners in VIM or Syntax, or
14
- # any highlighter I was able to find, except Caleb's RubyLexer.
15
- #
16
- # I hope it's also better than the rdoc/irb lexer.
17
11
  class Ruby < Scanner
18
-
19
- include Streamable
20
-
12
+
21
13
  register_for :ruby
22
14
  file_extension 'rb'
23
-
24
- helper :patterns
25
15
 
26
- if not defined? EncodingError
27
- EncodingError = Class.new Exception
16
+ autoload :Patterns, 'coderay/scanners/ruby/patterns'
17
+ autoload :StringState, 'coderay/scanners/ruby/string_state'
18
+
19
+ def interpreted_string_state
20
+ StringState.new :string, true, '"'
28
21
  end
29
-
30
- private
31
- def scan_tokens tokens, options
32
- if string.respond_to?(:encoding)
33
- unless string.encoding == Encoding::UTF_8
34
- self.string = string.encode Encoding::UTF_8,
35
- :invalid => :replace, :undef => :replace, :replace => '?'
36
- end
37
- unicode = false
38
- else
39
- unicode = exist?(/[^\x00-\x7f]/)
22
+
23
+ protected
24
+
25
+ def setup
26
+ @state = :initial
27
+ end
28
+
29
+ def scan_tokens encoder, options
30
+ state, heredocs = options[:state] || @state
31
+ heredocs = heredocs.dup if heredocs.is_a?(Array)
32
+
33
+ if state && state.instance_of?(StringState)
34
+ encoder.begin_group state.type
40
35
  end
41
36
 
42
- last_token_dot = false
43
- value_expected = true
44
- heredocs = nil
45
37
  last_state = nil
46
- state = :initial
47
- depth = nil
48
- inline_block_stack = []
49
38
 
39
+ method_call_expected = false
40
+ value_expected = true
41
+
42
+ inline_block_stack = nil
43
+ inline_block_curly_depth = 0
44
+
45
+ if heredocs
46
+ state = heredocs.shift
47
+ encoder.begin_group state.type
48
+ heredocs = nil if heredocs.empty?
49
+ end
50
+
51
+ # def_object_stack = nil
52
+ # def_object_paren_depth = 0
50
53
 
51
54
  patterns = Patterns # avoid constant lookup
52
55
 
56
+ unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
57
+
53
58
  until eos?
54
- match = nil
55
- kind = nil
56
-
57
- if state.instance_of? patterns::StringState
58
- # {{{
59
- match = scan_until(state.pattern) || scan_rest
60
- tokens << [match, :content] unless match.empty?
61
- break if eos?
62
-
63
- if state.heredoc and self[1] # end of heredoc
64
- match = getch.to_s
65
- match << scan_until(/$/) unless eos?
66
- tokens << [match, :delimiter]
67
- tokens << [:close, state.type]
68
- state = state.next_state
69
- next
70
- end
71
-
72
- case match = getch
73
-
74
- when state.delim
75
- if state.paren
76
- state.paren_depth -= 1
77
- if state.paren_depth > 0
78
- tokens << [match, :nesting_delimiter]
79
- next
80
- end
81
- end
82
- tokens << [match, :delimiter]
83
- if state.type == :regexp and not eos?
84
- modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
85
- tokens << [modifiers, :modifier] unless modifiers.empty?
86
- end
87
- tokens << [:close, state.type]
88
- value_expected = false
89
- state = state.next_state
90
-
91
- when '\\'
92
- if state.interpreted
93
- if esc = scan(/ #{patterns::ESCAPE} /ox)
94
- tokens << [match + esc, :char]
95
- else
96
- tokens << [match, :error]
97
- end
59
+
60
+ if state.instance_of? ::Symbol
61
+
62
+ if match = scan(/[ \t\f\v]+/)
63
+ encoder.text_token match, :space
64
+
65
+ elsif match = scan(/\n/)
66
+ if heredocs
67
+ unscan # heredoc scanning needs \n at start
68
+ state = heredocs.shift
69
+ encoder.begin_group state.type
70
+ heredocs = nil if heredocs.empty?
98
71
  else
99
- case m = getch
100
- when state.delim, '\\'
101
- tokens << [match + m, :char]
102
- when nil
103
- tokens << [match, :error]
104
- else
105
- tokens << [match + m, :content]
106
- end
107
- end
108
-
109
- when '#'
110
- case peek(1)
111
- when '{'
112
- inline_block_stack << [state, depth, heredocs]
72
+ state = :initial if state == :undef_comma_expected
73
+ encoder.text_token match, :space
113
74
  value_expected = true
114
- state = :initial
115
- depth = 1
116
- tokens << [:open, :inline]
117
- tokens << [match + getch, :inline_delimiter]
118
- when '$', '@'
119
- tokens << [match, :escape]
120
- last_state = state # scan one token as normal code, then return here
121
- state = :initial
122
- else
123
- raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
124
75
  end
125
-
126
- when state.paren
127
- state.paren_depth += 1
128
- tokens << [match, :nesting_delimiter]
129
-
130
- when /#{patterns::REGEXP_SYMBOLS}/ox
131
- tokens << [match, :function]
132
-
133
- else
134
- raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens
135
-
136
- end
137
- next
138
- # }}}
139
- else
140
- # {{{
141
- if match = scan(/[ \t\f]+/)
142
- kind = :space
143
- match << scan(/\s*/) unless eos? || heredocs
144
- value_expected = true if match.index(?\n)
145
- tokens << [match, kind]
146
- next
147
76
 
148
- elsif match = scan(/\\?\n/)
149
- kind = :space
150
- if match == "\n"
151
- value_expected = true
152
- state = :initial if state == :undef_comma_expected
153
- end
77
+ elsif match = scan(bol? ? / \#(!)?.* | #{patterns::RUBYDOC_OR_DATA} /ox : /\#.*/)
78
+ encoder.text_token match, self[1] ? :doctype : :comment
79
+
80
+ elsif match = scan(/\\\n/)
154
81
  if heredocs
155
82
  unscan # heredoc scanning needs \n at start
83
+ encoder.text_token scan(/\\/), :space
156
84
  state = heredocs.shift
157
- tokens << [:open, state.type]
85
+ encoder.begin_group state.type
158
86
  heredocs = nil if heredocs.empty?
159
- next
160
87
  else
161
- match << scan(/\s*/) unless eos?
88
+ encoder.text_token match, :space
162
89
  end
163
- tokens << [match, kind]
164
- next
165
-
166
- elsif bol? && match = scan(/\#!.*/)
167
- tokens << [match, :doctype]
168
- next
169
90
 
170
- elsif match = scan(/\#.*/) or
171
- ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
172
- kind = :comment
173
- tokens << [match, kind]
174
- next
175
-
176
91
  elsif state == :initial
177
-
92
+
178
93
  # IDENTS #
179
- if match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
94
+ if !method_call_expected &&
95
+ match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
180
96
  /#{patterns::METHOD_NAME}/o)
181
- if last_token_dot
182
- kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
183
- else
184
- if value_expected != :expect_colon && scan(/:(?= )/)
185
- tokens << [match, :key]
186
- match = ':'
187
- kind = :operator
188
- else
189
- kind = patterns::IDENT_KIND[match]
190
- if kind == :ident
191
- if match[/\A[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
192
- kind = :constant
193
- end
194
- elsif kind == :reserved
195
- state = patterns::DEF_NEW_STATE[match]
196
- value_expected = :set if patterns::KEYWORDS_EXPECTING_VALUE[match]
197
- end
97
+ value_expected = false
98
+ kind = patterns::IDENT_KIND[match]
99
+ if kind == :ident
100
+ if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/))
101
+ kind = :constant
198
102
  end
103
+ elsif kind == :keyword
104
+ state = patterns::KEYWORD_NEW_STATE[match]
105
+ value_expected = true if patterns::KEYWORDS_EXPECTING_VALUE[match]
199
106
  end
200
- value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)
201
-
202
- elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}|\(/o)
203
- kind = :ident
204
- value_expected = :set if check(unicode ? /#{patterns::VALUE_FOLLOWS}/uo :
205
- /#{patterns::VALUE_FOLLOWS}/o)
206
-
207
- # OPERATORS #
208
- elsif not last_token_dot and match = scan(/ \.\.\.? | (?:\.|::)() | [,\(\)\[\]\{\}] | ==?=? /x)
209
- if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
210
- value_expected = :set
107
+ value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
108
+ encoder.text_token match, kind
109
+
110
+ elsif method_call_expected &&
111
+ match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo :
112
+ /#{patterns::METHOD_AFTER_DOT}/o)
113
+ if method_call_expected == '::' && match[/\A[A-Z]/] && !match?(/\(/)
114
+ encoder.text_token match, :constant
115
+ else
116
+ encoder.text_token match, :ident
211
117
  end
212
- last_token_dot = :set if self[1]
213
- kind = :operator
214
- unless inline_block_stack.empty?
118
+ method_call_expected = false
119
+ value_expected = check(/#{patterns::VALUE_FOLLOWS}/o)
120
+
121
+ # OPERATORS #
122
+ elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | (?: \.\.\.? | ==?=? | [,\(\[\{] )() | [\)\]\}] /x)
123
+ method_call_expected = self[1]
124
+ value_expected = !method_call_expected && self[2]
125
+ if inline_block_stack
215
126
  case match
216
127
  when '{'
217
- depth += 1
128
+ inline_block_curly_depth += 1
218
129
  when '}'
219
- depth -= 1
220
- if depth == 0 # closing brace of inline block reached
221
- state, depth, heredocs = inline_block_stack.pop
130
+ inline_block_curly_depth -= 1
131
+ if inline_block_curly_depth == 0 # closing brace of inline block reached
132
+ state, inline_block_curly_depth, heredocs = inline_block_stack.pop
133
+ inline_block_stack = nil if inline_block_stack.empty?
222
134
  heredocs = nil if heredocs && heredocs.empty?
223
- tokens << [match, :inline_delimiter]
224
- kind = :inline
225
- match = :close
135
+ encoder.text_token match, :inline_delimiter
136
+ encoder.end_group :inline
137
+ next
226
138
  end
227
139
  end
228
140
  end
229
-
230
- elsif match = scan(/ ['"] /mx)
231
- tokens << [:open, :string]
232
- kind = :delimiter
233
- state = patterns::StringState.new :string, match == '"', match # important for streaming
234
-
235
- elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
236
- /#{patterns::INSTANCE_VARIABLE}/o)
237
- kind = :instance_variable
238
-
239
- elsif value_expected and match = scan(/\//)
240
- tokens << [:open, :regexp]
241
- kind = :delimiter
242
- interpreted = true
243
- state = patterns::StringState.new :regexp, interpreted, match
244
-
245
- # elsif match = scan(/[-+]?#{patterns::NUMERIC}/o)
246
- elsif match = value_expected ? scan(/[-+]?#{patterns::NUMERIC}/o) : scan(/#{patterns::NUMERIC}/o)
247
- kind = self[1] ? :float : :integer
248
-
141
+ encoder.text_token match, :operator
142
+
249
143
  elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
250
144
  /#{patterns::SYMBOL}/o)
251
145
  case delim = match[1]
252
146
  when ?', ?"
253
- tokens << [:open, :symbol]
254
- tokens << [':', :symbol]
147
+ encoder.begin_group :symbol
148
+ encoder.text_token ':', :symbol
255
149
  match = delim.chr
256
- kind = :delimiter
257
- state = patterns::StringState.new :symbol, delim == ?", match
150
+ encoder.text_token match, :delimiter
151
+ state = self.class::StringState.new :symbol, delim == ?", match
152
+ else
153
+ encoder.text_token match, :symbol
154
+ value_expected = false
155
+ end
156
+
157
+ elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx)
158
+ encoder.begin_group :string
159
+ if match.size == 1
160
+ encoder.text_token match, :delimiter
161
+ state = self.class::StringState.new :string, match == '"', match # important for streaming
162
+ else
163
+ encoder.text_token match[0,1], :delimiter
164
+ encoder.text_token match[1..-2], :content if match.size > 2
165
+ encoder.text_token match[-1,1], :delimiter
166
+ encoder.end_group :string
167
+ value_expected = false
168
+ end
169
+
170
+ elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
171
+ /#{patterns::INSTANCE_VARIABLE}/o)
172
+ value_expected = false
173
+ encoder.text_token match, :instance_variable
174
+
175
+ elsif value_expected && match = scan(/\//)
176
+ encoder.begin_group :regexp
177
+ encoder.text_token match, :delimiter
178
+ state = self.class::StringState.new :regexp, true, '/'
179
+
180
+ elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o)
181
+ if method_call_expected
182
+ encoder.text_token match, :error
183
+ method_call_expected = false
258
184
  else
259
- kind = :symbol
185
+ encoder.text_token match, self[1] ? :float : :integer # TODO: send :hex/:octal/:binary
260
186
  end
261
-
262
- elsif match = scan(/ -[>=]? | [+!~^]=? | [*|&]{1,2}=? | >>? /x)
263
- value_expected = :set
264
- kind = :operator
265
-
266
- elsif value_expected and match = scan(unicode ? /#{patterns::HEREDOC_OPEN}/uo :
267
- /#{patterns::HEREDOC_OPEN}/o)
268
- indented = self[1] == '-'
187
+ value_expected = false
188
+
189
+ elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x)
190
+ value_expected = true
191
+ encoder.text_token match, :operator
192
+
193
+ elsif value_expected && match = scan(/#{patterns::HEREDOC_OPEN}/o)
269
194
  quote = self[3]
270
195
  delim = self[quote ? 4 : 2]
271
196
  kind = patterns::QUOTE_TO_TYPE[quote]
272
- tokens << [:open, kind]
273
- tokens << [match, :delimiter]
274
- match = :close
275
- heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart )
197
+ encoder.begin_group kind
198
+ encoder.text_token match, :delimiter
199
+ encoder.end_group kind
276
200
  heredocs ||= [] # create heredocs if empty
277
- heredocs << heredoc
278
-
279
- elsif value_expected and match = scan(/#{patterns::FANCY_START_CORRECT}/o)
280
- kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do
281
- raise_inspect 'Unknown fancy string: %%%p' % k, tokens
282
- end
283
- tokens << [:open, kind]
284
- state = patterns::StringState.new kind, interpreted, self[2]
285
- kind = :delimiter
286
-
287
- elsif value_expected and match = scan(unicode ? /#{patterns::CHARACTER}/uo :
288
- /#{patterns::CHARACTER}/o)
289
- kind = :integer
290
-
291
- elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
292
- value_expected = :set
293
- kind = :operator
294
-
201
+ heredocs << self.class::StringState.new(kind, quote != "'", delim,
202
+ self[1] == '-' ? :indented : :linestart)
203
+ value_expected = false
204
+
205
+ elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o)
206
+ kind = patterns::FANCY_STRING_KIND[self[1]]
207
+ encoder.begin_group kind
208
+ state = self.class::StringState.new kind, patterns::FANCY_STRING_INTERPRETED[self[1]], self[2]
209
+ encoder.text_token match, :delimiter
210
+
211
+ elsif value_expected && match = scan(/#{patterns::CHARACTER}/o)
212
+ value_expected = false
213
+ encoder.text_token match, :integer
214
+
215
+ elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x)
216
+ value_expected = true
217
+ encoder.text_token match, :operator
218
+
295
219
  elsif match = scan(/`/)
296
- if last_token_dot
297
- kind = :operator
298
- else
299
- tokens << [:open, :shell]
300
- kind = :delimiter
301
- state = patterns::StringState.new :shell, true, match
302
- end
303
-
220
+ encoder.begin_group :shell
221
+ encoder.text_token match, :delimiter
222
+ state = self.class::StringState.new :shell, true, match
223
+
304
224
  elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
305
225
  /#{patterns::GLOBAL_VARIABLE}/o)
306
- kind = :global_variable
307
-
226
+ encoder.text_token match, :global_variable
227
+ value_expected = false
228
+
308
229
  elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
309
230
  /#{patterns::CLASS_VARIABLE}/o)
310
- kind = :class_variable
311
-
231
+ encoder.text_token match, :class_variable
232
+ value_expected = false
233
+
234
+ elsif match = scan(/\\\z/)
235
+ encoder.text_token match, :space
236
+
312
237
  else
313
- if !unicode && !string.respond_to?(:encoding)
238
+ if method_call_expected
239
+ method_call_expected = false
240
+ next
241
+ end
242
+ unless unicode
314
243
  # check for unicode
315
- debug, $DEBUG = $DEBUG, false
244
+ $DEBUG_BEFORE, $DEBUG = $DEBUG, false
316
245
  begin
317
246
  if check(/./mu).size > 1
318
247
  # seems like we should try again with unicode
@@ -321,124 +250,212 @@ module Scanners
321
250
  rescue
322
251
  # bad unicode char; use getch
323
252
  ensure
324
- $DEBUG = debug
253
+ $DEBUG = $DEBUG_BEFORE
325
254
  end
326
255
  next if unicode
327
256
  end
328
- kind = :error
329
- match = scan(unicode ? /./mu : /./m)
330
-
257
+
258
+ encoder.text_token getch, :error
259
+
331
260
  end
332
-
333
- elsif state == :def_expected
334
- state = :initial
335
- if scan(/self\./)
336
- tokens << ['self', :pre_constant]
337
- tokens << ['.', :operator]
261
+
262
+ if last_state
263
+ state = last_state
264
+ last_state = nil
338
265
  end
266
+
267
+ elsif state == :def_expected
339
268
  if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
340
269
  /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
341
- kind = :method
270
+ encoder.text_token match, :method
271
+ state = :initial
272
+ else
273
+ last_state = :dot_expected
274
+ state = :initial
275
+ end
276
+
277
+ elsif state == :dot_expected
278
+ if match = scan(/\.|::/)
279
+ # invalid definition
280
+ state = :def_expected
281
+ encoder.text_token match, :operator
342
282
  else
343
- next
283
+ state = :initial
344
284
  end
345
-
285
+
346
286
  elsif state == :module_expected
347
287
  if match = scan(/<</)
348
- kind = :operator
288
+ encoder.text_token match, :operator
349
289
  else
350
290
  state = :initial
351
- if match = scan(unicode ? /(?:#{patterns::IDENT}::)*#{patterns::IDENT}/uo :
352
- /(?:#{patterns::IDENT}::)*#{patterns::IDENT}/o)
353
- kind = :class
354
- else
355
- next
291
+ if match = scan(unicode ? / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /oux :
292
+ / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
293
+ encoder.text_token match, :class
356
294
  end
357
295
  end
358
-
296
+
359
297
  elsif state == :undef_expected
360
298
  state = :undef_comma_expected
361
- if match = scan(unicode ? /#{patterns::METHOD_NAME_EX}/uo :
362
- /#{patterns::METHOD_NAME_EX}/o)
363
- kind = :method
364
- elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
365
- /#{patterns::SYMBOL}/o)
299
+ if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
300
+ /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
301
+ encoder.text_token match, :method
302
+ elsif match = scan(/#{patterns::SYMBOL}/o)
366
303
  case delim = match[1]
367
304
  when ?', ?"
368
- tokens << [:open, :symbol]
369
- tokens << [':', :symbol]
305
+ encoder.begin_group :symbol
306
+ encoder.text_token ':', :symbol
370
307
  match = delim.chr
371
- kind = :delimiter
372
- state = patterns::StringState.new :symbol, delim == ?", match
308
+ encoder.text_token match, :delimiter
309
+ state = self.class::StringState.new :symbol, delim == ?", match
373
310
  state.next_state = :undef_comma_expected
374
311
  else
375
- kind = :symbol
312
+ encoder.text_token match, :symbol
376
313
  end
377
314
  else
378
315
  state = :initial
379
- next
380
316
  end
381
-
382
- elsif state == :alias_expected
383
- match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
384
- /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
385
317
 
386
- if match
387
- tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)]
388
- tokens << [self[2], :space]
389
- tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)]
390
- end
391
- state = :initial
392
- next
393
-
394
318
  elsif state == :undef_comma_expected
395
319
  if match = scan(/,/)
396
- kind = :operator
320
+ encoder.text_token match, :operator
397
321
  state = :undef_expected
398
322
  else
399
323
  state = :initial
400
- next
401
324
  end
402
-
325
+
326
+ elsif state == :alias_expected
327
+ match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
328
+ /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
329
+
330
+ if match
331
+ encoder.text_token self[1], (self[1][0] == ?: ? :symbol : :method)
332
+ encoder.text_token self[2], :space
333
+ encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method)
334
+ end
335
+ state = :initial
336
+
337
+ else
338
+ #:nocov:
339
+ raise_inspect 'Unknown state: %p' % [state], encoder
340
+ #:nocov:
403
341
  end
404
- # }}}
405
342
 
406
- unless kind == :error
407
- if value_expected = value_expected == :set
408
- value_expected = :expect_colon if match == '?' || match == 'when'
409
- end
410
- last_token_dot = last_token_dot == :set
343
+ else # StringState
344
+
345
+ match = scan_until(state.pattern) || scan_rest
346
+ unless match.empty?
347
+ encoder.text_token match, :content
348
+ break if eos?
411
349
  end
412
350
 
413
- if $CODERAY_DEBUG and not kind
414
- raise_inspect 'Error token %p in line %d' %
415
- [[match, kind], line], tokens, state
351
+ if state.heredoc && self[1] # end of heredoc
352
+ match = getch
353
+ match << scan_until(/$/) unless eos?
354
+ encoder.text_token match, :delimiter unless match.empty?
355
+ encoder.end_group state.type
356
+ state = state.next_state
357
+ next
416
358
  end
417
- raise_inspect 'Empty token', tokens unless match
418
-
419
- tokens << [match, kind]
420
-
421
- if last_state
422
- state = last_state
423
- last_state = nil
359
+
360
+ case match = getch
361
+
362
+ when state.delim
363
+ if state.paren_depth
364
+ state.paren_depth -= 1
365
+ if state.paren_depth > 0
366
+ encoder.text_token match, :content
367
+ next
368
+ end
369
+ end
370
+ encoder.text_token match, :delimiter
371
+ if state.type == :regexp && !eos?
372
+ match = scan(/#{patterns::REGEXP_MODIFIERS}/o)
373
+ encoder.text_token match, :modifier unless match.empty?
374
+ end
375
+ encoder.end_group state.type
376
+ value_expected = false
377
+ state = state.next_state
378
+
379
+ when '\\'
380
+ if state.interpreted
381
+ if esc = scan(/#{patterns::ESCAPE}/o)
382
+ encoder.text_token match + esc, :char
383
+ else
384
+ encoder.text_token match, :error
385
+ end
386
+ else
387
+ case esc = getch
388
+ when nil
389
+ encoder.text_token match, :content
390
+ when state.delim, '\\'
391
+ encoder.text_token match + esc, :char
392
+ else
393
+ encoder.text_token match + esc, :content
394
+ end
395
+ end
396
+
397
+ when '#'
398
+ case peek(1)
399
+ when '{'
400
+ inline_block_stack ||= []
401
+ inline_block_stack << [state, inline_block_curly_depth, heredocs]
402
+ value_expected = true
403
+ state = :initial
404
+ inline_block_curly_depth = 1
405
+ encoder.begin_group :inline
406
+ encoder.text_token match + getch, :inline_delimiter
407
+ when '$', '@'
408
+ encoder.text_token match, :escape
409
+ last_state = state
410
+ state = :initial
411
+ else
412
+ #:nocov:
413
+ raise_inspect 'else-case # reached; #%p not handled' % [peek(1)], encoder
414
+ #:nocov:
415
+ end
416
+
417
+ when state.opening_paren
418
+ state.paren_depth += 1
419
+ encoder.text_token match, :content
420
+
421
+ else
422
+ #:nocov
423
+ raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], encoder
424
+ #:nocov:
425
+
424
426
  end
427
+
425
428
  end
429
+
430
+ end
431
+
432
+ # cleaning up
433
+ if state.is_a? StringState
434
+ encoder.end_group state.type
426
435
  end
427
-
428
- inline_block_stack << [state] if state.is_a? patterns::StringState
429
- until inline_block_stack.empty?
430
- this_block = inline_block_stack.pop
431
- tokens << [:close, :inline] if this_block.size > 1
432
- state = this_block.first
433
- tokens << [:close, state.type]
436
+
437
+ if options[:keep_state]
438
+ if state.is_a?(StringState) && state.heredoc
439
+ (heredocs ||= []).unshift state
440
+ state = :initial
441
+ elsif heredocs && heredocs.empty?
442
+ heredocs = nil
443
+ end
444
+ @state = state, heredocs
434
445
  end
435
-
436
- tokens
446
+
447
+ if inline_block_stack
448
+ until inline_block_stack.empty?
449
+ state, = *inline_block_stack.pop
450
+ encoder.end_group :inline
451
+ encoder.end_group state.type
452
+ end
453
+ end
454
+
455
+ encoder
437
456
  end
438
-
457
+
439
458
  end
440
-
459
+
441
460
  end
442
461
  end
443
-
444
- # vim:fdm=marker