coderay 0.9.8 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. data/{lib/README → README_INDEX.rdoc} +10 -21
  2. data/Rakefile +6 -6
  3. data/bin/coderay +193 -64
  4. data/lib/coderay.rb +61 -105
  5. data/lib/coderay/duo.rb +17 -21
  6. data/lib/coderay/encoder.rb +100 -112
  7. data/lib/coderay/encoders/_map.rb +12 -7
  8. data/lib/coderay/encoders/comment_filter.rb +12 -30
  9. data/lib/coderay/encoders/count.rb +29 -11
  10. data/lib/coderay/encoders/debug.rb +32 -20
  11. data/lib/coderay/encoders/div.rb +13 -9
  12. data/lib/coderay/encoders/filter.rb +34 -51
  13. data/lib/coderay/encoders/html.rb +155 -161
  14. data/lib/coderay/encoders/html/css.rb +4 -9
  15. data/lib/coderay/encoders/html/numbering.rb +115 -0
  16. data/lib/coderay/encoders/html/output.rb +22 -70
  17. data/lib/coderay/encoders/json.rb +59 -45
  18. data/lib/coderay/encoders/lines_of_code.rb +12 -57
  19. data/lib/coderay/encoders/null.rb +6 -14
  20. data/lib/coderay/encoders/page.rb +13 -9
  21. data/lib/coderay/encoders/span.rb +13 -9
  22. data/lib/coderay/encoders/statistic.rb +58 -39
  23. data/lib/coderay/encoders/terminal.rb +179 -0
  24. data/lib/coderay/encoders/text.rb +31 -17
  25. data/lib/coderay/encoders/token_kind_filter.rb +111 -0
  26. data/lib/coderay/encoders/xml.rb +19 -18
  27. data/lib/coderay/encoders/yaml.rb +37 -9
  28. data/lib/coderay/for_redcloth.rb +4 -4
  29. data/lib/coderay/helpers/file_type.rb +127 -246
  30. data/lib/coderay/helpers/gzip.rb +41 -0
  31. data/lib/coderay/helpers/plugin.rb +241 -306
  32. data/lib/coderay/helpers/word_list.rb +65 -126
  33. data/lib/coderay/scanner.rb +173 -156
  34. data/lib/coderay/scanners/_map.rb +18 -17
  35. data/lib/coderay/scanners/c.rb +63 -77
  36. data/lib/coderay/scanners/clojure.rb +217 -0
  37. data/lib/coderay/scanners/cpp.rb +71 -84
  38. data/lib/coderay/scanners/css.rb +103 -120
  39. data/lib/coderay/scanners/debug.rb +47 -44
  40. data/lib/coderay/scanners/delphi.rb +70 -76
  41. data/lib/coderay/scanners/diff.rb +141 -50
  42. data/lib/coderay/scanners/erb.rb +81 -0
  43. data/lib/coderay/scanners/groovy.rb +104 -113
  44. data/lib/coderay/scanners/haml.rb +168 -0
  45. data/lib/coderay/scanners/html.rb +181 -110
  46. data/lib/coderay/scanners/java.rb +73 -75
  47. data/lib/coderay/scanners/java/builtin_types.rb +2 -0
  48. data/lib/coderay/scanners/java_script.rb +90 -101
  49. data/lib/coderay/scanners/json.rb +40 -53
  50. data/lib/coderay/scanners/php.rb +123 -147
  51. data/lib/coderay/scanners/python.rb +93 -91
  52. data/lib/coderay/scanners/raydebug.rb +66 -0
  53. data/lib/coderay/scanners/ruby.rb +343 -326
  54. data/lib/coderay/scanners/ruby/patterns.rb +40 -106
  55. data/lib/coderay/scanners/ruby/string_state.rb +71 -0
  56. data/lib/coderay/scanners/sql.rb +80 -66
  57. data/lib/coderay/scanners/text.rb +26 -0
  58. data/lib/coderay/scanners/xml.rb +1 -1
  59. data/lib/coderay/scanners/yaml.rb +74 -73
  60. data/lib/coderay/style.rb +10 -7
  61. data/lib/coderay/styles/_map.rb +3 -3
  62. data/lib/coderay/styles/alpha.rb +143 -0
  63. data/lib/coderay/token_kinds.rb +90 -0
  64. data/lib/coderay/tokens.rb +102 -277
  65. data/lib/coderay/tokens_proxy.rb +55 -0
  66. data/lib/coderay/version.rb +3 -0
  67. data/test/functional/basic.rb +200 -18
  68. data/test/functional/examples.rb +130 -0
  69. data/test/functional/for_redcloth.rb +15 -8
  70. data/test/functional/suite.rb +9 -6
  71. metadata +103 -123
  72. data/FOLDERS +0 -53
  73. data/bin/coderay_stylesheet +0 -4
  74. data/lib/coderay/encoders/html/numerization.rb +0 -133
  75. data/lib/coderay/encoders/term.rb +0 -158
  76. data/lib/coderay/encoders/token_class_filter.rb +0 -84
  77. data/lib/coderay/helpers/gzip_simple.rb +0 -123
  78. data/lib/coderay/scanners/nitro_xhtml.rb +0 -136
  79. data/lib/coderay/scanners/plaintext.rb +0 -20
  80. data/lib/coderay/scanners/rhtml.rb +0 -78
  81. data/lib/coderay/scanners/scheme.rb +0 -145
  82. data/lib/coderay/styles/cycnus.rb +0 -152
  83. data/lib/coderay/styles/murphy.rb +0 -134
  84. data/lib/coderay/token_classes.rb +0 -86
  85. data/test/functional/load_plugin_scanner.rb +0 -11
  86. data/test/functional/vhdl.rb +0 -126
  87. data/test/functional/word_list.rb +0 -79
@@ -0,0 +1,66 @@
1
+ module CodeRay
2
+ module Scanners
3
+
4
+ # = Debug Scanner
5
+ #
6
+ # Parses the output of the Encoders::Debug encoder.
7
+ class Raydebug < Scanner
8
+
9
+ register_for :raydebug
10
+ file_extension 'raydebug'
11
+ title 'CodeRay Token Dump'
12
+
13
+ protected
14
+
15
+ def scan_tokens encoder, options
16
+
17
+ opened_tokens = []
18
+
19
+ until eos?
20
+
21
+ if match = scan(/\s+/)
22
+ encoder.text_token match, :space
23
+
24
+ elsif match = scan(/ (\w+) \( ( [^\)\\]* ( \\. [^\)\\]* )* ) /x)
25
+ kind = self[1]
26
+ encoder.text_token kind, :class
27
+ encoder.text_token '(', :operator
28
+ match = self[2]
29
+ encoder.text_token match, kind.to_sym
30
+ encoder.text_token match, :operator if match = scan(/\)/)
31
+
32
+ elsif match = scan(/ (\w+) ([<\[]) /x)
33
+ kind = self[1]
34
+ case self[2]
35
+ when '<'
36
+ encoder.text_token kind, :class
37
+ when '['
38
+ encoder.text_token kind, :class
39
+ else
40
+ raise 'CodeRay bug: This case should not be reached.'
41
+ end
42
+ kind = kind.to_sym
43
+ opened_tokens << kind
44
+ encoder.begin_group kind
45
+ encoder.text_token self[2], :operator
46
+
47
+ elsif !opened_tokens.empty? && match = scan(/ [>\]] /x)
48
+ encoder.text_token match, :operator
49
+ encoder.end_group opened_tokens.pop
50
+
51
+ else
52
+ encoder.text_token getch, :space
53
+
54
+ end
55
+
56
+ end
57
+
58
+ encoder.end_group opened_tokens.pop until opened_tokens.empty?
59
+
60
+ encoder
61
+ end
62
+
63
+ end
64
+
65
+ end
66
+ end
@@ -1,7 +1,6 @@
1
- # encoding: utf-8
2
1
  module CodeRay
3
2
  module Scanners
4
-
3
+
5
4
  # This scanner is really complex, since Ruby _is_ a complex language!
6
5
  #
7
6
  # It tries to highlight 100% of all common code,
@@ -9,310 +8,240 @@ module Scanners
9
8
  #
10
9
  # It is optimized for HTML highlighting, and is not very useful for
11
10
  # parsing or pretty printing.
12
- #
13
- # For now, I think it's better than the scanners in VIM or Syntax, or
14
- # any highlighter I was able to find, except Caleb's RubyLexer.
15
- #
16
- # I hope it's also better than the rdoc/irb lexer.
17
11
  class Ruby < Scanner
18
-
19
- include Streamable
20
-
12
+
21
13
  register_for :ruby
22
14
  file_extension 'rb'
23
-
24
- helper :patterns
25
15
 
26
- if not defined? EncodingError
27
- EncodingError = Class.new Exception
16
+ autoload :Patterns, 'coderay/scanners/ruby/patterns'
17
+ autoload :StringState, 'coderay/scanners/ruby/string_state'
18
+
19
+ def interpreted_string_state
20
+ StringState.new :string, true, '"'
28
21
  end
29
-
30
- private
31
- def scan_tokens tokens, options
32
- if string.respond_to?(:encoding)
33
- unless string.encoding == Encoding::UTF_8
34
- self.string = string.encode Encoding::UTF_8,
35
- :invalid => :replace, :undef => :replace, :replace => '?'
36
- end
37
- unicode = false
38
- else
39
- unicode = exist?(/[^\x00-\x7f]/)
22
+
23
+ protected
24
+
25
+ def setup
26
+ @state = :initial
27
+ end
28
+
29
+ def scan_tokens encoder, options
30
+ state, heredocs = options[:state] || @state
31
+ heredocs = heredocs.dup if heredocs.is_a?(Array)
32
+
33
+ if state && state.instance_of?(StringState)
34
+ encoder.begin_group state.type
40
35
  end
41
36
 
42
- last_token_dot = false
43
- value_expected = true
44
- heredocs = nil
45
37
  last_state = nil
46
- state = :initial
47
- depth = nil
48
- inline_block_stack = []
49
38
 
39
+ method_call_expected = false
40
+ value_expected = true
41
+
42
+ inline_block_stack = nil
43
+ inline_block_curly_depth = 0
44
+
45
+ if heredocs
46
+ state = heredocs.shift
47
+ encoder.begin_group state.type
48
+ heredocs = nil if heredocs.empty?
49
+ end
50
+
51
+ # def_object_stack = nil
52
+ # def_object_paren_depth = 0
50
53
 
51
54
  patterns = Patterns # avoid constant lookup
52
55
 
56
+ unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
57
+
53
58
  until eos?
54
- match = nil
55
- kind = nil
56
-
57
- if state.instance_of? patterns::StringState
58
- # {{{
59
- match = scan_until(state.pattern) || scan_rest
60
- tokens << [match, :content] unless match.empty?
61
- break if eos?
62
-
63
- if state.heredoc and self[1] # end of heredoc
64
- match = getch.to_s
65
- match << scan_until(/$/) unless eos?
66
- tokens << [match, :delimiter]
67
- tokens << [:close, state.type]
68
- state = state.next_state
69
- next
70
- end
71
-
72
- case match = getch
73
-
74
- when state.delim
75
- if state.paren
76
- state.paren_depth -= 1
77
- if state.paren_depth > 0
78
- tokens << [match, :nesting_delimiter]
79
- next
80
- end
81
- end
82
- tokens << [match, :delimiter]
83
- if state.type == :regexp and not eos?
84
- modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
85
- tokens << [modifiers, :modifier] unless modifiers.empty?
86
- end
87
- tokens << [:close, state.type]
88
- value_expected = false
89
- state = state.next_state
90
-
91
- when '\\'
92
- if state.interpreted
93
- if esc = scan(/ #{patterns::ESCAPE} /ox)
94
- tokens << [match + esc, :char]
95
- else
96
- tokens << [match, :error]
97
- end
59
+
60
+ if state.instance_of? ::Symbol
61
+
62
+ if match = scan(/[ \t\f\v]+/)
63
+ encoder.text_token match, :space
64
+
65
+ elsif match = scan(/\n/)
66
+ if heredocs
67
+ unscan # heredoc scanning needs \n at start
68
+ state = heredocs.shift
69
+ encoder.begin_group state.type
70
+ heredocs = nil if heredocs.empty?
98
71
  else
99
- case m = getch
100
- when state.delim, '\\'
101
- tokens << [match + m, :char]
102
- when nil
103
- tokens << [match, :error]
104
- else
105
- tokens << [match + m, :content]
106
- end
107
- end
108
-
109
- when '#'
110
- case peek(1)
111
- when '{'
112
- inline_block_stack << [state, depth, heredocs]
72
+ state = :initial if state == :undef_comma_expected
73
+ encoder.text_token match, :space
113
74
  value_expected = true
114
- state = :initial
115
- depth = 1
116
- tokens << [:open, :inline]
117
- tokens << [match + getch, :inline_delimiter]
118
- when '$', '@'
119
- tokens << [match, :escape]
120
- last_state = state # scan one token as normal code, then return here
121
- state = :initial
122
- else
123
- raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
124
75
  end
125
-
126
- when state.paren
127
- state.paren_depth += 1
128
- tokens << [match, :nesting_delimiter]
129
-
130
- when /#{patterns::REGEXP_SYMBOLS}/ox
131
- tokens << [match, :function]
132
-
133
- else
134
- raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens
135
-
136
- end
137
- next
138
- # }}}
139
- else
140
- # {{{
141
- if match = scan(/[ \t\f]+/)
142
- kind = :space
143
- match << scan(/\s*/) unless eos? || heredocs
144
- value_expected = true if match.index(?\n)
145
- tokens << [match, kind]
146
- next
147
76
 
148
- elsif match = scan(/\\?\n/)
149
- kind = :space
150
- if match == "\n"
151
- value_expected = true
152
- state = :initial if state == :undef_comma_expected
153
- end
77
+ elsif match = scan(bol? ? / \#(!)?.* | #{patterns::RUBYDOC_OR_DATA} /ox : /\#.*/)
78
+ encoder.text_token match, self[1] ? :doctype : :comment
79
+
80
+ elsif match = scan(/\\\n/)
154
81
  if heredocs
155
82
  unscan # heredoc scanning needs \n at start
83
+ encoder.text_token scan(/\\/), :space
156
84
  state = heredocs.shift
157
- tokens << [:open, state.type]
85
+ encoder.begin_group state.type
158
86
  heredocs = nil if heredocs.empty?
159
- next
160
87
  else
161
- match << scan(/\s*/) unless eos?
88
+ encoder.text_token match, :space
162
89
  end
163
- tokens << [match, kind]
164
- next
165
-
166
- elsif bol? && match = scan(/\#!.*/)
167
- tokens << [match, :doctype]
168
- next
169
90
 
170
- elsif match = scan(/\#.*/) or
171
- ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
172
- kind = :comment
173
- tokens << [match, kind]
174
- next
175
-
176
91
  elsif state == :initial
177
-
92
+
178
93
  # IDENTS #
179
- if match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
94
+ if !method_call_expected &&
95
+ match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
180
96
  /#{patterns::METHOD_NAME}/o)
181
- if last_token_dot
182
- kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
183
- else
184
- if value_expected != :expect_colon && scan(/:(?= )/)
185
- tokens << [match, :key]
186
- match = ':'
187
- kind = :operator
188
- else
189
- kind = patterns::IDENT_KIND[match]
190
- if kind == :ident
191
- if match[/\A[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
192
- kind = :constant
193
- end
194
- elsif kind == :reserved
195
- state = patterns::DEF_NEW_STATE[match]
196
- value_expected = :set if patterns::KEYWORDS_EXPECTING_VALUE[match]
197
- end
97
+ value_expected = false
98
+ kind = patterns::IDENT_KIND[match]
99
+ if kind == :ident
100
+ if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/))
101
+ kind = :constant
198
102
  end
103
+ elsif kind == :keyword
104
+ state = patterns::KEYWORD_NEW_STATE[match]
105
+ value_expected = true if patterns::KEYWORDS_EXPECTING_VALUE[match]
199
106
  end
200
- value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)
201
-
202
- elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}|\(/o)
203
- kind = :ident
204
- value_expected = :set if check(unicode ? /#{patterns::VALUE_FOLLOWS}/uo :
205
- /#{patterns::VALUE_FOLLOWS}/o)
206
-
207
- # OPERATORS #
208
- elsif not last_token_dot and match = scan(/ \.\.\.? | (?:\.|::)() | [,\(\)\[\]\{\}] | ==?=? /x)
209
- if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
210
- value_expected = :set
107
+ value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
108
+ encoder.text_token match, kind
109
+
110
+ elsif method_call_expected &&
111
+ match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo :
112
+ /#{patterns::METHOD_AFTER_DOT}/o)
113
+ if method_call_expected == '::' && match[/\A[A-Z]/] && !match?(/\(/)
114
+ encoder.text_token match, :constant
115
+ else
116
+ encoder.text_token match, :ident
211
117
  end
212
- last_token_dot = :set if self[1]
213
- kind = :operator
214
- unless inline_block_stack.empty?
118
+ method_call_expected = false
119
+ value_expected = check(/#{patterns::VALUE_FOLLOWS}/o)
120
+
121
+ # OPERATORS #
122
+ elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | (?: \.\.\.? | ==?=? | [,\(\[\{] )() | [\)\]\}] /x)
123
+ method_call_expected = self[1]
124
+ value_expected = !method_call_expected && self[2]
125
+ if inline_block_stack
215
126
  case match
216
127
  when '{'
217
- depth += 1
128
+ inline_block_curly_depth += 1
218
129
  when '}'
219
- depth -= 1
220
- if depth == 0 # closing brace of inline block reached
221
- state, depth, heredocs = inline_block_stack.pop
130
+ inline_block_curly_depth -= 1
131
+ if inline_block_curly_depth == 0 # closing brace of inline block reached
132
+ state, inline_block_curly_depth, heredocs = inline_block_stack.pop
133
+ inline_block_stack = nil if inline_block_stack.empty?
222
134
  heredocs = nil if heredocs && heredocs.empty?
223
- tokens << [match, :inline_delimiter]
224
- kind = :inline
225
- match = :close
135
+ encoder.text_token match, :inline_delimiter
136
+ encoder.end_group :inline
137
+ next
226
138
  end
227
139
  end
228
140
  end
229
-
230
- elsif match = scan(/ ['"] /mx)
231
- tokens << [:open, :string]
232
- kind = :delimiter
233
- state = patterns::StringState.new :string, match == '"', match # important for streaming
234
-
235
- elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
236
- /#{patterns::INSTANCE_VARIABLE}/o)
237
- kind = :instance_variable
238
-
239
- elsif value_expected and match = scan(/\//)
240
- tokens << [:open, :regexp]
241
- kind = :delimiter
242
- interpreted = true
243
- state = patterns::StringState.new :regexp, interpreted, match
244
-
245
- # elsif match = scan(/[-+]?#{patterns::NUMERIC}/o)
246
- elsif match = value_expected ? scan(/[-+]?#{patterns::NUMERIC}/o) : scan(/#{patterns::NUMERIC}/o)
247
- kind = self[1] ? :float : :integer
248
-
141
+ encoder.text_token match, :operator
142
+
249
143
  elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
250
144
  /#{patterns::SYMBOL}/o)
251
145
  case delim = match[1]
252
146
  when ?', ?"
253
- tokens << [:open, :symbol]
254
- tokens << [':', :symbol]
147
+ encoder.begin_group :symbol
148
+ encoder.text_token ':', :symbol
255
149
  match = delim.chr
256
- kind = :delimiter
257
- state = patterns::StringState.new :symbol, delim == ?", match
150
+ encoder.text_token match, :delimiter
151
+ state = self.class::StringState.new :symbol, delim == ?", match
152
+ else
153
+ encoder.text_token match, :symbol
154
+ value_expected = false
155
+ end
156
+
157
+ elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx)
158
+ encoder.begin_group :string
159
+ if match.size == 1
160
+ encoder.text_token match, :delimiter
161
+ state = self.class::StringState.new :string, match == '"', match # important for streaming
162
+ else
163
+ encoder.text_token match[0,1], :delimiter
164
+ encoder.text_token match[1..-2], :content if match.size > 2
165
+ encoder.text_token match[-1,1], :delimiter
166
+ encoder.end_group :string
167
+ value_expected = false
168
+ end
169
+
170
+ elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
171
+ /#{patterns::INSTANCE_VARIABLE}/o)
172
+ value_expected = false
173
+ encoder.text_token match, :instance_variable
174
+
175
+ elsif value_expected && match = scan(/\//)
176
+ encoder.begin_group :regexp
177
+ encoder.text_token match, :delimiter
178
+ state = self.class::StringState.new :regexp, true, '/'
179
+
180
+ elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o)
181
+ if method_call_expected
182
+ encoder.text_token match, :error
183
+ method_call_expected = false
258
184
  else
259
- kind = :symbol
185
+ encoder.text_token match, self[1] ? :float : :integer # TODO: send :hex/:octal/:binary
260
186
  end
261
-
262
- elsif match = scan(/ -[>=]? | [+!~^]=? | [*|&]{1,2}=? | >>? /x)
263
- value_expected = :set
264
- kind = :operator
265
-
266
- elsif value_expected and match = scan(unicode ? /#{patterns::HEREDOC_OPEN}/uo :
267
- /#{patterns::HEREDOC_OPEN}/o)
268
- indented = self[1] == '-'
187
+ value_expected = false
188
+
189
+ elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x)
190
+ value_expected = true
191
+ encoder.text_token match, :operator
192
+
193
+ elsif value_expected && match = scan(/#{patterns::HEREDOC_OPEN}/o)
269
194
  quote = self[3]
270
195
  delim = self[quote ? 4 : 2]
271
196
  kind = patterns::QUOTE_TO_TYPE[quote]
272
- tokens << [:open, kind]
273
- tokens << [match, :delimiter]
274
- match = :close
275
- heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart )
197
+ encoder.begin_group kind
198
+ encoder.text_token match, :delimiter
199
+ encoder.end_group kind
276
200
  heredocs ||= [] # create heredocs if empty
277
- heredocs << heredoc
278
-
279
- elsif value_expected and match = scan(/#{patterns::FANCY_START_CORRECT}/o)
280
- kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do
281
- raise_inspect 'Unknown fancy string: %%%p' % k, tokens
282
- end
283
- tokens << [:open, kind]
284
- state = patterns::StringState.new kind, interpreted, self[2]
285
- kind = :delimiter
286
-
287
- elsif value_expected and match = scan(unicode ? /#{patterns::CHARACTER}/uo :
288
- /#{patterns::CHARACTER}/o)
289
- kind = :integer
290
-
291
- elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
292
- value_expected = :set
293
- kind = :operator
294
-
201
+ heredocs << self.class::StringState.new(kind, quote != "'", delim,
202
+ self[1] == '-' ? :indented : :linestart)
203
+ value_expected = false
204
+
205
+ elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o)
206
+ kind = patterns::FANCY_STRING_KIND[self[1]]
207
+ encoder.begin_group kind
208
+ state = self.class::StringState.new kind, patterns::FANCY_STRING_INTERPRETED[self[1]], self[2]
209
+ encoder.text_token match, :delimiter
210
+
211
+ elsif value_expected && match = scan(/#{patterns::CHARACTER}/o)
212
+ value_expected = false
213
+ encoder.text_token match, :integer
214
+
215
+ elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x)
216
+ value_expected = true
217
+ encoder.text_token match, :operator
218
+
295
219
  elsif match = scan(/`/)
296
- if last_token_dot
297
- kind = :operator
298
- else
299
- tokens << [:open, :shell]
300
- kind = :delimiter
301
- state = patterns::StringState.new :shell, true, match
302
- end
303
-
220
+ encoder.begin_group :shell
221
+ encoder.text_token match, :delimiter
222
+ state = self.class::StringState.new :shell, true, match
223
+
304
224
  elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
305
225
  /#{patterns::GLOBAL_VARIABLE}/o)
306
- kind = :global_variable
307
-
226
+ encoder.text_token match, :global_variable
227
+ value_expected = false
228
+
308
229
  elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
309
230
  /#{patterns::CLASS_VARIABLE}/o)
310
- kind = :class_variable
311
-
231
+ encoder.text_token match, :class_variable
232
+ value_expected = false
233
+
234
+ elsif match = scan(/\\\z/)
235
+ encoder.text_token match, :space
236
+
312
237
  else
313
- if !unicode && !string.respond_to?(:encoding)
238
+ if method_call_expected
239
+ method_call_expected = false
240
+ next
241
+ end
242
+ unless unicode
314
243
  # check for unicode
315
- debug, $DEBUG = $DEBUG, false
244
+ $DEBUG_BEFORE, $DEBUG = $DEBUG, false
316
245
  begin
317
246
  if check(/./mu).size > 1
318
247
  # seems like we should try again with unicode
@@ -321,124 +250,212 @@ module Scanners
321
250
  rescue
322
251
  # bad unicode char; use getch
323
252
  ensure
324
- $DEBUG = debug
253
+ $DEBUG = $DEBUG_BEFORE
325
254
  end
326
255
  next if unicode
327
256
  end
328
- kind = :error
329
- match = scan(unicode ? /./mu : /./m)
330
-
257
+
258
+ encoder.text_token getch, :error
259
+
331
260
  end
332
-
333
- elsif state == :def_expected
334
- state = :initial
335
- if scan(/self\./)
336
- tokens << ['self', :pre_constant]
337
- tokens << ['.', :operator]
261
+
262
+ if last_state
263
+ state = last_state
264
+ last_state = nil
338
265
  end
266
+
267
+ elsif state == :def_expected
339
268
  if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
340
269
  /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
341
- kind = :method
270
+ encoder.text_token match, :method
271
+ state = :initial
272
+ else
273
+ last_state = :dot_expected
274
+ state = :initial
275
+ end
276
+
277
+ elsif state == :dot_expected
278
+ if match = scan(/\.|::/)
279
+ # invalid definition
280
+ state = :def_expected
281
+ encoder.text_token match, :operator
342
282
  else
343
- next
283
+ state = :initial
344
284
  end
345
-
285
+
346
286
  elsif state == :module_expected
347
287
  if match = scan(/<</)
348
- kind = :operator
288
+ encoder.text_token match, :operator
349
289
  else
350
290
  state = :initial
351
- if match = scan(unicode ? /(?:#{patterns::IDENT}::)*#{patterns::IDENT}/uo :
352
- /(?:#{patterns::IDENT}::)*#{patterns::IDENT}/o)
353
- kind = :class
354
- else
355
- next
291
+ if match = scan(unicode ? / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /oux :
292
+ / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
293
+ encoder.text_token match, :class
356
294
  end
357
295
  end
358
-
296
+
359
297
  elsif state == :undef_expected
360
298
  state = :undef_comma_expected
361
- if match = scan(unicode ? /#{patterns::METHOD_NAME_EX}/uo :
362
- /#{patterns::METHOD_NAME_EX}/o)
363
- kind = :method
364
- elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
365
- /#{patterns::SYMBOL}/o)
299
+ if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
300
+ /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
301
+ encoder.text_token match, :method
302
+ elsif match = scan(/#{patterns::SYMBOL}/o)
366
303
  case delim = match[1]
367
304
  when ?', ?"
368
- tokens << [:open, :symbol]
369
- tokens << [':', :symbol]
305
+ encoder.begin_group :symbol
306
+ encoder.text_token ':', :symbol
370
307
  match = delim.chr
371
- kind = :delimiter
372
- state = patterns::StringState.new :symbol, delim == ?", match
308
+ encoder.text_token match, :delimiter
309
+ state = self.class::StringState.new :symbol, delim == ?", match
373
310
  state.next_state = :undef_comma_expected
374
311
  else
375
- kind = :symbol
312
+ encoder.text_token match, :symbol
376
313
  end
377
314
  else
378
315
  state = :initial
379
- next
380
316
  end
381
-
382
- elsif state == :alias_expected
383
- match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
384
- /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
385
317
 
386
- if match
387
- tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)]
388
- tokens << [self[2], :space]
389
- tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)]
390
- end
391
- state = :initial
392
- next
393
-
394
318
  elsif state == :undef_comma_expected
395
319
  if match = scan(/,/)
396
- kind = :operator
320
+ encoder.text_token match, :operator
397
321
  state = :undef_expected
398
322
  else
399
323
  state = :initial
400
- next
401
324
  end
402
-
325
+
326
+ elsif state == :alias_expected
327
+ match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
328
+ /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
329
+
330
+ if match
331
+ encoder.text_token self[1], (self[1][0] == ?: ? :symbol : :method)
332
+ encoder.text_token self[2], :space
333
+ encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method)
334
+ end
335
+ state = :initial
336
+
337
+ else
338
+ #:nocov:
339
+ raise_inspect 'Unknown state: %p' % [state], encoder
340
+ #:nocov:
403
341
  end
404
- # }}}
405
342
 
406
- unless kind == :error
407
- if value_expected = value_expected == :set
408
- value_expected = :expect_colon if match == '?' || match == 'when'
409
- end
410
- last_token_dot = last_token_dot == :set
343
+ else # StringState
344
+
345
+ match = scan_until(state.pattern) || scan_rest
346
+ unless match.empty?
347
+ encoder.text_token match, :content
348
+ break if eos?
411
349
  end
412
350
 
413
- if $CODERAY_DEBUG and not kind
414
- raise_inspect 'Error token %p in line %d' %
415
- [[match, kind], line], tokens, state
351
+ if state.heredoc && self[1] # end of heredoc
352
+ match = getch
353
+ match << scan_until(/$/) unless eos?
354
+ encoder.text_token match, :delimiter unless match.empty?
355
+ encoder.end_group state.type
356
+ state = state.next_state
357
+ next
416
358
  end
417
- raise_inspect 'Empty token', tokens unless match
418
-
419
- tokens << [match, kind]
420
-
421
- if last_state
422
- state = last_state
423
- last_state = nil
359
+
360
+ case match = getch
361
+
362
+ when state.delim
363
+ if state.paren_depth
364
+ state.paren_depth -= 1
365
+ if state.paren_depth > 0
366
+ encoder.text_token match, :content
367
+ next
368
+ end
369
+ end
370
+ encoder.text_token match, :delimiter
371
+ if state.type == :regexp && !eos?
372
+ match = scan(/#{patterns::REGEXP_MODIFIERS}/o)
373
+ encoder.text_token match, :modifier unless match.empty?
374
+ end
375
+ encoder.end_group state.type
376
+ value_expected = false
377
+ state = state.next_state
378
+
379
+ when '\\'
380
+ if state.interpreted
381
+ if esc = scan(/#{patterns::ESCAPE}/o)
382
+ encoder.text_token match + esc, :char
383
+ else
384
+ encoder.text_token match, :error
385
+ end
386
+ else
387
+ case esc = getch
388
+ when nil
389
+ encoder.text_token match, :content
390
+ when state.delim, '\\'
391
+ encoder.text_token match + esc, :char
392
+ else
393
+ encoder.text_token match + esc, :content
394
+ end
395
+ end
396
+
397
+ when '#'
398
+ case peek(1)
399
+ when '{'
400
+ inline_block_stack ||= []
401
+ inline_block_stack << [state, inline_block_curly_depth, heredocs]
402
+ value_expected = true
403
+ state = :initial
404
+ inline_block_curly_depth = 1
405
+ encoder.begin_group :inline
406
+ encoder.text_token match + getch, :inline_delimiter
407
+ when '$', '@'
408
+ encoder.text_token match, :escape
409
+ last_state = state
410
+ state = :initial
411
+ else
412
+ #:nocov:
413
+ raise_inspect 'else-case # reached; #%p not handled' % [peek(1)], encoder
414
+ #:nocov:
415
+ end
416
+
417
+ when state.opening_paren
418
+ state.paren_depth += 1
419
+ encoder.text_token match, :content
420
+
421
+ else
422
+ #:nocov
423
+ raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], encoder
424
+ #:nocov:
425
+
424
426
  end
427
+
425
428
  end
429
+
430
+ end
431
+
432
+ # cleaning up
433
+ if state.is_a? StringState
434
+ encoder.end_group state.type
426
435
  end
427
-
428
- inline_block_stack << [state] if state.is_a? patterns::StringState
429
- until inline_block_stack.empty?
430
- this_block = inline_block_stack.pop
431
- tokens << [:close, :inline] if this_block.size > 1
432
- state = this_block.first
433
- tokens << [:close, state.type]
436
+
437
+ if options[:keep_state]
438
+ if state.is_a?(StringState) && state.heredoc
439
+ (heredocs ||= []).unshift state
440
+ state = :initial
441
+ elsif heredocs && heredocs.empty?
442
+ heredocs = nil
443
+ end
444
+ @state = state, heredocs
434
445
  end
435
-
436
- tokens
446
+
447
+ if inline_block_stack
448
+ until inline_block_stack.empty?
449
+ state, = *inline_block_stack.pop
450
+ encoder.end_group :inline
451
+ encoder.end_group state.type
452
+ end
453
+ end
454
+
455
+ encoder
437
456
  end
438
-
457
+
439
458
  end
440
-
459
+
441
460
  end
442
461
  end
443
-
444
- # vim:fdm=marker