coderay 1.0.0 → 1.0.0.598.pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79) hide show
  1. data/FOLDERS +49 -0
  2. data/Rakefile +6 -5
  3. data/bin/coderay +74 -190
  4. data/bin/coderay_stylesheet +4 -0
  5. data/{README_INDEX.rdoc → lib/README} +20 -10
  6. data/lib/coderay.rb +60 -62
  7. data/lib/coderay/duo.rb +55 -2
  8. data/lib/coderay/encoder.rb +39 -52
  9. data/lib/coderay/encoders/_map.rb +7 -11
  10. data/lib/coderay/encoders/comment_filter.rb +61 -0
  11. data/lib/coderay/encoders/count.rb +26 -11
  12. data/lib/coderay/encoders/debug.rb +60 -11
  13. data/lib/coderay/encoders/div.rb +8 -9
  14. data/lib/coderay/encoders/filter.rb +52 -12
  15. data/lib/coderay/encoders/html.rb +113 -106
  16. data/lib/coderay/encoders/html/css.rb +7 -2
  17. data/lib/coderay/encoders/html/numbering.rb +27 -24
  18. data/lib/coderay/encoders/html/output.rb +58 -15
  19. data/lib/coderay/encoders/json.rb +44 -37
  20. data/lib/coderay/encoders/lines_of_code.rb +56 -9
  21. data/lib/coderay/encoders/null.rb +13 -6
  22. data/lib/coderay/encoders/page.rb +8 -8
  23. data/lib/coderay/encoders/span.rb +9 -10
  24. data/lib/coderay/encoders/statistic.rb +114 -51
  25. data/lib/coderay/encoders/terminal.rb +10 -7
  26. data/lib/coderay/encoders/text.rb +36 -17
  27. data/lib/coderay/encoders/token_kind_filter.rb +58 -1
  28. data/lib/coderay/encoders/xml.rb +11 -13
  29. data/lib/coderay/encoders/yaml.rb +14 -16
  30. data/lib/coderay/for_redcloth.rb +1 -1
  31. data/lib/coderay/helpers/file_type.rb +240 -125
  32. data/lib/coderay/helpers/gzip_simple.rb +123 -0
  33. data/lib/coderay/helpers/plugin.rb +307 -241
  34. data/lib/coderay/helpers/word_list.rb +126 -65
  35. data/lib/coderay/scanner.rb +103 -153
  36. data/lib/coderay/scanners/_map.rb +16 -18
  37. data/lib/coderay/scanners/c.rb +13 -13
  38. data/lib/coderay/scanners/cpp.rb +6 -6
  39. data/lib/coderay/scanners/css.rb +48 -47
  40. data/lib/coderay/scanners/debug.rb +55 -9
  41. data/lib/coderay/scanners/delphi.rb +4 -4
  42. data/lib/coderay/scanners/diff.rb +25 -43
  43. data/lib/coderay/scanners/groovy.rb +2 -2
  44. data/lib/coderay/scanners/html.rb +30 -107
  45. data/lib/coderay/scanners/java.rb +5 -6
  46. data/lib/coderay/scanners/java/builtin_types.rb +0 -2
  47. data/lib/coderay/scanners/java_script.rb +6 -6
  48. data/lib/coderay/scanners/json.rb +6 -7
  49. data/lib/coderay/scanners/nitro_xhtml.rb +136 -0
  50. data/lib/coderay/scanners/php.rb +12 -13
  51. data/lib/coderay/scanners/plaintext.rb +26 -0
  52. data/lib/coderay/scanners/python.rb +4 -4
  53. data/lib/coderay/scanners/{erb.rb → rhtml.rb} +11 -19
  54. data/lib/coderay/scanners/ruby.rb +208 -219
  55. data/lib/coderay/scanners/ruby/patterns.rb +85 -18
  56. data/lib/coderay/scanners/scheme.rb +136 -0
  57. data/lib/coderay/scanners/sql.rb +22 -29
  58. data/lib/coderay/scanners/yaml.rb +10 -11
  59. data/lib/coderay/styles/_map.rb +2 -2
  60. data/lib/coderay/styles/alpha.rb +104 -102
  61. data/lib/coderay/styles/cycnus.rb +143 -0
  62. data/lib/coderay/styles/murphy.rb +123 -0
  63. data/lib/coderay/token_kinds.rb +86 -87
  64. data/lib/coderay/tokens.rb +169 -26
  65. data/test/functional/basic.rb +14 -200
  66. data/test/functional/examples.rb +14 -20
  67. data/test/functional/for_redcloth.rb +8 -15
  68. data/test/functional/load_plugin_scanner.rb +11 -0
  69. data/test/functional/suite.rb +6 -9
  70. data/test/functional/vhdl.rb +126 -0
  71. data/test/functional/word_list.rb +79 -0
  72. metadata +129 -107
  73. data/lib/coderay/helpers/gzip.rb +0 -41
  74. data/lib/coderay/scanners/clojure.rb +0 -217
  75. data/lib/coderay/scanners/haml.rb +0 -168
  76. data/lib/coderay/scanners/ruby/string_state.rb +0 -71
  77. data/lib/coderay/scanners/text.rb +0 -26
  78. data/lib/coderay/tokens_proxy.rb +0 -55
  79. data/lib/coderay/version.rb +0 -3
@@ -13,11 +13,10 @@ module Scanners
13
13
  ] # :nodoc:
14
14
 
15
15
  ESCAPE = / [bfnrt\\"\/] /x # :nodoc:
16
- UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc:
16
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc:
17
17
 
18
18
  protected
19
19
 
20
- # See http://json.org/ for a definition of the JSON lexic/grammar.
21
20
  def scan_tokens encoder, options
22
21
 
23
22
  state = :initial
@@ -45,14 +44,14 @@ module Scanners
45
44
  when '}', ']' then stack.pop # no error recovery, but works for valid JSON
46
45
  end
47
46
  elsif match = scan(/ true | false | null /x)
48
- encoder.text_token match, :value
47
+ encoder.text_token match, :value
49
48
  elsif match = scan(/ -? (?: 0 | [1-9]\d* ) /x)
49
+ kind = :integer
50
50
  if scan(/ \.\d+ (?:[eE][-+]?\d+)? | [eE][-+]? \d+ /x)
51
51
  match << matched
52
- encoder.text_token match, :float
53
- else
54
- encoder.text_token match, :integer
52
+ kind = :float
55
53
  end
54
+ encoder.text_token match, kind
56
55
  else
57
56
  encoder.text_token getch, :error
58
57
  end
@@ -77,7 +76,7 @@ module Scanners
77
76
  end
78
77
 
79
78
  else
80
- raise_inspect 'Unknown state: %p' % [state], encoder
79
+ raise_inspect 'Unknown state', encoder
81
80
 
82
81
  end
83
82
  end
@@ -0,0 +1,136 @@
1
+ module CodeRay
2
+ module Scanners
3
+
4
+ load :html
5
+ load :ruby
6
+
7
+ # Nitro XHTML Scanner
8
+ #
9
+ # Alias: +nitro+
10
+ class NitroXHTML < Scanner
11
+
12
+ register_for :nitro_xhtml
13
+ file_extension :xhtml
14
+ title 'Nitro XHTML'
15
+
16
+ KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
17
+
18
+ NITRO_RUBY_BLOCK = /
19
+ <\?r
20
+ (?>
21
+ [^\?]*
22
+ (?> \?(?!>) [^\?]* )*
23
+ )
24
+ (?: \?> )?
25
+ |
26
+ <ruby>
27
+ (?>
28
+ [^<]*
29
+ (?> <(?!\/ruby>) [^<]* )*
30
+ )
31
+ (?: <\/ruby> )?
32
+ |
33
+ <%
34
+ (?>
35
+ [^%]*
36
+ (?> %(?!>) [^%]* )*
37
+ )
38
+ (?: %> )?
39
+ /mx # :nodoc:
40
+
41
+ NITRO_VALUE_BLOCK = /
42
+ \#
43
+ (?:
44
+ \{
45
+ [^{}]*
46
+ (?>
47
+ \{ [^}]* \}
48
+ (?> [^{}]* )
49
+ )*
50
+ \}?
51
+ | \| [^|]* \|?
52
+ | \( [^)]* \)?
53
+ | \[ [^\]]* \]?
54
+ | \\ [^\\]* \\?
55
+ )
56
+ /x # :nodoc:
57
+
58
+ NITRO_ENTITY = /
59
+ % (?: \#\d+ | \w+ ) ;
60
+ / # :nodoc:
61
+
62
+ START_OF_RUBY = /
63
+ (?=[<\#%])
64
+ < (?: \?r | % | ruby> )
65
+ | \# [{(|]
66
+ | % (?: \#\d+ | \w+ ) ;
67
+ /x # :nodoc:
68
+
69
+ CLOSING_PAREN = Hash.new { |h, p| h[p] = p } # :nodoc:
70
+ CLOSING_PAREN.update( {
71
+ '(' => ')',
72
+ '[' => ']',
73
+ '{' => '}',
74
+ } )
75
+
76
+ protected
77
+
78
+ def setup
79
+ @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
80
+ @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
81
+ end
82
+
83
+ def reset_instance
84
+ super
85
+ @html_scanner.reset
86
+ end
87
+
88
+ def scan_tokens encoder, options
89
+
90
+ until eos?
91
+
92
+ if (match = scan_until(/(?=#{START_OF_RUBY})/o) || match = scan_until(/\z/)) and not match.empty?
93
+ @html_scanner.tokenize match
94
+
95
+ elsif match = scan(/#{NITRO_VALUE_BLOCK}/o)
96
+ start_tag = match[0,2]
97
+ delimiter = CLOSING_PAREN[start_tag[1,1]]
98
+ end_tag = match[-1,1] == delimiter ? delimiter : ''
99
+ encoder.begin_group :inline
100
+ encoder.text_token start_tag, :inline_delimiter
101
+ code = match[start_tag.size .. -1 - end_tag.size]
102
+ @ruby_scanner.tokenize code, :tokens => encoder
103
+ encoder.text_token end_tag, :inline_delimiter unless end_tag.empty?
104
+ encoder.end_group :inline
105
+
106
+ elsif match = scan(/#{NITRO_RUBY_BLOCK}/o)
107
+ start_tag = '<?r'
108
+ end_tag = match[-2,2] == '?>' ? '?>' : ''
109
+ encoder.begin_group :inline
110
+ encoder.text_token start_tag, :inline_delimiter
111
+ code = match[start_tag.size .. -(end_tag.size)-1]
112
+ @ruby_scanner.tokenize code, :tokens => encoder
113
+ encoder.text_token end_tag, :inline_delimiter unless end_tag.empty?
114
+ encoder.end_group :inline
115
+
116
+ elsif entity = scan(/#{NITRO_ENTITY}/o)
117
+ encoder.text_token entity, :entity
118
+
119
+ elsif scan(/%/)
120
+ encoder.text_token matched, :error
121
+
122
+ else
123
+ raise_inspect 'else-case reached!', encoder
124
+
125
+ end
126
+
127
+ end
128
+
129
+ encoder
130
+
131
+ end
132
+
133
+ end
134
+
135
+ end
136
+ end
@@ -10,7 +10,6 @@ module Scanners
10
10
 
11
11
  register_for :php
12
12
  file_extension 'php'
13
- encoding 'BINARY'
14
13
 
15
14
  KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
16
15
 
@@ -181,14 +180,14 @@ module Scanners
181
180
  $argc $argv
182
181
  ]
183
182
 
184
- IDENT_KIND = WordList::CaseIgnoring.new(:ident).
185
- add(KEYWORDS, :keyword).
186
- add(TYPES, :predefined_type).
187
- add(LANGUAGE_CONSTRUCTS, :keyword).
183
+ IDENT_KIND = CaseIgnoringWordList.new(:ident).
184
+ add(KEYWORDS, :reserved).
185
+ add(TYPES, :pre_type).
186
+ add(LANGUAGE_CONSTRUCTS, :reserved).
188
187
  add(BUILTIN_FUNCTIONS, :predefined).
189
- add(CLASSES, :predefined_constant).
188
+ add(CLASSES, :pre_constant).
190
189
  add(EXCEPTIONS, :exception).
191
- add(CONSTANTS, :predefined_constant)
190
+ add(CONSTANTS, :pre_constant)
192
191
 
193
192
  VARIABLE_KIND = WordList.new(:local_variable).
194
193
  add(PREDEFINED, :predefined)
@@ -234,8 +233,8 @@ module Scanners
234
233
  def scan_tokens encoder, options
235
234
 
236
235
  if check(RE::PHP_START) || # starts with <?
237
- (match?(/\s*<\S/) && check(/.{1,1000}#{RE::PHP_START}/om)) || # starts with tag and contains <?
238
- check(/.{0,1000}#{RE::HTML_INDICATOR}/om) ||
236
+ (match?(/\s*<\S/) && exist?(RE::PHP_START)) || # starts with tag and contains <?
237
+ exist?(RE::HTML_INDICATOR) ||
239
238
  check(/.{1,100}#{RE::PHP_START}/om) # PHP start after max 100 chars
240
239
  # is HTML with embedded PHP, so start with HTML
241
240
  states = [:initial]
@@ -261,7 +260,7 @@ module Scanners
261
260
  label_expected = true
262
261
  states << :php
263
262
  else
264
- match = scan_until(/(?=#{RE::PHP_START})/o) || scan_rest
263
+ match = scan_until(/(?=#{RE::PHP_START})/o) || scan_until(/\z/)
265
264
  @html_scanner.tokenize match unless match.empty?
266
265
  end
267
266
 
@@ -281,7 +280,7 @@ module Scanners
281
280
  label_expected = false
282
281
  if kind == :ident && match =~ /^[A-Z]/
283
282
  kind = :constant
284
- elsif kind == :keyword
283
+ elsif kind == :reserved
285
284
  case match
286
285
  when 'class'
287
286
  states << :class_expected
@@ -354,7 +353,7 @@ module Scanners
354
353
 
355
354
  elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
356
355
  encoder.begin_group :string
357
- # warn 'heredoc in heredoc?' if heredoc_delimiter
356
+ warn 'heredoc in heredoc?' if heredoc_delimiter
358
357
  heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
359
358
  encoder.text_token match, :delimiter
360
359
  states.push self[3] ? :sqstring : :dqstring
@@ -462,7 +461,7 @@ module Scanners
462
461
  states.push :php
463
462
  encoder.text_token match, :delimiter
464
463
  else
465
- encoder.text_token match, :content
464
+ encoder.text_token match, :string
466
465
  end
467
466
  elsif match = scan(/\$\{#{RE::IDENTIFIER}\}/o)
468
467
  encoder.text_token match, :local_variable
@@ -0,0 +1,26 @@
1
+ module CodeRay
2
+ module Scanners
3
+
4
+ # Scanner for plain text.
5
+ #
6
+ # Yields just one token of the kind :plain.
7
+ #
8
+ # Alias: +plain+
9
+ class Plaintext < Scanner
10
+
11
+ register_for :plaintext, :plain
12
+ title 'Plain text'
13
+
14
+ KINDS_NOT_LOC = [:plain] # :nodoc:
15
+
16
+ protected
17
+
18
+ def scan_tokens encoder, options
19
+ encoder.text_token string, :plain
20
+ encoder
21
+ end
22
+
23
+ end
24
+
25
+ end
26
+ end
@@ -58,7 +58,7 @@ module Scanners
58
58
  add(KEYWORDS, :keyword).
59
59
  add(OLD_KEYWORDS, :old_keyword).
60
60
  add(PREDEFINED_METHODS_AND_TYPES, :predefined).
61
- add(PREDEFINED_VARIABLES_AND_CONSTANTS, :predefined_constant).
61
+ add(PREDEFINED_VARIABLES_AND_CONSTANTS, :pre_constant).
62
62
  add(PREDEFINED_EXCEPTIONS, :exception) # :nodoc:
63
63
 
64
64
  NAME = / [^\W\d] \w* /x # :nodoc:
@@ -107,7 +107,7 @@ module Scanners
107
107
  string_raw = false
108
108
  string_type = nil
109
109
  docstring_coming = match?(/#{DOCSTRING_COMING}/o)
110
- last_token_dot = false
110
+ import_clause = class_name_follows = last_token_dot = false
111
111
  unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
112
112
  from_import_state = []
113
113
 
@@ -194,7 +194,7 @@ module Scanners
194
194
  encoder.text_token match, :hex
195
195
 
196
196
  elsif match = scan(/0[bB][01]+[lL]?/)
197
- encoder.text_token match, :binary
197
+ encoder.text_token match, :bin
198
198
 
199
199
  elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
200
200
  if scan(/[jJ]/)
@@ -205,7 +205,7 @@ module Scanners
205
205
  end
206
206
 
207
207
  elsif match = scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
208
- encoder.text_token match, :octal
208
+ encoder.text_token match, :oct
209
209
 
210
210
  elsif match = scan(/\d+([lL])?/)
211
211
  if self[1] == nil && scan(/[jJ]/)
@@ -5,23 +5,23 @@ module Scanners
5
5
  load :ruby
6
6
 
7
7
  # Scanner for HTML ERB templates.
8
- class ERB < Scanner
8
+ class RHTML < Scanner
9
9
 
10
- register_for :erb
10
+ register_for :rhtml
11
11
  title 'HTML ERB Template'
12
12
 
13
13
  KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
14
14
 
15
15
  ERB_RUBY_BLOCK = /
16
- (<%(?!%)[-=\#]?)
17
- ((?>
16
+ <%(?!%)[=-]?
17
+ (?>
18
18
  [^\-%]* # normal*
19
19
  (?> # special
20
20
  (?: %(?!>) | -(?!%>) )
21
21
  [^\-%]* # normal*
22
22
  )*
23
- ))
24
- ((?: -?%> )?)
23
+ )
24
+ (?: -?%> )?
25
25
  /x # :nodoc:
26
26
 
27
27
  START_OF_ERB = /
@@ -44,29 +44,21 @@ module Scanners
44
44
 
45
45
  until eos?
46
46
 
47
- if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_rest) and not match.empty?
47
+ if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty?
48
48
  @html_scanner.tokenize match, :tokens => encoder
49
49
 
50
50
  elsif match = scan(/#{ERB_RUBY_BLOCK}/o)
51
- start_tag = self[1]
52
- code = self[2]
53
- end_tag = self[3]
54
-
51
+ start_tag = match[/\A<%[-=]?/]
52
+ end_tag = match[/-?%?>?\z/]
55
53
  encoder.begin_group :inline
56
54
  encoder.text_token start_tag, :inline_delimiter
57
-
58
- if start_tag == '<%#'
59
- encoder.text_token code, :comment
60
- else
61
- @ruby_scanner.tokenize code, :tokens => encoder
62
- end unless code.empty?
63
-
55
+ code = match[start_tag.size .. -1 - end_tag.size]
56
+ @ruby_scanner.tokenize code
64
57
  encoder.text_token end_tag, :inline_delimiter unless end_tag.empty?
65
58
  encoder.end_group :inline
66
59
 
67
60
  else
68
61
  raise_inspect 'else-case reached!', encoder
69
-
70
62
  end
71
63
 
72
64
  end
@@ -1,6 +1,6 @@
1
1
  module CodeRay
2
2
  module Scanners
3
-
3
+
4
4
  # This scanner is really complex, since Ruby _is_ a complex language!
5
5
  #
6
6
  # It tries to highlight 100% of all common code,
@@ -8,16 +8,22 @@ module Scanners
8
8
  #
9
9
  # It is optimized for HTML highlighting, and is not very useful for
10
10
  # parsing or pretty printing.
11
+ #
12
+ # For now, I think it's better than the scanners in VIM or Syntax, or
13
+ # any highlighter I was able to find, except Caleb's RubyLexer.
14
+ #
15
+ # I hope it's also better than the rdoc/irb lexer.
16
+ #
17
+ # Alias: +irb+
11
18
  class Ruby < Scanner
12
-
19
+
13
20
  register_for :ruby
14
21
  file_extension 'rb'
22
+
23
+ helper :patterns
15
24
 
16
- autoload :Patterns, 'coderay/scanners/ruby/patterns'
17
- autoload :StringState, 'coderay/scanners/ruby/string_state'
18
-
19
- def interpreted_string_state
20
- StringState.new :string, true, '"'
25
+ unless defined? EncodingError
26
+ EncodingError = Class.new Exception # :nodoc:
21
27
  end
22
28
 
23
29
  protected
@@ -27,10 +33,11 @@ module Scanners
27
33
  end
28
34
 
29
35
  def scan_tokens encoder, options
30
- state, heredocs = options[:state] || @state
31
- heredocs = heredocs.dup if heredocs.is_a?(Array)
32
36
 
33
- if state && state.instance_of?(StringState)
37
+ patterns = Patterns # avoid constant lookup
38
+
39
+ state = @state
40
+ if state.instance_of? patterns::StringState
34
41
  encoder.begin_group state.type
35
42
  end
36
43
 
@@ -39,89 +46,165 @@ module Scanners
39
46
  method_call_expected = false
40
47
  value_expected = true
41
48
 
49
+ heredocs = nil
42
50
  inline_block_stack = nil
43
51
  inline_block_curly_depth = 0
44
52
 
45
- if heredocs
46
- state = heredocs.shift
47
- encoder.begin_group state.type
48
- heredocs = nil if heredocs.empty?
49
- end
50
-
51
53
  # def_object_stack = nil
52
54
  # def_object_paren_depth = 0
53
55
 
54
- patterns = Patterns # avoid constant lookup
55
-
56
56
  unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
57
57
 
58
58
  until eos?
59
-
60
- if state.instance_of? ::Symbol
61
-
62
- if match = scan(/[ \t\f\v]+/)
63
- encoder.text_token match, :space
64
-
65
- elsif match = scan(/\n/)
66
- if heredocs
67
- unscan # heredoc scanning needs \n at start
68
- state = heredocs.shift
69
- encoder.begin_group state.type
70
- heredocs = nil if heredocs.empty?
59
+
60
+ if state.instance_of? patterns::StringState
61
+
62
+ match = scan_until(state.pattern) || scan_until(/\z/)
63
+ encoder.text_token match, :content unless match.empty?
64
+ break if eos?
65
+
66
+ if state.heredoc and self[1] # end of heredoc
67
+ match = getch.to_s
68
+ match << scan_until(/$/) unless eos?
69
+ encoder.text_token match, :delimiter
70
+ encoder.end_group state.type
71
+ state = state.next_state
72
+ next
73
+ end
74
+
75
+ case match = getch
76
+
77
+ when state.delim
78
+ if state.paren_depth
79
+ state.paren_depth -= 1
80
+ if state.paren_depth > 0
81
+ encoder.text_token match, :nesting_delimiter
82
+ next
83
+ end
84
+ end
85
+ encoder.text_token match, :delimiter
86
+ if state.type == :regexp and not eos?
87
+ modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
88
+ encoder.text_token modifiers, :modifier unless modifiers.empty?
89
+ end
90
+ encoder.end_group state.type
91
+ value_expected = false
92
+ state = state.next_state
93
+
94
+ when '\\'
95
+ if state.interpreted
96
+ if esc = scan(/ #{patterns::ESCAPE} /ox)
97
+ encoder.text_token match + esc, :char
98
+ else
99
+ encoder.text_token match, :error
100
+ end
71
101
  else
72
- state = :initial if state == :undef_comma_expected
73
- encoder.text_token match, :space
102
+ case m = getch
103
+ when state.delim, '\\'
104
+ encoder.text_token match + m, :char
105
+ when nil
106
+ encoder.text_token match, :content
107
+ else
108
+ encoder.text_token match + m, :content
109
+ end
110
+ end
111
+
112
+ when '#'
113
+ case peek(1)
114
+ when '{'
115
+ inline_block_stack ||= []
116
+ inline_block_stack << [state, inline_block_curly_depth, heredocs]
74
117
  value_expected = true
118
+ state = :initial
119
+ inline_block_curly_depth = 1
120
+ encoder.begin_group :inline
121
+ encoder.text_token match + getch, :inline_delimiter
122
+ when '$', '@'
123
+ encoder.text_token match, :escape
124
+ last_state = state
125
+ state = :initial
126
+ else
127
+ raise_inspect 'else-case # reached; #%p not handled' %
128
+ [peek(1)], encoder
75
129
  end
130
+
131
+ when state.opening_paren
132
+ state.paren_depth += 1
133
+ encoder.text_token match, :nesting_delimiter
134
+
135
+ when /#{patterns::REGEXP_SYMBOLS}/ox
136
+ encoder.text_token match, :function
137
+
138
+ else
139
+ raise_inspect 'else-case " reached; %p not handled, state = %p' %
140
+ [match, state], encoder
141
+
142
+ end
143
+
144
+ else
145
+
146
+ if match = scan(/[ \t\f]+/)
147
+ match << scan(/\s*/) unless eos? || heredocs
148
+ value_expected = true if match.index(?\n)
149
+ encoder.text_token match, :space
76
150
 
77
- elsif match = scan(bol? ? / \#(!)?.* | #{patterns::RUBYDOC_OR_DATA} /ox : /\#.*/)
78
- encoder.text_token match, self[1] ? :doctype : :comment
79
-
80
- elsif match = scan(/\\\n/)
151
+ elsif match = scan(/\\?\n/)
152
+ if match == "\n"
153
+ value_expected = true
154
+ state = :initial if state == :undef_comma_expected
155
+ end
81
156
  if heredocs
82
157
  unscan # heredoc scanning needs \n at start
83
- encoder.text_token scan(/\\/), :space
84
158
  state = heredocs.shift
85
159
  encoder.begin_group state.type
86
160
  heredocs = nil if heredocs.empty?
161
+ next
87
162
  else
88
- encoder.text_token match, :space
163
+ match << scan(/\s*/) unless eos?
89
164
  end
165
+ encoder.text_token match, :space
166
+
167
+ elsif bol? && match = scan(/\#!.*/)
168
+ encoder.text_token match, :doctype
90
169
 
170
+ elsif match = scan(/\#.*/) or
171
+ (bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o))
172
+ encoder.text_token match, :comment
173
+
91
174
  elsif state == :initial
92
-
175
+
93
176
  # IDENTS #
94
- if !method_call_expected &&
177
+ if !method_call_expected and
95
178
  match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
96
179
  /#{patterns::METHOD_NAME}/o)
97
180
  value_expected = false
98
181
  kind = patterns::IDENT_KIND[match]
99
182
  if kind == :ident
100
- if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/))
183
+ if match[/^[A-Z]/] && !match[/[!?]$/] && !match?(/\(/)
101
184
  kind = :constant
102
185
  end
103
- elsif kind == :keyword
186
+ elsif kind == :reserved
104
187
  state = patterns::KEYWORD_NEW_STATE[match]
105
188
  value_expected = true if patterns::KEYWORDS_EXPECTING_VALUE[match]
106
189
  end
107
190
  value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
108
191
  encoder.text_token match, kind
109
-
110
- elsif method_call_expected &&
192
+
193
+ elsif method_call_expected and
111
194
  match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo :
112
195
  /#{patterns::METHOD_AFTER_DOT}/o)
113
- if method_call_expected == '::' && match[/\A[A-Z]/] && !match?(/\(/)
196
+ if method_call_expected == '::' && match[/^[A-Z]/] && !match?(/\(/)
114
197
  encoder.text_token match, :constant
115
198
  else
116
199
  encoder.text_token match, :ident
117
200
  end
118
201
  method_call_expected = false
119
202
  value_expected = check(/#{patterns::VALUE_FOLLOWS}/o)
120
-
203
+
121
204
  # OPERATORS #
122
- elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | (?: \.\.\.? | ==?=? | [,\(\[\{] )() | [\)\]\}] /x)
205
+ elsif not method_call_expected and match = scan(/ \.\.\.? | (\.|::) | [,\(\)\[\]\{\}] | ==?=? /x)
206
+ value_expected = match !~ / [.\)\]\}] /x || match =~ /\A\.\./
123
207
  method_call_expected = self[1]
124
- value_expected = !method_call_expected && self[2]
125
208
  if inline_block_stack
126
209
  case match
127
210
  when '{'
@@ -139,109 +222,111 @@ module Scanners
139
222
  end
140
223
  end
141
224
  encoder.text_token match, :operator
142
-
143
- elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
144
- /#{patterns::SYMBOL}/o)
145
- case delim = match[1]
146
- when ?', ?"
147
- encoder.begin_group :symbol
148
- encoder.text_token ':', :symbol
149
- match = delim.chr
150
- encoder.text_token match, :delimiter
151
- state = self.class::StringState.new :symbol, delim == ?", match
152
- else
153
- encoder.text_token match, :symbol
154
- value_expected = false
155
- end
156
-
157
- elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx)
225
+
226
+ elsif match = scan(/ ['"] /mx)
158
227
  encoder.begin_group :string
159
- if match.size == 1
160
- encoder.text_token match, :delimiter
161
- state = self.class::StringState.new :string, match == '"', match # important for streaming
162
- else
163
- encoder.text_token match[0,1], :delimiter
164
- encoder.text_token match[1..-2], :content if match.size > 2
165
- encoder.text_token match[-1,1], :delimiter
166
- encoder.end_group :string
167
- value_expected = false
168
- end
169
-
228
+ encoder.text_token match, :delimiter
229
+ state = patterns::StringState.new :string, match == '"', match # important for streaming
230
+
170
231
  elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
171
232
  /#{patterns::INSTANCE_VARIABLE}/o)
172
233
  value_expected = false
173
234
  encoder.text_token match, :instance_variable
174
-
175
- elsif value_expected && match = scan(/\//)
235
+
236
+ elsif value_expected and match = scan(/\//)
176
237
  encoder.begin_group :regexp
177
238
  encoder.text_token match, :delimiter
178
- state = self.class::StringState.new :regexp, true, '/'
179
-
239
+ interpreted = true
240
+ state = patterns::StringState.new :regexp, interpreted, '/'
241
+
180
242
  elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o)
181
243
  if method_call_expected
182
244
  encoder.text_token match, :error
183
245
  method_call_expected = false
184
246
  else
185
- encoder.text_token match, self[1] ? :float : :integer # TODO: send :hex/:octal/:binary
247
+ encoder.text_token match, self[1] ? :float : :integer
186
248
  end
187
249
  value_expected = false
188
-
189
- elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x)
250
+
251
+ elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
252
+ /#{patterns::SYMBOL}/o)
253
+ case delim = match[1]
254
+ when ?', ?"
255
+ encoder.begin_group :symbol
256
+ encoder.text_token ':', :symbol
257
+ match = delim.chr
258
+ encoder.text_token match, :delimiter
259
+ state = patterns::StringState.new :symbol, delim == ?", match
260
+ else
261
+ encoder.text_token match, :symbol
262
+ value_expected = false
263
+ end
264
+
265
+ elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
190
266
  value_expected = true
191
267
  encoder.text_token match, :operator
192
-
193
- elsif value_expected && match = scan(/#{patterns::HEREDOC_OPEN}/o)
268
+
269
+ elsif value_expected and match = scan(/#{patterns::HEREDOC_OPEN}/o)
270
+ indented = self[1] == '-'
194
271
  quote = self[3]
195
272
  delim = self[quote ? 4 : 2]
196
273
  kind = patterns::QUOTE_TO_TYPE[quote]
197
274
  encoder.begin_group kind
198
275
  encoder.text_token match, :delimiter
199
276
  encoder.end_group kind
277
+ heredoc = patterns::StringState.new kind, quote != '\'',
278
+ delim, (indented ? :indented : :linestart )
200
279
  heredocs ||= [] # create heredocs if empty
201
- heredocs << self.class::StringState.new(kind, quote != "'", delim,
202
- self[1] == '-' ? :indented : :linestart)
280
+ heredocs << heredoc
203
281
  value_expected = false
204
-
205
- elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o)
206
- kind = patterns::FANCY_STRING_KIND[self[1]]
282
+
283
+ elsif value_expected and match = scan(/#{patterns::FANCY_START}/o)
284
+ kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do
285
+ raise_inspect 'Unknown fancy string: %%%p' % k, encoder
286
+ end
207
287
  encoder.begin_group kind
208
- state = self.class::StringState.new kind, patterns::FANCY_STRING_INTERPRETED[self[1]], self[2]
288
+ state = patterns::StringState.new kind, interpreted, self[2]
209
289
  encoder.text_token match, :delimiter
210
-
211
- elsif value_expected && match = scan(/#{patterns::CHARACTER}/o)
290
+
291
+ elsif value_expected and match = scan(/#{patterns::CHARACTER}/o)
212
292
  value_expected = false
213
293
  encoder.text_token match, :integer
214
-
215
- elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x)
294
+
295
+ elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
216
296
  value_expected = true
217
297
  encoder.text_token match, :operator
218
-
298
+
219
299
  elsif match = scan(/`/)
220
- encoder.begin_group :shell
221
- encoder.text_token match, :delimiter
222
- state = self.class::StringState.new :shell, true, match
223
-
300
+ if method_call_expected
301
+ encoder.text_token match, :operator
302
+ value_expected = true
303
+ else
304
+ encoder.begin_group :shell
305
+ encoder.text_token match, :delimiter
306
+ state = patterns::StringState.new :shell, true, match
307
+ end
308
+
224
309
  elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
225
310
  /#{patterns::GLOBAL_VARIABLE}/o)
226
311
  encoder.text_token match, :global_variable
227
312
  value_expected = false
228
-
313
+
229
314
  elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
230
315
  /#{patterns::CLASS_VARIABLE}/o)
231
316
  encoder.text_token match, :class_variable
232
317
  value_expected = false
233
-
318
+
234
319
  elsif match = scan(/\\\z/)
235
320
  encoder.text_token match, :space
236
-
321
+
237
322
  else
238
323
  if method_call_expected
239
324
  method_call_expected = false
240
325
  next
241
326
  end
242
- unless unicode
327
+ if !unicode
243
328
  # check for unicode
244
- $DEBUG_BEFORE, $DEBUG = $DEBUG, false
329
+ debug, $DEBUG = $DEBUG, false
245
330
  begin
246
331
  if check(/./mu).size > 1
247
332
  # seems like we should try again with unicode
@@ -250,7 +335,7 @@ module Scanners
250
335
  rescue
251
336
  # bad unicode char; use getch
252
337
  ensure
253
- $DEBUG = $DEBUG_BEFORE
338
+ $DEBUG = debug
254
339
  end
255
340
  next if unicode
256
341
  end
@@ -263,7 +348,7 @@ module Scanners
263
348
  state = last_state
264
349
  last_state = nil
265
350
  end
266
-
351
+
267
352
  elsif state == :def_expected
268
353
  if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
269
354
  /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
@@ -273,7 +358,7 @@ module Scanners
273
358
  last_state = :dot_expected
274
359
  state = :initial
275
360
  end
276
-
361
+
277
362
  elsif state == :dot_expected
278
363
  if match = scan(/\.|::/)
279
364
  # invalid definition
@@ -282,7 +367,7 @@ module Scanners
282
367
  else
283
368
  state = :initial
284
369
  end
285
-
370
+
286
371
  elsif state == :module_expected
287
372
  if match = scan(/<</)
288
373
  encoder.text_token match, :operator
@@ -293,7 +378,7 @@ module Scanners
293
378
  encoder.text_token match, :class
294
379
  end
295
380
  end
296
-
381
+
297
382
  elsif state == :undef_expected
298
383
  state = :undef_comma_expected
299
384
  if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
@@ -306,7 +391,7 @@ module Scanners
306
391
  encoder.text_token ':', :symbol
307
392
  match = delim.chr
308
393
  encoder.text_token match, :delimiter
309
- state = self.class::StringState.new :symbol, delim == ?", match
394
+ state = patterns::StringState.new :symbol, delim == ?", match
310
395
  state.next_state = :undef_comma_expected
311
396
  else
312
397
  encoder.text_token match, :symbol
@@ -314,7 +399,7 @@ module Scanners
314
399
  else
315
400
  state = :initial
316
401
  end
317
-
402
+
318
403
  elsif state == :undef_comma_expected
319
404
  if match = scan(/,/)
320
405
  encoder.text_token match, :operator
@@ -322,7 +407,7 @@ module Scanners
322
407
  else
323
408
  state = :initial
324
409
  end
325
-
410
+
326
411
  elsif state == :alias_expected
327
412
  match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
328
413
  /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
@@ -333,129 +418,33 @@ module Scanners
333
418
  encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method)
334
419
  end
335
420
  state = :initial
336
-
421
+
337
422
  else
338
- #:nocov:
339
423
  raise_inspect 'Unknown state: %p' % [state], encoder
340
- #:nocov:
341
- end
342
-
343
- else # StringState
344
-
345
- match = scan_until(state.pattern) || scan_rest
346
- unless match.empty?
347
- encoder.text_token match, :content
348
- break if eos?
349
- end
350
-
351
- if state.heredoc && self[1] # end of heredoc
352
- match = getch
353
- match << scan_until(/$/) unless eos?
354
- encoder.text_token match, :delimiter unless match.empty?
355
- encoder.end_group state.type
356
- state = state.next_state
357
- next
358
- end
359
-
360
- case match = getch
361
-
362
- when state.delim
363
- if state.paren_depth
364
- state.paren_depth -= 1
365
- if state.paren_depth > 0
366
- encoder.text_token match, :content
367
- next
368
- end
369
- end
370
- encoder.text_token match, :delimiter
371
- if state.type == :regexp && !eos?
372
- match = scan(/#{patterns::REGEXP_MODIFIERS}/o)
373
- encoder.text_token match, :modifier unless match.empty?
374
- end
375
- encoder.end_group state.type
376
- value_expected = false
377
- state = state.next_state
378
-
379
- when '\\'
380
- if state.interpreted
381
- if esc = scan(/#{patterns::ESCAPE}/o)
382
- encoder.text_token match + esc, :char
383
- else
384
- encoder.text_token match, :error
385
- end
386
- else
387
- case esc = getch
388
- when nil
389
- encoder.text_token match, :content
390
- when state.delim, '\\'
391
- encoder.text_token match + esc, :char
392
- else
393
- encoder.text_token match + esc, :content
394
- end
395
- end
396
-
397
- when '#'
398
- case peek(1)
399
- when '{'
400
- inline_block_stack ||= []
401
- inline_block_stack << [state, inline_block_curly_depth, heredocs]
402
- value_expected = true
403
- state = :initial
404
- inline_block_curly_depth = 1
405
- encoder.begin_group :inline
406
- encoder.text_token match + getch, :inline_delimiter
407
- when '$', '@'
408
- encoder.text_token match, :escape
409
- last_state = state
410
- state = :initial
411
- else
412
- #:nocov:
413
- raise_inspect 'else-case # reached; #%p not handled' % [peek(1)], encoder
414
- #:nocov:
415
- end
416
-
417
- when state.opening_paren
418
- state.paren_depth += 1
419
- encoder.text_token match, :content
420
-
421
- else
422
- #:nocov
423
- raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], encoder
424
- #:nocov:
425
-
426
424
  end
427
425
 
428
426
  end
429
-
430
427
  end
431
-
428
+
432
429
  # cleaning up
433
- if state.is_a? StringState
434
- encoder.end_group state.type
435
- end
436
-
437
430
  if options[:keep_state]
438
- if state.is_a?(StringState) && state.heredoc
439
- (heredocs ||= []).unshift state
440
- state = :initial
441
- elsif heredocs && heredocs.empty?
442
- heredocs = nil
443
- end
444
- @state = state, heredocs
431
+ @state = state
432
+ end
433
+ if state.is_a? patterns::StringState
434
+ encoder.end_group state.type
445
435
  end
446
-
447
436
  if inline_block_stack
448
437
  until inline_block_stack.empty?
449
- state, = *inline_block_stack.pop
450
- encoder.end_group :inline
438
+ state, *more = inline_block_stack.pop
439
+ encoder.end_group :inline if more
451
440
  encoder.end_group state.type
452
441
  end
453
442
  end
454
-
443
+
455
444
  encoder
456
445
  end
457
-
446
+
458
447
  end
459
-
448
+
460
449
  end
461
450
  end