coderay 1.0.0 → 1.0.0.598.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. data/FOLDERS +49 -0
  2. data/Rakefile +6 -5
  3. data/bin/coderay +74 -190
  4. data/bin/coderay_stylesheet +4 -0
  5. data/{README_INDEX.rdoc → lib/README} +20 -10
  6. data/lib/coderay.rb +60 -62
  7. data/lib/coderay/duo.rb +55 -2
  8. data/lib/coderay/encoder.rb +39 -52
  9. data/lib/coderay/encoders/_map.rb +7 -11
  10. data/lib/coderay/encoders/comment_filter.rb +61 -0
  11. data/lib/coderay/encoders/count.rb +26 -11
  12. data/lib/coderay/encoders/debug.rb +60 -11
  13. data/lib/coderay/encoders/div.rb +8 -9
  14. data/lib/coderay/encoders/filter.rb +52 -12
  15. data/lib/coderay/encoders/html.rb +113 -106
  16. data/lib/coderay/encoders/html/css.rb +7 -2
  17. data/lib/coderay/encoders/html/numbering.rb +27 -24
  18. data/lib/coderay/encoders/html/output.rb +58 -15
  19. data/lib/coderay/encoders/json.rb +44 -37
  20. data/lib/coderay/encoders/lines_of_code.rb +56 -9
  21. data/lib/coderay/encoders/null.rb +13 -6
  22. data/lib/coderay/encoders/page.rb +8 -8
  23. data/lib/coderay/encoders/span.rb +9 -10
  24. data/lib/coderay/encoders/statistic.rb +114 -51
  25. data/lib/coderay/encoders/terminal.rb +10 -7
  26. data/lib/coderay/encoders/text.rb +36 -17
  27. data/lib/coderay/encoders/token_kind_filter.rb +58 -1
  28. data/lib/coderay/encoders/xml.rb +11 -13
  29. data/lib/coderay/encoders/yaml.rb +14 -16
  30. data/lib/coderay/for_redcloth.rb +1 -1
  31. data/lib/coderay/helpers/file_type.rb +240 -125
  32. data/lib/coderay/helpers/gzip_simple.rb +123 -0
  33. data/lib/coderay/helpers/plugin.rb +307 -241
  34. data/lib/coderay/helpers/word_list.rb +126 -65
  35. data/lib/coderay/scanner.rb +103 -153
  36. data/lib/coderay/scanners/_map.rb +16 -18
  37. data/lib/coderay/scanners/c.rb +13 -13
  38. data/lib/coderay/scanners/cpp.rb +6 -6
  39. data/lib/coderay/scanners/css.rb +48 -47
  40. data/lib/coderay/scanners/debug.rb +55 -9
  41. data/lib/coderay/scanners/delphi.rb +4 -4
  42. data/lib/coderay/scanners/diff.rb +25 -43
  43. data/lib/coderay/scanners/groovy.rb +2 -2
  44. data/lib/coderay/scanners/html.rb +30 -107
  45. data/lib/coderay/scanners/java.rb +5 -6
  46. data/lib/coderay/scanners/java/builtin_types.rb +0 -2
  47. data/lib/coderay/scanners/java_script.rb +6 -6
  48. data/lib/coderay/scanners/json.rb +6 -7
  49. data/lib/coderay/scanners/nitro_xhtml.rb +136 -0
  50. data/lib/coderay/scanners/php.rb +12 -13
  51. data/lib/coderay/scanners/plaintext.rb +26 -0
  52. data/lib/coderay/scanners/python.rb +4 -4
  53. data/lib/coderay/scanners/{erb.rb → rhtml.rb} +11 -19
  54. data/lib/coderay/scanners/ruby.rb +208 -219
  55. data/lib/coderay/scanners/ruby/patterns.rb +85 -18
  56. data/lib/coderay/scanners/scheme.rb +136 -0
  57. data/lib/coderay/scanners/sql.rb +22 -29
  58. data/lib/coderay/scanners/yaml.rb +10 -11
  59. data/lib/coderay/styles/_map.rb +2 -2
  60. data/lib/coderay/styles/alpha.rb +104 -102
  61. data/lib/coderay/styles/cycnus.rb +143 -0
  62. data/lib/coderay/styles/murphy.rb +123 -0
  63. data/lib/coderay/token_kinds.rb +86 -87
  64. data/lib/coderay/tokens.rb +169 -26
  65. data/test/functional/basic.rb +14 -200
  66. data/test/functional/examples.rb +14 -20
  67. data/test/functional/for_redcloth.rb +8 -15
  68. data/test/functional/load_plugin_scanner.rb +11 -0
  69. data/test/functional/suite.rb +6 -9
  70. data/test/functional/vhdl.rb +126 -0
  71. data/test/functional/word_list.rb +79 -0
  72. metadata +129 -107
  73. data/lib/coderay/helpers/gzip.rb +0 -41
  74. data/lib/coderay/scanners/clojure.rb +0 -217
  75. data/lib/coderay/scanners/haml.rb +0 -168
  76. data/lib/coderay/scanners/ruby/string_state.rb +0 -71
  77. data/lib/coderay/scanners/text.rb +0 -26
  78. data/lib/coderay/tokens_proxy.rb +0 -55
  79. data/lib/coderay/version.rb +0 -3
@@ -13,11 +13,10 @@ module Scanners
13
13
  ] # :nodoc:
14
14
 
15
15
  ESCAPE = / [bfnrt\\"\/] /x # :nodoc:
16
- UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc:
16
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc:
17
17
 
18
18
  protected
19
19
 
20
- # See http://json.org/ for a definition of the JSON lexic/grammar.
21
20
  def scan_tokens encoder, options
22
21
 
23
22
  state = :initial
@@ -45,14 +44,14 @@ module Scanners
45
44
  when '}', ']' then stack.pop # no error recovery, but works for valid JSON
46
45
  end
47
46
  elsif match = scan(/ true | false | null /x)
48
- encoder.text_token match, :value
47
+ encoder.text_token match, :value
49
48
  elsif match = scan(/ -? (?: 0 | [1-9]\d* ) /x)
49
+ kind = :integer
50
50
  if scan(/ \.\d+ (?:[eE][-+]?\d+)? | [eE][-+]? \d+ /x)
51
51
  match << matched
52
- encoder.text_token match, :float
53
- else
54
- encoder.text_token match, :integer
52
+ kind = :float
55
53
  end
54
+ encoder.text_token match, kind
56
55
  else
57
56
  encoder.text_token getch, :error
58
57
  end
@@ -77,7 +76,7 @@ module Scanners
77
76
  end
78
77
 
79
78
  else
80
- raise_inspect 'Unknown state: %p' % [state], encoder
79
+ raise_inspect 'Unknown state', encoder
81
80
 
82
81
  end
83
82
  end
@@ -0,0 +1,136 @@
1
+ module CodeRay
2
+ module Scanners
3
+
4
+ load :html
5
+ load :ruby
6
+
7
+ # Nitro XHTML Scanner
8
+ #
9
+ # Alias: +nitro+
10
+ class NitroXHTML < Scanner
11
+
12
+ register_for :nitro_xhtml
13
+ file_extension :xhtml
14
+ title 'Nitro XHTML'
15
+
16
+ KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
17
+
18
+ NITRO_RUBY_BLOCK = /
19
+ <\?r
20
+ (?>
21
+ [^\?]*
22
+ (?> \?(?!>) [^\?]* )*
23
+ )
24
+ (?: \?> )?
25
+ |
26
+ <ruby>
27
+ (?>
28
+ [^<]*
29
+ (?> <(?!\/ruby>) [^<]* )*
30
+ )
31
+ (?: <\/ruby> )?
32
+ |
33
+ <%
34
+ (?>
35
+ [^%]*
36
+ (?> %(?!>) [^%]* )*
37
+ )
38
+ (?: %> )?
39
+ /mx # :nodoc:
40
+
41
+ NITRO_VALUE_BLOCK = /
42
+ \#
43
+ (?:
44
+ \{
45
+ [^{}]*
46
+ (?>
47
+ \{ [^}]* \}
48
+ (?> [^{}]* )
49
+ )*
50
+ \}?
51
+ | \| [^|]* \|?
52
+ | \( [^)]* \)?
53
+ | \[ [^\]]* \]?
54
+ | \\ [^\\]* \\?
55
+ )
56
+ /x # :nodoc:
57
+
58
+ NITRO_ENTITY = /
59
+ % (?: \#\d+ | \w+ ) ;
60
+ / # :nodoc:
61
+
62
+ START_OF_RUBY = /
63
+ (?=[<\#%])
64
+ < (?: \?r | % | ruby> )
65
+ | \# [{(|]
66
+ | % (?: \#\d+ | \w+ ) ;
67
+ /x # :nodoc:
68
+
69
+ CLOSING_PAREN = Hash.new { |h, p| h[p] = p } # :nodoc:
70
+ CLOSING_PAREN.update( {
71
+ '(' => ')',
72
+ '[' => ']',
73
+ '{' => '}',
74
+ } )
75
+
76
+ protected
77
+
78
+ def setup
79
+ @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
80
+ @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
81
+ end
82
+
83
+ def reset_instance
84
+ super
85
+ @html_scanner.reset
86
+ end
87
+
88
+ def scan_tokens encoder, options
89
+
90
+ until eos?
91
+
92
+ if (match = scan_until(/(?=#{START_OF_RUBY})/o) || match = scan_until(/\z/)) and not match.empty?
93
+ @html_scanner.tokenize match
94
+
95
+ elsif match = scan(/#{NITRO_VALUE_BLOCK}/o)
96
+ start_tag = match[0,2]
97
+ delimiter = CLOSING_PAREN[start_tag[1,1]]
98
+ end_tag = match[-1,1] == delimiter ? delimiter : ''
99
+ encoder.begin_group :inline
100
+ encoder.text_token start_tag, :inline_delimiter
101
+ code = match[start_tag.size .. -1 - end_tag.size]
102
+ @ruby_scanner.tokenize code, :tokens => encoder
103
+ encoder.text_token end_tag, :inline_delimiter unless end_tag.empty?
104
+ encoder.end_group :inline
105
+
106
+ elsif match = scan(/#{NITRO_RUBY_BLOCK}/o)
107
+ start_tag = '<?r'
108
+ end_tag = match[-2,2] == '?>' ? '?>' : ''
109
+ encoder.begin_group :inline
110
+ encoder.text_token start_tag, :inline_delimiter
111
+ code = match[start_tag.size .. -(end_tag.size)-1]
112
+ @ruby_scanner.tokenize code, :tokens => encoder
113
+ encoder.text_token end_tag, :inline_delimiter unless end_tag.empty?
114
+ encoder.end_group :inline
115
+
116
+ elsif entity = scan(/#{NITRO_ENTITY}/o)
117
+ encoder.text_token entity, :entity
118
+
119
+ elsif scan(/%/)
120
+ encoder.text_token matched, :error
121
+
122
+ else
123
+ raise_inspect 'else-case reached!', encoder
124
+
125
+ end
126
+
127
+ end
128
+
129
+ encoder
130
+
131
+ end
132
+
133
+ end
134
+
135
+ end
136
+ end
@@ -10,7 +10,6 @@ module Scanners
10
10
 
11
11
  register_for :php
12
12
  file_extension 'php'
13
- encoding 'BINARY'
14
13
 
15
14
  KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
16
15
 
@@ -181,14 +180,14 @@ module Scanners
181
180
  $argc $argv
182
181
  ]
183
182
 
184
- IDENT_KIND = WordList::CaseIgnoring.new(:ident).
185
- add(KEYWORDS, :keyword).
186
- add(TYPES, :predefined_type).
187
- add(LANGUAGE_CONSTRUCTS, :keyword).
183
+ IDENT_KIND = CaseIgnoringWordList.new(:ident).
184
+ add(KEYWORDS, :reserved).
185
+ add(TYPES, :pre_type).
186
+ add(LANGUAGE_CONSTRUCTS, :reserved).
188
187
  add(BUILTIN_FUNCTIONS, :predefined).
189
- add(CLASSES, :predefined_constant).
188
+ add(CLASSES, :pre_constant).
190
189
  add(EXCEPTIONS, :exception).
191
- add(CONSTANTS, :predefined_constant)
190
+ add(CONSTANTS, :pre_constant)
192
191
 
193
192
  VARIABLE_KIND = WordList.new(:local_variable).
194
193
  add(PREDEFINED, :predefined)
@@ -234,8 +233,8 @@ module Scanners
234
233
  def scan_tokens encoder, options
235
234
 
236
235
  if check(RE::PHP_START) || # starts with <?
237
- (match?(/\s*<\S/) && check(/.{1,1000}#{RE::PHP_START}/om)) || # starts with tag and contains <?
238
- check(/.{0,1000}#{RE::HTML_INDICATOR}/om) ||
236
+ (match?(/\s*<\S/) && exist?(RE::PHP_START)) || # starts with tag and contains <?
237
+ exist?(RE::HTML_INDICATOR) ||
239
238
  check(/.{1,100}#{RE::PHP_START}/om) # PHP start after max 100 chars
240
239
  # is HTML with embedded PHP, so start with HTML
241
240
  states = [:initial]
@@ -261,7 +260,7 @@ module Scanners
261
260
  label_expected = true
262
261
  states << :php
263
262
  else
264
- match = scan_until(/(?=#{RE::PHP_START})/o) || scan_rest
263
+ match = scan_until(/(?=#{RE::PHP_START})/o) || scan_until(/\z/)
265
264
  @html_scanner.tokenize match unless match.empty?
266
265
  end
267
266
 
@@ -281,7 +280,7 @@ module Scanners
281
280
  label_expected = false
282
281
  if kind == :ident && match =~ /^[A-Z]/
283
282
  kind = :constant
284
- elsif kind == :keyword
283
+ elsif kind == :reserved
285
284
  case match
286
285
  when 'class'
287
286
  states << :class_expected
@@ -354,7 +353,7 @@ module Scanners
354
353
 
355
354
  elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
356
355
  encoder.begin_group :string
357
- # warn 'heredoc in heredoc?' if heredoc_delimiter
356
+ warn 'heredoc in heredoc?' if heredoc_delimiter
358
357
  heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
359
358
  encoder.text_token match, :delimiter
360
359
  states.push self[3] ? :sqstring : :dqstring
@@ -462,7 +461,7 @@ module Scanners
462
461
  states.push :php
463
462
  encoder.text_token match, :delimiter
464
463
  else
465
- encoder.text_token match, :content
464
+ encoder.text_token match, :string
466
465
  end
467
466
  elsif match = scan(/\$\{#{RE::IDENTIFIER}\}/o)
468
467
  encoder.text_token match, :local_variable
@@ -0,0 +1,26 @@
1
+ module CodeRay
2
+ module Scanners
3
+
4
+ # Scanner for plain text.
5
+ #
6
+ # Yields just one token of the kind :plain.
7
+ #
8
+ # Alias: +plain+
9
+ class Plaintext < Scanner
10
+
11
+ register_for :plaintext, :plain
12
+ title 'Plain text'
13
+
14
+ KINDS_NOT_LOC = [:plain] # :nodoc:
15
+
16
+ protected
17
+
18
+ def scan_tokens encoder, options
19
+ encoder.text_token string, :plain
20
+ encoder
21
+ end
22
+
23
+ end
24
+
25
+ end
26
+ end
@@ -58,7 +58,7 @@ module Scanners
58
58
  add(KEYWORDS, :keyword).
59
59
  add(OLD_KEYWORDS, :old_keyword).
60
60
  add(PREDEFINED_METHODS_AND_TYPES, :predefined).
61
- add(PREDEFINED_VARIABLES_AND_CONSTANTS, :predefined_constant).
61
+ add(PREDEFINED_VARIABLES_AND_CONSTANTS, :pre_constant).
62
62
  add(PREDEFINED_EXCEPTIONS, :exception) # :nodoc:
63
63
 
64
64
  NAME = / [^\W\d] \w* /x # :nodoc:
@@ -107,7 +107,7 @@ module Scanners
107
107
  string_raw = false
108
108
  string_type = nil
109
109
  docstring_coming = match?(/#{DOCSTRING_COMING}/o)
110
- last_token_dot = false
110
+ import_clause = class_name_follows = last_token_dot = false
111
111
  unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
112
112
  from_import_state = []
113
113
 
@@ -194,7 +194,7 @@ module Scanners
194
194
  encoder.text_token match, :hex
195
195
 
196
196
  elsif match = scan(/0[bB][01]+[lL]?/)
197
- encoder.text_token match, :binary
197
+ encoder.text_token match, :bin
198
198
 
199
199
  elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
200
200
  if scan(/[jJ]/)
@@ -205,7 +205,7 @@ module Scanners
205
205
  end
206
206
 
207
207
  elsif match = scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
208
- encoder.text_token match, :octal
208
+ encoder.text_token match, :oct
209
209
 
210
210
  elsif match = scan(/\d+([lL])?/)
211
211
  if self[1] == nil && scan(/[jJ]/)
@@ -5,23 +5,23 @@ module Scanners
5
5
  load :ruby
6
6
 
7
7
  # Scanner for HTML ERB templates.
8
- class ERB < Scanner
8
+ class RHTML < Scanner
9
9
 
10
- register_for :erb
10
+ register_for :rhtml
11
11
  title 'HTML ERB Template'
12
12
 
13
13
  KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
14
14
 
15
15
  ERB_RUBY_BLOCK = /
16
- (<%(?!%)[-=\#]?)
17
- ((?>
16
+ <%(?!%)[=-]?
17
+ (?>
18
18
  [^\-%]* # normal*
19
19
  (?> # special
20
20
  (?: %(?!>) | -(?!%>) )
21
21
  [^\-%]* # normal*
22
22
  )*
23
- ))
24
- ((?: -?%> )?)
23
+ )
24
+ (?: -?%> )?
25
25
  /x # :nodoc:
26
26
 
27
27
  START_OF_ERB = /
@@ -44,29 +44,21 @@ module Scanners
44
44
 
45
45
  until eos?
46
46
 
47
- if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_rest) and not match.empty?
47
+ if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty?
48
48
  @html_scanner.tokenize match, :tokens => encoder
49
49
 
50
50
  elsif match = scan(/#{ERB_RUBY_BLOCK}/o)
51
- start_tag = self[1]
52
- code = self[2]
53
- end_tag = self[3]
54
-
51
+ start_tag = match[/\A<%[-=]?/]
52
+ end_tag = match[/-?%?>?\z/]
55
53
  encoder.begin_group :inline
56
54
  encoder.text_token start_tag, :inline_delimiter
57
-
58
- if start_tag == '<%#'
59
- encoder.text_token code, :comment
60
- else
61
- @ruby_scanner.tokenize code, :tokens => encoder
62
- end unless code.empty?
63
-
55
+ code = match[start_tag.size .. -1 - end_tag.size]
56
+ @ruby_scanner.tokenize code
64
57
  encoder.text_token end_tag, :inline_delimiter unless end_tag.empty?
65
58
  encoder.end_group :inline
66
59
 
67
60
  else
68
61
  raise_inspect 'else-case reached!', encoder
69
-
70
62
  end
71
63
 
72
64
  end
@@ -1,6 +1,6 @@
1
1
  module CodeRay
2
2
  module Scanners
3
-
3
+
4
4
  # This scanner is really complex, since Ruby _is_ a complex language!
5
5
  #
6
6
  # It tries to highlight 100% of all common code,
@@ -8,16 +8,22 @@ module Scanners
8
8
  #
9
9
  # It is optimized for HTML highlighting, and is not very useful for
10
10
  # parsing or pretty printing.
11
+ #
12
+ # For now, I think it's better than the scanners in VIM or Syntax, or
13
+ # any highlighter I was able to find, except Caleb's RubyLexer.
14
+ #
15
+ # I hope it's also better than the rdoc/irb lexer.
16
+ #
17
+ # Alias: +irb+
11
18
  class Ruby < Scanner
12
-
19
+
13
20
  register_for :ruby
14
21
  file_extension 'rb'
22
+
23
+ helper :patterns
15
24
 
16
- autoload :Patterns, 'coderay/scanners/ruby/patterns'
17
- autoload :StringState, 'coderay/scanners/ruby/string_state'
18
-
19
- def interpreted_string_state
20
- StringState.new :string, true, '"'
25
+ unless defined? EncodingError
26
+ EncodingError = Class.new Exception # :nodoc:
21
27
  end
22
28
 
23
29
  protected
@@ -27,10 +33,11 @@ module Scanners
27
33
  end
28
34
 
29
35
  def scan_tokens encoder, options
30
- state, heredocs = options[:state] || @state
31
- heredocs = heredocs.dup if heredocs.is_a?(Array)
32
36
 
33
- if state && state.instance_of?(StringState)
37
+ patterns = Patterns # avoid constant lookup
38
+
39
+ state = @state
40
+ if state.instance_of? patterns::StringState
34
41
  encoder.begin_group state.type
35
42
  end
36
43
 
@@ -39,89 +46,165 @@ module Scanners
39
46
  method_call_expected = false
40
47
  value_expected = true
41
48
 
49
+ heredocs = nil
42
50
  inline_block_stack = nil
43
51
  inline_block_curly_depth = 0
44
52
 
45
- if heredocs
46
- state = heredocs.shift
47
- encoder.begin_group state.type
48
- heredocs = nil if heredocs.empty?
49
- end
50
-
51
53
  # def_object_stack = nil
52
54
  # def_object_paren_depth = 0
53
55
 
54
- patterns = Patterns # avoid constant lookup
55
-
56
56
  unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
57
57
 
58
58
  until eos?
59
-
60
- if state.instance_of? ::Symbol
61
-
62
- if match = scan(/[ \t\f\v]+/)
63
- encoder.text_token match, :space
64
-
65
- elsif match = scan(/\n/)
66
- if heredocs
67
- unscan # heredoc scanning needs \n at start
68
- state = heredocs.shift
69
- encoder.begin_group state.type
70
- heredocs = nil if heredocs.empty?
59
+
60
+ if state.instance_of? patterns::StringState
61
+
62
+ match = scan_until(state.pattern) || scan_until(/\z/)
63
+ encoder.text_token match, :content unless match.empty?
64
+ break if eos?
65
+
66
+ if state.heredoc and self[1] # end of heredoc
67
+ match = getch.to_s
68
+ match << scan_until(/$/) unless eos?
69
+ encoder.text_token match, :delimiter
70
+ encoder.end_group state.type
71
+ state = state.next_state
72
+ next
73
+ end
74
+
75
+ case match = getch
76
+
77
+ when state.delim
78
+ if state.paren_depth
79
+ state.paren_depth -= 1
80
+ if state.paren_depth > 0
81
+ encoder.text_token match, :nesting_delimiter
82
+ next
83
+ end
84
+ end
85
+ encoder.text_token match, :delimiter
86
+ if state.type == :regexp and not eos?
87
+ modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
88
+ encoder.text_token modifiers, :modifier unless modifiers.empty?
89
+ end
90
+ encoder.end_group state.type
91
+ value_expected = false
92
+ state = state.next_state
93
+
94
+ when '\\'
95
+ if state.interpreted
96
+ if esc = scan(/ #{patterns::ESCAPE} /ox)
97
+ encoder.text_token match + esc, :char
98
+ else
99
+ encoder.text_token match, :error
100
+ end
71
101
  else
72
- state = :initial if state == :undef_comma_expected
73
- encoder.text_token match, :space
102
+ case m = getch
103
+ when state.delim, '\\'
104
+ encoder.text_token match + m, :char
105
+ when nil
106
+ encoder.text_token match, :content
107
+ else
108
+ encoder.text_token match + m, :content
109
+ end
110
+ end
111
+
112
+ when '#'
113
+ case peek(1)
114
+ when '{'
115
+ inline_block_stack ||= []
116
+ inline_block_stack << [state, inline_block_curly_depth, heredocs]
74
117
  value_expected = true
118
+ state = :initial
119
+ inline_block_curly_depth = 1
120
+ encoder.begin_group :inline
121
+ encoder.text_token match + getch, :inline_delimiter
122
+ when '$', '@'
123
+ encoder.text_token match, :escape
124
+ last_state = state
125
+ state = :initial
126
+ else
127
+ raise_inspect 'else-case # reached; #%p not handled' %
128
+ [peek(1)], encoder
75
129
  end
130
+
131
+ when state.opening_paren
132
+ state.paren_depth += 1
133
+ encoder.text_token match, :nesting_delimiter
134
+
135
+ when /#{patterns::REGEXP_SYMBOLS}/ox
136
+ encoder.text_token match, :function
137
+
138
+ else
139
+ raise_inspect 'else-case " reached; %p not handled, state = %p' %
140
+ [match, state], encoder
141
+
142
+ end
143
+
144
+ else
145
+
146
+ if match = scan(/[ \t\f]+/)
147
+ match << scan(/\s*/) unless eos? || heredocs
148
+ value_expected = true if match.index(?\n)
149
+ encoder.text_token match, :space
76
150
 
77
- elsif match = scan(bol? ? / \#(!)?.* | #{patterns::RUBYDOC_OR_DATA} /ox : /\#.*/)
78
- encoder.text_token match, self[1] ? :doctype : :comment
79
-
80
- elsif match = scan(/\\\n/)
151
+ elsif match = scan(/\\?\n/)
152
+ if match == "\n"
153
+ value_expected = true
154
+ state = :initial if state == :undef_comma_expected
155
+ end
81
156
  if heredocs
82
157
  unscan # heredoc scanning needs \n at start
83
- encoder.text_token scan(/\\/), :space
84
158
  state = heredocs.shift
85
159
  encoder.begin_group state.type
86
160
  heredocs = nil if heredocs.empty?
161
+ next
87
162
  else
88
- encoder.text_token match, :space
163
+ match << scan(/\s*/) unless eos?
89
164
  end
165
+ encoder.text_token match, :space
166
+
167
+ elsif bol? && match = scan(/\#!.*/)
168
+ encoder.text_token match, :doctype
90
169
 
170
+ elsif match = scan(/\#.*/) or
171
+ (bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o))
172
+ encoder.text_token match, :comment
173
+
91
174
  elsif state == :initial
92
-
175
+
93
176
  # IDENTS #
94
- if !method_call_expected &&
177
+ if !method_call_expected and
95
178
  match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
96
179
  /#{patterns::METHOD_NAME}/o)
97
180
  value_expected = false
98
181
  kind = patterns::IDENT_KIND[match]
99
182
  if kind == :ident
100
- if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/))
183
+ if match[/^[A-Z]/] && !match[/[!?]$/] && !match?(/\(/)
101
184
  kind = :constant
102
185
  end
103
- elsif kind == :keyword
186
+ elsif kind == :reserved
104
187
  state = patterns::KEYWORD_NEW_STATE[match]
105
188
  value_expected = true if patterns::KEYWORDS_EXPECTING_VALUE[match]
106
189
  end
107
190
  value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
108
191
  encoder.text_token match, kind
109
-
110
- elsif method_call_expected &&
192
+
193
+ elsif method_call_expected and
111
194
  match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo :
112
195
  /#{patterns::METHOD_AFTER_DOT}/o)
113
- if method_call_expected == '::' && match[/\A[A-Z]/] && !match?(/\(/)
196
+ if method_call_expected == '::' && match[/^[A-Z]/] && !match?(/\(/)
114
197
  encoder.text_token match, :constant
115
198
  else
116
199
  encoder.text_token match, :ident
117
200
  end
118
201
  method_call_expected = false
119
202
  value_expected = check(/#{patterns::VALUE_FOLLOWS}/o)
120
-
203
+
121
204
  # OPERATORS #
122
- elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | (?: \.\.\.? | ==?=? | [,\(\[\{] )() | [\)\]\}] /x)
205
+ elsif not method_call_expected and match = scan(/ \.\.\.? | (\.|::) | [,\(\)\[\]\{\}] | ==?=? /x)
206
+ value_expected = match !~ / [.\)\]\}] /x || match =~ /\A\.\./
123
207
  method_call_expected = self[1]
124
- value_expected = !method_call_expected && self[2]
125
208
  if inline_block_stack
126
209
  case match
127
210
  when '{'
@@ -139,109 +222,111 @@ module Scanners
139
222
  end
140
223
  end
141
224
  encoder.text_token match, :operator
142
-
143
- elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
144
- /#{patterns::SYMBOL}/o)
145
- case delim = match[1]
146
- when ?', ?"
147
- encoder.begin_group :symbol
148
- encoder.text_token ':', :symbol
149
- match = delim.chr
150
- encoder.text_token match, :delimiter
151
- state = self.class::StringState.new :symbol, delim == ?", match
152
- else
153
- encoder.text_token match, :symbol
154
- value_expected = false
155
- end
156
-
157
- elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx)
225
+
226
+ elsif match = scan(/ ['"] /mx)
158
227
  encoder.begin_group :string
159
- if match.size == 1
160
- encoder.text_token match, :delimiter
161
- state = self.class::StringState.new :string, match == '"', match # important for streaming
162
- else
163
- encoder.text_token match[0,1], :delimiter
164
- encoder.text_token match[1..-2], :content if match.size > 2
165
- encoder.text_token match[-1,1], :delimiter
166
- encoder.end_group :string
167
- value_expected = false
168
- end
169
-
228
+ encoder.text_token match, :delimiter
229
+ state = patterns::StringState.new :string, match == '"', match # important for streaming
230
+
170
231
  elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
171
232
  /#{patterns::INSTANCE_VARIABLE}/o)
172
233
  value_expected = false
173
234
  encoder.text_token match, :instance_variable
174
-
175
- elsif value_expected && match = scan(/\//)
235
+
236
+ elsif value_expected and match = scan(/\//)
176
237
  encoder.begin_group :regexp
177
238
  encoder.text_token match, :delimiter
178
- state = self.class::StringState.new :regexp, true, '/'
179
-
239
+ interpreted = true
240
+ state = patterns::StringState.new :regexp, interpreted, '/'
241
+
180
242
  elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o)
181
243
  if method_call_expected
182
244
  encoder.text_token match, :error
183
245
  method_call_expected = false
184
246
  else
185
- encoder.text_token match, self[1] ? :float : :integer # TODO: send :hex/:octal/:binary
247
+ encoder.text_token match, self[1] ? :float : :integer
186
248
  end
187
249
  value_expected = false
188
-
189
- elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x)
250
+
251
+ elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
252
+ /#{patterns::SYMBOL}/o)
253
+ case delim = match[1]
254
+ when ?', ?"
255
+ encoder.begin_group :symbol
256
+ encoder.text_token ':', :symbol
257
+ match = delim.chr
258
+ encoder.text_token match, :delimiter
259
+ state = patterns::StringState.new :symbol, delim == ?", match
260
+ else
261
+ encoder.text_token match, :symbol
262
+ value_expected = false
263
+ end
264
+
265
+ elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
190
266
  value_expected = true
191
267
  encoder.text_token match, :operator
192
-
193
- elsif value_expected && match = scan(/#{patterns::HEREDOC_OPEN}/o)
268
+
269
+ elsif value_expected and match = scan(/#{patterns::HEREDOC_OPEN}/o)
270
+ indented = self[1] == '-'
194
271
  quote = self[3]
195
272
  delim = self[quote ? 4 : 2]
196
273
  kind = patterns::QUOTE_TO_TYPE[quote]
197
274
  encoder.begin_group kind
198
275
  encoder.text_token match, :delimiter
199
276
  encoder.end_group kind
277
+ heredoc = patterns::StringState.new kind, quote != '\'',
278
+ delim, (indented ? :indented : :linestart )
200
279
  heredocs ||= [] # create heredocs if empty
201
- heredocs << self.class::StringState.new(kind, quote != "'", delim,
202
- self[1] == '-' ? :indented : :linestart)
280
+ heredocs << heredoc
203
281
  value_expected = false
204
-
205
- elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o)
206
- kind = patterns::FANCY_STRING_KIND[self[1]]
282
+
283
+ elsif value_expected and match = scan(/#{patterns::FANCY_START}/o)
284
+ kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do
285
+ raise_inspect 'Unknown fancy string: %%%p' % k, encoder
286
+ end
207
287
  encoder.begin_group kind
208
- state = self.class::StringState.new kind, patterns::FANCY_STRING_INTERPRETED[self[1]], self[2]
288
+ state = patterns::StringState.new kind, interpreted, self[2]
209
289
  encoder.text_token match, :delimiter
210
-
211
- elsif value_expected && match = scan(/#{patterns::CHARACTER}/o)
290
+
291
+ elsif value_expected and match = scan(/#{patterns::CHARACTER}/o)
212
292
  value_expected = false
213
293
  encoder.text_token match, :integer
214
-
215
- elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x)
294
+
295
+ elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
216
296
  value_expected = true
217
297
  encoder.text_token match, :operator
218
-
298
+
219
299
  elsif match = scan(/`/)
220
- encoder.begin_group :shell
221
- encoder.text_token match, :delimiter
222
- state = self.class::StringState.new :shell, true, match
223
-
300
+ if method_call_expected
301
+ encoder.text_token match, :operator
302
+ value_expected = true
303
+ else
304
+ encoder.begin_group :shell
305
+ encoder.text_token match, :delimiter
306
+ state = patterns::StringState.new :shell, true, match
307
+ end
308
+
224
309
  elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
225
310
  /#{patterns::GLOBAL_VARIABLE}/o)
226
311
  encoder.text_token match, :global_variable
227
312
  value_expected = false
228
-
313
+
229
314
  elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
230
315
  /#{patterns::CLASS_VARIABLE}/o)
231
316
  encoder.text_token match, :class_variable
232
317
  value_expected = false
233
-
318
+
234
319
  elsif match = scan(/\\\z/)
235
320
  encoder.text_token match, :space
236
-
321
+
237
322
  else
238
323
  if method_call_expected
239
324
  method_call_expected = false
240
325
  next
241
326
  end
242
- unless unicode
327
+ if !unicode
243
328
  # check for unicode
244
- $DEBUG_BEFORE, $DEBUG = $DEBUG, false
329
+ debug, $DEBUG = $DEBUG, false
245
330
  begin
246
331
  if check(/./mu).size > 1
247
332
  # seems like we should try again with unicode
@@ -250,7 +335,7 @@ module Scanners
250
335
  rescue
251
336
  # bad unicode char; use getch
252
337
  ensure
253
- $DEBUG = $DEBUG_BEFORE
338
+ $DEBUG = debug
254
339
  end
255
340
  next if unicode
256
341
  end
@@ -263,7 +348,7 @@ module Scanners
263
348
  state = last_state
264
349
  last_state = nil
265
350
  end
266
-
351
+
267
352
  elsif state == :def_expected
268
353
  if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
269
354
  /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
@@ -273,7 +358,7 @@ module Scanners
273
358
  last_state = :dot_expected
274
359
  state = :initial
275
360
  end
276
-
361
+
277
362
  elsif state == :dot_expected
278
363
  if match = scan(/\.|::/)
279
364
  # invalid definition
@@ -282,7 +367,7 @@ module Scanners
282
367
  else
283
368
  state = :initial
284
369
  end
285
-
370
+
286
371
  elsif state == :module_expected
287
372
  if match = scan(/<</)
288
373
  encoder.text_token match, :operator
@@ -293,7 +378,7 @@ module Scanners
293
378
  encoder.text_token match, :class
294
379
  end
295
380
  end
296
-
381
+
297
382
  elsif state == :undef_expected
298
383
  state = :undef_comma_expected
299
384
  if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
@@ -306,7 +391,7 @@ module Scanners
306
391
  encoder.text_token ':', :symbol
307
392
  match = delim.chr
308
393
  encoder.text_token match, :delimiter
309
- state = self.class::StringState.new :symbol, delim == ?", match
394
+ state = patterns::StringState.new :symbol, delim == ?", match
310
395
  state.next_state = :undef_comma_expected
311
396
  else
312
397
  encoder.text_token match, :symbol
@@ -314,7 +399,7 @@ module Scanners
314
399
  else
315
400
  state = :initial
316
401
  end
317
-
402
+
318
403
  elsif state == :undef_comma_expected
319
404
  if match = scan(/,/)
320
405
  encoder.text_token match, :operator
@@ -322,7 +407,7 @@ module Scanners
322
407
  else
323
408
  state = :initial
324
409
  end
325
-
410
+
326
411
  elsif state == :alias_expected
327
412
  match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
328
413
  /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
@@ -333,129 +418,33 @@ module Scanners
333
418
  encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method)
334
419
  end
335
420
  state = :initial
336
-
421
+
337
422
  else
338
- #:nocov:
339
423
  raise_inspect 'Unknown state: %p' % [state], encoder
340
- #:nocov:
341
- end
342
-
343
- else # StringState
344
-
345
- match = scan_until(state.pattern) || scan_rest
346
- unless match.empty?
347
- encoder.text_token match, :content
348
- break if eos?
349
- end
350
-
351
- if state.heredoc && self[1] # end of heredoc
352
- match = getch
353
- match << scan_until(/$/) unless eos?
354
- encoder.text_token match, :delimiter unless match.empty?
355
- encoder.end_group state.type
356
- state = state.next_state
357
- next
358
- end
359
-
360
- case match = getch
361
-
362
- when state.delim
363
- if state.paren_depth
364
- state.paren_depth -= 1
365
- if state.paren_depth > 0
366
- encoder.text_token match, :content
367
- next
368
- end
369
- end
370
- encoder.text_token match, :delimiter
371
- if state.type == :regexp && !eos?
372
- match = scan(/#{patterns::REGEXP_MODIFIERS}/o)
373
- encoder.text_token match, :modifier unless match.empty?
374
- end
375
- encoder.end_group state.type
376
- value_expected = false
377
- state = state.next_state
378
-
379
- when '\\'
380
- if state.interpreted
381
- if esc = scan(/#{patterns::ESCAPE}/o)
382
- encoder.text_token match + esc, :char
383
- else
384
- encoder.text_token match, :error
385
- end
386
- else
387
- case esc = getch
388
- when nil
389
- encoder.text_token match, :content
390
- when state.delim, '\\'
391
- encoder.text_token match + esc, :char
392
- else
393
- encoder.text_token match + esc, :content
394
- end
395
- end
396
-
397
- when '#'
398
- case peek(1)
399
- when '{'
400
- inline_block_stack ||= []
401
- inline_block_stack << [state, inline_block_curly_depth, heredocs]
402
- value_expected = true
403
- state = :initial
404
- inline_block_curly_depth = 1
405
- encoder.begin_group :inline
406
- encoder.text_token match + getch, :inline_delimiter
407
- when '$', '@'
408
- encoder.text_token match, :escape
409
- last_state = state
410
- state = :initial
411
- else
412
- #:nocov:
413
- raise_inspect 'else-case # reached; #%p not handled' % [peek(1)], encoder
414
- #:nocov:
415
- end
416
-
417
- when state.opening_paren
418
- state.paren_depth += 1
419
- encoder.text_token match, :content
420
-
421
- else
422
- #:nocov
423
- raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], encoder
424
- #:nocov:
425
-
426
424
  end
427
425
 
428
426
  end
429
-
430
427
  end
431
-
428
+
432
429
  # cleaning up
433
- if state.is_a? StringState
434
- encoder.end_group state.type
435
- end
436
-
437
430
  if options[:keep_state]
438
- if state.is_a?(StringState) && state.heredoc
439
- (heredocs ||= []).unshift state
440
- state = :initial
441
- elsif heredocs && heredocs.empty?
442
- heredocs = nil
443
- end
444
- @state = state, heredocs
431
+ @state = state
432
+ end
433
+ if state.is_a? patterns::StringState
434
+ encoder.end_group state.type
445
435
  end
446
-
447
436
  if inline_block_stack
448
437
  until inline_block_stack.empty?
449
- state, = *inline_block_stack.pop
450
- encoder.end_group :inline
438
+ state, *more = inline_block_stack.pop
439
+ encoder.end_group :inline if more
451
440
  encoder.end_group state.type
452
441
  end
453
442
  end
454
-
443
+
455
444
  encoder
456
445
  end
457
-
446
+
458
447
  end
459
-
448
+
460
449
  end
461
450
  end