coderay 0.9.8 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/{lib/README → README_INDEX.rdoc} +10 -21
- data/Rakefile +6 -6
- data/bin/coderay +193 -64
- data/lib/coderay.rb +61 -105
- data/lib/coderay/duo.rb +17 -21
- data/lib/coderay/encoder.rb +100 -112
- data/lib/coderay/encoders/_map.rb +12 -7
- data/lib/coderay/encoders/comment_filter.rb +12 -30
- data/lib/coderay/encoders/count.rb +29 -11
- data/lib/coderay/encoders/debug.rb +32 -20
- data/lib/coderay/encoders/div.rb +13 -9
- data/lib/coderay/encoders/filter.rb +34 -51
- data/lib/coderay/encoders/html.rb +155 -161
- data/lib/coderay/encoders/html/css.rb +4 -9
- data/lib/coderay/encoders/html/numbering.rb +115 -0
- data/lib/coderay/encoders/html/output.rb +22 -70
- data/lib/coderay/encoders/json.rb +59 -45
- data/lib/coderay/encoders/lines_of_code.rb +12 -57
- data/lib/coderay/encoders/null.rb +6 -14
- data/lib/coderay/encoders/page.rb +13 -9
- data/lib/coderay/encoders/span.rb +13 -9
- data/lib/coderay/encoders/statistic.rb +58 -39
- data/lib/coderay/encoders/terminal.rb +179 -0
- data/lib/coderay/encoders/text.rb +31 -17
- data/lib/coderay/encoders/token_kind_filter.rb +111 -0
- data/lib/coderay/encoders/xml.rb +19 -18
- data/lib/coderay/encoders/yaml.rb +37 -9
- data/lib/coderay/for_redcloth.rb +4 -4
- data/lib/coderay/helpers/file_type.rb +127 -246
- data/lib/coderay/helpers/gzip.rb +41 -0
- data/lib/coderay/helpers/plugin.rb +241 -306
- data/lib/coderay/helpers/word_list.rb +65 -126
- data/lib/coderay/scanner.rb +173 -156
- data/lib/coderay/scanners/_map.rb +18 -17
- data/lib/coderay/scanners/c.rb +63 -77
- data/lib/coderay/scanners/clojure.rb +217 -0
- data/lib/coderay/scanners/cpp.rb +71 -84
- data/lib/coderay/scanners/css.rb +103 -120
- data/lib/coderay/scanners/debug.rb +47 -44
- data/lib/coderay/scanners/delphi.rb +70 -76
- data/lib/coderay/scanners/diff.rb +141 -50
- data/lib/coderay/scanners/erb.rb +81 -0
- data/lib/coderay/scanners/groovy.rb +104 -113
- data/lib/coderay/scanners/haml.rb +168 -0
- data/lib/coderay/scanners/html.rb +181 -110
- data/lib/coderay/scanners/java.rb +73 -75
- data/lib/coderay/scanners/java/builtin_types.rb +2 -0
- data/lib/coderay/scanners/java_script.rb +90 -101
- data/lib/coderay/scanners/json.rb +40 -53
- data/lib/coderay/scanners/php.rb +123 -147
- data/lib/coderay/scanners/python.rb +93 -91
- data/lib/coderay/scanners/raydebug.rb +66 -0
- data/lib/coderay/scanners/ruby.rb +343 -326
- data/lib/coderay/scanners/ruby/patterns.rb +40 -106
- data/lib/coderay/scanners/ruby/string_state.rb +71 -0
- data/lib/coderay/scanners/sql.rb +80 -66
- data/lib/coderay/scanners/text.rb +26 -0
- data/lib/coderay/scanners/xml.rb +1 -1
- data/lib/coderay/scanners/yaml.rb +74 -73
- data/lib/coderay/style.rb +10 -7
- data/lib/coderay/styles/_map.rb +3 -3
- data/lib/coderay/styles/alpha.rb +143 -0
- data/lib/coderay/token_kinds.rb +90 -0
- data/lib/coderay/tokens.rb +102 -277
- data/lib/coderay/tokens_proxy.rb +55 -0
- data/lib/coderay/version.rb +3 -0
- data/test/functional/basic.rb +200 -18
- data/test/functional/examples.rb +130 -0
- data/test/functional/for_redcloth.rb +15 -8
- data/test/functional/suite.rb +9 -6
- metadata +103 -123
- data/FOLDERS +0 -53
- data/bin/coderay_stylesheet +0 -4
- data/lib/coderay/encoders/html/numerization.rb +0 -133
- data/lib/coderay/encoders/term.rb +0 -158
- data/lib/coderay/encoders/token_class_filter.rb +0 -84
- data/lib/coderay/helpers/gzip_simple.rb +0 -123
- data/lib/coderay/scanners/nitro_xhtml.rb +0 -136
- data/lib/coderay/scanners/plaintext.rb +0 -20
- data/lib/coderay/scanners/rhtml.rb +0 -78
- data/lib/coderay/scanners/scheme.rb +0 -145
- data/lib/coderay/styles/cycnus.rb +0 -152
- data/lib/coderay/styles/murphy.rb +0 -134
- data/lib/coderay/token_classes.rb +0 -86
- data/test/functional/load_plugin_scanner.rb +0 -11
- data/test/functional/vhdl.rb +0 -126
- data/test/functional/word_list.rb +0 -79
@@ -0,0 +1,66 @@
|
|
1
|
+
module CodeRay
module Scanners

  # = Raydebug Scanner
  #
  # Parses the output of the Encoders::Debug encoder.
  #
  # Three constructs are recognized:
  #
  # <tt>kind(text)</tt>:: a single token of the given kind; +text+ may
  #                       contain backslash-escaped characters.
  # <tt>kind<</tt> or <tt>kind[</tt>:: opens a group named +kind+.
  # <tt>></tt> or <tt>]</tt>:: closes the most recently opened group
  #                            (either bracket closes either opener).
  #
  # Whitespace runs become :space tokens. Any other character is passed
  # through one at a time, also as :space.
  class Raydebug < Scanner

    register_for :raydebug
    file_extension 'raydebug'
    title 'CodeRay Token Dump'

  protected

    # Tokenizes the whole input and reports the tokens to +encoder+.
    #
    # encoder:: receives the text_token / begin_group / end_group calls.
    # options:: not referenced by this scanner.
    #
    # Groups that are still open at the end of the input are closed, so
    # begin_group and end_group calls are always balanced.
    #
    # Returns +encoder+.
    def scan_tokens encoder, options

      opened_tokens = []

      until eos?

        if match = scan(/\s+/)
          encoder.text_token match, :space

        elsif scan(/ (\w+) \( ( [^\)\\]* (?: \\. [^\)\\]* )* ) /x)
          kind = self[1]
          content = self[2]
          encoder.text_token kind, :class
          encoder.text_token '(', :operator
          # "kind()" carries no content; don't emit an empty text token.
          # NOTE(review): the kind name is taken from the input and turned
          # into a Symbol; consider a whitelist if the input is untrusted.
          encoder.text_token content, kind.to_sym unless content.empty?
          # The closing parenthesis may be absent, e.g. in truncated input.
          if match = scan(/\)/)
            encoder.text_token match, :operator
          end

        elsif scan(/ (\w+) ([<\[]) /x)
          # Both "kind<" and "kind[" open a group and are rendered alike.
          name = self[1]
          opener = self[2]
          kind = name.to_sym
          encoder.text_token name, :class
          opened_tokens << kind
          encoder.begin_group kind
          encoder.text_token opener, :operator

        elsif !opened_tokens.empty? && match = scan(/ [>\]] /x)
          encoder.text_token match, :operator
          encoder.end_group opened_tokens.pop

        else
          # Unrecognized input (including a stray closer with no open
          # group): consume a single character so that scanning advances.
          encoder.text_token getch, :space

        end

      end

      # Close whatever the input left open.
      encoder.end_group opened_tokens.pop until opened_tokens.empty?

      encoder
    end

  end

end
end
|
@@ -1,7 +1,6 @@
|
|
1
|
-
# encoding: utf-8
|
2
1
|
module CodeRay
|
3
2
|
module Scanners
|
4
|
-
|
3
|
+
|
5
4
|
# This scanner is really complex, since Ruby _is_ a complex language!
|
6
5
|
#
|
7
6
|
# It tries to highlight 100% of all common code,
|
@@ -9,310 +8,240 @@ module Scanners
|
|
9
8
|
#
|
10
9
|
# It is optimized for HTML highlighting, and is not very useful for
|
11
10
|
# parsing or pretty printing.
|
12
|
-
#
|
13
|
-
# For now, I think it's better than the scanners in VIM or Syntax, or
|
14
|
-
# any highlighter I was able to find, except Caleb's RubyLexer.
|
15
|
-
#
|
16
|
-
# I hope it's also better than the rdoc/irb lexer.
|
17
11
|
class Ruby < Scanner
|
18
|
-
|
19
|
-
include Streamable
|
20
|
-
|
12
|
+
|
21
13
|
register_for :ruby
|
22
14
|
file_extension 'rb'
|
23
|
-
|
24
|
-
helper :patterns
|
25
15
|
|
26
|
-
|
27
|
-
|
16
|
+
autoload :Patterns, 'coderay/scanners/ruby/patterns'
|
17
|
+
autoload :StringState, 'coderay/scanners/ruby/string_state'
|
18
|
+
|
19
|
+
def interpreted_string_state
|
20
|
+
StringState.new :string, true, '"'
|
28
21
|
end
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
22
|
+
|
23
|
+
protected
|
24
|
+
|
25
|
+
def setup
|
26
|
+
@state = :initial
|
27
|
+
end
|
28
|
+
|
29
|
+
def scan_tokens encoder, options
|
30
|
+
state, heredocs = options[:state] || @state
|
31
|
+
heredocs = heredocs.dup if heredocs.is_a?(Array)
|
32
|
+
|
33
|
+
if state && state.instance_of?(StringState)
|
34
|
+
encoder.begin_group state.type
|
40
35
|
end
|
41
36
|
|
42
|
-
last_token_dot = false
|
43
|
-
value_expected = true
|
44
|
-
heredocs = nil
|
45
37
|
last_state = nil
|
46
|
-
state = :initial
|
47
|
-
depth = nil
|
48
|
-
inline_block_stack = []
|
49
38
|
|
39
|
+
method_call_expected = false
|
40
|
+
value_expected = true
|
41
|
+
|
42
|
+
inline_block_stack = nil
|
43
|
+
inline_block_curly_depth = 0
|
44
|
+
|
45
|
+
if heredocs
|
46
|
+
state = heredocs.shift
|
47
|
+
encoder.begin_group state.type
|
48
|
+
heredocs = nil if heredocs.empty?
|
49
|
+
end
|
50
|
+
|
51
|
+
# def_object_stack = nil
|
52
|
+
# def_object_paren_depth = 0
|
50
53
|
|
51
54
|
patterns = Patterns # avoid constant lookup
|
52
55
|
|
56
|
+
unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
|
57
|
+
|
53
58
|
until eos?
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
tokens << [match, :delimiter]
|
67
|
-
tokens << [:close, state.type]
|
68
|
-
state = state.next_state
|
69
|
-
next
|
70
|
-
end
|
71
|
-
|
72
|
-
case match = getch
|
73
|
-
|
74
|
-
when state.delim
|
75
|
-
if state.paren
|
76
|
-
state.paren_depth -= 1
|
77
|
-
if state.paren_depth > 0
|
78
|
-
tokens << [match, :nesting_delimiter]
|
79
|
-
next
|
80
|
-
end
|
81
|
-
end
|
82
|
-
tokens << [match, :delimiter]
|
83
|
-
if state.type == :regexp and not eos?
|
84
|
-
modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
|
85
|
-
tokens << [modifiers, :modifier] unless modifiers.empty?
|
86
|
-
end
|
87
|
-
tokens << [:close, state.type]
|
88
|
-
value_expected = false
|
89
|
-
state = state.next_state
|
90
|
-
|
91
|
-
when '\\'
|
92
|
-
if state.interpreted
|
93
|
-
if esc = scan(/ #{patterns::ESCAPE} /ox)
|
94
|
-
tokens << [match + esc, :char]
|
95
|
-
else
|
96
|
-
tokens << [match, :error]
|
97
|
-
end
|
59
|
+
|
60
|
+
if state.instance_of? ::Symbol
|
61
|
+
|
62
|
+
if match = scan(/[ \t\f\v]+/)
|
63
|
+
encoder.text_token match, :space
|
64
|
+
|
65
|
+
elsif match = scan(/\n/)
|
66
|
+
if heredocs
|
67
|
+
unscan # heredoc scanning needs \n at start
|
68
|
+
state = heredocs.shift
|
69
|
+
encoder.begin_group state.type
|
70
|
+
heredocs = nil if heredocs.empty?
|
98
71
|
else
|
99
|
-
|
100
|
-
|
101
|
-
tokens << [match + m, :char]
|
102
|
-
when nil
|
103
|
-
tokens << [match, :error]
|
104
|
-
else
|
105
|
-
tokens << [match + m, :content]
|
106
|
-
end
|
107
|
-
end
|
108
|
-
|
109
|
-
when '#'
|
110
|
-
case peek(1)
|
111
|
-
when '{'
|
112
|
-
inline_block_stack << [state, depth, heredocs]
|
72
|
+
state = :initial if state == :undef_comma_expected
|
73
|
+
encoder.text_token match, :space
|
113
74
|
value_expected = true
|
114
|
-
state = :initial
|
115
|
-
depth = 1
|
116
|
-
tokens << [:open, :inline]
|
117
|
-
tokens << [match + getch, :inline_delimiter]
|
118
|
-
when '$', '@'
|
119
|
-
tokens << [match, :escape]
|
120
|
-
last_state = state # scan one token as normal code, then return here
|
121
|
-
state = :initial
|
122
|
-
else
|
123
|
-
raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
|
124
75
|
end
|
125
|
-
|
126
|
-
when state.paren
|
127
|
-
state.paren_depth += 1
|
128
|
-
tokens << [match, :nesting_delimiter]
|
129
|
-
|
130
|
-
when /#{patterns::REGEXP_SYMBOLS}/ox
|
131
|
-
tokens << [match, :function]
|
132
|
-
|
133
|
-
else
|
134
|
-
raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens
|
135
|
-
|
136
|
-
end
|
137
|
-
next
|
138
|
-
# }}}
|
139
|
-
else
|
140
|
-
# {{{
|
141
|
-
if match = scan(/[ \t\f]+/)
|
142
|
-
kind = :space
|
143
|
-
match << scan(/\s*/) unless eos? || heredocs
|
144
|
-
value_expected = true if match.index(?\n)
|
145
|
-
tokens << [match, kind]
|
146
|
-
next
|
147
76
|
|
148
|
-
elsif match = scan(
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
state = :initial if state == :undef_comma_expected
|
153
|
-
end
|
77
|
+
elsif match = scan(bol? ? / \#(!)?.* | #{patterns::RUBYDOC_OR_DATA} /ox : /\#.*/)
|
78
|
+
encoder.text_token match, self[1] ? :doctype : :comment
|
79
|
+
|
80
|
+
elsif match = scan(/\\\n/)
|
154
81
|
if heredocs
|
155
82
|
unscan # heredoc scanning needs \n at start
|
83
|
+
encoder.text_token scan(/\\/), :space
|
156
84
|
state = heredocs.shift
|
157
|
-
|
85
|
+
encoder.begin_group state.type
|
158
86
|
heredocs = nil if heredocs.empty?
|
159
|
-
next
|
160
87
|
else
|
161
|
-
match
|
88
|
+
encoder.text_token match, :space
|
162
89
|
end
|
163
|
-
tokens << [match, kind]
|
164
|
-
next
|
165
|
-
|
166
|
-
elsif bol? && match = scan(/\#!.*/)
|
167
|
-
tokens << [match, :doctype]
|
168
|
-
next
|
169
90
|
|
170
|
-
elsif match = scan(/\#.*/) or
|
171
|
-
( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
|
172
|
-
kind = :comment
|
173
|
-
tokens << [match, kind]
|
174
|
-
next
|
175
|
-
|
176
91
|
elsif state == :initial
|
177
|
-
|
92
|
+
|
178
93
|
# IDENTS #
|
179
|
-
if
|
94
|
+
if !method_call_expected &&
|
95
|
+
match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
|
180
96
|
/#{patterns::METHOD_NAME}/o)
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
if
|
185
|
-
|
186
|
-
match = ':'
|
187
|
-
kind = :operator
|
188
|
-
else
|
189
|
-
kind = patterns::IDENT_KIND[match]
|
190
|
-
if kind == :ident
|
191
|
-
if match[/\A[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
|
192
|
-
kind = :constant
|
193
|
-
end
|
194
|
-
elsif kind == :reserved
|
195
|
-
state = patterns::DEF_NEW_STATE[match]
|
196
|
-
value_expected = :set if patterns::KEYWORDS_EXPECTING_VALUE[match]
|
197
|
-
end
|
97
|
+
value_expected = false
|
98
|
+
kind = patterns::IDENT_KIND[match]
|
99
|
+
if kind == :ident
|
100
|
+
if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/))
|
101
|
+
kind = :constant
|
198
102
|
end
|
103
|
+
elsif kind == :keyword
|
104
|
+
state = patterns::KEYWORD_NEW_STATE[match]
|
105
|
+
value_expected = true if patterns::KEYWORDS_EXPECTING_VALUE[match]
|
199
106
|
end
|
200
|
-
value_expected =
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
value_expected = :set
|
107
|
+
value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
|
108
|
+
encoder.text_token match, kind
|
109
|
+
|
110
|
+
elsif method_call_expected &&
|
111
|
+
match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo :
|
112
|
+
/#{patterns::METHOD_AFTER_DOT}/o)
|
113
|
+
if method_call_expected == '::' && match[/\A[A-Z]/] && !match?(/\(/)
|
114
|
+
encoder.text_token match, :constant
|
115
|
+
else
|
116
|
+
encoder.text_token match, :ident
|
211
117
|
end
|
212
|
-
|
213
|
-
|
214
|
-
|
118
|
+
method_call_expected = false
|
119
|
+
value_expected = check(/#{patterns::VALUE_FOLLOWS}/o)
|
120
|
+
|
121
|
+
# OPERATORS #
|
122
|
+
elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | (?: \.\.\.? | ==?=? | [,\(\[\{] )() | [\)\]\}] /x)
|
123
|
+
method_call_expected = self[1]
|
124
|
+
value_expected = !method_call_expected && self[2]
|
125
|
+
if inline_block_stack
|
215
126
|
case match
|
216
127
|
when '{'
|
217
|
-
|
128
|
+
inline_block_curly_depth += 1
|
218
129
|
when '}'
|
219
|
-
|
220
|
-
if
|
221
|
-
state,
|
130
|
+
inline_block_curly_depth -= 1
|
131
|
+
if inline_block_curly_depth == 0 # closing brace of inline block reached
|
132
|
+
state, inline_block_curly_depth, heredocs = inline_block_stack.pop
|
133
|
+
inline_block_stack = nil if inline_block_stack.empty?
|
222
134
|
heredocs = nil if heredocs && heredocs.empty?
|
223
|
-
|
224
|
-
|
225
|
-
|
135
|
+
encoder.text_token match, :inline_delimiter
|
136
|
+
encoder.end_group :inline
|
137
|
+
next
|
226
138
|
end
|
227
139
|
end
|
228
140
|
end
|
229
|
-
|
230
|
-
|
231
|
-
tokens << [:open, :string]
|
232
|
-
kind = :delimiter
|
233
|
-
state = patterns::StringState.new :string, match == '"', match # important for streaming
|
234
|
-
|
235
|
-
elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
|
236
|
-
/#{patterns::INSTANCE_VARIABLE}/o)
|
237
|
-
kind = :instance_variable
|
238
|
-
|
239
|
-
elsif value_expected and match = scan(/\//)
|
240
|
-
tokens << [:open, :regexp]
|
241
|
-
kind = :delimiter
|
242
|
-
interpreted = true
|
243
|
-
state = patterns::StringState.new :regexp, interpreted, match
|
244
|
-
|
245
|
-
# elsif match = scan(/[-+]?#{patterns::NUMERIC}/o)
|
246
|
-
elsif match = value_expected ? scan(/[-+]?#{patterns::NUMERIC}/o) : scan(/#{patterns::NUMERIC}/o)
|
247
|
-
kind = self[1] ? :float : :integer
|
248
|
-
|
141
|
+
encoder.text_token match, :operator
|
142
|
+
|
249
143
|
elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
|
250
144
|
/#{patterns::SYMBOL}/o)
|
251
145
|
case delim = match[1]
|
252
146
|
when ?', ?"
|
253
|
-
|
254
|
-
|
147
|
+
encoder.begin_group :symbol
|
148
|
+
encoder.text_token ':', :symbol
|
255
149
|
match = delim.chr
|
256
|
-
|
257
|
-
state =
|
150
|
+
encoder.text_token match, :delimiter
|
151
|
+
state = self.class::StringState.new :symbol, delim == ?", match
|
152
|
+
else
|
153
|
+
encoder.text_token match, :symbol
|
154
|
+
value_expected = false
|
155
|
+
end
|
156
|
+
|
157
|
+
elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx)
|
158
|
+
encoder.begin_group :string
|
159
|
+
if match.size == 1
|
160
|
+
encoder.text_token match, :delimiter
|
161
|
+
state = self.class::StringState.new :string, match == '"', match # important for streaming
|
162
|
+
else
|
163
|
+
encoder.text_token match[0,1], :delimiter
|
164
|
+
encoder.text_token match[1..-2], :content if match.size > 2
|
165
|
+
encoder.text_token match[-1,1], :delimiter
|
166
|
+
encoder.end_group :string
|
167
|
+
value_expected = false
|
168
|
+
end
|
169
|
+
|
170
|
+
elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
|
171
|
+
/#{patterns::INSTANCE_VARIABLE}/o)
|
172
|
+
value_expected = false
|
173
|
+
encoder.text_token match, :instance_variable
|
174
|
+
|
175
|
+
elsif value_expected && match = scan(/\//)
|
176
|
+
encoder.begin_group :regexp
|
177
|
+
encoder.text_token match, :delimiter
|
178
|
+
state = self.class::StringState.new :regexp, true, '/'
|
179
|
+
|
180
|
+
elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o)
|
181
|
+
if method_call_expected
|
182
|
+
encoder.text_token match, :error
|
183
|
+
method_call_expected = false
|
258
184
|
else
|
259
|
-
|
185
|
+
encoder.text_token match, self[1] ? :float : :integer # TODO: send :hex/:octal/:binary
|
260
186
|
end
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
indented = self[1] == '-'
|
187
|
+
value_expected = false
|
188
|
+
|
189
|
+
elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x)
|
190
|
+
value_expected = true
|
191
|
+
encoder.text_token match, :operator
|
192
|
+
|
193
|
+
elsif value_expected && match = scan(/#{patterns::HEREDOC_OPEN}/o)
|
269
194
|
quote = self[3]
|
270
195
|
delim = self[quote ? 4 : 2]
|
271
196
|
kind = patterns::QUOTE_TO_TYPE[quote]
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart )
|
197
|
+
encoder.begin_group kind
|
198
|
+
encoder.text_token match, :delimiter
|
199
|
+
encoder.end_group kind
|
276
200
|
heredocs ||= [] # create heredocs if empty
|
277
|
-
heredocs <<
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
state =
|
285
|
-
|
286
|
-
|
287
|
-
elsif value_expected
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
elsif match = scan(/
|
292
|
-
value_expected =
|
293
|
-
|
294
|
-
|
201
|
+
heredocs << self.class::StringState.new(kind, quote != "'", delim,
|
202
|
+
self[1] == '-' ? :indented : :linestart)
|
203
|
+
value_expected = false
|
204
|
+
|
205
|
+
elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o)
|
206
|
+
kind = patterns::FANCY_STRING_KIND[self[1]]
|
207
|
+
encoder.begin_group kind
|
208
|
+
state = self.class::StringState.new kind, patterns::FANCY_STRING_INTERPRETED[self[1]], self[2]
|
209
|
+
encoder.text_token match, :delimiter
|
210
|
+
|
211
|
+
elsif value_expected && match = scan(/#{patterns::CHARACTER}/o)
|
212
|
+
value_expected = false
|
213
|
+
encoder.text_token match, :integer
|
214
|
+
|
215
|
+
elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x)
|
216
|
+
value_expected = true
|
217
|
+
encoder.text_token match, :operator
|
218
|
+
|
295
219
|
elsif match = scan(/`/)
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
kind = :delimiter
|
301
|
-
state = patterns::StringState.new :shell, true, match
|
302
|
-
end
|
303
|
-
|
220
|
+
encoder.begin_group :shell
|
221
|
+
encoder.text_token match, :delimiter
|
222
|
+
state = self.class::StringState.new :shell, true, match
|
223
|
+
|
304
224
|
elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
|
305
225
|
/#{patterns::GLOBAL_VARIABLE}/o)
|
306
|
-
|
307
|
-
|
226
|
+
encoder.text_token match, :global_variable
|
227
|
+
value_expected = false
|
228
|
+
|
308
229
|
elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
|
309
230
|
/#{patterns::CLASS_VARIABLE}/o)
|
310
|
-
|
311
|
-
|
231
|
+
encoder.text_token match, :class_variable
|
232
|
+
value_expected = false
|
233
|
+
|
234
|
+
elsif match = scan(/\\\z/)
|
235
|
+
encoder.text_token match, :space
|
236
|
+
|
312
237
|
else
|
313
|
-
if
|
238
|
+
if method_call_expected
|
239
|
+
method_call_expected = false
|
240
|
+
next
|
241
|
+
end
|
242
|
+
unless unicode
|
314
243
|
# check for unicode
|
315
|
-
|
244
|
+
$DEBUG_BEFORE, $DEBUG = $DEBUG, false
|
316
245
|
begin
|
317
246
|
if check(/./mu).size > 1
|
318
247
|
# seems like we should try again with unicode
|
@@ -321,124 +250,212 @@ module Scanners
|
|
321
250
|
rescue
|
322
251
|
# bad unicode char; use getch
|
323
252
|
ensure
|
324
|
-
$DEBUG =
|
253
|
+
$DEBUG = $DEBUG_BEFORE
|
325
254
|
end
|
326
255
|
next if unicode
|
327
256
|
end
|
328
|
-
|
329
|
-
|
330
|
-
|
257
|
+
|
258
|
+
encoder.text_token getch, :error
|
259
|
+
|
331
260
|
end
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
tokens << ['self', :pre_constant]
|
337
|
-
tokens << ['.', :operator]
|
261
|
+
|
262
|
+
if last_state
|
263
|
+
state = last_state
|
264
|
+
last_state = nil
|
338
265
|
end
|
266
|
+
|
267
|
+
elsif state == :def_expected
|
339
268
|
if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
|
340
269
|
/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
|
341
|
-
|
270
|
+
encoder.text_token match, :method
|
271
|
+
state = :initial
|
272
|
+
else
|
273
|
+
last_state = :dot_expected
|
274
|
+
state = :initial
|
275
|
+
end
|
276
|
+
|
277
|
+
elsif state == :dot_expected
|
278
|
+
if match = scan(/\.|::/)
|
279
|
+
# invalid definition
|
280
|
+
state = :def_expected
|
281
|
+
encoder.text_token match, :operator
|
342
282
|
else
|
343
|
-
|
283
|
+
state = :initial
|
344
284
|
end
|
345
|
-
|
285
|
+
|
346
286
|
elsif state == :module_expected
|
347
287
|
if match = scan(/<</)
|
348
|
-
|
288
|
+
encoder.text_token match, :operator
|
349
289
|
else
|
350
290
|
state = :initial
|
351
|
-
if match = scan(unicode ? /(?:#{patterns::IDENT}::)
|
352
|
-
/(?:#{patterns::IDENT}::)
|
353
|
-
|
354
|
-
else
|
355
|
-
next
|
291
|
+
if match = scan(unicode ? / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /oux :
|
292
|
+
/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
|
293
|
+
encoder.text_token match, :class
|
356
294
|
end
|
357
295
|
end
|
358
|
-
|
296
|
+
|
359
297
|
elsif state == :undef_expected
|
360
298
|
state = :undef_comma_expected
|
361
|
-
if match = scan(unicode ?
|
362
|
-
|
363
|
-
|
364
|
-
elsif match = scan(
|
365
|
-
/#{patterns::SYMBOL}/o)
|
299
|
+
if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
|
300
|
+
/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
|
301
|
+
encoder.text_token match, :method
|
302
|
+
elsif match = scan(/#{patterns::SYMBOL}/o)
|
366
303
|
case delim = match[1]
|
367
304
|
when ?', ?"
|
368
|
-
|
369
|
-
|
305
|
+
encoder.begin_group :symbol
|
306
|
+
encoder.text_token ':', :symbol
|
370
307
|
match = delim.chr
|
371
|
-
|
372
|
-
state =
|
308
|
+
encoder.text_token match, :delimiter
|
309
|
+
state = self.class::StringState.new :symbol, delim == ?", match
|
373
310
|
state.next_state = :undef_comma_expected
|
374
311
|
else
|
375
|
-
|
312
|
+
encoder.text_token match, :symbol
|
376
313
|
end
|
377
314
|
else
|
378
315
|
state = :initial
|
379
|
-
next
|
380
316
|
end
|
381
|
-
|
382
|
-
elsif state == :alias_expected
|
383
|
-
match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
|
384
|
-
/(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
|
385
317
|
|
386
|
-
if match
|
387
|
-
tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)]
|
388
|
-
tokens << [self[2], :space]
|
389
|
-
tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)]
|
390
|
-
end
|
391
|
-
state = :initial
|
392
|
-
next
|
393
|
-
|
394
318
|
elsif state == :undef_comma_expected
|
395
319
|
if match = scan(/,/)
|
396
|
-
|
320
|
+
encoder.text_token match, :operator
|
397
321
|
state = :undef_expected
|
398
322
|
else
|
399
323
|
state = :initial
|
400
|
-
next
|
401
324
|
end
|
402
|
-
|
325
|
+
|
326
|
+
elsif state == :alias_expected
|
327
|
+
match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
|
328
|
+
/(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
|
329
|
+
|
330
|
+
if match
|
331
|
+
encoder.text_token self[1], (self[1][0] == ?: ? :symbol : :method)
|
332
|
+
encoder.text_token self[2], :space
|
333
|
+
encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method)
|
334
|
+
end
|
335
|
+
state = :initial
|
336
|
+
|
337
|
+
else
|
338
|
+
#:nocov:
|
339
|
+
raise_inspect 'Unknown state: %p' % [state], encoder
|
340
|
+
#:nocov:
|
403
341
|
end
|
404
|
-
# }}}
|
405
342
|
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
343
|
+
else # StringState
|
344
|
+
|
345
|
+
match = scan_until(state.pattern) || scan_rest
|
346
|
+
unless match.empty?
|
347
|
+
encoder.text_token match, :content
|
348
|
+
break if eos?
|
411
349
|
end
|
412
350
|
|
413
|
-
if
|
414
|
-
|
415
|
-
|
351
|
+
if state.heredoc && self[1] # end of heredoc
|
352
|
+
match = getch
|
353
|
+
match << scan_until(/$/) unless eos?
|
354
|
+
encoder.text_token match, :delimiter unless match.empty?
|
355
|
+
encoder.end_group state.type
|
356
|
+
state = state.next_state
|
357
|
+
next
|
416
358
|
end
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
359
|
+
|
360
|
+
case match = getch
|
361
|
+
|
362
|
+
when state.delim
|
363
|
+
if state.paren_depth
|
364
|
+
state.paren_depth -= 1
|
365
|
+
if state.paren_depth > 0
|
366
|
+
encoder.text_token match, :content
|
367
|
+
next
|
368
|
+
end
|
369
|
+
end
|
370
|
+
encoder.text_token match, :delimiter
|
371
|
+
if state.type == :regexp && !eos?
|
372
|
+
match = scan(/#{patterns::REGEXP_MODIFIERS}/o)
|
373
|
+
encoder.text_token match, :modifier unless match.empty?
|
374
|
+
end
|
375
|
+
encoder.end_group state.type
|
376
|
+
value_expected = false
|
377
|
+
state = state.next_state
|
378
|
+
|
379
|
+
when '\\'
|
380
|
+
if state.interpreted
|
381
|
+
if esc = scan(/#{patterns::ESCAPE}/o)
|
382
|
+
encoder.text_token match + esc, :char
|
383
|
+
else
|
384
|
+
encoder.text_token match, :error
|
385
|
+
end
|
386
|
+
else
|
387
|
+
case esc = getch
|
388
|
+
when nil
|
389
|
+
encoder.text_token match, :content
|
390
|
+
when state.delim, '\\'
|
391
|
+
encoder.text_token match + esc, :char
|
392
|
+
else
|
393
|
+
encoder.text_token match + esc, :content
|
394
|
+
end
|
395
|
+
end
|
396
|
+
|
397
|
+
when '#'
|
398
|
+
case peek(1)
|
399
|
+
when '{'
|
400
|
+
inline_block_stack ||= []
|
401
|
+
inline_block_stack << [state, inline_block_curly_depth, heredocs]
|
402
|
+
value_expected = true
|
403
|
+
state = :initial
|
404
|
+
inline_block_curly_depth = 1
|
405
|
+
encoder.begin_group :inline
|
406
|
+
encoder.text_token match + getch, :inline_delimiter
|
407
|
+
when '$', '@'
|
408
|
+
encoder.text_token match, :escape
|
409
|
+
last_state = state
|
410
|
+
state = :initial
|
411
|
+
else
|
412
|
+
#:nocov:
|
413
|
+
raise_inspect 'else-case # reached; #%p not handled' % [peek(1)], encoder
|
414
|
+
#:nocov:
|
415
|
+
end
|
416
|
+
|
417
|
+
when state.opening_paren
|
418
|
+
state.paren_depth += 1
|
419
|
+
encoder.text_token match, :content
|
420
|
+
|
421
|
+
else
|
422
|
+
#:nocov
|
423
|
+
raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], encoder
|
424
|
+
#:nocov:
|
425
|
+
|
424
426
|
end
|
427
|
+
|
425
428
|
end
|
429
|
+
|
430
|
+
end
|
431
|
+
|
432
|
+
# cleaning up
|
433
|
+
if state.is_a? StringState
|
434
|
+
encoder.end_group state.type
|
426
435
|
end
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
436
|
+
|
437
|
+
if options[:keep_state]
|
438
|
+
if state.is_a?(StringState) && state.heredoc
|
439
|
+
(heredocs ||= []).unshift state
|
440
|
+
state = :initial
|
441
|
+
elsif heredocs && heredocs.empty?
|
442
|
+
heredocs = nil
|
443
|
+
end
|
444
|
+
@state = state, heredocs
|
434
445
|
end
|
435
|
-
|
436
|
-
|
446
|
+
|
447
|
+
if inline_block_stack
|
448
|
+
until inline_block_stack.empty?
|
449
|
+
state, = *inline_block_stack.pop
|
450
|
+
encoder.end_group :inline
|
451
|
+
encoder.end_group state.type
|
452
|
+
end
|
453
|
+
end
|
454
|
+
|
455
|
+
encoder
|
437
456
|
end
|
438
|
-
|
457
|
+
|
439
458
|
end
|
440
|
-
|
459
|
+
|
441
460
|
end
|
442
461
|
end
|
443
|
-
|
444
|
-
# vim:fdm=marker
|