coderay 0.9.8 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/{lib/README → README_INDEX.rdoc} +10 -21
- data/Rakefile +6 -6
- data/bin/coderay +193 -64
- data/lib/coderay.rb +61 -105
- data/lib/coderay/duo.rb +17 -21
- data/lib/coderay/encoder.rb +100 -112
- data/lib/coderay/encoders/_map.rb +12 -7
- data/lib/coderay/encoders/comment_filter.rb +12 -30
- data/lib/coderay/encoders/count.rb +29 -11
- data/lib/coderay/encoders/debug.rb +32 -20
- data/lib/coderay/encoders/div.rb +13 -9
- data/lib/coderay/encoders/filter.rb +34 -51
- data/lib/coderay/encoders/html.rb +155 -161
- data/lib/coderay/encoders/html/css.rb +4 -9
- data/lib/coderay/encoders/html/numbering.rb +115 -0
- data/lib/coderay/encoders/html/output.rb +22 -70
- data/lib/coderay/encoders/json.rb +59 -45
- data/lib/coderay/encoders/lines_of_code.rb +12 -57
- data/lib/coderay/encoders/null.rb +6 -14
- data/lib/coderay/encoders/page.rb +13 -9
- data/lib/coderay/encoders/span.rb +13 -9
- data/lib/coderay/encoders/statistic.rb +58 -39
- data/lib/coderay/encoders/terminal.rb +179 -0
- data/lib/coderay/encoders/text.rb +31 -17
- data/lib/coderay/encoders/token_kind_filter.rb +111 -0
- data/lib/coderay/encoders/xml.rb +19 -18
- data/lib/coderay/encoders/yaml.rb +37 -9
- data/lib/coderay/for_redcloth.rb +4 -4
- data/lib/coderay/helpers/file_type.rb +127 -246
- data/lib/coderay/helpers/gzip.rb +41 -0
- data/lib/coderay/helpers/plugin.rb +241 -306
- data/lib/coderay/helpers/word_list.rb +65 -126
- data/lib/coderay/scanner.rb +173 -156
- data/lib/coderay/scanners/_map.rb +18 -17
- data/lib/coderay/scanners/c.rb +63 -77
- data/lib/coderay/scanners/clojure.rb +217 -0
- data/lib/coderay/scanners/cpp.rb +71 -84
- data/lib/coderay/scanners/css.rb +103 -120
- data/lib/coderay/scanners/debug.rb +47 -44
- data/lib/coderay/scanners/delphi.rb +70 -76
- data/lib/coderay/scanners/diff.rb +141 -50
- data/lib/coderay/scanners/erb.rb +81 -0
- data/lib/coderay/scanners/groovy.rb +104 -113
- data/lib/coderay/scanners/haml.rb +168 -0
- data/lib/coderay/scanners/html.rb +181 -110
- data/lib/coderay/scanners/java.rb +73 -75
- data/lib/coderay/scanners/java/builtin_types.rb +2 -0
- data/lib/coderay/scanners/java_script.rb +90 -101
- data/lib/coderay/scanners/json.rb +40 -53
- data/lib/coderay/scanners/php.rb +123 -147
- data/lib/coderay/scanners/python.rb +93 -91
- data/lib/coderay/scanners/raydebug.rb +66 -0
- data/lib/coderay/scanners/ruby.rb +343 -326
- data/lib/coderay/scanners/ruby/patterns.rb +40 -106
- data/lib/coderay/scanners/ruby/string_state.rb +71 -0
- data/lib/coderay/scanners/sql.rb +80 -66
- data/lib/coderay/scanners/text.rb +26 -0
- data/lib/coderay/scanners/xml.rb +1 -1
- data/lib/coderay/scanners/yaml.rb +74 -73
- data/lib/coderay/style.rb +10 -7
- data/lib/coderay/styles/_map.rb +3 -3
- data/lib/coderay/styles/alpha.rb +143 -0
- data/lib/coderay/token_kinds.rb +90 -0
- data/lib/coderay/tokens.rb +102 -277
- data/lib/coderay/tokens_proxy.rb +55 -0
- data/lib/coderay/version.rb +3 -0
- data/test/functional/basic.rb +200 -18
- data/test/functional/examples.rb +130 -0
- data/test/functional/for_redcloth.rb +15 -8
- data/test/functional/suite.rb +9 -6
- metadata +103 -123
- data/FOLDERS +0 -53
- data/bin/coderay_stylesheet +0 -4
- data/lib/coderay/encoders/html/numerization.rb +0 -133
- data/lib/coderay/encoders/term.rb +0 -158
- data/lib/coderay/encoders/token_class_filter.rb +0 -84
- data/lib/coderay/helpers/gzip_simple.rb +0 -123
- data/lib/coderay/scanners/nitro_xhtml.rb +0 -136
- data/lib/coderay/scanners/plaintext.rb +0 -20
- data/lib/coderay/scanners/rhtml.rb +0 -78
- data/lib/coderay/scanners/scheme.rb +0 -145
- data/lib/coderay/styles/cycnus.rb +0 -152
- data/lib/coderay/styles/murphy.rb +0 -134
- data/lib/coderay/token_classes.rb +0 -86
- data/test/functional/load_plugin_scanner.rb +0 -11
- data/test/functional/vhdl.rb +0 -126
- data/test/functional/word_list.rb +0 -79
@@ -0,0 +1,66 @@
|
|
1
|
+
module CodeRay
|
2
|
+
module Scanners
|
3
|
+
|
4
|
+
# = Debug Scanner
|
5
|
+
#
|
6
|
+
# Parses the output of the Encoders::Debug encoder.
|
7
|
+
class Raydebug < Scanner
|
8
|
+
|
9
|
+
register_for :raydebug
|
10
|
+
file_extension 'raydebug'
|
11
|
+
title 'CodeRay Token Dump'
|
12
|
+
|
13
|
+
protected
|
14
|
+
|
15
|
+
def scan_tokens encoder, options
|
16
|
+
|
17
|
+
opened_tokens = []
|
18
|
+
|
19
|
+
until eos?
|
20
|
+
|
21
|
+
if match = scan(/\s+/)
|
22
|
+
encoder.text_token match, :space
|
23
|
+
|
24
|
+
elsif match = scan(/ (\w+) \( ( [^\)\\]* ( \\. [^\)\\]* )* ) /x)
|
25
|
+
kind = self[1]
|
26
|
+
encoder.text_token kind, :class
|
27
|
+
encoder.text_token '(', :operator
|
28
|
+
match = self[2]
|
29
|
+
encoder.text_token match, kind.to_sym
|
30
|
+
encoder.text_token match, :operator if match = scan(/\)/)
|
31
|
+
|
32
|
+
elsif match = scan(/ (\w+) ([<\[]) /x)
|
33
|
+
kind = self[1]
|
34
|
+
case self[2]
|
35
|
+
when '<'
|
36
|
+
encoder.text_token kind, :class
|
37
|
+
when '['
|
38
|
+
encoder.text_token kind, :class
|
39
|
+
else
|
40
|
+
raise 'CodeRay bug: This case should not be reached.'
|
41
|
+
end
|
42
|
+
kind = kind.to_sym
|
43
|
+
opened_tokens << kind
|
44
|
+
encoder.begin_group kind
|
45
|
+
encoder.text_token self[2], :operator
|
46
|
+
|
47
|
+
elsif !opened_tokens.empty? && match = scan(/ [>\]] /x)
|
48
|
+
encoder.text_token match, :operator
|
49
|
+
encoder.end_group opened_tokens.pop
|
50
|
+
|
51
|
+
else
|
52
|
+
encoder.text_token getch, :space
|
53
|
+
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
57
|
+
|
58
|
+
encoder.end_group opened_tokens.pop until opened_tokens.empty?
|
59
|
+
|
60
|
+
encoder
|
61
|
+
end
|
62
|
+
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
66
|
+
end
|
@@ -1,7 +1,6 @@
|
|
1
|
-
# encoding: utf-8
|
2
1
|
module CodeRay
|
3
2
|
module Scanners
|
4
|
-
|
3
|
+
|
5
4
|
# This scanner is really complex, since Ruby _is_ a complex language!
|
6
5
|
#
|
7
6
|
# It tries to highlight 100% of all common code,
|
@@ -9,310 +8,240 @@ module Scanners
|
|
9
8
|
#
|
10
9
|
# It is optimized for HTML highlighting, and is not very useful for
|
11
10
|
# parsing or pretty printing.
|
12
|
-
#
|
13
|
-
# For now, I think it's better than the scanners in VIM or Syntax, or
|
14
|
-
# any highlighter I was able to find, except Caleb's RubyLexer.
|
15
|
-
#
|
16
|
-
# I hope it's also better than the rdoc/irb lexer.
|
17
11
|
class Ruby < Scanner
|
18
|
-
|
19
|
-
include Streamable
|
20
|
-
|
12
|
+
|
21
13
|
register_for :ruby
|
22
14
|
file_extension 'rb'
|
23
|
-
|
24
|
-
helper :patterns
|
25
15
|
|
26
|
-
|
27
|
-
|
16
|
+
autoload :Patterns, 'coderay/scanners/ruby/patterns'
|
17
|
+
autoload :StringState, 'coderay/scanners/ruby/string_state'
|
18
|
+
|
19
|
+
def interpreted_string_state
|
20
|
+
StringState.new :string, true, '"'
|
28
21
|
end
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
22
|
+
|
23
|
+
protected
|
24
|
+
|
25
|
+
def setup
|
26
|
+
@state = :initial
|
27
|
+
end
|
28
|
+
|
29
|
+
def scan_tokens encoder, options
|
30
|
+
state, heredocs = options[:state] || @state
|
31
|
+
heredocs = heredocs.dup if heredocs.is_a?(Array)
|
32
|
+
|
33
|
+
if state && state.instance_of?(StringState)
|
34
|
+
encoder.begin_group state.type
|
40
35
|
end
|
41
36
|
|
42
|
-
last_token_dot = false
|
43
|
-
value_expected = true
|
44
|
-
heredocs = nil
|
45
37
|
last_state = nil
|
46
|
-
state = :initial
|
47
|
-
depth = nil
|
48
|
-
inline_block_stack = []
|
49
38
|
|
39
|
+
method_call_expected = false
|
40
|
+
value_expected = true
|
41
|
+
|
42
|
+
inline_block_stack = nil
|
43
|
+
inline_block_curly_depth = 0
|
44
|
+
|
45
|
+
if heredocs
|
46
|
+
state = heredocs.shift
|
47
|
+
encoder.begin_group state.type
|
48
|
+
heredocs = nil if heredocs.empty?
|
49
|
+
end
|
50
|
+
|
51
|
+
# def_object_stack = nil
|
52
|
+
# def_object_paren_depth = 0
|
50
53
|
|
51
54
|
patterns = Patterns # avoid constant lookup
|
52
55
|
|
56
|
+
unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
|
57
|
+
|
53
58
|
until eos?
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
tokens << [match, :delimiter]
|
67
|
-
tokens << [:close, state.type]
|
68
|
-
state = state.next_state
|
69
|
-
next
|
70
|
-
end
|
71
|
-
|
72
|
-
case match = getch
|
73
|
-
|
74
|
-
when state.delim
|
75
|
-
if state.paren
|
76
|
-
state.paren_depth -= 1
|
77
|
-
if state.paren_depth > 0
|
78
|
-
tokens << [match, :nesting_delimiter]
|
79
|
-
next
|
80
|
-
end
|
81
|
-
end
|
82
|
-
tokens << [match, :delimiter]
|
83
|
-
if state.type == :regexp and not eos?
|
84
|
-
modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
|
85
|
-
tokens << [modifiers, :modifier] unless modifiers.empty?
|
86
|
-
end
|
87
|
-
tokens << [:close, state.type]
|
88
|
-
value_expected = false
|
89
|
-
state = state.next_state
|
90
|
-
|
91
|
-
when '\\'
|
92
|
-
if state.interpreted
|
93
|
-
if esc = scan(/ #{patterns::ESCAPE} /ox)
|
94
|
-
tokens << [match + esc, :char]
|
95
|
-
else
|
96
|
-
tokens << [match, :error]
|
97
|
-
end
|
59
|
+
|
60
|
+
if state.instance_of? ::Symbol
|
61
|
+
|
62
|
+
if match = scan(/[ \t\f\v]+/)
|
63
|
+
encoder.text_token match, :space
|
64
|
+
|
65
|
+
elsif match = scan(/\n/)
|
66
|
+
if heredocs
|
67
|
+
unscan # heredoc scanning needs \n at start
|
68
|
+
state = heredocs.shift
|
69
|
+
encoder.begin_group state.type
|
70
|
+
heredocs = nil if heredocs.empty?
|
98
71
|
else
|
99
|
-
|
100
|
-
|
101
|
-
tokens << [match + m, :char]
|
102
|
-
when nil
|
103
|
-
tokens << [match, :error]
|
104
|
-
else
|
105
|
-
tokens << [match + m, :content]
|
106
|
-
end
|
107
|
-
end
|
108
|
-
|
109
|
-
when '#'
|
110
|
-
case peek(1)
|
111
|
-
when '{'
|
112
|
-
inline_block_stack << [state, depth, heredocs]
|
72
|
+
state = :initial if state == :undef_comma_expected
|
73
|
+
encoder.text_token match, :space
|
113
74
|
value_expected = true
|
114
|
-
state = :initial
|
115
|
-
depth = 1
|
116
|
-
tokens << [:open, :inline]
|
117
|
-
tokens << [match + getch, :inline_delimiter]
|
118
|
-
when '$', '@'
|
119
|
-
tokens << [match, :escape]
|
120
|
-
last_state = state # scan one token as normal code, then return here
|
121
|
-
state = :initial
|
122
|
-
else
|
123
|
-
raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
|
124
75
|
end
|
125
|
-
|
126
|
-
when state.paren
|
127
|
-
state.paren_depth += 1
|
128
|
-
tokens << [match, :nesting_delimiter]
|
129
|
-
|
130
|
-
when /#{patterns::REGEXP_SYMBOLS}/ox
|
131
|
-
tokens << [match, :function]
|
132
|
-
|
133
|
-
else
|
134
|
-
raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens
|
135
|
-
|
136
|
-
end
|
137
|
-
next
|
138
|
-
# }}}
|
139
|
-
else
|
140
|
-
# {{{
|
141
|
-
if match = scan(/[ \t\f]+/)
|
142
|
-
kind = :space
|
143
|
-
match << scan(/\s*/) unless eos? || heredocs
|
144
|
-
value_expected = true if match.index(?\n)
|
145
|
-
tokens << [match, kind]
|
146
|
-
next
|
147
76
|
|
148
|
-
elsif match = scan(
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
state = :initial if state == :undef_comma_expected
|
153
|
-
end
|
77
|
+
elsif match = scan(bol? ? / \#(!)?.* | #{patterns::RUBYDOC_OR_DATA} /ox : /\#.*/)
|
78
|
+
encoder.text_token match, self[1] ? :doctype : :comment
|
79
|
+
|
80
|
+
elsif match = scan(/\\\n/)
|
154
81
|
if heredocs
|
155
82
|
unscan # heredoc scanning needs \n at start
|
83
|
+
encoder.text_token scan(/\\/), :space
|
156
84
|
state = heredocs.shift
|
157
|
-
|
85
|
+
encoder.begin_group state.type
|
158
86
|
heredocs = nil if heredocs.empty?
|
159
|
-
next
|
160
87
|
else
|
161
|
-
match
|
88
|
+
encoder.text_token match, :space
|
162
89
|
end
|
163
|
-
tokens << [match, kind]
|
164
|
-
next
|
165
|
-
|
166
|
-
elsif bol? && match = scan(/\#!.*/)
|
167
|
-
tokens << [match, :doctype]
|
168
|
-
next
|
169
90
|
|
170
|
-
elsif match = scan(/\#.*/) or
|
171
|
-
( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
|
172
|
-
kind = :comment
|
173
|
-
tokens << [match, kind]
|
174
|
-
next
|
175
|
-
|
176
91
|
elsif state == :initial
|
177
|
-
|
92
|
+
|
178
93
|
# IDENTS #
|
179
|
-
if
|
94
|
+
if !method_call_expected &&
|
95
|
+
match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
|
180
96
|
/#{patterns::METHOD_NAME}/o)
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
if
|
185
|
-
|
186
|
-
match = ':'
|
187
|
-
kind = :operator
|
188
|
-
else
|
189
|
-
kind = patterns::IDENT_KIND[match]
|
190
|
-
if kind == :ident
|
191
|
-
if match[/\A[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
|
192
|
-
kind = :constant
|
193
|
-
end
|
194
|
-
elsif kind == :reserved
|
195
|
-
state = patterns::DEF_NEW_STATE[match]
|
196
|
-
value_expected = :set if patterns::KEYWORDS_EXPECTING_VALUE[match]
|
197
|
-
end
|
97
|
+
value_expected = false
|
98
|
+
kind = patterns::IDENT_KIND[match]
|
99
|
+
if kind == :ident
|
100
|
+
if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/))
|
101
|
+
kind = :constant
|
198
102
|
end
|
103
|
+
elsif kind == :keyword
|
104
|
+
state = patterns::KEYWORD_NEW_STATE[match]
|
105
|
+
value_expected = true if patterns::KEYWORDS_EXPECTING_VALUE[match]
|
199
106
|
end
|
200
|
-
value_expected =
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
value_expected = :set
|
107
|
+
value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
|
108
|
+
encoder.text_token match, kind
|
109
|
+
|
110
|
+
elsif method_call_expected &&
|
111
|
+
match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo :
|
112
|
+
/#{patterns::METHOD_AFTER_DOT}/o)
|
113
|
+
if method_call_expected == '::' && match[/\A[A-Z]/] && !match?(/\(/)
|
114
|
+
encoder.text_token match, :constant
|
115
|
+
else
|
116
|
+
encoder.text_token match, :ident
|
211
117
|
end
|
212
|
-
|
213
|
-
|
214
|
-
|
118
|
+
method_call_expected = false
|
119
|
+
value_expected = check(/#{patterns::VALUE_FOLLOWS}/o)
|
120
|
+
|
121
|
+
# OPERATORS #
|
122
|
+
elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | (?: \.\.\.? | ==?=? | [,\(\[\{] )() | [\)\]\}] /x)
|
123
|
+
method_call_expected = self[1]
|
124
|
+
value_expected = !method_call_expected && self[2]
|
125
|
+
if inline_block_stack
|
215
126
|
case match
|
216
127
|
when '{'
|
217
|
-
|
128
|
+
inline_block_curly_depth += 1
|
218
129
|
when '}'
|
219
|
-
|
220
|
-
if
|
221
|
-
state,
|
130
|
+
inline_block_curly_depth -= 1
|
131
|
+
if inline_block_curly_depth == 0 # closing brace of inline block reached
|
132
|
+
state, inline_block_curly_depth, heredocs = inline_block_stack.pop
|
133
|
+
inline_block_stack = nil if inline_block_stack.empty?
|
222
134
|
heredocs = nil if heredocs && heredocs.empty?
|
223
|
-
|
224
|
-
|
225
|
-
|
135
|
+
encoder.text_token match, :inline_delimiter
|
136
|
+
encoder.end_group :inline
|
137
|
+
next
|
226
138
|
end
|
227
139
|
end
|
228
140
|
end
|
229
|
-
|
230
|
-
|
231
|
-
tokens << [:open, :string]
|
232
|
-
kind = :delimiter
|
233
|
-
state = patterns::StringState.new :string, match == '"', match # important for streaming
|
234
|
-
|
235
|
-
elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
|
236
|
-
/#{patterns::INSTANCE_VARIABLE}/o)
|
237
|
-
kind = :instance_variable
|
238
|
-
|
239
|
-
elsif value_expected and match = scan(/\//)
|
240
|
-
tokens << [:open, :regexp]
|
241
|
-
kind = :delimiter
|
242
|
-
interpreted = true
|
243
|
-
state = patterns::StringState.new :regexp, interpreted, match
|
244
|
-
|
245
|
-
# elsif match = scan(/[-+]?#{patterns::NUMERIC}/o)
|
246
|
-
elsif match = value_expected ? scan(/[-+]?#{patterns::NUMERIC}/o) : scan(/#{patterns::NUMERIC}/o)
|
247
|
-
kind = self[1] ? :float : :integer
|
248
|
-
|
141
|
+
encoder.text_token match, :operator
|
142
|
+
|
249
143
|
elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
|
250
144
|
/#{patterns::SYMBOL}/o)
|
251
145
|
case delim = match[1]
|
252
146
|
when ?', ?"
|
253
|
-
|
254
|
-
|
147
|
+
encoder.begin_group :symbol
|
148
|
+
encoder.text_token ':', :symbol
|
255
149
|
match = delim.chr
|
256
|
-
|
257
|
-
state =
|
150
|
+
encoder.text_token match, :delimiter
|
151
|
+
state = self.class::StringState.new :symbol, delim == ?", match
|
152
|
+
else
|
153
|
+
encoder.text_token match, :symbol
|
154
|
+
value_expected = false
|
155
|
+
end
|
156
|
+
|
157
|
+
elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx)
|
158
|
+
encoder.begin_group :string
|
159
|
+
if match.size == 1
|
160
|
+
encoder.text_token match, :delimiter
|
161
|
+
state = self.class::StringState.new :string, match == '"', match # important for streaming
|
162
|
+
else
|
163
|
+
encoder.text_token match[0,1], :delimiter
|
164
|
+
encoder.text_token match[1..-2], :content if match.size > 2
|
165
|
+
encoder.text_token match[-1,1], :delimiter
|
166
|
+
encoder.end_group :string
|
167
|
+
value_expected = false
|
168
|
+
end
|
169
|
+
|
170
|
+
elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
|
171
|
+
/#{patterns::INSTANCE_VARIABLE}/o)
|
172
|
+
value_expected = false
|
173
|
+
encoder.text_token match, :instance_variable
|
174
|
+
|
175
|
+
elsif value_expected && match = scan(/\//)
|
176
|
+
encoder.begin_group :regexp
|
177
|
+
encoder.text_token match, :delimiter
|
178
|
+
state = self.class::StringState.new :regexp, true, '/'
|
179
|
+
|
180
|
+
elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o)
|
181
|
+
if method_call_expected
|
182
|
+
encoder.text_token match, :error
|
183
|
+
method_call_expected = false
|
258
184
|
else
|
259
|
-
|
185
|
+
encoder.text_token match, self[1] ? :float : :integer # TODO: send :hex/:octal/:binary
|
260
186
|
end
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
indented = self[1] == '-'
|
187
|
+
value_expected = false
|
188
|
+
|
189
|
+
elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x)
|
190
|
+
value_expected = true
|
191
|
+
encoder.text_token match, :operator
|
192
|
+
|
193
|
+
elsif value_expected && match = scan(/#{patterns::HEREDOC_OPEN}/o)
|
269
194
|
quote = self[3]
|
270
195
|
delim = self[quote ? 4 : 2]
|
271
196
|
kind = patterns::QUOTE_TO_TYPE[quote]
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart )
|
197
|
+
encoder.begin_group kind
|
198
|
+
encoder.text_token match, :delimiter
|
199
|
+
encoder.end_group kind
|
276
200
|
heredocs ||= [] # create heredocs if empty
|
277
|
-
heredocs <<
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
state =
|
285
|
-
|
286
|
-
|
287
|
-
elsif value_expected
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
elsif match = scan(/
|
292
|
-
value_expected =
|
293
|
-
|
294
|
-
|
201
|
+
heredocs << self.class::StringState.new(kind, quote != "'", delim,
|
202
|
+
self[1] == '-' ? :indented : :linestart)
|
203
|
+
value_expected = false
|
204
|
+
|
205
|
+
elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o)
|
206
|
+
kind = patterns::FANCY_STRING_KIND[self[1]]
|
207
|
+
encoder.begin_group kind
|
208
|
+
state = self.class::StringState.new kind, patterns::FANCY_STRING_INTERPRETED[self[1]], self[2]
|
209
|
+
encoder.text_token match, :delimiter
|
210
|
+
|
211
|
+
elsif value_expected && match = scan(/#{patterns::CHARACTER}/o)
|
212
|
+
value_expected = false
|
213
|
+
encoder.text_token match, :integer
|
214
|
+
|
215
|
+
elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x)
|
216
|
+
value_expected = true
|
217
|
+
encoder.text_token match, :operator
|
218
|
+
|
295
219
|
elsif match = scan(/`/)
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
kind = :delimiter
|
301
|
-
state = patterns::StringState.new :shell, true, match
|
302
|
-
end
|
303
|
-
|
220
|
+
encoder.begin_group :shell
|
221
|
+
encoder.text_token match, :delimiter
|
222
|
+
state = self.class::StringState.new :shell, true, match
|
223
|
+
|
304
224
|
elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
|
305
225
|
/#{patterns::GLOBAL_VARIABLE}/o)
|
306
|
-
|
307
|
-
|
226
|
+
encoder.text_token match, :global_variable
|
227
|
+
value_expected = false
|
228
|
+
|
308
229
|
elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
|
309
230
|
/#{patterns::CLASS_VARIABLE}/o)
|
310
|
-
|
311
|
-
|
231
|
+
encoder.text_token match, :class_variable
|
232
|
+
value_expected = false
|
233
|
+
|
234
|
+
elsif match = scan(/\\\z/)
|
235
|
+
encoder.text_token match, :space
|
236
|
+
|
312
237
|
else
|
313
|
-
if
|
238
|
+
if method_call_expected
|
239
|
+
method_call_expected = false
|
240
|
+
next
|
241
|
+
end
|
242
|
+
unless unicode
|
314
243
|
# check for unicode
|
315
|
-
|
244
|
+
$DEBUG_BEFORE, $DEBUG = $DEBUG, false
|
316
245
|
begin
|
317
246
|
if check(/./mu).size > 1
|
318
247
|
# seems like we should try again with unicode
|
@@ -321,124 +250,212 @@ module Scanners
|
|
321
250
|
rescue
|
322
251
|
# bad unicode char; use getch
|
323
252
|
ensure
|
324
|
-
$DEBUG =
|
253
|
+
$DEBUG = $DEBUG_BEFORE
|
325
254
|
end
|
326
255
|
next if unicode
|
327
256
|
end
|
328
|
-
|
329
|
-
|
330
|
-
|
257
|
+
|
258
|
+
encoder.text_token getch, :error
|
259
|
+
|
331
260
|
end
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
tokens << ['self', :pre_constant]
|
337
|
-
tokens << ['.', :operator]
|
261
|
+
|
262
|
+
if last_state
|
263
|
+
state = last_state
|
264
|
+
last_state = nil
|
338
265
|
end
|
266
|
+
|
267
|
+
elsif state == :def_expected
|
339
268
|
if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
|
340
269
|
/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
|
341
|
-
|
270
|
+
encoder.text_token match, :method
|
271
|
+
state = :initial
|
272
|
+
else
|
273
|
+
last_state = :dot_expected
|
274
|
+
state = :initial
|
275
|
+
end
|
276
|
+
|
277
|
+
elsif state == :dot_expected
|
278
|
+
if match = scan(/\.|::/)
|
279
|
+
# invalid definition
|
280
|
+
state = :def_expected
|
281
|
+
encoder.text_token match, :operator
|
342
282
|
else
|
343
|
-
|
283
|
+
state = :initial
|
344
284
|
end
|
345
|
-
|
285
|
+
|
346
286
|
elsif state == :module_expected
|
347
287
|
if match = scan(/<</)
|
348
|
-
|
288
|
+
encoder.text_token match, :operator
|
349
289
|
else
|
350
290
|
state = :initial
|
351
|
-
if match = scan(unicode ? /(?:#{patterns::IDENT}::)
|
352
|
-
/(?:#{patterns::IDENT}::)
|
353
|
-
|
354
|
-
else
|
355
|
-
next
|
291
|
+
if match = scan(unicode ? / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /oux :
|
292
|
+
/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
|
293
|
+
encoder.text_token match, :class
|
356
294
|
end
|
357
295
|
end
|
358
|
-
|
296
|
+
|
359
297
|
elsif state == :undef_expected
|
360
298
|
state = :undef_comma_expected
|
361
|
-
if match = scan(unicode ?
|
362
|
-
|
363
|
-
|
364
|
-
elsif match = scan(
|
365
|
-
/#{patterns::SYMBOL}/o)
|
299
|
+
if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
|
300
|
+
/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
|
301
|
+
encoder.text_token match, :method
|
302
|
+
elsif match = scan(/#{patterns::SYMBOL}/o)
|
366
303
|
case delim = match[1]
|
367
304
|
when ?', ?"
|
368
|
-
|
369
|
-
|
305
|
+
encoder.begin_group :symbol
|
306
|
+
encoder.text_token ':', :symbol
|
370
307
|
match = delim.chr
|
371
|
-
|
372
|
-
state =
|
308
|
+
encoder.text_token match, :delimiter
|
309
|
+
state = self.class::StringState.new :symbol, delim == ?", match
|
373
310
|
state.next_state = :undef_comma_expected
|
374
311
|
else
|
375
|
-
|
312
|
+
encoder.text_token match, :symbol
|
376
313
|
end
|
377
314
|
else
|
378
315
|
state = :initial
|
379
|
-
next
|
380
316
|
end
|
381
|
-
|
382
|
-
elsif state == :alias_expected
|
383
|
-
match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
|
384
|
-
/(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
|
385
317
|
|
386
|
-
if match
|
387
|
-
tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)]
|
388
|
-
tokens << [self[2], :space]
|
389
|
-
tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)]
|
390
|
-
end
|
391
|
-
state = :initial
|
392
|
-
next
|
393
|
-
|
394
318
|
elsif state == :undef_comma_expected
|
395
319
|
if match = scan(/,/)
|
396
|
-
|
320
|
+
encoder.text_token match, :operator
|
397
321
|
state = :undef_expected
|
398
322
|
else
|
399
323
|
state = :initial
|
400
|
-
next
|
401
324
|
end
|
402
|
-
|
325
|
+
|
326
|
+
elsif state == :alias_expected
|
327
|
+
match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
|
328
|
+
/(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
|
329
|
+
|
330
|
+
if match
|
331
|
+
encoder.text_token self[1], (self[1][0] == ?: ? :symbol : :method)
|
332
|
+
encoder.text_token self[2], :space
|
333
|
+
encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method)
|
334
|
+
end
|
335
|
+
state = :initial
|
336
|
+
|
337
|
+
else
|
338
|
+
#:nocov:
|
339
|
+
raise_inspect 'Unknown state: %p' % [state], encoder
|
340
|
+
#:nocov:
|
403
341
|
end
|
404
|
-
# }}}
|
405
342
|
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
343
|
+
else # StringState
|
344
|
+
|
345
|
+
match = scan_until(state.pattern) || scan_rest
|
346
|
+
unless match.empty?
|
347
|
+
encoder.text_token match, :content
|
348
|
+
break if eos?
|
411
349
|
end
|
412
350
|
|
413
|
-
if
|
414
|
-
|
415
|
-
|
351
|
+
if state.heredoc && self[1] # end of heredoc
|
352
|
+
match = getch
|
353
|
+
match << scan_until(/$/) unless eos?
|
354
|
+
encoder.text_token match, :delimiter unless match.empty?
|
355
|
+
encoder.end_group state.type
|
356
|
+
state = state.next_state
|
357
|
+
next
|
416
358
|
end
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
359
|
+
|
360
|
+
case match = getch
|
361
|
+
|
362
|
+
when state.delim
|
363
|
+
if state.paren_depth
|
364
|
+
state.paren_depth -= 1
|
365
|
+
if state.paren_depth > 0
|
366
|
+
encoder.text_token match, :content
|
367
|
+
next
|
368
|
+
end
|
369
|
+
end
|
370
|
+
encoder.text_token match, :delimiter
|
371
|
+
if state.type == :regexp && !eos?
|
372
|
+
match = scan(/#{patterns::REGEXP_MODIFIERS}/o)
|
373
|
+
encoder.text_token match, :modifier unless match.empty?
|
374
|
+
end
|
375
|
+
encoder.end_group state.type
|
376
|
+
value_expected = false
|
377
|
+
state = state.next_state
|
378
|
+
|
379
|
+
when '\\'
|
380
|
+
if state.interpreted
|
381
|
+
if esc = scan(/#{patterns::ESCAPE}/o)
|
382
|
+
encoder.text_token match + esc, :char
|
383
|
+
else
|
384
|
+
encoder.text_token match, :error
|
385
|
+
end
|
386
|
+
else
|
387
|
+
case esc = getch
|
388
|
+
when nil
|
389
|
+
encoder.text_token match, :content
|
390
|
+
when state.delim, '\\'
|
391
|
+
encoder.text_token match + esc, :char
|
392
|
+
else
|
393
|
+
encoder.text_token match + esc, :content
|
394
|
+
end
|
395
|
+
end
|
396
|
+
|
397
|
+
when '#'
|
398
|
+
case peek(1)
|
399
|
+
when '{'
|
400
|
+
inline_block_stack ||= []
|
401
|
+
inline_block_stack << [state, inline_block_curly_depth, heredocs]
|
402
|
+
value_expected = true
|
403
|
+
state = :initial
|
404
|
+
inline_block_curly_depth = 1
|
405
|
+
encoder.begin_group :inline
|
406
|
+
encoder.text_token match + getch, :inline_delimiter
|
407
|
+
when '$', '@'
|
408
|
+
encoder.text_token match, :escape
|
409
|
+
last_state = state
|
410
|
+
state = :initial
|
411
|
+
else
|
412
|
+
#:nocov:
|
413
|
+
raise_inspect 'else-case # reached; #%p not handled' % [peek(1)], encoder
|
414
|
+
#:nocov:
|
415
|
+
end
|
416
|
+
|
417
|
+
when state.opening_paren
|
418
|
+
state.paren_depth += 1
|
419
|
+
encoder.text_token match, :content
|
420
|
+
|
421
|
+
else
|
422
|
+
#:nocov
|
423
|
+
raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], encoder
|
424
|
+
#:nocov:
|
425
|
+
|
424
426
|
end
|
427
|
+
|
425
428
|
end
|
429
|
+
|
430
|
+
end
|
431
|
+
|
432
|
+
# cleaning up
|
433
|
+
if state.is_a? StringState
|
434
|
+
encoder.end_group state.type
|
426
435
|
end
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
436
|
+
|
437
|
+
if options[:keep_state]
|
438
|
+
if state.is_a?(StringState) && state.heredoc
|
439
|
+
(heredocs ||= []).unshift state
|
440
|
+
state = :initial
|
441
|
+
elsif heredocs && heredocs.empty?
|
442
|
+
heredocs = nil
|
443
|
+
end
|
444
|
+
@state = state, heredocs
|
434
445
|
end
|
435
|
-
|
436
|
-
|
446
|
+
|
447
|
+
if inline_block_stack
|
448
|
+
until inline_block_stack.empty?
|
449
|
+
state, = *inline_block_stack.pop
|
450
|
+
encoder.end_group :inline
|
451
|
+
encoder.end_group state.type
|
452
|
+
end
|
453
|
+
end
|
454
|
+
|
455
|
+
encoder
|
437
456
|
end
|
438
|
-
|
457
|
+
|
439
458
|
end
|
440
|
-
|
459
|
+
|
441
460
|
end
|
442
461
|
end
|
443
|
-
|
444
|
-
# vim:fdm=marker
|