coderay-beta 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/FOLDERS +53 -0
- data/LICENSE +504 -0
- data/bin/coderay +82 -0
- data/bin/coderay_stylesheet +4 -0
- data/lib/README +129 -0
- data/lib/coderay.rb +320 -0
- data/lib/coderay/duo.rb +85 -0
- data/lib/coderay/encoder.rb +213 -0
- data/lib/coderay/encoders/_map.rb +11 -0
- data/lib/coderay/encoders/comment_filter.rb +43 -0
- data/lib/coderay/encoders/count.rb +21 -0
- data/lib/coderay/encoders/debug.rb +49 -0
- data/lib/coderay/encoders/div.rb +19 -0
- data/lib/coderay/encoders/filter.rb +75 -0
- data/lib/coderay/encoders/html.rb +305 -0
- data/lib/coderay/encoders/html/css.rb +70 -0
- data/lib/coderay/encoders/html/numerization.rb +133 -0
- data/lib/coderay/encoders/html/output.rb +206 -0
- data/lib/coderay/encoders/json.rb +69 -0
- data/lib/coderay/encoders/lines_of_code.rb +90 -0
- data/lib/coderay/encoders/null.rb +26 -0
- data/lib/coderay/encoders/page.rb +20 -0
- data/lib/coderay/encoders/span.rb +19 -0
- data/lib/coderay/encoders/statistic.rb +77 -0
- data/lib/coderay/encoders/term.rb +137 -0
- data/lib/coderay/encoders/text.rb +32 -0
- data/lib/coderay/encoders/token_class_filter.rb +84 -0
- data/lib/coderay/encoders/xml.rb +71 -0
- data/lib/coderay/encoders/yaml.rb +22 -0
- data/lib/coderay/for_redcloth.rb +85 -0
- data/lib/coderay/helpers/file_type.rb +240 -0
- data/lib/coderay/helpers/gzip_simple.rb +123 -0
- data/lib/coderay/helpers/plugin.rb +349 -0
- data/lib/coderay/helpers/word_list.rb +138 -0
- data/lib/coderay/scanner.rb +284 -0
- data/lib/coderay/scanners/_map.rb +23 -0
- data/lib/coderay/scanners/c.rb +203 -0
- data/lib/coderay/scanners/cpp.rb +228 -0
- data/lib/coderay/scanners/css.rb +210 -0
- data/lib/coderay/scanners/debug.rb +62 -0
- data/lib/coderay/scanners/delphi.rb +150 -0
- data/lib/coderay/scanners/diff.rb +105 -0
- data/lib/coderay/scanners/groovy.rb +263 -0
- data/lib/coderay/scanners/html.rb +182 -0
- data/lib/coderay/scanners/java.rb +176 -0
- data/lib/coderay/scanners/java/builtin_types.rb +419 -0
- data/lib/coderay/scanners/java_script.rb +224 -0
- data/lib/coderay/scanners/json.rb +112 -0
- data/lib/coderay/scanners/nitro_xhtml.rb +136 -0
- data/lib/coderay/scanners/php.rb +526 -0
- data/lib/coderay/scanners/plaintext.rb +21 -0
- data/lib/coderay/scanners/python.rb +285 -0
- data/lib/coderay/scanners/rhtml.rb +74 -0
- data/lib/coderay/scanners/ruby.rb +404 -0
- data/lib/coderay/scanners/ruby/patterns.rb +238 -0
- data/lib/coderay/scanners/scheme.rb +145 -0
- data/lib/coderay/scanners/sql.rb +162 -0
- data/lib/coderay/scanners/xml.rb +17 -0
- data/lib/coderay/scanners/yaml.rb +144 -0
- data/lib/coderay/style.rb +20 -0
- data/lib/coderay/styles/_map.rb +7 -0
- data/lib/coderay/styles/cycnus.rb +151 -0
- data/lib/coderay/styles/murphy.rb +132 -0
- data/lib/coderay/token_classes.rb +86 -0
- data/lib/coderay/tokens.rb +391 -0
- data/lib/term/ansicolor.rb +220 -0
- metadata +123 -0
@@ -0,0 +1,21 @@
|
|
1
|
+
module CodeRay
|
2
|
+
module Scanners
|
3
|
+
|
4
|
+
# Scanner for plain, unstructured text.
#
# It performs no lexical analysis at all: whatever input is left is
# emitted as one single token of kind :plain.
class Plaintext < Scanner

  register_for :plaintext, :plain
  title 'Plain text'

  include Streamable

  # Token kinds listed here are the ones this scanner declares as
  # "not lines of code".
  KINDS_NOT_LOC = [:plain]

  # Consumes the rest of the input and appends it to +tokens+ as a
  # single [text, :plain] pair. +options+ is accepted but not used.
  #
  # Returns +tokens+ (the result of the append).
  def scan_tokens tokens, options
    remainder = scan_until(/\z/)
    remainder = '' unless remainder
    tokens << [remainder, :plain]
  end

end
|
19
|
+
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,285 @@
|
|
1
|
+
module CodeRay
|
2
|
+
module Scanners
|
3
|
+
|
4
|
+
# Based on pygments' PythonLexer, see
|
5
|
+
# http://dev.pocoo.org/projects/pygments/browser/pygments/lexers/agile.py.
|
6
|
+
# Scanner for Python source code.
#
# The scanner is a small state machine. Besides the default :initial
# state it knows :string (inside a string literal) and three
# "expectation" states entered after the keywords listed in
# DEF_NEW_STATE: :def_expected, :class_expected and :include_expected.
class Python < Scanner

  include Streamable

  register_for :python
  file_extension 'py'

  KEYWORDS = [
    'and', 'as', 'assert', 'break', 'class', 'continue', 'def',
    'del', 'elif', 'else', 'except', 'finally', 'for',
    'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'not',
    'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield',
    'nonlocal',  # new in Python 3
  ]

  # Words that are only treated as keywords when they are not followed
  # by an opening parenthesis (see scan_tokens).
  OLD_KEYWORDS = [
    'exec', 'print',  # gone in Python 3
  ]

  PREDEFINED_METHODS_AND_TYPES = %w[
    __import__ abs all any apply basestring bin bool buffer
    bytearray bytes callable chr classmethod cmp coerce compile
    complex delattr dict dir divmod enumerate eval execfile exit
    file filter float frozenset getattr globals hasattr hash hex id
    input int intern isinstance issubclass iter len list locals
    long map max min next object oct open ord pow property range
    raw_input reduce reload repr reversed round set setattr slice
    sorted staticmethod str sum super tuple type unichr unicode
    vars xrange zip
  ]

  PREDEFINED_EXCEPTIONS = %w[
    ArithmeticError AssertionError AttributeError
    BaseException DeprecationWarning EOFError EnvironmentError
    Exception FloatingPointError FutureWarning GeneratorExit IOError
    ImportError ImportWarning IndentationError IndexError KeyError
    KeyboardInterrupt LookupError MemoryError NameError
    NotImplemented NotImplementedError OSError OverflowError
    OverflowWarning PendingDeprecationWarning ReferenceError
    RuntimeError RuntimeWarning StandardError StopIteration
    SyntaxError SyntaxWarning SystemError SystemExit TabError
    TypeError UnboundLocalError UnicodeDecodeError
    UnicodeEncodeError UnicodeError UnicodeTranslateError
    UnicodeWarning UserWarning ValueError Warning ZeroDivisionError
  ]

  PREDEFINED_VARIABLES_AND_CONSTANTS = [
    'False', 'True', 'None',  # "keywords" since Python 3
    'self', 'Ellipsis', 'NotImplemented',
  ]

  # Maps an identifier to its token kind; anything unknown is :ident.
  IDENT_KIND = WordList.new(:ident).
    add(KEYWORDS, :keyword).
    add(OLD_KEYWORDS, :old_keyword).
    add(PREDEFINED_METHODS_AND_TYPES, :predefined).
    add(PREDEFINED_VARIABLES_AND_CONSTANTS, :pre_constant).
    add(PREDEFINED_EXCEPTIONS, :exception)

  NAME = / [^\W\d] \w* /x
  ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
  UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} | N\{[-\w ]+\} /x

  OPERATOR = /
    \.\.\. |          # ellipsis
    \.(?!\d) |        # dot but not decimal point
    [,;:()\[\]{}] |   # simple delimiters
    \/\/=? | \*\*=? | # special math
    [-+*\/%&|^]=? |   # ordinary math and binary logic
    [~`] |            # binary complement and inspection
    <<=? | >>=? | [<>=]=? | !=  # comparison and assignment
  /x

  # Lazily built cache: string delimiter => regexp matching exactly it.
  STRING_DELIMITER_REGEXP = Hash.new do |h, delimiter|
    h[delimiter] = Regexp.union delimiter
  end

  # Lazily built cache: string delimiter => regexp matching a run of
  # plain content up to the next backslash, end of line or delimiter.
  STRING_CONTENT_REGEXP = Hash.new do |h, delimiter|
    h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x
  end

  # Keyword => scanner state to switch to after that keyword.
  DEF_NEW_STATE = WordList.new(:initial).
    add(%w(def), :def_expected).
    add(%w(import from), :include_expected).
    add(%w(class), :class_expected)

  # A dotted name, or the "*" of "from x import *".
  DESCRIPTOR = /
    #{NAME}
    (?: \. #{NAME} )*
    | \*
  /x

  # Tokenizes the input and appends [text, kind] pairs (plus
  # [:open, kind] / [:close, kind] markers around strings) to +tokens+.
  # +options+ is accepted but not used.
  #
  # Returns +tokens+.
  def scan_tokens tokens, options

    state = :initial
    string_delimiter = nil
    string_raw = false
    import_clause = class_name_follows = last_token_dot = false
    unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
    # Remembers which of :from / :import / :as were seen in the current
    # import statement.
    from_import_state = []

    until eos?

      kind = nil
      match = nil

      if state == :string
        if scan(STRING_DELIMITER_REGEXP[string_delimiter])
          tokens << [matched, :delimiter]
          tokens << [:close, :string]
          state = :initial
          next
        elsif string_delimiter.size == 3 && scan(/\n/)
          # only triple-quoted strings may span lines
          kind = :content
        elsif scan(STRING_CONTENT_REGEXP[string_delimiter])
          kind = :content
        elsif !string_raw && scan(/ \\ #{ESCAPE} /ox)
          kind = :char
        elsif scan(/ \\ #{UNICODE_ESCAPE} /ox)
          kind = :char
        elsif scan(/ \\ . /x)
          kind = :content
        elsif scan(/ \\ | $ /x)
          # lone backslash or end of line inside a one-line string:
          # close the string and flag an error
          tokens << [:close, :string]
          kind = :error
          state = :initial
        else
          raise_inspect "else case \" reached; %p not handled." % peek(1), tokens, state
        end

      elsif match = scan(/ [ \t]+ | \\\n /x)
        tokens << [match, :space]
        next

      elsif match = scan(/\n/)
        tokens << [match, :space]
        if state == :include_expected
          # An unescaped newline ends the import statement; drop the
          # bookkeeping too so it does not pile up across statements.
          state = :initial
          from_import_state = []
        end
        next

      elsif match = scan(/ \# [^\n]* /mx)
        tokens << [match, :comment]
        next

      elsif state == :initial

        if scan(/#{OPERATOR}/o)
          kind = :operator

        elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
          tokens << [:open, :string]
          string_delimiter = self[2]
          string_raw = false
          modifiers = self[1]
          unless modifiers.empty?
            # The prefix is matched case-insensitively, so the raw flag
            # must be detected case-insensitively as well (R"..." is raw).
            string_raw = !!(modifiers =~ /r/i)
            tokens << [modifiers, :modifier]
            match = string_delimiter
          end
          state = :string
          kind = :delimiter

        # TODO: backticks

        elsif match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
          kind = IDENT_KIND[match]
          # TODO: keyword arguments
          kind = :ident if last_token_dot
          if kind == :old_keyword
            # "print(...)" is a plain call, "print ..." is a keyword
            kind = check(/\(/) ? :ident : :keyword
          elsif kind == :predefined && check(/ *=/)
            # a builtin name that is being assigned to is just an identifier
            kind = :ident
          elsif kind == :keyword
            state = DEF_NEW_STATE[match]
            from_import_state << match.to_sym if state == :include_expected
          end

        elsif scan(/@[a-zA-Z0-9_.]+[lL]?/)
          kind = :decorator

        elsif scan(/0[xX][0-9A-Fa-f]+[lL]?/)
          kind = :hex

        elsif scan(/0[bB][01]+[lL]?/)
          kind = :bin

        elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
          kind = :float
          if scan(/[jJ]/)
            match << matched
            kind = :imaginary
          end

        elsif scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
          kind = :oct

        elsif match = scan(/\d+([lL])?/)
          kind = :integer
          # a "long" suffix excludes the imaginary suffix
          if self[1] == nil && scan(/[jJ]/)
            match << matched
            kind = :imaginary
          end

        else
          getch
          kind = :error

        end

      elsif state == :def_expected
        state = :initial
        if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
          kind = :method
        else
          next
        end

      elsif state == :class_expected
        state = :initial
        if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
          kind = :class
        else
          next
        end

      elsif state == :include_expected
        if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
          kind = :include
          if match == 'as'
            kind = :keyword
            from_import_state << :as
          elsif from_import_state.first == :from && match == 'import'
            kind = :keyword
            from_import_state << :import
          elsif from_import_state.last == :as
            # kind = match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
            kind = :ident
            from_import_state.pop
          elsif IDENT_KIND[match] == :keyword
            # some other keyword: hand it back to the :initial state
            unscan
            match = nil
            state = :initial
            next
          end
        elsif match = scan(/,/)
          from_import_state.pop if from_import_state.last == :as
          kind = :operator
        else
          from_import_state = []
          state = :initial
          next
        end

      else
        raise_inspect 'Unknown state', tokens, state

      end

      match ||= matched
      if $DEBUG and not kind
        raise_inspect 'Error token %p in line %d' %
          [[match, kind], line], tokens, state
      end
      raise_inspect 'Empty token', tokens, state unless match

      last_token_dot = match == '.'

      tokens << [match, kind]

    end

    # input ended inside a string: close it so open/close stay balanced
    if state == :string
      tokens << [:close, :string]
    end

    tokens
  end

end
|
283
|
+
|
284
|
+
end
|
285
|
+
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
module CodeRay
|
2
|
+
module Scanners
|
3
|
+
|
4
|
+
load :html
|
5
|
+
load :ruby
|
6
|
+
|
7
|
+
# RHTML Scanner
|
8
|
+
# Scanner for HTML templates with embedded Ruby (ERB).
#
# The input is split into HTML stretches and <% ... %> blocks. The
# former are delegated to an HTML scanner, the latter to a Ruby
# scanner; both write into the same token list.
class RHTML < Scanner

  include Streamable
  register_for :rhtml
  title 'HTML ERB Template'

  KINDS_NOT_LOC = HTML::KINDS_NOT_LOC

  # A complete ERB block: opening tag, body, and an optional closing
  # tag (the closing tag is missing when the block is unterminated).
  ERB_RUBY_BLOCK = /
    <%(?!%)[=-]?
    (?>
      [^\-%]*    # normal*
      (?>        # special
        (?: %(?!>) | -(?!%>) )
        [^\-%]*  # normal*
      )*
    )
    (?: -?%> )?
  /x

  # "<%" that is not the start of the "<%%" escape.
  START_OF_ERB = /
    <%(?!%)
  /x

  private

  # Creates the two delegate scanners, both bound to this scanner's
  # token list.
  def setup
    @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
    @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
  end

  # Resets this scanner and the HTML delegate.
  def reset_instance
    super
    @html_scanner.reset
  end

  # Tokenizes the template into +tokens+ and returns +tokens+.
  # +options+ is accepted but not used.
  def scan_tokens tokens, options

    until eos?

      if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty?
        # everything up to the next ERB block (or the end) is HTML
        @html_scanner.tokenize match

      elsif match = scan(/#{ERB_RUBY_BLOCK}/o)
        start_tag = match[/\A<%[-=]?/]
        # Only a real closing tag ("%>" or "-%>") counts. A stray
        # trailing "-", "%" or ">" of an unterminated block belongs to
        # the code (or to the start tag) and must not be reported as a
        # closing delimiter.
        end_tag = match[/(?:-?%>)?\z/]
        tokens << [:open, :inline]
        tokens << [start_tag, :inline_delimiter]
        code = match[start_tag.size .. -1 - end_tag.size]
        @ruby_scanner.tokenize code
        tokens << [end_tag, :inline_delimiter] unless end_tag.empty?
        tokens << [:close, :inline]

      else
        raise_inspect 'else-case reached!', tokens
      end

    end

    tokens

  end

end
|
72
|
+
|
73
|
+
end
|
74
|
+
end
|
@@ -0,0 +1,404 @@
|
|
1
|
+
module CodeRay
|
2
|
+
module Scanners
|
3
|
+
|
4
|
+
# This scanner is really complex, since Ruby _is_ a complex language!
|
5
|
+
#
|
6
|
+
# It tries to highlight 100% of all common code,
|
7
|
+
# and 90% of strange code.
|
8
|
+
#
|
9
|
+
# It is optimized for HTML highlighting, and is not very useful for
|
10
|
+
# parsing or pretty printing.
|
11
|
+
#
|
12
|
+
# For now, I think it's better than the scanners in VIM or Syntax, or
|
13
|
+
# any highlighter I was able to find, except Caleb's RubyLexer.
|
14
|
+
#
|
15
|
+
# I hope it's also better than the rdoc/irb lexer.
|
16
|
+
# Scanner for Ruby source code.
#
# scan_tokens is one big state machine. +state+ is either a symbol
# (:initial, :def_expected, :module_expected, :undef_expected,
# :undef_comma_expected, :alias_expected) or a Patterns::StringState
# object while the scanner is inside a string-like literal.
class Ruby < Scanner

  include Streamable

  register_for :ruby
  file_extension 'rb'

  helper :patterns

  if not defined? EncodingError
    EncodingError = Class.new Exception
  end

  private
  # Tokenizes the input and appends [text, kind] pairs (plus
  # [:open, kind] / [:close, kind] markers) to +tokens+.
  # +options+ is accepted but not used.
  #
  # Returns +tokens+.
  def scan_tokens tokens, options
    # last_token_dot and value_expected are tri-state: a branch sets
    # them to :set, and after each token they are normalized to
    # true (was :set) or false (anything else).
    last_token_dot = false
    value_expected = true
    heredocs = nil
    last_state = nil
    state = :initial
    depth = nil
    # one [state, depth, heredocs] entry per open "#{" interpolation
    inline_block_stack = []
    unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'

    patterns = Patterns  # avoid constant lookup

    until eos?
      match = nil
      kind = nil

      if state.instance_of? patterns::StringState
# {{{
        match = scan_until(state.pattern) || scan_until(/\z/)
        tokens << [match, :content] unless match.empty?
        break if eos?

        if state.heredoc and self[1]  # end of heredoc
          match = getch.to_s
          match << scan_until(/$/) unless eos?
          tokens << [match, :delimiter]
          tokens << [:close, state.type]
          state = state.next_state
          next
        end

        case match = getch

        when state.delim
          if state.paren
            state.paren_depth -= 1
            if state.paren_depth > 0
              tokens << [match, :nesting_delimiter]
              next
            end
          end
          tokens << [match, :delimiter]
          if state.type == :regexp and not eos?
            modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
            tokens << [modifiers, :modifier] unless modifiers.empty?
          end
          tokens << [:close, state.type]
          value_expected = false
          state = state.next_state

        when '\\'
          if state.interpreted
            if esc = scan(/ #{patterns::ESCAPE} /ox)
              tokens << [match + esc, :char]
            else
              tokens << [match, :error]
            end
          else
            case m = getch
            when state.delim, '\\'
              tokens << [match + m, :char]
            when nil
              tokens << [match, :error]
            else
              tokens << [match + m, :content]
            end
          end

        when '#'
          case peek(1)
          when '{'
            # interpolation: remember where we are, scan normal code
            # until the matching brace
            inline_block_stack << [state, depth, heredocs]
            value_expected = true
            state = :initial
            depth = 1
            tokens << [:open, :inline]
            tokens << [match + getch, :inline_delimiter]
          when '$', '@'
            tokens << [match, :escape]
            last_state = state  # scan one token as normal code, then return here
            state = :initial
          else
            raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
          end

        when state.paren
          state.paren_depth += 1
          tokens << [match, :nesting_delimiter]

        when /#{patterns::REGEXP_SYMBOLS}/ox
          tokens << [match, :function]

        else
          raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens

        end
        next
# }}}
      else
# {{{
        if match = scan(/[ \t\f]+/)
          kind = :space
          match << scan(/\s*/) unless eos? || heredocs
          value_expected = true if match.index(?\n)
          tokens << [match, kind]
          next

        elsif match = scan(/\\?\n/)
          kind = :space
          if match == "\n"
            value_expected = true
            state = :initial if state == :undef_comma_expected
          end
          if heredocs
            unscan  # heredoc scanning needs \n at start
            state = heredocs.shift
            tokens << [:open, state.type]
            heredocs = nil if heredocs.empty?
            next
          else
            match << scan(/\s*/) unless eos?
          end
          tokens << [match, kind]
          next

        elsif bol? && match = scan(/\#!.*/)
          tokens << [match, :doctype]
          next

        elsif match = scan(/\#.*/) or
          ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
            kind = :comment
            tokens << [match, kind]
            next

        elsif state == :initial

          # IDENTS #
          if match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
                                    /#{patterns::METHOD_NAME}/o)
            if last_token_dot
              kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
            else
              kind = patterns::IDENT_KIND[match]
              if kind == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
                kind = :constant
              elsif kind == :reserved
                state = patterns::DEF_NEW_STATE[match]
                value_expected = :set if patterns::KEYWORDS_EXPECTING_VALUE[match]
              end
            end
            value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)

          elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}|\(/o)
            kind = :ident
            value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)

          # OPERATORS #
          elsif not last_token_dot and match = scan(/ \.\.\.? | (?:\.|::)() | [,\(\)\[\]\{\}] | ==?=? /x)
            if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
              value_expected = :set
            end
            # the empty group 1 only participates for "." and "::"
            last_token_dot = :set if self[1]
            kind = :operator
            unless inline_block_stack.empty?
              case match
              when '{'
                depth += 1
              when '}'
                depth -= 1
                if depth == 0  # closing brace of inline block reached
                  state, depth, heredocs = inline_block_stack.pop
                  heredocs = nil if heredocs && heredocs.empty?
                  tokens << [match, :inline_delimiter]
                  kind = :inline
                  match = :close
                end
              end
            end

          elsif match = scan(/ ['"] /mx)
            tokens << [:open, :string]
            kind = :delimiter
            state = patterns::StringState.new :string, match == '"', match  # important for streaming

          elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o)
            kind = :instance_variable

          elsif value_expected and match = scan(/\//)
            tokens << [:open, :regexp]
            kind = :delimiter
            interpreted = true
            state = patterns::StringState.new :regexp, interpreted, match

          # elsif match = scan(/[-+]?#{patterns::NUMERIC}/o)
          elsif match = value_expected ? scan(/[-+]?#{patterns::NUMERIC}/o) : scan(/#{patterns::NUMERIC}/o)
            kind = self[1] ? :float : :integer

          elsif match = scan(/#{patterns::SYMBOL}/o)
            case delim = match[1]
            when ?', ?"
              tokens << [:open, :symbol]
              tokens << [':', :symbol]
              match = delim.chr
              kind = :delimiter
              state = patterns::StringState.new :symbol, delim == ?", match
            else
              kind = :symbol
            end

          elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
            value_expected = :set
            kind = :operator

          elsif value_expected and match = scan(/#{patterns::HEREDOC_OPEN}/o)
            indented = self[1] == '-'
            quote = self[3]
            delim = self[quote ? 4 : 2]
            kind = patterns::QUOTE_TO_TYPE[quote]
            tokens << [:open, kind]
            tokens << [match, :delimiter]
            match = :close
            heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart )
            heredocs ||= []  # create heredocs if empty
            heredocs << heredoc

          elsif value_expected and match = scan(/#{patterns::FANCY_START_CORRECT}/o)
            # Keep the type character in a local so the fallback block
            # can report it (it used to reference an undefined variable).
            fancy_type = self[1]
            kind, interpreted = *patterns::FancyStringType.fetch(fancy_type) do
              raise_inspect 'Unknown fancy string: %%%p' % fancy_type, tokens
            end
            tokens << [:open, kind]
            state = patterns::StringState.new kind, interpreted, self[2]
            kind = :delimiter

          elsif value_expected and match = scan(/#{patterns::CHARACTER}/o)
            kind = :integer

          elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
            value_expected = :set
            kind = :operator

          elsif match = scan(/`/)
            if last_token_dot
              kind = :operator
            else
              tokens << [:open, :shell]
              kind = :delimiter
              state = patterns::StringState.new :shell, true, match
            end

          elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o)
            kind = :global_variable

          elsif match = scan(/#{patterns::CLASS_VARIABLE}/o)
            kind = :class_variable

          else
            kind = :error
            match = (scan(/./mu) rescue nil) || getch
            if !unicode && match.size > 1
              # warn 'Switching to unicode mode because of char %p' % [match]
              unicode = true
              unscan
              next
            end

          end

        elsif state == :def_expected
          state = :initial
          if scan(/self\./)
            tokens << ['self', :pre_constant]
            tokens << ['.', :operator]
          end
          if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                    /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
            kind = :method
          else
            next
          end

        elsif state == :module_expected
          if match = scan(/<</)
            kind = :operator
          else
            state = :initial
            if match = scan(/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
              kind = :class
            else
              next
            end
          end

        elsif state == :undef_expected
          state = :undef_comma_expected
          if match = scan(/#{patterns::METHOD_NAME_EX}/o)
            kind = :method
          elsif match = scan(/#{patterns::SYMBOL}/o)
            case delim = match[1]
            when ?', ?"
              tokens << [:open, :symbol]
              tokens << [':', :symbol]
              match = delim.chr
              kind = :delimiter
              state = patterns::StringState.new :symbol, delim == ?", match
              state.next_state = :undef_comma_expected
            else
              kind = :symbol
            end
          else
            state = :initial
            next
          end

        elsif state == :alias_expected
          match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
                                 /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)

          if match
            tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)]
            tokens << [self[2], :space]
            tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)]
          end
          state = :initial
          next

        elsif state == :undef_comma_expected
          if match = scan(/,/)
            kind = :operator
            state = :undef_expected
          else
            state = :initial
            next
          end

        end
# }}}

        # normalize the tri-state flags (see top of method)
        unless kind == :error
          value_expected = value_expected == :set
          last_token_dot = last_token_dot == :set
        end

        if $DEBUG and not kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens, state
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]

        # return to the string after a single "#$var" / "#@var" token
        if last_state
          state = last_state
          last_state = nil
        end
      end
    end

    # input ended inside strings and/or interpolations: close them all
    inline_block_stack << [state] if state.is_a? patterns::StringState
    until inline_block_stack.empty?
      this_block = inline_block_stack.pop
      tokens << [:close, :inline] if this_block.size > 1
      state = this_block.first
      tokens << [:close, state.type]
    end

    tokens
  end

end
|
400
|
+
|
401
|
+
end
|
402
|
+
end
|
403
|
+
|
404
|
+
# vim:fdm=marker
|