raldred-coderay 0.9.0 → 0.9.339
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/README +128 -0
- data/lib/coderay.rb +319 -0
- data/lib/coderay/duo.rb +85 -0
- data/lib/coderay/encoder.rb +187 -0
- data/lib/coderay/encoders/_map.rb +9 -0
- data/lib/coderay/encoders/count.rb +21 -0
- data/lib/coderay/encoders/debug.rb +49 -0
- data/lib/coderay/encoders/div.rb +20 -0
- data/lib/coderay/encoders/html.rb +306 -0
- data/lib/coderay/encoders/html/css.rb +70 -0
- data/lib/coderay/encoders/html/numerization.rb +133 -0
- data/lib/coderay/encoders/html/output.rb +206 -0
- data/lib/coderay/encoders/json.rb +19 -0
- data/lib/coderay/encoders/null.rb +26 -0
- data/lib/coderay/encoders/page.rb +21 -0
- data/lib/coderay/encoders/span.rb +20 -0
- data/lib/coderay/encoders/statistic.rb +77 -0
- data/lib/coderay/encoders/term.rb +114 -0
- data/lib/coderay/encoders/text.rb +32 -0
- data/lib/coderay/encoders/tokens.rb +44 -0
- data/lib/coderay/encoders/xml.rb +71 -0
- data/lib/coderay/encoders/yaml.rb +22 -0
- data/lib/coderay/for_redcloth.rb +73 -0
- data/lib/coderay/helpers/file_type.rb +226 -0
- data/lib/coderay/helpers/gzip_simple.rb +123 -0
- data/lib/coderay/helpers/plugin.rb +339 -0
- data/lib/coderay/helpers/word_list.rb +124 -0
- data/lib/coderay/scanner.rb +271 -0
- data/lib/coderay/scanners/_map.rb +21 -0
- data/lib/coderay/scanners/c.rb +166 -0
- data/lib/coderay/scanners/css.rb +202 -0
- data/lib/coderay/scanners/debug.rb +61 -0
- data/lib/coderay/scanners/delphi.rb +150 -0
- data/lib/coderay/scanners/diff.rb +104 -0
- data/lib/coderay/scanners/groovy.rb +271 -0
- data/lib/coderay/scanners/html.rb +175 -0
- data/lib/coderay/scanners/java.rb +173 -0
- data/lib/coderay/scanners/java/builtin_types.rb +419 -0
- data/lib/coderay/scanners/java_script.rb +195 -0
- data/lib/coderay/scanners/json.rb +107 -0
- data/lib/coderay/scanners/nitro_xhtml.rb +132 -0
- data/lib/coderay/scanners/php.rb +404 -0
- data/lib/coderay/scanners/plaintext.rb +18 -0
- data/lib/coderay/scanners/python.rb +232 -0
- data/lib/coderay/scanners/rhtml.rb +71 -0
- data/lib/coderay/scanners/ruby.rb +386 -0
- data/lib/coderay/scanners/ruby/patterns.rb +232 -0
- data/lib/coderay/scanners/scheme.rb +142 -0
- data/lib/coderay/scanners/sql.rb +162 -0
- data/lib/coderay/scanners/xml.rb +17 -0
- data/lib/coderay/scanners/yaml.rb +142 -0
- data/lib/coderay/style.rb +20 -0
- data/lib/coderay/styles/_map.rb +7 -0
- data/lib/coderay/styles/cycnus.rb +151 -0
- data/lib/coderay/styles/murphy.rb +132 -0
- data/lib/coderay/token_classes.rb +86 -0
- data/lib/coderay/tokens.rb +387 -0
- metadata +59 -1
@@ -0,0 +1,271 @@
|
|
1
|
+
module CodeRay
|
2
|
+
module Scanners
|
3
|
+
|
4
|
+
load :java
|
5
|
+
|
6
|
+
class Groovy < Java
|
7
|
+
|
8
|
+
include Streamable
|
9
|
+
register_for :groovy
|
10
|
+
|
11
|
+
# TODO: Check this!
|
12
|
+
# Groovy keywords: Java's keyword list plus the Groovy-only additions.
# Frozen so the shared list cannot be modified by accident.
KEYWORDS = (Java::KEYWORDS + %w[
  as assert def in
]).freeze

# Keywords after which scan_tokens keeps +value_expected+ set, so that a
# following "/" is read as the start of a regexp rather than a division.
# NOTE(review): left unfrozen because WordList's implementation is not
# visible here and it may write to itself on lookup - confirm before freezing.
KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
  case instanceof new return throw typeof while as assert in
]

# Java's magic variables plus Groovy's implicit closure parameter.
MAGIC_VARIABLES = (Java::MAGIC_VARIABLES + %w[ it ]).freeze
# DIRECTIVES = %w[
#   abstract extends final implements native private protected public
#   static strictfp synchronized threadsafe throws transient volatile
# ]

# Maps an identifier to its token kind; unknown words fall back to :ident.
# TYPES and DIRECTIVES are not defined in this class; they are presumably
# inherited from Java (the superclass) - verify.
IDENT_KIND = WordList.new(:ident).
  add(KEYWORDS, :keyword).
  add(MAGIC_VARIABLES, :local_variable).
  add(TYPES, :type).
  add(BuiltinTypes::List, :pre_type).
  add(DIRECTIVES, :directive)

# What may follow a backslash inside a string (ESCAPE / UNICODE_ESCAPE) or
# inside a slashy regexp (REGEXP_ESCAPE / UNICODE_ESCAPE).
ESCAPE = / [bfnrtv$\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} /x  # no 4-byte unicode chars? U[a-fA-F0-9]{8}
REGEXP_ESCAPE =  / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | \d | [bBdDsSwW\/] /x

# TODO: interpretation inside ', ", /
# Pattern for a run of plain content, keyed by the opening delimiter.
STRING_CONTENT_PATTERN = {
  "'" => /(?>\\[^\\'\n]+|[^\\'\n]+)+/,
  '"' => /[^\\$"\n]+/,
  "'''" => /(?>[^\\']+|'(?!''))+/,
  '"""' => /(?>[^\\$"]+|"(?!""))+/,
  '/' => /[^\\$\/\n]+/,
}.freeze
|
44
|
+
|
45
|
+
# Scans the remaining input as Groovy source and appends the tokens to
# +tokens+.
#
# Plain tokens are appended as [text, kind] pairs. Strings, regexps and
# "$name" / "${...}" interpolations are wrapped in [:open, group] and
# [:close, group] markers; the group is always :string, :regexp or :inline,
# and every group opened here is closed again before the method returns.
#
# tokens  - collection receiving the tokens through <<.
# options - accepted for interface compatibility; not read by this method.
#
# Returns +tokens+.
def scan_tokens tokens, options

  state = :initial
  # Saved [state, string_delimiter, paren_depth] for each enclosing string
  # while the scanner is inside a "${ ... }" block.
  inline_block_stack = []
  inline_block_paren_depth = nil
  string_delimiter = nil
  import_clause = class_name_follows = last_token = after_def = false
  # Decides whether a "/" starts a regexp (true) or is the division operator.
  value_expected = true

  until eos?

    kind = nil
    match = nil

    case state

    when :initial

      if match = scan(/ \s+ | \\\n /x)
        tokens << [match, :space]
        if match.index ?\n
          # A line break ends an import clause or def header.
          import_clause = after_def = false
          value_expected = true unless value_expected
        end
        next

      elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
        value_expected = true
        after_def = false
        kind = :comment

      elsif bol? && scan(/ \#!.* /x)
        kind = :doctype

      elsif import_clause && scan(/ (?!as) #{IDENT} (?: \. #{IDENT} )* (?: \.\* )? /ox)
        after_def = value_expected = false
        kind = :include

      elsif match = scan(/ #{IDENT} | \[\] /ox)
        kind = IDENT_KIND[match]
        value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
        if last_token == '.'
          # Anything after a dot is a member name, even if it is a keyword.
          kind = :ident
        elsif class_name_follows
          kind = :class
          class_name_follows = false
        elsif after_def && check(/\s*[({]/)
          kind = :method
          after_def = false
        elsif kind == :ident && last_token != '?' && check(/:/)
          # "name:" is a map key unless it is the middle of a ?: expression.
          kind = :key
        else
          class_name_follows = true if match == 'class' || (import_clause && match == 'as')
          import_clause = match == 'import'
          after_def = true if match == 'def'
        end

      elsif scan(/;/)
        import_clause = after_def = false
        value_expected = true
        kind = :operator

      elsif scan(/\{/)
        class_name_follows = after_def = false
        value_expected = true
        kind = :operator
        # Count nested braces so the "}" that ends "${ ... }" can be found.
        inline_block_paren_depth += 1 unless inline_block_stack.empty?

      # TODO: ~'...', ~"..." and ~/.../ style regexps
      elsif match = scan(/ \.\.<? | \*?\.(?!\d)@? | \.& | \?:? | [,?:(\[] | -[->] | \+\+ |
          && | \|\| | \*\*=? | ==?~ | <=?>? | [-+*%^~&|>=!]=? | <<<?=? | >>>?=? /x)
        value_expected = true
        value_expected = :regexp if match == '~'
        after_def = false
        kind = :operator

      elsif match = scan(/ [)\]}] /x)
        value_expected = after_def = false
        if !inline_block_stack.empty? && match == '}'
          inline_block_paren_depth -= 1
          if inline_block_paren_depth == 0  # closing brace of inline block reached
            tokens << [match, :inline_delimiter]
            tokens << [:close, :inline]
            state, string_delimiter, inline_block_paren_depth = inline_block_stack.pop
            next
          end
        end
        # Fix: closing brackets used to fall through with a nil kind.
        kind = :operator

      elsif check(/[\d.]/)
        after_def = value_expected = false
        if scan(/0[xX][0-9A-Fa-f]+/)
          kind = :hex
        elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
          kind = :oct
        elsif scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
          kind = :float
        elsif scan(/\d+[lLgG]?/)
          kind = :integer
        end

      elsif match = scan(/'''|"""/)
        after_def = value_expected = false
        state = :multiline_string
        # Multiline strings use the same token group as ordinary strings.
        tokens << [:open, :string]
        string_delimiter = match
        kind = :delimiter

      # TODO: record.'name'
      elsif match = scan(/["']/)
        after_def = value_expected = false
        # The pattern above can only yield a quote, so this is always a string.
        state = :string
        tokens << [:open, state]
        string_delimiter = match
        kind = :delimiter

      elsif value_expected && (match = scan(/\//))
        after_def = value_expected = false
        tokens << [:open, :regexp]
        state = :regexp
        string_delimiter = '/'
        kind = :delimiter

      elsif scan(/ @ #{IDENT} /ox)
        after_def = value_expected = false
        kind = :annotation

      elsif scan(/\//)
        after_def = false
        value_expected = true
        kind = :operator

      else
        getch
        kind = :error

      end

    when :string, :regexp, :multiline_string
      if scan(STRING_CONTENT_PATTERN[string_delimiter])
        kind = :content

      elsif match = scan(state == :multiline_string ? /'''|"""/ : /["'\/]/)
        tokens << [match, :delimiter]
        if state == :regexp
          # TODO: regexp modifiers? s, m, x, i?
          modifiers = scan(/[ix]+/)
          tokens << [modifiers, :modifier] if modifiers && !modifiers.empty?
        end
        state = :string if state == :multiline_string
        tokens << [:close, state]
        string_delimiter = nil
        after_def = value_expected = false
        state = :initial
        next

      elsif (state == :string || state == :multiline_string) &&
          (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
        # In single-quoted strings only \\ and \' are reported as :char.
        if string_delimiter[0] == ?' && !(match == "\\\\" || match == "\\'")
          kind = :content
        else
          kind = :char
        end
      elsif state == :regexp && scan(/ \\ (?: #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
        kind = :char

      elsif match = scan(/ \$ #{IDENT} /mox)
        tokens << [:open, :inline]
        tokens << ['$', :inline_delimiter]
        match = match[1..-1]
        tokens << [match, IDENT_KIND[match]]
        tokens << [:close, :inline]
        next
      elsif match = scan(/ \$ \{ /x)
        tokens << [:open, :inline]
        tokens << ['${', :inline_delimiter]
        inline_block_stack << [state, string_delimiter, inline_block_paren_depth]
        inline_block_paren_depth = 1
        state = :initial
        next

      elsif scan(/ \$ /mx)
        kind = :content

      elsif scan(/ \\. /mx)
        kind = :content

      elsif scan(/ \\ | \n /x)
        # Fix: close the group under the name it was opened with
        # (:multiline_string is opened as :string).
        tokens << [:close, state == :regexp ? :regexp : :string]
        kind = :error
        after_def = value_expected = false
        state = :initial

      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
      end

    else
      raise_inspect 'Unknown state', tokens

    end

    match ||= matched
    if $DEBUG and not kind
      raise_inspect 'Error token %p in line %d' %
        [[match, kind], line], tokens
    end
    raise_inspect 'Empty token', tokens unless match

    last_token = match unless [:space, :comment, :doctype].include? kind

    tokens << [match, kind]

  end

  # Input ended inside a string or regexp: close its group.
  if [:multiline_string, :string, :regexp].include? state
    tokens << [:close, state == :regexp ? :regexp : :string]
  end

  # Input ended inside one or more "${ ... }" blocks: close each inline
  # group and the string or regexp that contains it, innermost first.
  until inline_block_stack.empty?
    outer_state, = inline_block_stack.pop
    tokens << [:close, :inline]
    tokens << [:close, outer_state == :regexp ? :regexp : :string]
  end

  tokens
end
|
267
|
+
|
268
|
+
end
|
269
|
+
|
270
|
+
end
|
271
|
+
end
|
@@ -0,0 +1,175 @@
|
|
1
|
+
module CodeRay
|
2
|
+
module Scanners
|
3
|
+
|
4
|
+
# HTML Scanner
|
5
|
+
class HTML < Scanner
|
6
|
+
|
7
|
+
include Streamable
|
8
|
+
register_for :html
|
9
|
+
|
10
|
+
# Characters allowed in an attribute name; unquoted attribute values use
# the same pattern.
ATTR_NAME = /[\w.:-]+/
ATTR_VALUE_UNQUOTED = ATTR_NAME
# End of a tag, optionally self-closing ("/>").
TAG_END = /\/?>/
HEX = /[0-9a-fA-F]/
# A character reference: named (&amp;), decimal (&#123;) or hex (&#x1F;).
ENTITY = /
  &
  (?:
    \w+
    |
    \#
    (?:
      \d+
      |
      x#{HEX}+
    )
  )
  ;
/ox

# Pattern for a run of plain content inside a quoted attribute value,
# keyed by the opening quote. Frozen because it is a shared lookup table.
PLAIN_STRING_CONTENT = {
  "'" => /[^&'>\n]+/,
  '"' => /[^&">\n]+/,
}.freeze
|
33
|
+
|
34
|
+
# Resets the scanner: runs the inherited reset, then puts the HTML state
# machine back into the values that +setup+ assigns (initial state, no
# pending quoted-attribute content pattern).
def reset
  super
  @state = :initial
  # Cleared as well so that reset and setup leave the same state behind.
  @plain_string_content = nil
end
|
38
|
+
|
39
|
+
private
|
40
|
+
# Initialises the state carried between scan_tokens calls: the state
# machine starts in :initial and no quoted-attribute content pattern is
# selected yet.
def setup
  @state, @plain_string_content = :initial, nil
end
|
44
|
+
|
45
|
+
# Scans the remaining input as HTML and appends the tokens to +tokens+.
#
# Plain tokens are appended as [text, kind] pairs; a quoted attribute value
# is wrapped in [:open, :string] / [:close, :string] markers. Whitespace is
# consumed before the state machine is consulted, so it becomes a :space
# token in every state.
#
# Scanning starts from @state and @plain_string_content. They are written
# back only when options[:keep_state] is set.
#
# tokens  - collection receiving the tokens through <<.
# options - read for :keep_state only.
#
# Returns +tokens+.
def scan_tokens tokens, options

  state = @state
  plain_string_content = @plain_string_content

  until eos?

    kind = nil
    match = nil

    if scan(/\s+/m)
      kind = :space

    else

      case state

      when :initial
        if scan(/<!--.*?-->/m)
          kind = :comment
        elsif scan(/<!DOCTYPE.*?>/m)
          kind = :doctype
        elsif scan(/<\?xml.*?\?>/m)
          kind = :preprocessor
        elsif scan(/<\?.*?\?>|<%.*?%>/m)
          kind = :comment
        elsif scan(/<\/[-\w_.:]*>/m)
          kind = :tag
        elsif match = scan(/<[-\w_.:]+>?/m)
          kind = :tag
          # "<name" without the closing ">" means attributes may follow.
          state = :attribute unless match[-1] == ?>
        elsif scan(/[^<>&]+/)
          kind = :plain
        elsif scan(/#{ENTITY}/ox)
          kind = :entity
        elsif scan(/[<>&]/)
          kind = :error
        else
          raise_inspect '[BUG] else-case reached with state %p' % [state], tokens
        end

      when :attribute
        if scan(/#{TAG_END}/)
          kind = :tag
          state = :initial
        elsif scan(/#{ATTR_NAME}/o)
          kind = :attribute_name
          state = :attribute_equal
        else
          kind = :error
          getch
        end

      when :attribute_equal
        if scan(/=/)
          kind = :operator
          state = :attribute_value
        elsif scan(/#{ATTR_NAME}/o)
          # The previous attribute had no value; this is the next name.
          kind = :attribute_name
        elsif scan(/#{TAG_END}/o)
          kind = :tag
          state = :initial
        elsif scan(/./)
          kind = :error
          state = :attribute
        end

      when :attribute_value
        if scan(/#{ATTR_VALUE_UNQUOTED}/o)
          kind = :attribute_value
          state = :attribute
        elsif match = scan(/["']/)
          tokens << [:open, :string]
          state = :attribute_value_string
          plain_string_content = PLAIN_STRING_CONTENT[match]
          kind = :delimiter
        elsif scan(/#{TAG_END}/o)
          kind = :tag
          state = :initial
        else
          kind = :error
          getch
        end

      when :attribute_value_string
        if scan(plain_string_content)
          kind = :content
        elsif scan(/['"]/)
          tokens << [matched, :delimiter]
          tokens << [:close, :string]
          state = :attribute
          next
        elsif scan(/#{ENTITY}/ox)
          kind = :entity
        elsif scan(/&/)
          kind = :content
        elsif scan(/[\n>]/)
          # A ">" inside the value: give up on the string and the tag.
          tokens << [:close, :string]
          kind = :error
          state = :initial
        end

      else
        raise_inspect 'Unknown state: %p' % [state], tokens

      end

    end

    match ||= matched
    if $DEBUG and not kind
      raise_inspect 'Error token %p in line %d' %
        [[match, kind], line], tokens, state
    end
    raise_inspect 'Empty token', tokens unless match

    tokens << [match, kind]
  end

  if options[:keep_state]
    # A later call continues from here, so an open string group stays open.
    @state = state
    @plain_string_content = plain_string_content
  elsif state == :attribute_value_string
    # Fix: input ended inside a quoted attribute value and nothing will
    # continue it, so close the group instead of leaving it unbalanced.
    tokens << [:close, :string]
  end

  tokens
end
|
171
|
+
|
172
|
+
end
|
173
|
+
|
174
|
+
end
|
175
|
+
end
|
@@ -0,0 +1,173 @@
|
|
1
|
+
module CodeRay
|
2
|
+
module Scanners
|
3
|
+
|
4
|
+
class Java < Scanner
|
5
|
+
|
6
|
+
include Streamable
|
7
|
+
register_for :java
|
8
|
+
helper :builtin_types
|
9
|
+
|
10
|
+
# TODO: Check this!
|
11
|
+
# Words reported as :keyword. Frozen so the shared list cannot be modified
# by accident; building a new list from it with + still works.
KEYWORDS = %w[
  break case catch continue default do else
  false finally for if instanceof new null
  return switch throw true try typeof while
  debugger export import package
].freeze

# Words reported as :local_variable.
MAGIC_VARIABLES = %w[ this super ].freeze
# Words reported as :type; "[]" is included because scan_tokens matches it
# with the same branch as identifiers.
TYPES = (%w[
  boolean byte char class interface double enum float String int long short void
] << '[]').freeze
# Words reported as :directive.
DIRECTIVES = %w[
  abstract extends final implements native private protected public
  static strictfp synchronized threadsafe throws transient volatile
].freeze

# Reserved for future use.

# Maps an identifier to its token kind; unknown words fall back to :ident.
# NOTE(review): left unfrozen because WordList's implementation is not
# visible here and it may write to itself on lookup - confirm before freezing.
IDENT_KIND = WordList.new(:ident).
  add(KEYWORDS, :keyword).
  add(MAGIC_VARIABLES, :local_variable).
  add(TYPES, :type).
  add(BuiltinTypes::List, :pre_type).
  add(DIRECTIVES, :directive)

# What may follow a backslash inside a string literal.
ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
# Pattern for a run of plain content, keyed by the opening delimiter.
STRING_CONTENT_PATTERN = {
  "'" => /[^\\']+/,
  '"' => /[^\\"]+/,
  '/' => /[^\\\/]+/,
}.freeze
IDENT = /[a-zA-Z_][A-Za-z_0-9]*/
|
44
|
+
|
45
|
+
# Scans the remaining input as Java source and appends the tokens to
# +tokens+.
#
# Plain tokens are appended as [text, kind] pairs; string and character
# literals are wrapped in [:open, :string] / [:close, :string] markers.
#
# tokens  - collection receiving the tokens through <<.
# options - accepted for interface compatibility; not read by this method.
#
# Returns +tokens+.
def scan_tokens tokens, options

  state = :initial
  string_delimiter = nil
  import_clause = class_name_follows = last_token_dot = false

  until eos?

    kind = nil
    match = nil

    case state

    when :initial

      # Whitespace and comments are emitted directly and skip the
      # bookkeeping at the bottom of the loop (last_token_dot is kept).
      if match = scan(/ \s+ | \\\n /x)
        tokens << [match, :space]
        next

      elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
        tokens << [match, :comment]
        next

      elsif import_clause && scan(/ #{IDENT} (?: \. #{IDENT} )* /ox)
        kind = :include

      elsif match = scan(/ #{IDENT} | \[\] /ox)
        kind = IDENT_KIND[match]
        if last_token_dot
          # Anything after a dot is a member name, even if it is a keyword.
          kind = :ident
        elsif class_name_follows
          kind = :class
          class_name_follows = false
        else
          import_clause = true if match == 'import'
          class_name_follows = true if match == 'class' || match == 'interface'
        end

      elsif scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x)
        kind = :operator

      elsif scan(/;/)
        import_clause = false
        kind = :operator

      elsif scan(/\{/)
        class_name_follows = false
        kind = :operator

      elsif check(/[\d.]/)
        if scan(/0[xX][0-9A-Fa-f]+/)
          kind = :hex
        elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
          kind = :oct
        elsif scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
          kind = :float
        elsif scan(/\d+[lL]?/)
          kind = :integer
        end

      elsif match = scan(/["']/)
        tokens << [:open, :string]
        state = :string
        string_delimiter = match
        kind = :delimiter

      elsif scan(/ @ #{IDENT} /ox)
        kind = :annotation

      else
        getch
        kind = :error

      end

    when :string
      if scan(STRING_CONTENT_PATTERN[string_delimiter])
        kind = :content
      elsif match = scan(/["'\/]/)
        tokens << [match, :delimiter]
        tokens << [:close, state]
        string_delimiter = nil
        state = :initial
        next
      elsif state == :string && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
        # In single-quoted literals only \\ and \' are reported as :char.
        if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
          kind = :content
        else
          kind = :char
        end
      elsif scan(/\\./m)
        kind = :content
      elsif scan(/ \\ | $ /x)
        # Fix: close the group that was opened (:string); this branch used
        # to emit [:close, :delimiter], which matches no open group.
        tokens << [:close, state]
        kind = :error
        state = :initial
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
      end

    else
      raise_inspect 'Unknown state', tokens

    end

    match ||= matched
    if $DEBUG and not kind
      raise_inspect 'Error token %p in line %d' %
        [[match, kind], line], tokens
    end
    raise_inspect 'Empty token', tokens unless match

    last_token_dot = match == '.'

    tokens << [match, kind]

  end

  # Input ended inside a string literal: close its group.
  if state == :string
    tokens << [:close, state]
  end

  tokens
end
|
169
|
+
|
170
|
+
end
|
171
|
+
|
172
|
+
end
|
173
|
+
end
|