coderay 0.9.8 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/{lib/README → README_INDEX.rdoc} +10 -21
- data/Rakefile +6 -6
- data/bin/coderay +193 -64
- data/lib/coderay.rb +61 -105
- data/lib/coderay/duo.rb +17 -21
- data/lib/coderay/encoder.rb +100 -112
- data/lib/coderay/encoders/_map.rb +12 -7
- data/lib/coderay/encoders/comment_filter.rb +12 -30
- data/lib/coderay/encoders/count.rb +29 -11
- data/lib/coderay/encoders/debug.rb +32 -20
- data/lib/coderay/encoders/div.rb +13 -9
- data/lib/coderay/encoders/filter.rb +34 -51
- data/lib/coderay/encoders/html.rb +155 -161
- data/lib/coderay/encoders/html/css.rb +4 -9
- data/lib/coderay/encoders/html/numbering.rb +115 -0
- data/lib/coderay/encoders/html/output.rb +22 -70
- data/lib/coderay/encoders/json.rb +59 -45
- data/lib/coderay/encoders/lines_of_code.rb +12 -57
- data/lib/coderay/encoders/null.rb +6 -14
- data/lib/coderay/encoders/page.rb +13 -9
- data/lib/coderay/encoders/span.rb +13 -9
- data/lib/coderay/encoders/statistic.rb +58 -39
- data/lib/coderay/encoders/terminal.rb +179 -0
- data/lib/coderay/encoders/text.rb +31 -17
- data/lib/coderay/encoders/token_kind_filter.rb +111 -0
- data/lib/coderay/encoders/xml.rb +19 -18
- data/lib/coderay/encoders/yaml.rb +37 -9
- data/lib/coderay/for_redcloth.rb +4 -4
- data/lib/coderay/helpers/file_type.rb +127 -246
- data/lib/coderay/helpers/gzip.rb +41 -0
- data/lib/coderay/helpers/plugin.rb +241 -306
- data/lib/coderay/helpers/word_list.rb +65 -126
- data/lib/coderay/scanner.rb +173 -156
- data/lib/coderay/scanners/_map.rb +18 -17
- data/lib/coderay/scanners/c.rb +63 -77
- data/lib/coderay/scanners/clojure.rb +217 -0
- data/lib/coderay/scanners/cpp.rb +71 -84
- data/lib/coderay/scanners/css.rb +103 -120
- data/lib/coderay/scanners/debug.rb +47 -44
- data/lib/coderay/scanners/delphi.rb +70 -76
- data/lib/coderay/scanners/diff.rb +141 -50
- data/lib/coderay/scanners/erb.rb +81 -0
- data/lib/coderay/scanners/groovy.rb +104 -113
- data/lib/coderay/scanners/haml.rb +168 -0
- data/lib/coderay/scanners/html.rb +181 -110
- data/lib/coderay/scanners/java.rb +73 -75
- data/lib/coderay/scanners/java/builtin_types.rb +2 -0
- data/lib/coderay/scanners/java_script.rb +90 -101
- data/lib/coderay/scanners/json.rb +40 -53
- data/lib/coderay/scanners/php.rb +123 -147
- data/lib/coderay/scanners/python.rb +93 -91
- data/lib/coderay/scanners/raydebug.rb +66 -0
- data/lib/coderay/scanners/ruby.rb +343 -326
- data/lib/coderay/scanners/ruby/patterns.rb +40 -106
- data/lib/coderay/scanners/ruby/string_state.rb +71 -0
- data/lib/coderay/scanners/sql.rb +80 -66
- data/lib/coderay/scanners/text.rb +26 -0
- data/lib/coderay/scanners/xml.rb +1 -1
- data/lib/coderay/scanners/yaml.rb +74 -73
- data/lib/coderay/style.rb +10 -7
- data/lib/coderay/styles/_map.rb +3 -3
- data/lib/coderay/styles/alpha.rb +143 -0
- data/lib/coderay/token_kinds.rb +90 -0
- data/lib/coderay/tokens.rb +102 -277
- data/lib/coderay/tokens_proxy.rb +55 -0
- data/lib/coderay/version.rb +3 -0
- data/test/functional/basic.rb +200 -18
- data/test/functional/examples.rb +130 -0
- data/test/functional/for_redcloth.rb +15 -8
- data/test/functional/suite.rb +9 -6
- metadata +103 -123
- data/FOLDERS +0 -53
- data/bin/coderay_stylesheet +0 -4
- data/lib/coderay/encoders/html/numerization.rb +0 -133
- data/lib/coderay/encoders/term.rb +0 -158
- data/lib/coderay/encoders/token_class_filter.rb +0 -84
- data/lib/coderay/helpers/gzip_simple.rb +0 -123
- data/lib/coderay/scanners/nitro_xhtml.rb +0 -136
- data/lib/coderay/scanners/plaintext.rb +0 -20
- data/lib/coderay/scanners/rhtml.rb +0 -78
- data/lib/coderay/scanners/scheme.rb +0 -145
- data/lib/coderay/styles/cycnus.rb +0 -152
- data/lib/coderay/styles/murphy.rb +0 -134
- data/lib/coderay/token_classes.rb +0 -86
- data/test/functional/load_plugin_scanner.rb +0 -11
- data/test/functional/vhdl.rb +0 -126
- data/test/functional/word_list.rb +0 -79
@@ -2,22 +2,42 @@ module CodeRay
|
|
2
2
|
module Scanners
|
3
3
|
|
4
4
|
# HTML Scanner
|
5
|
+
#
|
6
|
+
# Alias: +xhtml+
|
7
|
+
#
|
8
|
+
# See also: Scanners::XML
|
5
9
|
class HTML < Scanner
|
6
10
|
|
7
|
-
include Streamable
|
8
11
|
register_for :html
|
9
12
|
|
10
13
|
KINDS_NOT_LOC = [
|
11
14
|
:comment, :doctype, :preprocessor,
|
12
15
|
:tag, :attribute_name, :operator,
|
13
|
-
:attribute_value, :
|
14
|
-
:plain, :entity, :error
|
15
|
-
]
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
16
|
+
:attribute_value, :string,
|
17
|
+
:plain, :entity, :error,
|
18
|
+
] # :nodoc:
|
19
|
+
|
20
|
+
EVENT_ATTRIBUTES = %w(
|
21
|
+
onabort onafterprint onbeforeprint onbeforeunload onblur oncanplay
|
22
|
+
oncanplaythrough onchange onclick oncontextmenu oncuechange ondblclick
|
23
|
+
ondrag ondragdrop ondragend ondragenter ondragleave ondragover
|
24
|
+
ondragstart ondrop ondurationchange onemptied onended onerror onfocus
|
25
|
+
onformchange onforminput onhashchange oninput oninvalid onkeydown
|
26
|
+
onkeypress onkeyup onload onloadeddata onloadedmetadata onloadstart
|
27
|
+
onmessage onmousedown onmousemove onmouseout onmouseover onmouseup
|
28
|
+
onmousewheel onmove onoffline ononline onpagehide onpageshow onpause
|
29
|
+
onplay onplaying onpopstate onprogress onratechange onreadystatechange
|
30
|
+
onredo onreset onresize onscroll onseeked onseeking onselect onshow
|
31
|
+
onstalled onstorage onsubmit onsuspend ontimeupdate onundo onunload
|
32
|
+
onvolumechange onwaiting
|
33
|
+
)
|
34
|
+
|
35
|
+
IN_ATTRIBUTE = WordList::CaseIgnoring.new(nil).
|
36
|
+
add(EVENT_ATTRIBUTES, :script)
|
37
|
+
|
38
|
+
ATTR_NAME = /[\w.:-]+/ # :nodoc:
|
39
|
+
TAG_END = /\/?>/ # :nodoc:
|
40
|
+
HEX = /[0-9a-fA-F]/ # :nodoc:
|
21
41
|
ENTITY = /
|
22
42
|
&
|
23
43
|
(?:
|
@@ -31,152 +51,203 @@ module Scanners
|
|
31
51
|
)
|
32
52
|
)
|
33
53
|
;
|
34
|
-
/ox
|
35
|
-
|
54
|
+
/ox # :nodoc:
|
55
|
+
|
36
56
|
PLAIN_STRING_CONTENT = {
|
37
57
|
"'" => /[^&'>\n]+/,
|
38
58
|
'"' => /[^&">\n]+/,
|
39
|
-
}
|
40
|
-
|
59
|
+
} # :nodoc:
|
60
|
+
|
41
61
|
def reset
|
42
62
|
super
|
43
63
|
@state = :initial
|
64
|
+
@plain_string_content = nil
|
44
65
|
end
|
45
|
-
|
46
|
-
|
66
|
+
|
67
|
+
protected
|
68
|
+
|
47
69
|
def setup
|
48
70
|
@state = :initial
|
49
71
|
@plain_string_content = nil
|
50
72
|
end
|
51
|
-
|
52
|
-
def
|
53
|
-
|
54
|
-
|
73
|
+
|
74
|
+
def scan_java_script encoder, code
|
75
|
+
if code && !code.empty?
|
76
|
+
@java_script_scanner ||= Scanners::JavaScript.new '', :keep_tokens => true
|
77
|
+
# encoder.begin_group :inline
|
78
|
+
@java_script_scanner.tokenize code, :tokens => encoder
|
79
|
+
# encoder.end_group :inline
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
def scan_tokens encoder, options
|
84
|
+
state = options[:state] || @state
|
55
85
|
plain_string_content = @plain_string_content
|
56
|
-
|
86
|
+
in_tag = in_attribute = nil
|
87
|
+
|
88
|
+
encoder.begin_group :string if state == :attribute_value_string
|
89
|
+
|
57
90
|
until eos?
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
if scan(/\s+/m)
|
63
|
-
kind = :space
|
64
|
-
|
91
|
+
|
92
|
+
if state != :in_special_tag && match = scan(/\s+/m)
|
93
|
+
encoder.text_token match, :space
|
94
|
+
|
65
95
|
else
|
66
|
-
|
96
|
+
|
67
97
|
case state
|
68
|
-
|
98
|
+
|
69
99
|
when :initial
|
70
|
-
if scan(
|
71
|
-
|
72
|
-
elsif scan(/<!DOCTYPE
|
73
|
-
|
74
|
-
elsif scan(/<\?xml
|
75
|
-
|
76
|
-
elsif scan(
|
77
|
-
|
78
|
-
elsif scan(/<\/[-\w.:]
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
100
|
+
if match = scan(/<!--(?:.*?-->|.*)/m)
|
101
|
+
encoder.text_token match, :comment
|
102
|
+
elsif match = scan(/<!DOCTYPE(?:.*?>|.*)/m)
|
103
|
+
encoder.text_token match, :doctype
|
104
|
+
elsif match = scan(/<\?xml(?:.*?\?>|.*)/m)
|
105
|
+
encoder.text_token match, :preprocessor
|
106
|
+
elsif match = scan(/<\?(?:.*?\?>|.*)/m)
|
107
|
+
encoder.text_token match, :comment
|
108
|
+
elsif match = scan(/<\/[-\w.:]*>?/m)
|
109
|
+
in_tag = nil
|
110
|
+
encoder.text_token match, :tag
|
111
|
+
elsif match = scan(/<(?:(script)|[-\w.:]+)(>)?/m)
|
112
|
+
encoder.text_token match, :tag
|
113
|
+
in_tag = self[1]
|
114
|
+
if self[2]
|
115
|
+
state = :in_special_tag if in_tag
|
116
|
+
else
|
117
|
+
state = :attribute
|
118
|
+
end
|
119
|
+
elsif match = scan(/[^<>&]+/)
|
120
|
+
encoder.text_token match, :plain
|
121
|
+
elsif match = scan(/#{ENTITY}/ox)
|
122
|
+
encoder.text_token match, :entity
|
123
|
+
elsif match = scan(/[<>&]/)
|
124
|
+
in_tag = nil
|
125
|
+
encoder.text_token match, :error
|
89
126
|
else
|
90
|
-
raise_inspect '[BUG] else-case reached with state %p' % [state],
|
127
|
+
raise_inspect '[BUG] else-case reached with state %p' % [state], encoder
|
91
128
|
end
|
92
|
-
|
129
|
+
|
93
130
|
when :attribute
|
94
|
-
if scan(/#{TAG_END}/o)
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
131
|
+
if match = scan(/#{TAG_END}/o)
|
132
|
+
encoder.text_token match, :tag
|
133
|
+
in_attribute = nil
|
134
|
+
if in_tag
|
135
|
+
state = :in_special_tag
|
136
|
+
else
|
137
|
+
state = :initial
|
138
|
+
end
|
139
|
+
elsif match = scan(/#{ATTR_NAME}/o)
|
140
|
+
in_attribute = IN_ATTRIBUTE[match]
|
141
|
+
encoder.text_token match, :attribute_name
|
99
142
|
state = :attribute_equal
|
100
143
|
else
|
101
|
-
|
102
|
-
getch
|
144
|
+
in_tag = nil
|
145
|
+
encoder.text_token getch, :error
|
103
146
|
end
|
104
|
-
|
147
|
+
|
105
148
|
when :attribute_equal
|
106
|
-
if scan(/=/)
|
107
|
-
|
149
|
+
if match = scan(/=/) #/
|
150
|
+
encoder.text_token match, :operator
|
108
151
|
state = :attribute_value
|
109
|
-
elsif scan(/#{ATTR_NAME}/o)
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
elsif scan(/./)
|
115
|
-
kind = :error
|
152
|
+
elsif scan(/#{ATTR_NAME}/o) || scan(/#{TAG_END}/o)
|
153
|
+
state = :attribute
|
154
|
+
next
|
155
|
+
else
|
156
|
+
encoder.text_token getch, :error
|
116
157
|
state = :attribute
|
117
158
|
end
|
118
|
-
|
159
|
+
|
119
160
|
when :attribute_value
|
120
|
-
if scan(/#{
|
121
|
-
|
161
|
+
if match = scan(/#{ATTR_NAME}/o)
|
162
|
+
encoder.text_token match, :attribute_value
|
122
163
|
state = :attribute
|
123
164
|
elsif match = scan(/["']/)
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
165
|
+
if in_attribute == :script
|
166
|
+
encoder.begin_group :inline
|
167
|
+
encoder.text_token match, :inline_delimiter
|
168
|
+
if scan(/javascript:[ \t]*/)
|
169
|
+
encoder.text_token matched, :comment
|
170
|
+
end
|
171
|
+
code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
|
172
|
+
scan_java_script encoder, code
|
173
|
+
match = scan(/["']/)
|
174
|
+
encoder.text_token match, :inline_delimiter if match
|
175
|
+
encoder.end_group :inline
|
176
|
+
state = :attribute
|
177
|
+
in_attribute = nil
|
178
|
+
else
|
179
|
+
encoder.begin_group :string
|
180
|
+
state = :attribute_value_string
|
181
|
+
plain_string_content = PLAIN_STRING_CONTENT[match]
|
182
|
+
encoder.text_token match, :delimiter
|
183
|
+
end
|
184
|
+
elsif match = scan(/#{TAG_END}/o)
|
185
|
+
encoder.text_token match, :tag
|
130
186
|
state = :initial
|
131
187
|
else
|
132
|
-
|
133
|
-
getch
|
188
|
+
encoder.text_token getch, :error
|
134
189
|
end
|
135
|
-
|
190
|
+
|
136
191
|
when :attribute_value_string
|
137
|
-
if scan(plain_string_content)
|
138
|
-
|
139
|
-
elsif scan(/['"]/)
|
140
|
-
|
141
|
-
|
192
|
+
if match = scan(plain_string_content)
|
193
|
+
encoder.text_token match, :content
|
194
|
+
elsif match = scan(/['"]/)
|
195
|
+
encoder.text_token match, :delimiter
|
196
|
+
encoder.end_group :string
|
142
197
|
state = :attribute
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
tokens << [:close, :string]
|
150
|
-
kind = :error
|
198
|
+
elsif match = scan(/#{ENTITY}/ox)
|
199
|
+
encoder.text_token match, :entity
|
200
|
+
elsif match = scan(/&/)
|
201
|
+
encoder.text_token match, :content
|
202
|
+
elsif match = scan(/[\n>]/)
|
203
|
+
encoder.end_group :string
|
151
204
|
state = :initial
|
205
|
+
encoder.text_token match, :error
|
152
206
|
end
|
153
|
-
|
207
|
+
|
208
|
+
when :in_special_tag
|
209
|
+
case in_tag
|
210
|
+
when 'script'
|
211
|
+
encoder.text_token match, :space if match = scan(/[ \t]*\n/)
|
212
|
+
if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m)
|
213
|
+
code = self[2] || self[4]
|
214
|
+
closing = self[3]
|
215
|
+
encoder.text_token self[1], :comment
|
216
|
+
else
|
217
|
+
code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/)
|
218
|
+
closing = false
|
219
|
+
end
|
220
|
+
unless code.empty?
|
221
|
+
encoder.begin_group :inline
|
222
|
+
scan_java_script encoder, code
|
223
|
+
encoder.end_group :inline
|
224
|
+
end
|
225
|
+
encoder.text_token closing, :comment if closing
|
226
|
+
state = :initial
|
227
|
+
else
|
228
|
+
raise 'unknown special tag: %p' % [in_tag]
|
229
|
+
end
|
230
|
+
|
154
231
|
else
|
155
|
-
raise_inspect 'Unknown state: %p' % [state],
|
156
|
-
|
232
|
+
raise_inspect 'Unknown state: %p' % [state], encoder
|
233
|
+
|
157
234
|
end
|
158
|
-
|
159
|
-
end
|
160
|
-
|
161
|
-
match ||= matched
|
162
|
-
if $CODERAY_DEBUG and not kind
|
163
|
-
raise_inspect 'Error token %p in line %d' %
|
164
|
-
[[match, kind], line], tokens, state
|
235
|
+
|
165
236
|
end
|
166
|
-
|
167
|
-
|
168
|
-
tokens << [match, kind]
|
237
|
+
|
169
238
|
end
|
170
|
-
|
239
|
+
|
171
240
|
if options[:keep_state]
|
172
241
|
@state = state
|
173
242
|
@plain_string_content = plain_string_content
|
174
243
|
end
|
175
|
-
|
176
|
-
|
244
|
+
|
245
|
+
encoder.end_group :string if state == :attribute_value_string
|
246
|
+
|
247
|
+
encoder
|
177
248
|
end
|
178
|
-
|
249
|
+
|
179
250
|
end
|
180
|
-
|
251
|
+
|
181
252
|
end
|
182
253
|
end
|
@@ -1,11 +1,12 @@
|
|
1
1
|
module CodeRay
|
2
2
|
module Scanners
|
3
|
-
|
3
|
+
|
4
|
+
# Scanner for Java.
|
4
5
|
class Java < Scanner
|
5
|
-
|
6
|
-
include Streamable
|
6
|
+
|
7
7
|
register_for :java
|
8
|
-
|
8
|
+
|
9
|
+
autoload :BuiltinTypes, 'coderay/scanners/java/builtin_types'
|
9
10
|
|
10
11
|
# http://java.sun.com/docs/books/tutorial/java/nutsandbolts/_keywords.html
|
11
12
|
KEYWORDS = %w[
|
@@ -13,63 +14,64 @@ module Scanners
|
|
13
14
|
finally for if instanceof import new package
|
14
15
|
return switch throw try typeof while
|
15
16
|
debugger export
|
16
|
-
]
|
17
|
-
RESERVED = %w[ const goto ]
|
18
|
-
CONSTANTS = %w[ false null true ]
|
19
|
-
MAGIC_VARIABLES = %w[ this super ]
|
17
|
+
] # :nodoc:
|
18
|
+
RESERVED = %w[ const goto ] # :nodoc:
|
19
|
+
CONSTANTS = %w[ false null true ] # :nodoc:
|
20
|
+
MAGIC_VARIABLES = %w[ this super ] # :nodoc:
|
20
21
|
TYPES = %w[
|
21
22
|
boolean byte char class double enum float int interface long
|
22
23
|
short void
|
23
|
-
] << '[]' # because int[] should be highlighted as a type
|
24
|
+
] << '[]' # :nodoc: because int[] should be highlighted as a type
|
24
25
|
DIRECTIVES = %w[
|
25
26
|
abstract extends final implements native private protected public
|
26
27
|
static strictfp synchronized throws transient volatile
|
27
|
-
]
|
28
|
+
] # :nodoc:
|
28
29
|
|
29
30
|
IDENT_KIND = WordList.new(:ident).
|
30
31
|
add(KEYWORDS, :keyword).
|
31
32
|
add(RESERVED, :reserved).
|
32
|
-
add(CONSTANTS, :
|
33
|
+
add(CONSTANTS, :predefined_constant).
|
33
34
|
add(MAGIC_VARIABLES, :local_variable).
|
34
35
|
add(TYPES, :type).
|
35
|
-
add(BuiltinTypes::List, :
|
36
|
+
add(BuiltinTypes::List, :predefined_type).
|
36
37
|
add(BuiltinTypes::List.select { |builtin| builtin[/(Error|Exception)$/] }, :exception).
|
37
|
-
add(DIRECTIVES, :directive)
|
38
|
+
add(DIRECTIVES, :directive) # :nodoc:
|
38
39
|
|
39
|
-
ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
|
40
|
-
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
|
40
|
+
ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
|
41
|
+
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
|
41
42
|
STRING_CONTENT_PATTERN = {
|
42
43
|
"'" => /[^\\']+/,
|
43
44
|
'"' => /[^\\"]+/,
|
44
45
|
'/' => /[^\\\/]+/,
|
45
|
-
}
|
46
|
-
IDENT = /[a-zA-Z_][A-Za-z_0-9]*/
|
46
|
+
} # :nodoc:
|
47
|
+
IDENT = /[a-zA-Z_][A-Za-z_0-9]*/ # :nodoc:
|
48
|
+
|
49
|
+
protected
|
47
50
|
|
48
|
-
def scan_tokens
|
51
|
+
def scan_tokens encoder, options
|
49
52
|
|
50
53
|
state = :initial
|
51
54
|
string_delimiter = nil
|
52
|
-
|
55
|
+
package_name_expected = false
|
56
|
+
class_name_follows = false
|
57
|
+
last_token_dot = false
|
53
58
|
|
54
59
|
until eos?
|
55
60
|
|
56
|
-
kind = nil
|
57
|
-
match = nil
|
58
|
-
|
59
61
|
case state
|
60
62
|
|
61
63
|
when :initial
|
62
64
|
|
63
65
|
if match = scan(/ \s+ | \\\n /x)
|
64
|
-
|
66
|
+
encoder.text_token match, :space
|
65
67
|
next
|
66
68
|
|
67
69
|
elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
|
68
|
-
|
70
|
+
encoder.text_token match, :comment
|
69
71
|
next
|
70
72
|
|
71
|
-
elsif
|
72
|
-
|
73
|
+
elsif package_name_expected && match = scan(/ #{IDENT} (?: \. #{IDENT} )* /ox)
|
74
|
+
encoder.text_token match, package_name_expected
|
73
75
|
|
74
76
|
elsif match = scan(/ #{IDENT} | \[\] /ox)
|
75
77
|
kind = IDENT_KIND[match]
|
@@ -79,95 +81,91 @@ module Scanners
|
|
79
81
|
kind = :class
|
80
82
|
class_name_follows = false
|
81
83
|
else
|
82
|
-
|
83
|
-
|
84
|
+
case match
|
85
|
+
when 'import'
|
86
|
+
package_name_expected = :include
|
87
|
+
when 'package'
|
88
|
+
package_name_expected = :namespace
|
89
|
+
when 'class', 'interface'
|
90
|
+
class_name_follows = true
|
91
|
+
end
|
84
92
|
end
|
93
|
+
encoder.text_token match, kind
|
85
94
|
|
86
|
-
elsif scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x)
|
87
|
-
|
95
|
+
elsif match = scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x)
|
96
|
+
encoder.text_token match, :operator
|
88
97
|
|
89
|
-
elsif scan(/;/)
|
90
|
-
|
91
|
-
|
98
|
+
elsif match = scan(/;/)
|
99
|
+
package_name_expected = false
|
100
|
+
encoder.text_token match, :operator
|
92
101
|
|
93
|
-
elsif scan(/\{/)
|
102
|
+
elsif match = scan(/\{/)
|
94
103
|
class_name_follows = false
|
95
|
-
|
104
|
+
encoder.text_token match, :operator
|
96
105
|
|
97
106
|
elsif check(/[\d.]/)
|
98
|
-
if scan(/0[xX][0-9A-Fa-f]+/)
|
99
|
-
|
100
|
-
elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
|
101
|
-
|
102
|
-
elsif scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
|
103
|
-
|
104
|
-
elsif scan(/\d+[lL]?/)
|
105
|
-
|
107
|
+
if match = scan(/0[xX][0-9A-Fa-f]+/)
|
108
|
+
encoder.text_token match, :hex
|
109
|
+
elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
|
110
|
+
encoder.text_token match, :octal
|
111
|
+
elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
|
112
|
+
encoder.text_token match, :float
|
113
|
+
elsif match = scan(/\d+[lL]?/)
|
114
|
+
encoder.text_token match, :integer
|
106
115
|
end
|
107
116
|
|
108
117
|
elsif match = scan(/["']/)
|
109
|
-
tokens << [:open, :string]
|
110
118
|
state = :string
|
119
|
+
encoder.begin_group state
|
111
120
|
string_delimiter = match
|
112
|
-
|
121
|
+
encoder.text_token match, :delimiter
|
113
122
|
|
114
|
-
elsif scan(/ @ #{IDENT} /ox)
|
115
|
-
|
123
|
+
elsif match = scan(/ @ #{IDENT} /ox)
|
124
|
+
encoder.text_token match, :annotation
|
116
125
|
|
117
126
|
else
|
118
|
-
getch
|
119
|
-
kind = :error
|
127
|
+
encoder.text_token getch, :error
|
120
128
|
|
121
129
|
end
|
122
130
|
|
123
131
|
when :string
|
124
|
-
if scan(STRING_CONTENT_PATTERN[string_delimiter])
|
125
|
-
|
132
|
+
if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
|
133
|
+
encoder.text_token match, :content
|
126
134
|
elsif match = scan(/["'\/]/)
|
127
|
-
|
128
|
-
|
129
|
-
string_delimiter = nil
|
135
|
+
encoder.text_token match, :delimiter
|
136
|
+
encoder.end_group state
|
130
137
|
state = :initial
|
131
|
-
|
138
|
+
string_delimiter = nil
|
132
139
|
elsif state == :string && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
|
133
140
|
if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
|
134
|
-
|
141
|
+
encoder.text_token match, :content
|
135
142
|
else
|
136
|
-
|
143
|
+
encoder.text_token match, :char
|
137
144
|
end
|
138
|
-
elsif scan(/\\./m)
|
139
|
-
|
140
|
-
elsif scan(/ \\ | $ /x)
|
141
|
-
|
142
|
-
kind = :error
|
145
|
+
elsif match = scan(/\\./m)
|
146
|
+
encoder.text_token match, :content
|
147
|
+
elsif match = scan(/ \\ | $ /x)
|
148
|
+
encoder.end_group state
|
143
149
|
state = :initial
|
150
|
+
encoder.text_token match, :error
|
144
151
|
else
|
145
|
-
raise_inspect "else case \" reached; %p not handled." % peek(1),
|
152
|
+
raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
|
146
153
|
end
|
147
154
|
|
148
155
|
else
|
149
|
-
raise_inspect 'Unknown state',
|
156
|
+
raise_inspect 'Unknown state', encoder
|
150
157
|
|
151
158
|
end
|
152
|
-
|
153
|
-
match ||= matched
|
154
|
-
if $CODERAY_DEBUG and not kind
|
155
|
-
raise_inspect 'Error token %p in line %d' %
|
156
|
-
[[match, kind], line], tokens
|
157
|
-
end
|
158
|
-
raise_inspect 'Empty token', tokens unless match
|
159
159
|
|
160
160
|
last_token_dot = match == '.'
|
161
161
|
|
162
|
-
tokens << [match, kind]
|
163
|
-
|
164
162
|
end
|
165
163
|
|
166
164
|
if state == :string
|
167
|
-
|
165
|
+
encoder.end_group state
|
168
166
|
end
|
169
167
|
|
170
|
-
|
168
|
+
encoder
|
171
169
|
end
|
172
170
|
|
173
171
|
end
|