coderay 0.8.357 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/README +4 -3
- data/lib/coderay.rb +2 -1
- data/lib/coderay/encoder.rb +41 -15
- data/lib/coderay/encoders/_map.rb +3 -1
- data/lib/coderay/encoders/comment_filter.rb +43 -0
- data/lib/coderay/encoders/div.rb +2 -3
- data/lib/coderay/encoders/filter.rb +75 -0
- data/lib/coderay/encoders/html.rb +20 -3
- data/lib/coderay/encoders/html/css.rb +1 -1
- data/lib/coderay/encoders/html/numerization.rb +11 -2
- data/lib/coderay/encoders/html/output.rb +10 -1
- data/lib/coderay/encoders/json.rb +69 -0
- data/lib/coderay/encoders/lines_of_code.rb +90 -0
- data/lib/coderay/encoders/page.rb +1 -2
- data/lib/coderay/encoders/span.rb +2 -3
- data/lib/coderay/encoders/term.rb +137 -0
- data/lib/coderay/encoders/text.rb +4 -4
- data/lib/coderay/encoders/token_class_filter.rb +84 -0
- data/lib/coderay/encoders/xml.rb +1 -0
- data/lib/coderay/for_redcloth.rb +9 -4
- data/lib/coderay/helpers/file_type.rb +54 -15
- data/lib/coderay/helpers/plugin.rb +21 -3
- data/lib/coderay/helpers/word_list.rb +19 -4
- data/lib/coderay/scanner.rb +33 -2
- data/lib/coderay/scanners/_map.rb +10 -4
- data/lib/coderay/scanners/c.rb +61 -23
- data/lib/coderay/scanners/cpp.rb +228 -0
- data/lib/coderay/scanners/css.rb +9 -1
- data/lib/coderay/scanners/debug.rb +1 -0
- data/lib/coderay/scanners/delphi.rb +2 -2
- data/lib/coderay/scanners/diff.rb +1 -0
- data/lib/coderay/scanners/groovy.rb +263 -0
- data/lib/coderay/scanners/html.rb +9 -2
- data/lib/coderay/scanners/java.rb +18 -14
- data/lib/coderay/scanners/java_script.rb +42 -13
- data/lib/coderay/scanners/json.rb +7 -1
- data/lib/coderay/scanners/nitro_xhtml.rb +4 -0
- data/lib/coderay/scanners/php.rb +526 -0
- data/lib/coderay/scanners/plaintext.rb +4 -1
- data/lib/coderay/scanners/python.rb +285 -0
- data/lib/coderay/scanners/rhtml.rb +3 -0
- data/lib/coderay/scanners/ruby.rb +29 -11
- data/lib/coderay/scanners/ruby/patterns.rb +26 -20
- data/lib/coderay/scanners/scheme.rb +3 -0
- data/lib/coderay/scanners/sql.rb +162 -0
- data/lib/coderay/scanners/xml.rb +1 -1
- data/lib/coderay/scanners/yaml.rb +4 -1
- data/lib/coderay/styles/cycnus.rb +11 -7
- data/lib/coderay/token_classes.rb +4 -1
- data/lib/coderay/tokens.rb +50 -46
- metadata +14 -4
- data/lib/coderay/encoders/tokens.rb +0 -44
@@ -176,7 +176,7 @@ module PluginHost
|
|
176
176
|
def inspect
|
177
177
|
map = plugin_hash.dup
|
178
178
|
map.each do |id, plugin|
|
179
|
-
map[id] = plugin.to_s[/(
|
179
|
+
map[id] = plugin.to_s[/(?>\w+)$/]
|
180
180
|
end
|
181
181
|
"#{name}[#{host_id}]#{map.inspect}"
|
182
182
|
end
|
@@ -280,6 +280,16 @@ module Plugin
|
|
280
280
|
def register_for *ids
|
281
281
|
plugin_host.register self, *ids
|
282
282
|
end
|
283
|
+
|
284
|
+
# Returns the title of the plugin, or sets it to the
|
285
|
+
# optional argument +title+.
|
286
|
+
def title title = nil
|
287
|
+
if title
|
288
|
+
@title = title.to_s
|
289
|
+
else
|
290
|
+
@title ||= name[/([^:]+)$/, 1]
|
291
|
+
end
|
292
|
+
end
|
283
293
|
|
284
294
|
# The host for this Plugin class.
|
285
295
|
def plugin_host host = nil
|
@@ -301,15 +311,23 @@ module Plugin
|
|
301
311
|
#
|
302
312
|
# The above example loads the file myplugin/my_helper.rb relative to the
|
303
313
|
# file in which MyPlugin was defined.
|
314
|
+
#
|
315
|
+
# You can also load a helper from a different plugin:
|
316
|
+
#
|
317
|
+
# helper 'other_plugin/helper_name'
|
304
318
|
def helper *helpers
|
305
319
|
for helper in helpers
|
306
|
-
|
320
|
+
if helper.is_a?(String) && helper[/\//]
|
321
|
+
self::PLUGIN_HOST.require_helper $`, $'
|
322
|
+
else
|
323
|
+
self::PLUGIN_HOST.require_helper plugin_id, helper.to_s
|
324
|
+
end
|
307
325
|
end
|
308
326
|
end
|
309
327
|
|
310
328
|
# Returns the plugin id used by the engine.
|
311
329
|
def plugin_id
|
312
|
-
name[
|
330
|
+
name[/\w+$/].downcase
|
313
331
|
end
|
314
332
|
|
315
333
|
end
|
@@ -98,15 +98,22 @@ class CaseIgnoringWordList < WordList
|
|
98
98
|
# Creates a new case-insensitive WordList with +default+ as default value.
|
99
99
|
#
|
100
100
|
# You can activate caching to store the results for every [] request.
|
101
|
+
# This speeds up subsequent lookups for the same word, but also
|
102
|
+
# uses memory.
|
101
103
|
def initialize default = false, caching = false
|
102
104
|
if caching
|
103
105
|
super(default, false) do |h, k|
|
104
106
|
h[k] = h.fetch k.downcase, default
|
105
107
|
end
|
106
108
|
else
|
107
|
-
|
108
|
-
|
109
|
-
|
109
|
+
super(default, false)
|
110
|
+
extend Uncached
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
module Uncached # :nodoc:
|
115
|
+
def [] key
|
116
|
+
super(key.downcase)
|
110
117
|
end
|
111
118
|
end
|
112
119
|
|
@@ -120,4 +127,12 @@ class CaseIgnoringWordList < WordList
|
|
120
127
|
|
121
128
|
end
|
122
129
|
|
123
|
-
end
|
130
|
+
end
|
131
|
+
|
132
|
+
__END__
|
133
|
+
# check memory consumption
|
134
|
+
END {
|
135
|
+
ObjectSpace.each_object(CodeRay::CaseIgnoringWordList) do |wl|
|
136
|
+
p wl.inject(0) { |memo, key, value| memo + key.size + 24 }
|
137
|
+
end
|
138
|
+
}
|
data/lib/coderay/scanner.rb
CHANGED
@@ -43,6 +43,7 @@ module CodeRay
|
|
43
43
|
# You can also use +map+, +any?+, +find+ and even +sort_by+,
|
44
44
|
# if you want.
|
45
45
|
class Scanner < StringScanner
|
46
|
+
|
46
47
|
extend Plugin
|
47
48
|
plugin_host Scanners
|
48
49
|
|
@@ -55,6 +56,8 @@ module CodeRay
|
|
55
56
|
#
|
56
57
|
# Define @default_options for subclasses.
|
57
58
|
DEFAULT_OPTIONS = { :stream => false }
|
59
|
+
|
60
|
+
KINDS_NOT_LOC = [:comment, :doctype]
|
58
61
|
|
59
62
|
class << self
|
60
63
|
|
@@ -65,7 +68,14 @@ module CodeRay
|
|
65
68
|
|
66
69
|
def normify code
|
67
70
|
code = code.to_s
|
68
|
-
|
71
|
+
if code.respond_to? :force_encoding
|
72
|
+
begin
|
73
|
+
code.force_encoding 'utf-8'
|
74
|
+
code[/\z/] # raises an ArgumentError when code contains a non-UTF-8 char
|
75
|
+
rescue ArgumentError
|
76
|
+
code.force_encoding 'binary'
|
77
|
+
end
|
78
|
+
end
|
69
79
|
code.to_unix
|
70
80
|
end
|
71
81
|
|
@@ -104,9 +114,10 @@ module CodeRay
|
|
104
114
|
#
|
105
115
|
# Else, a Tokens object is used.
|
106
116
|
def initialize code='', options = {}, &block
|
107
|
-
@options = self.class::DEFAULT_OPTIONS.merge options
|
108
117
|
raise "I am only the basic Scanner class. I can't scan "\
|
109
118
|
"anything. :( Use my subclasses." if self.class == Scanner
|
119
|
+
|
120
|
+
@options = self.class::DEFAULT_OPTIONS.merge options
|
110
121
|
|
111
122
|
super Scanner.normify(code)
|
112
123
|
|
@@ -121,6 +132,7 @@ module CodeRay
|
|
121
132
|
"but :stream is #{@options[:stream]}" if block_given?
|
122
133
|
@tokens ||= Tokens.new
|
123
134
|
end
|
135
|
+
@tokens.scanner = self
|
124
136
|
|
125
137
|
setup
|
126
138
|
end
|
@@ -140,6 +152,11 @@ module CodeRay
|
|
140
152
|
alias code string
|
141
153
|
alias code= string=
|
142
154
|
|
155
|
+
# Returns the Plugin ID for this scanner.
|
156
|
+
def lang
|
157
|
+
self.class.plugin_id
|
158
|
+
end
|
159
|
+
|
143
160
|
# Scans the code and returns all tokens in a Tokens object.
|
144
161
|
def tokenize new_string=nil, options = {}
|
145
162
|
options = @options.merge(options)
|
@@ -181,8 +198,21 @@ module CodeRay
|
|
181
198
|
|
182
199
|
def column pos = self.pos
|
183
200
|
return 0 if pos <= 0
|
201
|
+
string = string()
|
202
|
+
if string.respond_to?(:bytesize) && (defined?(@bin_string) || string.bytesize != string.size)
|
203
|
+
@bin_string ||= string.dup.force_encoding(:binary)
|
204
|
+
string = @bin_string
|
205
|
+
end
|
184
206
|
pos - (string.rindex(?\n, pos) || 0)
|
185
207
|
end
|
208
|
+
|
209
|
+
def marshal_dump
|
210
|
+
@options
|
211
|
+
end
|
212
|
+
|
213
|
+
def marshal_load options
|
214
|
+
@options = options
|
215
|
+
end
|
186
216
|
|
187
217
|
protected
|
188
218
|
|
@@ -207,6 +237,7 @@ module CodeRay
|
|
207
237
|
def reset_instance
|
208
238
|
@tokens.clear unless @options[:keep_tokens]
|
209
239
|
@cached_tokens = nil
|
240
|
+
@bin_string = nil if defined? @bin_string
|
210
241
|
end
|
211
242
|
|
212
243
|
# Scanner error with additional status information
|
@@ -1,14 +1,20 @@
|
|
1
1
|
module CodeRay
|
2
2
|
module Scanners
|
3
3
|
|
4
|
-
map
|
5
|
-
:
|
6
|
-
:
|
4
|
+
map \
|
5
|
+
:h => :c,
|
6
|
+
:cplusplus => :cpp,
|
7
|
+
:'c++' => :cpp,
|
8
|
+
:ecma => :java_script,
|
9
|
+
:ecmascript => :java_script,
|
10
|
+
:ecma_script => :java_script,
|
7
11
|
:irb => :ruby,
|
8
|
-
:xhtml => :nitro_xhtml,
|
9
12
|
:javascript => :java_script,
|
10
13
|
:js => :java_script,
|
11
14
|
:nitro => :nitro_xhtml,
|
15
|
+
:pascal => :delphi,
|
16
|
+
:plain => :plaintext,
|
17
|
+
:xhtml => :html,
|
12
18
|
:yml => :yaml
|
13
19
|
|
14
20
|
default :plain
|
data/lib/coderay/scanners/c.rb
CHANGED
@@ -3,42 +3,50 @@ module Scanners
|
|
3
3
|
|
4
4
|
class C < Scanner
|
5
5
|
|
6
|
-
register_for :c
|
7
|
-
|
8
6
|
include Streamable
|
7
|
+
|
8
|
+
register_for :c
|
9
|
+
file_extension 'c'
|
9
10
|
|
10
11
|
RESERVED_WORDS = [
|
11
|
-
'asm', 'break', 'case', 'continue', 'default', 'do',
|
12
|
-
'
|
13
|
-
'
|
14
|
-
'
|
15
|
-
'sizeof',
|
16
|
-
'volatile', 'const', # C89
|
17
|
-
'inline', 'restrict', # C99
|
12
|
+
'asm', 'break', 'case', 'continue', 'default', 'do',
|
13
|
+
'else', 'enum', 'for', 'goto', 'if', 'return',
|
14
|
+
'sizeof', 'struct', 'switch', 'typedef', 'union', 'while',
|
15
|
+
'restrict', # added in C99
|
18
16
|
]
|
19
17
|
|
20
18
|
PREDEFINED_TYPES = [
|
21
|
-
'int', 'long', 'short', 'char',
|
19
|
+
'int', 'long', 'short', 'char',
|
22
20
|
'signed', 'unsigned', 'float', 'double',
|
23
|
-
'bool', 'complex', # C99
|
21
|
+
'bool', 'complex', # added in C99
|
24
22
|
]
|
25
23
|
|
26
24
|
PREDEFINED_CONSTANTS = [
|
27
25
|
'EOF', 'NULL',
|
28
|
-
'true', 'false', # C99
|
26
|
+
'true', 'false', # added in C99
|
27
|
+
]
|
28
|
+
DIRECTIVES = [
|
29
|
+
'auto', 'extern', 'register', 'static', 'void',
|
30
|
+
'const', 'volatile', # added in C89
|
31
|
+
'inline', # added in C99
|
29
32
|
]
|
30
33
|
|
31
34
|
IDENT_KIND = WordList.new(:ident).
|
32
35
|
add(RESERVED_WORDS, :reserved).
|
33
36
|
add(PREDEFINED_TYPES, :pre_type).
|
37
|
+
add(DIRECTIVES, :directive).
|
34
38
|
add(PREDEFINED_CONSTANTS, :pre_constant)
|
35
39
|
|
36
|
-
ESCAPE = / [
|
40
|
+
ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
|
37
41
|
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
|
38
42
|
|
39
43
|
def scan_tokens tokens, options
|
40
44
|
|
41
45
|
state = :initial
|
46
|
+
label_expected = true
|
47
|
+
case_expected = false
|
48
|
+
label_expected_before_preproc_line = nil
|
49
|
+
in_preproc_line = false
|
42
50
|
|
43
51
|
until eos?
|
44
52
|
|
@@ -49,8 +57,13 @@ module Scanners
|
|
49
57
|
|
50
58
|
when :initial
|
51
59
|
|
52
|
-
if scan(/ \s+ | \\\n /x)
|
53
|
-
|
60
|
+
if match = scan(/ \s+ | \\\n /x)
|
61
|
+
if in_preproc_line && match != "\\\n" && match.index(?\n)
|
62
|
+
in_preproc_line = false
|
63
|
+
label_expected = label_expected_before_preproc_line
|
64
|
+
end
|
65
|
+
tokens << [match, :space]
|
66
|
+
next
|
54
67
|
|
55
68
|
elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
|
56
69
|
kind = :comment
|
@@ -59,16 +72,32 @@ module Scanners
|
|
59
72
|
match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
|
60
73
|
kind = :comment
|
61
74
|
|
62
|
-
elsif scan(/ [
|
75
|
+
elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
|
76
|
+
label_expected = match =~ /[;\{\}]/
|
77
|
+
if case_expected
|
78
|
+
label_expected = true if match == ':'
|
79
|
+
case_expected = false
|
80
|
+
end
|
63
81
|
kind = :operator
|
64
82
|
|
65
83
|
elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
|
66
84
|
kind = IDENT_KIND[match]
|
67
|
-
if kind == :ident
|
68
|
-
match << scan(/:/)
|
85
|
+
if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/)
|
69
86
|
kind = :label
|
87
|
+
match << matched
|
88
|
+
else
|
89
|
+
label_expected = false
|
90
|
+
if kind == :reserved
|
91
|
+
case match
|
92
|
+
when 'case', 'default'
|
93
|
+
case_expected = true
|
94
|
+
end
|
95
|
+
end
|
70
96
|
end
|
71
97
|
|
98
|
+
elsif scan(/\$/)
|
99
|
+
kind = :ident
|
100
|
+
|
72
101
|
elsif match = scan(/L?"/)
|
73
102
|
tokens << [:open, :string]
|
74
103
|
if match[0] == ?L
|
@@ -78,23 +107,30 @@ module Scanners
|
|
78
107
|
state = :string
|
79
108
|
kind = :delimiter
|
80
109
|
|
81
|
-
elsif scan(
|
82
|
-
kind = :preprocessor
|
110
|
+
elsif scan(/#[ \t]*(\w*)/)
|
111
|
+
kind = :preprocessor
|
112
|
+
in_preproc_line = true
|
113
|
+
label_expected_before_preproc_line = label_expected
|
83
114
|
state = :include_expected if self[1] == 'include'
|
84
115
|
|
85
116
|
elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
|
117
|
+
label_expected = false
|
86
118
|
kind = :char
|
87
119
|
|
88
120
|
elsif scan(/0[xX][0-9A-Fa-f]+/)
|
121
|
+
label_expected = false
|
89
122
|
kind = :hex
|
90
123
|
|
91
124
|
elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
|
125
|
+
label_expected = false
|
92
126
|
kind = :oct
|
93
127
|
|
94
|
-
elsif scan(/(?:\d+)(?![.eEfF])
|
128
|
+
elsif scan(/(?:\d+)(?![.eEfF])L?L?/)
|
129
|
+
label_expected = false
|
95
130
|
kind = :integer
|
96
131
|
|
97
132
|
elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
|
133
|
+
label_expected = false
|
98
134
|
kind = :float
|
99
135
|
|
100
136
|
else
|
@@ -110,6 +146,7 @@ module Scanners
|
|
110
146
|
tokens << ['"', :delimiter]
|
111
147
|
tokens << [:close, :string]
|
112
148
|
state = :initial
|
149
|
+
label_expected = false
|
113
150
|
next
|
114
151
|
elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
|
115
152
|
kind = :char
|
@@ -117,6 +154,7 @@ module Scanners
|
|
117
154
|
tokens << [:close, :string]
|
118
155
|
kind = :error
|
119
156
|
state = :initial
|
157
|
+
label_expected = false
|
120
158
|
else
|
121
159
|
raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
|
122
160
|
end
|
@@ -131,8 +169,8 @@ module Scanners
|
|
131
169
|
state = :initial if match.index ?\n
|
132
170
|
|
133
171
|
else
|
134
|
-
|
135
|
-
|
172
|
+
state = :initial
|
173
|
+
next
|
136
174
|
|
137
175
|
end
|
138
176
|
|
@@ -0,0 +1,228 @@
|
|
1
|
+
module CodeRay
|
2
|
+
module Scanners
|
3
|
+
|
4
|
+
class CPlusPlus < Scanner
|
5
|
+
|
6
|
+
include Streamable
|
7
|
+
|
8
|
+
register_for :cpp
|
9
|
+
file_extension 'cpp'
|
10
|
+
title 'C++'
|
11
|
+
|
12
|
+
# http://www.cppreference.com/wiki/keywords/start
|
13
|
+
RESERVED_WORDS = [
|
14
|
+
'and', 'and_eq', 'asm', 'bitand', 'bitor', 'break',
|
15
|
+
'case', 'catch', 'class', 'compl', 'const_cast',
|
16
|
+
'continue', 'default', 'delete', 'do', 'dynamic_cast', 'else',
|
17
|
+
'enum', 'export', 'for', 'goto', 'if', 'namespace', 'new',
|
18
|
+
'not', 'not_eq', 'or', 'or_eq', 'reinterpret_cast', 'return',
|
19
|
+
'sizeof', 'static_cast', 'struct', 'switch', 'template',
|
20
|
+
'throw', 'try', 'typedef', 'typeid', 'typename', 'union',
|
21
|
+
'while', 'xor', 'xor_eq'
|
22
|
+
]
|
23
|
+
|
24
|
+
PREDEFINED_TYPES = [
|
25
|
+
'bool', 'char', 'double', 'float', 'int', 'long',
|
26
|
+
'short', 'signed', 'unsigned', 'wchar_t', 'string'
|
27
|
+
]
|
28
|
+
PREDEFINED_CONSTANTS = [
|
29
|
+
'false', 'true',
|
30
|
+
'EOF', 'NULL',
|
31
|
+
]
|
32
|
+
PREDEFINED_VARIABLES = [
|
33
|
+
'this'
|
34
|
+
]
|
35
|
+
DIRECTIVES = [
|
36
|
+
'auto', 'const', 'explicit', 'extern', 'friend', 'inline', 'mutable', 'operator',
|
37
|
+
'private', 'protected', 'public', 'register', 'static', 'using', 'virtual', 'void',
|
38
|
+
'volatile'
|
39
|
+
]
|
40
|
+
|
41
|
+
IDENT_KIND = WordList.new(:ident).
|
42
|
+
add(RESERVED_WORDS, :reserved).
|
43
|
+
add(PREDEFINED_TYPES, :pre_type).
|
44
|
+
add(PREDEFINED_VARIABLES, :local_variable).
|
45
|
+
add(DIRECTIVES, :directive).
|
46
|
+
add(PREDEFINED_CONSTANTS, :pre_constant)
|
47
|
+
|
48
|
+
ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
|
49
|
+
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
|
50
|
+
|
51
|
+
def scan_tokens tokens, options
|
52
|
+
|
53
|
+
state = :initial
|
54
|
+
label_expected = true
|
55
|
+
case_expected = false
|
56
|
+
label_expected_before_preproc_line = nil
|
57
|
+
in_preproc_line = false
|
58
|
+
|
59
|
+
until eos?
|
60
|
+
|
61
|
+
kind = nil
|
62
|
+
match = nil
|
63
|
+
|
64
|
+
case state
|
65
|
+
|
66
|
+
when :initial
|
67
|
+
|
68
|
+
if match = scan(/ \s+ | \\\n /x)
|
69
|
+
if in_preproc_line && match != "\\\n" && match.index(?\n)
|
70
|
+
in_preproc_line = false
|
71
|
+
label_expected = label_expected_before_preproc_line
|
72
|
+
end
|
73
|
+
tokens << [match, :space]
|
74
|
+
next
|
75
|
+
|
76
|
+
elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
|
77
|
+
kind = :comment
|
78
|
+
|
79
|
+
elsif match = scan(/ \# \s* if \s* 0 /x)
|
80
|
+
match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
|
81
|
+
kind = :comment
|
82
|
+
|
83
|
+
elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
|
84
|
+
label_expected = match =~ /[;\{\}]/
|
85
|
+
if case_expected
|
86
|
+
label_expected = true if match == ':'
|
87
|
+
case_expected = false
|
88
|
+
end
|
89
|
+
kind = :operator
|
90
|
+
|
91
|
+
elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
|
92
|
+
kind = IDENT_KIND[match]
|
93
|
+
if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/)
|
94
|
+
kind = :label
|
95
|
+
match << matched
|
96
|
+
else
|
97
|
+
label_expected = false
|
98
|
+
if kind == :reserved
|
99
|
+
case match
|
100
|
+
when 'class'
|
101
|
+
state = :class_name_expected
|
102
|
+
when 'case', 'default'
|
103
|
+
case_expected = true
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
elsif scan(/\$/)
|
109
|
+
kind = :ident
|
110
|
+
|
111
|
+
elsif match = scan(/L?"/)
|
112
|
+
tokens << [:open, :string]
|
113
|
+
if match[0] == ?L
|
114
|
+
tokens << ['L', :modifier]
|
115
|
+
match = '"'
|
116
|
+
end
|
117
|
+
state = :string
|
118
|
+
kind = :delimiter
|
119
|
+
|
120
|
+
elsif scan(/#[ \t]*(\w*)/)
|
121
|
+
kind = :preprocessor
|
122
|
+
in_preproc_line = true
|
123
|
+
label_expected_before_preproc_line = label_expected
|
124
|
+
state = :include_expected if self[1] == 'include'
|
125
|
+
|
126
|
+
elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
|
127
|
+
label_expected = false
|
128
|
+
kind = :char
|
129
|
+
|
130
|
+
elsif scan(/0[xX][0-9A-Fa-f]+/)
|
131
|
+
label_expected = false
|
132
|
+
kind = :hex
|
133
|
+
|
134
|
+
elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
|
135
|
+
label_expected = false
|
136
|
+
kind = :oct
|
137
|
+
|
138
|
+
elsif scan(/(?:\d+)(?![.eEfF])L?L?/)
|
139
|
+
label_expected = false
|
140
|
+
kind = :integer
|
141
|
+
|
142
|
+
elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
|
143
|
+
label_expected = false
|
144
|
+
kind = :float
|
145
|
+
|
146
|
+
else
|
147
|
+
getch
|
148
|
+
kind = :error
|
149
|
+
|
150
|
+
end
|
151
|
+
|
152
|
+
when :string
|
153
|
+
if scan(/[^\\"]+/)
|
154
|
+
kind = :content
|
155
|
+
elsif scan(/"/)
|
156
|
+
tokens << ['"', :delimiter]
|
157
|
+
tokens << [:close, :string]
|
158
|
+
state = :initial
|
159
|
+
label_expected = false
|
160
|
+
next
|
161
|
+
elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
|
162
|
+
kind = :char
|
163
|
+
elsif scan(/ \\ | $ /x)
|
164
|
+
tokens << [:close, :string]
|
165
|
+
kind = :error
|
166
|
+
state = :initial
|
167
|
+
label_expected = false
|
168
|
+
else
|
169
|
+
raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
|
170
|
+
end
|
171
|
+
|
172
|
+
when :include_expected
|
173
|
+
if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
|
174
|
+
kind = :include
|
175
|
+
state = :initial
|
176
|
+
|
177
|
+
elsif match = scan(/\s+/)
|
178
|
+
kind = :space
|
179
|
+
state = :initial if match.index ?\n
|
180
|
+
|
181
|
+
else
|
182
|
+
state = :initial
|
183
|
+
next
|
184
|
+
|
185
|
+
end
|
186
|
+
|
187
|
+
when :class_name_expected
|
188
|
+
if scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
|
189
|
+
kind = :class
|
190
|
+
state = :initial
|
191
|
+
|
192
|
+
elsif match = scan(/\s+/)
|
193
|
+
kind = :space
|
194
|
+
|
195
|
+
else
|
196
|
+
getch
|
197
|
+
kind = :error
|
198
|
+
state = :initial
|
199
|
+
|
200
|
+
end
|
201
|
+
|
202
|
+
else
|
203
|
+
raise_inspect 'Unknown state', tokens
|
204
|
+
|
205
|
+
end
|
206
|
+
|
207
|
+
match ||= matched
|
208
|
+
if $DEBUG and not kind
|
209
|
+
raise_inspect 'Error token %p in line %d' %
|
210
|
+
[[match, kind], line], tokens
|
211
|
+
end
|
212
|
+
raise_inspect 'Empty token', tokens unless match
|
213
|
+
|
214
|
+
tokens << [match, kind]
|
215
|
+
|
216
|
+
end
|
217
|
+
|
218
|
+
if state == :string
|
219
|
+
tokens << [:close, :string]
|
220
|
+
end
|
221
|
+
|
222
|
+
tokens
|
223
|
+
end
|
224
|
+
|
225
|
+
end
|
226
|
+
|
227
|
+
end
|
228
|
+
end
|