raldred-coderay 0.9.0 → 0.9.339
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/README +128 -0
- data/lib/coderay.rb +319 -0
- data/lib/coderay/duo.rb +85 -0
- data/lib/coderay/encoder.rb +187 -0
- data/lib/coderay/encoders/_map.rb +9 -0
- data/lib/coderay/encoders/count.rb +21 -0
- data/lib/coderay/encoders/debug.rb +49 -0
- data/lib/coderay/encoders/div.rb +20 -0
- data/lib/coderay/encoders/html.rb +306 -0
- data/lib/coderay/encoders/html/css.rb +70 -0
- data/lib/coderay/encoders/html/numerization.rb +133 -0
- data/lib/coderay/encoders/html/output.rb +206 -0
- data/lib/coderay/encoders/json.rb +19 -0
- data/lib/coderay/encoders/null.rb +26 -0
- data/lib/coderay/encoders/page.rb +21 -0
- data/lib/coderay/encoders/span.rb +20 -0
- data/lib/coderay/encoders/statistic.rb +77 -0
- data/lib/coderay/encoders/term.rb +114 -0
- data/lib/coderay/encoders/text.rb +32 -0
- data/lib/coderay/encoders/tokens.rb +44 -0
- data/lib/coderay/encoders/xml.rb +71 -0
- data/lib/coderay/encoders/yaml.rb +22 -0
- data/lib/coderay/for_redcloth.rb +73 -0
- data/lib/coderay/helpers/file_type.rb +226 -0
- data/lib/coderay/helpers/gzip_simple.rb +123 -0
- data/lib/coderay/helpers/plugin.rb +339 -0
- data/lib/coderay/helpers/word_list.rb +124 -0
- data/lib/coderay/scanner.rb +271 -0
- data/lib/coderay/scanners/_map.rb +21 -0
- data/lib/coderay/scanners/c.rb +166 -0
- data/lib/coderay/scanners/css.rb +202 -0
- data/lib/coderay/scanners/debug.rb +61 -0
- data/lib/coderay/scanners/delphi.rb +150 -0
- data/lib/coderay/scanners/diff.rb +104 -0
- data/lib/coderay/scanners/groovy.rb +271 -0
- data/lib/coderay/scanners/html.rb +175 -0
- data/lib/coderay/scanners/java.rb +173 -0
- data/lib/coderay/scanners/java/builtin_types.rb +419 -0
- data/lib/coderay/scanners/java_script.rb +195 -0
- data/lib/coderay/scanners/json.rb +107 -0
- data/lib/coderay/scanners/nitro_xhtml.rb +132 -0
- data/lib/coderay/scanners/php.rb +404 -0
- data/lib/coderay/scanners/plaintext.rb +18 -0
- data/lib/coderay/scanners/python.rb +232 -0
- data/lib/coderay/scanners/rhtml.rb +71 -0
- data/lib/coderay/scanners/ruby.rb +386 -0
- data/lib/coderay/scanners/ruby/patterns.rb +232 -0
- data/lib/coderay/scanners/scheme.rb +142 -0
- data/lib/coderay/scanners/sql.rb +162 -0
- data/lib/coderay/scanners/xml.rb +17 -0
- data/lib/coderay/scanners/yaml.rb +142 -0
- data/lib/coderay/style.rb +20 -0
- data/lib/coderay/styles/_map.rb +7 -0
- data/lib/coderay/styles/cycnus.rb +151 -0
- data/lib/coderay/styles/murphy.rb +132 -0
- data/lib/coderay/token_classes.rb +86 -0
- data/lib/coderay/tokens.rb +387 -0
- metadata +59 -1
@@ -0,0 +1,124 @@
|
|
1
|
+
module CodeRay
|
2
|
+
|
3
|
+
# = WordList
|
4
|
+
#
|
5
|
+
# <b>A Hash subclass designed for mapping word lists to token types.</b>
|
6
|
+
#
|
7
|
+
# Copyright (c) 2006 by murphy (Kornelius Kalnbach) <murphy rubychan de>
|
8
|
+
#
|
9
|
+
# License:: LGPL / ask the author
|
10
|
+
# Version:: 1.1 (2006-Oct-19)
|
11
|
+
#
|
12
|
+
# A WordList is a Hash with some additional features.
|
13
|
+
# It is intended to be used for keyword recognition.
|
14
|
+
#
|
15
|
+
# WordList is highly optimized to be used in Scanners,
|
16
|
+
# typically to decide whether a given ident is a special token.
|
17
|
+
#
|
18
|
+
# For case insensitive words use CaseIgnoringWordList.
|
19
|
+
#
|
20
|
+
# Example:
|
21
|
+
#
|
22
|
+
# # define word arrays
|
23
|
+
# RESERVED_WORDS = %w[
|
24
|
+
# asm break case continue default do else
|
25
|
+
# ...
|
26
|
+
# ]
|
27
|
+
#
|
28
|
+
# PREDEFINED_TYPES = %w[
|
29
|
+
# int long short char void
|
30
|
+
# ...
|
31
|
+
# ]
|
32
|
+
#
|
33
|
+
# PREDEFINED_CONSTANTS = %w[
|
34
|
+
# EOF NULL ...
|
35
|
+
# ]
|
36
|
+
#
|
37
|
+
# # make a WordList
|
38
|
+
# IDENT_KIND = WordList.new(:ident).
|
39
|
+
# add(RESERVED_WORDS, :reserved).
|
40
|
+
# add(PREDEFINED_TYPES, :pre_type).
|
41
|
+
# add(PREDEFINED_CONSTANTS, :pre_constant)
|
42
|
+
#
|
43
|
+
# ...
|
44
|
+
#
|
45
|
+
# def scan_tokens tokens, options
|
46
|
+
# ...
|
47
|
+
#
|
48
|
+
# elsif scan(/[A-Za-z_][A-Za-z_0-9]*/)
|
49
|
+
# # use it
|
50
|
+
# kind = IDENT_KIND[match]
|
51
|
+
# ...
|
52
|
+
class WordList < Hash

  # Creates a new WordList with +default+ as default value.
  #
  # You can activate +caching+ to store the results for every [] request.
  #
  # With caching, methods like +include?+ or +delete+ may no longer behave
  # as you expect. Therefore, it is recommended to use the [] method only.
  #
  # A block, if given, is installed as the Hash default proc; it cannot be
  # combined with +caching+ (raises ArgumentError).
  def initialize default = false, caching = false, &block
    if block
      raise ArgumentError, 'Can\'t combine block with caching.' if caching
      super(&block)
    elsif caching
      # The default proc only runs for missing keys; it stores the default
      # under that key so the next lookup is a plain hit.
      super() do |h, k|
        h[k] = h.fetch k, default
      end
    else
      super default
    end
  end

  # Add words to the list and associate them with +kind+.
  #
  # Returns +self+, so you can concat add calls.
  def add words, kind = true
    words.each do |word|
      self[word] = kind
    end
    self
  end

end


# A CaseIgnoringWordList is like a WordList, only that
# keys are compared case-insensitively.
#
# Ignoring the text case is realized by sending the +downcase+ message to
# all keys.
#
# Caching usually makes a CaseIgnoringWordList faster, but it has to be
# activated explicitly.
class CaseIgnoringWordList < WordList

  # Creates a new case-insensitive WordList with +default+ as default value.
  #
  # You can activate caching to store the results for every [] request.
  def initialize default = false, caching = false
    if caching
      # Missing keys are resolved through their downcased form and the
      # result is stored under the key as it was asked for.
      super(default, false) do |h, k|
        h[k] = h.fetch k.downcase, default
      end
    else
      super(default, false)
    end
    # Remembered so that [] knows whether the default proc already takes
    # care of downcasing.
    @caching = caching ? true : false
  end

  # Looks up +key+ ignoring its case.
  #
  # This is a regular instance method (not a singleton method defined per
  # object), so copies made with +dup+ keep the case-insensitive lookup.
  def [] key # :nodoc:
    @caching ? super : super(key.downcase)
  end

  # Add +words+ to the list and associate them with +kind+.
  #
  # Words are stored downcased. Returns +self+.
  def add words, kind = true
    words.each do |word|
      self[word.downcase] = kind
    end
    self
  end

end
|
123
|
+
|
124
|
+
end
|
@@ -0,0 +1,271 @@
|
|
1
|
+
module CodeRay
|
2
|
+
|
3
|
+
require 'coderay/helpers/plugin'
|
4
|
+
|
5
|
+
# = Scanners
|
6
|
+
#
|
7
|
+
# This module holds the Scanner class and its subclasses.
|
8
|
+
# For example, the Ruby scanner is named CodeRay::Scanners::Ruby
|
9
|
+
# and can be found in coderay/scanners/ruby.
|
10
|
+
#
|
11
|
+
# Scanner also provides methods and constants for the register
|
12
|
+
# mechanism and the [] method that returns the Scanner class
|
13
|
+
# belonging to the given lang.
|
14
|
+
#
|
15
|
+
# See PluginHost.
|
16
|
+
module Scanners
|
17
|
+
extend PluginHost
|
18
|
+
plugin_path File.dirname(__FILE__), 'scanners'
|
19
|
+
|
20
|
+
require 'strscan'
|
21
|
+
|
22
|
+
# = Scanner
|
23
|
+
#
|
24
|
+
# The base class for all Scanners.
|
25
|
+
#
|
26
|
+
# It is a subclass of Ruby's great +StringScanner+, which
|
27
|
+
# makes it easy to access the scanning methods inside.
|
28
|
+
#
|
29
|
+
# It is also +Enumerable+, so you can use it like an Array of
|
30
|
+
# Tokens:
|
31
|
+
#
|
32
|
+
# require 'coderay'
|
33
|
+
#
|
34
|
+
# c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;"
|
35
|
+
#
|
36
|
+
# for text, kind in c_scanner
|
37
|
+
# puts text if kind == :operator
|
38
|
+
# end
|
39
|
+
#
|
40
|
+
# # prints: (*==)++;
|
41
|
+
#
|
42
|
+
# OK, this is a very simple example :)
|
43
|
+
# You can also use +map+, +any?+, +find+ and even +sort_by+,
|
44
|
+
# if you want.
|
45
|
+
class Scanner < StringScanner
|
46
|
+
extend Plugin
|
47
|
+
plugin_host Scanners
|
48
|
+
|
49
|
+
# Raised if a Scanner fails while scanning.
#
# Derives from StandardError (not Exception) so that a plain +rescue+
# clause is able to catch it.
ScanError = Class.new(StandardError)
|
51
|
+
|
52
|
+
require 'coderay/helpers/word_list'
|
53
|
+
|
54
|
+
# The default options for all scanner classes.
|
55
|
+
#
|
56
|
+
# Define @default_options for subclasses.
|
57
|
+
DEFAULT_OPTIONS = { :stream => false }
|
58
|
+
|
59
|
+
class << self
|
60
|
+
|
61
|
+
# Returns whether the Scanner can be used in streaming mode, that is,
# whether this scanner class includes the Streamable module.
def streamable?
  # Streamable is mixed into scanner classes with +include+, so it has to
  # be looked up among the class's included modules; the class object
  # itself is not a Streamable.
  include? Streamable
end
|
65
|
+
|
66
|
+
# Prepares +code+ for scanning.
#
# Converts +code+ to a String, tags it as UTF-8 (or as binary if its bytes
# are not valid UTF-8) and converts line endings with String#to_unix.
#
# The encoding is changed on a copy, so the object passed in is never
# modified and frozen input is accepted.
def normify code
  code = code.to_s
  if code.respond_to? :force_encoding
    # force_encoding changes its receiver in place; work on a copy.
    code = code.dup
    code.force_encoding 'utf-8'
    code.force_encoding 'binary' unless code.valid_encoding?
  end
  code.to_unix
end
|
78
|
+
|
79
|
+
# Reads or sets the file extension associated with this scanner.
#
# With a truthy +extension+, stores its String form and returns it.
# Without one, returns the stored value, falling back to the String form
# of +plugin_id+ on first use.
def file_extension extension = nil
  return @file_extension = extension.to_s if extension
  @file_extension ||= plugin_id.to_s
end
|
86
|
+
|
87
|
+
end
|
88
|
+
|
89
|
+
=begin
|
90
|
+
## Excluded for speed reasons; protected seems to make methods slow.
|
91
|
+
|
92
|
+
# Save the StringScanner methods from being called.
|
93
|
+
# This would not be useful for highlighting.
|
94
|
+
strscan_public_methods =
|
95
|
+
StringScanner.instance_methods -
|
96
|
+
StringScanner.ancestors[1].instance_methods
|
97
|
+
protected(*strscan_public_methods)
|
98
|
+
=end
|
99
|
+
|
100
|
+
# Create a new Scanner.
|
101
|
+
#
|
102
|
+
# * +code+ is the input String and is handled by the superclass
|
103
|
+
# StringScanner.
|
104
|
+
# * +options+ is a Hash with Symbols as keys.
|
105
|
+
# It is merged with the default options of the class (you can
|
106
|
+
# overwrite default options here.)
|
107
|
+
# * +block+ is the callback for streamed highlighting.
|
108
|
+
#
|
109
|
+
# If you set :stream to +true+ in the options, the Scanner uses a
|
110
|
+
# TokenStream with the +block+ as callback to handle the tokens.
|
111
|
+
#
|
112
|
+
# Else, a Tokens object is used.
|
113
|
+
def initialize code='', options = {}, &block
|
114
|
+
@options = self.class::DEFAULT_OPTIONS.merge options
|
115
|
+
raise "I am only the basic Scanner class. I can't scan "\
|
116
|
+
"anything. :( Use my subclasses." if self.class == Scanner
|
117
|
+
|
118
|
+
super Scanner.normify(code)
|
119
|
+
|
120
|
+
@tokens = options[:tokens]
|
121
|
+
if @options[:stream]
|
122
|
+
warn "warning in CodeRay::Scanner.new: :stream is set, "\
|
123
|
+
"but no block was given" unless block_given?
|
124
|
+
raise NotStreamableError, self unless kind_of? Streamable
|
125
|
+
@tokens ||= TokenStream.new(&block)
|
126
|
+
else
|
127
|
+
warn "warning in CodeRay::Scanner.new: Block given, "\
|
128
|
+
"but :stream is #{@options[:stream]}" if block_given?
|
129
|
+
@tokens ||= Tokens.new
|
130
|
+
end
|
131
|
+
|
132
|
+
setup
|
133
|
+
end
|
134
|
+
|
135
|
+
def reset
|
136
|
+
super
|
137
|
+
reset_instance
|
138
|
+
end
|
139
|
+
|
140
|
+
def string= code
|
141
|
+
code = Scanner.normify(code)
|
142
|
+
super code
|
143
|
+
reset_instance
|
144
|
+
end
|
145
|
+
|
146
|
+
# More mnemonic accessor name for the input string.
|
147
|
+
alias code string
|
148
|
+
alias code= string=
|
149
|
+
|
150
|
+
# Scans the code and returns all tokens in a Tokens object.
|
151
|
+
def tokenize new_string=nil, options = {}
|
152
|
+
options = @options.merge(options)
|
153
|
+
self.string = new_string if new_string
|
154
|
+
@cached_tokens =
|
155
|
+
if @options[:stream] # :stream must have been set already
|
156
|
+
reset unless new_string
|
157
|
+
scan_tokens @tokens, options
|
158
|
+
@tokens
|
159
|
+
else
|
160
|
+
scan_tokens @tokens, options
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
164
|
+
def tokens
|
165
|
+
@cached_tokens ||= tokenize
|
166
|
+
end
|
167
|
+
|
168
|
+
# Whether the scanner is in streaming mode.
|
169
|
+
def streaming?
|
170
|
+
!!@options[:stream]
|
171
|
+
end
|
172
|
+
|
173
|
+
# Traverses the tokens.
|
174
|
+
def each &block
|
175
|
+
raise ArgumentError,
|
176
|
+
'Cannot traverse TokenStream.' if @options[:stream]
|
177
|
+
tokens.each(&block)
|
178
|
+
end
|
179
|
+
include Enumerable
|
180
|
+
|
181
|
+
# The current line position of the scanner (the first line is 1).
#
# Only newlines strictly before the current position are counted, so a
# newline character the scanner is standing on still belongs to the line
# it terminates.
#
# Beware, this is implemented inefficiently. It should be used
# for debugging only.
def line
  # pos is used as a byte offset here; slice by bytes where the String
  # class supports it so multi-byte characters do not shift the count.
  consumed =
    if string.respond_to? :byteslice
      string.byteslice(0, pos)
    else
      string[0, pos]
    end
  consumed.count("\n") + 1
end
|
188
|
+
|
189
|
+
# The column of +pos+ (by default the current scanner position) within
# its line. Columns are counted from 1 on every line, matching #line.
#
# +pos+ is treated as a byte offset: when the input contains multi-byte
# characters, a binary-tagged copy of it is used (and kept in
# @bin_string) to locate the preceding newline.
def column pos = self.pos
  return 1 if pos <= 0
  code = string
  if code.respond_to?(:bytesize) && (defined?(@bin_string) || code.bytesize != code.size)
    @bin_string ||= code.dup.force_encoding('binary')
    code = @bin_string
  end
  # Search for the last newline strictly before pos; with none found the
  # line starts at offset 0, which the -1 turns into column pos + 1.
  pos - (code.rindex("\n", pos - 1) || -1)
end
|
198
|
+
|
199
|
+
protected
|
200
|
+
|
201
|
+
# Can be implemented by subclasses to do some initialization
|
202
|
+
# that has to be done once per instance.
|
203
|
+
#
|
204
|
+
# Use reset for initialization that has to be done once per
|
205
|
+
# scan.
|
206
|
+
def setup
|
207
|
+
end
|
208
|
+
|
209
|
+
# This is the central method, and commonly the only one a
|
210
|
+
# subclass implements.
|
211
|
+
#
|
212
|
+
# Subclasses must implement this method; it must return +tokens+
|
213
|
+
# and must only use Tokens#<< for storing scanned tokens!
|
214
|
+
def scan_tokens tokens, options
|
215
|
+
raise NotImplementedError,
|
216
|
+
"#{self.class}#scan_tokens not implemented."
|
217
|
+
end
|
218
|
+
|
219
|
+
def reset_instance
|
220
|
+
@tokens.clear unless @options[:keep_tokens]
|
221
|
+
@cached_tokens = nil
|
222
|
+
@bin_string = nil if defined? @bin_string
|
223
|
+
end
|
224
|
+
|
225
|
+
# Scanner error with additional status information
#
# Raises a ScanError whose message contains +msg+, the number of +tokens+
# and the last ten of them, the current line, column and position, the
# last match, +state+, and up to +ambit+ characters of code on either side
# of the current position.
def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
  # Near the beginning of the input there are fewer than +ambit+ characters
  # behind the cursor; clamp the start so a negative index does not pull in
  # text from the end of the input.
  context_start = [pos - ambit, 0].max
  raise ScanError, <<-EOE % [


***ERROR in %s: %s (after %d tokens)

tokens:
%s

current line: %d column: %d pos: %d
matched: %p state: %p
bol? = %p, eos? = %p

surrounding code:
%p ~~ %p


***ERROR***

  EOE
    File.basename(caller[0]),
    msg,
    tokens.size,
    tokens.last(10).map { |t| t.inspect }.join("\n"),
    line, column, pos,
    matched, state, bol?, eos?,
    string[context_start, pos - context_start],
    string[pos, ambit],
  ]
end
|
256
|
+
|
257
|
+
end
|
258
|
+
|
259
|
+
end
|
260
|
+
end
|
261
|
+
|
262
|
+
class String
  # Converts DOS ("\r\n") and old Mac ("\r") line endings to Unix ("\n").
  #
  # Returns the receiver itself when it contains no carriage return,
  # otherwise a new String with the line endings replaced.
  def to_unix
    return self unless index ?\r
    gsub(/\r\n?/, "\n")
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module CodeRay
|
2
|
+
module Scanners
|
3
|
+
|
4
|
+
map \
|
5
|
+
:cpp => :c,
|
6
|
+
:ecma => :java_script,
|
7
|
+
:ecmascript => :java_script,
|
8
|
+
:ecma_script => :java_script,
|
9
|
+
:irb => :ruby,
|
10
|
+
:javascript => :java_script,
|
11
|
+
:js => :java_script,
|
12
|
+
:nitro => :nitro_xhtml,
|
13
|
+
:pascal => :delphi,
|
14
|
+
:plain => :plaintext,
|
15
|
+
:xhtml => :html,
|
16
|
+
:yml => :yaml
|
17
|
+
|
18
|
+
default :plain
|
19
|
+
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,166 @@
|
|
1
|
+
module CodeRay
|
2
|
+
module Scanners
|
3
|
+
|
4
|
+
class C < Scanner
|
5
|
+
|
6
|
+
include Streamable
|
7
|
+
|
8
|
+
register_for :c
|
9
|
+
file_extension 'c'
|
10
|
+
|
11
|
+
RESERVED_WORDS = [
|
12
|
+
'asm', 'break', 'case', 'continue', 'default', 'do', 'else',
|
13
|
+
'for', 'goto', 'if', 'return', 'switch', 'while',
|
14
|
+
'struct', 'union', 'enum', 'typedef',
|
15
|
+
'static', 'register', 'auto', 'extern',
|
16
|
+
'sizeof',
|
17
|
+
'volatile', 'const', # C89
|
18
|
+
'inline', 'restrict', # C99
|
19
|
+
]
|
20
|
+
|
21
|
+
PREDEFINED_TYPES = [
|
22
|
+
'int', 'long', 'short', 'char', 'void',
|
23
|
+
'signed', 'unsigned', 'float', 'double',
|
24
|
+
'bool', 'complex', # C99
|
25
|
+
]
|
26
|
+
|
27
|
+
PREDEFINED_CONSTANTS = [
|
28
|
+
'EOF', 'NULL',
|
29
|
+
'true', 'false', # C99
|
30
|
+
]
|
31
|
+
|
32
|
+
IDENT_KIND = WordList.new(:ident).
|
33
|
+
add(RESERVED_WORDS, :reserved).
|
34
|
+
add(PREDEFINED_TYPES, :pre_type).
|
35
|
+
add(PREDEFINED_CONSTANTS, :pre_constant)
|
36
|
+
|
37
|
+
# What may follow the backslash of an escape sequence: one of the simple
# escape characters (including \a and \?), a backslash-newline, a hex
# escape, or an octal escape.
ESCAPE = / [abfnrtv\n\\'"?] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
# What may follow the backslash of a universal character name.
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
|
39
|
+
|
40
|
+
def scan_tokens tokens, options
|
41
|
+
|
42
|
+
state = :initial
|
43
|
+
|
44
|
+
until eos?
|
45
|
+
|
46
|
+
kind = nil
|
47
|
+
match = nil
|
48
|
+
|
49
|
+
case state
|
50
|
+
|
51
|
+
when :initial
|
52
|
+
|
53
|
+
if scan(/ \s+ | \\\n /x)
|
54
|
+
kind = :space
|
55
|
+
|
56
|
+
elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
|
57
|
+
kind = :comment
|
58
|
+
|
59
|
+
elsif match = scan(/ \# \s* if \s* 0 /x)
|
60
|
+
match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
|
61
|
+
kind = :comment
|
62
|
+
|
63
|
+
elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x)
|
64
|
+
kind = :operator
|
65
|
+
|
66
|
+
elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
|
67
|
+
kind = IDENT_KIND[match]
|
68
|
+
if kind == :ident and check(/:(?!:)/)
|
69
|
+
match << scan(/:/)
|
70
|
+
kind = :label
|
71
|
+
end
|
72
|
+
|
73
|
+
elsif match = scan(/L?"/)
|
74
|
+
tokens << [:open, :string]
|
75
|
+
if match[0] == ?L
|
76
|
+
tokens << ['L', :modifier]
|
77
|
+
match = '"'
|
78
|
+
end
|
79
|
+
state = :string
|
80
|
+
kind = :delimiter
|
81
|
+
|
82
|
+
elsif scan(/#\s*(\w*)/)
|
83
|
+
kind = :preprocessor # FIXME multiline preprocs
|
84
|
+
state = :include_expected if self[1] == 'include'
|
85
|
+
|
86
|
+
elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
|
87
|
+
kind = :char
|
88
|
+
|
89
|
+
elsif scan(/0[xX][0-9A-Fa-f]+/)
|
90
|
+
kind = :hex
|
91
|
+
|
92
|
+
elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
|
93
|
+
kind = :oct
|
94
|
+
|
95
|
+
elsif scan(/(?:\d+)(?![.eEfF])/)
|
96
|
+
kind = :integer
|
97
|
+
|
98
|
+
elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
|
99
|
+
kind = :float
|
100
|
+
|
101
|
+
else
|
102
|
+
getch
|
103
|
+
kind = :error
|
104
|
+
|
105
|
+
end
|
106
|
+
|
107
|
+
when :string
|
108
|
+
if scan(/[^\\\n"]+/)
|
109
|
+
kind = :content
|
110
|
+
elsif scan(/"/)
|
111
|
+
tokens << ['"', :delimiter]
|
112
|
+
tokens << [:close, :string]
|
113
|
+
state = :initial
|
114
|
+
next
|
115
|
+
elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
|
116
|
+
kind = :char
|
117
|
+
elsif scan(/ \\ | $ /x)
|
118
|
+
tokens << [:close, :string]
|
119
|
+
kind = :error
|
120
|
+
state = :initial
|
121
|
+
else
|
122
|
+
raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
|
123
|
+
end
|
124
|
+
|
125
|
+
when :include_expected
|
126
|
+
if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
|
127
|
+
kind = :include
|
128
|
+
state = :initial
|
129
|
+
|
130
|
+
elsif match = scan(/\s+/)
|
131
|
+
kind = :space
|
132
|
+
state = :initial if match.index ?\n
|
133
|
+
|
134
|
+
else
|
135
|
+
getch
|
136
|
+
kind = :error
|
137
|
+
|
138
|
+
end
|
139
|
+
|
140
|
+
else
|
141
|
+
raise_inspect 'Unknown state', tokens
|
142
|
+
|
143
|
+
end
|
144
|
+
|
145
|
+
match ||= matched
|
146
|
+
if $DEBUG and not kind
|
147
|
+
raise_inspect 'Error token %p in line %d' %
|
148
|
+
[[match, kind], line], tokens
|
149
|
+
end
|
150
|
+
raise_inspect 'Empty token', tokens unless match
|
151
|
+
|
152
|
+
tokens << [match, kind]
|
153
|
+
|
154
|
+
end
|
155
|
+
|
156
|
+
if state == :string
|
157
|
+
tokens << [:close, :string]
|
158
|
+
end
|
159
|
+
|
160
|
+
tokens
|
161
|
+
end
|
162
|
+
|
163
|
+
end
|
164
|
+
|
165
|
+
end
|
166
|
+
end
|