coderay-beta 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. data/FOLDERS +53 -0
  2. data/LICENSE +504 -0
  3. data/bin/coderay +82 -0
  4. data/bin/coderay_stylesheet +4 -0
  5. data/lib/README +129 -0
  6. data/lib/coderay.rb +320 -0
  7. data/lib/coderay/duo.rb +85 -0
  8. data/lib/coderay/encoder.rb +213 -0
  9. data/lib/coderay/encoders/_map.rb +11 -0
  10. data/lib/coderay/encoders/comment_filter.rb +43 -0
  11. data/lib/coderay/encoders/count.rb +21 -0
  12. data/lib/coderay/encoders/debug.rb +49 -0
  13. data/lib/coderay/encoders/div.rb +19 -0
  14. data/lib/coderay/encoders/filter.rb +75 -0
  15. data/lib/coderay/encoders/html.rb +305 -0
  16. data/lib/coderay/encoders/html/css.rb +70 -0
  17. data/lib/coderay/encoders/html/numerization.rb +133 -0
  18. data/lib/coderay/encoders/html/output.rb +206 -0
  19. data/lib/coderay/encoders/json.rb +69 -0
  20. data/lib/coderay/encoders/lines_of_code.rb +90 -0
  21. data/lib/coderay/encoders/null.rb +26 -0
  22. data/lib/coderay/encoders/page.rb +20 -0
  23. data/lib/coderay/encoders/span.rb +19 -0
  24. data/lib/coderay/encoders/statistic.rb +77 -0
  25. data/lib/coderay/encoders/term.rb +137 -0
  26. data/lib/coderay/encoders/text.rb +32 -0
  27. data/lib/coderay/encoders/token_class_filter.rb +84 -0
  28. data/lib/coderay/encoders/xml.rb +71 -0
  29. data/lib/coderay/encoders/yaml.rb +22 -0
  30. data/lib/coderay/for_redcloth.rb +85 -0
  31. data/lib/coderay/helpers/file_type.rb +240 -0
  32. data/lib/coderay/helpers/gzip_simple.rb +123 -0
  33. data/lib/coderay/helpers/plugin.rb +349 -0
  34. data/lib/coderay/helpers/word_list.rb +138 -0
  35. data/lib/coderay/scanner.rb +284 -0
  36. data/lib/coderay/scanners/_map.rb +23 -0
  37. data/lib/coderay/scanners/c.rb +203 -0
  38. data/lib/coderay/scanners/cpp.rb +228 -0
  39. data/lib/coderay/scanners/css.rb +210 -0
  40. data/lib/coderay/scanners/debug.rb +62 -0
  41. data/lib/coderay/scanners/delphi.rb +150 -0
  42. data/lib/coderay/scanners/diff.rb +105 -0
  43. data/lib/coderay/scanners/groovy.rb +263 -0
  44. data/lib/coderay/scanners/html.rb +182 -0
  45. data/lib/coderay/scanners/java.rb +176 -0
  46. data/lib/coderay/scanners/java/builtin_types.rb +419 -0
  47. data/lib/coderay/scanners/java_script.rb +224 -0
  48. data/lib/coderay/scanners/json.rb +112 -0
  49. data/lib/coderay/scanners/nitro_xhtml.rb +136 -0
  50. data/lib/coderay/scanners/php.rb +526 -0
  51. data/lib/coderay/scanners/plaintext.rb +21 -0
  52. data/lib/coderay/scanners/python.rb +285 -0
  53. data/lib/coderay/scanners/rhtml.rb +74 -0
  54. data/lib/coderay/scanners/ruby.rb +404 -0
  55. data/lib/coderay/scanners/ruby/patterns.rb +238 -0
  56. data/lib/coderay/scanners/scheme.rb +145 -0
  57. data/lib/coderay/scanners/sql.rb +162 -0
  58. data/lib/coderay/scanners/xml.rb +17 -0
  59. data/lib/coderay/scanners/yaml.rb +144 -0
  60. data/lib/coderay/style.rb +20 -0
  61. data/lib/coderay/styles/_map.rb +7 -0
  62. data/lib/coderay/styles/cycnus.rb +151 -0
  63. data/lib/coderay/styles/murphy.rb +132 -0
  64. data/lib/coderay/token_classes.rb +86 -0
  65. data/lib/coderay/tokens.rb +391 -0
  66. data/lib/term/ansicolor.rb +220 -0
  67. metadata +123 -0
@@ -0,0 +1,238 @@
1
# encoding: utf-8
module CodeRay
module Scanners

  # Word lists, regular expressions and the StringState helper that the
  # Ruby scanner works with.
  module Ruby::Patterns  # :nodoc:

    # Identifiers classified as :reserved by IDENT_KIND.
    RESERVED_WORDS = %w[
      and def end in or unless begin
      defined? ensure module redo super until
      BEGIN break do next rescue then
      when END case else for retry
      while alias class elsif if not return
      undef yield
    ]

    DEF_KEYWORDS    = ['def']
    UNDEF_KEYWORDS  = ['undef']
    ALIAS_KEYWORDS  = ['alias']
    MODULE_KEYWORDS = ['class', 'module']

    # Word list with :initial as its default value; the definition keywords
    # above are associated with the *_expected symbols.
    DEF_NEW_STATE = WordList.new(:initial).
      add(DEF_KEYWORDS, :def_expected).
      add(UNDEF_KEYWORDS, :undef_expected).
      add(ALIAS_KEYWORDS, :alias_expected).
      add(MODULE_KEYWORDS, :module_expected)

    # Identifiers classified as :pre_constant by IDENT_KIND.
    PREDEFINED_CONSTANTS = %w[
      nil true false self
      DATA ARGV ARGF __FILE__ __LINE__
    ]

    # Word list with :ident as its default value.
    IDENT_KIND = WordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_CONSTANTS, :pre_constant)

    # Use POSIX bracket classes when the regexp engine lets [[:alpha:]]
    # match a non-ASCII letter; otherwise fall back to \w-based matching.
    IDENT =
      if 'ä'[/[[:alpha:]]/] == 'ä'
        /[[:alpha:]_][[:alnum:]_]*/
      else
        /[^\W\d]\w*/
      end

    METHOD_NAME = / #{IDENT} [?!]? /ox
    METHOD_NAME_OPERATOR = /
      \*\*? # multiplication and power
      | [-+~]@? # plus, minus, tilde with and without at sign
      | [\/%&|^`] # division, modulo or format strings, and, or, xor, system
      | \[\]=? # array getter and setter
      | << | >> # append or shift left, shift right
      | <=?>? | >=? # comparison, rocket operator
      | ===? | =~ # simple equality, case equality, match
      | ![~=@]? # negation with and without at sign, not-equal and not-match
    /ox
    METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox
    INSTANCE_VARIABLE = / @ #{IDENT} /ox
    CLASS_VARIABLE = / @@ #{IDENT} /ox
    OBJECT_VARIABLE = / @@? #{IDENT} /ox
    GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
    PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} | #{OBJECT_VARIABLE} /ox
    VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox

    # Token type by opening quote character; any other quote yields :string.
    QUOTE_TO_TYPE = Hash.new(:string).update('`' => :shell, '/' => :regexp)

    REGEXP_MODIFIERS = /[mixounse]*/
    REGEXP_SYMBOLS = /[|?*+(){}\[\].^$]/

    DECIMAL = /\d+(?:_\d+)*/
    OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
    HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
    BINARY = /0b[01]+(?:_[01]+)*/

    EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
    FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox
    # The empty group participates in the match only when a float suffix
    # was consumed.
    FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox
    NUMERIC = / (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox

    SYMBOL = /
      :
      (?:
        #{METHOD_NAME_EX}
        | #{PREFIX_VARIABLE}
        | ['"]
      )
    /ox
    METHOD_NAME_OR_SYMBOL = / #{METHOD_NAME_EX} | #{SYMBOL} /ox

    SIMPLE_ESCAPE = /
      [abefnrstv]
      | [0-7]{1,3}
      | x[0-9A-Fa-f]{1,2}
      | .?
    /mx

    CONTROL_META_ESCAPE = /
      (?: M-|C-|c )
      (?: \\ (?: M-|C-|c ) )*
      (?: [^\\] | \\ #{SIMPLE_ESCAPE} )?
    /mox

    ESCAPE = /
      #{CONTROL_META_ESCAPE} | #{SIMPLE_ESCAPE}
    /mox

    CHARACTER = /
      \?
      (?:
        [^\s\\]
        | \\ #{ESCAPE}
      )
    /mox

    # NOTE: Not entirely accurate, but heredoc delimiters that end with a
    # newline are not something anybody needs.
    HEREDOC_OPEN = /
      << (-)? # $1 = float
      (?:
        ( [A-Za-z_0-9]+ ) # $2 = delim
        |
        ( ["'`\/] ) # $3 = quote, type
        ( [^\n]*? ) \3 # $4 = delim
      )
    /mx

    RUBYDOC = /
      =begin (?!\S)
      .*?
      (?: \Z | ^=end (?!\S) [^\n]* )
    /mx

    DATA = /
      __END__$
      .*?
      (?: \Z | (?=^\#CODE) )
    /mx

    # Looks ahead for something that can only start a value, which makes
    # value_expected possible in method calls written without parentheses.
    VALUE_FOLLOWS = /
      (?>[ \t\f\v]+)
      (?:
        [%\/][^\s=]
        | <<-?\S
        | [-+] \d
        | #{CHARACTER}
      )
    /x
    KEYWORDS_EXPECTING_VALUE = WordList.new.add(%w[
      and end in or unless begin
      defined? ensure redo super until
      break do next rescue then
      when case else for retry
      while elsif if not return
      yield
    ])

    RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo

    RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x

    FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx

    # Maps the letter of a %-literal to a [token type, interpreted] pair.
    FancyStringType = {
      'q' => [:string, false],
      'Q' => [:string, true],
      'r' => [:regexp, true],
      's' => [:symbol, false],
      'x' => [:shell, true]
    }
    # %w shares the entry of %q; a bare % and %W share the entry of %Q.
    [['w', 'q'], ['', 'Q'], ['W', 'Q']].each do |letter, same_as|
      FancyStringType[letter] = FancyStringType[same_as]
    end

    # Struct describing the string-like literal currently being scanned.
    class StringState < Struct.new :type, :interpreted, :delim, :heredoc,
      :paren, :paren_depth, :pattern, :next_state

      # Opening bracket => matching closing bracket.
      CLOSING_PAREN = {
        '(' => ')',
        '[' => ']',
        '<' => '>',
        '{' => '}'
      }

      # Freeze the strings so an accidental << on them is noticed (debugging aid).
      CLOSING_PAREN.each { |opening, closing| opening.freeze; closing.freeze }
      OPENING_PAREN = CLOSING_PAREN.invert

      # Lazily built cache, keyed by [delim, interpreted], of lookahead
      # patterns for the characters that need attention inside a string.
      STRING_PATTERN = Hash.new do |cache, key|
        delim, interpreted = *key
        stop_chars = Regexp.escape(delim.dup)  # dup: workaround for old Ruby
        closer = CLOSING_PAREN[delim]
        if closer
          stop_chars = stop_chars[0..-1] if defined? JRUBY_VERSION  # JRuby fix
          stop_chars << Regexp.escape(closer)
        end
        # The backslash always belongs to the set (unless it is the delimiter).
        stop_chars << '\\\\' unless delim == '\\'

        extra_alternatives =
          case interpreted
          when :regexp_symbols then '| ' + REGEXP_SYMBOLS.source
          when :words then '| \s'
          end

        cache[key] =
          if interpreted && delim != '#'
            / (?= [#{stop_chars}] | \# [{$@] #{extra_alternatives} ) /mx
          else
            / (?= [#{stop_chars}] #{extra_alternatives} ) /mx
          end
      end

      # Lazily built cache, keyed by [delim, interpreted, indented], of
      # lookahead patterns for heredoc bodies.
      HEREDOC_PATTERN = Hash.new do |cache, key|
        delim, interpreted, indented = *key
        escaped_delim = Regexp.escape(delim.dup)  # dup: workaround for old Ruby
        leading_space = indented ? '(?>[\ \t]*)' : nil
        terminator = / \n #{leading_space} #{Regexp.new escaped_delim} $ /x
        cache[key] =
          if interpreted
            / (?= #{terminator}() | \\ | \# [{$@] ) /mx  # $1 set == end of heredoc
          else
            / (?= #{terminator}() | \\ ) /mx
          end
      end

      # Builds the state for a literal of the given +kind+.
      #
      # For heredocs the pattern comes from HEREDOC_PATTERN and +delim+ is
      # stored as nil. For other literals it comes from STRING_PATTERN; when
      # +delim+ is an opening bracket, the closing bracket is stored as
      # +delim+, the opening one as +paren+, and +paren_depth+ starts at 1.
      # +next_state+ is always :initial.
      def initialize kind, interpreted, delim, heredoc = false
        opener = depth = nil
        if heredoc
          matcher = HEREDOC_PATTERN[[delim, interpreted, heredoc == :indented]]
          delim = nil
        else
          matcher = STRING_PATTERN[[delim, interpreted]]
          closer = CLOSING_PAREN[delim]
          if closer
            opener = delim
            delim = closer
            depth = 1
          end
        end
        super(kind, interpreted, delim, heredoc, opener, depth, matcher, :initial)
      end
    end unless defined? StringState

  end

end
end
@@ -0,0 +1,145 @@
1
module CodeRay
module Scanners

  # CodeRay scanner for Scheme (contributed by closure).
  # Thanks to murphy for putting CodeRay into public.
  class Scheme < Scanner

    # TODO: function defs
    # TODO: built-in functions

    register_for :scheme
    file_extension 'scm'

    # Identifiers that IDENT_KIND classifies as :reserved.
    CORE_FORMS = %w[
      lambda let let* letrec syntax-case define-syntax let-syntax
      letrec-syntax begin define quote if or and cond case do delay
      quasiquote set! cons force call-with-current-continuation call/cc
    ]

    # Word list with :ident as its default value.
    IDENT_KIND = CaseIgnoringWordList.new(:ident).
      add(CORE_FORMS, :reserved)

    #IDENTIFIER_INITIAL = /[a-z!@\$%&\*\/\:<=>\?~_\^]/i
    #IDENTIFIER_SUBSEQUENT = /#{IDENTIFIER_INITIAL}|\d|\.|\+|-/
    #IDENTIFIER = /#{IDENTIFIER_INITIAL}#{IDENTIFIER_SUBSEQUENT}*|\+|-|\.{3}/
    IDENTIFIER = /[a-zA-Z!@$%&*\/:<=>?~_^][\w!@$%&*\/:<=>?~^.+\-]*|[+-]|\.\.\./

    # Building blocks for numeric literals, one variant per radix
    # (10, 16, 8, 2); NUM combines them all.
    DIGIT = /\d/
    DIGIT10 = DIGIT
    DIGIT16 = /[0-9a-f]/i
    DIGIT8 = /[0-7]/
    DIGIT2 = /[01]/
    RADIX16 = /\#x/i
    RADIX8 = /\#o/i
    RADIX2 = /\#b/i
    RADIX10 = /\#d/i
    EXACTNESS = /#i|#e/i
    SIGN = /[\+-]?/
    EXP_MARK = /[esfdl]/i
    EXP = /#{EXP_MARK}#{SIGN}#{DIGIT}+/
    SUFFIX = /#{EXP}?/
    PREFIX10 = /#{RADIX10}?#{EXACTNESS}?|#{EXACTNESS}?#{RADIX10}?/
    PREFIX16 = /#{RADIX16}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX16}/
    PREFIX8 = /#{RADIX8}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX8}/
    PREFIX2 = /#{RADIX2}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX2}/
    UINT10 = /#{DIGIT10}+#*/
    UINT16 = /#{DIGIT16}+#*/
    UINT8 = /#{DIGIT8}+#*/
    UINT2 = /#{DIGIT2}+#*/
    DECIMAL = /#{DIGIT10}+#+\.#*#{SUFFIX}|#{DIGIT10}+\.#{DIGIT10}*#*#{SUFFIX}|\.#{DIGIT10}+#*#{SUFFIX}|#{UINT10}#{EXP}/
    UREAL10 = /#{UINT10}\/#{UINT10}|#{DECIMAL}|#{UINT10}/
    UREAL16 = /#{UINT16}\/#{UINT16}|#{UINT16}/
    UREAL8 = /#{UINT8}\/#{UINT8}|#{UINT8}/
    UREAL2 = /#{UINT2}\/#{UINT2}|#{UINT2}/
    REAL10 = /#{SIGN}#{UREAL10}/
    REAL16 = /#{SIGN}#{UREAL16}/
    REAL8 = /#{SIGN}#{UREAL8}/
    REAL2 = /#{SIGN}#{UREAL2}/
    IMAG10 = /i|#{UREAL10}i/
    IMAG16 = /i|#{UREAL16}i/
    IMAG8 = /i|#{UREAL8}i/
    IMAG2 = /i|#{UREAL2}i/
    COMPLEX10 = /#{REAL10}@#{REAL10}|#{REAL10}\+#{IMAG10}|#{REAL10}-#{IMAG10}|\+#{IMAG10}|-#{IMAG10}|#{REAL10}/
    COMPLEX16 = /#{REAL16}@#{REAL16}|#{REAL16}\+#{IMAG16}|#{REAL16}-#{IMAG16}|\+#{IMAG16}|-#{IMAG16}|#{REAL16}/
    COMPLEX8 = /#{REAL8}@#{REAL8}|#{REAL8}\+#{IMAG8}|#{REAL8}-#{IMAG8}|\+#{IMAG8}|-#{IMAG8}|#{REAL8}/
    COMPLEX2 = /#{REAL2}@#{REAL2}|#{REAL2}\+#{IMAG2}|#{REAL2}-#{IMAG2}|\+#{IMAG2}|-#{IMAG2}|#{REAL2}/
    NUM10 = /#{PREFIX10}?#{COMPLEX10}/
    NUM16 = /#{PREFIX16}#{COMPLEX16}/
    NUM8 = /#{PREFIX8}#{COMPLEX8}/
    NUM2 = /#{PREFIX2}#{COMPLEX2}/
    NUM = /#{NUM10}|#{NUM16}|#{NUM8}|#{NUM2}/

    private

    # Scans the input until the end and appends [text, kind] pairs to
    # +tokens+; string literals are wrapped in [:open, :string] and
    # [:close, :string] markers. +options+ is not used here.
    # Returns +tokens+.
    def scan_tokens tokens, options

      state = :initial

      until eos?
        kind = nil

        if state == :initial
          # The order of the branches matters: the first pattern that
          # matches at the current position decides the token kind.
          if scan(/ \s+ | \\\n /x)
            kind = :space
          elsif scan(/['\(\[\)\]]|#\(/)
            kind = :operator_fat
          elsif scan(/;.*/)
            kind = :comment
          elsif scan(/#\\(?:newline|space|.?)/)
            kind = :char
          elsif scan(/#[ft]/)
            kind = :pre_constant
          elsif scan(/#{IDENTIFIER}/o)
            kind = IDENT_KIND[matched]
          elsif scan(/\./)
            kind = :operator
          elsif scan(/"/)
            # Opening quote: start a string group and switch state.
            tokens << [:open, :string]
            state = :string
            tokens << ['"', :delimiter]
            next
          elsif scan(/#{NUM}/o) && !matched.empty?
            kind = :integer
          elsif getch
            # Nothing recognised: consume one character as an error token.
            kind = :error
          end

        elsif state == :string
          if scan(/[^"\\]+/) || scan(/\\.?/)
            kind = :content
          elsif scan(/"/)
            # Closing quote: finish the string group.
            tokens << ['"', :delimiter]
            tokens << [:close, :string]
            state = :initial
            next
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1),
              tokens, state
          end

        else
          raise "else case reached"
        end

        text = matched
        if $DEBUG && !kind
          raise_inspect 'Error token %p in line %d' %
            [[text, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens, state unless text

        tokens << [text, kind]

      end # until eos

      # Close a string that was still open when the input ended.
      tokens << [:close, :string] if state == :string

      tokens

    end #scan_tokens
  end #class
end #module scanners
end #module coderay
@@ -0,0 +1,162 @@
1
module CodeRay
module Scanners

  # CodeRay scanner for SQL, by Josh Goebel.
  class SQL < Scanner

    register_for :sql

    # Identifiers classified as :reserved.
    RESERVED_WORDS = %w(
      create database table index trigger drop primary key set select
      insert update delete replace into
      on from values before and or if exists case when
      then else as group order by avg where
      join inner outer union engine not
      like end using collate show columns begin
    )

    # Identifiers classified as :pre_type.
    PREDEFINED_TYPES = %w(
      char varchar enum binary text tinytext mediumtext
      longtext blob tinyblob mediumblob longblob timestamp
      date time datetime year double decimal float int
      integer tinyint mediumint bigint smallint unsigned bit
      bool boolean hex bin oct
    )

    # Identifiers classified as :predefined.
    PREDEFINED_FUNCTIONS = %w( sum cast abs pi count min max avg )

    # Identifiers classified as :directive.
    DIRECTIVES = %w( auto_increment unique default charset )

    # Identifiers classified as :pre_constant.
    PREDEFINED_CONSTANTS = %w( null true false )

    # Word list with :ident as its default value.
    IDENT_KIND = CaseIgnoringWordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_TYPES, :pre_type).
      add(PREDEFINED_CONSTANTS, :pre_constant).
      add(PREDEFINED_FUNCTIONS, :predefined).
      add(DIRECTIVES, :directive)

    ESCAPE = / [rbfntv\n\\\/'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | . /mx
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

    STRING_PREFIXES = /[xnb]|_\w+/i

    # Scans the input until the end and appends [text, kind] pairs to
    # +tokens+. Quoted literals are wrapped in [:open, :string] and
    # [:close, :string] markers; every :open is matched by a :close, even
    # when the literal is cut short by a stray backslash or by the end of
    # the input. +options+ is not used here. Returns +tokens+.
    def scan_tokens tokens, options

      state = :initial
      string_type = nil
      string_content = ''

      # Emits the text buffered for the current string (if any) as one
      # :content token and starts a fresh buffer.
      flush_content = lambda do
        unless string_content.empty?
          tokens << [string_content, :content]
          string_content = ''
        end
      end

      until eos?

        kind = nil
        match = nil

        if state == :initial

          if scan(/ \s+ | \\\n /x)
            kind = :space

          elsif scan(/^(?:--\s?|#).*/)
            kind = :comment

          elsif scan(%r! /\* (?: .*? \*/ | .* ) !mx)
            kind = :comment

          elsif scan(/ [-+*\/=<>;,!&^|()\[\]{}~%] | \.(?!\d) /x)
            kind = :operator

          elsif scan(/(#{STRING_PREFIXES})?([`"'])/o)
            # Optional prefix followed by the opening quote; the quote
            # character is remembered so that only the same one closes it.
            prefix = self[1]
            string_type = self[2]
            tokens << [:open, :string]
            tokens << [prefix, :modifier] if prefix
            match = string_type
            state = :string
            kind = :delimiter

          elsif match = scan(/ @? [A-Za-z_][A-Za-z_0-9]* /x)
            kind = match[0] == ?@ ? :variable : IDENT_KIND[match.downcase]

          elsif scan(/0[xX][0-9A-Fa-f]+/)
            kind = :hex

          elsif scan(/0[0-7]+(?![89.eEfF])/)
            kind = :oct

          elsif scan(/(?>\d+)(?![.eEfF])/)
            kind = :integer

          elsif scan(/\d[fF]|\d*\.\d+(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
            kind = :float

          else
            # Nothing recognised: consume one character as an error token.
            getch
            kind = :error

          end

        elsif state == :string
          if match = scan(/[^\\"'`]+/)
            string_content << match
            next
          elsif match = scan(/["'`]/)
            if string_type == match
              if peek(1) == string_type  # doubling means escape
                string_content << string_type << getch
                next
              end
              flush_content.call
              tokens << [match, :delimiter]
              tokens << [:close, :string]
              state = :initial
              string_type = nil
            else
              # A different quote character is ordinary content.
              string_content << match
            end
            next
          elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            flush_content.call
            kind = :char
          elsif match = scan(/ \\ . /mox)
            string_content << match
            next
          elsif match = scan(/ \\ | $ /x)
            # Broken string (e.g. a backslash with nothing after it): report
            # the offending text, then close the group so that the :open
            # emitted for this string does not stay unbalanced.
            flush_content.call
            tokens << [match, :error] unless match.empty?
            tokens << [:close, :string]
            state = :initial
            string_type = nil
            next
          else
            # raise_inspect instead of raise: Kernel#raise rejects a String
            # followed by further arguments with a TypeError.
            raise_inspect "else case \" reached; %p not handled." % peek(1),
              tokens, state
          end

        else
          raise_inspect 'else-case reached', tokens, state

        end

        match ||= matched
        unless kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens, state
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]

      end

      # Input ended inside a string: keep the buffered content and close
      # the group, as the Scheme scanner does for its strings.
      if state == :string
        flush_content.call
        tokens << [:close, :string]
      end

      tokens

    end

  end

end
end