coderay-beta 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. data/FOLDERS +53 -0
  2. data/LICENSE +504 -0
  3. data/bin/coderay +82 -0
  4. data/bin/coderay_stylesheet +4 -0
  5. data/lib/README +129 -0
  6. data/lib/coderay.rb +320 -0
  7. data/lib/coderay/duo.rb +85 -0
  8. data/lib/coderay/encoder.rb +213 -0
  9. data/lib/coderay/encoders/_map.rb +11 -0
  10. data/lib/coderay/encoders/comment_filter.rb +43 -0
  11. data/lib/coderay/encoders/count.rb +21 -0
  12. data/lib/coderay/encoders/debug.rb +49 -0
  13. data/lib/coderay/encoders/div.rb +19 -0
  14. data/lib/coderay/encoders/filter.rb +75 -0
  15. data/lib/coderay/encoders/html.rb +305 -0
  16. data/lib/coderay/encoders/html/css.rb +70 -0
  17. data/lib/coderay/encoders/html/numerization.rb +133 -0
  18. data/lib/coderay/encoders/html/output.rb +206 -0
  19. data/lib/coderay/encoders/json.rb +69 -0
  20. data/lib/coderay/encoders/lines_of_code.rb +90 -0
  21. data/lib/coderay/encoders/null.rb +26 -0
  22. data/lib/coderay/encoders/page.rb +20 -0
  23. data/lib/coderay/encoders/span.rb +19 -0
  24. data/lib/coderay/encoders/statistic.rb +77 -0
  25. data/lib/coderay/encoders/term.rb +137 -0
  26. data/lib/coderay/encoders/text.rb +32 -0
  27. data/lib/coderay/encoders/token_class_filter.rb +84 -0
  28. data/lib/coderay/encoders/xml.rb +71 -0
  29. data/lib/coderay/encoders/yaml.rb +22 -0
  30. data/lib/coderay/for_redcloth.rb +85 -0
  31. data/lib/coderay/helpers/file_type.rb +240 -0
  32. data/lib/coderay/helpers/gzip_simple.rb +123 -0
  33. data/lib/coderay/helpers/plugin.rb +349 -0
  34. data/lib/coderay/helpers/word_list.rb +138 -0
  35. data/lib/coderay/scanner.rb +284 -0
  36. data/lib/coderay/scanners/_map.rb +23 -0
  37. data/lib/coderay/scanners/c.rb +203 -0
  38. data/lib/coderay/scanners/cpp.rb +228 -0
  39. data/lib/coderay/scanners/css.rb +210 -0
  40. data/lib/coderay/scanners/debug.rb +62 -0
  41. data/lib/coderay/scanners/delphi.rb +150 -0
  42. data/lib/coderay/scanners/diff.rb +105 -0
  43. data/lib/coderay/scanners/groovy.rb +263 -0
  44. data/lib/coderay/scanners/html.rb +182 -0
  45. data/lib/coderay/scanners/java.rb +176 -0
  46. data/lib/coderay/scanners/java/builtin_types.rb +419 -0
  47. data/lib/coderay/scanners/java_script.rb +224 -0
  48. data/lib/coderay/scanners/json.rb +112 -0
  49. data/lib/coderay/scanners/nitro_xhtml.rb +136 -0
  50. data/lib/coderay/scanners/php.rb +526 -0
  51. data/lib/coderay/scanners/plaintext.rb +21 -0
  52. data/lib/coderay/scanners/python.rb +285 -0
  53. data/lib/coderay/scanners/rhtml.rb +74 -0
  54. data/lib/coderay/scanners/ruby.rb +404 -0
  55. data/lib/coderay/scanners/ruby/patterns.rb +238 -0
  56. data/lib/coderay/scanners/scheme.rb +145 -0
  57. data/lib/coderay/scanners/sql.rb +162 -0
  58. data/lib/coderay/scanners/xml.rb +17 -0
  59. data/lib/coderay/scanners/yaml.rb +144 -0
  60. data/lib/coderay/style.rb +20 -0
  61. data/lib/coderay/styles/_map.rb +7 -0
  62. data/lib/coderay/styles/cycnus.rb +151 -0
  63. data/lib/coderay/styles/murphy.rb +132 -0
  64. data/lib/coderay/token_classes.rb +86 -0
  65. data/lib/coderay/tokens.rb +391 -0
  66. data/lib/term/ansicolor.rb +220 -0
  67. metadata +123 -0
@@ -0,0 +1,238 @@
1
# encoding: utf-8
module CodeRay
module Scanners

  # Word lists, regular expressions and the string-state helper class used
  # by the Ruby scanner. Everything here is plain data except StringState.
  module Ruby::Patterns  # :nodoc:

    # Words that are classified as :reserved by IDENT_KIND.
    RESERVED_WORDS = %w[
      and def end in or unless begin
      defined? ensure module redo super until
      BEGIN break do next rescue then
      when END case else for retry
      while alias class elsif if not return
      undef yield
    ]

    # Keywords that map to a dedicated follow-up state in DEF_NEW_STATE;
    # every other word maps to :initial.
    DEF_KEYWORDS = %w[ def ]
    UNDEF_KEYWORDS = %w[ undef ]
    ALIAS_KEYWORDS = %w[ alias ]
    MODULE_KEYWORDS = %w[class module]
    DEF_NEW_STATE = WordList.new(:initial).
      add(DEF_KEYWORDS, :def_expected).
      add(UNDEF_KEYWORDS, :undef_expected).
      add(ALIAS_KEYWORDS, :alias_expected).
      add(MODULE_KEYWORDS, :module_expected)

    # Words that are classified as :pre_constant by IDENT_KIND.
    PREDEFINED_CONSTANTS = %w[
      nil true false self
      DATA ARGV ARGF __FILE__ __LINE__
    ]

    # Identifier classification; anything not listed is a plain :ident.
    IDENT_KIND = WordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_CONSTANTS, :pre_constant)

    # Use the POSIX bracket classes only when they recognise a non-ASCII
    # letter on this interpreter; otherwise fall back to \w-based matching.
    IDENT = 'ä'[/[[:alpha:]]/] == 'ä' ? /[[:alpha:]_][[:alnum:]_]*/ : /[^\W\d]\w*/

    METHOD_NAME = / #{IDENT} [?!]? /ox
    METHOD_NAME_OPERATOR = /
      \*\*?           # multiplication and power
      | [-+~]@?       # plus, minus, tilde with and without at sign
      | [\/%&|^`]     # division, modulo or format strings, and, or, xor, system
      | \[\]=?        # array getter and setter
      | << | >>       # append or shift left, shift right
      | <=?>? | >=?   # comparison, rocket operator
      | ===? | =~     # simple equality, case equality, match
      | ![~=@]?       # negation with and without at sign, not-equal and not-match
    /ox
    METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox
    INSTANCE_VARIABLE = / @ #{IDENT} /ox
    CLASS_VARIABLE = / @@ #{IDENT} /ox
    OBJECT_VARIABLE = / @@? #{IDENT} /ox
    GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
    PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} | #{OBJECT_VARIABLE} /ox
    VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox

    # Token kind for a quote character; any quote not listed means :string.
    QUOTE_TO_TYPE = {
      '`' => :shell,
      '/' => :regexp,
    }
    QUOTE_TO_TYPE.default = :string

    REGEXP_MODIFIERS = /[mixounse]*/
    REGEXP_SYMBOLS = /[|?*+(){}\[\].^$]/

    # Numeric literals (underscores allowed between digit groups).
    DECIMAL = /\d+(?:_\d+)*/
    OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
    HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
    BINARY = /0b[01]+(?:_[01]+)*/

    EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
    FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox
    # The empty group is only set when a float suffix was matched.
    FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox
    NUMERIC = / (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox

    SYMBOL = /
      :
      (?:
        #{METHOD_NAME_EX}
      | #{PREFIX_VARIABLE}
      | ['"]
      )
    /ox
    METHOD_NAME_OR_SYMBOL = / #{METHOD_NAME_EX} | #{SYMBOL} /ox

    # Escape sequences; these patterns match what follows the backslash.
    SIMPLE_ESCAPE = /
        [abefnrstv]
      | [0-7]{1,3}
      | x[0-9A-Fa-f]{1,2}
      | .?
    /mx

    CONTROL_META_ESCAPE = /
      (?: M-|C-|c )
      (?: \\ (?: M-|C-|c ) )*
      (?: [^\\] | \\ #{SIMPLE_ESCAPE} )?
    /mox

    ESCAPE = /
      #{CONTROL_META_ESCAPE} | #{SIMPLE_ESCAPE}
    /mox

    # Character literals such as ?a or ?\n.
    CHARACTER = /
      \?
      (?:
        [^\s\\]
      | \\ #{ESCAPE}
      )
    /mox

    # NOTE: This is not completely correct, but
    # nobody needs heredoc delimiters ending with \n.
    HEREDOC_OPEN = /
      << (-)?              # $1 = float
      (?:
        ( [A-Za-z_0-9]+ )  # $2 = delim
      |
        ( ["'`\/] )        # $3 = quote, type
        ( [^\n]*? ) \3     # $4 = delim
      )
    /mx

    # An =begin ... =end block, or =begin up to the end of the input.
    RUBYDOC = /
      =begin (?!\S)
      .*?
      (?: \Z | ^=end (?!\S) [^\n]* )
    /mx

    # Everything from __END__ up to the end of input or a line
    # beginning with #CODE.
    DATA = /
      __END__$
      .*?
      (?: \Z | (?=^\#CODE) )
    /mx

    # Checks for a valid value to follow. This enables
    # value_expected in method calls without parentheses.
    VALUE_FOLLOWS = /
      (?>[ \t\f\v]+)
      (?:
        [%\/][^\s=]
      | <<-?\S
      | [-+] \d
      | #{CHARACTER}
      )
    /x
    KEYWORDS_EXPECTING_VALUE = WordList.new.add(%w[
      and end in or unless begin
      defined? ensure redo super until
      break do next rescue then
      when case else for retry
      while elsif if not return
      yield
    ])

    RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo

    RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x

    # Start of a %-literal: $1 is the (possibly empty) type letter,
    # $2 the delimiter character.
    FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx

    # Maps the %-literal type letter to [token kind, interpreted?].
    FancyStringType = {
      'q' => [:string, false],
      'Q' => [:string, true],
      'r' => [:regexp, true],
      's' => [:symbol, false],
      'x' => [:shell, true]
    }
    # Aliases share the very same array objects as their originals.
    FancyStringType['w'] = FancyStringType['q']
    FancyStringType['']  = FancyStringType['Q']
    FancyStringType['W'] = FancyStringType['Q']

    # Describes the string-like literal currently being scanned: its token
    # type, whether it is interpreted, its closing delimiter, paren nesting
    # bookkeeping, and the pattern that finds the next interesting position.
    class StringState < Struct.new :type, :interpreted, :delim, :heredoc,
      :paren, :paren_depth, :pattern, :next_state

      # Opening bracket => matching closing bracket.
      CLOSING_PAREN = Hash[ *%w[
        ( )
        [ ]
        < >
        { }
      ] ]

      CLOSING_PAREN.each { |opener, closer| opener.freeze; closer.freeze }  # debug, if I try to change it with <<
      OPENING_PAREN = CLOSING_PAREN.invert

      # Lazily built cache: [delim, interpreted] => lookahead pattern for the
      # next delimiter, backslash, interpolation start or special escape.
      STRING_PATTERN = Hash.new do |cache, key|
        delim, interpreted = *key
        delim_pattern = Regexp.escape(delim.dup)  # dup: workaround for old Ruby
        closer = CLOSING_PAREN[delim]
        if closer
          delim_pattern = delim_pattern[0..-1] if defined? JRUBY_VERSION  # JRuby fix
          delim_pattern << Regexp.escape(closer)
        end
        # The backslash always belongs to the character class.
        delim_pattern << '\\\\' unless delim == '\\'

        special_escapes =
          if interpreted == :regexp_symbols
            '| ' + REGEXP_SYMBOLS.source
          elsif interpreted == :words
            '| \s'
          end

        cache[key] =
          if interpreted && delim != '#'
            / (?= [#{delim_pattern}] | \# [{$@] #{special_escapes} ) /mx
          else
            / (?= [#{delim_pattern}] #{special_escapes} ) /mx
          end
      end

      # Lazily built cache: [delim, interpreted, indented] => lookahead
      # pattern for the heredoc terminator line, a backslash, or (when
      # interpreted) an interpolation start.
      HEREDOC_PATTERN = Hash.new do |cache, key|
        delim, interpreted, indented = *key
        escaped_delim = Regexp.escape(delim.dup)  # dup: workaround for old Ruby
        delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new escaped_delim } $ /x
        cache[key] =
          if interpreted
            / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx  # $1 set == end of heredoc
          else
            / (?= #{delim_pattern}() | \\ ) /mx
          end
      end

      # kind::        token type stored in the +type+ member
      # interpreted:: interpretation flag (true/false, :regexp_symbols, :words)
      # delim::       opening delimiter, or the heredoc terminator
      # heredoc::     false, or a truthy value (:indented for <<- heredocs)
      def initialize kind, interpreted, delim, heredoc = false
        paren = paren_depth = nil
        if heredoc
          pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ]
          delim = nil
        else
          pattern = STRING_PATTERN[ [delim, interpreted] ]
          closer = CLOSING_PAREN[delim]
          if closer
            # Bracket literal: remember the opener, close on its counterpart.
            paren, delim = delim, closer
            paren_depth = 1
          end
        end
        super(kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial)
      end
    end unless defined? StringState

  end

end
end
@@ -0,0 +1,145 @@
1
module CodeRay
  module Scanners

    # Scheme scanner for CodeRay (by closure).
    # Thanks to murphy for making CodeRay public.
    class Scheme < Scanner

      # TODO: function defs
      # TODO: built-in functions

      register_for :scheme
      file_extension 'scm'

      # Special forms and core procedures highlighted as :reserved.
      CORE_FORMS = %w[
        lambda let let* letrec syntax-case define-syntax let-syntax
        letrec-syntax begin define quote if or and cond case do delay
        quasiquote set! cons force call-with-current-continuation call/cc
      ]

      # Case-insensitive identifier classification; unknown words are :ident.
      IDENT_KIND = CaseIgnoringWordList.new(:ident).
        add(CORE_FORMS, :reserved)

      #IDENTIFIER_INITIAL = /[a-z!@\$%&\*\/\:<=>\?~_\^]/i
      #IDENTIFIER_SUBSEQUENT = /#{IDENTIFIER_INITIAL}|\d|\.|\+|-/
      #IDENTIFIER = /#{IDENTIFIER_INITIAL}#{IDENTIFIER_SUBSEQUENT}*|\+|-|\.{3}/
      IDENTIFIER = /[a-zA-Z!@$%&*\/:<=>?~_^][\w!@$%&*\/:<=>?~^.+\-]*|[+-]|\.\.\./

      # Building blocks for numeric literals in radix 10, 16, 8 and 2.
      DIGIT = /\d/
      DIGIT10 = DIGIT
      DIGIT16 = /[0-9a-f]/i
      DIGIT8 = /[0-7]/
      DIGIT2 = /[01]/
      RADIX16 = /\#x/i
      RADIX8 = /\#o/i
      RADIX2 = /\#b/i
      RADIX10 = /\#d/i
      EXACTNESS = /#i|#e/i
      SIGN = /[\+-]?/
      EXP_MARK = /[esfdl]/i
      EXP = /#{EXP_MARK}#{SIGN}#{DIGIT}+/
      SUFFIX = /#{EXP}?/
      PREFIX10 = /#{RADIX10}?#{EXACTNESS}?|#{EXACTNESS}?#{RADIX10}?/
      PREFIX16 = /#{RADIX16}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX16}/
      PREFIX8 = /#{RADIX8}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX8}/
      PREFIX2 = /#{RADIX2}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX2}/
      UINT10 = /#{DIGIT10}+#*/
      UINT16 = /#{DIGIT16}+#*/
      UINT8 = /#{DIGIT8}+#*/
      UINT2 = /#{DIGIT2}+#*/
      DECIMAL = /#{DIGIT10}+#+\.#*#{SUFFIX}|#{DIGIT10}+\.#{DIGIT10}*#*#{SUFFIX}|\.#{DIGIT10}+#*#{SUFFIX}|#{UINT10}#{EXP}/
      UREAL10 = /#{UINT10}\/#{UINT10}|#{DECIMAL}|#{UINT10}/
      UREAL16 = /#{UINT16}\/#{UINT16}|#{UINT16}/
      UREAL8 = /#{UINT8}\/#{UINT8}|#{UINT8}/
      UREAL2 = /#{UINT2}\/#{UINT2}|#{UINT2}/
      REAL10 = /#{SIGN}#{UREAL10}/
      REAL16 = /#{SIGN}#{UREAL16}/
      REAL8 = /#{SIGN}#{UREAL8}/
      REAL2 = /#{SIGN}#{UREAL2}/
      IMAG10 = /i|#{UREAL10}i/
      IMAG16 = /i|#{UREAL16}i/
      IMAG8 = /i|#{UREAL8}i/
      IMAG2 = /i|#{UREAL2}i/
      COMPLEX10 = /#{REAL10}@#{REAL10}|#{REAL10}\+#{IMAG10}|#{REAL10}-#{IMAG10}|\+#{IMAG10}|-#{IMAG10}|#{REAL10}/
      COMPLEX16 = /#{REAL16}@#{REAL16}|#{REAL16}\+#{IMAG16}|#{REAL16}-#{IMAG16}|\+#{IMAG16}|-#{IMAG16}|#{REAL16}/
      COMPLEX8 = /#{REAL8}@#{REAL8}|#{REAL8}\+#{IMAG8}|#{REAL8}-#{IMAG8}|\+#{IMAG8}|-#{IMAG8}|#{REAL8}/
      COMPLEX2 = /#{REAL2}@#{REAL2}|#{REAL2}\+#{IMAG2}|#{REAL2}-#{IMAG2}|\+#{IMAG2}|-#{IMAG2}|#{REAL2}/
      NUM10 = /#{PREFIX10}?#{COMPLEX10}/
      NUM16 = /#{PREFIX16}#{COMPLEX16}/
      NUM8 = /#{PREFIX8}#{COMPLEX8}/
      NUM2 = /#{PREFIX2}#{COMPLEX2}/
      NUM = /#{NUM10}|#{NUM16}|#{NUM8}|#{NUM2}/

    private

      # Appends [text, kind] pairs (and :open/:close markers around string
      # literals) to +tokens+ until the input is exhausted, then returns
      # +tokens+. +options+ is not consulted here.
      def scan_tokens tokens, options

        state = :initial

        until eos?
          kind = nil

          if state == :initial
            if scan(/ \s+ | \\\n /x)
              kind = :space
            elsif scan(/['\(\[\)\]]|#\(/)
              kind = :operator_fat
            elsif scan(/;.*/)
              kind = :comment
            elsif scan(/#\\(?:newline|space|.?)/)
              kind = :char
            elsif scan(/#[ft]/)
              kind = :pre_constant
            elsif scan(/#{IDENTIFIER}/o)
              kind = IDENT_KIND[matched]
            elsif scan(/\./)
              kind = :operator
            elsif scan(/"/)
              # Opening quote: start a string group and switch state.
              tokens << [:open, :string]
              state = :string
              tokens << ['"', :delimiter]
              next
            elsif scan(/#{NUM}/o) && !matched.empty?
              kind = :integer
            elsif getch
              # Nothing recognised: consume one character as an error.
              kind = :error
            end

          elsif state == :string
            if scan(/[^"\\]+/) || scan(/\\.?/)
              kind = :content
            elsif scan(/"/)
              # Closing quote: finish the group and go back to code.
              tokens << ['"', :delimiter]
              tokens << [:close, :string]
              state = :initial
              next
            else
              raise_inspect "else case \" reached; %p not handled." % peek(1),
                tokens, state
            end

          else
            raise "else case reached"
          end

          match = matched
          if $DEBUG && !kind
            raise_inspect 'Error token %p in line %d' %
              [[match, kind], line], tokens
          end
          raise_inspect 'Empty token', tokens, state unless match

          tokens << [match, kind]

        end  # until eos

        # Close a string that was still open at the end of the input.
        tokens << [:close, :string] if state == :string

        tokens

      end  #scan_tokens
    end  #class
  end  #module scanners
end  #module coderay
@@ -0,0 +1,162 @@
1
module CodeRay module Scanners

  # SQL scanner, by Josh Goebel.
  #
  # Tokenizes comments, operators, quoted strings/identifiers (with an
  # optional prefix such as x, n, b or _charset), variables, keywords and
  # numeric literals.
  class SQL < Scanner

    register_for :sql

    RESERVED_WORDS = %w(
      create database table index trigger drop primary key set select
      insert update delete replace into
      on from values before and or if exists case when
      then else as group order by avg where
      join inner outer union engine not
      like end using collate show columns begin
    )

    PREDEFINED_TYPES = %w(
      char varchar enum binary text tinytext mediumtext
      longtext blob tinyblob mediumblob longblob timestamp
      date time datetime year double decimal float int
      integer tinyint mediumint bigint smallint unsigned bit
      bool boolean hex bin oct
    )

    PREDEFINED_FUNCTIONS = %w( sum cast abs pi count min max avg )

    DIRECTIVES = %w( auto_increment unique default charset )

    PREDEFINED_CONSTANTS = %w( null true false )

    # Case-insensitive word classification; unknown words are :ident.
    # Later +add+ calls win for words listed twice (e.g. "avg").
    IDENT_KIND = CaseIgnoringWordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_TYPES, :pre_type).
      add(PREDEFINED_CONSTANTS, :pre_constant).
      add(PREDEFINED_FUNCTIONS, :predefined).
      add(DIRECTIVES, :directive)

    # Patterns for what may follow a backslash inside a string.
    ESCAPE = / [rbfntv\n\\\/'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | . /mx
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

    STRING_PREFIXES = /[xnb]|_\w+/i

    # Appends [text, kind] pairs (and :open/:close markers around quoted
    # strings) to +tokens+ until the input is exhausted, then returns
    # +tokens+. +options+ is not consulted here.
    #
    # String content is buffered in +string_content+ and emitted as a single
    # :content token whenever an escape, the closing quote, an error, or the
    # end of the input is reached.
    def scan_tokens tokens, options

      state = :initial
      string_type = nil
      string_content = ''

      until eos?

        kind = nil
        match = nil

        if state == :initial

          if scan(/ \s+ | \\\n /x)
            kind = :space

          elsif scan(/^(?:--\s?|#).*/)
            kind = :comment

          elsif scan(%r! /\* (?: .*? \*/ | .* ) !mx)
            kind = :comment

          elsif scan(/ [-+*\/=<>;,!&^|()\[\]{}~%] | \.(?!\d) /x)
            kind = :operator

          elsif scan(/(#{STRING_PREFIXES})?([`"'])/o)
            prefix = self[1]
            string_type = self[2]
            tokens << [:open, :string]
            tokens << [prefix, :modifier] if prefix
            match = string_type
            state = :string
            kind = :delimiter

          elsif match = scan(/ @? [A-Za-z_][A-Za-z_0-9]* /x)
            kind = match[0] == ?@ ? :variable : IDENT_KIND[match.downcase]

          elsif scan(/0[xX][0-9A-Fa-f]+/)
            kind = :hex

          elsif scan(/0[0-7]+(?![89.eEfF])/)
            kind = :oct

          elsif scan(/(?>\d+)(?![.eEfF])/)
            kind = :integer

          elsif scan(/\d[fF]|\d*\.\d+(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
            kind = :float

          else
            # Nothing recognised: consume one character as an error.
            getch
            kind = :error

          end

        elsif state == :string
          if match = scan(/[^\\"'`]+/)
            string_content << match
            next
          elsif match = scan(/["'`]/)
            if string_type == match
              if peek(1) == string_type  # doubling means escape
                string_content << string_type << getch
                next
              end
              unless string_content.empty?
                tokens << [string_content, :content]
                string_content = ''
              end
              tokens << [matched, :delimiter]
              tokens << [:close, :string]
              state = :initial
              string_type = nil
              next
            else
              # A different kind of quote is ordinary content.
              string_content << match
            end
            next
          elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            unless string_content.empty?
              tokens << [string_content, :content]
              string_content = ''
            end
            kind = :char
          elsif match = scan(/ \\ . /mox)
            string_content << match
            next
          elsif match = scan(/ \\ | $ /x)
            # Stray backslash (or line end): report it, but still close the
            # string group that was opened so :open/:close stay balanced.
            unless string_content.empty?
              tokens << [string_content, :content]
              string_content = ''
            end
            tokens << [match, :error]
            tokens << [:close, :string]
            state = :initial
            string_type = nil
            next
          else
            # raise_inspect, not raise: Kernel#raise rejects (String, Array).
            raise_inspect "else case \" reached; %p not handled." % peek(1),
              tokens, state
          end

        else
          raise_inspect 'else-case reached', tokens, state

        end

        match ||= matched
        unless kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens, state
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]

      end

      # Input ended inside a string: emit what was buffered and close the
      # group instead of silently dropping the content.
      if state == :string
        tokens << [string_content, :content] unless string_content.empty?
        tokens << [:close, :string]
      end

      tokens

    end

  end

end end