coderay-beta 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- data/FOLDERS +53 -0
- data/LICENSE +504 -0
- data/bin/coderay +82 -0
- data/bin/coderay_stylesheet +4 -0
- data/lib/README +129 -0
- data/lib/coderay.rb +320 -0
- data/lib/coderay/duo.rb +85 -0
- data/lib/coderay/encoder.rb +213 -0
- data/lib/coderay/encoders/_map.rb +11 -0
- data/lib/coderay/encoders/comment_filter.rb +43 -0
- data/lib/coderay/encoders/count.rb +21 -0
- data/lib/coderay/encoders/debug.rb +49 -0
- data/lib/coderay/encoders/div.rb +19 -0
- data/lib/coderay/encoders/filter.rb +75 -0
- data/lib/coderay/encoders/html.rb +305 -0
- data/lib/coderay/encoders/html/css.rb +70 -0
- data/lib/coderay/encoders/html/numerization.rb +133 -0
- data/lib/coderay/encoders/html/output.rb +206 -0
- data/lib/coderay/encoders/json.rb +69 -0
- data/lib/coderay/encoders/lines_of_code.rb +90 -0
- data/lib/coderay/encoders/null.rb +26 -0
- data/lib/coderay/encoders/page.rb +20 -0
- data/lib/coderay/encoders/span.rb +19 -0
- data/lib/coderay/encoders/statistic.rb +77 -0
- data/lib/coderay/encoders/term.rb +137 -0
- data/lib/coderay/encoders/text.rb +32 -0
- data/lib/coderay/encoders/token_class_filter.rb +84 -0
- data/lib/coderay/encoders/xml.rb +71 -0
- data/lib/coderay/encoders/yaml.rb +22 -0
- data/lib/coderay/for_redcloth.rb +85 -0
- data/lib/coderay/helpers/file_type.rb +240 -0
- data/lib/coderay/helpers/gzip_simple.rb +123 -0
- data/lib/coderay/helpers/plugin.rb +349 -0
- data/lib/coderay/helpers/word_list.rb +138 -0
- data/lib/coderay/scanner.rb +284 -0
- data/lib/coderay/scanners/_map.rb +23 -0
- data/lib/coderay/scanners/c.rb +203 -0
- data/lib/coderay/scanners/cpp.rb +228 -0
- data/lib/coderay/scanners/css.rb +210 -0
- data/lib/coderay/scanners/debug.rb +62 -0
- data/lib/coderay/scanners/delphi.rb +150 -0
- data/lib/coderay/scanners/diff.rb +105 -0
- data/lib/coderay/scanners/groovy.rb +263 -0
- data/lib/coderay/scanners/html.rb +182 -0
- data/lib/coderay/scanners/java.rb +176 -0
- data/lib/coderay/scanners/java/builtin_types.rb +419 -0
- data/lib/coderay/scanners/java_script.rb +224 -0
- data/lib/coderay/scanners/json.rb +112 -0
- data/lib/coderay/scanners/nitro_xhtml.rb +136 -0
- data/lib/coderay/scanners/php.rb +526 -0
- data/lib/coderay/scanners/plaintext.rb +21 -0
- data/lib/coderay/scanners/python.rb +285 -0
- data/lib/coderay/scanners/rhtml.rb +74 -0
- data/lib/coderay/scanners/ruby.rb +404 -0
- data/lib/coderay/scanners/ruby/patterns.rb +238 -0
- data/lib/coderay/scanners/scheme.rb +145 -0
- data/lib/coderay/scanners/sql.rb +162 -0
- data/lib/coderay/scanners/xml.rb +17 -0
- data/lib/coderay/scanners/yaml.rb +144 -0
- data/lib/coderay/style.rb +20 -0
- data/lib/coderay/styles/_map.rb +7 -0
- data/lib/coderay/styles/cycnus.rb +151 -0
- data/lib/coderay/styles/murphy.rb +132 -0
- data/lib/coderay/token_classes.rb +86 -0
- data/lib/coderay/tokens.rb +391 -0
- data/lib/term/ansicolor.rb +220 -0
- metadata +123 -0
@@ -0,0 +1,238 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module CodeRay
|
3
|
+
module Scanners
|
4
|
+
|
5
|
+
module Ruby::Patterns # :nodoc:
|
6
|
+
|
7
|
+
# Words registered in IDENT_KIND (below) with the kind :reserved.
RESERVED_WORDS = %w[
  and def end in or unless begin defined? ensure module
  redo super until BEGIN break do next rescue then when
  END case else for retry while alias class elsif if
  not return undef yield
]

# Keywords that introduce a definition-like construct, grouped by the
# state symbol they are mapped to in DEF_NEW_STATE.
DEF_KEYWORDS    = %w[def]
UNDEF_KEYWORDS  = %w[undef]
ALIAS_KEYWORDS  = %w[alias]
MODULE_KEYWORDS = %w[class module]

# Word list built with :initial as constructor argument (presumably the
# value returned for unlisted words -- WordList is defined elsewhere).
# Each keyword group is registered with its own *_expected state symbol.
DEF_NEW_STATE = [
  [DEF_KEYWORDS,    :def_expected],
  [UNDEF_KEYWORDS,  :undef_expected],
  [ALIAS_KEYWORDS,  :alias_expected],
  [MODULE_KEYWORDS, :module_expected],
].inject(WordList.new(:initial)) { |list, (words, state)| list.add(words, state) }

# Words registered in IDENT_KIND (below) with the kind :pre_constant.
PREDEFINED_CONSTANTS = %w[
  nil true false self DATA ARGV ARGF __FILE__ __LINE__
]

# Word list built with :ident as constructor argument; reserved words
# are registered as :reserved, predefined constants as :pre_constant.
IDENT_KIND = [
  [RESERVED_WORDS,       :reserved],
  [PREDEFINED_CONSTANTS, :pre_constant],
].inject(WordList.new(:ident)) { |list, (words, kind)| list.add(words, kind) }
|
34
|
+
|
35
|
+
# Identifier pattern. Probes whether this Ruby's regexp engine matches a
# non-ASCII letter with [[:alpha:]]; if so the POSIX-class based pattern
# is used, otherwise the \w based fallback.
IDENT =
  if 'ä'[/[[:alpha:]]/] == 'ä'
    /[[:alpha:]_][[:alnum:]_]*/
  else
    /[^\W\d]\w*/
  end

# An identifier optionally followed by ? or !.
METHOD_NAME = / #{IDENT} [?!]? /ox

# Operators that are usable as method names.
METHOD_NAME_OPERATOR = /
  \*\*? # multiplication and power
  | [-+~]@? # plus, minus, tilde with and without at sign
  | [\/%&|^`] # division, modulo or format strings, and, or, xor, system
  | \[\]=? # array getter and setter
  | << | >> # append or shift left, shift right
  | <=?>? | >=? # comparison, rocket operator
  | ===? | =~ # simple equality, case equality, match
  | ![~=@]? # negation with and without at sign, not-equal and not-match
/ox

# Method name including the setter form (ident followed by "=", but not
# by "=>") and operator method names.
METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox

# Variables with a sigil: @ident, @@ident, either of both, and $-globals
# (named, numbered, punctuation and $-x forms).
INSTANCE_VARIABLE = / @ #{IDENT} /ox
CLASS_VARIABLE = / @@ #{IDENT} /ox
OBJECT_VARIABLE = / @@? #{IDENT} /ox
GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} | #{OBJECT_VARIABLE} /ox
VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox

# Maps an opening quote character to a token type: backtick gives :shell,
# slash gives :regexp, every other key falls back to :string.
QUOTE_TO_TYPE = Hash.new(:string).update(
  '`' => :shell,
  '/' => :regexp
)

# Letters accepted after a regexp literal, and characters treated as
# regexp symbols.
REGEXP_MODIFIERS = /[mixounse]*/
REGEXP_SYMBOLS = /[|?*+(){}\[\].^$]/

# Integer literals; digit groups may be separated by single underscores.
DECIMAL = /\d+(?:_\d+)*/
OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
BINARY = /0b[01]+(?:_[01]+)*/

# Float parts. In FLOAT_OR_INT the empty group () only participates in
# the match when a float suffix was present.
EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox
FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox
NUMERIC = / (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox

# A colon followed by a method name, a prefixed variable, or an opening
# quote (start of a quoted symbol).
SYMBOL = /
  :
  (?:
    #{METHOD_NAME_EX}
    | #{PREFIX_VARIABLE}
    | ['"]
  )
/ox
METHOD_NAME_OR_SYMBOL = / #{METHOD_NAME_EX} | #{SYMBOL} /ox

# What may follow a backslash: a letter escape, up to three octal digits,
# x plus one or two hex digits, or any single character (possibly none).
SIMPLE_ESCAPE = /
  [abefnrstv]
  | [0-7]{1,3}
  | x[0-9A-Fa-f]{1,2}
  | .?
/mx

# Control and meta escapes (M-, C-, c), possibly chained with backslashes.
CONTROL_META_ESCAPE = /
  (?: M-|C-|c )
  (?: \\ (?: M-|C-|c ) )*
  (?: [^\\] | \\ #{SIMPLE_ESCAPE} )?
/mox

ESCAPE = /
  #{CONTROL_META_ESCAPE} | #{SIMPLE_ESCAPE}
/mox

# Character literal: a question mark followed by one non-space character
# or by a backslash escape.
CHARACTER = /
  \?
  (?:
    [^\s\\]
    | \\ #{ESCAPE}
  )
/mox

# Heredoc opener. Capture 1 is the optional "-", capture 2 a bare
# delimiter, captures 3 and 4 the quote character and quoted delimiter.
# NOTE: This is not completely correct, but
# nobody needs heredoc delimiters ending with \n.
HEREDOC_OPEN = /
  << (-)? # $1 = float
  (?:
    ( [A-Za-z_0-9]+ ) # $2 = delim
  |
    ( ["'`\/] ) # $3 = quote, type
    ( [^\n]*? ) \3 # $4 = delim
  )
/mx

# An "=begin" block up to and including its "=end" line, or up to the end
# of the input when no "=end" follows.
RUBYDOC = /
  =begin (?!\S)
  .*?
  (?: \Z | ^=end (?!\S) [^\n]* )
/mx

# Everything from "__END__" up to the end of the input or up to a line
# starting with "#CODE".
DATA = /
  __END__$
  .*?
  (?: \Z | (?=^\#CODE) )
/mx

# Checks for a valid value to follow. This enables
# value_expected in method calls without parentheses.
VALUE_FOLLOWS = /
  (?>[ \t\f\v]+)
  (?:
    [%\/][^\s=]
    | <<-?\S
    | [-+] \d
    | #{CHARACTER}
  )
/x
|
145
|
+
# Keywords collected in a WordList created with its default arguments.
# The list equals RESERVED_WORDS without def, module, BEGIN, END, alias,
# class and undef. Judging by the name, these are the keywords after
# which a value may start -- confirm against the scanner that reads it.
KEYWORDS_EXPECTING_VALUE = WordList.new.add(
  %w[
    and end in or unless begin defined? ensure redo super
    until break do next rescue then when case else for
    retry while elsif if not return yield
  ]
)
|
153
|
+
|
154
|
+
# Either an =begin/=end block or an __END__ section.
RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo

# Line starts that can open one of the two constructs above.
RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x

# Opening of a %-literal: capture 1 is the type letter (empty when no
# letter is given), capture 2 the non-alphanumeric delimiter.
FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx

# Maps the type letter of a %-literal to a pair [token type, flag]. The
# flag is stored as StringState#interpreted elsewhere in this module.
FancyStringType = {
  'q' => [:string, false],
  'Q' => [:string, true],
  'r' => [:regexp, true],
  's' => [:symbol, false],
  'x' => [:shell, true]
}
# %w shares the entry of %q; the bare form (empty letter) and %W share
# the entry of %Q. The very same array objects are reused.
FancyStringType['w'] = FancyStringType['q']
['', 'W'].each { |letter| FancyStringType[letter] = FancyStringType['Q'] }
|
169
|
+
|
170
|
+
# State record for a string-like literal that is being scanned: token
# type, interpolation flag, closing delimiter, heredoc flag, opening
# paren with nesting depth, the lookahead pattern that finds the next
# interesting position, and the state to continue with.
class StringState < Struct.new :type, :interpreted, :delim, :heredoc,
  :paren, :paren_depth, :pattern, :next_state

  # Opening bracket => closing bracket.
  CLOSING_PAREN = {
    '(' => ')',
    '[' => ']',
    '<' => '>',
    '{' => '}'
  }

  # Freeze all entries so an accidental << on one of them fails loudly.
  CLOSING_PAREN.each { |opener, closer| opener.freeze; closer.freeze }
  OPENING_PAREN = CLOSING_PAREN.invert

  # Cache of lookahead patterns for ordinary (non-heredoc) strings, keyed
  # by [delim, interpreted]. Entries are built on first access.
  STRING_PATTERN = Hash.new do |cache, key|
    delim, interpreted = key
    stop_chars = Regexp.escape(delim.dup) # dup: workaround for old Ruby
    closer = CLOSING_PAREN[delim]
    if closer
      stop_chars = stop_chars[0..-1] if defined? JRUBY_VERSION # JRuby fix
      stop_chars << Regexp.escape(closer)
    end
    # The backslash always stops the scan; when it is the delimiter itself
    # the escaped delimiter already covers it.
    stop_chars << '\\\\' unless delim == '\\'

    special_escapes =
      if interpreted == :regexp_symbols
        '| ' + REGEXP_SYMBOLS.source
      elsif interpreted == :words
        '| \s'
      end

    cache[key] =
      if interpreted && !(delim == '#')
        / (?= [#{stop_chars}] | \# [{$@] #{special_escapes} ) /mx
      else
        / (?= [#{stop_chars}] #{special_escapes} ) /mx
      end
  end

  # Cache of lookahead patterns for heredoc bodies, keyed by
  # [delim, interpreted, indented]. Entries are built on first access.
  HEREDOC_PATTERN = Hash.new do |cache, key|
    delim, interpreted, indented = key
    terminator = Regexp.escape(delim.dup) # dup: workaround for old Ruby
    terminator = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new terminator } $ /x
    cache[key] =
      if interpreted
        / (?= #{terminator}() | \\ | \# [{$@] ) /mx # $1 set == end of heredoc
      else
        / (?= #{terminator}() | \\ ) /mx
      end
  end

  # Builds the state for a literal of the given +kind+.
  #
  # kind::        stored as +type+
  # interpreted:: stored as is; also selects the pattern variant
  # delim::       opening delimiter, or the heredoc terminator
  # heredoc::     false, or a truthy value (:indented selects the pattern
  #               variant that allows leading blanks before the terminator)
  #
  # For bracket delimiters, +delim+ becomes the closing bracket, +paren+
  # the opening one and +paren_depth+ starts at 1. For heredocs +delim+
  # is stored as nil. +next_state+ is always :initial.
  def initialize(kind, interpreted, delim, heredoc = false)
    paren = paren_depth = nil
    if heredoc
      pattern = HEREDOC_PATTERN[[delim, interpreted, heredoc == :indented]]
      delim = nil
    else
      pattern = STRING_PATTERN[[delim, interpreted]]
      closer = CLOSING_PAREN[delim]
      if closer
        paren = delim
        delim = closer
        paren_depth = 1
      end
    end
    super(kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial)
  end
end unless defined? StringState
|
234
|
+
|
235
|
+
end
|
236
|
+
|
237
|
+
end
|
238
|
+
end
|
@@ -0,0 +1,145 @@
|
|
1
|
+
module CodeRay
|
2
|
+
module Scanners
|
3
|
+
|
4
|
+
# Scheme scanner for CodeRay (by closure).
|
5
|
+
# Thanks to murphy for putting CodeRay into public.
|
6
|
+
class Scheme < Scanner
|
7
|
+
|
8
|
+
# TODO: function defs
|
9
|
+
# TODO: built-in functions
|
10
|
+
|
11
|
+
register_for :scheme
|
12
|
+
file_extension 'scm'
|
13
|
+
|
14
|
+
# Names that IDENT_KIND (below) registers with the kind :reserved.
CORE_FORMS = %w[
  lambda let let* letrec syntax-case define-syntax let-syntax letrec-syntax
  begin define quote if or and cond case do delay quasiquote set! cons
  force call-with-current-continuation call/cc
]

# Word list built with :ident as constructor argument; the core forms
# are added as :reserved.
IDENT_KIND = CaseIgnoringWordList.new(:ident).add(CORE_FORMS, :reserved)
|
22
|
+
|
23
|
+
#IDENTIFIER_INITIAL = /[a-z!@\$%&\*\/\:<=>\?~_\^]/i
|
24
|
+
#IDENTIFIER_SUBSEQUENT = /#{IDENTIFIER_INITIAL}|\d|\.|\+|-/
|
25
|
+
#IDENTIFIER = /#{IDENTIFIER_INITIAL}#{IDENTIFIER_SUBSEQUENT}*|\+|-|\.{3}/
|
26
|
+
# An identifier: an initial character followed by subsequent characters,
# or one of the special names "+", "-" and "...".
IDENTIFIER = /[a-zA-Z!@$%&*\/:<=>?~_^][\w!@$%&*\/:<=>?~^.+\-]*|[+-]|\.\.\./

# --- pieces shared by all radixes ------------------------------------
DIGIT = /\d/
EXACTNESS = /#i|#e/i
SIGN = /[\+-]?/
EXP_MARK = /[esfdl]/i
EXP = /#{EXP_MARK}#{SIGN}#{DIGIT}+/
SUFFIX = /#{EXP}?/

# --- radix 10 (the radix prefix is optional; decimals are allowed) ----
DIGIT10 = DIGIT
RADIX10 = /\#d/i
PREFIX10 = /#{RADIX10}?#{EXACTNESS}?|#{EXACTNESS}?#{RADIX10}?/
UINT10 = /#{DIGIT10}+#*/
DECIMAL = /#{DIGIT10}+#+\.#*#{SUFFIX}|#{DIGIT10}+\.#{DIGIT10}*#*#{SUFFIX}|\.#{DIGIT10}+#*#{SUFFIX}|#{UINT10}#{EXP}/
UREAL10 = /#{UINT10}\/#{UINT10}|#{DECIMAL}|#{UINT10}/
REAL10 = /#{SIGN}#{UREAL10}/
IMAG10 = /i|#{UREAL10}i/
COMPLEX10 = /#{REAL10}@#{REAL10}|#{REAL10}\+#{IMAG10}|#{REAL10}-#{IMAG10}|\+#{IMAG10}|-#{IMAG10}|#{REAL10}/
NUM10 = /#{PREFIX10}?#{COMPLEX10}/

# --- radix 16 ----------------------------------------------------------
DIGIT16 = /[0-9a-f]/i
RADIX16 = /\#x/i
PREFIX16 = /#{RADIX16}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX16}/
UINT16 = /#{DIGIT16}+#*/
UREAL16 = /#{UINT16}\/#{UINT16}|#{UINT16}/
REAL16 = /#{SIGN}#{UREAL16}/
IMAG16 = /i|#{UREAL16}i/
COMPLEX16 = /#{REAL16}@#{REAL16}|#{REAL16}\+#{IMAG16}|#{REAL16}-#{IMAG16}|\+#{IMAG16}|-#{IMAG16}|#{REAL16}/
NUM16 = /#{PREFIX16}#{COMPLEX16}/

# --- radix 8 -----------------------------------------------------------
DIGIT8 = /[0-7]/
RADIX8 = /\#o/i
PREFIX8 = /#{RADIX8}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX8}/
UINT8 = /#{DIGIT8}+#*/
UREAL8 = /#{UINT8}\/#{UINT8}|#{UINT8}/
REAL8 = /#{SIGN}#{UREAL8}/
IMAG8 = /i|#{UREAL8}i/
COMPLEX8 = /#{REAL8}@#{REAL8}|#{REAL8}\+#{IMAG8}|#{REAL8}-#{IMAG8}|\+#{IMAG8}|-#{IMAG8}|#{REAL8}/
NUM8 = /#{PREFIX8}#{COMPLEX8}/

# --- radix 2 -----------------------------------------------------------
DIGIT2 = /[01]/
RADIX2 = /\#b/i
PREFIX2 = /#{RADIX2}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX2}/
UINT2 = /#{DIGIT2}+#*/
UREAL2 = /#{UINT2}\/#{UINT2}|#{UINT2}/
REAL2 = /#{SIGN}#{UREAL2}/
IMAG2 = /i|#{UREAL2}i/
COMPLEX2 = /#{REAL2}@#{REAL2}|#{REAL2}\+#{IMAG2}|#{REAL2}-#{IMAG2}|\+#{IMAG2}|-#{IMAG2}|#{REAL2}/
NUM2 = /#{PREFIX2}#{COMPLEX2}/

# Any number; the radix-10 form is tried first.
NUM = /#{NUM10}|#{NUM16}|#{NUM8}|#{NUM2}/
|
71
|
+
|
72
|
+
private
|
73
|
+
# Scans the remaining input and appends tokens to +tokens+ with <<.
#
# Text tokens are [text, kind] pairs. A string literal is wrapped in
# [:open, :string] ... [:close, :string] and its quotes are emitted as
# :delimiter tokens. +options+ is accepted but not read here.
#
# Returns +tokens+.
def scan_tokens tokens,options
  state = :initial

  until eos?
    kind = nil

    if state == :initial
      if scan(/ \s+ | \\\n /x)
        kind = :space
      elsif scan(/['\(\[\)\]]|#\(/)
        kind = :operator_fat
      elsif scan(/;.*/)
        kind = :comment
      elsif scan(/#\\(?:newline|space|.?)/)
        kind = :char
      elsif scan(/#[ft]/)
        kind = :pre_constant
      elsif scan(/#{IDENTIFIER}/o)
        kind = IDENT_KIND[matched]
      elsif scan(/\./)
        kind = :operator
      elsif scan(/"/)
        # Opening quote: start the string group and switch state.
        tokens << [:open, :string]
        tokens << ['"', :delimiter]
        state = :string
        next
      elsif scan(/#{NUM}/o) && !matched.empty?
        kind = :integer
      elsif getch
        # Nothing else matched: consume one character as an error.
        kind = :error
      end

    elsif state == :string
      if scan(/[^"\\]+/) || scan(/\\.?/)
        kind = :content
      elsif scan(/"/)
        # Closing quote: end the string group and return to code.
        tokens << ['"', :delimiter]
        tokens << [:close, :string]
        state = :initial
        next
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1),
          tokens, state
      end

    else
      raise "else case reached"
    end

    text = matched
    if $DEBUG && !kind
      raise_inspect 'Error token %p in line %d' %
        [[text, kind], line], tokens
    end
    raise_inspect 'Empty token', tokens, state unless text

    tokens << [text, kind]
  end

  # Input ended inside a string literal: close the open group.
  tokens << [:close, :string] if state == :string

  tokens
end #scan_tokens
|
143
|
+
end #class
|
144
|
+
end #module scanners
|
145
|
+
end #module coderay
|
@@ -0,0 +1,162 @@
|
|
1
|
+
module CodeRay module Scanners
|
2
|
+
|
3
|
+
# by Josh Goebel
|
4
|
+
class SQL < Scanner
|
5
|
+
|
6
|
+
register_for :sql
|
7
|
+
|
8
|
+
# Words registered in IDENT_KIND (below) as :reserved. Note that "avg"
# is also listed in PREDEFINED_FUNCTIONS, which is added later.
RESERVED_WORDS = %w[
  create database table index trigger drop primary key set select insert
  update delete replace into on from values before and or if exists case
  when then else as group order by avg where join inner outer union engine
  not like end using collate show columns begin
]

# Words registered in IDENT_KIND as :pre_type.
PREDEFINED_TYPES = %w[
  char varchar enum binary text tinytext mediumtext longtext blob tinyblob
  mediumblob longblob timestamp date time datetime year double decimal
  float int integer tinyint mediumint bigint smallint unsigned bit bool
  boolean hex bin oct
]

# Words registered in IDENT_KIND as :predefined.
PREDEFINED_FUNCTIONS = %w[sum cast abs pi count min max avg]

# Words registered in IDENT_KIND as :directive.
DIRECTIVES = %w[auto_increment unique default charset]

# Words registered in IDENT_KIND as :pre_constant.
PREDEFINED_CONSTANTS = %w[null true false]

# Word list built with :ident as constructor argument. The groups are
# added in this order: reserved words, types, constants, functions,
# directives.
IDENT_KIND = (
  CaseIgnoringWordList.new(:ident).
    add(RESERVED_WORDS, :reserved).
    add(PREDEFINED_TYPES, :pre_type).
    add(PREDEFINED_CONSTANTS, :pre_constant).
    add(PREDEFINED_FUNCTIONS, :predefined).
    add(DIRECTIVES, :directive)
)

# What may follow a backslash inside a string: a single-character escape,
# x plus one or two hex digits, up to three octal digits, or any character.
ESCAPE = / [rbfntv\n\\\/'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | . /mx

# u plus four hex digits, or U plus eight hex digits.
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

# Prefix that may directly precede an opening quote: x, n, b (any case)
# or an underscore followed by word characters.
STRING_PREFIXES = /[xnb]|_\w+/i
|
42
|
+
|
43
|
+
# Scans the remaining input and appends tokens to +tokens+ with <<.
#
# Text tokens are [text, kind] pairs. A quoted string is wrapped in
# [:open, :string] ... [:close, :string]; its optional prefix is emitted
# as :modifier, its quotes as :delimiter, its text as :content and
# backslash escapes as :char. +options+ is accepted but not read here.
#
# The :string group is always closed, also when the input ends inside a
# string or a string ends with a lone backslash; text buffered up to that
# point is emitted as :content instead of being dropped.
#
# Returns +tokens+.
def scan_tokens tokens, options

  state = :initial
  string_type = nil
  # dup keeps the buffer mutable even when string literals are frozen.
  string_content = ''.dup

  # Emits the buffered string text (if any) as a single :content token
  # and starts a fresh buffer.
  flush_string_content = lambda do
    unless string_content.empty?
      tokens << [string_content, :content]
      string_content = ''.dup
    end
  end

  until eos?

    kind = nil
    match = nil

    if state == :initial

      if scan(/ \s+ | \\\n /x)
        kind = :space

      elsif scan(/^(?:--\s?|#).*/)
        kind = :comment

      elsif scan(%r! /\* (?: .*? \*/ | .* ) !mx)
        kind = :comment

      elsif scan(/ [-+*\/=<>;,!&^|()\[\]{}~%] | \.(?!\d) /x)
        kind = :operator

      elsif scan(/(#{STRING_PREFIXES})?([`"'])/o)
        prefix = self[1]
        string_type = self[2]
        tokens << [:open, :string]
        tokens << [prefix, :modifier] if prefix
        match = string_type
        state = :string
        kind = :delimiter

      elsif match = scan(/ @? [A-Za-z_][A-Za-z_0-9]* /x)
        # match[0, 1] is a one-character String on every Ruby version.
        kind = match[0, 1] == '@' ? :variable : IDENT_KIND[match.downcase]

      elsif scan(/0[xX][0-9A-Fa-f]+/)
        kind = :hex

      elsif scan(/0[0-7]+(?![89.eEfF])/)
        kind = :oct

      elsif scan(/(?>\d+)(?![.eEfF])/)
        kind = :integer

      elsif scan(/\d[fF]|\d*\.\d+(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
        kind = :float

      else
        getch
        kind = :error

      end

    elsif state == :string
      if match = scan(/[^\\"'`]+/)
        # Plain text is buffered so it ends up in one :content token.
        string_content << match
        next
      elsif match = scan(/["'`]/)
        if string_type == match
          if peek(1) == string_type  # doubling means escape
            string_content << string_type << getch
            next
          end
          flush_string_content.call
          tokens << [match, :delimiter]
          tokens << [:close, :string]
          state = :initial
          string_type = nil
        else
          # A different quote character is ordinary content.
          string_content << match
        end
        next
      elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
        flush_string_content.call
        kind = :char
      elsif match = scan(/ \\ . /mox)
        string_content << match
        next
      elsif scan(/ \\ | $ /x)
        # Lone backslash with nothing after it: report it as an error,
        # then close the group so the token stream stays balanced.
        flush_string_content.call
        tokens << [matched, :error] unless matched.empty?
        tokens << [:close, :string]
        state = :initial
        string_type = nil
        next
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), tokens, state
      end

    else
      raise_inspect 'else-case reached', tokens, state

    end

    match ||= matched
    unless kind
      raise_inspect 'Error token %p in line %d' %
        [[match, kind], line], tokens, state
    end
    raise_inspect 'Empty token', tokens unless match

    tokens << [match, kind]

  end

  # Input ended inside a string: emit what was buffered and close the group.
  if state == :string
    flush_string_content.call
    tokens << [:close, :string]
  end

  tokens

end
|
159
|
+
|
160
|
+
end
|
161
|
+
|
162
|
+
end end
|