coderay-beta 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/FOLDERS +53 -0
- data/LICENSE +504 -0
- data/bin/coderay +82 -0
- data/bin/coderay_stylesheet +4 -0
- data/lib/README +129 -0
- data/lib/coderay.rb +320 -0
- data/lib/coderay/duo.rb +85 -0
- data/lib/coderay/encoder.rb +213 -0
- data/lib/coderay/encoders/_map.rb +11 -0
- data/lib/coderay/encoders/comment_filter.rb +43 -0
- data/lib/coderay/encoders/count.rb +21 -0
- data/lib/coderay/encoders/debug.rb +49 -0
- data/lib/coderay/encoders/div.rb +19 -0
- data/lib/coderay/encoders/filter.rb +75 -0
- data/lib/coderay/encoders/html.rb +305 -0
- data/lib/coderay/encoders/html/css.rb +70 -0
- data/lib/coderay/encoders/html/numerization.rb +133 -0
- data/lib/coderay/encoders/html/output.rb +206 -0
- data/lib/coderay/encoders/json.rb +69 -0
- data/lib/coderay/encoders/lines_of_code.rb +90 -0
- data/lib/coderay/encoders/null.rb +26 -0
- data/lib/coderay/encoders/page.rb +20 -0
- data/lib/coderay/encoders/span.rb +19 -0
- data/lib/coderay/encoders/statistic.rb +77 -0
- data/lib/coderay/encoders/term.rb +137 -0
- data/lib/coderay/encoders/text.rb +32 -0
- data/lib/coderay/encoders/token_class_filter.rb +84 -0
- data/lib/coderay/encoders/xml.rb +71 -0
- data/lib/coderay/encoders/yaml.rb +22 -0
- data/lib/coderay/for_redcloth.rb +85 -0
- data/lib/coderay/helpers/file_type.rb +240 -0
- data/lib/coderay/helpers/gzip_simple.rb +123 -0
- data/lib/coderay/helpers/plugin.rb +349 -0
- data/lib/coderay/helpers/word_list.rb +138 -0
- data/lib/coderay/scanner.rb +284 -0
- data/lib/coderay/scanners/_map.rb +23 -0
- data/lib/coderay/scanners/c.rb +203 -0
- data/lib/coderay/scanners/cpp.rb +228 -0
- data/lib/coderay/scanners/css.rb +210 -0
- data/lib/coderay/scanners/debug.rb +62 -0
- data/lib/coderay/scanners/delphi.rb +150 -0
- data/lib/coderay/scanners/diff.rb +105 -0
- data/lib/coderay/scanners/groovy.rb +263 -0
- data/lib/coderay/scanners/html.rb +182 -0
- data/lib/coderay/scanners/java.rb +176 -0
- data/lib/coderay/scanners/java/builtin_types.rb +419 -0
- data/lib/coderay/scanners/java_script.rb +224 -0
- data/lib/coderay/scanners/json.rb +112 -0
- data/lib/coderay/scanners/nitro_xhtml.rb +136 -0
- data/lib/coderay/scanners/php.rb +526 -0
- data/lib/coderay/scanners/plaintext.rb +21 -0
- data/lib/coderay/scanners/python.rb +285 -0
- data/lib/coderay/scanners/rhtml.rb +74 -0
- data/lib/coderay/scanners/ruby.rb +404 -0
- data/lib/coderay/scanners/ruby/patterns.rb +238 -0
- data/lib/coderay/scanners/scheme.rb +145 -0
- data/lib/coderay/scanners/sql.rb +162 -0
- data/lib/coderay/scanners/xml.rb +17 -0
- data/lib/coderay/scanners/yaml.rb +144 -0
- data/lib/coderay/style.rb +20 -0
- data/lib/coderay/styles/_map.rb +7 -0
- data/lib/coderay/styles/cycnus.rb +151 -0
- data/lib/coderay/styles/murphy.rb +132 -0
- data/lib/coderay/token_classes.rb +86 -0
- data/lib/coderay/tokens.rb +391 -0
- data/lib/term/ansicolor.rb +220 -0
- metadata +123 -0
@@ -0,0 +1,238 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module CodeRay
|
3
|
+
module Scanners
|
4
|
+
|
5
|
+
module Ruby::Patterns # :nodoc:
|
6
|
+
|
7
|
+
# Words reserved by the Ruby grammar.
RESERVED_WORDS = %w[
  and def end in or unless begin defined? ensure module redo super until
  BEGIN break do next rescue then when END case else for retry
  while alias class elsif if not return undef yield
]

# Keyword groups; each group is registered below under its own
# "..._expected" symbol.
DEF_KEYWORDS    = %w[ def ]
UNDEF_KEYWORDS  = %w[ undef ]
ALIAS_KEYWORDS  = %w[ alias ]
MODULE_KEYWORDS = %w[ class module ]

# Word list constructed with :initial (presumably the value returned for
# unlisted words -- confirm against WordList).
DEF_NEW_STATE = WordList.new(:initial).
  add(DEF_KEYWORDS,    :def_expected).
  add(UNDEF_KEYWORDS,  :undef_expected).
  add(ALIAS_KEYWORDS,  :alias_expected).
  add(MODULE_KEYWORDS, :module_expected)

# Names registered as :pre_constant in IDENT_KIND.
PREDEFINED_CONSTANTS = %w[
  nil true false self DATA ARGV ARGF __FILE__ __LINE__
]

# Word list constructed with :ident; reserved words are registered as
# :reserved and predefined constants as :pre_constant.
IDENT_KIND = WordList.new(:ident).
  add(RESERVED_WORDS,       :reserved).
  add(PREDEFINED_CONSTANTS, :pre_constant)
|
34
|
+
|
35
|
+
# An identifier. The probe checks whether [[:alpha:]] matches a non-ASCII
# letter on this interpreter; if so the POSIX bracket form is used,
# otherwise the \w based fallback.
IDENT = 'ä'[/[[:alpha:]]/] == 'ä' ? /[[:alpha:]_][[:alnum:]_]*/ : /[^\W\d]\w*/

# Identifier with an optional trailing ? or !.
METHOD_NAME = / #{IDENT} [?!]? /ox

# Operators that may be used as method names.
METHOD_NAME_OPERATOR = /
  \*\*? # multiplication and power
  | [-+~]@? # plus, minus, tilde with and without at sign
  | [\/%&|^`] # division, modulo or format strings, and, or, xor, system
  | \[\]=? # array getter and setter
  | << | >> # append or shift left, shift right
  | <=?>? | >=? # comparison, rocket operator
  | ===? | =~ # simple equality, case equality, match
  | ![~=@]? # negation with and without at sign, not-equal and not-match
/ox

# Method name including setter names (a trailing = that is not part of =>)
# and operator names.
METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox

INSTANCE_VARIABLE = / @ #{IDENT} /ox
CLASS_VARIABLE = / @@ #{IDENT} /ox
OBJECT_VARIABLE = / @@? #{IDENT} /ox
GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} | #{OBJECT_VARIABLE} /ox
VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox

# Token type selected by a quote character; every quote not listed
# here yields :string.
QUOTE_TO_TYPE = {
  '`' => :shell,
  '/'=> :regexp,
}
QUOTE_TO_TYPE.default = :string

REGEXP_MODIFIERS = /[mixounse]*/
REGEXP_SYMBOLS = /[|?*+(){}\[\].^$]/

# Integer literals. Ruby accepts the radix letter in either case
# (0x/0X, 0b/0B) and an explicit octal marker (0o/0O) in addition to the
# bare leading zero, so all of these forms are matched.
DECIMAL = /\d+(?:_\d+)*/
OCTAL = /0(?:[oO]|_)?[0-7]+(?:_[0-7]+)*/
HEXADECIMAL = /0[xX][0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
BINARY = /0[bB][01]+(?:_[01]+)*/

EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox
# The empty group is the only capture: it is set exactly when a float
# suffix was matched.
FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox
NUMERIC = / (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox

# A colon followed by a method name, a prefixed variable, or an opening
# quote.
SYMBOL = /
  :
  (?:
    #{METHOD_NAME_EX}
  | #{PREFIX_VARIABLE}
  | ['"]
  )
/ox
METHOD_NAME_OR_SYMBOL = / #{METHOD_NAME_EX} | #{SYMBOL} /ox
|
84
|
+
|
85
|
+
# What may follow a backslash in a simple escape: a letter escape, up to
# three octal digits, x with one or two hex digits, or any single character
# (possibly none).
SIMPLE_ESCAPE = /
    [abefnrstv]
  | [0-7]{1,3}
  | x[0-9A-Fa-f]{1,2}
  | .?
/mx

# Control and meta escapes: one M-, C- or c prefix, any number of further
# backslash-prefixed ones, then an optional target character or escape.
CONTROL_META_ESCAPE = /
  (?: M-|C-|c )
  (?: \\ (?: M-|C-|c ) )*
  (?: [^\\] | \\ #{SIMPLE_ESCAPE} )?
/mox

# Everything that may follow the backslash of an escape sequence.
ESCAPE = /
  #{CONTROL_META_ESCAPE} | #{SIMPLE_ESCAPE}
/mox

# A character literal: a question mark followed by one non-space,
# non-backslash character or by a backslash escape.
CHARACTER = /
  \?
  (?:
    [^\s\\]
  | \\ #{ESCAPE}
  )
/mox
|
109
|
+
|
110
|
+
# Start of a here-document. Group 1 holds the optional dash, group 2 a bare
# delimiter; for a quoted delimiter group 3 holds the quote character and
# group 4 the text between the quotes.
#
# NOTE: This is not completely correct, but
# nobody needs heredoc delimiters ending with \n.
HEREDOC_OPEN = /
  << (-)? # $1 = float
  (?:
    ( [A-Za-z_0-9]+ ) # $2 = delim
  |
    ( ["'`\/] ) # $3 = quote, type
    ( [^\n]*? ) \3 # $4 = delim
  )
/mx

# An =begin block, running up to and including the line that starts with
# =end, or to the end of the input when there is no such line.
RUBYDOC = /
  =begin (?!\S)
  .*?
  (?: \Z | ^=end (?!\S) [^\n]* )
/mx

# Everything from __END__ to the end of the input, stopping early in front
# of a line that starts with #CODE.
DATA = /
  __END__$
  .*?
  (?: \Z | (?=^\#CODE) )
/mx
|
133
|
+
|
134
|
+
# Checks for a valid value to follow. This enables
# value_expected in method calls without parentheses.
#
# Matches blanks followed by the start of a percent or slash literal, a
# heredoc opener, a signed digit, or a character literal.
VALUE_FOLLOWS = /
  (?>[ \t\f\v]+)
  (?:
    [%\/][^\s=]
  | <<-?\S
  | [-+] \d
  | #{CHARACTER}
  )
/x

# Keywords collected in a word list built without a constructor argument.
KEYWORDS_EXPECTING_VALUE = WordList.new.add(%w[
  and end in or unless begin defined? ensure redo super until
  break do next rescue then when case else for retry
  while elsif if not return yield
])

# Either an =begin block or an __END__ section.
RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo
|
155
|
+
|
156
|
+
# Line start of an =begin block or an __END__ line.
RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x

# Start of a percent literal: group 1 is the type letter (empty when there
# is none), group 2 the non-alphanumeric delimiter.
FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx

# Percent-literal type letter => [token type, second flag]. The flag is
# presumably "interpolated" -- confirm against the scanner that reads it.
FancyStringType = {
  'q' => [:string, false],
  'Q' => [:string, true],
  'r' => [:regexp, true],
  's' => [:symbol, false],
  'x' => [:shell, true]
}
# %w shares the entry of %q; the bare form and %W share the entry of %Q.
FancyStringType['w'] = FancyStringType['q']
['', 'W'].each { |letter| FancyStringType[letter] = FancyStringType['Q'] }
|
169
|
+
|
170
|
+
# State record for a string-like literal: its token type, the "interpreted"
# flag, the closing delimiter, the heredoc mode, bracket bookkeeping, the
# regexp that finds the next position of interest, and the follow-up state.
class StringState < Struct.new :type, :interpreted, :delim, :heredoc,
                               :paren, :paren_depth, :pattern, :next_state

  # Opening bracket => closing bracket.
  CLOSING_PAREN = { '(' => ')', '[' => ']', '<' => '>', '{' => '}' }

  # debug, if I try to change it with <<
  CLOSING_PAREN.each_pair { |opener, closer| opener.freeze; closer.freeze }
  OPENING_PAREN = CLOSING_PAREN.invert

  # Cache of lookahead patterns keyed by [delimiter, interpreted]. A pattern
  # stops in front of the delimiter, its closing bracket (if any), a
  # backslash and, depending on +interpreted+, interpolation starts, regexp
  # symbols or whitespace.
  STRING_PATTERN = Hash.new do |cache, key|
    delim, interpreted = *key
    stoppers = Regexp.escape(delim.dup)  # dup: workaround for old Ruby
    closer = CLOSING_PAREN[delim]
    if closer
      stoppers = stoppers[0..-1] if defined? JRUBY_VERSION  # JRuby fix
      stoppers << Regexp.escape(closer)
    end
    stoppers << '\\\\' unless delim == '\\'

    special_escapes =
      if interpreted == :regexp_symbols
        '| ' + REGEXP_SYMBOLS.source
      elsif interpreted == :words
        '| \s'
      end

    cache[key] =
      if interpreted && delim != '#'
        / (?= [#{stoppers}] | \# [{$@] #{special_escapes} ) /mx
      else
        / (?= [#{stoppers}] #{special_escapes} ) /mx
      end
  end

  # Cache of lookahead patterns keyed by [delimiter, interpreted, indented].
  # A pattern stops in front of the newline that precedes the terminator
  # line, in front of a backslash and, when interpreted, in front of an
  # interpolation start.
  HEREDOC_PATTERN = Hash.new do |cache, key|
    delim, interpreted, indented = *key
    escaped = Regexp.escape(delim.dup)  # dup: workaround for old Ruby
    terminator = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new escaped } $ /x
    cache[key] =
      if interpreted
        / (?= #{terminator}() | \\ | \# [{$@] ) /mx  # $1 set == end of heredoc
      else
        / (?= #{terminator}() | \\ ) /mx
      end
  end

  # kind::        stored as +type+
  # interpreted:: flag (or symbol) selecting the pattern variant
  # delim::       opening delimiter, or the heredoc terminator word
  # heredoc::     false for ordinary strings; :indented allows leading
  #               blanks in front of the terminator
  #
  # For bracket delimiters +delim+ becomes the closing bracket, +paren+ the
  # opening one and +paren_depth+ starts at 1. For heredocs +delim+ is nil.
  def initialize kind, interpreted, delim, heredoc = false
    paren = paren_depth = nil
    if heredoc
      pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ]
      delim = nil
    else
      pattern = STRING_PATTERN[ [delim, interpreted] ]
      closer = CLOSING_PAREN[delim]
      if closer
        paren, delim, paren_depth = delim, closer, 1
      end
    end
    super kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial
  end

end unless defined? StringState
|
234
|
+
|
235
|
+
end
|
236
|
+
|
237
|
+
end
|
238
|
+
end
|
@@ -0,0 +1,145 @@
|
|
1
|
+
module CodeRay
|
2
|
+
module Scanners
|
3
|
+
|
4
|
+
# Scheme scanner for CodeRay (by closure).
|
5
|
+
# Thanks to murphy for putting CodeRay into public.
|
6
|
+
class Scheme < Scanner
|
7
|
+
|
8
|
+
# TODO: function defs
|
9
|
+
# TODO: built-in functions
|
10
|
+
|
11
|
+
register_for :scheme
|
12
|
+
file_extension 'scm'
|
13
|
+
|
14
|
+
# Names registered as :reserved in IDENT_KIND.
CORE_FORMS = %w[
  lambda let let* letrec syntax-case define-syntax let-syntax letrec-syntax
  begin define quote if or and cond case do delay quasiquote set! cons force
  call-with-current-continuation call/cc
]

# Case-ignoring word list constructed with :ident.
IDENT_KIND = CaseIgnoringWordList.new(:ident).
  add(CORE_FORMS, :reserved)
|
22
|
+
|
23
|
+
#IDENTIFIER_INITIAL = /[a-z!@\$%&\*\/\:<=>\?~_\^]/i
#IDENTIFIER_SUBSEQUENT = /#{IDENTIFIER_INITIAL}|\d|\.|\+|-/
#IDENTIFIER = /#{IDENTIFIER_INITIAL}#{IDENTIFIER_SUBSEQUENT}*|\+|-|\.{3}/
IDENTIFIER = /[a-zA-Z!@$%&*\/:<=>?~_^][\w!@$%&*\/:<=>?~^.+\-]*|[+-]|\.\.\./

# --- building blocks shared by every radix ---
DIGIT = /\d/
EXACTNESS = /#i|#e/i
SIGN = /[\+-]?/
EXP_MARK = /[esfdl]/i
EXP = /#{EXP_MARK}#{SIGN}#{DIGIT}+/
SUFFIX = /#{EXP}?/

# --- base 10: the radix prefix is optional, decimals and exponents allowed ---
DIGIT10 = DIGIT
RADIX10 = /\#d/i
PREFIX10 = /#{RADIX10}?#{EXACTNESS}?|#{EXACTNESS}?#{RADIX10}?/
UINT10 = /#{DIGIT10}+#*/
DECIMAL = /#{DIGIT10}+#+\.#*#{SUFFIX}|#{DIGIT10}+\.#{DIGIT10}*#*#{SUFFIX}|\.#{DIGIT10}+#*#{SUFFIX}|#{UINT10}#{EXP}/
UREAL10 = /#{UINT10}\/#{UINT10}|#{DECIMAL}|#{UINT10}/
REAL10 = /#{SIGN}#{UREAL10}/
IMAG10 = /i|#{UREAL10}i/
COMPLEX10 = /#{REAL10}@#{REAL10}|#{REAL10}\+#{IMAG10}|#{REAL10}-#{IMAG10}|\+#{IMAG10}|-#{IMAG10}|#{REAL10}/
NUM10 = /#{PREFIX10}?#{COMPLEX10}/

# --- base 16 ---
DIGIT16 = /[0-9a-f]/i
RADIX16 = /\#x/i
PREFIX16 = /#{RADIX16}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX16}/
UINT16 = /#{DIGIT16}+#*/
UREAL16 = /#{UINT16}\/#{UINT16}|#{UINT16}/
REAL16 = /#{SIGN}#{UREAL16}/
IMAG16 = /i|#{UREAL16}i/
COMPLEX16 = /#{REAL16}@#{REAL16}|#{REAL16}\+#{IMAG16}|#{REAL16}-#{IMAG16}|\+#{IMAG16}|-#{IMAG16}|#{REAL16}/
NUM16 = /#{PREFIX16}#{COMPLEX16}/

# --- base 8 ---
DIGIT8 = /[0-7]/
RADIX8 = /\#o/i
PREFIX8 = /#{RADIX8}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX8}/
UINT8 = /#{DIGIT8}+#*/
UREAL8 = /#{UINT8}\/#{UINT8}|#{UINT8}/
REAL8 = /#{SIGN}#{UREAL8}/
IMAG8 = /i|#{UREAL8}i/
COMPLEX8 = /#{REAL8}@#{REAL8}|#{REAL8}\+#{IMAG8}|#{REAL8}-#{IMAG8}|\+#{IMAG8}|-#{IMAG8}|#{REAL8}/
NUM8 = /#{PREFIX8}#{COMPLEX8}/

# --- base 2 ---
DIGIT2 = /[01]/
RADIX2 = /\#b/i
PREFIX2 = /#{RADIX2}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX2}/
UINT2 = /#{DIGIT2}+#*/
UREAL2 = /#{UINT2}\/#{UINT2}|#{UINT2}/
REAL2 = /#{SIGN}#{UREAL2}/
IMAG2 = /i|#{UREAL2}i/
COMPLEX2 = /#{REAL2}@#{REAL2}|#{REAL2}\+#{IMAG2}|#{REAL2}-#{IMAG2}|\+#{IMAG2}|-#{IMAG2}|#{REAL2}/
NUM2 = /#{PREFIX2}#{COMPLEX2}/

# A number in any of the four radixes; base 10 is tried first.
NUM = /#{NUM10}|#{NUM16}|#{NUM8}|#{NUM2}/
|
71
|
+
|
72
|
+
private
|
73
|
+
# Tokenizes Scheme source and appends [text, kind] pairs to +tokens+.
#
# A string literal is emitted as [:open, :string], the opening quote as
# :delimiter, :content pieces, the closing quote as :delimiter and finally
# [:close, :string]. A string still open at the end of the input gets its
# [:close, :string] marker as well.
#
# tokens::  receives the tokens via <<; also the return value
# options:: not read by this method
def scan_tokens tokens,options

  state = :initial
  word_kinds = IDENT_KIND

  until eos?
    kind = nil

    if state == :initial
      if scan(/ \s+ | \\\n /x)
        kind = :space
      elsif scan(/['\(\[\)\]]|#\(/)
        kind = :operator_fat
      elsif scan(/;.*/)
        kind = :comment
      elsif scan(/#\\(?:newline|space|.?)/)
        kind = :char
      elsif scan(/#[ft]/)
        kind = :pre_constant
      elsif scan(/#{IDENTIFIER}/o)
        kind = word_kinds[matched]
      elsif scan(/\./)
        kind = :operator
      elsif scan(/"/)
        # the opening quote is emitted here; no regular token follows
        tokens << [:open, :string]
        state = :string
        tokens << ['"', :delimiter]
        next
      elsif scan(/#{NUM}/o) && !matched.empty?
        kind = :integer
      elsif getch
        kind = :error
      end

    elsif state == :string
      if scan(/[^"\\]+/) || scan(/\\.?/)
        kind = :content
      elsif scan(/"/)
        tokens << ['"', :delimiter]
        tokens << [:close, :string]
        state = :initial
        next
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1),
          tokens, state
      end

    else
      raise "else case reached"
    end

    text = matched
    if $DEBUG && !kind
      raise_inspect 'Error token %p in line %d' %
        [[text, kind], line], tokens
    end
    raise_inspect 'Empty token', tokens, state unless text

    tokens << [text, kind]

  end # until eos

  tokens << [:close, :string] if state == :string

  tokens

end #scan_tokens
|
143
|
+
end #class
|
144
|
+
end #module scanners
|
145
|
+
end #module coderay
|
@@ -0,0 +1,162 @@
|
|
1
|
+
module CodeRay module Scanners
|
2
|
+
|
3
|
+
# by Josh Goebel
|
4
|
+
class SQL < Scanner
|
5
|
+
|
6
|
+
register_for :sql
|
7
|
+
|
8
|
+
# Words registered as :reserved.
RESERVED_WORDS = %w[
  create database table index trigger drop primary key set select
  insert update delete replace into on from values before and or
  if exists case when then else as group order by avg where
  join inner outer union engine not like end using collate show
  columns begin
]

# Words registered as :pre_type.
PREDEFINED_TYPES = %w[
  char varchar enum binary text tinytext mediumtext longtext
  blob tinyblob mediumblob longblob timestamp date time datetime year
  double decimal float int integer tinyint mediumint bigint smallint
  unsigned bit bool boolean hex bin oct
]

# Words registered as :predefined.
PREDEFINED_FUNCTIONS = %w[ sum cast abs pi count min max avg ]

# Words registered as :directive.
DIRECTIVES = %w[ auto_increment unique default charset ]

# Words registered as :pre_constant.
PREDEFINED_CONSTANTS = %w[ null true false ]

# Case-ignoring word list constructed with :ident. The groups are added in
# the order below; "avg" is in two of them, reserved words first and
# predefined functions later.
IDENT_KIND = CaseIgnoringWordList.new(:ident).
  add(RESERVED_WORDS,       :reserved).
  add(PREDEFINED_TYPES,     :pre_type).
  add(PREDEFINED_CONSTANTS, :pre_constant).
  add(PREDEFINED_FUNCTIONS, :predefined).
  add(DIRECTIVES,           :directive)
|
37
|
+
|
38
|
+
# What may follow a backslash inside a string: a single-character escape,
# x plus one or two hex digits, up to three octal digits, or any character.
ESCAPE = / [rbfntv\n\\\/'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | . /mx

# u plus four hex digits, or U plus eight.
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

# Prefix that may precede a string's opening quote: one of x, n, b, or an
# underscore followed by word characters; case is ignored.
STRING_PREFIXES = /[xnb]|_\w+/i
|
42
|
+
|
43
|
+
# Tokenizes SQL source and appends [text, kind] pairs to +tokens+.
#
# A string literal is emitted as [:open, :string], an optional prefix as
# :modifier, the opening quote as :delimiter, :content and :char pieces,
# the closing quote as :delimiter and finally [:close, :string]. Every
# opened string is closed again, including strings cut short by the end of
# the input or by a lone trailing backslash.
#
# tokens::  receives the tokens via <<; also the return value
# options:: not read by this method
def scan_tokens tokens, options

  state = :initial
  string_type = nil
  string_content = ''

  until eos?

    kind = nil
    match = nil

    if state == :initial

      if scan(/ \s+ | \\\n /x)
        kind = :space

      # not anchored to the line start: a comment may follow code on the
      # same line
      elsif scan(/(?:--\s?|#).*/)
        kind = :comment

      elsif scan(%r! /\* (?: .*? \*/ | .* ) !mx)
        kind = :comment

      elsif scan(/ [-+*\/=<>;,!&^|()\[\]{}~%] | \.(?!\d) /x)
        kind = :operator

      elsif scan(/(#{STRING_PREFIXES})?([`"'])/o)
        prefix = self[1]
        string_type = self[2]
        tokens << [:open, :string]
        tokens << [prefix, :modifier] if prefix
        match = string_type
        state = :string
        kind = :delimiter

      elsif match = scan(/ @? [A-Za-z_][A-Za-z_0-9]* /x)
        kind = match[0] == ?@ ? :variable : IDENT_KIND[match.downcase]

      elsif scan(/0[xX][0-9A-Fa-f]+/)
        kind = :hex

      elsif scan(/0[0-7]+(?![89.eEfF])/)
        kind = :oct

      elsif scan(/(?>\d+)(?![.eEfF])/)
        kind = :integer

      elsif scan(/\d[fF]|\d*\.\d+(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
        kind = :float

      else
        getch
        kind = :error

      end

    elsif state == :string
      if match = scan(/[^\\"'`]+/)
        string_content << match
        next
      elsif match = scan(/["'`]/)
        if string_type == match
          if peek(1) == string_type  # doubling means escape
            string_content << string_type << getch
            next
          end
          unless string_content.empty?
            tokens << [string_content, :content]
            string_content = ''
          end
          tokens << [match, :delimiter]
          tokens << [:close, :string]
          state = :initial
          string_type = nil
          next
        else
          # a quote of another type is ordinary content
          string_content << match
        end
        next
      elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
        unless string_content.empty?
          tokens << [string_content, :content]
          string_content = ''
        end
        kind = :char
      elsif match = scan(/ \\ . /mox)
        string_content << match
        next
      elsif match = scan(/ \\ | $ /x)
        # broken string: flush what was collected, report the stray
        # backslash and close the group so open and close stay balanced
        unless string_content.empty?
          tokens << [string_content, :content]
          string_content = ''
        end
        tokens << [match, :error] unless match.empty?
        tokens << [:close, :string]
        state = :initial
        string_type = nil
        next
      else
        # raise_inspect, not raise: a String plus extra arguments is not a
        # valid argument list for Kernel#raise
        raise_inspect "else case \" reached; %p not handled." % peek(1), tokens, state
      end

    else
      raise_inspect 'else-case reached', tokens, state

    end

    match ||= matched
    unless kind
      raise_inspect 'Error token %p in line %d' %
        [[match, kind], line], tokens, state
    end
    raise_inspect 'Empty token', tokens unless match

    tokens << [match, kind]

  end

  # input ended inside a string: keep its content and close the group
  if state == :string
    tokens << [string_content, :content] unless string_content.empty?
    tokens << [:close, :string]
  end

  tokens

end
|
159
|
+
|
160
|
+
end
|
161
|
+
|
162
|
+
end end
|