coderay 0.4.3.48
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +340 -0
- data/README +103 -0
- data/demo/demo_count.rb +10 -0
- data/demo/demo_css.rb +4 -0
- data/demo/demo_div.rb +19 -0
- data/demo/demo_dump.rb +15 -0
- data/demo/demo_encoder.rb +39 -0
- data/demo/demo_global_vars.rb +13 -0
- data/demo/demo_global_vars2.rb +28 -0
- data/demo/demo_html.rb +394 -0
- data/demo/demo_html2.rb +11 -0
- data/demo/demo_load_encoder.rb +17 -0
- data/demo/demo_more.rb +204 -0
- data/demo/demo_scanner.rb +36 -0
- data/demo/demo_server.rb +92 -0
- data/demo/demo_simple.rb +10 -0
- data/demo/demo_stream.rb +25 -0
- data/demo/demo_stream2.rb +8 -0
- data/demo/demo_tokens.rb +3 -0
- data/lib/coderay.rb +284 -0
- data/lib/coderay/encoder.rb +151 -0
- data/lib/coderay/encoders/count.rb +21 -0
- data/lib/coderay/encoders/div.rb +16 -0
- data/lib/coderay/encoders/helpers/html_css.rb +155 -0
- data/lib/coderay/encoders/helpers/html_helper.rb +68 -0
- data/lib/coderay/encoders/helpers/html_output.rb +237 -0
- data/lib/coderay/encoders/html.rb +169 -0
- data/lib/coderay/encoders/null.rb +20 -0
- data/lib/coderay/encoders/span.rb +16 -0
- data/lib/coderay/encoders/statistic.rb +74 -0
- data/lib/coderay/encoders/text.rb +33 -0
- data/lib/coderay/encoders/tokens.rb +44 -0
- data/lib/coderay/encoders/yaml.rb +19 -0
- data/lib/coderay/helpers/filetype.rb +145 -0
- data/lib/coderay/helpers/gzip_simple.rb +123 -0
- data/lib/coderay/helpers/plugin.rb +286 -0
- data/lib/coderay/helpers/scanner_helper.rb +63 -0
- data/lib/coderay/scanner.rb +197 -0
- data/lib/coderay/scanners/c.rb +147 -0
- data/lib/coderay/scanners/delphi.rb +123 -0
- data/lib/coderay/scanners/helpers/ruby_helper.rb +212 -0
- data/lib/coderay/scanners/plaintext.rb +13 -0
- data/lib/coderay/scanners/ruby.rb +337 -0
- data/lib/coderay/tokens.rb +324 -0
- metadata +89 -0
@@ -0,0 +1,197 @@
|
|
1
|
+
module CodeRay
|
2
|
+
|
3
|
+
require 'coderay/helpers/plugin'
|
4
|
+
|
5
|
+
# = Scanners
|
6
|
+
#
|
7
|
+
# $Id: scanner.rb 39 2005-09-29 04:35:37Z murphy $
|
8
|
+
#
|
9
|
+
# This module holds the Scanner class and its subclasses.
|
10
|
+
# For example, the Ruby scanner is named CodeRay::Scanners::Ruby
|
11
|
+
# and can be found in coderay/scanners/ruby.
|
12
|
+
#
|
13
|
+
# Scanner also provides methods and constants for the register
|
14
|
+
# mechanism and the [] method that returns the Scanner class
|
15
|
+
# belonging to the given lang.
|
16
|
+
#
|
17
|
+
# See PluginHost.
|
18
|
+
module Scanners
|
19
|
+
extend PluginHost
|
20
|
+
plugin_path 'coderay/scanners'
|
21
|
+
|
22
|
+
require 'strscan'
|
23
|
+
|
24
|
+
# = Scanner
|
25
|
+
#
|
26
|
+
# The base class for all Scanners.
|
27
|
+
#
|
28
|
+
# It is a subclass of Ruby's great +StringScanner+, which
|
29
|
+
# makes it easy to access the scanning methods inside.
|
30
|
+
#
|
31
|
+
# It is also +Enumerable+, so you can use it like an Array of
|
32
|
+
# Tokens:
|
33
|
+
#
|
34
|
+
# require 'coderay'
|
35
|
+
#
|
36
|
+
# c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;"
|
37
|
+
#
|
38
|
+
# for text, kind in c_scanner
|
39
|
+
# puts text if kind == :operator
|
40
|
+
# end
|
41
|
+
#
|
42
|
+
# # prints: (*==)++;
|
43
|
+
#
|
44
|
+
# OK, this is a very simple example :)
|
45
|
+
# You can also use +map+, +any?+, +find+ and even +sort_by+,
|
46
|
+
# if you want.
|
47
|
+
class Scanner < StringScanner
|
48
|
+
extend Plugin
|
49
|
+
plugin_host Scanners
|
50
|
+
|
51
|
+
# Raised if a Scanner fails while scanning.
#
# Derives from StandardError so that a plain +rescue+ catches it and
# callers never have to rescue Exception (which would also swallow
# interrupts and exit requests) just to handle a scan failure.
ScanError = Class.new(StandardError)
|
53
|
+
|
54
|
+
require 'coderay/helpers/scanner_helper'
|
55
|
+
|
56
|
+
# The default options for all scanner classes.
|
57
|
+
#
|
58
|
+
# Define DEFAULT_OPTIONS in a subclass to override them.
|
59
|
+
DEFAULT_OPTIONS = { :stream => false }
|
60
|
+
|
61
|
+
class << self
|
62
|
+
|
63
|
+
# Returns whether the Scanner can be used in streaming mode.
#
# This is a singleton (class-level) method, so +self+ is the scanner
# class itself. Scanner#initialize tests its instances with
# <tt>kind_of? Streamable</tt>, which means Streamable is expected to be
# among the class's ancestors; a bare <tt>is_a? Streamable</tt> on the
# class object does not see that. A class that was extended with
# Streamable is still accepted.
def streamable?
  is_a?(Streamable) || ancestors.include?(Streamable)
end
|
67
|
+
|
68
|
+
end
|
69
|
+
|
70
|
+
=begin
|
71
|
+
## Excluded for speed reasons; protected seems to make methods slow.
|
72
|
+
|
73
|
+
# Save the StringScanner methods from being called.
|
74
|
+
# This would not be useful for highlighting.
|
75
|
+
strscan_public_methods =
|
76
|
+
StringScanner.instance_methods -
|
77
|
+
StringScanner.ancestors[1].instance_methods
|
78
|
+
protected(*strscan_public_methods)
|
79
|
+
=end
|
80
|
+
|
81
|
+
# Create a new Scanner.
|
82
|
+
#
|
83
|
+
# * +code+ is the input String and is handled by the superclass
|
84
|
+
# StringScanner.
|
85
|
+
# * +options+ is a Hash with Symbols as keys.
|
86
|
+
# It is merged with the default options of the class (you can
|
87
|
+
# overwrite default options here.)
|
88
|
+
# * +block+ is the callback for streamed highlighting.
|
89
|
+
#
|
90
|
+
# If you set :stream to +true+ in the options, the Scanner uses a
|
91
|
+
# TokenStream with the +block+ as callback to handle the tokens.
|
92
|
+
#
|
93
|
+
# Else, a Tokens object is used.
|
94
|
+
def initialize code, options = {}, &block
  @options = self.class::DEFAULT_OPTIONS.merge options

  # The base class has no scan_tokens implementation; refuse to build it.
  if self.class == Scanner
    raise "I am only the basic Scanner class. I can't scan "\
      "anything. :( Use my subclasses."
  end

  # Turn DOS ("\r\n") and lone "\r" line ends into "\n" before the
  # string is handed to StringScanner.
  super code.gsub(/\r\n?/, "\n")

  streaming = @options[:stream]
  if streaming
    unless block_given?
      warn "warning in CodeRay::Scanner.new: :stream is set, "\
        "but no block was given"
    end
    raise NotStreamableError, self unless kind_of? Streamable
    # Tokens are handed to the callback block.
    @tokens = TokenStream.new(&block)
  else
    if block_given?
      warn "warning in CodeRay::Scanner.new: Block given, "\
        "but :stream is #{streaming}"
    end
    # Tokens are collected in a Tokens object.
    @tokens = Tokens.new
  end
end
|
114
|
+
|
115
|
+
# More mnemonic accessor name for the input string.
|
116
|
+
alias code string
|
117
|
+
|
118
|
+
# Scans the code and returns the token container (@tokens).
#
# * +options+ is a Hash that is merged over the options given to
#   Scanner.new; the merged Hash is passed on to scan_tokens.
#
# In stream mode the scanner is reset and the code is scanned again on
# every call. Otherwise the result of the first scan is cached in
# @cached_tokens and returned by every later call, so +options+ passed
# to later calls have no effect.
def tokenize options = {}
  options = @options.merge options
  if @options[:stream]  # :stream must have been set already
    reset  # rewind, so that a repeated call scans from the start again
    scan_tokens @tokens, options
    @tokens
  else
    @cached_tokens ||= scan_tokens @tokens, options
  end
end
|
129
|
+
|
130
|
+
# You can also see tokenize as a read-only attribute
|
131
|
+
alias tokens tokenize
|
132
|
+
|
133
|
+
# Traverses the tokens, passing +block+ on to the tokens' own +each+.
#
# Raises ArgumentError if the scanner is in stream mode
# (@options[:stream] is set).
def each &block
  if @options[:stream]
    raise ArgumentError, 'Cannot traverse TokenStream.'
  end
  tokens.each(&block)
end
|
139
|
+
include Enumerable
|
140
|
+
|
141
|
+
# The current line position of the scanner (1-based).
#
# Only newlines that lie before the scan pointer are counted; a newline
# the scanner is standing on but has not consumed yet still belongs to
# the current line.
#
# Beware, this is implemented inefficiently: it slices and scans the
# input from the start on every call. It should be used for debugging
# only.
#
# NOTE(review): +pos+ is a byte offset on newer Rubies while String#[]
# indexes characters, so the result may be off for multibyte input -
# confirm if that matters.
def line
  string[0...pos].count("\n") + 1
end
|
148
|
+
|
149
|
+
protected
|
150
|
+
|
151
|
+
# The central scanning method, and commonly the only one a subclass
# has to implement.
#
# * +tokens+ is the container that scanned tokens are stored in.
# * +options+ is the merged options Hash.
#
# Subclasses must override this method; their version must return
# +tokens+ and must only use Tokens#<< for storing scanned tokens!
#
# This base version always raises NotImplementedError.
def scan_tokens tokens, options
  message = "#{self.class}#scan_tokens not implemented."
  raise NotImplementedError, message
end
|
160
|
+
|
161
|
+
# Raises a ScanError with additional status information.
#
# * +msg+ is the error description.
# * +tokens+ is the token container; its last 10 entries are listed
#   (it has to respond to <tt>last(10)</tt>).
# * +ambit+ is the number of characters of input shown before and
#   after the current scan position.
#
# The report also names the calling file, the current line and position,
# the last match and the bol?/eos? state.
def raise_inspect msg, tokens, ambit = 30
  # Clamp the start of the leading context at 0: a negative start index
  # would make String#[] count from the end of the input instead.
  lead = [pos - ambit, 0].max
  before = string[lead, pos - lead]
  after = string[pos, ambit]

  raise ScanError, <<-EOE % [


***ERROR in %s: %s

tokens:
%s

current line: %d pos = %d
matched: %p
bol? = %p, eos? = %p

surrounding code:
%p ~~ %p


***ERROR***

EOE
    File.basename(caller[0]),
    msg,
    tokens.last(10).map { |t| t.inspect }.join("\n"),
    line, pos,
    matched, bol?, eos?,
    before,
    after,
  ]
end
|
191
|
+
|
192
|
+
end
|
193
|
+
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
197
|
+
# vim:sw=2:ts=2:noet:tw=78
|
@@ -0,0 +1,147 @@
|
|
1
|
+
module CodeRay module Scanners
|
2
|
+
|
3
|
+
# Scanner for C source code, registered for the language id :c.
#
# Produces [text, kind] tokens; string literals are additionally
# wrapped in [:open, :string] / [:close, :string] marker tokens.
class C < Scanner

  register_for :c

  RESERVED_WORDS = [
    'asm', 'break', 'case', 'continue', 'default', 'do', 'else',
    'for', 'goto', 'if', 'return', 'switch', 'while',
    'struct', 'union', 'enum', 'typedef',
    'static', 'register', 'auto', 'extern',
    'sizeof',
    'volatile', 'const',  # C89
    'inline', 'restrict',  # C99
  ]

  PREDEFINED_TYPES = [
    'int', 'long', 'short', 'char', 'void',
    'signed', 'unsigned', 'float', 'double',
    'bool', 'complex',  # C99
  ]

  PREDEFINED_CONSTANTS = [
    'EOF', 'NULL',
    'true', 'false',  # C99
  ]

  # Maps an identifier to its token kind; :ident is passed as the
  # WordList's first argument and is what the label check below expects
  # for words that are in none of the lists.
  IDENT_KIND = Scanner::WordList.new(:ident).
    add(RESERVED_WORDS, :reserved).
    add(PREDEFINED_TYPES, :pre_type).
    add(PREDEFINED_CONSTANTS, :pre_constant)

  # What may follow a backslash: a simple escape character (including
  # \a and \?), a line continuation, a hex escape or an octal escape.
  ESCAPE = / [abfnrtv\n\\'"?] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
  # Universal character names: \uXXXX and \UXXXXXXXX.
  UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

  # Scans the whole input and appends the tokens to +tokens+.
  #
  # * +tokens+ is the token container; only << is used on it.
  # * +options+ is the merged options Hash (not used by this scanner).
  #
  # Returns +tokens+. Raises ScanError (via raise_inspect) for input
  # that yields an :error token or reaches an unexpected state.
  def scan_tokens tokens, options

    # :initial          - ordinary code
    # :string           - inside a "..." literal
    # :include_expected - right after an #include directive
    state = :initial

    until eos?

      kind = :error
      match = nil

      if state == :initial

        if scan(/ \s+ | \\\n /x)
          kind = :space

        # // comment (with backslash continuations) or /* ... */ comment;
        # an unterminated /* runs to the end of the input.
        elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
          kind = :comment

        # "#if 0" is treated as a comment up to and including the next
        # #elif / #else / #endif line, or up to the end of the input.
        elsif match = scan(/ \# \s* if \s* 0 /x)
          match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
          kind = :comment

        elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x)
          kind = :operator

        elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
          kind = IDENT_KIND[match]
          # A plain identifier followed by a single colon is a label;
          # the colon becomes part of the token.
          if kind == :ident and check(/:(?!:)/)
            match << scan(/:/)
            kind = :label
          end

        elsif match = scan(/L?"/)
          tokens << [:open, :string]
          # The wide-string prefix gets a token of its own.
          if match[0] == ?L
            tokens << ['L', :modifier]
            match = '"'
          end
          state = :string
          kind = :delimiter

        elsif scan(/#\s*(\w*)/)
          kind = :preprocessor  # FIXME multiline preprocs
          state = :include_expected if self[1] == 'include'

        elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
          kind = :char

        elsif scan(/0[xX][0-9A-Fa-f]+/)
          kind = :hex

        elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
          kind = :oct

        elsif scan(/(?:\d+)(?![.eEfF])/)
          kind = :integer

        elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
          kind = :float

        else
          # Unknown character: consume it; kind stays :error.
          getch
        end

      elsif state == :string
        if scan(/[^\\"]+/)
          kind = :content
        elsif scan(/"/)
          tokens << ['"', :delimiter]
          tokens << [:close, :string]
          state = :initial
          next
        elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
          kind = :char
        elsif scan(/ \\ | $ /x)
          # Invalid escape sequence (or end of line).
          kind = :error
          state = :initial
        else
          raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
        end

      elsif state == :include_expected
        # <file> or "file"; the closing delimiter is optional.
        if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
          kind = :include
          state = :initial

        elsif match = scan(/\s+/)
          kind = :space
          # The directive ends with its line.
          state = :initial if match.index ?\n

        else
          getch

        end

      else
        raise_inspect 'else-case reached', tokens

      end

      # Branches that did not assign +match+ use the last scanned text.
      match ||= matched
      if kind == :error
        raise_inspect 'Error token %p in line %d' % [[match, kind], line], tokens
      end

      tokens << [match, kind]

    end

    tokens
  end

end
|
146
|
+
|
147
|
+
end end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
module CodeRay module Scanners
|
2
|
+
|
3
|
+
# Scanner for Delphi source code, registered for the language id
# :delphi.
#
# Produces [text, kind] tokens; character and string literals are
# additionally wrapped in [:open, kind] / [:close, kind] marker tokens.
class Delphi < Scanner

  register_for :delphi

  RESERVED_WORDS = [
    'and', 'array', 'as', 'at', 'asm', 'begin', 'case', 'class',
    'const', 'constructor', 'destructor', 'dispinterface', 'div', 'do',
    'downto', 'else', 'end', 'except', 'exports', 'file', 'finalization',
    'finally', 'for', 'function', 'goto', 'if', 'implementation', 'in',
    'inherited', 'initialization', 'inline', 'interface', 'is', 'label',
    'library', 'mod', 'nil', 'not', 'object', 'of', 'or', 'out', 'packed',
    'procedure', 'program', 'property', 'raise', 'record', 'repeat',
    'resourcestring', 'set', 'shl', 'shr', 'string', 'then', 'threadvar',
    'to', 'try', 'type', 'unit', 'until', 'uses', 'var', 'while', 'with',
    'xor', 'on'
  ]

  DIRECTIVES = [
    'absolute', 'abstract', 'assembler', 'at', 'automated', 'cdecl',
    'contains', 'deprecated', 'dispid', 'dynamic', 'export',
    'external', 'far', 'forward', 'implements', 'local',
    'near', 'nodefault', 'on', 'overload', 'override',
    'package', 'pascal', 'platform', 'private', 'protected', 'public',
    'published', 'read', 'readonly', 'register', 'reintroduce',
    'requires', 'resident', 'safecall', 'stdcall', 'stored', 'varargs',
    'virtual', 'write', 'writeonly'
  ]

  # Maps an identifier to its token kind.
  # NOTE(review): 'at' and 'on' are in both lists; which kind wins
  # depends on how WordList#add treats repeated words - confirm.
  IDENT_KIND = Scanner::WordList.new(:ident, :case_ignore).
    add(RESERVED_WORDS, :reserved).
    add(DIRECTIVES, :directive)

  # Scans the whole input and appends the tokens to +tokens+.
  #
  # * +tokens+ is the token container; only << is used on it.
  # * +options+ is the merged options Hash (not used by this scanner).
  #
  # Returns +tokens+. Raises ScanError (via raise_inspect) for input
  # that yields an :error token or reaches an unexpected state.
  def scan_tokens tokens, options

    # :initial - ordinary code
    # :string  - inside a '...' literal
    state = :initial

    until eos?

      kind = :error
      match = nil

      if state == :initial

        if scan(/ \s+ /x)
          kind = :space

        # Compiler directives: {$...} or (*$...*). They are tried before
        # comments because they use the same delimiters.
        elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx)
          kind = :preprocessor

        # // line comment, {...} comment or (*...*) comment.
        elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx)
          kind = :comment

        elsif scan(/ [-+*\/=<>:;,.@\^|\(\)\[\]]+ /x)
          kind = :operator

        elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
          kind = IDENT_KIND[match]

        # A quoted single character (or a doubled quote) is emitted as
        # a complete :char group at once.
        elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x)
          tokens << [:open, :char]
          tokens << ["'", :delimiter]
          tokens << [self[1], :content]
          tokens << ["'", :delimiter]
          tokens << [:close, :char]
          next

        elsif match = scan(/ ' /x)
          tokens << [:open, :string]
          state = :string
          kind = :delimiter

        # Character codes: #65 or #$41.
        elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x)
          kind = :char

        elsif scan(/ \$ [0-9A-Fa-f]+ /x)
          kind = :hex

        # Digits followed by ".." still form an integer (the lookahead
        # only rejects a dot that is followed by a non-dot).
        elsif scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x)
          kind = :integer

        elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x)
          kind = :float

        else
          # Unknown character: consume it; kind stays :error.
          getch
        end

      elsif state == :string
        if scan(/[^\n']+/)
          kind = :content
        elsif scan(/''/)
          # A doubled quote inside a string.
          kind = :char
        elsif scan(/'/)
          tokens << ["'", :delimiter]
          tokens << [:close, :string]
          state = :initial
          next
        elsif scan(/\n/)
          # The line ended inside the string; kind stays :error.
          state = :initial
        else
          raise_inspect "else case \' reached; %p not handled." % peek(1), tokens
        end

      else
        raise_inspect 'else-case reached', tokens

      end

      # Branches that did not assign +match+ use the last scanned text.
      match ||= matched
      if kind == :error
        raise_inspect 'Error token %p in line %d' % [[match, kind], line], tokens
      end

      tokens << [match, kind]

    end

    tokens
  end

end
|
122
|
+
|
123
|
+
end end
|