coderay 0.4.3.48
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +340 -0
- data/README +103 -0
- data/demo/demo_count.rb +10 -0
- data/demo/demo_css.rb +4 -0
- data/demo/demo_div.rb +19 -0
- data/demo/demo_dump.rb +15 -0
- data/demo/demo_encoder.rb +39 -0
- data/demo/demo_global_vars.rb +13 -0
- data/demo/demo_global_vars2.rb +28 -0
- data/demo/demo_html.rb +394 -0
- data/demo/demo_html2.rb +11 -0
- data/demo/demo_load_encoder.rb +17 -0
- data/demo/demo_more.rb +204 -0
- data/demo/demo_scanner.rb +36 -0
- data/demo/demo_server.rb +92 -0
- data/demo/demo_simple.rb +10 -0
- data/demo/demo_stream.rb +25 -0
- data/demo/demo_stream2.rb +8 -0
- data/demo/demo_tokens.rb +3 -0
- data/lib/coderay.rb +284 -0
- data/lib/coderay/encoder.rb +151 -0
- data/lib/coderay/encoders/count.rb +21 -0
- data/lib/coderay/encoders/div.rb +16 -0
- data/lib/coderay/encoders/helpers/html_css.rb +155 -0
- data/lib/coderay/encoders/helpers/html_helper.rb +68 -0
- data/lib/coderay/encoders/helpers/html_output.rb +237 -0
- data/lib/coderay/encoders/html.rb +169 -0
- data/lib/coderay/encoders/null.rb +20 -0
- data/lib/coderay/encoders/span.rb +16 -0
- data/lib/coderay/encoders/statistic.rb +74 -0
- data/lib/coderay/encoders/text.rb +33 -0
- data/lib/coderay/encoders/tokens.rb +44 -0
- data/lib/coderay/encoders/yaml.rb +19 -0
- data/lib/coderay/helpers/filetype.rb +145 -0
- data/lib/coderay/helpers/gzip_simple.rb +123 -0
- data/lib/coderay/helpers/plugin.rb +286 -0
- data/lib/coderay/helpers/scanner_helper.rb +63 -0
- data/lib/coderay/scanner.rb +197 -0
- data/lib/coderay/scanners/c.rb +147 -0
- data/lib/coderay/scanners/delphi.rb +123 -0
- data/lib/coderay/scanners/helpers/ruby_helper.rb +212 -0
- data/lib/coderay/scanners/plaintext.rb +13 -0
- data/lib/coderay/scanners/ruby.rb +337 -0
- data/lib/coderay/tokens.rb +324 -0
- metadata +89 -0
@@ -0,0 +1,197 @@
|
|
1
|
+
module CodeRay
|
2
|
+
|
3
|
+
require 'coderay/helpers/plugin'
|
4
|
+
|
5
|
+
# = Scanners
|
6
|
+
#
|
7
|
+
# $Id: scanner.rb 39 2005-09-29 04:35:37Z murphy $
|
8
|
+
#
|
9
|
+
# This module holds the Scanner class and its subclasses.
|
10
|
+
# For example, the Ruby scanner is named CodeRay::Scanners::Ruby
|
11
|
+
# and can be found in coderay/scanners/ruby.
|
12
|
+
#
|
13
|
+
# Scanner also provides methods and constants for the register
|
14
|
+
# mechanism and the [] method that returns the Scanner class
|
15
|
+
# belonging to the given lang.
|
16
|
+
#
|
17
|
+
# See PluginHost.
|
18
|
+
module Scanners
|
19
|
+
extend PluginHost
|
20
|
+
plugin_path 'coderay/scanners'
|
21
|
+
|
22
|
+
require 'strscan'
|
23
|
+
|
24
|
+
# = Scanner
|
25
|
+
#
|
26
|
+
# The base class for all Scanners.
|
27
|
+
#
|
28
|
+
# It is a subclass of Ruby's great +StringScanner+, which
|
29
|
+
# makes it easy to access the scanning methods inside.
|
30
|
+
#
|
31
|
+
# It is also +Enumerable+, so you can use it like an Array of
|
32
|
+
# Tokens:
|
33
|
+
#
|
34
|
+
# require 'coderay'
|
35
|
+
#
|
36
|
+
# c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;"
|
37
|
+
#
|
38
|
+
# for text, kind in c_scanner
|
39
|
+
# puts text if kind == :operator
|
40
|
+
# end
|
41
|
+
#
|
42
|
+
# # prints: (*==)++;
|
43
|
+
#
|
44
|
+
# OK, this is a very simple example :)
|
45
|
+
# You can also use +map+, +any?+, +find+ and even +sort_by+,
|
46
|
+
# if you want.
|
47
|
+
class Scanner < StringScanner
  extend Plugin
  plugin_host Scanners

  # Raised if a Scanner fails while scanning.
  #
  # Derives from StandardError (not Exception), so a plain +rescue+
  # clause is able to handle scan failures.
  ScanError = Class.new(StandardError)

  require 'coderay/helpers/scanner_helper'

  # The default options for all scanner classes.
  #
  # Define @default_options for subclasses.
  DEFAULT_OPTIONS = { :stream => false }

  class << self

    # Returns whether the Scanner can be used in streaming mode.
    #
    # #initialize decides this per instance with <tt>kind_of? Streamable</tt>,
    # so a class counts as streamable when Streamable is among its
    # ancestors. A class object that is itself a Streamable (the check the
    # previous implementation performed) is still accepted.
    def streamable?
      ancestors.include?(Streamable) || is_a?(Streamable)
    end

  end

  # Excluded for speed reasons; protected seems to make methods slow.
  #
  #   # Save the StringScanner methods from being called.
  #   # This would not be useful for highlighting.
  #   strscan_public_methods =
  #     StringScanner.instance_methods -
  #     StringScanner.ancestors[1].instance_methods
  #   protected(*strscan_public_methods)

  # Create a new Scanner.
  #
  # * +code+ is the input String and is handled by the superclass
  #   StringScanner.
  # * +options+ is a Hash with Symbols as keys.
  #   It is merged with the default options of the class (you can
  #   overwrite default options here.)
  # * +block+ is the callback for streamed highlighting.
  #
  # If you set :stream to +true+ in the options, the Scanner uses a
  # TokenStream with the +block+ as callback to handle the tokens.
  #
  # Else, a Tokens object is used.
  #
  # Raises a RuntimeError when called on the Scanner base class itself,
  # and NotStreamableError when :stream is set but the scanner is not a
  # Streamable.
  def initialize code, options = {}, &block
    @options = self.class::DEFAULT_OPTIONS.merge options
    raise "I am only the basic Scanner class. I can't scan "\
      "anything. :( Use my subclasses." if self.class == Scanner

    # Turn "\r\n" and lone "\r" line ends into "\n", so the scanners
    # only ever have to deal with one kind of newline.
    super code.gsub(/\r\n?/, "\n")

    if @options[:stream]
      warn "warning in CodeRay::Scanner.new: :stream is set, "\
        "but no block was given" unless block_given?
      raise NotStreamableError, self unless kind_of? Streamable
      @tokens = TokenStream.new(&block)
    else
      warn "warning in CodeRay::Scanner.new: Block given, "\
        "but :stream is #{@options[:stream]}" if block_given?
      @tokens = Tokens.new
    end
  end

  # More mnemonic accessor name for the input string.
  alias code string

  # Scans the code and returns all tokens in a Tokens object.
  #
  # +options+ is merged over the options given to #new and handed to
  # #scan_tokens. In non-stream mode the result of the first scan is
  # cached, so options passed to later calls have no effect.
  def tokenize options = {}
    options = @options.merge options
    if @options[:stream]  # :stream must have been set already
      reset  # rewind the scan pointer before every streamed run
      scan_tokens @tokens, options
      @tokens
    else
      @cached_tokens ||= scan_tokens @tokens, options
    end
  end

  # You can also see tokenize as a read-only attribute
  alias tokens tokenize

  # Traverses the tokens.
  #
  # Raises ArgumentError in stream mode.
  def each &block
    raise ArgumentError,
      'Cannot traverse TokenStream.' if @options[:stream]
    tokens.each(&block)
  end
  include Enumerable

  # The current line position of the scanner (1-based).
  #
  # Only newlines in front of the scan pointer are counted; a newline
  # sitting exactly at +pos+ has not been consumed yet.
  #
  # Beware, this is implemented inefficiently. It should be used
  # for debugging only.
  def line
    string[0...pos].count("\n") + 1
  end

protected

  # This is the central method, and commonly the only one a
  # subclass implements.
  #
  # Subclasses must implement this method; it must return +tokens+
  # and must only use Tokens#<< for storing scanned tokens!
  def scan_tokens tokens, options
    raise NotImplementedError,
      "#{self.class}#scan_tokens not implemented."
  end

  # Scanner error with additional status information.
  #
  # Raises ScanError with +msg+, the last ten +tokens+, the scanner
  # state and up to +ambit+ characters of code on each side of the
  # current position.
  def raise_inspect msg, tokens, ambit = 30
    # Clamp the start index: a negative start would make String#[]
    # count from the end of the string when pos < ambit.
    before_pos = string[[pos - ambit, 0].max...pos]
    raise ScanError, <<-EOE % [


***ERROR in %s: %s

tokens:
%s

current line: %d pos = %d
matched: %p
bol? = %p, eos? = %p

surrounding code:
%p ~~ %p


***ERROR***

    EOE
      File.basename(caller[0]),
      msg,
      tokens.last(10).map { |t| t.inspect }.join("\n"),
      line, pos,
      matched, bol?, eos?,
      before_pos,
      string[pos,ambit],
    ]
  end

end
|
193
|
+
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
197
|
+
# vim:sw=2:ts=2:noet:tw=78
|
@@ -0,0 +1,147 @@
|
|
1
|
+
module CodeRay module Scanners
|
2
|
+
|
3
|
+
# Scanner for C source code, registered for the language id :c.
class C < Scanner

  register_for :c

  RESERVED_WORDS = [
    'asm', 'break', 'case', 'continue', 'default', 'do', 'else',
    'for', 'goto', 'if', 'return', 'switch', 'while',
    'struct', 'union', 'enum', 'typedef',
    'static', 'register', 'auto', 'extern',
    'sizeof',
    'volatile', 'const',  # C89
    'inline', 'restrict',  # C99
  ]

  PREDEFINED_TYPES = [
    'int', 'long', 'short', 'char', 'void',
    'signed', 'unsigned', 'float', 'double',
    'bool', 'complex',  # C99
  ]

  PREDEFINED_CONSTANTS = [
    'EOF', 'NULL',
    'true', 'false',  # C99
  ]

  # Maps an identifier to its token kind; :ident is passed as the
  # default kind.
  IDENT_KIND = Scanner::WordList.new(:ident).
    add(RESERVED_WORDS, :reserved).
    add(PREDEFINED_TYPES, :pre_type).
    add(PREDEFINED_CONSTANTS, :pre_constant)

  # What may follow a backslash: a simple escape (including \a and \?),
  # an escaped newline, a hex escape or an octal escape.
  ESCAPE = / [abfnrtv\n\\'"?] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
  UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

  # Tokenizes the code and appends [text, kind] pairs to +tokens+.
  #
  # String literals are wrapped in [:open, :string] / [:close, :string]
  # marker tokens. +options+ is not used by this scanner.
  #
  # Returns +tokens+. Raises ScanError (via raise_inspect) when a piece
  # of code cannot be classified.
  def scan_tokens tokens, options

    state = :initial

    until eos?

      kind = :error
      match = nil

      case state
      when :initial

        if scan(/ \s+ | \\\n /x)
          kind = :space

        elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
          kind = :comment

        elsif match = scan(/ \# \s* if \s* 0 /x)
          # "#if 0" blocks are treated as comments, up to and including
          # the next #elif, #else or #endif line (or the end of the code).
          match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
          kind = :comment

        elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x)
          kind = :operator

        elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
          kind = IDENT_KIND[match]
          # A plain identifier directly followed by a single colon
          # is taken as a label.
          if kind == :ident and check(/:(?!:)/)
            match << scan(/:/)
            kind = :label
          end

        elsif match = scan(/L?"/)
          tokens << [:open, :string]
          if match[0] == ?L
            tokens << ['L', :modifier]
            match = '"'
          end
          state = :string
          kind = :delimiter

        elsif scan(/#\s*(\w*)/)
          kind = :preprocessor  # FIXME multiline preprocs
          state = :include_expected if self[1] == 'include'

        elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
          kind = :char

        elsif scan(/0[xX][0-9A-Fa-f]+/)
          kind = :hex

        # The \d in the lookaheads below keeps the regexp engine from
        # backtracking into the middle of a digit run (which would
        # split "12.5" into an integer "1" and a rest).
        elsif scan(/0[0-7]+(?![\d.eEfF])/)
          kind = :oct

        elsif scan(/\d+(?![\d.eEfF])/)
          kind = :integer

        elsif scan(/ \d* \. \d+ (?: [eE][+-]?\d+ )? [fF]? | \d+ [eE][+-]?\d+ [fF]? | \d+ \.? [fF]? /x)
          kind = :float

        else
          # Unknown character: consume it; kind stays :error.
          getch
        end

      when :string
        if scan(/[^\\"]+/)
          kind = :content
        elsif scan(/"/)
          tokens << ['"', :delimiter]
          tokens << [:close, :string]
          state = :initial
          next
        elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
          kind = :char
        elsif scan(/ \\ | $ /x)
          kind = :error
          state = :initial
        else
          raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
        end

      when :include_expected
        if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
          kind = :include
          state = :initial

        elsif match = scan(/\s+/)
          kind = :space
          # A line end terminates the #include directive.
          state = :initial if match.index ?\n

        else
          getch

        end

      else
        raise_inspect 'else-case reached', tokens

      end

      match ||= matched
      if kind == :error
        raise_inspect 'Error token %p in line %d' % [[match, kind], line], tokens
      end

      tokens << [match, kind]

    end

    tokens
  end

end
|
146
|
+
|
147
|
+
end end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
module CodeRay module Scanners
|
2
|
+
|
3
|
+
# Scanner for Delphi source code, registered for the language id :delphi.
class Delphi < Scanner

  register_for :delphi

  RESERVED_WORDS = [
    'and', 'array', 'as', 'at', 'asm', 'begin', 'case', 'class',
    'const', 'constructor', 'destructor', 'dispinterface', 'div', 'do',
    'downto', 'else', 'end', 'except', 'exports', 'file', 'finalization',
    'finally', 'for', 'function', 'goto', 'if', 'implementation', 'in',
    'inherited', 'initialization', 'inline', 'interface', 'is', 'label',
    'library', 'mod', 'nil', 'not', 'object', 'of', 'or', 'out', 'packed',
    'procedure', 'program', 'property', 'raise', 'record', 'repeat',
    'resourcestring', 'set', 'shl', 'shr', 'string', 'then', 'threadvar',
    'to', 'try', 'type', 'unit', 'until', 'uses', 'var', 'while', 'with',
    'xor', 'on'
  ]

  DIRECTIVES = [
    'absolute', 'abstract', 'assembler', 'at', 'automated', 'cdecl',
    'contains', 'deprecated', 'dispid', 'dynamic', 'export',
    'external', 'far', 'forward', 'implements', 'local',
    'near', 'nodefault', 'on', 'overload', 'override',
    'package', 'pascal', 'platform', 'private', 'protected', 'public',
    'published', 'read', 'readonly', 'register', 'reintroduce',
    'requires', 'resident', 'safecall', 'stdcall', 'stored', 'varargs',
    'virtual', 'write', 'writeonly'
  ]

  # Maps an identifier to its token kind; :ident is passed as the
  # default kind and :case_ignore as a flag for the word list.
  IDENT_KIND = Scanner::WordList.new(:ident, :case_ignore).
    add(RESERVED_WORDS, :reserved).
    add(DIRECTIVES, :directive)

  # Tokenizes the code and appends [text, kind] pairs to +tokens+.
  #
  # String and char literals are wrapped in [:open, kind] /
  # [:close, kind] marker tokens. +options+ is not used by this scanner.
  #
  # Returns +tokens+. Raises ScanError (via raise_inspect) when a piece
  # of code cannot be classified.
  def scan_tokens tokens, options

    state = :initial

    until eos?

      kind = :error
      match = nil

      case state
      when :initial

        if scan(/ \s+ /x)
          kind = :space

        elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx)
          kind = :preprocessor

        elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx)
          kind = :comment

        elsif scan(/ [-+*\/=<>:;,.@\^|\(\)\[\]]+ /x)
          kind = :operator

        elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
          kind = IDENT_KIND[match]

        elsif scan(/ ' ( [^\n']|'' ) ('|$) /x)
          # A one-character literal (or a single escaped quote).
          tokens << [:open, :char]
          tokens << ["'", :delimiter]
          tokens << [self[1], :content]
          # The literal may also end at the end of the line; emit the
          # closing quote only if it is really part of the code.
          tokens << ["'", :delimiter] unless self[2].empty?
          tokens << [:close, :char]
          next

        elsif match = scan(/ ' /x)
          tokens << [:open, :string]
          state = :string
          kind = :delimiter

        elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x)
          kind = :char

        elsif scan(/ \$ [0-9A-Fa-f]+ /x)
          kind = :hex

        # The \d in the lookahead keeps the regexp engine from
        # backtracking into the middle of a digit run (which would
        # split "12.5" into an integer "1" and a float "2.5").
        # A ".." after the digits is a range operator, not a fraction.
        elsif scan(/ \d+ (?! \d | [eE] | \.[^.] ) /x)
          kind = :integer

        elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x)
          kind = :float

        else
          # Unknown character: consume it; kind stays :error.
          getch
        end

      when :string
        if scan(/[^\n']+/)
          kind = :content
        elsif scan(/''/)
          kind = :char
        elsif scan(/'/)
          tokens << ["'", :delimiter]
          tokens << [:close, :string]
          state = :initial
          next
        elsif scan(/\n/)
          # Unterminated string: kind stays :error.
          state = :initial
        else
          raise_inspect "else case \' reached; %p not handled." % peek(1), tokens
        end

      else
        raise_inspect 'else-case reached', tokens

      end

      match ||= matched
      if kind == :error
        raise_inspect 'Error token %p in line %d' % [[match, kind], line], tokens
      end

      tokens << [match, kind]

    end

    tokens
  end

end
|
122
|
+
|
123
|
+
end end
|