coderay-beta 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- data/FOLDERS +53 -0
- data/LICENSE +504 -0
- data/bin/coderay +82 -0
- data/bin/coderay_stylesheet +4 -0
- data/lib/README +129 -0
- data/lib/coderay.rb +320 -0
- data/lib/coderay/duo.rb +85 -0
- data/lib/coderay/encoder.rb +213 -0
- data/lib/coderay/encoders/_map.rb +11 -0
- data/lib/coderay/encoders/comment_filter.rb +43 -0
- data/lib/coderay/encoders/count.rb +21 -0
- data/lib/coderay/encoders/debug.rb +49 -0
- data/lib/coderay/encoders/div.rb +19 -0
- data/lib/coderay/encoders/filter.rb +75 -0
- data/lib/coderay/encoders/html.rb +305 -0
- data/lib/coderay/encoders/html/css.rb +70 -0
- data/lib/coderay/encoders/html/numerization.rb +133 -0
- data/lib/coderay/encoders/html/output.rb +206 -0
- data/lib/coderay/encoders/json.rb +69 -0
- data/lib/coderay/encoders/lines_of_code.rb +90 -0
- data/lib/coderay/encoders/null.rb +26 -0
- data/lib/coderay/encoders/page.rb +20 -0
- data/lib/coderay/encoders/span.rb +19 -0
- data/lib/coderay/encoders/statistic.rb +77 -0
- data/lib/coderay/encoders/term.rb +137 -0
- data/lib/coderay/encoders/text.rb +32 -0
- data/lib/coderay/encoders/token_class_filter.rb +84 -0
- data/lib/coderay/encoders/xml.rb +71 -0
- data/lib/coderay/encoders/yaml.rb +22 -0
- data/lib/coderay/for_redcloth.rb +85 -0
- data/lib/coderay/helpers/file_type.rb +240 -0
- data/lib/coderay/helpers/gzip_simple.rb +123 -0
- data/lib/coderay/helpers/plugin.rb +349 -0
- data/lib/coderay/helpers/word_list.rb +138 -0
- data/lib/coderay/scanner.rb +284 -0
- data/lib/coderay/scanners/_map.rb +23 -0
- data/lib/coderay/scanners/c.rb +203 -0
- data/lib/coderay/scanners/cpp.rb +228 -0
- data/lib/coderay/scanners/css.rb +210 -0
- data/lib/coderay/scanners/debug.rb +62 -0
- data/lib/coderay/scanners/delphi.rb +150 -0
- data/lib/coderay/scanners/diff.rb +105 -0
- data/lib/coderay/scanners/groovy.rb +263 -0
- data/lib/coderay/scanners/html.rb +182 -0
- data/lib/coderay/scanners/java.rb +176 -0
- data/lib/coderay/scanners/java/builtin_types.rb +419 -0
- data/lib/coderay/scanners/java_script.rb +224 -0
- data/lib/coderay/scanners/json.rb +112 -0
- data/lib/coderay/scanners/nitro_xhtml.rb +136 -0
- data/lib/coderay/scanners/php.rb +526 -0
- data/lib/coderay/scanners/plaintext.rb +21 -0
- data/lib/coderay/scanners/python.rb +285 -0
- data/lib/coderay/scanners/rhtml.rb +74 -0
- data/lib/coderay/scanners/ruby.rb +404 -0
- data/lib/coderay/scanners/ruby/patterns.rb +238 -0
- data/lib/coderay/scanners/scheme.rb +145 -0
- data/lib/coderay/scanners/sql.rb +162 -0
- data/lib/coderay/scanners/xml.rb +17 -0
- data/lib/coderay/scanners/yaml.rb +144 -0
- data/lib/coderay/style.rb +20 -0
- data/lib/coderay/styles/_map.rb +7 -0
- data/lib/coderay/styles/cycnus.rb +151 -0
- data/lib/coderay/styles/murphy.rb +132 -0
- data/lib/coderay/token_classes.rb +86 -0
- data/lib/coderay/tokens.rb +391 -0
- data/lib/term/ansicolor.rb +220 -0
- metadata +123 -0
@@ -0,0 +1,86 @@
|
|
1
|
+
module CodeRay
  class Tokens
    # Maps token kinds (Symbols) to the short class names used for them.
    #
    # Kinds that are not listed fall back to their own name: the default
    # block stores and returns +kind.to_s+ on first lookup, so the Hash
    # grows as unknown kinds are queried (it must therefore not be frozen).
    #
    # The value :NO_HIGHLIGHT is a marker Symbol rather than a class name;
    # presumably encoders skip markup for such kinds (confirm against the
    # encoders that read this table).
    ClassOfKind = Hash.new do |h, k|
      h[k] = k.to_s
    end
    ClassOfKind.update({
      :annotation => 'at',
      :attribute_name => 'an',
      :attribute_name_fat => 'af',
      :attribute_value => 'av',
      :attribute_value_fat => 'aw',
      :bin => 'bi',
      :char => 'ch',
      :class => 'cl',
      :class_variable => 'cv',
      :color => 'cr',
      :comment => 'c',
      :complex => 'cm',
      :constant => 'co',
      :content => 'k',
      :decorator => 'de',
      :definition => 'df',
      :delimiter => 'dl',
      :directive => 'di',
      :doc => 'do',
      :doctype => 'dt',
      :doc_string => 'ds',
      :entity => 'en',
      :error => 'er',
      # NOTE: :escape is re-assigned to the :delimiter class further down,
      # so this entry is not the effective value.
      :escape => 'e',
      :exception => 'ex',
      :float => 'fl',
      :function => 'fu',
      :global_variable => 'gv',
      :hex => 'hx',
      :imaginary => 'cm',
      :important => 'im',
      :include => 'ic',
      :inline => 'il',
      :inline_delimiter => 'idl',
      :instance_variable => 'iv',
      :integer => 'i',
      :interpreted => 'in',
      :keyword => 'kw',
      :key => 'ke',
      :label => 'la',
      :local_variable => 'lv',
      :modifier => 'mod',
      :oct => 'oc',
      :operator_fat => 'of',
      :pre_constant => 'pc',
      :pre_type => 'pt',
      :predefined => 'pd',
      :preprocessor => 'pp',
      :pseudo_class => 'ps',
      :regexp => 'rx',
      :reserved => 'r',
      :shell => 'sh',
      :string => 's',
      :symbol => 'sy',
      :tag => 'ta',
      :tag_fat => 'tf',
      :tag_special => 'ts',
      :type => 'ty',
      :variable => 'v',
      :value => 'vl',
      :xml_text => 'xt',

      :insert => 'ins',
      :delete => 'del',
      :change => 'chg',
      :head => 'head',

      :ident => :NO_HIGHLIGHT, # 'id'
      #:operator => 'op',
      :operator => :NO_HIGHLIGHT, # 'op'
      :space => :NO_HIGHLIGHT, # 'sp'
      :plain => :NO_HIGHLIGHT,
    })

    # Aliases: these kinds share the class of an existing kind.
    ClassOfKind[:method] = ClassOfKind[:function]
    ClassOfKind[:open] = ClassOfKind[:close] = ClassOfKind[:delimiter]
    ClassOfKind[:nesting_delimiter] = ClassOfKind[:delimiter]
    ClassOfKind[:escape] = ClassOfKind[:delimiter]
    #ClassOfKind.default = ClassOfKind[:error] or raise 'no class found for :error!'
  end
end
|
@@ -0,0 +1,391 @@
|
|
1
|
+
module CodeRay
|
2
|
+
|
3
|
+
# = Tokens
|
4
|
+
#
|
5
|
+
# The Tokens class represents a list of tokens returned from
|
6
|
+
# a Scanner.
|
7
|
+
#
|
8
|
+
# A token is not a special object, just a two-element Array
|
9
|
+
# consisting of
|
10
|
+
# * the _token_ _kind_ (a Symbol representing the type of the token)
|
11
|
+
# * the _token_ _text_ (the original source of the token in a String)
|
12
|
+
#
|
13
|
+
# A token looks like this:
|
14
|
+
#
|
15
|
+
# [:comment, '# It looks like this']
|
16
|
+
# [:float, '3.1415926']
|
17
|
+
# [:error, '$^']
|
18
|
+
#
|
19
|
+
# Some scanners also yield some kind of sub-tokens, represented by special
|
20
|
+
# token texts, namely :open and :close .
|
21
|
+
#
|
22
|
+
# The Ruby scanner, for example, splits "a string" into:
|
23
|
+
#
|
24
|
+
# [
|
25
|
+
# [:open, :string],
|
26
|
+
# [:delimiter, '"'],
|
27
|
+
# [:content, 'a string'],
|
28
|
+
# [:delimiter, '"'],
|
29
|
+
# [:close, :string]
|
30
|
+
# ]
|
31
|
+
#
|
32
|
+
# Tokens is also the interface between Scanners and Encoders:
|
33
|
+
# The input is split and saved into a Tokens object. The Encoder
|
34
|
+
# then builds the output from this object.
|
35
|
+
#
|
36
|
+
# Thus, the syntax below becomes clear:
|
37
|
+
#
|
38
|
+
# CodeRay.scan('price = 2.59', :ruby).html
|
39
|
+
# # the Tokens object is here -------^
|
40
|
+
#
|
41
|
+
# See how small it is? ;)
|
42
|
+
#
|
43
|
+
# Tokens gives you the power to handle pre-scanned code very easily:
|
44
|
+
# You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
|
45
|
+
# that you put in your DB.
|
46
|
+
#
|
47
|
+
# Tokens' subclass TokenStream allows streaming to save memory.
|
48
|
+
class Tokens < Array

  # The Scanner instance that created the tokens.
  attr_accessor :scanner

  # Whether the object is a TokenStream.
  #
  # Returns false.
  def stream?
    false
  end

  # Iterates over all tokens.
  #
  # If +kind_filter+ is given, only tokens whose second element equals it
  # are yielded (as two block arguments).
  def each kind_filter = nil, &block
    return super(&block) unless kind_filter
    super() do |text, kind|
      next unless kind == kind_filter
      yield text, kind
    end
  end

  # Iterates over all text tokens, i.e. tokens whose first element is a
  # String. Range tokens like [:open, :string] are left out.
  #
  # Example:
  #   tokens.each_text_token { |text, kind| text.replace html_escape(text) }
  def each_text_token
    each do |text, kind|
      next unless text.is_a? ::String
      yield text, kind
    end
  end

  # Encode the tokens using encoder.
  #
  # encoder can be
  # * a symbol like :html or :statistic (looked up via Encoders[])
  # * an Encoder class
  # * an Encoder object
  #
  # options are passed to the encoder: to its constructor when an instance
  # has to be created, and always to encode_tokens.
  def encode encoder, options = {}
    unless encoder.is_a? Encoders::Encoder
      # A Class is instantiated directly; anything else is treated as a
      # plugin id. (Previously a Class left encoder_class unset, so the
      # call below failed on nil.)
      encoder_class = encoder.is_a?(Class) ? encoder : Encoders[encoder]
      encoder = encoder_class.new options
    end
    encoder.encode_tokens self, options
  end


  # Turn into a string using the encoder registered as :text.
  #
  # +options+ are passed to the encoder if given.
  def to_s options = {}
    encode :text, options
  end

  # Redirects unknown methods to encoder calls.
  #
  # For example, if you call +tokens.html+, the HTML encoder
  # is used to highlight the tokens.
  #
  # Note that +options+ only go to the encoder's constructor here, not to
  # encode_tokens.
  def method_missing meth, options = {}
    Encoders[meth].new(options).encode_tokens self
  end

  # Returns the tokens compressed by joining consecutive
  # text tokens of the same kind.
  #
  # This can not be undone, but should yield the same output
  # in most Encoders. It basically makes the output smaller.
  #
  # Combined with dump, it saves space for the cost of time.
  #
  # If the scanner is written carefully, this is not required -
  # for example, consecutive //-comment lines could already be
  # joined in one comment token by the Scanner.
  #
  # The receiver is left untouched; use optimize! to compact in place.
  def optimize
    print ' Tokens#optimize: before: %d - ' % size if $DEBUG
    last_kind = last_text = nil
    optimized = self.class.new
    each do |text, kind|
      if text.is_a? String
        if kind == last_kind
          last_text << text
        else
          optimized << [last_text, last_kind] if last_kind
          # Copy the text: it is appended to above, and the String object
          # still belongs to a token of the receiver.
          last_text = text.dup
          last_kind = kind
        end
      else
        # Non-text (range) tokens end the current run and are kept as is.
        optimized << [last_text, last_kind] if last_kind
        last_kind = last_text = nil
        optimized << [text, kind]
      end
    end
    optimized << [last_text, last_kind] if last_kind
    if $DEBUG
      saved = size - optimized.size
      percent = size.zero? ? 0.0 : 100.0 * saved / size
      print 'after: %d (%d saved = %2.0f%%)' % [optimized.size, saved, percent]
    end
    optimized
  end

  # Compact the object itself; see optimize.
  def optimize!
    replace optimize
  end

  # Ensure that all :open tokens have a correspondent :close one
  # (and likewise :begin_line / :end_line).
  #
  # Returns a new token list in which
  # * a closing token that was never opened is dropped,
  # * a closing token for an outer range first closes the ranges that
  #   were opened inside it,
  # * ranges still open at the end are closed, innermost first.
  def fix
    tokens = self.class.new
    # Stack of the closing tokens that are still owed, innermost last.
    opened = []
    each do |type, kind|
      case type
      when :open
        opened.push [:close, kind]
      when :begin_line
        opened.push [:end_line, kind]
      when :close, :end_line
        closer = [type, kind]
        # Token was never opened: delete it (just skip it).
        next unless opened.include? closer
        # Token was opened earlier: also close tokens in between.
        tokens << opened.pop until opened.last == closer
        opened.pop
      end
      tokens << [type, kind]
    end
    # Close remaining opened tokens.
    tokens << opened.pop until opened.empty?
    tokens
  end

  # Repairs the nesting of the object itself; see fix.
  def fix!
    replace fix
  end

  # TODO: Scanner#split_into_lines
  #
  # Makes sure that:
  # - newlines are single tokens
  #   (which means all other token are single-line)
  # - there are no open tokens at the end the line
  #
  # This makes it simple for encoders that work line-oriented,
  # like HTML with list-style numeration.
  #
  # Not implemented yet; always raises NotImplementedError.
  def split_into_lines
    raise NotImplementedError
  end

  # In-place variant of split_into_lines (which currently raises).
  def split_into_lines!
    replace split_into_lines
  end

  # Dumps the object into a String that can be saved
  # in files or databases.
  #
  # The dump is created with Marshal.dump and then compressed by calling
  # #gzip on the resulting String (presumably provided by
  # coderay/helpers/gzip_simple, which is required here - confirm).
  #
  # The returned String object is extended with Undumping
  # so it has an #undump method. See Tokens.load.
  #
  # You can configure the level of compression,
  # but the default value 7 should be what you want
  # in most cases as it is a good compromise between
  # speed and compression rate.
  def dump gzip_level = 7
    require 'coderay/helpers/gzip_simple'
    dumped = Marshal.dump self
    dumped = dumped.gzip gzip_level
    dumped.extend Undumping
  end

  # The total size of the text tokens (sum of their String sizes).
  # Should be equal to the input size before
  # scanning.
  def text_size
    total = 0
    each_text_token do |text, _kind|
      total += text.size
    end
    total
  end

  # The concatenated text of all text tokens.
  # Should be equal to the input before scanning.
  def text
    map { |t, k| t if t.is_a? ::String }.join
  end

  # Include this module to give an object an #undump
  # method.
  #
  # The string returned by Tokens#dump is extended with Undumping.
  module Undumping
    # Calls Tokens.load with itself.
    def undump
      Tokens.load self
    end
  end

  # Undump the object: unzip +dump+ by calling #gunzip on it, then
  # restore it using Marshal.load.
  #
  # The result is commonly a Tokens object, but
  # this is not guaranteed.
  #
  # NOTE(review): Marshal.load can instantiate arbitrary objects; only
  # feed this method dumps from a trusted source.
  def self.load dump
    require 'coderay/helpers/gzip_simple'
    Marshal.load dump.gunzip
  end

end
|
275
|
+
|
276
|
+
|
277
|
+
# = TokenStream
|
278
|
+
#
|
279
|
+
# The TokenStream class is a fake Array without elements.
|
280
|
+
#
|
281
|
+
# It redirects the method << to a block given at creation.
|
282
|
+
#
|
283
|
+
# This allows scanners and Encoders to use streaming (no
|
284
|
+
# tokens are saved, the input is highlighted the same time it
|
285
|
+
# is scanned) with the same code.
|
286
|
+
#
|
287
|
+
# See CodeRay.encode_stream and CodeRay.scan_stream
|
288
|
+
class TokenStream < Tokens

  # Number of tokens that were passed to << so far. The Array itself
  # stays empty.
  attr_reader :size

  # Creates a new TokenStream that calls +block+ whenever
  # its << method is called. Raises ArgumentError without a block.
  #
  # Example:
  #
  #   require 'coderay'
  #
  #   token_stream = CodeRay::TokenStream.new do |kind, text|
  #     puts 'kind: %s, text size: %d.' % [kind, text.size]
  #   end
  #
  #   token_stream << [:regexp, '/\d+/']
  #   #-> kind: regexp, text size: 5.
  #
  def initialize &block
    if block.nil?
      raise ArgumentError, 'Block expected for streaming.'
    end
    @size = 0
    @callback = block
  end

  # Whether the object is a TokenStream.
  #
  # Returns true.
  def stream?
    true
  end

  # Hands +token+ (splatted) to the block given at creation, then counts
  # it.
  #
  # Returns self.
  def << token
    @callback.call(*token)
    @size = @size + 1
    self
  end

  # This method is not implemented due to speed reasons. Use Tokens.
  def text_size
    message = 'This method is not implemented due to speed reasons.'
    raise NotImplementedError, message
  end

  # A TokenStream cannot be dumped. Use Tokens.
  def dump
    message = 'A TokenStream cannot be dumped.'
    raise NotImplementedError, message
  end

  # A TokenStream cannot be optimized. Use Tokens.
  def optimize
    message = 'A TokenStream cannot be optimized.'
    raise NotImplementedError, message
  end

end
|
346
|
+
|
347
|
+
end
|
348
|
+
|
349
|
+
if __FILE__ == $PROGRAM_NAME
  $VERBOSE = true
  $LOAD_PATH << File.join(File.dirname(__FILE__), '..')
  eval DATA.read, nil, $PROGRAM_NAME, __LINE__ + 4
end
|
354
|
+
|
355
|
+
__END__
|
356
|
+
require 'test/unit'
|
357
|
+
|
358
|
+
# Self-test for CodeRay::Tokens; evaluated when the file is run directly.
class TokensTest < Test::Unit::TestCase

  # Tokens is an Array subclass and can be created without arguments.
  def test_creation
    assert CodeRay::Tokens < Array
    tokens = nil
    assert_nothing_raised do
      tokens = CodeRay::Tokens.new
    end
    assert_kind_of Array, tokens
  end

  # Tokens can be appended with << and are counted by size.
  def test_adding_tokens
    tokens = CodeRay::Tokens.new
    assert_nothing_raised do
      tokens << ['string', :type]
      tokens << ['()', :operator]
    end
    # assert_equal takes the expected value first.
    assert_equal 2, tokens.size
  end

  # A dump/undump round trip yields an equal token list.
  def test_dump_undump
    tokens = CodeRay::Tokens.new
    assert_nothing_raised do
      tokens << ['string', :type]
      tokens << ['()', :operator]
    end
    tokens2 = nil
    assert_nothing_raised do
      tokens2 = tokens.dump.undump
    end
    assert_equal tokens, tokens2
  end

end
|