coderay-beta 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/FOLDERS +53 -0
- data/LICENSE +504 -0
- data/bin/coderay +82 -0
- data/bin/coderay_stylesheet +4 -0
- data/lib/README +129 -0
- data/lib/coderay.rb +320 -0
- data/lib/coderay/duo.rb +85 -0
- data/lib/coderay/encoder.rb +213 -0
- data/lib/coderay/encoders/_map.rb +11 -0
- data/lib/coderay/encoders/comment_filter.rb +43 -0
- data/lib/coderay/encoders/count.rb +21 -0
- data/lib/coderay/encoders/debug.rb +49 -0
- data/lib/coderay/encoders/div.rb +19 -0
- data/lib/coderay/encoders/filter.rb +75 -0
- data/lib/coderay/encoders/html.rb +305 -0
- data/lib/coderay/encoders/html/css.rb +70 -0
- data/lib/coderay/encoders/html/numerization.rb +133 -0
- data/lib/coderay/encoders/html/output.rb +206 -0
- data/lib/coderay/encoders/json.rb +69 -0
- data/lib/coderay/encoders/lines_of_code.rb +90 -0
- data/lib/coderay/encoders/null.rb +26 -0
- data/lib/coderay/encoders/page.rb +20 -0
- data/lib/coderay/encoders/span.rb +19 -0
- data/lib/coderay/encoders/statistic.rb +77 -0
- data/lib/coderay/encoders/term.rb +137 -0
- data/lib/coderay/encoders/text.rb +32 -0
- data/lib/coderay/encoders/token_class_filter.rb +84 -0
- data/lib/coderay/encoders/xml.rb +71 -0
- data/lib/coderay/encoders/yaml.rb +22 -0
- data/lib/coderay/for_redcloth.rb +85 -0
- data/lib/coderay/helpers/file_type.rb +240 -0
- data/lib/coderay/helpers/gzip_simple.rb +123 -0
- data/lib/coderay/helpers/plugin.rb +349 -0
- data/lib/coderay/helpers/word_list.rb +138 -0
- data/lib/coderay/scanner.rb +284 -0
- data/lib/coderay/scanners/_map.rb +23 -0
- data/lib/coderay/scanners/c.rb +203 -0
- data/lib/coderay/scanners/cpp.rb +228 -0
- data/lib/coderay/scanners/css.rb +210 -0
- data/lib/coderay/scanners/debug.rb +62 -0
- data/lib/coderay/scanners/delphi.rb +150 -0
- data/lib/coderay/scanners/diff.rb +105 -0
- data/lib/coderay/scanners/groovy.rb +263 -0
- data/lib/coderay/scanners/html.rb +182 -0
- data/lib/coderay/scanners/java.rb +176 -0
- data/lib/coderay/scanners/java/builtin_types.rb +419 -0
- data/lib/coderay/scanners/java_script.rb +224 -0
- data/lib/coderay/scanners/json.rb +112 -0
- data/lib/coderay/scanners/nitro_xhtml.rb +136 -0
- data/lib/coderay/scanners/php.rb +526 -0
- data/lib/coderay/scanners/plaintext.rb +21 -0
- data/lib/coderay/scanners/python.rb +285 -0
- data/lib/coderay/scanners/rhtml.rb +74 -0
- data/lib/coderay/scanners/ruby.rb +404 -0
- data/lib/coderay/scanners/ruby/patterns.rb +238 -0
- data/lib/coderay/scanners/scheme.rb +145 -0
- data/lib/coderay/scanners/sql.rb +162 -0
- data/lib/coderay/scanners/xml.rb +17 -0
- data/lib/coderay/scanners/yaml.rb +144 -0
- data/lib/coderay/style.rb +20 -0
- data/lib/coderay/styles/_map.rb +7 -0
- data/lib/coderay/styles/cycnus.rb +151 -0
- data/lib/coderay/styles/murphy.rb +132 -0
- data/lib/coderay/token_classes.rb +86 -0
- data/lib/coderay/tokens.rb +391 -0
- data/lib/term/ansicolor.rb +220 -0
- metadata +123 -0
@@ -0,0 +1,86 @@
|
|
1
|
+
module CodeRay
  class Tokens
    # Maps a token kind (a Symbol) to a short class name.
    #
    # Kinds that are not listed fall back to their own name as a String;
    # the fallback is stored in the hash on first lookup.
    # The value :NO_HIGHLIGHT marks kinds that have no class name of their own.
    # NOTE(review): the short names are presumably the CSS classes emitted by
    # the HTML encoder - confirm against the encoder.
    ClassOfKind = Hash.new do |h, k|
      h[k] = k.to_s
    end
    ClassOfKind.update({
      :annotation => 'at',
      :attribute_name => 'an',
      :attribute_name_fat => 'af',
      :attribute_value => 'av',
      :attribute_value_fat => 'aw',
      :bin => 'bi',
      :char => 'ch',
      :class => 'cl',
      :class_variable => 'cv',
      :color => 'cr',
      :comment => 'c',
      :complex => 'cm',
      :constant => 'co',
      :content => 'k',
      :decorator => 'de',
      :definition => 'df',
      :delimiter => 'dl',
      :directive => 'di',
      :doc => 'do',
      :doctype => 'dt',
      :doc_string => 'ds',
      :entity => 'en',
      :error => 'er',
      :escape => 'e',  # overridden below: :escape ends up sharing the :delimiter class
      :exception => 'ex',
      :float => 'fl',
      :function => 'fu',
      :global_variable => 'gv',
      :hex => 'hx',
      :imaginary => 'cm',
      :important => 'im',
      :include => 'ic',
      :inline => 'il',
      :inline_delimiter => 'idl',
      :instance_variable => 'iv',
      :integer => 'i',
      :interpreted => 'in',
      :keyword => 'kw',
      :key => 'ke',
      :label => 'la',
      :local_variable => 'lv',
      :modifier => 'mod',
      :oct => 'oc',
      :operator_fat => 'of',
      :pre_constant => 'pc',
      :pre_type => 'pt',
      :predefined => 'pd',
      :preprocessor => 'pp',
      :pseudo_class => 'ps',
      :regexp => 'rx',
      :reserved => 'r',
      :shell => 'sh',
      :string => 's',
      :symbol => 'sy',
      :tag => 'ta',
      :tag_fat => 'tf',
      :tag_special => 'ts',
      :type => 'ty',
      :variable => 'v',
      :value => 'vl',
      :xml_text => 'xt',

      :insert => 'ins',
      :delete => 'del',
      :change => 'chg',
      :head => 'head',

      :ident => :NO_HIGHLIGHT, # 'id'
      #:operator => 'op',
      :operator => :NO_HIGHLIGHT, # 'op'
      :space => :NO_HIGHLIGHT, # 'sp'
      :plain => :NO_HIGHLIGHT,
    })
    # Aliases: these kinds reuse the class of another kind.
    ClassOfKind[:method] = ClassOfKind[:function]
    ClassOfKind[:open] = ClassOfKind[:close] = ClassOfKind[:delimiter]
    ClassOfKind[:nesting_delimiter] = ClassOfKind[:delimiter]
    ClassOfKind[:escape] = ClassOfKind[:delimiter]
    #ClassOfKind.default = ClassOfKind[:error] or raise 'no class found for :error!'
  end
end
|
@@ -0,0 +1,391 @@
|
|
1
|
+
module CodeRay
|
2
|
+
|
3
|
+
# = Tokens
|
4
|
+
#
|
5
|
+
# The Tokens class represents a list of tokens returned from
|
6
|
+
# a Scanner.
|
7
|
+
#
|
8
|
+
# A token is not a special object, just a two-element Array
|
9
|
+
# consisting of
|
10
|
+
# * the _token_ _kind_ (a Symbol representing the type of the token)
|
11
|
+
# * the _token_ _text_ (the original source of the token in a String)
|
12
|
+
#
|
13
|
+
# A token looks like this:
|
14
|
+
#
|
15
|
+
# [:comment, '# It looks like this']
|
16
|
+
# [:float, '3.1415926']
|
17
|
+
# [:error, '$^']
|
18
|
+
#
|
19
|
+
# Some scanners also yield some kind of sub-tokens, represented by special
|
20
|
+
# token texts, namely :open and :close .
|
21
|
+
#
|
22
|
+
# The Ruby scanner, for example, splits "a string" into:
|
23
|
+
#
|
24
|
+
# [
|
25
|
+
# [:open, :string],
|
26
|
+
# [:delimiter, '"'],
|
27
|
+
# [:content, 'a string'],
|
28
|
+
# [:delimiter, '"'],
|
29
|
+
# [:close, :string]
|
30
|
+
# ]
|
31
|
+
#
|
32
|
+
# Tokens is also the interface between Scanners and Encoders:
|
33
|
+
# The input is split and saved into a Tokens object. The Encoder
|
34
|
+
# then builds the output from this object.
|
35
|
+
#
|
36
|
+
# Thus, the syntax below becomes clear:
|
37
|
+
#
|
38
|
+
# CodeRay.scan('price = 2.59', :ruby).html
|
39
|
+
# # the Tokens object is here -------^
|
40
|
+
#
|
41
|
+
# See how small it is? ;)
|
42
|
+
#
|
43
|
+
# Tokens gives you the power to handle pre-scanned code very easily:
|
44
|
+
# You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
|
45
|
+
# that you put in your DB.
|
46
|
+
#
|
47
|
+
# Tokens' subclass TokenStream allows streaming to save memory.
|
48
|
+
class Tokens < Array
|
49
|
+
|
50
|
+
# The Scanner instance that created the tokens.
|
51
|
+
attr_accessor :scanner
|
52
|
+
|
53
|
+
# Whether the object is a TokenStream.
|
54
|
+
#
|
55
|
+
# Returns false.
|
56
|
+
def stream?
|
57
|
+
false
|
58
|
+
end
|
59
|
+
|
60
|
+
# Iterates over all tokens.
|
61
|
+
#
|
62
|
+
# If a filter is given, only tokens of that kind are yielded.
|
63
|
+
def each kind_filter = nil, &block
|
64
|
+
unless kind_filter
|
65
|
+
super(&block)
|
66
|
+
else
|
67
|
+
super() do |text, kind|
|
68
|
+
next unless kind == kind_filter
|
69
|
+
yield text, kind
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
# Iterates over all text tokens.
|
75
|
+
# Range tokens like [:open, :string] are left out.
|
76
|
+
#
|
77
|
+
# Example:
|
78
|
+
# tokens.each_text_token { |text, kind| text.replace html_escape(text) }
|
79
|
+
def each_text_token
|
80
|
+
each do |text, kind|
|
81
|
+
next unless text.is_a? ::String
|
82
|
+
yield text, kind
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
# Encode the tokens using encoder.
#
# encoder can be
# * a symbol like :html or :statistic
# * an Encoder class
# * an Encoder object
#
# options are passed to the encoder (both to its constructor, when one is
# created here, and to its encode_tokens call).
def encode encoder, options = {}
  unless encoder.is_a? Encoders::Encoder
    # A class is instantiated directly; anything else (e.g. a Symbol)
    # is looked up through Encoders[] first.
    encoder_class = encoder.is_a?(Class) ? encoder : Encoders[encoder]
    encoder = encoder_class.new options
  end
  encoder.encode_tokens self, options
end
|
103
|
+
|
104
|
+
|
105
|
+
# Turn into a string using Encoders::Text.
|
106
|
+
#
|
107
|
+
# +options+ are passed to the encoder if given.
|
108
|
+
def to_s options = {}
|
109
|
+
encode :text, options
|
110
|
+
end
|
111
|
+
|
112
|
+
# Redirects unknown methods to encoder calls.
|
113
|
+
#
|
114
|
+
# For example, if you call +tokens.html+, the HTML encoder
|
115
|
+
# is used to highlight the tokens.
|
116
|
+
def method_missing meth, options = {}
|
117
|
+
Encoders[meth].new(options).encode_tokens self
|
118
|
+
end
|
119
|
+
|
120
|
+
# Returns the tokens compressed by joining consecutive
# tokens of the same kind.
#
# This can not be undone, but should yield the same output
# in most Encoders. It basically makes the output smaller.
#
# Combined with dump, it saves space for the cost of time.
#
# If the scanner is written carefully, this is not required -
# for example, consecutive //-comment lines could already be
# joined in one comment token by the Scanner.
#
# The receiver is left untouched: joined texts are built on copies,
# not on the String objects stored in the receiver's tokens.
#
# Returns a new object of the receiver's class.
def optimize
  print ' Tokens#optimize: before: %d - ' % size if $DEBUG
  optimized = self.class.new
  last_text = last_kind = nil
  each do |text, kind|
    if text.is_a? String
      if kind == last_kind
        last_text << text
      else
        optimized << [last_text, last_kind] if last_kind
        # Copy, so the following << calls never modify the receiver's tokens.
        last_text = text.dup
        last_kind = kind
      end
    else
      # Non-String entries (like :open / :close) are never joined;
      # they also end the current run of text tokens.
      optimized << [last_text, last_kind] if last_kind
      last_text = last_kind = nil
      optimized << [text, kind]
    end
  end
  optimized << [last_text, last_kind] if last_kind
  if $DEBUG
    saved = size - optimized.size
    percent = size.zero? ? 0.0 : 100.0 * saved / size
    print 'after: %d (%d saved = %2.0f%%)' % [optimized.size, saved, percent]
  end
  optimized
end
|
155
|
+
|
156
|
+
# Compact the object itself; see optimize.
|
157
|
+
def optimize!
|
158
|
+
replace optimize
|
159
|
+
end
|
160
|
+
|
161
|
+
# Ensure that all :open tokens have a correspondent :close one
# (and all :begin_line tokens a correspondent :end_line one).
#
# * A closing token that was never opened is dropped.
# * A closing token whose opener is not the innermost open one first
#   closes everything that was opened in between.
# * Tokens still open at the end are closed, innermost first.
#
# Returns a new object of the receiver's class; the receiver is unchanged.
def fix
  tokens = self.class.new
  # Stack of the closing tokens we still expect, innermost last.
  opened = []
  each do |type, kind|
    case type
    when :open
      opened.push [:close, kind]
    when :begin_line
      opened.push [:end_line, kind]
    when :close, :end_line
      closer = [type, kind]
      # Token was never opened: delete the closing token (just skip it).
      next unless opened.include? closer
      # Token was opened earlier: also close the tokens in between.
      until (pending = opened.pop) == closer
        tokens << pending
      end
    end
    tokens << [type, kind]
  end
  # Close remaining opened tokens.
  tokens << opened.pop until opened.empty?
  tokens
end
|
190
|
+
|
191
|
+
def fix!
|
192
|
+
replace fix
|
193
|
+
end
|
194
|
+
|
195
|
+
# TODO: Scanner#split_into_lines
|
196
|
+
#
|
197
|
+
# Makes sure that:
|
198
|
+
# - newlines are single tokens
|
199
|
+
# (which means all other token are single-line)
|
200
|
+
# - there are no open tokens at the end the line
|
201
|
+
#
|
202
|
+
# This makes it simple for encoders that work line-oriented,
|
203
|
+
# like HTML with list-style numeration.
|
204
|
+
def split_into_lines
|
205
|
+
raise NotImplementedError
|
206
|
+
end
|
207
|
+
|
208
|
+
def split_into_lines!
|
209
|
+
replace split_into_lines
|
210
|
+
end
|
211
|
+
|
212
|
+
# Dumps the object into a String that can be saved
|
213
|
+
# in files or databases.
|
214
|
+
#
|
215
|
+
# The dump is created with Marshal.dump;
|
216
|
+
# In addition, it is gzipped using GZip.gzip.
|
217
|
+
#
|
218
|
+
# The returned String object includes Undumping
|
219
|
+
# so it has an #undump method. See Tokens.load.
|
220
|
+
#
|
221
|
+
# You can configure the level of compression,
|
222
|
+
# but the default value 7 should be what you want
|
223
|
+
# in most cases as it is a good compromise between
|
224
|
+
# speed and compression rate.
|
225
|
+
#
|
226
|
+
# See GZip module.
|
227
|
+
def dump gzip_level = 7
|
228
|
+
require 'coderay/helpers/gzip_simple'
|
229
|
+
dump = Marshal.dump self
|
230
|
+
dump = dump.gzip gzip_level
|
231
|
+
dump.extend Undumping
|
232
|
+
end
|
233
|
+
|
234
|
+
# The total size of the tokens: the sum of the sizes of all text tokens.
# Should be equal to the input size before
# scanning.
def text_size
  size = 0
  each_text_token do |text, _kind|
    # Accumulate; a plain `size + ...` would throw the sum away.
    size += text.size
  end
  size
end
|
244
|
+
|
245
|
+
# The texts of all text tokens, joined into one String.
# Entries whose first element is not a String
# (like [:open, :string]) contribute nothing.
def text
  map { |t, _kind| t if t.is_a? ::String }.join
end
|
251
|
+
|
252
|
+
# Include this module to give an object an #undump
|
253
|
+
# method.
|
254
|
+
#
|
255
|
+
# The string returned by Tokens.dump includes Undumping.
|
256
|
+
module Undumping
|
257
|
+
# Calls Tokens.load with itself.
|
258
|
+
def undump
|
259
|
+
Tokens.load self
|
260
|
+
end
|
261
|
+
end
|
262
|
+
|
263
|
+
# Undump the object: first unzip +dump+ by calling its #gunzip method
# (coderay/helpers/gzip_simple is required for that), then restore the
# object using Marshal.load.
#
# The result is commonly a Tokens object, but
# this is not guaranteed.
#
# NOTE: Marshal.load can create arbitrary objects; only pass dumps that
# come from a trusted source.
def Tokens.load dump
  require 'coderay/helpers/gzip_simple'
  Marshal.load dump.gunzip
end
|
273
|
+
|
274
|
+
end
|
275
|
+
|
276
|
+
|
277
|
+
# = TokenStream
|
278
|
+
#
|
279
|
+
# The TokenStream class is a fake Array without elements.
|
280
|
+
#
|
281
|
+
# It redirects the method << to a block given at creation.
|
282
|
+
#
|
283
|
+
# This allows scanners and Encoders to use streaming (no
|
284
|
+
# tokens are saved, the input is highlighted the same time it
|
285
|
+
# is scanned) with the same code.
|
286
|
+
#
|
287
|
+
# See CodeRay.encode_stream and CodeRay.scan_stream
|
288
|
+
class TokenStream < Tokens
|
289
|
+
|
290
|
+
# Whether the object is a TokenStream.
|
291
|
+
#
|
292
|
+
# Returns true.
|
293
|
+
def stream?
|
294
|
+
true
|
295
|
+
end
|
296
|
+
|
297
|
+
# The Array is empty, but size counts the tokens given by <<.
|
298
|
+
attr_reader :size
|
299
|
+
|
300
|
+
# Creates a new TokenStream that calls +block+ whenever
|
301
|
+
# its << method is called.
|
302
|
+
#
|
303
|
+
# Example:
|
304
|
+
#
|
305
|
+
# require 'coderay'
|
306
|
+
#
|
307
|
+
# token_stream = CodeRay::TokenStream.new do |kind, text|
|
308
|
+
# puts 'kind: %s, text size: %d.' % [kind, text.size]
|
309
|
+
# end
|
310
|
+
#
|
311
|
+
# token_stream << [:regexp, '/\d+/']
|
312
|
+
# #-> kind: regexp, text size: 5.
|
313
|
+
#
|
314
|
+
def initialize &block
|
315
|
+
raise ArgumentError, 'Block expected for streaming.' unless block
|
316
|
+
@callback = block
|
317
|
+
@size = 0
|
318
|
+
end
|
319
|
+
|
320
|
+
# Calls +block+ with +token+ and increments size.
|
321
|
+
#
|
322
|
+
# Returns self.
|
323
|
+
def << token
|
324
|
+
@callback.call(*token)
|
325
|
+
@size += 1
|
326
|
+
self
|
327
|
+
end
|
328
|
+
|
329
|
+
# This method is not implemented due to speed reasons. Use Tokens.
|
330
|
+
def text_size
|
331
|
+
raise NotImplementedError,
|
332
|
+
'This method is not implemented due to speed reasons.'
|
333
|
+
end
|
334
|
+
|
335
|
+
# A TokenStream cannot be dumped. Use Tokens.
|
336
|
+
def dump
|
337
|
+
raise NotImplementedError, 'A TokenStream cannot be dumped.'
|
338
|
+
end
|
339
|
+
|
340
|
+
# A TokenStream cannot be optimized. Use Tokens.
|
341
|
+
def optimize
|
342
|
+
raise NotImplementedError, 'A TokenStream cannot be optimized.'
|
343
|
+
end
|
344
|
+
|
345
|
+
end
|
346
|
+
|
347
|
+
end
|
348
|
+
|
349
|
+
if $0 == __FILE__
|
350
|
+
$VERBOSE = true
|
351
|
+
$: << File.join(File.dirname(__FILE__), '..')
|
352
|
+
eval DATA.read, nil, $0, __LINE__ + 4
|
353
|
+
end
|
354
|
+
|
355
|
+
__END__
|
356
|
+
require 'test/unit'
|
357
|
+
|
358
|
+
class TokensTest < Test::Unit::TestCase
|
359
|
+
|
360
|
+
def test_creation
|
361
|
+
assert CodeRay::Tokens < Array
|
362
|
+
tokens = nil
|
363
|
+
assert_nothing_raised do
|
364
|
+
tokens = CodeRay::Tokens.new
|
365
|
+
end
|
366
|
+
assert_kind_of Array, tokens
|
367
|
+
end
|
368
|
+
|
369
|
+
def test_adding_tokens
|
370
|
+
tokens = CodeRay::Tokens.new
|
371
|
+
assert_nothing_raised do
|
372
|
+
tokens << ['string', :type]
|
373
|
+
tokens << ['()', :operator]
|
374
|
+
end
|
375
|
+
assert_equal tokens.size, 2
|
376
|
+
end
|
377
|
+
|
378
|
+
def test_dump_undump
|
379
|
+
tokens = CodeRay::Tokens.new
|
380
|
+
assert_nothing_raised do
|
381
|
+
tokens << ['string', :type]
|
382
|
+
tokens << ['()', :operator]
|
383
|
+
end
|
384
|
+
tokens2 = nil
|
385
|
+
assert_nothing_raised do
|
386
|
+
tokens2 = tokens.dump.undump
|
387
|
+
end
|
388
|
+
assert_equal tokens, tokens2
|
389
|
+
end
|
390
|
+
|
391
|
+
end
|