coderay 0.9.8 → 1.0.0
- data/{lib/README → README_INDEX.rdoc} +10 -21
- data/Rakefile +6 -6
- data/bin/coderay +193 -64
- data/lib/coderay.rb +61 -105
- data/lib/coderay/duo.rb +17 -21
- data/lib/coderay/encoder.rb +100 -112
- data/lib/coderay/encoders/_map.rb +12 -7
- data/lib/coderay/encoders/comment_filter.rb +12 -30
- data/lib/coderay/encoders/count.rb +29 -11
- data/lib/coderay/encoders/debug.rb +32 -20
- data/lib/coderay/encoders/div.rb +13 -9
- data/lib/coderay/encoders/filter.rb +34 -51
- data/lib/coderay/encoders/html.rb +155 -161
- data/lib/coderay/encoders/html/css.rb +4 -9
- data/lib/coderay/encoders/html/numbering.rb +115 -0
- data/lib/coderay/encoders/html/output.rb +22 -70
- data/lib/coderay/encoders/json.rb +59 -45
- data/lib/coderay/encoders/lines_of_code.rb +12 -57
- data/lib/coderay/encoders/null.rb +6 -14
- data/lib/coderay/encoders/page.rb +13 -9
- data/lib/coderay/encoders/span.rb +13 -9
- data/lib/coderay/encoders/statistic.rb +58 -39
- data/lib/coderay/encoders/terminal.rb +179 -0
- data/lib/coderay/encoders/text.rb +31 -17
- data/lib/coderay/encoders/token_kind_filter.rb +111 -0
- data/lib/coderay/encoders/xml.rb +19 -18
- data/lib/coderay/encoders/yaml.rb +37 -9
- data/lib/coderay/for_redcloth.rb +4 -4
- data/lib/coderay/helpers/file_type.rb +127 -246
- data/lib/coderay/helpers/gzip.rb +41 -0
- data/lib/coderay/helpers/plugin.rb +241 -306
- data/lib/coderay/helpers/word_list.rb +65 -126
- data/lib/coderay/scanner.rb +173 -156
- data/lib/coderay/scanners/_map.rb +18 -17
- data/lib/coderay/scanners/c.rb +63 -77
- data/lib/coderay/scanners/clojure.rb +217 -0
- data/lib/coderay/scanners/cpp.rb +71 -84
- data/lib/coderay/scanners/css.rb +103 -120
- data/lib/coderay/scanners/debug.rb +47 -44
- data/lib/coderay/scanners/delphi.rb +70 -76
- data/lib/coderay/scanners/diff.rb +141 -50
- data/lib/coderay/scanners/erb.rb +81 -0
- data/lib/coderay/scanners/groovy.rb +104 -113
- data/lib/coderay/scanners/haml.rb +168 -0
- data/lib/coderay/scanners/html.rb +181 -110
- data/lib/coderay/scanners/java.rb +73 -75
- data/lib/coderay/scanners/java/builtin_types.rb +2 -0
- data/lib/coderay/scanners/java_script.rb +90 -101
- data/lib/coderay/scanners/json.rb +40 -53
- data/lib/coderay/scanners/php.rb +123 -147
- data/lib/coderay/scanners/python.rb +93 -91
- data/lib/coderay/scanners/raydebug.rb +66 -0
- data/lib/coderay/scanners/ruby.rb +343 -326
- data/lib/coderay/scanners/ruby/patterns.rb +40 -106
- data/lib/coderay/scanners/ruby/string_state.rb +71 -0
- data/lib/coderay/scanners/sql.rb +80 -66
- data/lib/coderay/scanners/text.rb +26 -0
- data/lib/coderay/scanners/xml.rb +1 -1
- data/lib/coderay/scanners/yaml.rb +74 -73
- data/lib/coderay/style.rb +10 -7
- data/lib/coderay/styles/_map.rb +3 -3
- data/lib/coderay/styles/alpha.rb +143 -0
- data/lib/coderay/token_kinds.rb +90 -0
- data/lib/coderay/tokens.rb +102 -277
- data/lib/coderay/tokens_proxy.rb +55 -0
- data/lib/coderay/version.rb +3 -0
- data/test/functional/basic.rb +200 -18
- data/test/functional/examples.rb +130 -0
- data/test/functional/for_redcloth.rb +15 -8
- data/test/functional/suite.rb +9 -6
- metadata +103 -123
- data/FOLDERS +0 -53
- data/bin/coderay_stylesheet +0 -4
- data/lib/coderay/encoders/html/numerization.rb +0 -133
- data/lib/coderay/encoders/term.rb +0 -158
- data/lib/coderay/encoders/token_class_filter.rb +0 -84
- data/lib/coderay/helpers/gzip_simple.rb +0 -123
- data/lib/coderay/scanners/nitro_xhtml.rb +0 -136
- data/lib/coderay/scanners/plaintext.rb +0 -20
- data/lib/coderay/scanners/rhtml.rb +0 -78
- data/lib/coderay/scanners/scheme.rb +0 -145
- data/lib/coderay/styles/cycnus.rb +0 -152
- data/lib/coderay/styles/murphy.rb +0 -134
- data/lib/coderay/token_classes.rb +0 -86
- data/test/functional/load_plugin_scanner.rb +0 -11
- data/test/functional/vhdl.rb +0 -126
- data/test/functional/word_list.rb +0 -79
data/lib/coderay/tokens.rb
CHANGED
@@ -1,6 +1,9 @@
 module CodeRay
-
-  #
+
+  # GZip library for writing and reading token dumps.
+  autoload :GZip, 'coderay/helpers/gzip'
+
+  # = Tokens TODO: Rewrite!
   #
   # The Tokens class represents a list of tokens returnd from
   # a Scanner.
@@ -8,7 +11,7 @@ module CodeRay
   # A token is not a special object, just a two-element Array
   # consisting of
   # * the _token_ _text_ (the original source of the token in a String) or
-  #   a _token_ _action_ (
+  #   a _token_ _action_ (begin_group, end_group, begin_line, end_line)
   # * the _token_ _kind_ (a Symbol representing the type of the token)
   #
   # A token looks like this:
@@ -18,16 +21,16 @@ module CodeRay
   #  ['$^', :error]
   #
   # Some scanners also yield sub-tokens, represented by special
-  # token actions, namely
+  # token actions, namely begin_group and end_group.
   #
   # The Ruby scanner, for example, splits "a string" into:
   #
   #  [
-  #   [:
+  #   [:begin_group, :string],
   #   ['"', :delimiter],
   #   ['a string', :content],
   #   ['"', :delimiter],
-  #   [:
+  #   [:end_group, :string]
   #  ]
   #
   # Tokens is the interface between Scanners and Encoders:
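
The comment block above describes the new token actions. In 1.0.0 the storage is a flat Array that interleaves texts (or group actions) with their kinds, as the writer methods later in this diff show. A minimal sketch of walking those pairs, assuming CodeRay 1.0.0 is installed:

  require 'coderay'

  # Scan a small snippet and inspect the flat (text-or-action, kind) pairs.
  tokens = CodeRay.scan('"a string"', :ruby).tokens
  tokens.each_slice(2) do |text_or_action, kind|
    p [text_or_action, kind]   # e.g. [:begin_group, :string], ['"', :delimiter], ...
  end
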
@@ -47,46 +50,11 @@ module CodeRay
   #
   # It also allows you to generate tokens directly (without using a scanner),
   # to load them from a file, and still use any Encoder that CodeRay provides.
-  #
-  # Tokens' subclass TokenStream allows streaming to save memory.
   class Tokens < Array
 
     # The Scanner instance that created the tokens.
     attr_accessor :scanner
 
-    # Whether the object is a TokenStream.
-    #
-    # Returns false.
-    def stream?
-      false
-    end
-
-    # Iterates over all tokens.
-    #
-    # If a filter is given, only tokens of that kind are yielded.
-    def each kind_filter = nil, &block
-      unless kind_filter
-        super(&block)
-      else
-        super() do |text, kind|
-          next unless kind == kind_filter
-          yield text, kind
-        end
-      end
-    end
-
-    # Iterates over all text tokens.
-    # Range tokens like [:open, :string] are left out.
-    #
-    # Example:
-    #   tokens.each_text_token { |text, kind| text.replace html_escape(text) }
-    def each_text_token
-      each do |text, kind|
-        next unless text.is_a? ::String
-        yield text, kind
-      end
-    end
-
     # Encode the tokens using encoder.
     #
     # encoder can be
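
The comment above notes that Tokens can be generated directly, without a scanner, and then fed to any encoder. A rough sketch using the writer methods this diff adds at the bottom of the class (text_token, begin_group, end_group), assuming CodeRay 1.0.0 is installed:

  require 'coderay'

  tokens = CodeRay::Tokens.new
  tokens.begin_group :string
  tokens.text_token '"', :delimiter
  tokens.text_token 'a string', :content
  tokens.text_token '"', :delimiter
  tokens.end_group :string

  puts tokens.encode(:text)   # the Text encoder simply concatenates the token texts
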
@@ -96,120 +64,98 @@ module CodeRay
     #
     # options are passed to the encoder.
     def encode encoder, options = {}
-
-        unless encoder.is_a? Class
-          encoder_class = Encoders[encoder]
-        end
-        encoder = encoder_class.new options
-      end
+      encoder = Encoders[encoder].new options if encoder.respond_to? :to_sym
       encoder.encode_tokens self, options
     end
-
-
-
-
-    # +options+ are passed to the encoder if given.
-    def to_s options = {}
-      encode :text, options
+
+    # Turn tokens into a string by concatenating them.
+    def to_s
+      encode CodeRay::Encoders::Encoder.new
     end
-
+
     # Redirects unknown methods to encoder calls.
     #
     # For example, if you call +tokens.html+, the HTML encoder
     # is used to highlight the tokens.
     def method_missing meth, options = {}
-
-
-
-    # Returns the tokens compressed by joining consecutive
-    # tokens of the same kind.
-    #
-    # This can not be undone, but should yield the same output
-    # in most Encoders. It basically makes the output smaller.
-    #
-    # Combined with dump, it saves space for the cost of time.
-    #
-    # If the scanner is written carefully, this is not required -
-    # for example, consecutive //-comment lines could already be
-    # joined in one comment token by the Scanner.
-    def optimize
-      last_kind = last_text = nil
-      new = self.class.new
-      for text, kind in self
-        if text.is_a? String
-          if kind == last_kind
-            last_text << text
-          else
-            new << [last_text, last_kind] if last_kind
-            last_text = text
-            last_kind = kind
-          end
-        else
-          new << [last_text, last_kind] if last_kind
-          last_kind = last_text = nil
-          new << [text, kind]
-        end
-      end
-      new << [last_text, last_kind] if last_kind
-      new
-    end
-
-    # Compact the object itself; see optimize.
-    def optimize!
-      replace optimize
+      encode meth, options
+    rescue PluginHost::PluginNotFound
+      super
     end
 
-    #
-    #
-    #
-
-
-
+    # Split the tokens into parts of the given +sizes+.
+    #
+    # The result will be an Array of Tokens objects. The parts have
+    # the text size specified by the parameter. In addition, each
+    # part closes all opened tokens. This is useful to insert tokens
+    # betweem them.
+    #
+    # This method is used by @Scanner#tokenize@ when called with an Array
+    # of source strings. The Diff encoder uses it for inline highlighting.
+    def split_into_parts *sizes
+      parts = []
       opened = []
-
-
-
-
-
-
-
-
-
-
-
-
-
+      content = nil
+      part = Tokens.new
+      part_size = 0
+      size = sizes.first
+      i = 0
+      for item in self
+        case content
+        when nil
+          content = item
+        when String
+          if size && part_size + content.size > size  # token must be cut
+            if part_size < size  # some part of the token goes into this part
+              content = content.dup  # content may no be safe to change
+              part << content.slice!(0, size - part_size) << item
+            end
+            # close all open groups and lines...
+            closing = opened.reverse.flatten.map do |content_or_kind|
+              case content_or_kind
+              when :begin_group
+                :end_group
+              when :begin_line
+                :end_line
+              else
+                content_or_kind
+              end
+            end
+            part.concat closing
+            begin
+              parts << part
+              part = Tokens.new
+              size = sizes[i += 1]
+            end until size.nil? || size > 0
+            # ...and open them again.
+            part.concat opened.flatten
+            part_size = 0
+            redo unless content.empty?
+          else
+            part << content << item
+            part_size += content.size
           end
+          content = nil
+        when Symbol
+          case content
+          when :begin_group, :begin_line
+            opened << [content, item]
+          when :end_group, :end_line
+            opened.pop
+          else
+            raise ArgumentError, 'Unknown token action: %p, kind = %p' % [content, item]
+          end
+          part << content << item
+          content = nil
+        else
+          raise ArgumentError, 'Token input junk: %p, kind = %p' % [content, item]
         end
-        tokens << [type, kind]
       end
-
-
-
+      parts << part
+      parts << Tokens.new while parts.size < sizes.size
+      parts
     end
 
-    def fix!
-      replace fix
-    end
-
-    # TODO: Scanner#split_into_lines
-    #
-    # Makes sure that:
-    # - newlines are single tokens
-    #   (which means all other token are single-line)
-    # - there are no open tokens at the end the line
-    #
-    # This makes it simple for encoders that work line-oriented,
-    # like HTML with list-style numeration.
-    def split_into_lines
-      raise NotImplementedError
-    end
-
-    def split_into_lines!
-      replace split_into_lines
-    end
-
     # Dumps the object into a String that can be saved
     # in files or databases.
     #
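
Two behaviours from this hunk, sketched under the assumption that CodeRay 1.0.0 is installed: unknown methods on Tokens are routed to the matching encoder, and split_into_parts cuts the token list by source-text size, closing open groups at the cut and reopening them in the next part:

  require 'coderay'

  tokens = CodeRay.scan('puts "hi"', :ruby).tokens

  # method_missing: tokens.html is handled as tokens.encode(:html).
  html = tokens.html

  # split_into_parts: the sizes are source-text lengths (here 5 + 4 = 9 characters).
  first_part, second_part = tokens.split_into_parts(5, 4)
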
@@ -226,28 +172,16 @@ module CodeRay
     #
     # See GZip module.
     def dump gzip_level = 7
-      require 'coderay/helpers/gzip_simple'
       dump = Marshal.dump self
-      dump =
+      dump = GZip.gzip dump, gzip_level
       dump.extend Undumping
     end
-
-    #
-
-
-    def text_size
-      size = 0
-      each_text_token do |t, k|
-        size + t.size
-      end
-      size
-    end
-
-    # Return all text tokens joined into a single string.
-    def text
-      map { |t, k| t if t.is_a? ::String }.join
+
+    # Return the actual number of tokens.
+    def count
+      size / 2
     end
-
+
     # Include this module to give an object an #undump
     # method.
     #
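
A sketch of the dump/load round trip as changed above (Marshal plus the new GZip helper, with the dump string extended by Undumping), assuming CodeRay 1.0.0 is installed:

  require 'coderay'

  tokens = CodeRay::Tokens.new
  tokens.text_token 'foo', :ident

  dump = tokens.dump    # gzipped Marshal data, extended with Undumping
  copy = dump.undump    # calls Tokens.load: gunzip, then Marshal.load
  copy.count            # => 1
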
@@ -258,133 +192,24 @@ module CodeRay
         Tokens.load self
       end
     end
-
+
     # Undump the object using Marshal.load, then
     # unzip it using GZip.gunzip.
     #
     # The result is commonly a Tokens object, but
     # this is not guaranteed.
     def Tokens.load dump
-
-      dump = dump.gunzip
+      dump = GZip.gunzip dump
       @dump = Marshal.load dump
     end
-
-
-
-
-
-
-
-
-  # It redirects the method << to a block given at creation.
-  #
-  # This allows scanners and Encoders to use streaming (no
-  # tokens are saved, the input is highlighted the same time it
-  # is scanned) with the same code.
-  #
-  # See CodeRay.encode_stream and CodeRay.scan_stream
-  class TokenStream < Tokens
-
-    # Whether the object is a TokenStream.
-    #
-    # Returns true.
-    def stream?
-      true
-    end
-
-    # The Array is empty, but size counts the tokens given by <<.
-    attr_reader :size
-
-    # Creates a new TokenStream that calls +block+ whenever
-    # its << method is called.
-    #
-    # Example:
-    #
-    #   require 'coderay'
-    #
-    #   token_stream = CodeRay::TokenStream.new do |text, kind|
-    #     puts 'kind: %s, text size: %d.' % [kind, text.size]
-    #   end
-    #
-    #   token_stream << ['/\d+/', :regexp]
-    #   #-> kind: rexpexp, text size: 5.
-    #
-    def initialize &block
-      raise ArgumentError, 'Block expected for streaming.' unless block
-      @callback = block
-      @size = 0
-    end
-
-    # Calls +block+ with +token+ and increments size.
-    #
-    # Returns self.
-    def << token
-      @callback.call(*token)
-      @size += 1
-      self
-    end
-
-    # This method is not implemented due to speed reasons. Use Tokens.
-    def text_size
-      raise NotImplementedError,
-        'This method is not implemented due to speed reasons.'
-    end
-
-    # A TokenStream cannot be dumped. Use Tokens.
-    def dump
-      raise NotImplementedError, 'A TokenStream cannot be dumped.'
-    end
-
-    # A TokenStream cannot be optimized. Use Tokens.
-    def optimize
-      raise NotImplementedError, 'A TokenStream cannot be optimized.'
-    end
-
-  end
-
-end
-
-if $0 == __FILE__
-  $VERBOSE = true
-  $: << File.join(File.dirname(__FILE__), '..')
-  eval DATA.read, nil, $0, __LINE__ + 4
-end
-
-__END__
-require 'test/unit'
-
-class TokensTest < Test::Unit::TestCase
-
-  def test_creation
-    assert CodeRay::Tokens < Array
-    tokens = nil
-    assert_nothing_raised do
-      tokens = CodeRay::Tokens.new
-    end
-    assert_kind_of Array, tokens
-  end
-
-  def test_adding_tokens
-    tokens = CodeRay::Tokens.new
-    assert_nothing_raised do
-      tokens << ['string', :type]
-      tokens << ['()', :operator]
-    end
-    assert_equal tokens.size, 2
-  end
-
-  def test_dump_undump
-    tokens = CodeRay::Tokens.new
-    assert_nothing_raised do
-      tokens << ['string', :type]
-      tokens << ['()', :operator]
-    end
-    tokens2 = nil
-    assert_nothing_raised do
-      tokens2 = tokens.dump.undump
-    end
-    assert_equal tokens, tokens2
+
+    alias text_token push
+    def begin_group kind; push :begin_group, kind end
+    def end_group kind; push :end_group, kind end
+    def begin_line kind; push :begin_line, kind end
+    def end_line kind; push :end_line, kind end
+    alias tokens concat
+
   end
 
-end
+end
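
The writer interface added at the end of this hunk (text_token, begin_group, begin_line and friends, plus alias tokens concat) is what scanners call while tokenizing, and count reflects the flat pair-wise storage. A small sketch, assuming CodeRay 1.0.0 is installed:

  require 'coderay'

  tokens = CodeRay::Tokens.new
  tokens.tokens ['if', :keyword, ' ', :space, 'x', :ident]   # alias for concat

  tokens.size    # => 6   flat entries (text, kind, text, kind, ...)
  tokens.count   # => 3   actual number of tokens, i.e. size / 2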