coderay 0.9.8 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/{lib/README → README_INDEX.rdoc} +10 -21
- data/Rakefile +6 -6
- data/bin/coderay +193 -64
- data/lib/coderay.rb +61 -105
- data/lib/coderay/duo.rb +17 -21
- data/lib/coderay/encoder.rb +100 -112
- data/lib/coderay/encoders/_map.rb +12 -7
- data/lib/coderay/encoders/comment_filter.rb +12 -30
- data/lib/coderay/encoders/count.rb +29 -11
- data/lib/coderay/encoders/debug.rb +32 -20
- data/lib/coderay/encoders/div.rb +13 -9
- data/lib/coderay/encoders/filter.rb +34 -51
- data/lib/coderay/encoders/html.rb +155 -161
- data/lib/coderay/encoders/html/css.rb +4 -9
- data/lib/coderay/encoders/html/numbering.rb +115 -0
- data/lib/coderay/encoders/html/output.rb +22 -70
- data/lib/coderay/encoders/json.rb +59 -45
- data/lib/coderay/encoders/lines_of_code.rb +12 -57
- data/lib/coderay/encoders/null.rb +6 -14
- data/lib/coderay/encoders/page.rb +13 -9
- data/lib/coderay/encoders/span.rb +13 -9
- data/lib/coderay/encoders/statistic.rb +58 -39
- data/lib/coderay/encoders/terminal.rb +179 -0
- data/lib/coderay/encoders/text.rb +31 -17
- data/lib/coderay/encoders/token_kind_filter.rb +111 -0
- data/lib/coderay/encoders/xml.rb +19 -18
- data/lib/coderay/encoders/yaml.rb +37 -9
- data/lib/coderay/for_redcloth.rb +4 -4
- data/lib/coderay/helpers/file_type.rb +127 -246
- data/lib/coderay/helpers/gzip.rb +41 -0
- data/lib/coderay/helpers/plugin.rb +241 -306
- data/lib/coderay/helpers/word_list.rb +65 -126
- data/lib/coderay/scanner.rb +173 -156
- data/lib/coderay/scanners/_map.rb +18 -17
- data/lib/coderay/scanners/c.rb +63 -77
- data/lib/coderay/scanners/clojure.rb +217 -0
- data/lib/coderay/scanners/cpp.rb +71 -84
- data/lib/coderay/scanners/css.rb +103 -120
- data/lib/coderay/scanners/debug.rb +47 -44
- data/lib/coderay/scanners/delphi.rb +70 -76
- data/lib/coderay/scanners/diff.rb +141 -50
- data/lib/coderay/scanners/erb.rb +81 -0
- data/lib/coderay/scanners/groovy.rb +104 -113
- data/lib/coderay/scanners/haml.rb +168 -0
- data/lib/coderay/scanners/html.rb +181 -110
- data/lib/coderay/scanners/java.rb +73 -75
- data/lib/coderay/scanners/java/builtin_types.rb +2 -0
- data/lib/coderay/scanners/java_script.rb +90 -101
- data/lib/coderay/scanners/json.rb +40 -53
- data/lib/coderay/scanners/php.rb +123 -147
- data/lib/coderay/scanners/python.rb +93 -91
- data/lib/coderay/scanners/raydebug.rb +66 -0
- data/lib/coderay/scanners/ruby.rb +343 -326
- data/lib/coderay/scanners/ruby/patterns.rb +40 -106
- data/lib/coderay/scanners/ruby/string_state.rb +71 -0
- data/lib/coderay/scanners/sql.rb +80 -66
- data/lib/coderay/scanners/text.rb +26 -0
- data/lib/coderay/scanners/xml.rb +1 -1
- data/lib/coderay/scanners/yaml.rb +74 -73
- data/lib/coderay/style.rb +10 -7
- data/lib/coderay/styles/_map.rb +3 -3
- data/lib/coderay/styles/alpha.rb +143 -0
- data/lib/coderay/token_kinds.rb +90 -0
- data/lib/coderay/tokens.rb +102 -277
- data/lib/coderay/tokens_proxy.rb +55 -0
- data/lib/coderay/version.rb +3 -0
- data/test/functional/basic.rb +200 -18
- data/test/functional/examples.rb +130 -0
- data/test/functional/for_redcloth.rb +15 -8
- data/test/functional/suite.rb +9 -6
- metadata +103 -123
- data/FOLDERS +0 -53
- data/bin/coderay_stylesheet +0 -4
- data/lib/coderay/encoders/html/numerization.rb +0 -133
- data/lib/coderay/encoders/term.rb +0 -158
- data/lib/coderay/encoders/token_class_filter.rb +0 -84
- data/lib/coderay/helpers/gzip_simple.rb +0 -123
- data/lib/coderay/scanners/nitro_xhtml.rb +0 -136
- data/lib/coderay/scanners/plaintext.rb +0 -20
- data/lib/coderay/scanners/rhtml.rb +0 -78
- data/lib/coderay/scanners/scheme.rb +0 -145
- data/lib/coderay/styles/cycnus.rb +0 -152
- data/lib/coderay/styles/murphy.rb +0 -134
- data/lib/coderay/token_classes.rb +0 -86
- data/test/functional/load_plugin_scanner.rb +0 -11
- data/test/functional/vhdl.rb +0 -126
- data/test/functional/word_list.rb +0 -79
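
The file list already shows the shape of this release: new scanners (clojure, erb, haml, raydebug, text) and a terminal encoder appear, while plaintext, rhtml, nitro_xhtml, scheme, term, and token_class_filter are dropped or renamed. A minimal sketch of how the renamed plugins might be exercised after upgrading; the plugin symbols (:clojure, :text, :terminal) are assumptions inferred from the new file names above, not taken from the gem's documentation:

    require 'coderay'

    # Plugin names are assumed to follow the new file names in the list above
    # (scanners/clojure.rb -> :clojure, scanners/text.rb -> :text,
    #  encoders/terminal.rb -> :terminal).
    puts CodeRay.scan('(def greeting "Hello, World!")', :clojure).terminal  # ANSI-colored output
    puts CodeRay.scan('just some plain text', :text).html                   # HTML fragment
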
data/lib/coderay/tokens.rb
CHANGED
@@ -1,6 +1,9 @@
 module CodeRay
-
-#
+
+# GZip library for writing and reading token dumps.
+autoload :GZip, 'coderay/helpers/gzip'
+
+# = Tokens TODO: Rewrite!
 #
 # The Tokens class represents a list of tokens returnd from
 # a Scanner.
@@ -8,7 +11,7 @@ module CodeRay
 # A token is not a special object, just a two-element Array
 # consisting of
 # * the _token_ _text_ (the original source of the token in a String) or
-# a _token_ _action_ (
+# a _token_ _action_ (begin_group, end_group, begin_line, end_line)
 # * the _token_ _kind_ (a Symbol representing the type of the token)
 #
 # A token looks like this:
@@ -18,16 +21,16 @@ module CodeRay
 # ['$^', :error]
 #
 # Some scanners also yield sub-tokens, represented by special
-# token actions, namely
+# token actions, namely begin_group and end_group.
 #
 # The Ruby scanner, for example, splits "a string" into:
 #
 # [
-# [:
+# [:begin_group, :string],
 # ['"', :delimiter],
 # ['a string', :content],
 # ['"', :delimiter],
-# [:
+# [:end_group, :string]
 # ]
 #
 # Tokens is the interface between Scanners and Encoders:
@@ -47,46 +50,11 @@ module CodeRay
 #
 # It also allows you to generate tokens directly (without using a scanner),
 # to load them from a file, and still use any Encoder that CodeRay provides.
-#
-# Tokens' subclass TokenStream allows streaming to save memory.
 class Tokens < Array
 
 # The Scanner instance that created the tokens.
 attr_accessor :scanner
 
-# Whether the object is a TokenStream.
-#
-# Returns false.
-def stream?
-false
-end
-
-# Iterates over all tokens.
-#
-# If a filter is given, only tokens of that kind are yielded.
-def each kind_filter = nil, &block
-unless kind_filter
-super(&block)
-else
-super() do |text, kind|
-next unless kind == kind_filter
-yield text, kind
-end
-end
-end
-
-# Iterates over all text tokens.
-# Range tokens like [:open, :string] are left out.
-#
-# Example:
-# tokens.each_text_token { |text, kind| text.replace html_escape(text) }
-def each_text_token
-each do |text, kind|
-next unless text.is_a? ::String
-yield text, kind
-end
-end
-
 # Encode the tokens using encoder.
 #
 # encoder can be
@@ -96,120 +64,98 @@ module CodeRay
 #
 # options are passed to the encoder.
 def encode encoder, options = {}
-
-unless encoder.is_a? Class
-encoder_class = Encoders[encoder]
-end
-encoder = encoder_class.new options
-end
+encoder = Encoders[encoder].new options if encoder.respond_to? :to_sym
 encoder.encode_tokens self, options
 end
-
-
-
-
-# +options+ are passed to the encoder if given.
-def to_s options = {}
-encode :text, options
+
+# Turn tokens into a string by concatenating them.
+def to_s
+encode CodeRay::Encoders::Encoder.new
 end
-
+
 # Redirects unknown methods to encoder calls.
 #
 # For example, if you call +tokens.html+, the HTML encoder
 # is used to highlight the tokens.
 def method_missing meth, options = {}
-
-
-
-# Returns the tokens compressed by joining consecutive
-# tokens of the same kind.
-#
-# This can not be undone, but should yield the same output
-# in most Encoders. It basically makes the output smaller.
-#
-# Combined with dump, it saves space for the cost of time.
-#
-# If the scanner is written carefully, this is not required -
-# for example, consecutive //-comment lines could already be
-# joined in one comment token by the Scanner.
-def optimize
-last_kind = last_text = nil
-new = self.class.new
-for text, kind in self
-if text.is_a? String
-if kind == last_kind
-last_text << text
-else
-new << [last_text, last_kind] if last_kind
-last_text = text
-last_kind = kind
-end
-else
-new << [last_text, last_kind] if last_kind
-last_kind = last_text = nil
-new << [text, kind]
-end
-end
-new << [last_text, last_kind] if last_kind
-new
-end
-
-# Compact the object itself; see optimize.
-def optimize!
-replace optimize
+encode meth, options
+rescue PluginHost::PluginNotFound
+super
 end
 
-#
-#
-#
-
-
-
+# Split the tokens into parts of the given +sizes+.
+#
+# The result will be an Array of Tokens objects. The parts have
+# the text size specified by the parameter. In addition, each
+# part closes all opened tokens. This is useful to insert tokens
+# betweem them.
+#
+# This method is used by @Scanner#tokenize@ when called with an Array
+# of source strings. The Diff encoder uses it for inline highlighting.
+def split_into_parts *sizes
+parts = []
 opened = []
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+content = nil
+part = Tokens.new
+part_size = 0
+size = sizes.first
+i = 0
+for item in self
+case content
+when nil
+content = item
+when String
+if size && part_size + content.size > size # token must be cut
+if part_size < size # some part of the token goes into this part
+content = content.dup # content may no be safe to change
+part << content.slice!(0, size - part_size) << item
+end
+# close all open groups and lines...
+closing = opened.reverse.flatten.map do |content_or_kind|
+case content_or_kind
+when :begin_group
+:end_group
+when :begin_line
+:end_line
+else
+content_or_kind
+end
+end
+part.concat closing
+begin
+parts << part
+part = Tokens.new
+size = sizes[i += 1]
+end until size.nil? || size > 0
+# ...and open them again.
+part.concat opened.flatten
+part_size = 0
+redo unless content.empty?
+else
+part << content << item
+part_size += content.size
 end
+content = nil
+when Symbol
+case content
+when :begin_group, :begin_line
+opened << [content, item]
+when :end_group, :end_line
+opened.pop
+else
+raise ArgumentError, 'Unknown token action: %p, kind = %p' % [content, item]
+end
+part << content << item
+content = nil
+else
+raise ArgumentError, 'Token input junk: %p, kind = %p' % [content, item]
 end
-tokens << [type, kind]
 end
-
-
-
+parts << part
+parts << Tokens.new while parts.size < sizes.size
+parts
 end
 
-def fix!
-replace fix
-end
-
-# TODO: Scanner#split_into_lines
-#
-# Makes sure that:
-# - newlines are single tokens
-# (which means all other token are single-line)
-# - there are no open tokens at the end the line
-#
-# This makes it simple for encoders that work line-oriented,
-# like HTML with list-style numeration.
-def split_into_lines
-raise NotImplementedError
-end
-
-def split_into_lines!
-replace split_into_lines
-end
-
 # Dumps the object into a String that can be saved
 # in files or databases.
 #
@@ -226,28 +172,16 @@ module CodeRay
 #
 # See GZip module.
 def dump gzip_level = 7
-require 'coderay/helpers/gzip_simple'
 dump = Marshal.dump self
-dump =
+dump = GZip.gzip dump, gzip_level
 dump.extend Undumping
 end
-
-#
-
-
-def text_size
-size = 0
-each_text_token do |t, k|
-size + t.size
-end
-size
-end
-
-# Return all text tokens joined into a single string.
-def text
-map { |t, k| t if t.is_a? ::String }.join
+
+# Return the actual number of tokens.
+def count
+size / 2
 end
-
+
 # Include this module to give an object an #undump
 # method.
 #
@@ -258,133 +192,24 @@ module CodeRay
 Tokens.load self
 end
 end
-
+
 # Undump the object using Marshal.load, then
 # unzip it using GZip.gunzip.
 #
 # The result is commonly a Tokens object, but
 # this is not guaranteed.
 def Tokens.load dump
-
-dump = dump.gunzip
+dump = GZip.gunzip dump
 @dump = Marshal.load dump
 end
-
-
-
-
-
-
-
-
-# It redirects the method << to a block given at creation.
-#
-# This allows scanners and Encoders to use streaming (no
-# tokens are saved, the input is highlighted the same time it
-# is scanned) with the same code.
-#
-# See CodeRay.encode_stream and CodeRay.scan_stream
-class TokenStream < Tokens
-
-# Whether the object is a TokenStream.
-#
-# Returns true.
-def stream?
-true
-end
-
-# The Array is empty, but size counts the tokens given by <<.
-attr_reader :size
-
-# Creates a new TokenStream that calls +block+ whenever
-# its << method is called.
-#
-# Example:
-#
-# require 'coderay'
-#
-# token_stream = CodeRay::TokenStream.new do |text, kind|
-# puts 'kind: %s, text size: %d.' % [kind, text.size]
-# end
-#
-# token_stream << ['/\d+/', :regexp]
-# #-> kind: rexpexp, text size: 5.
-#
-def initialize &block
-raise ArgumentError, 'Block expected for streaming.' unless block
-@callback = block
-@size = 0
-end
-
-# Calls +block+ with +token+ and increments size.
-#
-# Returns self.
-def << token
-@callback.call(*token)
-@size += 1
-self
-end
-
-# This method is not implemented due to speed reasons. Use Tokens.
-def text_size
-raise NotImplementedError,
-'This method is not implemented due to speed reasons.'
-end
-
-# A TokenStream cannot be dumped. Use Tokens.
-def dump
-raise NotImplementedError, 'A TokenStream cannot be dumped.'
-end
-
-# A TokenStream cannot be optimized. Use Tokens.
-def optimize
-raise NotImplementedError, 'A TokenStream cannot be optimized.'
-end
-
-end
-
-end
-
-if $0 == __FILE__
-$VERBOSE = true
-$: << File.join(File.dirname(__FILE__), '..')
-eval DATA.read, nil, $0, __LINE__ + 4
-end
-
-__END__
-require 'test/unit'
-
-class TokensTest < Test::Unit::TestCase
-
-def test_creation
-assert CodeRay::Tokens < Array
-tokens = nil
-assert_nothing_raised do
-tokens = CodeRay::Tokens.new
-end
-assert_kind_of Array, tokens
-end
-
-def test_adding_tokens
-tokens = CodeRay::Tokens.new
-assert_nothing_raised do
-tokens << ['string', :type]
-tokens << ['()', :operator]
-end
-assert_equal tokens.size, 2
-end
-
-def test_dump_undump
-tokens = CodeRay::Tokens.new
-assert_nothing_raised do
-tokens << ['string', :type]
-tokens << ['()', :operator]
-end
-tokens2 = nil
-assert_nothing_raised do
-tokens2 = tokens.dump.undump
-end
-assert_equal tokens, tokens2
+
+alias text_token push
+def begin_group kind; push :begin_group, kind end
+def end_group kind; push :end_group, kind end
+def begin_line kind; push :begin_line, kind end
+def end_line kind; push :end_line, kind end
+alias tokens concat
+
 end
 
-end
+end
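
The tokens.rb changes above replace the old [text, kind] pair array (and its TokenStream subclass) with a flat token stream: text_token, begin_group/end_group and begin_line/end_line push entries directly, unknown methods are routed to encoders, and dump/undump now go through the new GZip helper. A short sketch of that 1.0.0 API, based only on the methods visible in this diff; the sample tokens mirror the "a string" example from the file's header comment, and the exact return values are stated as expectations, not verified output:

    require 'coderay'

    tokens = CodeRay::Tokens.new

    # Build the stream by hand with the writer methods added in this diff.
    tokens.begin_group :string
    tokens.text_token '"',        :delimiter
    tokens.text_token 'a string', :content
    tokens.text_token '"',        :delimiter
    tokens.end_group :string

    tokens.count   #=> 5 (three text tokens plus the two group actions; count is size / 2)
    tokens.to_s    #=> '"a string"' (concatenates the text parts)

    # Unknown methods are dispatched to encoders via method_missing,
    # e.g. tokens.html uses the HTML encoder, as the diff's comment describes.
    puts tokens.html

    # dump/undump round-trip through Marshal and the new GZip helper.
    reloaded = tokens.dump.undump
    reloaded == tokens   #=> true
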