coderay 0.9.8 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. data/{lib/README → README_INDEX.rdoc} +10 -21
  2. data/Rakefile +6 -6
  3. data/bin/coderay +193 -64
  4. data/lib/coderay.rb +61 -105
  5. data/lib/coderay/duo.rb +17 -21
  6. data/lib/coderay/encoder.rb +100 -112
  7. data/lib/coderay/encoders/_map.rb +12 -7
  8. data/lib/coderay/encoders/comment_filter.rb +12 -30
  9. data/lib/coderay/encoders/count.rb +29 -11
  10. data/lib/coderay/encoders/debug.rb +32 -20
  11. data/lib/coderay/encoders/div.rb +13 -9
  12. data/lib/coderay/encoders/filter.rb +34 -51
  13. data/lib/coderay/encoders/html.rb +155 -161
  14. data/lib/coderay/encoders/html/css.rb +4 -9
  15. data/lib/coderay/encoders/html/numbering.rb +115 -0
  16. data/lib/coderay/encoders/html/output.rb +22 -70
  17. data/lib/coderay/encoders/json.rb +59 -45
  18. data/lib/coderay/encoders/lines_of_code.rb +12 -57
  19. data/lib/coderay/encoders/null.rb +6 -14
  20. data/lib/coderay/encoders/page.rb +13 -9
  21. data/lib/coderay/encoders/span.rb +13 -9
  22. data/lib/coderay/encoders/statistic.rb +58 -39
  23. data/lib/coderay/encoders/terminal.rb +179 -0
  24. data/lib/coderay/encoders/text.rb +31 -17
  25. data/lib/coderay/encoders/token_kind_filter.rb +111 -0
  26. data/lib/coderay/encoders/xml.rb +19 -18
  27. data/lib/coderay/encoders/yaml.rb +37 -9
  28. data/lib/coderay/for_redcloth.rb +4 -4
  29. data/lib/coderay/helpers/file_type.rb +127 -246
  30. data/lib/coderay/helpers/gzip.rb +41 -0
  31. data/lib/coderay/helpers/plugin.rb +241 -306
  32. data/lib/coderay/helpers/word_list.rb +65 -126
  33. data/lib/coderay/scanner.rb +173 -156
  34. data/lib/coderay/scanners/_map.rb +18 -17
  35. data/lib/coderay/scanners/c.rb +63 -77
  36. data/lib/coderay/scanners/clojure.rb +217 -0
  37. data/lib/coderay/scanners/cpp.rb +71 -84
  38. data/lib/coderay/scanners/css.rb +103 -120
  39. data/lib/coderay/scanners/debug.rb +47 -44
  40. data/lib/coderay/scanners/delphi.rb +70 -76
  41. data/lib/coderay/scanners/diff.rb +141 -50
  42. data/lib/coderay/scanners/erb.rb +81 -0
  43. data/lib/coderay/scanners/groovy.rb +104 -113
  44. data/lib/coderay/scanners/haml.rb +168 -0
  45. data/lib/coderay/scanners/html.rb +181 -110
  46. data/lib/coderay/scanners/java.rb +73 -75
  47. data/lib/coderay/scanners/java/builtin_types.rb +2 -0
  48. data/lib/coderay/scanners/java_script.rb +90 -101
  49. data/lib/coderay/scanners/json.rb +40 -53
  50. data/lib/coderay/scanners/php.rb +123 -147
  51. data/lib/coderay/scanners/python.rb +93 -91
  52. data/lib/coderay/scanners/raydebug.rb +66 -0
  53. data/lib/coderay/scanners/ruby.rb +343 -326
  54. data/lib/coderay/scanners/ruby/patterns.rb +40 -106
  55. data/lib/coderay/scanners/ruby/string_state.rb +71 -0
  56. data/lib/coderay/scanners/sql.rb +80 -66
  57. data/lib/coderay/scanners/text.rb +26 -0
  58. data/lib/coderay/scanners/xml.rb +1 -1
  59. data/lib/coderay/scanners/yaml.rb +74 -73
  60. data/lib/coderay/style.rb +10 -7
  61. data/lib/coderay/styles/_map.rb +3 -3
  62. data/lib/coderay/styles/alpha.rb +143 -0
  63. data/lib/coderay/token_kinds.rb +90 -0
  64. data/lib/coderay/tokens.rb +102 -277
  65. data/lib/coderay/tokens_proxy.rb +55 -0
  66. data/lib/coderay/version.rb +3 -0
  67. data/test/functional/basic.rb +200 -18
  68. data/test/functional/examples.rb +130 -0
  69. data/test/functional/for_redcloth.rb +15 -8
  70. data/test/functional/suite.rb +9 -6
  71. metadata +103 -123
  72. data/FOLDERS +0 -53
  73. data/bin/coderay_stylesheet +0 -4
  74. data/lib/coderay/encoders/html/numerization.rb +0 -133
  75. data/lib/coderay/encoders/term.rb +0 -158
  76. data/lib/coderay/encoders/token_class_filter.rb +0 -84
  77. data/lib/coderay/helpers/gzip_simple.rb +0 -123
  78. data/lib/coderay/scanners/nitro_xhtml.rb +0 -136
  79. data/lib/coderay/scanners/plaintext.rb +0 -20
  80. data/lib/coderay/scanners/rhtml.rb +0 -78
  81. data/lib/coderay/scanners/scheme.rb +0 -145
  82. data/lib/coderay/styles/cycnus.rb +0 -152
  83. data/lib/coderay/styles/murphy.rb +0 -134
  84. data/lib/coderay/token_classes.rb +0 -86
  85. data/test/functional/load_plugin_scanner.rb +0 -11
  86. data/test/functional/vhdl.rb +0 -126
  87. data/test/functional/word_list.rb +0 -79
@@ -1,6 +1,9 @@
1
1
  module CodeRay
2
-
3
- # = Tokens
2
+
3
+ # GZip library for writing and reading token dumps.
4
+ autoload :GZip, 'coderay/helpers/gzip'
5
+
6
+ # = Tokens TODO: Rewrite!
4
7
  #
5
8
  # The Tokens class represents a list of tokens returned from
6
9
  # a Scanner.
@@ -8,7 +11,7 @@ module CodeRay
8
11
  # A token is not a special object, just a two-element Array
9
12
  # consisting of
10
13
  # * the _token_ _text_ (the original source of the token in a String) or
11
- # a _token_ _action_ (:open, :close, :begin_line, :end_line)
14
+ # a _token_ _action_ (begin_group, end_group, begin_line, end_line)
12
15
  # * the _token_ _kind_ (a Symbol representing the type of the token)
13
16
  #
14
17
  # A token looks like this:
@@ -18,16 +21,16 @@ module CodeRay
18
21
  # ['$^', :error]
19
22
  #
20
23
  # Some scanners also yield sub-tokens, represented by special
21
- # token actions, namely :open and :close.
24
+ # token actions, namely begin_group and end_group.
22
25
  #
23
26
  # The Ruby scanner, for example, splits "a string" into:
24
27
  #
25
28
  # [
26
- # [:open, :string],
29
+ # [:begin_group, :string],
27
30
  # ['"', :delimiter],
28
31
  # ['a string', :content],
29
32
  # ['"', :delimiter],
30
- # [:close, :string]
33
+ # [:end_group, :string]
31
34
  # ]
32
35
  #
33
36
  # Tokens is the interface between Scanners and Encoders:
@@ -47,46 +50,11 @@ module CodeRay
47
50
  #
48
51
  # It also allows you to generate tokens directly (without using a scanner),
49
52
  # to load them from a file, and still use any Encoder that CodeRay provides.
50
- #
51
- # Tokens' subclass TokenStream allows streaming to save memory.
52
53
  class Tokens < Array
53
54
 
54
55
  # The Scanner instance that created the tokens.
55
56
  attr_accessor :scanner
56
57
 
57
- # Whether the object is a TokenStream.
58
- #
59
- # Returns false.
60
- def stream?
61
- false
62
- end
63
-
64
- # Iterates over all tokens.
65
- #
66
- # If a filter is given, only tokens of that kind are yielded.
67
- def each kind_filter = nil, &block
68
- unless kind_filter
69
- super(&block)
70
- else
71
- super() do |text, kind|
72
- next unless kind == kind_filter
73
- yield text, kind
74
- end
75
- end
76
- end
77
-
78
- # Iterates over all text tokens.
79
- # Range tokens like [:open, :string] are left out.
80
- #
81
- # Example:
82
- # tokens.each_text_token { |text, kind| text.replace html_escape(text) }
83
- def each_text_token
84
- each do |text, kind|
85
- next unless text.is_a? ::String
86
- yield text, kind
87
- end
88
- end
89
-
90
58
  # Encode the tokens using encoder.
91
59
  #
92
60
  # encoder can be
@@ -96,120 +64,98 @@ module CodeRay
96
64
  #
97
65
  # options are passed to the encoder.
98
66
  def encode encoder, options = {}
99
- unless encoder.is_a? Encoders::Encoder
100
- unless encoder.is_a? Class
101
- encoder_class = Encoders[encoder]
102
- end
103
- encoder = encoder_class.new options
104
- end
67
+ encoder = Encoders[encoder].new options if encoder.respond_to? :to_sym
105
68
  encoder.encode_tokens self, options
106
69
  end
107
-
108
-
109
- # Turn into a string using Encoders::Text.
110
- #
111
- # +options+ are passed to the encoder if given.
112
- def to_s options = {}
113
- encode :text, options
70
+
71
+ # Turn tokens into a string by concatenating them.
72
+ def to_s
73
+ encode CodeRay::Encoders::Encoder.new
114
74
  end
115
-
75
+
116
76
  # Redirects unknown methods to encoder calls.
117
77
  #
118
78
  # For example, if you call +tokens.html+, the HTML encoder
119
79
  # is used to highlight the tokens.
120
80
  def method_missing meth, options = {}
121
- Encoders[meth].new(options).encode_tokens self
122
- end
123
-
124
- # Returns the tokens compressed by joining consecutive
125
- # tokens of the same kind.
126
- #
127
- # This can not be undone, but should yield the same output
128
- # in most Encoders. It basically makes the output smaller.
129
- #
130
- # Combined with dump, it saves space for the cost of time.
131
- #
132
- # If the scanner is written carefully, this is not required -
133
- # for example, consecutive //-comment lines could already be
134
- # joined in one comment token by the Scanner.
135
- def optimize
136
- last_kind = last_text = nil
137
- new = self.class.new
138
- for text, kind in self
139
- if text.is_a? String
140
- if kind == last_kind
141
- last_text << text
142
- else
143
- new << [last_text, last_kind] if last_kind
144
- last_text = text
145
- last_kind = kind
146
- end
147
- else
148
- new << [last_text, last_kind] if last_kind
149
- last_kind = last_text = nil
150
- new << [text, kind]
151
- end
152
- end
153
- new << [last_text, last_kind] if last_kind
154
- new
155
- end
156
-
157
- # Compact the object itself; see optimize.
158
- def optimize!
159
- replace optimize
81
+ encode meth, options
82
+ rescue PluginHost::PluginNotFound
83
+ super
160
84
  end
161
85
 
162
- # Ensure that all :open tokens have a correspondent :close one.
163
- #
164
- # TODO: Test this!
165
- def fix
166
- tokens = self.class.new
167
- # Check token nesting using a stack of kinds.
86
+ # Split the tokens into parts of the given +sizes+.
87
+ #
88
+ # The result will be an Array of Tokens objects. The parts have
89
+ # the text size specified by the parameter. In addition, each
90
+ # part closes all opened tokens. This is useful to insert tokens
91
+ # between them.
92
+ #
93
+ # This method is used by @Scanner#tokenize@ when called with an Array
94
+ # of source strings. The Diff encoder uses it for inline highlighting.
95
+ def split_into_parts *sizes
96
+ parts = []
168
97
  opened = []
169
- for type, kind in self
170
- case type
171
- when :open
172
- opened.push [:close, kind]
173
- when :begin_line
174
- opened.push [:end_line, kind]
175
- when :close, :end_line
176
- expected = opened.pop
177
- if [type, kind] != expected
178
- # Unexpected :close; decide what to do based on the kind:
179
- # - token was never opened: delete the :close (just skip it)
180
- next unless opened.rindex expected
181
- # - token was opened earlier: also close tokens in between
182
- tokens << token until (token = opened.pop) == expected
98
+ content = nil
99
+ part = Tokens.new
100
+ part_size = 0
101
+ size = sizes.first
102
+ i = 0
103
+ for item in self
104
+ case content
105
+ when nil
106
+ content = item
107
+ when String
108
+ if size && part_size + content.size > size # token must be cut
109
+ if part_size < size # some part of the token goes into this part
110
+ content = content.dup # content may not be safe to change
111
+ part << content.slice!(0, size - part_size) << item
112
+ end
113
+ # close all open groups and lines...
114
+ closing = opened.reverse.flatten.map do |content_or_kind|
115
+ case content_or_kind
116
+ when :begin_group
117
+ :end_group
118
+ when :begin_line
119
+ :end_line
120
+ else
121
+ content_or_kind
122
+ end
123
+ end
124
+ part.concat closing
125
+ begin
126
+ parts << part
127
+ part = Tokens.new
128
+ size = sizes[i += 1]
129
+ end until size.nil? || size > 0
130
+ # ...and open them again.
131
+ part.concat opened.flatten
132
+ part_size = 0
133
+ redo unless content.empty?
134
+ else
135
+ part << content << item
136
+ part_size += content.size
183
137
  end
138
+ content = nil
139
+ when Symbol
140
+ case content
141
+ when :begin_group, :begin_line
142
+ opened << [content, item]
143
+ when :end_group, :end_line
144
+ opened.pop
145
+ else
146
+ raise ArgumentError, 'Unknown token action: %p, kind = %p' % [content, item]
147
+ end
148
+ part << content << item
149
+ content = nil
150
+ else
151
+ raise ArgumentError, 'Token input junk: %p, kind = %p' % [content, item]
184
152
  end
185
- tokens << [type, kind]
186
153
  end
187
- # Close remaining opened tokens
188
- tokens << token while token = opened.pop
189
- tokens
154
+ parts << part
155
+ parts << Tokens.new while parts.size < sizes.size
156
+ parts
190
157
  end
191
158
 
192
- def fix!
193
- replace fix
194
- end
195
-
196
- # TODO: Scanner#split_into_lines
197
- #
198
- # Makes sure that:
199
- # - newlines are single tokens
200
- # (which means all other token are single-line)
201
- # - there are no open tokens at the end the line
202
- #
203
- # This makes it simple for encoders that work line-oriented,
204
- # like HTML with list-style numeration.
205
- def split_into_lines
206
- raise NotImplementedError
207
- end
208
-
209
- def split_into_lines!
210
- replace split_into_lines
211
- end
212
-
213
159
  # Dumps the object into a String that can be saved
214
160
  # in files or databases.
215
161
  #
@@ -226,28 +172,16 @@ module CodeRay
226
172
  #
227
173
  # See GZip module.
228
174
  def dump gzip_level = 7
229
- require 'coderay/helpers/gzip_simple'
230
175
  dump = Marshal.dump self
231
- dump = dump.gzip gzip_level
176
+ dump = GZip.gzip dump, gzip_level
232
177
  dump.extend Undumping
233
178
  end
234
-
235
- # The total size of the tokens.
236
- # Should be equal to the input size before
237
- # scanning.
238
- def text_size
239
- size = 0
240
- each_text_token do |t, k|
241
- size + t.size
242
- end
243
- size
244
- end
245
-
246
- # Return all text tokens joined into a single string.
247
- def text
248
- map { |t, k| t if t.is_a? ::String }.join
179
+
180
+ # Return the actual number of tokens.
181
+ def count
182
+ size / 2
249
183
  end
250
-
184
+
251
185
  # Include this module to give an object an #undump
252
186
  # method.
253
187
  #
@@ -258,133 +192,24 @@ module CodeRay
258
192
  Tokens.load self
259
193
  end
260
194
  end
261
-
195
+
262
196
  # Undump the object using Marshal.load, then
263
197
  # unzip it using GZip.gunzip.
264
198
  #
265
199
  # The result is commonly a Tokens object, but
266
200
  # this is not guaranteed.
267
201
  def Tokens.load dump
268
- require 'coderay/helpers/gzip_simple'
269
- dump = dump.gunzip
202
+ dump = GZip.gunzip dump
270
203
  @dump = Marshal.load dump
271
204
  end
272
-
273
- end
274
-
275
-
276
- # = TokenStream
277
- #
278
- # The TokenStream class is a fake Array without elements.
279
- #
280
- # It redirects the method << to a block given at creation.
281
- #
282
- # This allows scanners and Encoders to use streaming (no
283
- # tokens are saved, the input is highlighted the same time it
284
- # is scanned) with the same code.
285
- #
286
- # See CodeRay.encode_stream and CodeRay.scan_stream
287
- class TokenStream < Tokens
288
-
289
- # Whether the object is a TokenStream.
290
- #
291
- # Returns true.
292
- def stream?
293
- true
294
- end
295
-
296
- # The Array is empty, but size counts the tokens given by <<.
297
- attr_reader :size
298
-
299
- # Creates a new TokenStream that calls +block+ whenever
300
- # its << method is called.
301
- #
302
- # Example:
303
- #
304
- # require 'coderay'
305
- #
306
- # token_stream = CodeRay::TokenStream.new do |text, kind|
307
- # puts 'kind: %s, text size: %d.' % [kind, text.size]
308
- # end
309
- #
310
- # token_stream << ['/\d+/', :regexp]
311
- # #-> kind: rexpexp, text size: 5.
312
- #
313
- def initialize &block
314
- raise ArgumentError, 'Block expected for streaming.' unless block
315
- @callback = block
316
- @size = 0
317
- end
318
-
319
- # Calls +block+ with +token+ and increments size.
320
- #
321
- # Returns self.
322
- def << token
323
- @callback.call(*token)
324
- @size += 1
325
- self
326
- end
327
-
328
- # This method is not implemented due to speed reasons. Use Tokens.
329
- def text_size
330
- raise NotImplementedError,
331
- 'This method is not implemented due to speed reasons.'
332
- end
333
-
334
- # A TokenStream cannot be dumped. Use Tokens.
335
- def dump
336
- raise NotImplementedError, 'A TokenStream cannot be dumped.'
337
- end
338
-
339
- # A TokenStream cannot be optimized. Use Tokens.
340
- def optimize
341
- raise NotImplementedError, 'A TokenStream cannot be optimized.'
342
- end
343
-
344
- end
345
-
346
- end
347
-
348
- if $0 == __FILE__
349
- $VERBOSE = true
350
- $: << File.join(File.dirname(__FILE__), '..')
351
- eval DATA.read, nil, $0, __LINE__ + 4
352
- end
353
-
354
- __END__
355
- require 'test/unit'
356
-
357
- class TokensTest < Test::Unit::TestCase
358
-
359
- def test_creation
360
- assert CodeRay::Tokens < Array
361
- tokens = nil
362
- assert_nothing_raised do
363
- tokens = CodeRay::Tokens.new
364
- end
365
- assert_kind_of Array, tokens
366
- end
367
-
368
- def test_adding_tokens
369
- tokens = CodeRay::Tokens.new
370
- assert_nothing_raised do
371
- tokens << ['string', :type]
372
- tokens << ['()', :operator]
373
- end
374
- assert_equal tokens.size, 2
375
- end
376
-
377
- def test_dump_undump
378
- tokens = CodeRay::Tokens.new
379
- assert_nothing_raised do
380
- tokens << ['string', :type]
381
- tokens << ['()', :operator]
382
- end
383
- tokens2 = nil
384
- assert_nothing_raised do
385
- tokens2 = tokens.dump.undump
386
- end
387
- assert_equal tokens, tokens2
205
+
206
+ alias text_token push
207
+ def begin_group kind; push :begin_group, kind end
208
+ def end_group kind; push :end_group, kind end
209
+ def begin_line kind; push :begin_line, kind end
210
+ def end_line kind; push :end_line, kind end
211
+ alias tokens concat
212
+
388
213
  end
389
214
 
390
- end
215
+ end