coderay 0.9.8 → 1.0.0

Files changed (87)
  1. data/{lib/README → README_INDEX.rdoc} +10 -21
  2. data/Rakefile +6 -6
  3. data/bin/coderay +193 -64
  4. data/lib/coderay.rb +61 -105
  5. data/lib/coderay/duo.rb +17 -21
  6. data/lib/coderay/encoder.rb +100 -112
  7. data/lib/coderay/encoders/_map.rb +12 -7
  8. data/lib/coderay/encoders/comment_filter.rb +12 -30
  9. data/lib/coderay/encoders/count.rb +29 -11
  10. data/lib/coderay/encoders/debug.rb +32 -20
  11. data/lib/coderay/encoders/div.rb +13 -9
  12. data/lib/coderay/encoders/filter.rb +34 -51
  13. data/lib/coderay/encoders/html.rb +155 -161
  14. data/lib/coderay/encoders/html/css.rb +4 -9
  15. data/lib/coderay/encoders/html/numbering.rb +115 -0
  16. data/lib/coderay/encoders/html/output.rb +22 -70
  17. data/lib/coderay/encoders/json.rb +59 -45
  18. data/lib/coderay/encoders/lines_of_code.rb +12 -57
  19. data/lib/coderay/encoders/null.rb +6 -14
  20. data/lib/coderay/encoders/page.rb +13 -9
  21. data/lib/coderay/encoders/span.rb +13 -9
  22. data/lib/coderay/encoders/statistic.rb +58 -39
  23. data/lib/coderay/encoders/terminal.rb +179 -0
  24. data/lib/coderay/encoders/text.rb +31 -17
  25. data/lib/coderay/encoders/token_kind_filter.rb +111 -0
  26. data/lib/coderay/encoders/xml.rb +19 -18
  27. data/lib/coderay/encoders/yaml.rb +37 -9
  28. data/lib/coderay/for_redcloth.rb +4 -4
  29. data/lib/coderay/helpers/file_type.rb +127 -246
  30. data/lib/coderay/helpers/gzip.rb +41 -0
  31. data/lib/coderay/helpers/plugin.rb +241 -306
  32. data/lib/coderay/helpers/word_list.rb +65 -126
  33. data/lib/coderay/scanner.rb +173 -156
  34. data/lib/coderay/scanners/_map.rb +18 -17
  35. data/lib/coderay/scanners/c.rb +63 -77
  36. data/lib/coderay/scanners/clojure.rb +217 -0
  37. data/lib/coderay/scanners/cpp.rb +71 -84
  38. data/lib/coderay/scanners/css.rb +103 -120
  39. data/lib/coderay/scanners/debug.rb +47 -44
  40. data/lib/coderay/scanners/delphi.rb +70 -76
  41. data/lib/coderay/scanners/diff.rb +141 -50
  42. data/lib/coderay/scanners/erb.rb +81 -0
  43. data/lib/coderay/scanners/groovy.rb +104 -113
  44. data/lib/coderay/scanners/haml.rb +168 -0
  45. data/lib/coderay/scanners/html.rb +181 -110
  46. data/lib/coderay/scanners/java.rb +73 -75
  47. data/lib/coderay/scanners/java/builtin_types.rb +2 -0
  48. data/lib/coderay/scanners/java_script.rb +90 -101
  49. data/lib/coderay/scanners/json.rb +40 -53
  50. data/lib/coderay/scanners/php.rb +123 -147
  51. data/lib/coderay/scanners/python.rb +93 -91
  52. data/lib/coderay/scanners/raydebug.rb +66 -0
  53. data/lib/coderay/scanners/ruby.rb +343 -326
  54. data/lib/coderay/scanners/ruby/patterns.rb +40 -106
  55. data/lib/coderay/scanners/ruby/string_state.rb +71 -0
  56. data/lib/coderay/scanners/sql.rb +80 -66
  57. data/lib/coderay/scanners/text.rb +26 -0
  58. data/lib/coderay/scanners/xml.rb +1 -1
  59. data/lib/coderay/scanners/yaml.rb +74 -73
  60. data/lib/coderay/style.rb +10 -7
  61. data/lib/coderay/styles/_map.rb +3 -3
  62. data/lib/coderay/styles/alpha.rb +143 -0
  63. data/lib/coderay/token_kinds.rb +90 -0
  64. data/lib/coderay/tokens.rb +102 -277
  65. data/lib/coderay/tokens_proxy.rb +55 -0
  66. data/lib/coderay/version.rb +3 -0
  67. data/test/functional/basic.rb +200 -18
  68. data/test/functional/examples.rb +130 -0
  69. data/test/functional/for_redcloth.rb +15 -8
  70. data/test/functional/suite.rb +9 -6
  71. metadata +103 -123
  72. data/FOLDERS +0 -53
  73. data/bin/coderay_stylesheet +0 -4
  74. data/lib/coderay/encoders/html/numerization.rb +0 -133
  75. data/lib/coderay/encoders/term.rb +0 -158
  76. data/lib/coderay/encoders/token_class_filter.rb +0 -84
  77. data/lib/coderay/helpers/gzip_simple.rb +0 -123
  78. data/lib/coderay/scanners/nitro_xhtml.rb +0 -136
  79. data/lib/coderay/scanners/plaintext.rb +0 -20
  80. data/lib/coderay/scanners/rhtml.rb +0 -78
  81. data/lib/coderay/scanners/scheme.rb +0 -145
  82. data/lib/coderay/styles/cycnus.rb +0 -152
  83. data/lib/coderay/styles/murphy.rb +0 -134
  84. data/lib/coderay/token_classes.rb +0 -86
  85. data/test/functional/load_plugin_scanner.rb +0 -11
  86. data/test/functional/vhdl.rb +0 -126
  87. data/test/functional/word_list.rb +0 -79

data/lib/coderay/tokens.rb

@@ -1,6 +1,9 @@
  module CodeRay
-
- # = Tokens
+
+ # GZip library for writing and reading token dumps.
+ autoload :GZip, 'coderay/helpers/gzip'
+
+ # = Tokens TODO: Rewrite!
  #
  # The Tokens class represents a list of tokens returnd from
  # a Scanner.
@@ -8,7 +11,7 @@ module CodeRay
  # A token is not a special object, just a two-element Array
  # consisting of
  # * the _token_ _text_ (the original source of the token in a String) or
- # a _token_ _action_ (:open, :close, :begin_line, :end_line)
+ # a _token_ _action_ (begin_group, end_group, begin_line, end_line)
  # * the _token_ _kind_ (a Symbol representing the type of the token)
  #
  # A token looks like this:
@@ -18,16 +21,16 @@ module CodeRay
  # ['$^', :error]
  #
  # Some scanners also yield sub-tokens, represented by special
- # token actions, namely :open and :close.
+ # token actions, namely begin_group and end_group.
  #
  # The Ruby scanner, for example, splits "a string" into:
  #
  # [
- # [:open, :string],
+ # [:begin_group, :string],
  # ['"', :delimiter],
  # ['a string', :content],
  # ['"', :delimiter],
- # [:close, :string]
+ # [:end_group, :string]
  # ]
  #
  # Tokens is the interface between Scanners and Encoders:
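
Note: the grouping described in the comment above can be made visible with the Debug encoder, which prints token actions explicitly. A minimal sketch; the Debug output shown in the comment is only approximate:

    require 'coderay'

    # Scan a Ruby string literal; the scanner wraps it in a
    # begin_group/end_group pair around the delimiter and content tokens.
    tokens = CodeRay.scan('"a string"', :ruby)

    # Roughly: string<delimiter(")content(a string)delimiter(")>
    puts tokens.encode(:debug)
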
@@ -47,46 +50,11 @@ module CodeRay
  #
  # It also allows you to generate tokens directly (without using a scanner),
  # to load them from a file, and still use any Encoder that CodeRay provides.
- #
- # Tokens' subclass TokenStream allows streaming to save memory.
  class Tokens < Array

  # The Scanner instance that created the tokens.
  attr_accessor :scanner

- # Whether the object is a TokenStream.
- #
- # Returns false.
- def stream?
- false
- end
-
- # Iterates over all tokens.
- #
- # If a filter is given, only tokens of that kind are yielded.
- def each kind_filter = nil, &block
- unless kind_filter
- super(&block)
- else
- super() do |text, kind|
- next unless kind == kind_filter
- yield text, kind
- end
- end
- end
-
- # Iterates over all text tokens.
- # Range tokens like [:open, :string] are left out.
- #
- # Example:
- # tokens.each_text_token { |text, kind| text.replace html_escape(text) }
- def each_text_token
- each do |text, kind|
- next unless text.is_a? ::String
- yield text, kind
- end
- end
-
  # Encode the tokens using encoder.
  #
  # encoder can be
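
Note: with stream?, each(kind_filter) and each_text_token removed, Tokens is iterated like the plain Array it is. A hedged sketch, assuming the flat 1.0 layout in which content and kind alternate (as suggested by the count and text_token definitions further down):

    require 'coderay'

    # Assumption: the lazy result of CodeRay.scan exposes its Tokens via #tokens.
    tokens = CodeRay.scan('1 + 1', :ruby).tokens

    # content is a String for text tokens and a Symbol for token
    # actions such as :begin_group or :end_group; kind is the token kind.
    tokens.each_slice(2) do |content, kind|
      next unless content.is_a? String
      puts '%-10s %p' % [kind, content]
    end
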
@@ -96,120 +64,98 @@ module CodeRay
  #
  # options are passed to the encoder.
  def encode encoder, options = {}
- unless encoder.is_a? Encoders::Encoder
- unless encoder.is_a? Class
- encoder_class = Encoders[encoder]
- end
- encoder = encoder_class.new options
- end
+ encoder = Encoders[encoder].new options if encoder.respond_to? :to_sym
  encoder.encode_tokens self, options
  end
-
-
- # Turn into a string using Encoders::Text.
- #
- # +options+ are passed to the encoder if given.
- def to_s options = {}
- encode :text, options
+
+ # Turn tokens into a string by concatenating them.
+ def to_s
+ encode CodeRay::Encoders::Encoder.new
  end
-
+
  # Redirects unknown methods to encoder calls.
  #
  # For example, if you call +tokens.html+, the HTML encoder
  # is used to highlight the tokens.
  def method_missing meth, options = {}
- Encoders[meth].new(options).encode_tokens self
- end
-
- # Returns the tokens compressed by joining consecutive
- # tokens of the same kind.
- #
- # This can not be undone, but should yield the same output
- # in most Encoders. It basically makes the output smaller.
- #
- # Combined with dump, it saves space for the cost of time.
- #
- # If the scanner is written carefully, this is not required -
- # for example, consecutive //-comment lines could already be
- # joined in one comment token by the Scanner.
- def optimize
- last_kind = last_text = nil
- new = self.class.new
- for text, kind in self
- if text.is_a? String
- if kind == last_kind
- last_text << text
- else
- new << [last_text, last_kind] if last_kind
- last_text = text
- last_kind = kind
- end
- else
- new << [last_text, last_kind] if last_kind
- last_kind = last_text = nil
- new << [text, kind]
- end
- end
- new << [last_text, last_kind] if last_kind
- new
- end
-
- # Compact the object itself; see optimize.
- def optimize!
- replace optimize
+ encode meth, options
+ rescue PluginHost::PluginNotFound
+ super
  end

- # Ensure that all :open tokens have a correspondent :close one.
- #
- # TODO: Test this!
- def fix
- tokens = self.class.new
- # Check token nesting using a stack of kinds.
+ # Split the tokens into parts of the given +sizes+.
+ #
+ # The result will be an Array of Tokens objects. The parts have
+ # the text size specified by the parameter. In addition, each
+ # part closes all opened tokens. This is useful to insert tokens
+ # betweem them.
+ #
+ # This method is used by @Scanner#tokenize@ when called with an Array
+ # of source strings. The Diff encoder uses it for inline highlighting.
+ def split_into_parts *sizes
+ parts = []
  opened = []
- for type, kind in self
- case type
- when :open
- opened.push [:close, kind]
- when :begin_line
- opened.push [:end_line, kind]
- when :close, :end_line
- expected = opened.pop
- if [type, kind] != expected
- # Unexpected :close; decide what to do based on the kind:
- # - token was never opened: delete the :close (just skip it)
- next unless opened.rindex expected
- # - token was opened earlier: also close tokens in between
- tokens << token until (token = opened.pop) == expected
+ content = nil
+ part = Tokens.new
+ part_size = 0
+ size = sizes.first
+ i = 0
+ for item in self
+ case content
+ when nil
+ content = item
+ when String
+ if size && part_size + content.size > size # token must be cut
+ if part_size < size # some part of the token goes into this part
+ content = content.dup # content may no be safe to change
+ part << content.slice!(0, size - part_size) << item
+ end
+ # close all open groups and lines...
+ closing = opened.reverse.flatten.map do |content_or_kind|
+ case content_or_kind
+ when :begin_group
+ :end_group
+ when :begin_line
+ :end_line
+ else
+ content_or_kind
+ end
+ end
+ part.concat closing
+ begin
+ parts << part
+ part = Tokens.new
+ size = sizes[i += 1]
+ end until size.nil? || size > 0
+ # ...and open them again.
+ part.concat opened.flatten
+ part_size = 0
+ redo unless content.empty?
+ else
+ part << content << item
+ part_size += content.size
  end
+ content = nil
+ when Symbol
+ case content
+ when :begin_group, :begin_line
+ opened << [content, item]
+ when :end_group, :end_line
+ opened.pop
+ else
+ raise ArgumentError, 'Unknown token action: %p, kind = %p' % [content, item]
+ end
+ part << content << item
+ content = nil
+ else
+ raise ArgumentError, 'Token input junk: %p, kind = %p' % [content, item]
  end
- tokens << [type, kind]
  end
- # Close remaining opened tokens
- tokens << token while token = opened.pop
- tokens
+ parts << part
+ parts << Tokens.new while parts.size < sizes.size
+ parts
  end

- def fix!
- replace fix
- end
-
- # TODO: Scanner#split_into_lines
- #
- # Makes sure that:
- # - newlines are single tokens
- # (which means all other token are single-line)
- # - there are no open tokens at the end the line
- #
- # This makes it simple for encoders that work line-oriented,
- # like HTML with list-style numeration.
- def split_into_lines
- raise NotImplementedError
- end
-
- def split_into_lines!
- replace split_into_lines
- end
-
  # Dumps the object into a String that can be saved
  # in files or databases.
  #
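
Note: the slimmed-down encode looks the encoder up via Encoders[] for anything that responds to to_sym, and method_missing now simply forwards unknown method names to encode. A hedged usage sketch:

    require 'coderay'

    # Assumption: the lazy result of CodeRay.scan exposes its Tokens via #tokens.
    tokens = CodeRay.scan('puts "Hi"', :ruby).tokens

    html = tokens.encode(:html)   # encoder looked up via Encoders[:html]
    html = tokens.html            # same thing, routed through method_missing
    text = tokens.to_s            # plain concatenation via the base Encoder

split_into_parts itself is mostly internal: per its comment above, Scanner#tokenize uses it for Array input and the Diff encoder uses it for inline highlighting.
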
@@ -226,28 +172,16 @@ module CodeRay
  #
  # See GZip module.
  def dump gzip_level = 7
- require 'coderay/helpers/gzip_simple'
  dump = Marshal.dump self
- dump = dump.gzip gzip_level
+ dump = GZip.gzip dump, gzip_level
  dump.extend Undumping
  end
-
- # The total size of the tokens.
- # Should be equal to the input size before
- # scanning.
- def text_size
- size = 0
- each_text_token do |t, k|
- size + t.size
- end
- size
- end
-
- # Return all text tokens joined into a single string.
- def text
- map { |t, k| t if t.is_a? ::String }.join
+
+ # Return the actual number of tokens.
+ def count
+ size / 2
  end
-
+
  # Include this module to give an object an #undump
  # method.
  #
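
Note: token dumps now go through the autoloaded GZip helper instead of requiring gzip_simple. A hedged round-trip sketch:

    require 'coderay'

    # Assumption: the lazy result of CodeRay.scan exposes its Tokens via #tokens.
    tokens = CodeRay.scan('x = 1', :ruby).tokens

    dump = tokens.dump    # Marshal.dump + GZip.gzip, extended with Undumping
    copy = dump.undump    # calls Tokens.load(dump), see below
    copy.count            # number of tokens, i.e. half the Array size
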
@@ -258,133 +192,24 @@ module CodeRay
  Tokens.load self
  end
  end
-
+
  # Undump the object using Marshal.load, then
  # unzip it using GZip.gunzip.
  #
  # The result is commonly a Tokens object, but
  # this is not guaranteed.
  def Tokens.load dump
- require 'coderay/helpers/gzip_simple'
- dump = dump.gunzip
+ dump = GZip.gunzip dump
  @dump = Marshal.load dump
  end
-
- end
-
-
- # = TokenStream
- #
- # The TokenStream class is a fake Array without elements.
- #
- # It redirects the method << to a block given at creation.
- #
- # This allows scanners and Encoders to use streaming (no
- # tokens are saved, the input is highlighted the same time it
- # is scanned) with the same code.
- #
- # See CodeRay.encode_stream and CodeRay.scan_stream
- class TokenStream < Tokens
-
- # Whether the object is a TokenStream.
- #
- # Returns true.
- def stream?
- true
- end
-
- # The Array is empty, but size counts the tokens given by <<.
- attr_reader :size
-
- # Creates a new TokenStream that calls +block+ whenever
- # its << method is called.
- #
- # Example:
- #
- # require 'coderay'
- #
- # token_stream = CodeRay::TokenStream.new do |text, kind|
- # puts 'kind: %s, text size: %d.' % [kind, text.size]
- # end
- #
- # token_stream << ['/\d+/', :regexp]
- # #-> kind: rexpexp, text size: 5.
- #
- def initialize &block
- raise ArgumentError, 'Block expected for streaming.' unless block
- @callback = block
- @size = 0
- end
-
- # Calls +block+ with +token+ and increments size.
- #
- # Returns self.
- def << token
- @callback.call(*token)
- @size += 1
- self
- end
-
- # This method is not implemented due to speed reasons. Use Tokens.
- def text_size
- raise NotImplementedError,
- 'This method is not implemented due to speed reasons.'
- end
-
- # A TokenStream cannot be dumped. Use Tokens.
- def dump
- raise NotImplementedError, 'A TokenStream cannot be dumped.'
- end
-
- # A TokenStream cannot be optimized. Use Tokens.
- def optimize
- raise NotImplementedError, 'A TokenStream cannot be optimized.'
- end
-
- end
-
- end
-
- if $0 == __FILE__
- $VERBOSE = true
- $: << File.join(File.dirname(__FILE__), '..')
- eval DATA.read, nil, $0, __LINE__ + 4
- end
-
- __END__
- require 'test/unit'
-
- class TokensTest < Test::Unit::TestCase
-
- def test_creation
- assert CodeRay::Tokens < Array
- tokens = nil
- assert_nothing_raised do
- tokens = CodeRay::Tokens.new
- end
- assert_kind_of Array, tokens
- end
-
- def test_adding_tokens
- tokens = CodeRay::Tokens.new
- assert_nothing_raised do
- tokens << ['string', :type]
- tokens << ['()', :operator]
- end
- assert_equal tokens.size, 2
- end
-
- def test_dump_undump
- tokens = CodeRay::Tokens.new
- assert_nothing_raised do
- tokens << ['string', :type]
- tokens << ['()', :operator]
- end
- tokens2 = nil
- assert_nothing_raised do
- tokens2 = tokens.dump.undump
- end
- assert_equal tokens, tokens2
+
+ alias text_token push
+ def begin_group kind; push :begin_group, kind end
+ def end_group kind; push :end_group, kind end
+ def begin_line kind; push :begin_line, kind end
+ def end_line kind; push :end_line, kind end
+ alias tokens concat
+
  end

- end
+ end
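
Note: the writer methods added at the end (text_token, begin_group, end_group, begin_line, end_line) are how scanners fill the token array, and they also cover the documented use case of generating tokens directly without a scanner. A hedged sketch:

    require 'coderay'

    # Build the documented '"a string"' example by hand.
    tokens = CodeRay::Tokens.new
    tokens.begin_group :string
    tokens.text_token '"', :delimiter
    tokens.text_token 'a string', :content
    tokens.text_token '"', :delimiter
    tokens.end_group :string

    tokens.count             # => 5 content/kind pairs
    puts tokens.encode(:html)
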