coderay-beta 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. data/FOLDERS +53 -0
  2. data/LICENSE +504 -0
  3. data/bin/coderay +82 -0
  4. data/bin/coderay_stylesheet +4 -0
  5. data/lib/README +129 -0
  6. data/lib/coderay.rb +320 -0
  7. data/lib/coderay/duo.rb +85 -0
  8. data/lib/coderay/encoder.rb +213 -0
  9. data/lib/coderay/encoders/_map.rb +11 -0
  10. data/lib/coderay/encoders/comment_filter.rb +43 -0
  11. data/lib/coderay/encoders/count.rb +21 -0
  12. data/lib/coderay/encoders/debug.rb +49 -0
  13. data/lib/coderay/encoders/div.rb +19 -0
  14. data/lib/coderay/encoders/filter.rb +75 -0
  15. data/lib/coderay/encoders/html.rb +305 -0
  16. data/lib/coderay/encoders/html/css.rb +70 -0
  17. data/lib/coderay/encoders/html/numerization.rb +133 -0
  18. data/lib/coderay/encoders/html/output.rb +206 -0
  19. data/lib/coderay/encoders/json.rb +69 -0
  20. data/lib/coderay/encoders/lines_of_code.rb +90 -0
  21. data/lib/coderay/encoders/null.rb +26 -0
  22. data/lib/coderay/encoders/page.rb +20 -0
  23. data/lib/coderay/encoders/span.rb +19 -0
  24. data/lib/coderay/encoders/statistic.rb +77 -0
  25. data/lib/coderay/encoders/term.rb +137 -0
  26. data/lib/coderay/encoders/text.rb +32 -0
  27. data/lib/coderay/encoders/token_class_filter.rb +84 -0
  28. data/lib/coderay/encoders/xml.rb +71 -0
  29. data/lib/coderay/encoders/yaml.rb +22 -0
  30. data/lib/coderay/for_redcloth.rb +85 -0
  31. data/lib/coderay/helpers/file_type.rb +240 -0
  32. data/lib/coderay/helpers/gzip_simple.rb +123 -0
  33. data/lib/coderay/helpers/plugin.rb +349 -0
  34. data/lib/coderay/helpers/word_list.rb +138 -0
  35. data/lib/coderay/scanner.rb +284 -0
  36. data/lib/coderay/scanners/_map.rb +23 -0
  37. data/lib/coderay/scanners/c.rb +203 -0
  38. data/lib/coderay/scanners/cpp.rb +228 -0
  39. data/lib/coderay/scanners/css.rb +210 -0
  40. data/lib/coderay/scanners/debug.rb +62 -0
  41. data/lib/coderay/scanners/delphi.rb +150 -0
  42. data/lib/coderay/scanners/diff.rb +105 -0
  43. data/lib/coderay/scanners/groovy.rb +263 -0
  44. data/lib/coderay/scanners/html.rb +182 -0
  45. data/lib/coderay/scanners/java.rb +176 -0
  46. data/lib/coderay/scanners/java/builtin_types.rb +419 -0
  47. data/lib/coderay/scanners/java_script.rb +224 -0
  48. data/lib/coderay/scanners/json.rb +112 -0
  49. data/lib/coderay/scanners/nitro_xhtml.rb +136 -0
  50. data/lib/coderay/scanners/php.rb +526 -0
  51. data/lib/coderay/scanners/plaintext.rb +21 -0
  52. data/lib/coderay/scanners/python.rb +285 -0
  53. data/lib/coderay/scanners/rhtml.rb +74 -0
  54. data/lib/coderay/scanners/ruby.rb +404 -0
  55. data/lib/coderay/scanners/ruby/patterns.rb +238 -0
  56. data/lib/coderay/scanners/scheme.rb +145 -0
  57. data/lib/coderay/scanners/sql.rb +162 -0
  58. data/lib/coderay/scanners/xml.rb +17 -0
  59. data/lib/coderay/scanners/yaml.rb +144 -0
  60. data/lib/coderay/style.rb +20 -0
  61. data/lib/coderay/styles/_map.rb +7 -0
  62. data/lib/coderay/styles/cycnus.rb +151 -0
  63. data/lib/coderay/styles/murphy.rb +132 -0
  64. data/lib/coderay/token_classes.rb +86 -0
  65. data/lib/coderay/tokens.rb +391 -0
  66. data/lib/term/ansicolor.rb +220 -0
  67. metadata +123 -0
@@ -0,0 +1,86 @@
1
module CodeRay
  class Tokens

    # Maps a token kind (a Symbol) to the short class name used for it.
    #
    # Kinds that are not listed fall back to their own name as a String;
    # the fallback value is stored in the hash the first time it is looked up.
    # Kinds mapped to :NO_HIGHLIGHT carry that marker instead of a class name.
    ClassOfKind = Hash.new { |table, kind| table[kind] = kind.to_s }

    ClassOfKind.update({
      :annotation => 'at',
      :attribute_name => 'an',
      :attribute_name_fat => 'af',
      :attribute_value => 'av',
      :attribute_value_fat => 'aw',
      :bin => 'bi',
      :char => 'ch',
      :class => 'cl',
      :class_variable => 'cv',
      :color => 'cr',
      :comment => 'c',
      :complex => 'cm',
      :constant => 'co',
      :content => 'k',
      :decorator => 'de',
      :definition => 'df',
      :delimiter => 'dl',
      :directive => 'di',
      :doc => 'do',
      :doctype => 'dt',
      :doc_string => 'ds',
      :entity => 'en',
      :error => 'er',
      :escape => 'e',
      :exception => 'ex',
      :float => 'fl',
      :function => 'fu',
      :global_variable => 'gv',
      :hex => 'hx',
      :imaginary => 'cm',
      :important => 'im',
      :include => 'ic',
      :inline => 'il',
      :inline_delimiter => 'idl',
      :instance_variable => 'iv',
      :integer => 'i',
      :interpreted => 'in',
      :keyword => 'kw',
      :key => 'ke',
      :label => 'la',
      :local_variable => 'lv',
      :modifier => 'mod',
      :oct => 'oc',
      :operator_fat => 'of',
      :pre_constant => 'pc',
      :pre_type => 'pt',
      :predefined => 'pd',
      :preprocessor => 'pp',
      :pseudo_class => 'ps',
      :regexp => 'rx',
      :reserved => 'r',
      :shell => 'sh',
      :string => 's',
      :symbol => 'sy',
      :tag => 'ta',
      :tag_fat => 'tf',
      :tag_special => 'ts',
      :type => 'ty',
      :variable => 'v',
      :value => 'vl',
      :xml_text => 'xt',

      :insert => 'ins',
      :delete => 'del',
      :change => 'chg',
      :head => 'head',

      # Kinds without a class of their own (the unused short names are
      # 'id', 'op' and 'sp').
      :ident => :NO_HIGHLIGHT,
      :operator => :NO_HIGHLIGHT,
      :space => :NO_HIGHLIGHT,
      :plain => :NO_HIGHLIGHT,
    })

    # Kinds that reuse the class of another kind. The pairs are applied in
    # this order; :escape replaces the 'e' entry from the table above.
    [
      [:method, :function],
      [:close, :delimiter],
      [:open, :delimiter],
      [:nesting_delimiter, :delimiter],
      [:escape, :delimiter],
    ].each do |alias_kind, source_kind|
      ClassOfKind[alias_kind] = ClassOfKind[source_kind]
    end

    # Disabled alternative: use the class of :error for unknown kinds.
    #ClassOfKind.default = ClassOfKind[:error] or raise 'no class found for :error!'

  end
end
@@ -0,0 +1,391 @@
1
+ module CodeRay
2
+
3
+ # = Tokens
4
+ #
5
+ # The Tokens class represents a list of tokens returned from
6
+ # a Scanner.
7
+ #
8
+ # A token is not a special object, just a two-element Array
9
+ # consisting of
10
+ # * the _token_ _text_ (the original source of the token in a String)
11
+ # * the _token_ _kind_ (a Symbol representing the type of the token)
12
+ #
13
+ # A token looks like this:
14
+ #
15
+ # ['# It looks like this', :comment]
16
+ # ['3.1415926', :float]
17
+ # ['$^', :error]
18
+ #
19
+ # Some scanners also yield some kind of sub-tokens, represented by special
20
+ # token texts, namely :open and :close .
21
+ #
22
+ # The Ruby scanner, for example, splits "a string" into:
23
+ #
24
+ # [
25
+ # [:open, :string],
26
+ # ['"', :delimiter],
27
+ # ['a string', :content],
28
+ # ['"', :delimiter],
29
+ # [:close, :string]
30
+ # ]
31
+ #
32
+ # Tokens is also the interface between Scanners and Encoders:
33
+ # The input is split and saved into a Tokens object. The Encoder
34
+ # then builds the output from this object.
35
+ #
36
+ # Thus, the syntax below becomes clear:
37
+ #
38
+ # CodeRay.scan('price = 2.59', :ruby).html
39
+ # # the Tokens object is here -------^
40
+ #
41
+ # See how small it is? ;)
42
+ #
43
+ # Tokens gives you the power to handle pre-scanned code very easily:
44
+ # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
45
+ # that you put in your DB.
46
+ #
47
+ # Tokens' subclass TokenStream allows streaming to save memory.
48
class Tokens < Array

  # The Scanner instance that created the tokens.
  attr_accessor :scanner

  # Whether the object is a TokenStream.
  #
  # Returns false.
  def stream?
    false
  end

  # Iterates over all tokens.
  #
  # Without +kind_filter+, every token is passed to the block as it is
  # stored. With +kind_filter+, only tokens whose second element equals
  # the filter are yielded, as two block arguments (text, kind).
  def each(kind_filter = nil, &block)
    return super(&block) unless kind_filter
    super() do |text, kind|
      yield text, kind if kind == kind_filter
    end
  end

  # Iterates over all text tokens, i.e. tokens whose first element is a
  # String. Range tokens like [:open, :string] are left out.
  #
  # Example:
  #   tokens.each_text_token { |text, kind| text.replace html_escape(text) }
  def each_text_token
    each do |text, kind|
      yield text, kind if text.is_a? ::String
    end
  end

  # Encode the tokens using encoder.
  #
  # encoder can be
  # * a symbol like :html or :statistic
  # * an Encoder class
  # * an Encoder object
  #
  # options are passed to the encoder.
  def encode(encoder, options = {})
    unless encoder.is_a? Encoders::Encoder
      # A class is instantiated as given; anything else is looked up in
      # Encoders by name.
      encoder_class = encoder.is_a?(Class) ? encoder : Encoders[encoder]
      encoder = encoder_class.new options
    end
    encoder.encode_tokens self, options
  end

  # Turn into a string using the :text encoder.
  #
  # +options+ are passed to the encoder if given.
  def to_s(options = {})
    encode :text, options
  end

  # Redirects unknown methods to encoder calls.
  #
  # For example, if you call +tokens.html+, the encoder registered as
  # :html in Encoders is created with +options+ and used to encode the
  # tokens.
  def method_missing(meth, options = {})
    Encoders[meth].new(options).encode_tokens self
  end

  # Returns the tokens compressed by joining consecutive
  # text tokens of the same kind.
  #
  # The receiver is left untouched: joined texts are built on copies.
  # The result can not be split up again, but should yield the same
  # output in most Encoders. It basically makes the output smaller.
  #
  # Combined with dump, it saves space for the cost of time.
  #
  # If the scanner is written carefully, this is not required -
  # for example, consecutive //-comment lines could already be
  # joined in one comment token by the Scanner.
  def optimize
    print ' Tokens#optimize: before: %d - ' % size if $DEBUG
    compacted = self.class.new
    run_text = run_kind = nil
    each do |text, kind|
      if text.is_a? ::String
        if run_text && kind == run_kind
          run_text << text
        else
          compacted << [run_text, run_kind] if run_text
          # Start the run on a copy, so that appending to it never
          # changes a string that belongs to self.
          run_text = text.dup
          run_kind = kind
        end
      else
        # A range token ends the current run and is passed through.
        compacted << [run_text, run_kind] if run_text
        run_text = run_kind = nil
        compacted << [text, kind]
      end
    end
    compacted << [run_text, run_kind] if run_text
    if $DEBUG
      saved = size - compacted.size
      percent = size.zero? ? 0.0 : saved * 100.0 / size
      print 'after: %d (%d saved = %2.0f%%)' % [compacted.size, saved, percent]
    end
    compacted
  end

  # Compact the object itself; see optimize.
  def optimize!
    replace optimize
  end

  # Ensure that all :open tokens have a correspondent :close one
  # (and all :begin_line tokens a correspondent :end_line one).
  #
  # Returns a new object of the same class in which
  # * a closing token that was never opened is dropped,
  # * a closing token that skips over tokens opened after its own
  #   opener is preceded by the closing tokens for those,
  # * tokens still open at the end are closed, innermost first.
  def fix
    fixed = self.class.new
    # Closing tokens still owed; the innermost one is last.
    pending = []
    each do |type, kind|
      case type
      when :open
        pending.push [:close, kind]
      when :begin_line
        pending.push [:end_line, kind]
      when :close, :end_line
        closer = [type, kind]
        # Never opened: skip the token, leave the stack alone.
        next unless pending.include? closer
        # Opened earlier: close everything that was opened after it.
        fixed << pending.pop until pending.last == closer
        pending.pop
      end
      fixed << [type, kind]
    end
    # Close remaining opened tokens.
    fixed << pending.pop until pending.empty?
    fixed
  end

  # Repair the object itself; see fix.
  def fix!
    replace fix
  end

  # TODO: Scanner#split_into_lines
  #
  # Intended to make sure that:
  # - newlines are single tokens
  #   (which means all other tokens are single-line)
  # - there are no open tokens at the end of the line
  #
  # This would make it simple for encoders that work line-oriented,
  # like HTML with list-style numeration.
  #
  # Not implemented yet: always raises NotImplementedError.
  def split_into_lines
    raise NotImplementedError
  end

  # In-place variant of split_into_lines; raises NotImplementedError
  # as long as that method does.
  def split_into_lines!
    replace split_into_lines
  end

  # Dumps the object into a String that can be saved
  # in files or databases.
  #
  # The dump is created with Marshal.dump;
  # in addition, it is gzipped by calling #gzip on the result, a
  # method expected to come from coderay/helpers/gzip_simple.
  #
  # The returned String object is extended with Undumping,
  # so it has an #undump method. See Tokens.load.
  #
  # +gzip_level+ is handed to #gzip; the default is 7.
  def dump(gzip_level = 7)
    # Loaded here so the helper is only required when dumping.
    require 'coderay/helpers/gzip_simple'
    data = Marshal.dump self
    data = data.gzip gzip_level
    data.extend Undumping
  end

  # The total size of all text tokens.
  # Should be equal to the input size before
  # scanning.
  def text_size
    total = 0
    each_text_token { |text, _kind| total += text.size }
    total
  end

  # The texts of all text tokens, joined into one String.
  # Range tokens like [:open, :string] contribute nothing.
  def text
    map { |text, _kind| text if text.is_a? ::String }.join
  end

  # Include this module to give an object an #undump
  # method.
  #
  # The string returned by Tokens#dump is extended with Undumping.
  module Undumping
    # Calls Tokens.load with itself.
    def undump
      Tokens.load self
    end
  end

  # Restores an object from a dump: the String is unzipped by calling
  # #gunzip on it, then loaded with Marshal.load.
  #
  # The result is commonly a Tokens object, but
  # this is not guaranteed.
  #
  # NOTE(review): Marshal.load must only be used on trusted data; do not
  # pass dumps that come from an untrusted source.
  def self.load(dump)
    require 'coderay/helpers/gzip_simple'
    Marshal.load dump.gunzip
  end

end
275
+
276
+
277
+ # = TokenStream
278
+ #
279
+ # The TokenStream class is a fake Array without elements.
280
+ #
281
+ # It redirects the method << to a block given at creation.
282
+ #
283
+ # This allows scanners and Encoders to use streaming (no
284
+ # tokens are saved, the input is highlighted the same time it
285
+ # is scanned) with the same code.
286
+ #
287
+ # See CodeRay.encode_stream and CodeRay.scan_stream
288
class TokenStream < Tokens

  # Whether the object is a TokenStream.
  #
  # Returns true.
  def stream?
    true
  end

  # The Array is empty, but size counts the tokens given by <<.
  attr_reader :size

  # Creates a new TokenStream that calls +block+ whenever
  # its << method is called.
  #
  # Raises ArgumentError if no block is given.
  #
  # Example:
  #
  #   require 'coderay'
  #
  #   token_stream = CodeRay::TokenStream.new do |kind, text|
  #     puts 'kind: %s, text size: %d.' % [kind, text.size]
  #   end
  #
  #   token_stream << [:regexp, '/\d+/']
  #   #-> kind: regexp, text size: 5.
  #
  def initialize(&block)
    raise ArgumentError, 'Block expected for streaming.' unless block
    @callback = block
    @size = 0
  end

  # Calls +block+ with the elements of +token+ as arguments and
  # increments size. The token itself is not stored.
  #
  # Returns self.
  def <<(token)
    @callback.call(*token)
    @size += 1
    self
  end

  # This method is not implemented due to speed reasons. Use Tokens.
  def text_size
    raise NotImplementedError,
      'This method is not implemented due to speed reasons.'
  end

  # A TokenStream cannot be dumped. Use Tokens.
  #
  # +gzip_level+ is accepted only to match the signature of Tokens#dump,
  # so that every call raises NotImplementedError instead of ArgumentError.
  def dump(gzip_level = 7)
    raise NotImplementedError, 'A TokenStream cannot be dumped.'
  end

  # A TokenStream cannot be optimized. Use Tokens.
  def optimize
    raise NotImplementedError, 'A TokenStream cannot be optimized.'
  end

end
346
+
347
+ end
348
+
349
+ if $0 == __FILE__ # only when this file is executed directly, not when it is required
350
+ $VERBOSE = true # turn on Ruby's warnings
351
+ $: << File.join(File.dirname(__FILE__), '..') # add the directory above this file's directory to the load path
352
+ eval DATA.read, nil, $0, __LINE__ + 4 # run the text after __END__; the last argument is the line number reported for its first line
353
+ end
354
+
355
+ __END__
356
+ require 'test/unit'
357
+
358
# Tests for CodeRay::Tokens: creation, appending tokens, and a
# dump/undump round trip.
class TokensTest < Test::Unit::TestCase

  # Tokens is an Array subclass and can be created without arguments.
  def test_creation
    assert CodeRay::Tokens < Array
    tokens = nil
    assert_nothing_raised do
      tokens = CodeRay::Tokens.new
    end
    assert_kind_of Array, tokens
  end

  # Appending [text, kind] pairs grows the list by one per token.
  def test_adding_tokens
    tokens = CodeRay::Tokens.new
    assert_nothing_raised do
      tokens << ['string', :type]
      tokens << ['()', :operator]
    end
    # assert_equal takes the expected value first.
    assert_equal 2, tokens.size
  end

  # A dumped token list is equal to the original after undump.
  def test_dump_undump
    tokens = CodeRay::Tokens.new
    assert_nothing_raised do
      tokens << ['string', :type]
      tokens << ['()', :operator]
    end
    tokens2 = nil
    assert_nothing_raised do
      tokens2 = tokens.dump.undump
    end
    assert_equal tokens, tokens2
  end

end