coderay-beta 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. data/FOLDERS +53 -0
  2. data/LICENSE +504 -0
  3. data/bin/coderay +82 -0
  4. data/bin/coderay_stylesheet +4 -0
  5. data/lib/README +129 -0
  6. data/lib/coderay.rb +320 -0
  7. data/lib/coderay/duo.rb +85 -0
  8. data/lib/coderay/encoder.rb +213 -0
  9. data/lib/coderay/encoders/_map.rb +11 -0
  10. data/lib/coderay/encoders/comment_filter.rb +43 -0
  11. data/lib/coderay/encoders/count.rb +21 -0
  12. data/lib/coderay/encoders/debug.rb +49 -0
  13. data/lib/coderay/encoders/div.rb +19 -0
  14. data/lib/coderay/encoders/filter.rb +75 -0
  15. data/lib/coderay/encoders/html.rb +305 -0
  16. data/lib/coderay/encoders/html/css.rb +70 -0
  17. data/lib/coderay/encoders/html/numerization.rb +133 -0
  18. data/lib/coderay/encoders/html/output.rb +206 -0
  19. data/lib/coderay/encoders/json.rb +69 -0
  20. data/lib/coderay/encoders/lines_of_code.rb +90 -0
  21. data/lib/coderay/encoders/null.rb +26 -0
  22. data/lib/coderay/encoders/page.rb +20 -0
  23. data/lib/coderay/encoders/span.rb +19 -0
  24. data/lib/coderay/encoders/statistic.rb +77 -0
  25. data/lib/coderay/encoders/term.rb +137 -0
  26. data/lib/coderay/encoders/text.rb +32 -0
  27. data/lib/coderay/encoders/token_class_filter.rb +84 -0
  28. data/lib/coderay/encoders/xml.rb +71 -0
  29. data/lib/coderay/encoders/yaml.rb +22 -0
  30. data/lib/coderay/for_redcloth.rb +85 -0
  31. data/lib/coderay/helpers/file_type.rb +240 -0
  32. data/lib/coderay/helpers/gzip_simple.rb +123 -0
  33. data/lib/coderay/helpers/plugin.rb +349 -0
  34. data/lib/coderay/helpers/word_list.rb +138 -0
  35. data/lib/coderay/scanner.rb +284 -0
  36. data/lib/coderay/scanners/_map.rb +23 -0
  37. data/lib/coderay/scanners/c.rb +203 -0
  38. data/lib/coderay/scanners/cpp.rb +228 -0
  39. data/lib/coderay/scanners/css.rb +210 -0
  40. data/lib/coderay/scanners/debug.rb +62 -0
  41. data/lib/coderay/scanners/delphi.rb +150 -0
  42. data/lib/coderay/scanners/diff.rb +105 -0
  43. data/lib/coderay/scanners/groovy.rb +263 -0
  44. data/lib/coderay/scanners/html.rb +182 -0
  45. data/lib/coderay/scanners/java.rb +176 -0
  46. data/lib/coderay/scanners/java/builtin_types.rb +419 -0
  47. data/lib/coderay/scanners/java_script.rb +224 -0
  48. data/lib/coderay/scanners/json.rb +112 -0
  49. data/lib/coderay/scanners/nitro_xhtml.rb +136 -0
  50. data/lib/coderay/scanners/php.rb +526 -0
  51. data/lib/coderay/scanners/plaintext.rb +21 -0
  52. data/lib/coderay/scanners/python.rb +285 -0
  53. data/lib/coderay/scanners/rhtml.rb +74 -0
  54. data/lib/coderay/scanners/ruby.rb +404 -0
  55. data/lib/coderay/scanners/ruby/patterns.rb +238 -0
  56. data/lib/coderay/scanners/scheme.rb +145 -0
  57. data/lib/coderay/scanners/sql.rb +162 -0
  58. data/lib/coderay/scanners/xml.rb +17 -0
  59. data/lib/coderay/scanners/yaml.rb +144 -0
  60. data/lib/coderay/style.rb +20 -0
  61. data/lib/coderay/styles/_map.rb +7 -0
  62. data/lib/coderay/styles/cycnus.rb +151 -0
  63. data/lib/coderay/styles/murphy.rb +132 -0
  64. data/lib/coderay/token_classes.rb +86 -0
  65. data/lib/coderay/tokens.rb +391 -0
  66. data/lib/term/ansicolor.rb +220 -0
  67. metadata +123 -0
@@ -0,0 +1,86 @@
1
module CodeRay
  class Tokens

    # Maps a token kind (a Symbol) to the short class name used for it.
    #
    # Kinds without an explicit entry fall back to their own name as a
    # String; the fallback is stored in the Hash on first lookup.
    # Kinds mapped to :NO_HIGHLIGHT carry that marker Symbol instead of a
    # class name.
    ClassOfKind = Hash.new do |h, k|
      h[k] = k.to_s
    end

    ClassOfKind.update({
      :annotation => 'at',
      :attribute_name => 'an',
      :attribute_name_fat => 'af',
      :attribute_value => 'av',
      :attribute_value_fat => 'aw',
      :bin => 'bi',
      :char => 'ch',
      :class => 'cl',
      :class_variable => 'cv',
      :color => 'cr',
      :comment => 'c',
      :complex => 'cm',
      :constant => 'co',
      :content => 'k',
      :decorator => 'de',
      :definition => 'df',
      :delimiter => 'dl',
      :directive => 'di',
      :doc => 'do',
      :doctype => 'dt',
      :doc_string => 'ds',
      :entity => 'en',
      :error => 'er',
      :escape => 'e',  # NOTE: replaced by the :delimiter class further down
      :exception => 'ex',
      :float => 'fl',
      :function => 'fu',
      :global_variable => 'gv',
      :hex => 'hx',
      :imaginary => 'cm',
      :important => 'im',
      :include => 'ic',
      :inline => 'il',
      :inline_delimiter => 'idl',
      :instance_variable => 'iv',
      :integer => 'i',
      :interpreted => 'in',
      :keyword => 'kw',
      :key => 'ke',
      :label => 'la',
      :local_variable => 'lv',
      :modifier => 'mod',
      :oct => 'oc',
      :operator_fat => 'of',
      :pre_constant => 'pc',
      :pre_type => 'pt',
      :predefined => 'pd',
      :preprocessor => 'pp',
      :pseudo_class => 'ps',
      :regexp => 'rx',
      :reserved => 'r',
      :shell => 'sh',
      :string => 's',
      :symbol => 'sy',
      :tag => 'ta',
      :tag_fat => 'tf',
      :tag_special => 'ts',
      :type => 'ty',
      :variable => 'v',
      :value => 'vl',
      :xml_text => 'xt',

      :insert => 'ins',
      :delete => 'del',
      :change => 'chg',
      :head => 'head',

      :ident => :NO_HIGHLIGHT, # 'id'
      #:operator => 'op',
      :operator => :NO_HIGHLIGHT, # 'op'
      :space => :NO_HIGHLIGHT, # 'sp'
      :plain => :NO_HIGHLIGHT,
    })

    # Aliases: these kinds share the class of another kind.
    ClassOfKind[:method] = ClassOfKind[:function]
    ClassOfKind[:open] = ClassOfKind[:close] = ClassOfKind[:delimiter]
    ClassOfKind[:nesting_delimiter] = ClassOfKind[:delimiter]
    ClassOfKind[:escape] = ClassOfKind[:delimiter]
    #ClassOfKind.default = ClassOfKind[:error] or raise 'no class found for :error!'
  end
end
@@ -0,0 +1,391 @@
1
+ module CodeRay
2
+
3
+ # = Tokens
4
+ #
5
+ # The Tokens class represents a list of tokens returned from
6
+ # a Scanner.
7
+ #
8
+ # A token is not a special object, just a two-element Array
9
+ # consisting of
10
+ # * the _token_ _text_ (the original source of the token in a String)
11
+ # * the _token_ _kind_ (a Symbol representing the type of the token)
12
+ #
13
+ # A token looks like this:
14
+ #
15
+ # ['# It looks like this', :comment]
16
+ # ['3.1415926', :float]
17
+ # ['$^', :error]
18
+ #
19
+ # Some scanners also yield some kind of sub-tokens, represented by special
20
+ # token texts, namely :open and :close .
21
+ #
22
+ # The Ruby scanner, for example, splits "a string" into:
23
+ #
24
+ # [
25
+ # [:open, :string],
26
+ # ['"', :delimiter],
27
+ # ['a string', :content],
28
+ # ['"', :delimiter],
29
+ # [:close, :string]
30
+ # ]
31
+ #
32
+ # Tokens is also the interface between Scanners and Encoders:
33
+ # The input is split and saved into a Tokens object. The Encoder
34
+ # then builds the output from this object.
35
+ #
36
+ # Thus, the syntax below becomes clear:
37
+ #
38
+ # CodeRay.scan('price = 2.59', :ruby).html
39
+ # # the Tokens object is here -------^
40
+ #
41
+ # See how small it is? ;)
42
+ #
43
+ # Tokens gives you the power to handle pre-scanned code very easily:
44
+ # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
45
+ # that you put in your DB.
46
+ #
47
+ # Tokens' subclass TokenStream allows streaming to save memory.
48
class Tokens < Array

  # The Scanner instance that created the tokens.
  attr_accessor :scanner

  # Whether the object is a TokenStream.
  #
  # Returns false.
  def stream?
    false
  end

  # Iterates over all tokens.
  #
  # Without +kind_filter+ this behaves like Array#each.
  # If a filter is given, only tokens of that kind are yielded,
  # as +text+ and +kind+.
  def each(kind_filter = nil, &block)
    return super(&block) unless kind_filter
    super() do |text, kind|
      yield text, kind if kind == kind_filter
    end
  end

  # Iterates over all text tokens.
  # Range tokens like [:open, :string] are left out.
  #
  # Example:
  #   tokens.each_text_token { |text, kind| text.replace html_escape(text) }
  def each_text_token
    each do |text, kind|
      yield text, kind if text.is_a? ::String
    end
  end

  # Encode the tokens using encoder.
  #
  # encoder can be
  # * a symbol like :html or :statistic
  # * an Encoder class
  # * an Encoder object
  #
  # options are passed to the encoder.
  def encode(encoder, options = {})
    unless encoder.is_a? Encoders::Encoder
      # A class is instantiated directly; anything else is looked up
      # in Encoders first.
      encoder_class = encoder.is_a?(Class) ? encoder : Encoders[encoder]
      encoder = encoder_class.new options
    end
    encoder.encode_tokens self, options
  end


  # Turn into a string using Encoders::Text.
  #
  # +options+ are passed to the encoder if given.
  def to_s(options = {})
    encode :text, options
  end

  # Redirects unknown methods to encoder calls.
  #
  # For example, if you call +tokens.html+, the HTML encoder
  # is used to highlight the tokens.
  def method_missing(meth, options = {})
    Encoders[meth].new(options).encode_tokens self
  end

  # Returns the tokens compressed by joining consecutive
  # tokens of the same kind.
  #
  # This can not be undone, but should yield the same output
  # in most Encoders. It basically makes the output smaller.
  #
  # Combined with dump, it saves space for the cost of time.
  #
  # If the scanner is written carefully, this is not required -
  # for example, consecutive //-comment lines could already be
  # joined in one comment token by the Scanner.
  #
  # The receiver and its token texts are left untouched; joined
  # texts are built on copies.
  def optimize
    print ' Tokens#optimize: before: %d - ' % size if $DEBUG
    last_kind = last_text = nil
    optimized = self.class.new
    each do |text, kind|
      if text.is_a? ::String
        if kind == last_kind
          last_text << text
        else
          optimized << [last_text, last_kind] if last_kind
          # Copy, so that appending later never alters the original token.
          last_text = text.dup
          last_kind = kind
        end
      else
        # Range tokens (like [:open, :string]) end the current run.
        optimized << [last_text, last_kind] if last_kind
        last_kind = last_text = nil
        optimized << [text, kind]
      end
    end
    optimized << [last_text, last_kind] if last_kind
    if $DEBUG
      saved = size - optimized.size
      percent = size.zero? ? 0.0 : 100.0 * saved / size
      print 'after: %d (%d saved = %2.0f%%)' %
        [optimized.size, saved, percent]
    end
    optimized
  end

  # Compact the object itself; see optimize.
  def optimize!
    replace optimize
  end

  # Ensure that all :open tokens have a corresponding :close one
  # (and all :begin_line tokens a corresponding :end_line one).
  #
  # Returns a new object of the same class:
  # * a closing token whose opener was never seen is dropped
  # * a closing token that arrives while inner tokens are still open
  #   closes those inner tokens first
  # * tokens still open at the end are closed
  def fix
    fixed = self.class.new
    # Stack of the closing tokens we still expect, innermost last.
    opened = []
    each do |text, kind|
      case text
      when :open
        opened.push [:close, kind]
      when :begin_line
        opened.push [:end_line, kind]
      when :close, :end_line
        closer = [text, kind]
        # Never opened: delete the closing token (just skip it).
        next unless opened.include? closer
        # Opened earlier: also close the tokens in between.
        until (pending = opened.pop) == closer
          fixed << pending
        end
      end
      fixed << [text, kind]
    end
    # Close remaining opened tokens, innermost first.
    fixed << opened.pop until opened.empty?
    fixed
  end

  # Fix the object itself; see fix.
  def fix!
    replace fix
  end

  # TODO: Scanner#split_into_lines
  #
  # Makes sure that:
  # - newlines are single tokens
  #   (which means all other token are single-line)
  # - there are no open tokens at the end the line
  #
  # This makes it simple for encoders that work line-oriented,
  # like HTML with list-style numeration.
  #
  # Not implemented yet; always raises NotImplementedError.
  def split_into_lines
    raise NotImplementedError
  end

  # In-place variant of split_into_lines.
  def split_into_lines!
    replace split_into_lines
  end

  # Dumps the object into a String that can be saved
  # in files or databases.
  #
  # The dump is created with Marshal.dump;
  # In addition, it is gzipped using GZip.gzip.
  #
  # The returned String object includes Undumping
  # so it has an #undump method. See Tokens.load.
  #
  # You can configure the level of compression,
  # but the default value 7 should be what you want
  # in most cases as it is a good compromise between
  # speed and compression rate.
  #
  # See GZip module.
  def dump(gzip_level = 7)
    require 'coderay/helpers/gzip_simple'
    dumped = Marshal.dump(self).gzip gzip_level
    dumped.extend Undumping
  end

  # The total size of the tokens.
  # Should be equal to the input size before
  # scanning.
  def text_size
    total = 0
    each_text_token { |text, _kind| total += text.size }
    total
  end

  # The text of all text tokens, joined into one String.
  # Should be equal to the input before scanning.
  def text
    map { |text, _kind| text if text.is_a? ::String }.join
  end

  # Include this module to give an object an #undump
  # method.
  #
  # The string returned by Tokens.dump includes Undumping.
  module Undumping
    # Calls Tokens.load with itself.
    def undump
      Tokens.load self
    end
  end

  # Undump the object: unzip it using GZip.gunzip, then
  # restore it using Marshal.load.
  #
  # The result is commonly a Tokens object, but
  # this is not guaranteed.
  #
  # NOTE: Marshal.load can instantiate arbitrary objects; only pass
  # dumps that come from a trusted source.
  def self.load(dump)
    require 'coderay/helpers/gzip_simple'
    Marshal.load dump.gunzip
  end

end
275
+
276
+
277
+ # = TokenStream
278
+ #
279
+ # The TokenStream class is a fake Array without elements.
280
+ #
281
+ # It redirects the method << to a block given at creation.
282
+ #
283
+ # This allows scanners and Encoders to use streaming (no
284
+ # tokens are saved, the input is highlighted at the same time it
285
+ # is scanned) with the same code.
286
+ #
287
+ # See CodeRay.encode_stream and CodeRay.scan_stream
288
class TokenStream < Tokens

  # Whether the object is a TokenStream.
  #
  # Returns true.
  def stream?
    true
  end

  # The Array is empty, but size counts the tokens given by <<.
  attr_reader :size

  # Creates a new TokenStream that calls +block+ whenever
  # its << method is called. The token is splatted, so the block
  # receives the token's elements as separate arguments.
  #
  # Raises ArgumentError if no block is given.
  #
  # Example:
  #
  #   require 'coderay'
  #
  #   token_stream = CodeRay::TokenStream.new do |text, kind|
  #     puts 'kind: %s, text size: %d.' % [kind, text.size]
  #   end
  #
  #   token_stream << ['/\d+/', :regexp]
  #   #-> kind: regexp, text size: 5.
  #
  def initialize(&block)
    raise ArgumentError, 'Block expected for streaming.' unless block
    @callback = block
    @size = 0
  end

  # Calls +block+ with +token+ and increments size.
  # The token itself is not stored.
  #
  # Returns self.
  def <<(token)
    @callback.call(*token)
    @size += 1
    self
  end

  # This method is not implemented due to speed reasons. Use Tokens.
  def text_size
    raise NotImplementedError,
      'This method is not implemented due to speed reasons.'
  end

  # A TokenStream cannot be dumped. Use Tokens.
  #
  # Accepts (and ignores) +gzip_level+ like Tokens#dump, so that every
  # call raises NotImplementedError rather than an ArgumentError.
  def dump(gzip_level = 7)
    raise NotImplementedError, 'A TokenStream cannot be dumped.'
  end

  # A TokenStream cannot be optimized. Use Tokens.
  def optimize
    raise NotImplementedError, 'A TokenStream cannot be optimized.'
  end

end
346
+
347
+ end
348
+
349
+ if $0 == __FILE__  # only when this file is executed directly as a script
350
+ $VERBOSE = true
351
+ $: << File.join(File.dirname(__FILE__), '..')  # add the parent directory to the load path
352
+ eval DATA.read, nil, $0, __LINE__ + 4  # evaluate the code after __END__; the offset is the line where that code starts
353
+ end
354
+
355
+ __END__
356
+ require 'test/unit'
357
+
358
# Unit tests for CodeRay::Tokens; evaluated only when the file is run
# directly.
class TokensTest < Test::Unit::TestCase

  # Tokens is an Array subclass and can be created without arguments.
  def test_creation
    assert CodeRay::Tokens < Array
    tokens = nil
    assert_nothing_raised do
      tokens = CodeRay::Tokens.new
    end
    assert_kind_of Array, tokens
  end

  # Tokens are appended as [text, kind] pairs.
  def test_adding_tokens
    tokens = CodeRay::Tokens.new
    assert_nothing_raised do
      tokens << ['string', :type]
      tokens << ['()', :operator]
    end
    # assert_equal takes the expected value first.
    assert_equal 2, tokens.size
  end

  # A dump can be restored into an equal object.
  def test_dump_undump
    tokens = CodeRay::Tokens.new
    assert_nothing_raised do
      tokens << ['string', :type]
      tokens << ['()', :operator]
    end
    tokens2 = nil
    assert_nothing_raised do
      tokens2 = tokens.dump.undump
    end
    assert_equal tokens, tokens2
  end

end