raldred-coderay 0.9.0 → 0.9.339

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. data/lib/README +128 -0
  2. data/lib/coderay.rb +319 -0
  3. data/lib/coderay/duo.rb +85 -0
  4. data/lib/coderay/encoder.rb +187 -0
  5. data/lib/coderay/encoders/_map.rb +9 -0
  6. data/lib/coderay/encoders/count.rb +21 -0
  7. data/lib/coderay/encoders/debug.rb +49 -0
  8. data/lib/coderay/encoders/div.rb +20 -0
  9. data/lib/coderay/encoders/html.rb +306 -0
  10. data/lib/coderay/encoders/html/css.rb +70 -0
  11. data/lib/coderay/encoders/html/numerization.rb +133 -0
  12. data/lib/coderay/encoders/html/output.rb +206 -0
  13. data/lib/coderay/encoders/json.rb +19 -0
  14. data/lib/coderay/encoders/null.rb +26 -0
  15. data/lib/coderay/encoders/page.rb +21 -0
  16. data/lib/coderay/encoders/span.rb +20 -0
  17. data/lib/coderay/encoders/statistic.rb +77 -0
  18. data/lib/coderay/encoders/term.rb +114 -0
  19. data/lib/coderay/encoders/text.rb +32 -0
  20. data/lib/coderay/encoders/tokens.rb +44 -0
  21. data/lib/coderay/encoders/xml.rb +71 -0
  22. data/lib/coderay/encoders/yaml.rb +22 -0
  23. data/lib/coderay/for_redcloth.rb +73 -0
  24. data/lib/coderay/helpers/file_type.rb +226 -0
  25. data/lib/coderay/helpers/gzip_simple.rb +123 -0
  26. data/lib/coderay/helpers/plugin.rb +339 -0
  27. data/lib/coderay/helpers/word_list.rb +124 -0
  28. data/lib/coderay/scanner.rb +271 -0
  29. data/lib/coderay/scanners/_map.rb +21 -0
  30. data/lib/coderay/scanners/c.rb +166 -0
  31. data/lib/coderay/scanners/css.rb +202 -0
  32. data/lib/coderay/scanners/debug.rb +61 -0
  33. data/lib/coderay/scanners/delphi.rb +150 -0
  34. data/lib/coderay/scanners/diff.rb +104 -0
  35. data/lib/coderay/scanners/groovy.rb +271 -0
  36. data/lib/coderay/scanners/html.rb +175 -0
  37. data/lib/coderay/scanners/java.rb +173 -0
  38. data/lib/coderay/scanners/java/builtin_types.rb +419 -0
  39. data/lib/coderay/scanners/java_script.rb +195 -0
  40. data/lib/coderay/scanners/json.rb +107 -0
  41. data/lib/coderay/scanners/nitro_xhtml.rb +132 -0
  42. data/lib/coderay/scanners/php.rb +404 -0
  43. data/lib/coderay/scanners/plaintext.rb +18 -0
  44. data/lib/coderay/scanners/python.rb +232 -0
  45. data/lib/coderay/scanners/rhtml.rb +71 -0
  46. data/lib/coderay/scanners/ruby.rb +386 -0
  47. data/lib/coderay/scanners/ruby/patterns.rb +232 -0
  48. data/lib/coderay/scanners/scheme.rb +142 -0
  49. data/lib/coderay/scanners/sql.rb +162 -0
  50. data/lib/coderay/scanners/xml.rb +17 -0
  51. data/lib/coderay/scanners/yaml.rb +142 -0
  52. data/lib/coderay/style.rb +20 -0
  53. data/lib/coderay/styles/_map.rb +7 -0
  54. data/lib/coderay/styles/cycnus.rb +151 -0
  55. data/lib/coderay/styles/murphy.rb +132 -0
  56. data/lib/coderay/token_classes.rb +86 -0
  57. data/lib/coderay/tokens.rb +387 -0
  58. metadata +59 -1
@@ -0,0 +1,86 @@
1
module CodeRay
  class Tokens
    # Maps a token kind (a Symbol) to its abbreviated class name.
    #
    # A kind that is missing from the table is answered with its own name
    # as a String; that answer is stored in the table on first lookup.
    ClassOfKind = Hash.new { |table, kind| table[kind] = kind.to_s }

    abbreviations = {
      :annotation => 'at',
      :attribute_name => 'an',
      :attribute_name_fat => 'af',
      :attribute_value => 'av',
      :attribute_value_fat => 'aw',
      :bin => 'bi',
      :char => 'ch',
      :class => 'cl',
      :class_variable => 'cv',
      :color => 'cr',
      :comment => 'c',
      :complex => 'cm',
      :constant => 'co',
      :content => 'k',
      :decorator => 'de',
      :definition => 'df',
      :delimiter => 'dl',
      :directive => 'di',
      :doc => 'do',
      :doctype => 'dt',
      :doc_string => 'ds',
      :entity => 'en',
      :error => 'er',
      :escape => 'e',
      :exception => 'ex',
      :float => 'fl',
      :function => 'fu',
      :global_variable => 'gv',
      :hex => 'hx',
      :imaginary => 'cm',
      :important => 'im',
      :include => 'ic',
      :inline => 'il',
      :inline_delimiter => 'idl',
      :instance_variable => 'iv',
      :integer => 'i',
      :interpreted => 'in',
      :keyword => 'kw',
      :key => 'ke',
      :label => 'la',
      :local_variable => 'lv',
      :modifier => 'mod',
      :oct => 'oc',
      :operator_fat => 'of',
      :pre_constant => 'pc',
      :pre_type => 'pt',
      :predefined => 'pd',
      :preprocessor => 'pp',
      :pseudo_class => 'ps',
      :regexp => 'rx',
      :reserved => 'r',
      :shell => 'sh',
      :string => 's',
      :symbol => 'sy',
      :tag => 'ta',
      :tag_fat => 'tf',
      :tag_special => 'ts',
      :type => 'ty',
      :variable => 'v',
      :value => 'vl',
      :xml_text => 'xt',

      # kinds used for diffs
      :insert => 'ins',
      :delete => 'del',
      :change => 'chg',
      :head => 'head',

      # kinds that get no class of their own
      :ident => :NO_HIGHLIGHT, # 'id'
      :operator => :NO_HIGHLIGHT, # 'op'
      :space => :NO_HIGHLIGHT, # 'sp'
      :plain => :NO_HIGHLIGHT,
    }
    ClassOfKind.update(abbreviations)

    # Kinds that reuse the class of another kind, as [alias, source] pairs.
    # Note that :escape is listed with 'e' in the table above and is
    # re-pointed to the class of :delimiter here.
    [
      [:method, :function],
      [:procedure, :function],
      [:close, :delimiter],
      [:open, :delimiter],
      [:nesting_delimiter, :delimiter],
      [:escape, :delimiter],
    ].each do |kind, source|
      ClassOfKind[kind] = ClassOfKind[source]
    end
  end
end
@@ -0,0 +1,387 @@
1
+ module CodeRay
2
+
3
+ # = Tokens
4
+ #
5
+ # The Tokens class represents a list of tokens returned from
6
+ # a Scanner.
7
+ #
8
+ # A token is not a special object, just a two-element Array
9
+ # consisting of
10
+ # * the _token_ _text_ (the original source of the token in a String)
11
+ # * the _token_ _kind_ (a Symbol representing the type of the token)
12
+ #
13
+ # A token looks like this:
14
+ #
15
+ # ['# It looks like this', :comment]
16
+ # ['3.1415926', :float]
17
+ # ['$^', :error]
18
+ #
19
+ # Some scanners also yield some kind of sub-tokens, represented by special
20
+ # token texts, namely :open and :close .
21
+ #
22
+ # The Ruby scanner, for example, splits "a string" into:
23
+ #
24
+ # [
25
+ # [:open, :string],
26
+ # ['"', :delimiter],
27
+ # ['a string', :content],
28
+ # ['"', :delimiter],
29
+ # [:close, :string]
30
+ # ]
31
+ #
32
+ # Tokens is also the interface between Scanners and Encoders:
33
+ # The input is split and saved into a Tokens object. The Encoder
34
+ # then builds the output from this object.
35
+ #
36
+ # Thus, the syntax below becomes clear:
37
+ #
38
+ # CodeRay.scan('price = 2.59', :ruby).html
39
+ # # the Tokens object is here -------^
40
+ #
41
+ # See how small it is? ;)
42
+ #
43
+ # Tokens gives you the power to handle pre-scanned code very easily:
44
+ # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
45
+ # that you put in your DB.
46
+ #
47
+ # Tokens' subclass TokenStream allows streaming to save memory.
48
+ class Tokens < Array
49
+
50
class << self

  # Convert the token to a string.
  #
  # A token with a String text is written as
  #   "<type>\t<escaped text>\n"
  # and any other token (for example one with the text :open) as
  #   ":<text>\t<type>\t\n"
  #
  # This format is used by Encoders.Tokens.
  # It can be reverted using read_token.
  def write_token text, type
    if text.is_a? String
      "#{type}\t#{escape(text)}\n"
    else
      ":#{text}\t#{type}\t\n"
    end
  end

  # Read a token from the string.
  #
  # Inversion of write_token. The record terminator that write_token
  # appends is accepted but not required, and is never part of the
  # result.
  #
  # Returns a two-element Array: [type, text] (both as given to
  # write_token; +type+ and a non-String +text+ come back as Symbols).
  def read_token token
    type, text = token.split("\t", 2)
    if type[0, 1] == ':'
      # Written as ":<text>\t<type>\t\n"; +text+ holds "<type>\t\n" here,
      # so only the part before the trailing tab/newline is the type.
      [text[/\A[^\t\n]*/].to_sym, type[1..-1].to_sym]
    else
      # A final newline is the record terminator unless it is escaped,
      # i.e. directly preceded by an odd number of backslashes. The match
      # below covers those backslashes plus the newline itself, so an odd
      # length means an even number of backslashes.
      if text[-1, 1] == "\n" && text[/\\*\n\z/].size.odd?
        text = text[0...-1]
      end
      [type.to_sym, unescape(text)]
    end
  end

  # Escapes a string for use in write_token: every newline and every
  # backslash is prefixed with a backslash.
  def escape text
    text.gsub(/[\n\\]/, '\\\\\&')
  end

  # Unescapes a string created by escape.
  def unescape text
    text.gsub(/\\[\n\\]/) { |m| m[1,1] }
  end

end
89
+
90
+ # Whether the object is a TokenStream.
91
+ #
92
+ # Returns false.
93
def stream?
  # A plain Tokens list is never a stream; TokenStream overrides this.
  false
end
96
+
97
+ # Iterates over all tokens.
98
+ #
99
+ # If a filter is given, only tokens of that kind are yielded.
100
def each kind_filter = nil, &block
  # Without a filter this is the plain Array iteration.
  return super(&block) unless kind_filter

  # With a filter, only tokens whose kind equals kind_filter reach the
  # caller's block; each token Array is split into text and kind.
  super() do |text, kind|
    yield text, kind if kind == kind_filter
  end
end
110
+
111
+ # Iterates over all text tokens.
112
+ # Range tokens like [:open, :string] are left out.
113
+ #
114
+ # Example:
115
+ # tokens.each_text_token { |text, kind| text.replace html_escape(text) }
116
def each_text_token
  # Tokens whose first element is not a String (such as
  # [:open, :string]) are skipped.
  each { |text, kind| yield text, kind if text.is_a? ::String }
end
122
+
123
+ # Encode the tokens using encoder.
124
+ #
125
+ # encoder can be
126
+ # * a symbol like :html or :statistic
127
+ # * an Encoder class
128
+ # * an Encoder object
129
+ #
130
+ # options are passed to the encoder.
131
def encode encoder, options = {}
  # +encoder+ may be an Encoder object, an Encoder class, or a name that
  # Encoders[] resolves to a class. Anything that is not already an
  # Encoder object is instantiated with +options+.
  unless encoder.is_a? Encoders::Encoder
    # A class is used as given; only names need to be looked up.
    # (Previously a class left encoder_class unset, so nil.new raised.)
    encoder_class = encoder.is_a?(Class) ? encoder : Encoders[encoder]
    encoder = encoder_class.new options
  end
  # Returns whatever the encoder's encode_tokens returns.
  encoder.encode_tokens self, options
end
140
+
141
+
142
+ # Turn into a string using Encoders::Text.
143
+ #
144
+ # +options+ are passed to the encoder if given.
145
def to_s options = {}
  # Delegates to encode with the :text encoder.
  encode(:text, options)
end
148
+
149
+
150
+ # Redirects unknown methods to encoder calls.
151
+ #
152
+ # For example, if you call +tokens.html+, the HTML encoder
153
+ # is used to highlight the tokens.
154
def method_missing meth, options = {}
  # The unknown method name is looked up as an encoder via Encoders[];
  # that encoder is built with +options+ and encodes the receiver.
  encoder_class = Encoders[meth]
  encoder = encoder_class.new(options)
  encoder.encode_tokens self
end
157
+
158
+ # Returns the tokens compressed by joining consecutive
159
+ # tokens of the same kind.
160
+ #
161
+ # This can not be undone, but should yield the same output
162
+ # in most Encoders. It basically makes the output smaller.
163
+ #
164
+ # Combined with dump, it saves space for the cost of time.
165
+ #
166
+ # If the scanner is written carefully, this is not required -
167
+ # for example, consecutive //-comment lines could already be
168
+ # joined in one comment token by the Scanner.
169
def optimize
  # Builds and returns a new list of the receiver's class in which runs
  # of consecutive text tokens of the same kind are joined into one
  # token. Tokens whose text is not a String (such as [:open, :string])
  # end the current run and are copied over unchanged.
  #
  # The receiver and the Strings of its tokens are left untouched.
  # With $DEBUG set, before/after statistics are printed.
  print ' Tokens#optimize: before: %d - ' % size if $DEBUG
  compacted = self.class.new
  run_text = run_kind = nil
  each do |text, kind|
    if text.is_a? String
      if run_text && kind == run_kind
        run_text << text
      else
        compacted << [run_text, run_kind] if run_text
        # Start the run on a copy: appending to the original String would
        # silently change the receiver's token (and fail if it is frozen).
        run_text = text.dup
        run_kind = kind
      end
    else
      compacted << [run_text, run_kind] if run_text
      run_text = run_kind = nil
      compacted << [text, kind]
    end
  end
  compacted << [run_text, run_kind] if run_text
  if $DEBUG
    saved = size - compacted.size
    # Real percentage; an empty list saves nothing (avoids 0/0).
    percent = size.zero? ? 0.0 : 100.0 * saved / size
    print 'after: %d (%d saved = %2.0f%%)' % [compacted.size, saved, percent]
  end
  compacted
end
193
+
194
+ # Compact the object itself; see optimize.
195
def optimize!
  # Swap the receiver's contents for the result of optimize.
  compacted = optimize
  replace compacted
end
198
+
199
+ # Ensure that all :open tokens have a corresponding :close one.
200
+ #
201
+ # TODO: Test this!
202
def fix
  # Returns a new list of the receiver's class in which every :open has
  # a matching :close and every :begin_line a matching :end_line:
  # - a closing token that was never opened is dropped,
  # - a closing token for an outer group first closes the groups that
  #   were opened inside it,
  # - groups still open at the end are closed, innermost first.
  tokens = self.class.new
  # Stack of the closing tokens that are still owed, innermost last.
  opened = []
  each do |type, kind|
    case type
    when :open
      opened.push [:close, kind]
    when :begin_line
      opened.push [:end_line, kind]
    when :close, :end_line
      closer = [type, kind]
      # Never opened (or already closed): skip the stray closing token
      # and leave the stack as it is.
      next unless opened.include? closer
      # Opened earlier: emit the closers of everything opened after it.
      # The loop ends once +closer+ itself has been popped.
      until (pending = opened.pop) == closer
        tokens << pending
      end
    end
    tokens << [type, kind]
  end
  # Close remaining opened tokens.
  tokens << opened.pop until opened.empty?
  tokens
end
228
+
229
def fix!
  # Swap the receiver's contents for the result of fix.
  repaired = fix
  replace repaired
end
232
+
233
+ # Makes sure that:
234
+ # - newlines are single tokens
235
+ # (which means all other tokens are single-line)
236
+ # - there are no open tokens at the end of the line
237
+ #
238
+ # This makes it simple for encoders that work line-oriented,
239
+ # like HTML with list-style numeration.
240
def split_into_lines
  # There is no implementation yet; calling this always raises.
  raise NotImplementedError
end
243
+
244
def split_into_lines!
  # Swap the receiver's contents for the result of split_into_lines.
  lines = split_into_lines
  replace lines
end
247
+
248
+ # Dumps the object into a String that can be saved
249
+ # in files or databases.
250
+ #
251
+ # The dump is created with Marshal.dump;
252
+ # In addition, it is gzipped using GZip.gzip.
253
+ #
254
+ # The returned String object includes Undumping
255
+ # so it has an #undump method. See Tokens.load.
256
+ #
257
+ # You can configure the level of compression,
258
+ # but the default value 7 should be what you want
259
+ # in most cases as it is a good compromise between
260
+ # speed and compression rate.
261
+ #
262
+ # See GZip module.
263
def dump gzip_level = 7
  # The gzip helper is loaded on first use; it provides String#gzip.
  require 'coderay/helpers/gzip_simple'
  marshalled = Marshal.dump(self)
  zipped = marshalled.gzip(gzip_level)
  # extend returns the String itself, now responding to #undump.
  zipped.extend(Undumping)
end
269
+
270
+ # The total size of the tokens.
271
+ # Should be equal to the input size before
272
+ # scanning.
273
def text_size
  # Adds up the sizes of all String texts. Tokens whose first element is
  # not a String (such as [:open, :string]) carry no text and are skipped.
  #
  # Returns an Integer; 0 for an empty list.
  total = 0
  each do |text, _kind|
    # Accumulate with +=; a bare + would discard the sum.
    total += text.size if text.is_a? ::String
  end
  total
end
280
+
281
+ # The text of all text tokens, joined into one String.
282
+ # Should be equal to the input before
283
+ # scanning.
284
def text
  # Keep only the tokens whose first element is a String, then
  # concatenate those Strings in order.
  text_tokens = select { |content, _kind| content.is_a? ::String }
  text_tokens.map { |content, _kind| content }.join
end
287
+
288
+ # Include this module to give an object an #undump
289
+ # method.
290
+ #
291
+ # The string returned by Tokens.dump includes Undumping.
292
module Undumping
  # Hands the receiver to Tokens.load and returns its result.
  def undump
    Tokens.load(self)
  end
end
298
+
299
+ # Undump the object: unzip the dump using gunzip, then
300
+ # restore the object from it using Marshal.load.
301
+ #
302
+ # The result is commonly a Tokens object, but
303
+ # this is not guaranteed.
304
def Tokens.load dump
  # The gzip helper is loaded on first use; it provides String#gunzip.
  require 'coderay/helpers/gzip_simple'
  unzipped = dump.gunzip
  # NOTE(review): Marshal.load can instantiate arbitrary objects; only
  # feed it dumps from a trusted source.
  # The restored object is also kept in @dump on the Tokens class.
  @dump = Marshal.load unzipped
end
309
+
310
+ end
311
+
312
+
313
+ # = TokenStream
314
+ #
315
+ # The TokenStream class is a fake Array without elements.
316
+ #
317
+ # It redirects the method << to a block given at creation.
318
+ #
319
+ # This allows scanners and Encoders to use streaming (no
320
+ # tokens are saved, the input is highlighted the same time it
321
+ # is scanned) with the same code.
322
+ #
323
+ # See CodeRay.encode_stream and CodeRay.scan_stream
324
class TokenStream < Tokens

  # The Array itself stays empty; size reports how many tokens have
  # been passed to <<.
  attr_reader :size

  # Creates a new TokenStream that hands every token given to <<
  # over to +block+.
  #
  # Raises ArgumentError if no block is given.
  #
  # Example:
  #
  #   require 'coderay'
  #
  #   token_stream = CodeRay::TokenStream.new do |text, kind|
  #     puts 'kind: %s, text size: %d.' % [kind, text.size]
  #   end
  #
  #   token_stream << ['/\d+/', :regexp]
  #   #-> kind: regexp, text size: 5.
  #
  def initialize &block
    raise ArgumentError, 'Block expected for streaming.' if block.nil?
    @size = 0
    @callback = block
  end

  # Whether the object is a TokenStream.
  #
  # Returns true.
  def stream?
    true
  end

  # Calls the block given at creation with the elements of +token+ as
  # separate arguments, then counts the token.
  #
  # Returns self, so calls can be chained.
  def << token
    @callback.call(*token)
    @size += 1
    self
  end

  # Not available on a stream (for speed reasons). Use Tokens.
  def text_size
    raise NotImplementedError,
      'This method is not implemented due to speed reasons.'
  end

  # Not available on a stream. Use Tokens.
  def dump
    raise NotImplementedError, 'A TokenStream cannot be dumped.'
  end

  # Not available on a stream. Use Tokens.
  def optimize
    raise NotImplementedError, 'A TokenStream cannot be optimized.'
  end

end
382
+
383
+
384
+ # Token name abbreviations
385
+ require 'coderay/token_classes'
386
+
387
+ end