coderay 0.7.1.147 → 0.7.2.165

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. data/bin/coderay +54 -56
  2. data/demo/suite.rb +54 -54
  3. data/lib/coderay.rb +187 -187
  4. data/lib/coderay/duo.rb +29 -29
  5. data/lib/coderay/encoder.rb +173 -173
  6. data/lib/coderay/encoders/_map.rb +8 -8
  7. data/lib/coderay/encoders/count.rb +21 -21
  8. data/lib/coderay/encoders/debug.rb +46 -46
  9. data/lib/coderay/encoders/div.rb +20 -20
  10. data/lib/coderay/encoders/html.rb +249 -245
  11. data/lib/coderay/encoders/html/classes.rb +73 -73
  12. data/lib/coderay/encoders/html/css.rb +65 -65
  13. data/lib/coderay/encoders/html/numerization.rb +122 -122
  14. data/lib/coderay/encoders/html/output.rb +195 -195
  15. data/lib/coderay/encoders/null.rb +26 -26
  16. data/lib/coderay/encoders/page.rb +21 -21
  17. data/lib/coderay/encoders/span.rb +20 -20
  18. data/lib/coderay/encoders/statistic.rb +81 -81
  19. data/lib/coderay/encoders/text.rb +33 -33
  20. data/lib/coderay/encoders/tokens.rb +44 -44
  21. data/lib/coderay/encoders/xml.rb +71 -71
  22. data/lib/coderay/encoders/yaml.rb +22 -22
  23. data/lib/coderay/helpers/filetype.rb +152 -153
  24. data/lib/coderay/helpers/gzip_simple.rb +67 -68
  25. data/lib/coderay/helpers/plugin.rb +297 -297
  26. data/lib/coderay/helpers/word_list.rb +46 -47
  27. data/lib/coderay/scanner.rb +238 -238
  28. data/lib/coderay/scanners/_map.rb +15 -14
  29. data/lib/coderay/scanners/c.rb +163 -155
  30. data/lib/coderay/scanners/delphi.rb +131 -129
  31. data/lib/coderay/scanners/html.rb +174 -167
  32. data/lib/coderay/scanners/nitro_xhtml.rb +130 -0
  33. data/lib/coderay/scanners/plaintext.rb +15 -15
  34. data/lib/coderay/scanners/rhtml.rb +73 -65
  35. data/lib/coderay/scanners/ruby.rb +404 -397
  36. data/lib/coderay/scanners/ruby/patterns.rb +216 -216
  37. data/lib/coderay/scanners/xml.rb +18 -18
  38. data/lib/coderay/style.rb +20 -20
  39. data/lib/coderay/styles/_map.rb +3 -3
  40. data/lib/coderay/styles/cycnus.rb +18 -18
  41. data/lib/coderay/styles/murphy.rb +18 -18
  42. data/lib/coderay/tokens.rb +322 -322
  43. metadata +86 -86
  44. data/lib/coderay/scanners/nitro_html.rb +0 -125
  45. data/lib/coderay/scanners/yaml.rb +0 -85
@@ -1,20 +1,20 @@
1
- module CodeRay
2
-
3
- # This module holds the Style class and its subclasses.
4
- #
5
- # See Plugin.
6
- module Styles
7
- extend PluginHost
8
- plugin_path File.dirname(__FILE__), 'styles'
9
-
10
- class Style
11
- extend Plugin
12
- plugin_host Styles
13
-
14
- DEFAULT_OPTIONS = { }
15
-
16
- end
17
-
18
- end
19
-
20
- end
1
+ module CodeRay
2
+
3
+ # This module holds the Style class and its subclasses.
4
+ #
5
+ # See Plugin.
6
+ module Styles
7
+ extend PluginHost
8
+ plugin_path File.dirname(__FILE__), 'styles'
9
+
10
+ class Style
11
+ extend Plugin
12
+ plugin_host Styles
13
+
14
+ DEFAULT_OPTIONS = { }
15
+
16
+ end
17
+
18
+ end
19
+
20
+ end
@@ -1,7 +1,7 @@
1
1
  module CodeRay
2
2
  module Styles
3
-
4
- default :cycnus
5
-
3
+
4
+ default :cycnus
5
+
6
6
  end
7
7
  end
@@ -1,21 +1,21 @@
1
1
  module CodeRay
2
2
  module Styles
3
3
 
4
- class Cycnus < Style
4
+ class Cycnus < Style
5
5
 
6
- register_for :cycnus
6
+ register_for :cycnus
7
7
 
8
- code_background = '#f8f8f8'
9
- numbers_background = '#def'
10
- border_color = 'silver'
11
- normal_color = '#100'
8
+ code_background = '#f8f8f8'
9
+ numbers_background = '#def'
10
+ border_color = 'silver'
11
+ normal_color = '#100'
12
12
 
13
- CSS_MAIN_STYLES = <<-MAIN
13
+ CSS_MAIN_STYLES = <<-MAIN
14
14
  .CodeRay {
15
- background-color: #{code_background};
16
- border: 1px solid #{border_color};
17
- font-family: 'Courier New', 'Terminal', monospace;
18
- color: #{normal_color};
15
+ background-color: #{code_background};
16
+ border: 1px solid #{border_color};
17
+ font-family: 'Courier New', 'Terminal', monospace;
18
+ color: #{normal_color};
19
19
  }
20
20
  .CodeRay pre { margin: 0px }
21
21
 
@@ -27,9 +27,9 @@ table.CodeRay { border-collapse: collapse; width: 100%; padding: 2px }
27
27
  table.CodeRay td { padding: 2px 4px; vertical-align: top }
28
28
 
29
29
  .CodeRay .line_numbers, .CodeRay .no {
30
- background-color: #{numbers_background};
31
- color: gray;
32
- text-align: right;
30
+ background-color: #{numbers_background};
31
+ color: gray;
32
+ text-align: right;
33
33
  }
34
34
  .CodeRay .line_numbers tt { font-weight: bold }
35
35
  .CodeRay .no { padding: 0px 4px }
@@ -39,9 +39,9 @@ ol.CodeRay { font-size: 10pt }
39
39
  ol.CodeRay li { white-space: pre }
40
40
 
41
41
  .CodeRay .code pre { overflow: auto }
42
- MAIN
42
+ MAIN
43
43
 
44
- TOKEN_COLORS = <<-'TOKENS'
44
+ TOKEN_COLORS = <<-'TOKENS'
45
45
  .af { color:#00C }
46
46
  .an { color:#007 }
47
47
  .av { color:#700 }
@@ -117,9 +117,9 @@ ol.CodeRay li { white-space: pre }
117
117
  .ty { color:#339; font-weight:bold }
118
118
  .v { color:#036 }
119
119
  .xt { color:#444 }
120
- TOKENS
120
+ TOKENS
121
121
 
122
- end
122
+ end
123
123
 
124
124
  end
125
125
  end
@@ -1,21 +1,21 @@
1
1
  module CodeRay
2
2
  module Styles
3
3
 
4
- class Murphy < Style
4
+ class Murphy < Style
5
5
 
6
- register_for :murphy
6
+ register_for :murphy
7
7
 
8
- code_background = '#001129'
9
- numbers_background = code_background
10
- border_color = 'silver'
11
- normal_color = '#C0C0C0'
8
+ code_background = '#001129'
9
+ numbers_background = code_background
10
+ border_color = 'silver'
11
+ normal_color = '#C0C0C0'
12
12
 
13
- CSS_MAIN_STYLES = <<-MAIN
13
+ CSS_MAIN_STYLES = <<-MAIN
14
14
  .CodeRay {
15
- background-color: #{code_background};
16
- border: 1px solid #{border_color};
17
- font-family: 'Courier New', 'Terminal', monospace;
18
- color: #{normal_color};
15
+ background-color: #{code_background};
16
+ border: 1px solid #{border_color};
17
+ font-family: 'Courier New', 'Terminal', monospace;
18
+ color: #{normal_color};
19
19
  }
20
20
  .CodeRay pre { margin: 0px; }
21
21
 
@@ -27,9 +27,9 @@ table.CodeRay { border-collapse: collapse; width: 100%; padding: 2px; }
27
27
  table.CodeRay td { padding: 2px 4px; vertical-align: top; }
28
28
 
29
29
  .CodeRay .line_numbers, .CodeRay .no {
30
- background-color: #{numbers_background};
31
- color: gray;
32
- text-align: right;
30
+ background-color: #{numbers_background};
31
+ color: gray;
32
+ text-align: right;
33
33
  }
34
34
  .CodeRay .line_numbers tt { font-weight: bold; }
35
35
  .CodeRay .no { padding: 0px 4px; }
@@ -39,9 +39,9 @@ ol.CodeRay { font-size: 10pt; }
39
39
  ol.CodeRay li { white-space: pre; }
40
40
 
41
41
  .CodeRay .code pre { overflow: auto; }
42
- MAIN
42
+ MAIN
43
43
 
44
- TOKEN_COLORS = <<-'TOKENS'
44
+ TOKEN_COLORS = <<-'TOKENS'
45
45
  .af { color:#00C; }
46
46
  .an { color:#007; }
47
47
  .av { color:#700; }
@@ -111,9 +111,9 @@ ol.CodeRay li { white-space: pre; }
111
111
  .ty { color:#339; font-weight:bold; }
112
112
  .v { color:#036; }
113
113
  .xt { color:#444; }
114
- TOKENS
114
+ TOKENS
115
115
 
116
- end
116
+ end
117
117
 
118
118
  end
119
119
  end
@@ -1,322 +1,322 @@
1
- module CodeRay
2
-
3
- # = Tokens
4
- #
5
- # The Tokens class represents a list of tokens returnd from
6
- # a Scanner.
7
- #
8
- # A token is not a special object, just a two-element Array
9
- # consisting of
10
- # * the _token_ _kind_ (a Symbol representing the type of the token)
11
- # * the _token_ _text_ (the original source of the token in a String)
12
- #
13
- # A token looks like this:
14
- #
15
- # [:comment, '# It looks like this']
16
- # [:float, '3.1415926']
17
- # [:error, 'äöü']
18
- #
19
- # Some scanners also yield some kind of sub-tokens, represented by special
20
- # token texts, namely :open and :close .
21
- #
22
- # The Ruby scanner, for example, splits "a string" into:
23
- #
24
- # [
25
- # [:open, :string],
26
- # [:delimiter, '"'],
27
- # [:content, 'a string'],
28
- # [:delimiter, '"'],
29
- # [:close, :string]
30
- # ]
31
- #
32
- # Tokens is also the interface between Scanners and Encoders:
33
- # The input is split and saved into a Tokens object. The Encoder
34
- # then builds the output from this object.
35
- #
36
- # Thus, the syntax below becomes clear:
37
- #
38
- # CodeRay.scan('price = 2.59', :ruby).html
39
- # # the Tokens object is here -------^
40
- #
41
- # See how small it is? ;)
42
- #
43
- # Tokens gives you the power to handle pre-scanned code very easily:
44
- # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
45
- # that you put in your DB.
46
- #
47
- # Tokens' subclass TokenStream allows streaming to save memory.
48
- class Tokens < Array
49
-
50
- class << self
51
-
52
- # Convert the token to a string.
53
- #
54
- # This format is used by Encoders.Tokens.
55
- # It can be reverted using read_token.
56
- def write_token text, type
57
- if text.is_a? String
58
- "#{type}\t#{escape(text)}\n"
59
- else
60
- ":#{text}\t#{type}\t\n"
61
- end
62
- end
63
-
64
- # Read a token from the string.
65
- #
66
- # Inversion of write_token.
67
- #
68
- # TODO Test this!
69
- def read_token token
70
- type, text = token.split("\t", 2)
71
- if type[0] == ?:
72
- [text.to_sym, type[1..-1].to_sym]
73
- else
74
- [type.to_sym, unescape(text)]
75
- end
76
- end
77
-
78
- # Escapes a string for use in write_token.
79
- def escape text
80
- text.gsub(/[\n\\]/, '\\\\\&')
81
- end
82
-
83
- # Unescapes a string created by escape.
84
- def unescape text
85
- text.gsub(/\\[\n\\]/) { |m| m[1,1] }
86
- end
87
-
88
- end
89
-
90
- # Whether the object is a TokenStream.
91
- #
92
- # Returns false.
93
- def stream?
94
- false
95
- end
96
-
97
- # Iterates over all tokens.
98
- #
99
- # If a filter is given, only tokens of that kind are yielded.
100
- def each kind_filter = nil, &block
101
- unless kind_filter
102
- super(&block)
103
- else
104
- super() do |text, kind|
105
- next unless kind == kind_filter
106
- yield text, kind
107
- end
108
- end
109
- end
110
-
111
- # Iterates over all text tokens.
112
- # Range tokens like [:open, :string] are left out.
113
- #
114
- # Example:
115
- # tokens.each_text_token { |text, kind| text.replace html_escape(text) }
116
- def each_text_token
117
- each do |text, kind|
118
- next unless text.respond_to? :to_str
119
- yield text, kind
120
- end
121
- end
122
-
123
- # Encode the tokens using encoder.
124
- #
125
- # encoder can be
126
- # * a symbol like :html oder :statistic
127
- # * an Encoder class
128
- # * an Encoder object
129
- #
130
- # options are passed to the encoder.
131
- def encode encoder, options = {}
132
- unless encoder.is_a? Encoders::Encoder
133
- unless encoder.is_a? Class
134
- encoder_class = Encoders[encoder]
135
- end
136
- encoder = encoder_class.new options
137
- end
138
- encoder.encode_tokens self, options
139
- end
140
-
141
-
142
- # Turn into a string using Encoders::Text.
143
- #
144
- # +options+ are passed to the encoder if given.
145
- def to_s options = {}
146
- encode :text, options
147
- end
148
-
149
-
150
- # Redirects unknown methods to encoder calls.
151
- #
152
- # For example, if you call +tokens.html+, the HTML encoder
153
- # is used to highlight the tokens.
154
- def method_missing meth, options = {}
155
- Encoders[meth].new(options).encode_tokens self
156
- end
157
-
158
- # Returns the tokens compressed by joining consecutive
159
- # tokens of the same kind.
160
- #
161
- # This can not be undone, but should yield the same output
162
- # in most Encoders. It basically makes the output smaller.
163
- #
164
- # Combined with dump, it saves space for the cost of time.
165
- #
166
- # If the scanner is written carefully, this is not required -
167
- # for example, consecutive //-comment lines could already be
168
- # joined in one comment token by the Scanner.
169
- def optimize
170
- print ' Tokens#optimize: before: %d - ' % size if $DEBUG
171
- last_kind = last_text = nil
172
- new = self.class.new
173
- each do |text, kind|
174
- if text.is_a? String
175
- if kind == last_kind
176
- last_text << text
177
- else
178
- new << [last_text, last_kind] if last_kind
179
- last_text = text
180
- last_kind = kind
181
- end
182
- else
183
- new << [last_text, last_kind] if last_kind
184
- last_kind = last_text = nil
185
- new << [text, kind]
186
- end
187
- end
188
- new << [last_text, last_kind] if last_kind
189
- print 'after: %d (%d saved = %2.0f%%)' %
190
- [new.size, size - new.size, 1.0 - (new.size.to_f / size)] if $DEBUG
191
- new
192
- end
193
-
194
- # Compact the object itself; see optimize.
195
- def optimize!
196
- replace optimize
197
- end
198
-
199
- # Dumps the object into a String that can be saved
200
- # in files or databases.
201
- #
202
- # The dump is created with Marshal.dump;
203
- # In addition, it is gzipped using GZip.gzip.
204
- #
205
- # The returned String object includes Undumping
206
- # so it has an #undump method. See Tokens.load.
207
- #
208
- # You can configure the level of compression,
209
- # but the default value 7 should be what you want
210
- # in most cases as it is a good comprimise between
211
- # speed and compression rate.
212
- #
213
- # See GZip module.
214
- def dump gzip_level = 7
215
- require 'coderay/helpers/gzip_simple'
216
- dump = Marshal.dump self
217
- dump = dump.gzip gzip_level
218
- dump.extend Undumping
219
- end
220
-
221
- # The total size of the tokens.
222
- # Should be equal to the input size before
223
- # scanning.
224
- def text_size
225
- map { |t, k| t }.join.size
226
- end
227
-
228
- # Include this module to give an object an #undump
229
- # method.
230
- #
231
- # The string returned by Tokens.dump includes Undumping.
232
- module Undumping
233
- # Calls Tokens.load with itself.
234
- def undump
235
- Tokens.load self
236
- end
237
- end
238
-
239
- # Undump the object using Marshal.load, then
240
- # unzip it using GZip.gunzip.
241
- #
242
- # The result is commonly a Tokens object, but
243
- # this is not guaranteed.
244
- def Tokens.load dump
245
- require 'coderay/helpers/gzip_simple'
246
- dump = dump.gunzip
247
- @dump = Marshal.load dump
248
- end
249
-
250
- end
251
-
252
-
253
- # = TokenStream
254
- #
255
- # The TokenStream class is a fake Array without elements.
256
- #
257
- # It redirects the method << to a block given at creation.
258
- #
259
- # This allows scanners and Encoders to use streaming (no
260
- # tokens are saved, the input is highlighted the same time it
261
- # is scanned) with the same code.
262
- #
263
- # See CodeRay.encode_stream and CodeRay.scan_stream
264
- class TokenStream < Tokens
265
-
266
- # Whether the object is a TokenStream.
267
- #
268
- # Returns true.
269
- def stream?
270
- true
271
- end
272
-
273
- # The Array is empty, but size counts the tokens given by <<.
274
- attr_reader :size
275
-
276
- # Creates a new TokenStream that calls +block+ whenever
277
- # its << method is called.
278
- #
279
- # Example:
280
- #
281
- # require 'coderay'
282
- #
283
- # token_stream = CodeRay::TokenStream.new do |kind, text|
284
- # puts 'kind: %s, text size: %d.' % [kind, text.size]
285
- # end
286
- #
287
- # token_stream << [:regexp, '/\d+/']
288
- # #-> kind: rexpexp, text size: 5.
289
- #
290
- def initialize &block
291
- raise ArgumentError, 'Block expected for streaming.' unless block
292
- @callback = block
293
- @size = 0
294
- end
295
-
296
- # Calls +block+ with +token+ and increments size.
297
- #
298
- # Returns self.
299
- def << token
300
- @callback.call token
301
- @size += 1
302
- self
303
- end
304
-
305
- # This method is not implemented due to speed reasons. Use Tokens.
306
- def text_size
307
- raise NotImplementedError, 'This method is not implemented due to speed reasons.'
308
- end
309
-
310
- # A TokenStream cannot be dumped. Use Tokens.
311
- def dump
312
- raise NotImplementedError, 'A TokenStream cannot be dumped.'
313
- end
314
-
315
- # A TokenStream cannot be optimized. Use Tokens.
316
- def optimize
317
- raise NotImplementedError, 'A TokenStream cannot be optimized.'
318
- end
319
-
320
- end
321
-
322
- end
1
+ module CodeRay
2
+
3
+ # = Tokens
4
+ #
5
+ # The Tokens class represents a list of tokens returnd from
6
+ # a Scanner.
7
+ #
8
+ # A token is not a special object, just a two-element Array
9
+ # consisting of
10
+ # * the _token_ _kind_ (a Symbol representing the type of the token)
11
+ # * the _token_ _text_ (the original source of the token in a String)
12
+ #
13
+ # A token looks like this:
14
+ #
15
+ # [:comment, '# It looks like this']
16
+ # [:float, '3.1415926']
17
+ # [:error, 'äöü']
18
+ #
19
+ # Some scanners also yield some kind of sub-tokens, represented by special
20
+ # token texts, namely :open and :close .
21
+ #
22
+ # The Ruby scanner, for example, splits "a string" into:
23
+ #
24
+ # [
25
+ # [:open, :string],
26
+ # [:delimiter, '"'],
27
+ # [:content, 'a string'],
28
+ # [:delimiter, '"'],
29
+ # [:close, :string]
30
+ # ]
31
+ #
32
+ # Tokens is also the interface between Scanners and Encoders:
33
+ # The input is split and saved into a Tokens object. The Encoder
34
+ # then builds the output from this object.
35
+ #
36
+ # Thus, the syntax below becomes clear:
37
+ #
38
+ # CodeRay.scan('price = 2.59', :ruby).html
39
+ # # the Tokens object is here -------^
40
+ #
41
+ # See how small it is? ;)
42
+ #
43
+ # Tokens gives you the power to handle pre-scanned code very easily:
44
+ # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
45
+ # that you put in your DB.
46
+ #
47
+ # Tokens' subclass TokenStream allows streaming to save memory.
48
+ class Tokens < Array
49
+
50
+ class << self
51
+
52
+ # Convert the token to a string.
53
+ #
54
+ # This format is used by Encoders.Tokens.
55
+ # It can be reverted using read_token.
56
+ def write_token text, type
57
+ if text.is_a? String
58
+ "#{type}\t#{escape(text)}\n"
59
+ else
60
+ ":#{text}\t#{type}\t\n"
61
+ end
62
+ end
63
+
64
+ # Read a token from the string.
65
+ #
66
+ # Inversion of write_token.
67
+ #
68
+ # TODO Test this!
69
+ def read_token token
70
+ type, text = token.split("\t", 2)
71
+ if type[0] == ?:
72
+ [text.to_sym, type[1..-1].to_sym]
73
+ else
74
+ [type.to_sym, unescape(text)]
75
+ end
76
+ end
77
+
78
+ # Escapes a string for use in write_token.
79
+ def escape text
80
+ text.gsub(/[\n\\]/, '\\\\\&')
81
+ end
82
+
83
+ # Unescapes a string created by escape.
84
+ def unescape text
85
+ text.gsub(/\\[\n\\]/) { |m| m[1,1] }
86
+ end
87
+
88
+ end
89
+
90
+ # Whether the object is a TokenStream.
91
+ #
92
+ # Returns false.
93
+ def stream?
94
+ false
95
+ end
96
+
97
+ # Iterates over all tokens.
98
+ #
99
+ # If a filter is given, only tokens of that kind are yielded.
100
+ def each kind_filter = nil, &block
101
+ unless kind_filter
102
+ super(&block)
103
+ else
104
+ super() do |text, kind|
105
+ next unless kind == kind_filter
106
+ yield text, kind
107
+ end
108
+ end
109
+ end
110
+
111
+ # Iterates over all text tokens.
112
+ # Range tokens like [:open, :string] are left out.
113
+ #
114
+ # Example:
115
+ # tokens.each_text_token { |text, kind| text.replace html_escape(text) }
116
+ def each_text_token
117
+ each do |text, kind|
118
+ next unless text.respond_to? :to_str
119
+ yield text, kind
120
+ end
121
+ end
122
+
123
+ # Encode the tokens using encoder.
124
+ #
125
+ # encoder can be
126
+ # * a symbol like :html oder :statistic
127
+ # * an Encoder class
128
+ # * an Encoder object
129
+ #
130
+ # options are passed to the encoder.
131
+ def encode encoder, options = {}
132
+ unless encoder.is_a? Encoders::Encoder
133
+ unless encoder.is_a? Class
134
+ encoder_class = Encoders[encoder]
135
+ end
136
+ encoder = encoder_class.new options
137
+ end
138
+ encoder.encode_tokens self, options
139
+ end
140
+
141
+
142
+ # Turn into a string using Encoders::Text.
143
+ #
144
+ # +options+ are passed to the encoder if given.
145
+ def to_s options = {}
146
+ encode :text, options
147
+ end
148
+
149
+
150
+ # Redirects unknown methods to encoder calls.
151
+ #
152
+ # For example, if you call +tokens.html+, the HTML encoder
153
+ # is used to highlight the tokens.
154
+ def method_missing meth, options = {}
155
+ Encoders[meth].new(options).encode_tokens self
156
+ end
157
+
158
+ # Returns the tokens compressed by joining consecutive
159
+ # tokens of the same kind.
160
+ #
161
+ # This can not be undone, but should yield the same output
162
+ # in most Encoders. It basically makes the output smaller.
163
+ #
164
+ # Combined with dump, it saves space for the cost of time.
165
+ #
166
+ # If the scanner is written carefully, this is not required -
167
+ # for example, consecutive //-comment lines could already be
168
+ # joined in one comment token by the Scanner.
169
+ def optimize
170
+ print ' Tokens#optimize: before: %d - ' % size if $DEBUG
171
+ last_kind = last_text = nil
172
+ new = self.class.new
173
+ each do |text, kind|
174
+ if text.is_a? String
175
+ if kind == last_kind
176
+ last_text << text
177
+ else
178
+ new << [last_text, last_kind] if last_kind
179
+ last_text = text
180
+ last_kind = kind
181
+ end
182
+ else
183
+ new << [last_text, last_kind] if last_kind
184
+ last_kind = last_text = nil
185
+ new << [text, kind]
186
+ end
187
+ end
188
+ new << [last_text, last_kind] if last_kind
189
+ print 'after: %d (%d saved = %2.0f%%)' %
190
+ [new.size, size - new.size, 1.0 - (new.size.to_f / size)] if $DEBUG
191
+ new
192
+ end
193
+
194
+ # Compact the object itself; see optimize.
195
+ def optimize!
196
+ replace optimize
197
+ end
198
+
199
+ # Dumps the object into a String that can be saved
200
+ # in files or databases.
201
+ #
202
+ # The dump is created with Marshal.dump;
203
+ # In addition, it is gzipped using GZip.gzip.
204
+ #
205
+ # The returned String object includes Undumping
206
+ # so it has an #undump method. See Tokens.load.
207
+ #
208
+ # You can configure the level of compression,
209
+ # but the default value 7 should be what you want
210
+ # in most cases as it is a good comprimise between
211
+ # speed and compression rate.
212
+ #
213
+ # See GZip module.
214
+ def dump gzip_level = 7
215
+ require 'coderay/helpers/gzip_simple'
216
+ dump = Marshal.dump self
217
+ dump = dump.gzip gzip_level
218
+ dump.extend Undumping
219
+ end
220
+
221
+ # The total size of the tokens.
222
+ # Should be equal to the input size before
223
+ # scanning.
224
+ def text_size
225
+ map { |t, k| t }.join.size
226
+ end
227
+
228
+ # Include this module to give an object an #undump
229
+ # method.
230
+ #
231
+ # The string returned by Tokens.dump includes Undumping.
232
+ module Undumping
233
+ # Calls Tokens.load with itself.
234
+ def undump
235
+ Tokens.load self
236
+ end
237
+ end
238
+
239
+ # Undump the object using Marshal.load, then
240
+ # unzip it using GZip.gunzip.
241
+ #
242
+ # The result is commonly a Tokens object, but
243
+ # this is not guaranteed.
244
+ def Tokens.load dump
245
+ require 'coderay/helpers/gzip_simple'
246
+ dump = dump.gunzip
247
+ @dump = Marshal.load dump
248
+ end
249
+
250
+ end
251
+
252
+
253
+ # = TokenStream
254
+ #
255
+ # The TokenStream class is a fake Array without elements.
256
+ #
257
+ # It redirects the method << to a block given at creation.
258
+ #
259
+ # This allows scanners and Encoders to use streaming (no
260
+ # tokens are saved, the input is highlighted the same time it
261
+ # is scanned) with the same code.
262
+ #
263
+ # See CodeRay.encode_stream and CodeRay.scan_stream
264
+ class TokenStream < Tokens
265
+
266
+ # Whether the object is a TokenStream.
267
+ #
268
+ # Returns true.
269
+ def stream?
270
+ true
271
+ end
272
+
273
+ # The Array is empty, but size counts the tokens given by <<.
274
+ attr_reader :size
275
+
276
+ # Creates a new TokenStream that calls +block+ whenever
277
+ # its << method is called.
278
+ #
279
+ # Example:
280
+ #
281
+ # require 'coderay'
282
+ #
283
+ # token_stream = CodeRay::TokenStream.new do |kind, text|
284
+ # puts 'kind: %s, text size: %d.' % [kind, text.size]
285
+ # end
286
+ #
287
+ # token_stream << [:regexp, '/\d+/']
288
+ # #-> kind: rexpexp, text size: 5.
289
+ #
290
+ def initialize &block
291
+ raise ArgumentError, 'Block expected for streaming.' unless block
292
+ @callback = block
293
+ @size = 0
294
+ end
295
+
296
+ # Calls +block+ with +token+ and increments size.
297
+ #
298
+ # Returns self.
299
+ def << token
300
+ @callback.call token
301
+ @size += 1
302
+ self
303
+ end
304
+
305
+ # This method is not implemented due to speed reasons. Use Tokens.
306
+ def text_size
307
+ raise NotImplementedError, 'This method is not implemented due to speed reasons.'
308
+ end
309
+
310
+ # A TokenStream cannot be dumped. Use Tokens.
311
+ def dump
312
+ raise NotImplementedError, 'A TokenStream cannot be dumped.'
313
+ end
314
+
315
+ # A TokenStream cannot be optimized. Use Tokens.
316
+ def optimize
317
+ raise NotImplementedError, 'A TokenStream cannot be optimized.'
318
+ end
319
+
320
+ end
321
+
322
+ end