coderay 0.7.1.147 → 0.7.2.165

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. data/bin/coderay +54 -56
  2. data/demo/suite.rb +54 -54
  3. data/lib/coderay.rb +187 -187
  4. data/lib/coderay/duo.rb +29 -29
  5. data/lib/coderay/encoder.rb +173 -173
  6. data/lib/coderay/encoders/_map.rb +8 -8
  7. data/lib/coderay/encoders/count.rb +21 -21
  8. data/lib/coderay/encoders/debug.rb +46 -46
  9. data/lib/coderay/encoders/div.rb +20 -20
  10. data/lib/coderay/encoders/html.rb +249 -245
  11. data/lib/coderay/encoders/html/classes.rb +73 -73
  12. data/lib/coderay/encoders/html/css.rb +65 -65
  13. data/lib/coderay/encoders/html/numerization.rb +122 -122
  14. data/lib/coderay/encoders/html/output.rb +195 -195
  15. data/lib/coderay/encoders/null.rb +26 -26
  16. data/lib/coderay/encoders/page.rb +21 -21
  17. data/lib/coderay/encoders/span.rb +20 -20
  18. data/lib/coderay/encoders/statistic.rb +81 -81
  19. data/lib/coderay/encoders/text.rb +33 -33
  20. data/lib/coderay/encoders/tokens.rb +44 -44
  21. data/lib/coderay/encoders/xml.rb +71 -71
  22. data/lib/coderay/encoders/yaml.rb +22 -22
  23. data/lib/coderay/helpers/filetype.rb +152 -153
  24. data/lib/coderay/helpers/gzip_simple.rb +67 -68
  25. data/lib/coderay/helpers/plugin.rb +297 -297
  26. data/lib/coderay/helpers/word_list.rb +46 -47
  27. data/lib/coderay/scanner.rb +238 -238
  28. data/lib/coderay/scanners/_map.rb +15 -14
  29. data/lib/coderay/scanners/c.rb +163 -155
  30. data/lib/coderay/scanners/delphi.rb +131 -129
  31. data/lib/coderay/scanners/html.rb +174 -167
  32. data/lib/coderay/scanners/nitro_xhtml.rb +130 -0
  33. data/lib/coderay/scanners/plaintext.rb +15 -15
  34. data/lib/coderay/scanners/rhtml.rb +73 -65
  35. data/lib/coderay/scanners/ruby.rb +404 -397
  36. data/lib/coderay/scanners/ruby/patterns.rb +216 -216
  37. data/lib/coderay/scanners/xml.rb +18 -18
  38. data/lib/coderay/style.rb +20 -20
  39. data/lib/coderay/styles/_map.rb +3 -3
  40. data/lib/coderay/styles/cycnus.rb +18 -18
  41. data/lib/coderay/styles/murphy.rb +18 -18
  42. data/lib/coderay/tokens.rb +322 -322
  43. metadata +86 -86
  44. data/lib/coderay/scanners/nitro_html.rb +0 -125
  45. data/lib/coderay/scanners/yaml.rb +0 -85
@@ -1,20 +1,20 @@
1
- module CodeRay
2
-
3
- # This module holds the Style class and its subclasses.
4
- #
5
- # See Plugin.
6
- module Styles
7
- extend PluginHost
8
- plugin_path File.dirname(__FILE__), 'styles'
9
-
10
- class Style
11
- extend Plugin
12
- plugin_host Styles
13
-
14
- DEFAULT_OPTIONS = { }
15
-
16
- end
17
-
18
- end
19
-
20
- end
1
+ module CodeRay
2
+
3
+ # This module holds the Style class and its subclasses.
4
+ #
5
+ # See Plugin.
6
+ module Styles
7
+ extend PluginHost
8
+ plugin_path File.dirname(__FILE__), 'styles'
9
+
10
+ class Style
11
+ extend Plugin
12
+ plugin_host Styles
13
+
14
+ DEFAULT_OPTIONS = { }
15
+
16
+ end
17
+
18
+ end
19
+
20
+ end
@@ -1,7 +1,7 @@
1
1
  module CodeRay
2
2
  module Styles
3
-
4
- default :cycnus
5
-
3
+
4
+ default :cycnus
5
+
6
6
  end
7
7
  end
@@ -1,21 +1,21 @@
1
1
  module CodeRay
2
2
  module Styles
3
3
 
4
- class Cycnus < Style
4
+ class Cycnus < Style
5
5
 
6
- register_for :cycnus
6
+ register_for :cycnus
7
7
 
8
- code_background = '#f8f8f8'
9
- numbers_background = '#def'
10
- border_color = 'silver'
11
- normal_color = '#100'
8
+ code_background = '#f8f8f8'
9
+ numbers_background = '#def'
10
+ border_color = 'silver'
11
+ normal_color = '#100'
12
12
 
13
- CSS_MAIN_STYLES = <<-MAIN
13
+ CSS_MAIN_STYLES = <<-MAIN
14
14
  .CodeRay {
15
- background-color: #{code_background};
16
- border: 1px solid #{border_color};
17
- font-family: 'Courier New', 'Terminal', monospace;
18
- color: #{normal_color};
15
+ background-color: #{code_background};
16
+ border: 1px solid #{border_color};
17
+ font-family: 'Courier New', 'Terminal', monospace;
18
+ color: #{normal_color};
19
19
  }
20
20
  .CodeRay pre { margin: 0px }
21
21
 
@@ -27,9 +27,9 @@ table.CodeRay { border-collapse: collapse; width: 100%; padding: 2px }
27
27
  table.CodeRay td { padding: 2px 4px; vertical-align: top }
28
28
 
29
29
  .CodeRay .line_numbers, .CodeRay .no {
30
- background-color: #{numbers_background};
31
- color: gray;
32
- text-align: right;
30
+ background-color: #{numbers_background};
31
+ color: gray;
32
+ text-align: right;
33
33
  }
34
34
  .CodeRay .line_numbers tt { font-weight: bold }
35
35
  .CodeRay .no { padding: 0px 4px }
@@ -39,9 +39,9 @@ ol.CodeRay { font-size: 10pt }
39
39
  ol.CodeRay li { white-space: pre }
40
40
 
41
41
  .CodeRay .code pre { overflow: auto }
42
- MAIN
42
+ MAIN
43
43
 
44
- TOKEN_COLORS = <<-'TOKENS'
44
+ TOKEN_COLORS = <<-'TOKENS'
45
45
  .af { color:#00C }
46
46
  .an { color:#007 }
47
47
  .av { color:#700 }
@@ -117,9 +117,9 @@ ol.CodeRay li { white-space: pre }
117
117
  .ty { color:#339; font-weight:bold }
118
118
  .v { color:#036 }
119
119
  .xt { color:#444 }
120
- TOKENS
120
+ TOKENS
121
121
 
122
- end
122
+ end
123
123
 
124
124
  end
125
125
  end
@@ -1,21 +1,21 @@
1
1
  module CodeRay
2
2
  module Styles
3
3
 
4
- class Murphy < Style
4
+ class Murphy < Style
5
5
 
6
- register_for :murphy
6
+ register_for :murphy
7
7
 
8
- code_background = '#001129'
9
- numbers_background = code_background
10
- border_color = 'silver'
11
- normal_color = '#C0C0C0'
8
+ code_background = '#001129'
9
+ numbers_background = code_background
10
+ border_color = 'silver'
11
+ normal_color = '#C0C0C0'
12
12
 
13
- CSS_MAIN_STYLES = <<-MAIN
13
+ CSS_MAIN_STYLES = <<-MAIN
14
14
  .CodeRay {
15
- background-color: #{code_background};
16
- border: 1px solid #{border_color};
17
- font-family: 'Courier New', 'Terminal', monospace;
18
- color: #{normal_color};
15
+ background-color: #{code_background};
16
+ border: 1px solid #{border_color};
17
+ font-family: 'Courier New', 'Terminal', monospace;
18
+ color: #{normal_color};
19
19
  }
20
20
  .CodeRay pre { margin: 0px; }
21
21
 
@@ -27,9 +27,9 @@ table.CodeRay { border-collapse: collapse; width: 100%; padding: 2px; }
27
27
  table.CodeRay td { padding: 2px 4px; vertical-align: top; }
28
28
 
29
29
  .CodeRay .line_numbers, .CodeRay .no {
30
- background-color: #{numbers_background};
31
- color: gray;
32
- text-align: right;
30
+ background-color: #{numbers_background};
31
+ color: gray;
32
+ text-align: right;
33
33
  }
34
34
  .CodeRay .line_numbers tt { font-weight: bold; }
35
35
  .CodeRay .no { padding: 0px 4px; }
@@ -39,9 +39,9 @@ ol.CodeRay { font-size: 10pt; }
39
39
  ol.CodeRay li { white-space: pre; }
40
40
 
41
41
  .CodeRay .code pre { overflow: auto; }
42
- MAIN
42
+ MAIN
43
43
 
44
- TOKEN_COLORS = <<-'TOKENS'
44
+ TOKEN_COLORS = <<-'TOKENS'
45
45
  .af { color:#00C; }
46
46
  .an { color:#007; }
47
47
  .av { color:#700; }
@@ -111,9 +111,9 @@ ol.CodeRay li { white-space: pre; }
111
111
  .ty { color:#339; font-weight:bold; }
112
112
  .v { color:#036; }
113
113
  .xt { color:#444; }
114
- TOKENS
114
+ TOKENS
115
115
 
116
- end
116
+ end
117
117
 
118
118
  end
119
119
  end
@@ -1,322 +1,322 @@
1
- module CodeRay
2
-
3
- # = Tokens
4
- #
5
- # The Tokens class represents a list of tokens returnd from
6
- # a Scanner.
7
- #
8
- # A token is not a special object, just a two-element Array
9
- # consisting of
10
- # * the _token_ _kind_ (a Symbol representing the type of the token)
11
- # * the _token_ _text_ (the original source of the token in a String)
12
- #
13
- # A token looks like this:
14
- #
15
- # [:comment, '# It looks like this']
16
- # [:float, '3.1415926']
17
- # [:error, '���']
18
- #
19
- # Some scanners also yield some kind of sub-tokens, represented by special
20
- # token texts, namely :open and :close .
21
- #
22
- # The Ruby scanner, for example, splits "a string" into:
23
- #
24
- # [
25
- # [:open, :string],
26
- # [:delimiter, '"'],
27
- # [:content, 'a string'],
28
- # [:delimiter, '"'],
29
- # [:close, :string]
30
- # ]
31
- #
32
- # Tokens is also the interface between Scanners and Encoders:
33
- # The input is split and saved into a Tokens object. The Encoder
34
- # then builds the output from this object.
35
- #
36
- # Thus, the syntax below becomes clear:
37
- #
38
- # CodeRay.scan('price = 2.59', :ruby).html
39
- # # the Tokens object is here -------^
40
- #
41
- # See how small it is? ;)
42
- #
43
- # Tokens gives you the power to handle pre-scanned code very easily:
44
- # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
45
- # that you put in your DB.
46
- #
47
- # Tokens' subclass TokenStream allows streaming to save memory.
48
- class Tokens < Array
49
-
50
- class << self
51
-
52
- # Convert the token to a string.
53
- #
54
- # This format is used by Encoders.Tokens.
55
- # It can be reverted using read_token.
56
- def write_token text, type
57
- if text.is_a? String
58
- "#{type}\t#{escape(text)}\n"
59
- else
60
- ":#{text}\t#{type}\t\n"
61
- end
62
- end
63
-
64
- # Read a token from the string.
65
- #
66
- # Inversion of write_token.
67
- #
68
- # TODO Test this!
69
- def read_token token
70
- type, text = token.split("\t", 2)
71
- if type[0] == ?:
72
- [text.to_sym, type[1..-1].to_sym]
73
- else
74
- [type.to_sym, unescape(text)]
75
- end
76
- end
77
-
78
- # Escapes a string for use in write_token.
79
- def escape text
80
- text.gsub(/[\n\\]/, '\\\\\&')
81
- end
82
-
83
- # Unescapes a string created by escape.
84
- def unescape text
85
- text.gsub(/\\[\n\\]/) { |m| m[1,1] }
86
- end
87
-
88
- end
89
-
90
- # Whether the object is a TokenStream.
91
- #
92
- # Returns false.
93
- def stream?
94
- false
95
- end
96
-
97
- # Iterates over all tokens.
98
- #
99
- # If a filter is given, only tokens of that kind are yielded.
100
- def each kind_filter = nil, &block
101
- unless kind_filter
102
- super(&block)
103
- else
104
- super() do |text, kind|
105
- next unless kind == kind_filter
106
- yield text, kind
107
- end
108
- end
109
- end
110
-
111
- # Iterates over all text tokens.
112
- # Range tokens like [:open, :string] are left out.
113
- #
114
- # Example:
115
- # tokens.each_text_token { |text, kind| text.replace html_escape(text) }
116
- def each_text_token
117
- each do |text, kind|
118
- next unless text.respond_to? :to_str
119
- yield text, kind
120
- end
121
- end
122
-
123
- # Encode the tokens using encoder.
124
- #
125
- # encoder can be
126
- # * a symbol like :html oder :statistic
127
- # * an Encoder class
128
- # * an Encoder object
129
- #
130
- # options are passed to the encoder.
131
- def encode encoder, options = {}
132
- unless encoder.is_a? Encoders::Encoder
133
- unless encoder.is_a? Class
134
- encoder_class = Encoders[encoder]
135
- end
136
- encoder = encoder_class.new options
137
- end
138
- encoder.encode_tokens self, options
139
- end
140
-
141
-
142
- # Turn into a string using Encoders::Text.
143
- #
144
- # +options+ are passed to the encoder if given.
145
- def to_s options = {}
146
- encode :text, options
147
- end
148
-
149
-
150
- # Redirects unknown methods to encoder calls.
151
- #
152
- # For example, if you call +tokens.html+, the HTML encoder
153
- # is used to highlight the tokens.
154
- def method_missing meth, options = {}
155
- Encoders[meth].new(options).encode_tokens self
156
- end
157
-
158
- # Returns the tokens compressed by joining consecutive
159
- # tokens of the same kind.
160
- #
161
- # This can not be undone, but should yield the same output
162
- # in most Encoders. It basically makes the output smaller.
163
- #
164
- # Combined with dump, it saves space for the cost of time.
165
- #
166
- # If the scanner is written carefully, this is not required -
167
- # for example, consecutive //-comment lines could already be
168
- # joined in one comment token by the Scanner.
169
- def optimize
170
- print ' Tokens#optimize: before: %d - ' % size if $DEBUG
171
- last_kind = last_text = nil
172
- new = self.class.new
173
- each do |text, kind|
174
- if text.is_a? String
175
- if kind == last_kind
176
- last_text << text
177
- else
178
- new << [last_text, last_kind] if last_kind
179
- last_text = text
180
- last_kind = kind
181
- end
182
- else
183
- new << [last_text, last_kind] if last_kind
184
- last_kind = last_text = nil
185
- new << [text, kind]
186
- end
187
- end
188
- new << [last_text, last_kind] if last_kind
189
- print 'after: %d (%d saved = %2.0f%%)' %
190
- [new.size, size - new.size, 1.0 - (new.size.to_f / size)] if $DEBUG
191
- new
192
- end
193
-
194
- # Compact the object itself; see optimize.
195
- def optimize!
196
- replace optimize
197
- end
198
-
199
- # Dumps the object into a String that can be saved
200
- # in files or databases.
201
- #
202
- # The dump is created with Marshal.dump;
203
- # In addition, it is gzipped using GZip.gzip.
204
- #
205
- # The returned String object includes Undumping
206
- # so it has an #undump method. See Tokens.load.
207
- #
208
- # You can configure the level of compression,
209
- # but the default value 7 should be what you want
210
- # in most cases as it is a good comprimise between
211
- # speed and compression rate.
212
- #
213
- # See GZip module.
214
- def dump gzip_level = 7
215
- require 'coderay/helpers/gzip_simple'
216
- dump = Marshal.dump self
217
- dump = dump.gzip gzip_level
218
- dump.extend Undumping
219
- end
220
-
221
- # The total size of the tokens.
222
- # Should be equal to the input size before
223
- # scanning.
224
- def text_size
225
- map { |t, k| t }.join.size
226
- end
227
-
228
- # Include this module to give an object an #undump
229
- # method.
230
- #
231
- # The string returned by Tokens.dump includes Undumping.
232
- module Undumping
233
- # Calls Tokens.load with itself.
234
- def undump
235
- Tokens.load self
236
- end
237
- end
238
-
239
- # Undump the object using Marshal.load, then
240
- # unzip it using GZip.gunzip.
241
- #
242
- # The result is commonly a Tokens object, but
243
- # this is not guaranteed.
244
- def Tokens.load dump
245
- require 'coderay/helpers/gzip_simple'
246
- dump = dump.gunzip
247
- @dump = Marshal.load dump
248
- end
249
-
250
- end
251
-
252
-
253
- # = TokenStream
254
- #
255
- # The TokenStream class is a fake Array without elements.
256
- #
257
- # It redirects the method << to a block given at creation.
258
- #
259
- # This allows scanners and Encoders to use streaming (no
260
- # tokens are saved, the input is highlighted the same time it
261
- # is scanned) with the same code.
262
- #
263
- # See CodeRay.encode_stream and CodeRay.scan_stream
264
- class TokenStream < Tokens
265
-
266
- # Whether the object is a TokenStream.
267
- #
268
- # Returns true.
269
- def stream?
270
- true
271
- end
272
-
273
- # The Array is empty, but size counts the tokens given by <<.
274
- attr_reader :size
275
-
276
- # Creates a new TokenStream that calls +block+ whenever
277
- # its << method is called.
278
- #
279
- # Example:
280
- #
281
- # require 'coderay'
282
- #
283
- # token_stream = CodeRay::TokenStream.new do |kind, text|
284
- # puts 'kind: %s, text size: %d.' % [kind, text.size]
285
- # end
286
- #
287
- # token_stream << [:regexp, '/\d+/']
288
- # #-> kind: rexpexp, text size: 5.
289
- #
290
- def initialize &block
291
- raise ArgumentError, 'Block expected for streaming.' unless block
292
- @callback = block
293
- @size = 0
294
- end
295
-
296
- # Calls +block+ with +token+ and increments size.
297
- #
298
- # Returns self.
299
- def << token
300
- @callback.call token
301
- @size += 1
302
- self
303
- end
304
-
305
- # This method is not implemented due to speed reasons. Use Tokens.
306
- def text_size
307
- raise NotImplementedError, 'This method is not implemented due to speed reasons.'
308
- end
309
-
310
- # A TokenStream cannot be dumped. Use Tokens.
311
- def dump
312
- raise NotImplementedError, 'A TokenStream cannot be dumped.'
313
- end
314
-
315
- # A TokenStream cannot be optimized. Use Tokens.
316
- def optimize
317
- raise NotImplementedError, 'A TokenStream cannot be optimized.'
318
- end
319
-
320
- end
321
-
322
- end
1
+ module CodeRay
2
+
3
+ # = Tokens
4
+ #
5
+ # The Tokens class represents a list of tokens returned from
6
+ # a Scanner.
7
+ #
8
+ # A token is not a special object, just a two-element Array
9
+ # consisting of
10
+ # * the _token_ _kind_ (a Symbol representing the type of the token)
11
+ # * the _token_ _text_ (the original source of the token in a String)
12
+ #
13
+ # A token looks like this:
14
+ #
15
+ # [:comment, '# It looks like this']
16
+ # [:float, '3.1415926']
17
+ # [:error, 'äöü']
18
+ #
19
+ # Some scanners also yield some kind of sub-tokens, represented by special
20
+ # token texts, namely :open and :close .
21
+ #
22
+ # The Ruby scanner, for example, splits "a string" into:
23
+ #
24
+ # [
25
+ # [:open, :string],
26
+ # [:delimiter, '"'],
27
+ # [:content, 'a string'],
28
+ # [:delimiter, '"'],
29
+ # [:close, :string]
30
+ # ]
31
+ #
32
+ # Tokens is also the interface between Scanners and Encoders:
33
+ # The input is split and saved into a Tokens object. The Encoder
34
+ # then builds the output from this object.
35
+ #
36
+ # Thus, the syntax below becomes clear:
37
+ #
38
+ # CodeRay.scan('price = 2.59', :ruby).html
39
+ # # the Tokens object is here -------^
40
+ #
41
+ # See how small it is? ;)
42
+ #
43
+ # Tokens gives you the power to handle pre-scanned code very easily:
44
+ # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
45
+ # that you put in your DB.
46
+ #
47
+ # Tokens' subclass TokenStream allows streaming to save memory.
48
+ class Tokens < Array
49
+
50
+ class << self
51
+
52
+ # Convert the token to a string.
53
+ #
54
+ # This format is used by Encoders.Tokens.
55
+ # It can be reverted using read_token.
56
+ def write_token text, type
57
+ if text.is_a? String
58
+ "#{type}\t#{escape(text)}\n"
59
+ else
60
+ ":#{text}\t#{type}\t\n"
61
+ end
62
+ end
63
+
64
+ # Read a token from the string.
65
+ #
66
+ # Inversion of write_token.
67
+ #
68
+ # TODO Test this!
69
+ def read_token token
70
+ type, text = token.split("\t", 2)
71
+ if type[0] == ?:
72
+ [text.to_sym, type[1..-1].to_sym]
73
+ else
74
+ [type.to_sym, unescape(text)]
75
+ end
76
+ end
77
+
78
+ # Escapes a string for use in write_token.
79
+ def escape text
80
+ text.gsub(/[\n\\]/, '\\\\\&')
81
+ end
82
+
83
+ # Unescapes a string created by escape.
84
+ def unescape text
85
+ text.gsub(/\\[\n\\]/) { |m| m[1,1] }
86
+ end
87
+
88
+ end
89
+
90
+ # Whether the object is a TokenStream.
91
+ #
92
+ # Returns false.
93
+ def stream?
94
+ false
95
+ end
96
+
97
+ # Iterates over all tokens.
98
+ #
99
+ # If a filter is given, only tokens of that kind are yielded.
100
+ def each kind_filter = nil, &block
101
+ unless kind_filter
102
+ super(&block)
103
+ else
104
+ super() do |text, kind|
105
+ next unless kind == kind_filter
106
+ yield text, kind
107
+ end
108
+ end
109
+ end
110
+
111
+ # Iterates over all text tokens.
112
+ # Range tokens like [:open, :string] are left out.
113
+ #
114
+ # Example:
115
+ # tokens.each_text_token { |text, kind| text.replace html_escape(text) }
116
+ def each_text_token
117
+ each do |text, kind|
118
+ next unless text.respond_to? :to_str
119
+ yield text, kind
120
+ end
121
+ end
122
+
123
+ # Encode the tokens using encoder.
124
+ #
125
+ # encoder can be
126
+ # * a symbol like :html or :statistic
127
+ # * an Encoder class
128
+ # * an Encoder object
129
+ #
130
+ # options are passed to the encoder.
131
+ def encode encoder, options = {}
132
+ unless encoder.is_a? Encoders::Encoder
133
+ unless encoder.is_a? Class
134
+ encoder_class = Encoders[encoder]
135
+ end
136
+ encoder = encoder_class.new options
137
+ end
138
+ encoder.encode_tokens self, options
139
+ end
140
+
141
+
142
+ # Turn into a string using Encoders::Text.
143
+ #
144
+ # +options+ are passed to the encoder if given.
145
+ def to_s options = {}
146
+ encode :text, options
147
+ end
148
+
149
+
150
+ # Redirects unknown methods to encoder calls.
151
+ #
152
+ # For example, if you call +tokens.html+, the HTML encoder
153
+ # is used to highlight the tokens.
154
+ def method_missing meth, options = {}
155
+ Encoders[meth].new(options).encode_tokens self
156
+ end
157
+
158
+ # Returns the tokens compressed by joining consecutive
159
+ # tokens of the same kind.
160
+ #
161
+ # This can not be undone, but should yield the same output
162
+ # in most Encoders. It basically makes the output smaller.
163
+ #
164
+ # Combined with dump, it saves space for the cost of time.
165
+ #
166
+ # If the scanner is written carefully, this is not required -
167
+ # for example, consecutive //-comment lines could already be
168
+ # joined in one comment token by the Scanner.
169
+ def optimize
170
+ print ' Tokens#optimize: before: %d - ' % size if $DEBUG
171
+ last_kind = last_text = nil
172
+ new = self.class.new
173
+ each do |text, kind|
174
+ if text.is_a? String
175
+ if kind == last_kind
176
+ last_text << text
177
+ else
178
+ new << [last_text, last_kind] if last_kind
179
+ last_text = text
180
+ last_kind = kind
181
+ end
182
+ else
183
+ new << [last_text, last_kind] if last_kind
184
+ last_kind = last_text = nil
185
+ new << [text, kind]
186
+ end
187
+ end
188
+ new << [last_text, last_kind] if last_kind
189
+ print 'after: %d (%d saved = %2.0f%%)' %
190
+ [new.size, size - new.size, 1.0 - (new.size.to_f / size)] if $DEBUG
191
+ new
192
+ end
193
+
194
+ # Compact the object itself; see optimize.
195
+ def optimize!
196
+ replace optimize
197
+ end
198
+
199
+ # Dumps the object into a String that can be saved
200
+ # in files or databases.
201
+ #
202
+ # The dump is created with Marshal.dump;
203
+ # In addition, it is gzipped using GZip.gzip.
204
+ #
205
+ # The returned String object includes Undumping
206
+ # so it has an #undump method. See Tokens.load.
207
+ #
208
+ # You can configure the level of compression,
209
+ # but the default value 7 should be what you want
210
+ # in most cases as it is a good compromise between
211
+ # speed and compression rate.
212
+ #
213
+ # See GZip module.
214
+ def dump gzip_level = 7
215
+ require 'coderay/helpers/gzip_simple'
216
+ dump = Marshal.dump self
217
+ dump = dump.gzip gzip_level
218
+ dump.extend Undumping
219
+ end
220
+
221
+ # The total size of the tokens.
222
+ # Should be equal to the input size before
223
+ # scanning.
224
+ def text_size
225
+ map { |t, k| t }.join.size
226
+ end
227
+
228
+ # Include this module to give an object an #undump
229
+ # method.
230
+ #
231
+ # The string returned by Tokens.dump includes Undumping.
232
+ module Undumping
233
+ # Calls Tokens.load with itself.
234
+ def undump
235
+ Tokens.load self
236
+ end
237
+ end
238
+
239
+ # Unzip the dump using GZip.gunzip, then
240
+ # undump the object using Marshal.load.
241
+ #
242
+ # The result is commonly a Tokens object, but
243
+ # this is not guaranteed.
244
+ def Tokens.load dump
245
+ require 'coderay/helpers/gzip_simple'
246
+ dump = dump.gunzip
247
+ @dump = Marshal.load dump
248
+ end
249
+
250
+ end
251
+
252
+
253
+ # = TokenStream
254
+ #
255
+ # The TokenStream class is a fake Array without elements.
256
+ #
257
+ # It redirects the method << to a block given at creation.
258
+ #
259
+ # This allows scanners and Encoders to use streaming (no
260
+ # tokens are saved, the input is highlighted the same time it
261
+ # is scanned) with the same code.
262
+ #
263
+ # See CodeRay.encode_stream and CodeRay.scan_stream
264
+ class TokenStream < Tokens
265
+
266
+ # Whether the object is a TokenStream.
267
+ #
268
+ # Returns true.
269
+ def stream?
270
+ true
271
+ end
272
+
273
+ # The Array is empty, but size counts the tokens given by <<.
274
+ attr_reader :size
275
+
276
+ # Creates a new TokenStream that calls +block+ whenever
277
+ # its << method is called.
278
+ #
279
+ # Example:
280
+ #
281
+ # require 'coderay'
282
+ #
283
+ # token_stream = CodeRay::TokenStream.new do |kind, text|
284
+ # puts 'kind: %s, text size: %d.' % [kind, text.size]
285
+ # end
286
+ #
287
+ # token_stream << [:regexp, '/\d+/']
288
+ # #-> kind: regexp, text size: 5.
289
+ #
290
+ def initialize &block
291
+ raise ArgumentError, 'Block expected for streaming.' unless block
292
+ @callback = block
293
+ @size = 0
294
+ end
295
+
296
+ # Calls +block+ with +token+ and increments size.
297
+ #
298
+ # Returns self.
299
+ def << token
300
+ @callback.call token
301
+ @size += 1
302
+ self
303
+ end
304
+
305
+ # This method is not implemented due to speed reasons. Use Tokens.
306
+ def text_size
307
+ raise NotImplementedError, 'This method is not implemented due to speed reasons.'
308
+ end
309
+
310
+ # A TokenStream cannot be dumped. Use Tokens.
311
+ def dump
312
+ raise NotImplementedError, 'A TokenStream cannot be dumped.'
313
+ end
314
+
315
+ # A TokenStream cannot be optimized. Use Tokens.
316
+ def optimize
317
+ raise NotImplementedError, 'A TokenStream cannot be optimized.'
318
+ end
319
+
320
+ end
321
+
322
+ end