coderay 1.0.0 → 1.0.0.598.pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79) hide show
  1. data/FOLDERS +49 -0
  2. data/Rakefile +6 -5
  3. data/bin/coderay +74 -190
  4. data/bin/coderay_stylesheet +4 -0
  5. data/{README_INDEX.rdoc → lib/README} +20 -10
  6. data/lib/coderay.rb +60 -62
  7. data/lib/coderay/duo.rb +55 -2
  8. data/lib/coderay/encoder.rb +39 -52
  9. data/lib/coderay/encoders/_map.rb +7 -11
  10. data/lib/coderay/encoders/comment_filter.rb +61 -0
  11. data/lib/coderay/encoders/count.rb +26 -11
  12. data/lib/coderay/encoders/debug.rb +60 -11
  13. data/lib/coderay/encoders/div.rb +8 -9
  14. data/lib/coderay/encoders/filter.rb +52 -12
  15. data/lib/coderay/encoders/html.rb +113 -106
  16. data/lib/coderay/encoders/html/css.rb +7 -2
  17. data/lib/coderay/encoders/html/numbering.rb +27 -24
  18. data/lib/coderay/encoders/html/output.rb +58 -15
  19. data/lib/coderay/encoders/json.rb +44 -37
  20. data/lib/coderay/encoders/lines_of_code.rb +56 -9
  21. data/lib/coderay/encoders/null.rb +13 -6
  22. data/lib/coderay/encoders/page.rb +8 -8
  23. data/lib/coderay/encoders/span.rb +9 -10
  24. data/lib/coderay/encoders/statistic.rb +114 -51
  25. data/lib/coderay/encoders/terminal.rb +10 -7
  26. data/lib/coderay/encoders/text.rb +36 -17
  27. data/lib/coderay/encoders/token_kind_filter.rb +58 -1
  28. data/lib/coderay/encoders/xml.rb +11 -13
  29. data/lib/coderay/encoders/yaml.rb +14 -16
  30. data/lib/coderay/for_redcloth.rb +1 -1
  31. data/lib/coderay/helpers/file_type.rb +240 -125
  32. data/lib/coderay/helpers/gzip_simple.rb +123 -0
  33. data/lib/coderay/helpers/plugin.rb +307 -241
  34. data/lib/coderay/helpers/word_list.rb +126 -65
  35. data/lib/coderay/scanner.rb +103 -153
  36. data/lib/coderay/scanners/_map.rb +16 -18
  37. data/lib/coderay/scanners/c.rb +13 -13
  38. data/lib/coderay/scanners/cpp.rb +6 -6
  39. data/lib/coderay/scanners/css.rb +48 -47
  40. data/lib/coderay/scanners/debug.rb +55 -9
  41. data/lib/coderay/scanners/delphi.rb +4 -4
  42. data/lib/coderay/scanners/diff.rb +25 -43
  43. data/lib/coderay/scanners/groovy.rb +2 -2
  44. data/lib/coderay/scanners/html.rb +30 -107
  45. data/lib/coderay/scanners/java.rb +5 -6
  46. data/lib/coderay/scanners/java/builtin_types.rb +0 -2
  47. data/lib/coderay/scanners/java_script.rb +6 -6
  48. data/lib/coderay/scanners/json.rb +6 -7
  49. data/lib/coderay/scanners/nitro_xhtml.rb +136 -0
  50. data/lib/coderay/scanners/php.rb +12 -13
  51. data/lib/coderay/scanners/plaintext.rb +26 -0
  52. data/lib/coderay/scanners/python.rb +4 -4
  53. data/lib/coderay/scanners/{erb.rb → rhtml.rb} +11 -19
  54. data/lib/coderay/scanners/ruby.rb +208 -219
  55. data/lib/coderay/scanners/ruby/patterns.rb +85 -18
  56. data/lib/coderay/scanners/scheme.rb +136 -0
  57. data/lib/coderay/scanners/sql.rb +22 -29
  58. data/lib/coderay/scanners/yaml.rb +10 -11
  59. data/lib/coderay/styles/_map.rb +2 -2
  60. data/lib/coderay/styles/alpha.rb +104 -102
  61. data/lib/coderay/styles/cycnus.rb +143 -0
  62. data/lib/coderay/styles/murphy.rb +123 -0
  63. data/lib/coderay/token_kinds.rb +86 -87
  64. data/lib/coderay/tokens.rb +169 -26
  65. data/test/functional/basic.rb +14 -200
  66. data/test/functional/examples.rb +14 -20
  67. data/test/functional/for_redcloth.rb +8 -15
  68. data/test/functional/load_plugin_scanner.rb +11 -0
  69. data/test/functional/suite.rb +6 -9
  70. data/test/functional/vhdl.rb +126 -0
  71. data/test/functional/word_list.rb +79 -0
  72. metadata +129 -107
  73. data/lib/coderay/helpers/gzip.rb +0 -41
  74. data/lib/coderay/scanners/clojure.rb +0 -217
  75. data/lib/coderay/scanners/haml.rb +0 -168
  76. data/lib/coderay/scanners/ruby/string_state.rb +0 -71
  77. data/lib/coderay/scanners/text.rb +0 -26
  78. data/lib/coderay/tokens_proxy.rb +0 -55
  79. data/lib/coderay/version.rb +0 -3
@@ -1,77 +1,138 @@
1
1
  module CodeRay
2
-
3
- # = WordList
2
+
3
+ # = WordList
4
+ #
5
+ # <b>A Hash subclass designed for mapping word lists to token types.</b>
6
+ #
7
+ # Copyright (c) 2006 by murphy (Kornelius Kalnbach) <murphy rubychan de>
8
+ #
9
+ # License:: LGPL / ask the author
10
+ # Version:: 1.1 (2006-Oct-19)
11
+ #
12
+ # A WordList is a Hash with some additional features.
13
+ # It is intended to be used for keyword recognition.
14
+ #
15
+ # WordList is highly optimized to be used in Scanners,
16
+ # typically to decide whether a given ident is a special token.
17
+ #
18
+ # For case insensitive words use CaseIgnoringWordList.
19
+ #
20
+ # Example:
21
+ #
22
+ # # define word arrays
23
+ # RESERVED_WORDS = %w[
24
+ # asm break case continue default do else
25
+ # ...
26
+ # ]
27
+ #
28
+ # PREDEFINED_TYPES = %w[
29
+ # int long short char void
30
+ # ...
31
+ # ]
32
+ #
33
+ # PREDEFINED_CONSTANTS = %w[
34
+ # EOF NULL ...
35
+ # ]
36
+ #
37
+ # # make a WordList
38
+ # IDENT_KIND = WordList.new(:ident).
39
+ # add(RESERVED_WORDS, :reserved).
40
+ # add(PREDEFINED_TYPES, :pre_type).
41
+ # add(PREDEFINED_CONSTANTS, :pre_constant)
42
+ #
43
+ # ...
44
+ #
45
+ # def scan_tokens tokens, options
46
+ # ...
47
+ #
48
+ # elsif scan(/[A-Za-z_][A-Za-z_0-9]*/)
49
+ # # use it
50
+ # kind = IDENT_KIND[match]
51
+ # ...
52
+ class WordList < Hash
53
+
54
+ # Creates a new WordList with +default+ as default value.
4
55
  #
5
- # <b>A Hash subclass designed for mapping word lists to token types.</b>
56
+ # You can activate +caching+ to store the results for every [] request.
6
57
  #
7
- # Copyright (c) 2006-2011 by murphy (Kornelius Kalnbach) <murphy rubychan de>
8
- #
9
- # License:: LGPL / ask the author
10
- # Version:: 2.0 (2011-05-08)
11
- #
12
- # A WordList is a Hash with some additional features.
13
- # It is intended to be used for keyword recognition.
14
- #
15
- # WordList is optimized to be used in Scanners,
16
- # typically to decide whether a given ident is a special token.
17
- #
18
- # For case insensitive words use WordList::CaseIgnoring.
19
- #
20
- # Example:
21
- #
22
- # # define word arrays
23
- # RESERVED_WORDS = %w[
24
- # asm break case continue default do else
25
- # ]
26
- #
27
- # PREDEFINED_TYPES = %w[
28
- # int long short char void
29
- # ]
30
- #
31
- # # make a WordList
32
- # IDENT_KIND = WordList.new(:ident).
33
- # add(RESERVED_WORDS, :reserved).
34
- # add(PREDEFINED_TYPES, :predefined_type)
35
- #
36
- # ...
37
- #
38
- # def scan_tokens tokens, options
39
- # ...
40
- #
41
- # elsif scan(/[A-Za-z_][A-Za-z_0-9]*/)
42
- # # use it
43
- # kind = IDENT_KIND[match]
44
- # ...
45
- class WordList < Hash
46
-
47
- # Create a new WordList with +default+ as default value.
48
- def initialize default = false
49
- super default
58
+ # With caching, methods like +include?+ or +delete+ may no longer behave
59
+ # as you expect. Therefore, it is recommended to use the [] method only.
60
+ def initialize default = false, caching = false, &block
61
+ if block
62
+ raise ArgumentError, 'Can\'t combine block with caching.' if caching
63
+ super(&block)
64
+ else
65
+ if caching
66
+ super() do |h, k|
67
+ h[k] = h.fetch k, default
68
+ end
69
+ else
70
+ super default
71
+ end
50
72
  end
51
-
52
- # Add words to the list and associate them with +value+.
53
- #
54
- # Returns +self+, so you can concat add calls.
55
- def add words, value = true
56
- words.each { |word| self[word] = value }
57
- self
73
+ end
74
+
75
+ # Add words to the list and associate them with +kind+.
76
+ #
77
+ # Returns +self+, so you can concat add calls.
78
+ def add words, kind = true
79
+ words.each do |word|
80
+ self[word] = kind
81
+ end
82
+ self
83
+ end
84
+
85
+ end
86
+
87
+
88
+ # A CaseIgnoringWordList is like a WordList, only that
89
+ # keys are compared case-insensitively.
90
+ #
91
+ # Ignoring the text case is realized by sending the +downcase+ message to
92
+ # all keys.
93
+ #
94
+ # Caching usually makes a CaseIgnoringWordList faster, but it has to be
95
+ # activated explicitly.
96
+ class CaseIgnoringWordList < WordList
97
+
98
+ # Creates a new case-insensitive WordList with +default+ as default value.
99
+ #
100
+ # You can activate caching to store the results for every [] request.
101
+ # This speeds up subsequent lookups for the same word, but also
102
+ # uses memory.
103
+ def initialize default = false, caching = false
104
+ if caching
105
+ super(default, false) do |h, k|
106
+ h[k] = h.fetch k.downcase, default
107
+ end
108
+ else
109
+ super(default, false)
110
+ extend Uncached
58
111
  end
59
-
60
112
  end
61
113
 
62
-
63
- # A CaseIgnoring WordList is like a WordList, only that
64
- # keys are compared case-insensitively (normalizing keys using +downcase+).
65
- class WordList::CaseIgnoring < WordList
66
-
114
+ module Uncached # :nodoc:
67
115
  def [] key
68
- super key.downcase
116
+ super(key.downcase)
69
117
  end
70
-
71
- def []= key, value
72
- super key.downcase, value
118
+ end
119
+
120
+ # Add +words+ to the list and associate them with +kind+.
121
+ def add words, kind = true
122
+ words.each do |word|
123
+ self[word.downcase] = kind
73
124
  end
74
-
125
+ self
75
126
  end
76
-
127
+
128
+ end
129
+
77
130
  end
131
+
132
+ __END__
133
+ # check memory consumption
134
+ END {
135
+ ObjectSpace.each_object(CodeRay::CaseIgnoringWordList) do |wl|
136
+ p wl.inject(0) { |memo, key, value| memo + key.size + 24 }
137
+ end
138
+ }
@@ -1,10 +1,7 @@
1
- # encoding: utf-8
2
- require 'strscan'
3
-
4
1
  module CodeRay
5
2
 
6
- autoload :WordList, 'coderay/helpers/word_list'
7
-
3
+ require 'coderay/helpers/plugin'
4
+
8
5
  # = Scanners
9
6
  #
10
7
  # This module holds the Scanner class and its subclasses.
@@ -19,8 +16,9 @@ module CodeRay
19
16
  module Scanners
20
17
  extend PluginHost
21
18
  plugin_path File.dirname(__FILE__), 'scanners'
22
-
23
-
19
+
20
+ require 'strscan'
21
+
24
22
  # = Scanner
25
23
  #
26
24
  # The base class for all Scanners.
@@ -48,89 +46,61 @@ module CodeRay
48
46
 
49
47
  extend Plugin
50
48
  plugin_host Scanners
51
-
49
+
52
50
  # Raised if a Scanner fails while scanning
53
- ScanError = Class.new StandardError
54
-
51
+ ScanError = Class.new(Exception)
52
+
53
+ require 'coderay/helpers/word_list'
54
+
55
55
  # The default options for all scanner classes.
56
56
  #
57
57
  # Define @default_options for subclasses.
58
58
  DEFAULT_OPTIONS = { }
59
59
 
60
60
  KINDS_NOT_LOC = [:comment, :doctype, :docstring]
61
-
62
- attr_accessor :state
63
-
61
+
64
62
  class << self
65
-
66
- # Normalizes the given code into a string with UNIX newlines, in the
67
- # scanner's internal encoding, with invalid and undefined characters
68
- # replaced by placeholders. Always returns a new object.
69
- def normalize code
70
- # original = code
71
- code = code.to_s unless code.is_a? ::String
72
- return code if code.empty?
73
-
74
- if code.respond_to? :encoding
75
- code = encode_with_encoding code, self.encoding
76
- else
77
- code = to_unix code
63
+
64
+ def normify code
65
+ code = code.to_s.dup
66
+ # try using UTF-8
67
+ if code.respond_to? :force_encoding
68
+ debug, $DEBUG = $DEBUG, false
69
+ begin
70
+ code.force_encoding 'UTF-8'
71
+ code[/\z/] # raises an ArgumentError when code contains a non-UTF-8 char
72
+ rescue ArgumentError
73
+ code.force_encoding 'binary'
74
+ ensure
75
+ $DEBUG = debug
76
+ end
78
77
  end
79
- # code = code.dup if code.eql? original
78
+ # convert the string to UNIX newline format
79
+ code.gsub!(/\r\n?/, "\n") if code.index ?\r
80
80
  code
81
81
  end
82
82
 
83
- # The typical filename suffix for this scanner's language.
84
- def file_extension extension = lang
85
- @file_extension ||= extension.to_s
86
- end
87
-
88
- # The encoding used internally by this scanner.
89
- def encoding name = 'UTF-8'
90
- @encoding ||= defined?(Encoding.find) && Encoding.find(name)
91
- end
92
-
93
- # The lang of this Scanner class, which is equal to its Plugin ID.
94
- def lang
95
- @plugin_id
96
- end
97
-
98
- protected
99
-
100
- def encode_with_encoding code, target_encoding
101
- if code.encoding == target_encoding
102
- if code.valid_encoding?
103
- return to_unix(code)
104
- else
105
- source_encoding = guess_encoding code
106
- end
83
+ def file_extension extension = nil
84
+ if extension
85
+ @file_extension = extension.to_s
107
86
  else
108
- source_encoding = code.encoding
87
+ @file_extension ||= plugin_id.to_s
109
88
  end
110
- # print "encode_with_encoding from #{source_encoding} to #{target_encoding}"
111
- code.encode target_encoding, source_encoding, :universal_newline => true, :undef => :replace, :invalid => :replace
112
- end
113
-
114
- def to_unix code
115
- code.index(?\r) ? code.gsub(/\r\n?/, "\n") : code
116
89
  end
117
-
118
- def guess_encoding s
119
- #:nocov:
120
- IO.popen("file -b --mime -", "w+") do |file|
121
- file.write s[0, 1024]
122
- file.close_write
123
- begin
124
- Encoding.find file.gets[/charset=([-\w]+)/, 1]
125
- rescue ArgumentError
126
- Encoding::BINARY
127
- end
128
- end
129
- #:nocov:
130
- end
131
-
90
+
132
91
  end
133
-
92
+
93
+ =begin
94
+ ## Excluded for speed reasons; protected seems to make methods slow.
95
+
96
+ # Save the StringScanner methods from being called.
97
+ # This would not be useful for highlighting.
98
+ strscan_public_methods =
99
+ StringScanner.instance_methods -
100
+ StringScanner.ancestors[1].instance_methods
101
+ protected(*strscan_public_methods)
102
+ =end
103
+
134
104
  # Create a new Scanner.
135
105
  #
136
106
  # * +code+ is the input String and is handled by the superclass
@@ -140,66 +110,58 @@ module CodeRay
140
110
  # overwrite default options here.)
141
111
  #
142
112
  # Else, a Tokens object is used.
143
- def initialize code = '', options = {}
144
- if self.class == Scanner
145
- raise NotImplementedError, "I am only the basic Scanner class. I can't scan anything. :( Use my subclasses."
146
- end
113
+ def initialize code='', options = {}
114
+ raise "I am only the basic Scanner class. I can't scan "\
115
+ "anything. :( Use my subclasses." if self.class == Scanner
147
116
 
148
117
  @options = self.class::DEFAULT_OPTIONS.merge options
149
-
150
- super self.class.normalize(code)
151
-
118
+
119
+ super Scanner.normify(code)
120
+
152
121
  @tokens = options[:tokens] || Tokens.new
153
122
  @tokens.scanner = self if @tokens.respond_to? :scanner=
154
-
123
+
155
124
  setup
156
125
  end
157
126
 
158
- # Sets back the scanner. Subclasses should redefine the reset_instance
159
- # method instead of this one.
127
+ # Sets back the scanner. Subclasses are to define the reset_instance
128
+ # method.
160
129
  def reset
161
130
  super
162
131
  reset_instance
163
132
  end
164
-
165
- # Set a new string to be scanned.
133
+
166
134
  def string= code
167
- code = self.class.normalize(code)
135
+ code = Scanner.normify(code)
168
136
  super code
169
137
  reset_instance
170
138
  end
171
-
172
- # the Plugin ID for this scanner
139
+
140
+ # More mnemonic accessor name for the input string.
141
+ alias code string
142
+ alias code= string=
143
+
144
+ # Returns the Plugin ID for this scanner.
173
145
  def lang
174
- self.class.lang
175
- end
176
-
177
- # the default file extension for this scanner
178
- def file_extension
179
- self.class.file_extension
146
+ self.class.plugin_id.to_s
180
147
  end
181
-
182
- # Scan the code and returns all tokens in a Tokens object.
148
+
149
+ # Scans the code and returns all tokens in a Tokens object.
183
150
  def tokenize source = nil, options = {}
184
151
  options = @options.merge(options)
185
152
  @tokens = options[:tokens] || @tokens || Tokens.new
186
153
  @tokens.scanner = self if @tokens.respond_to? :scanner=
187
154
  case source
155
+ when String
156
+ self.string = source
188
157
  when Array
189
- self.string = self.class.normalize(source.join)
158
+ self.string = source.join
190
159
  when nil
191
160
  reset
192
161
  else
193
- self.string = self.class.normalize(source)
194
- end
195
-
196
- begin
197
- scan_tokens @tokens, options
198
- rescue => e
199
- message = "Error in %s#scan_tokens, initial state was: %p" % [self.class, defined?(state) && state]
200
- raise_inspect e.message, @tokens, message, 30, e.backtrace
162
+ raise ArgumentError, 'expected String, Array, or nil'
201
163
  end
202
-
164
+ scan_tokens @tokens, options
203
165
  @cached_tokens = @tokens
204
166
  if source.is_a? Array
205
167
  @tokens.split_into_parts(*source.map { |part| part.size })
@@ -208,51 +170,46 @@ module CodeRay
208
170
  end
209
171
  end
210
172
 
211
- # Cache the result of tokenize.
173
+ # Caches the result of tokenize.
212
174
  def tokens
213
175
  @cached_tokens ||= tokenize
214
176
  end
215
177
 
216
- # Traverse the tokens.
178
+ # Traverses the tokens.
217
179
  def each &block
218
180
  tokens.each(&block)
219
181
  end
220
182
  include Enumerable
221
-
222
- # The current line position of the scanner, starting with 1.
223
- # See also: #column.
183
+
184
+ # The current line position of the scanner.
224
185
  #
225
186
  # Beware, this is implemented inefficiently. It should be used
226
187
  # for debugging only.
227
- def line pos = self.pos
228
- return 1 if pos <= 0
229
- binary_string[0...pos].count("\n") + 1
188
+ def line
189
+ string[0..pos].count("\n") + 1
230
190
  end
231
191
 
232
- # The current column position of the scanner, starting with 1.
233
- # See also: #line.
192
+ # The current column position of the scanner. See #line.
234
193
  def column pos = self.pos
235
- return 1 if pos <= 0
236
- pos - (binary_string.rindex(?\n, pos - 1) || -1)
194
+ return 0 if pos <= 0
195
+ string = string()
196
+ if string.respond_to?(:bytesize) && (defined?(@bin_string) || string.bytesize != string.size)
197
+ @bin_string ||= string.dup.force_encoding('binary')
198
+ string = @bin_string
199
+ end
200
+ pos - (string.rindex(?\n, pos) || 0)
237
201
  end
238
202
 
239
- # The string in binary encoding.
240
- #
241
- # To be used with #pos, which is the index of the byte the scanner
242
- # will scan next.
243
- def binary_string
244
- @binary_string ||=
245
- if string.respond_to?(:bytesize) && string.bytesize != string.size
246
- #:nocov:
247
- string.dup.force_encoding('binary')
248
- #:nocov:
249
- else
250
- string
251
- end
203
+ def marshal_dump # :nodoc:
204
+ @options
252
205
  end
253
206
 
207
+ def marshal_load options # :nodoc:
208
+ @options = options
209
+ end
210
+
254
211
  protected
255
-
212
+
256
213
  # Can be implemented by subclasses to do some initialization
257
214
  # that has to be done once per instance.
258
215
  #
@@ -260,25 +217,26 @@ module CodeRay
260
217
  # scan.
261
218
  def setup # :doc:
262
219
  end
263
-
220
+
264
221
  # This is the central method, and commonly the only one a
265
222
  # subclass implements.
266
223
  #
267
224
  # Subclasses must implement this method; it must return +tokens+
268
225
  # and must only use Tokens#<< for storing scanned tokens!
269
226
  def scan_tokens tokens, options # :doc:
270
- raise NotImplementedError, "#{self.class}#scan_tokens not implemented."
227
+ raise NotImplementedError,
228
+ "#{self.class}#scan_tokens not implemented."
271
229
  end
272
230
 
273
231
  # Resets the scanner.
274
232
  def reset_instance
275
233
  @tokens.clear if @tokens.respond_to?(:clear) && !@options[:keep_tokens]
276
234
  @cached_tokens = nil
277
- @binary_string = nil if defined? @binary_string
235
+ @bin_string = nil if defined? @bin_string
278
236
  end
279
-
237
+
280
238
  # Scanner error with additional status information
281
- def raise_inspect msg, tokens, state = self.state || 'No state given!', ambit = 30, backtrace = caller
239
+ def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
282
240
  raise ScanError, <<-EOE % [
283
241
 
284
242
 
@@ -300,24 +258,16 @@ surrounding code:
300
258
  EOE
301
259
  File.basename(caller[0]),
302
260
  msg,
303
- tokens.respond_to?(:size) ? tokens.size : 0,
304
- tokens.respond_to?(:last) ? tokens.last(10).map { |t| t.inspect }.join("\n") : '',
261
+ tokens.size,
262
+ tokens.last(10).map { |t| t.inspect }.join("\n"),
305
263
  line, column, pos,
306
264
  matched, state, bol?, eos?,
307
- binary_string[pos - ambit, ambit],
308
- binary_string[pos, ambit],
309
- ], backtrace
265
+ string[pos - ambit, ambit],
266
+ string[pos, ambit],
267
+ ]
310
268
  end
311
-
312
- # Shorthand for scan_until(/\z/).
313
- # This method also avoids a JRuby 1.9 mode bug.
314
- def scan_rest
315
- rest = self.rest
316
- terminate
317
- rest
318
- end
319
-
269
+
320
270
  end
321
-
271
+
322
272
  end
323
273
  end