coderay 1.0.0 → 1.0.0.598.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. data/FOLDERS +49 -0
  2. data/Rakefile +6 -5
  3. data/bin/coderay +74 -190
  4. data/bin/coderay_stylesheet +4 -0
  5. data/{README_INDEX.rdoc → lib/README} +20 -10
  6. data/lib/coderay.rb +60 -62
  7. data/lib/coderay/duo.rb +55 -2
  8. data/lib/coderay/encoder.rb +39 -52
  9. data/lib/coderay/encoders/_map.rb +7 -11
  10. data/lib/coderay/encoders/comment_filter.rb +61 -0
  11. data/lib/coderay/encoders/count.rb +26 -11
  12. data/lib/coderay/encoders/debug.rb +60 -11
  13. data/lib/coderay/encoders/div.rb +8 -9
  14. data/lib/coderay/encoders/filter.rb +52 -12
  15. data/lib/coderay/encoders/html.rb +113 -106
  16. data/lib/coderay/encoders/html/css.rb +7 -2
  17. data/lib/coderay/encoders/html/numbering.rb +27 -24
  18. data/lib/coderay/encoders/html/output.rb +58 -15
  19. data/lib/coderay/encoders/json.rb +44 -37
  20. data/lib/coderay/encoders/lines_of_code.rb +56 -9
  21. data/lib/coderay/encoders/null.rb +13 -6
  22. data/lib/coderay/encoders/page.rb +8 -8
  23. data/lib/coderay/encoders/span.rb +9 -10
  24. data/lib/coderay/encoders/statistic.rb +114 -51
  25. data/lib/coderay/encoders/terminal.rb +10 -7
  26. data/lib/coderay/encoders/text.rb +36 -17
  27. data/lib/coderay/encoders/token_kind_filter.rb +58 -1
  28. data/lib/coderay/encoders/xml.rb +11 -13
  29. data/lib/coderay/encoders/yaml.rb +14 -16
  30. data/lib/coderay/for_redcloth.rb +1 -1
  31. data/lib/coderay/helpers/file_type.rb +240 -125
  32. data/lib/coderay/helpers/gzip_simple.rb +123 -0
  33. data/lib/coderay/helpers/plugin.rb +307 -241
  34. data/lib/coderay/helpers/word_list.rb +126 -65
  35. data/lib/coderay/scanner.rb +103 -153
  36. data/lib/coderay/scanners/_map.rb +16 -18
  37. data/lib/coderay/scanners/c.rb +13 -13
  38. data/lib/coderay/scanners/cpp.rb +6 -6
  39. data/lib/coderay/scanners/css.rb +48 -47
  40. data/lib/coderay/scanners/debug.rb +55 -9
  41. data/lib/coderay/scanners/delphi.rb +4 -4
  42. data/lib/coderay/scanners/diff.rb +25 -43
  43. data/lib/coderay/scanners/groovy.rb +2 -2
  44. data/lib/coderay/scanners/html.rb +30 -107
  45. data/lib/coderay/scanners/java.rb +5 -6
  46. data/lib/coderay/scanners/java/builtin_types.rb +0 -2
  47. data/lib/coderay/scanners/java_script.rb +6 -6
  48. data/lib/coderay/scanners/json.rb +6 -7
  49. data/lib/coderay/scanners/nitro_xhtml.rb +136 -0
  50. data/lib/coderay/scanners/php.rb +12 -13
  51. data/lib/coderay/scanners/plaintext.rb +26 -0
  52. data/lib/coderay/scanners/python.rb +4 -4
  53. data/lib/coderay/scanners/{erb.rb → rhtml.rb} +11 -19
  54. data/lib/coderay/scanners/ruby.rb +208 -219
  55. data/lib/coderay/scanners/ruby/patterns.rb +85 -18
  56. data/lib/coderay/scanners/scheme.rb +136 -0
  57. data/lib/coderay/scanners/sql.rb +22 -29
  58. data/lib/coderay/scanners/yaml.rb +10 -11
  59. data/lib/coderay/styles/_map.rb +2 -2
  60. data/lib/coderay/styles/alpha.rb +104 -102
  61. data/lib/coderay/styles/cycnus.rb +143 -0
  62. data/lib/coderay/styles/murphy.rb +123 -0
  63. data/lib/coderay/token_kinds.rb +86 -87
  64. data/lib/coderay/tokens.rb +169 -26
  65. data/test/functional/basic.rb +14 -200
  66. data/test/functional/examples.rb +14 -20
  67. data/test/functional/for_redcloth.rb +8 -15
  68. data/test/functional/load_plugin_scanner.rb +11 -0
  69. data/test/functional/suite.rb +6 -9
  70. data/test/functional/vhdl.rb +126 -0
  71. data/test/functional/word_list.rb +79 -0
  72. metadata +129 -107
  73. data/lib/coderay/helpers/gzip.rb +0 -41
  74. data/lib/coderay/scanners/clojure.rb +0 -217
  75. data/lib/coderay/scanners/haml.rb +0 -168
  76. data/lib/coderay/scanners/ruby/string_state.rb +0 -71
  77. data/lib/coderay/scanners/text.rb +0 -26
  78. data/lib/coderay/tokens_proxy.rb +0 -55
  79. data/lib/coderay/version.rb +0 -3
@@ -1,77 +1,138 @@
1
1
  module CodeRay
2
-
3
- # = WordList
2
+
3
+ # = WordList
4
+ #
5
+ # <b>A Hash subclass designed for mapping word lists to token types.</b>
6
+ #
7
+ # Copyright (c) 2006 by murphy (Kornelius Kalnbach) <murphy rubychan de>
8
+ #
9
+ # License:: LGPL / ask the author
10
+ # Version:: 1.1 (2006-Oct-19)
11
+ #
12
+ # A WordList is a Hash with some additional features.
13
+ # It is intended to be used for keyword recognition.
14
+ #
15
+ # WordList is highly optimized to be used in Scanners,
16
+ # typically to decide whether a given ident is a special token.
17
+ #
18
+ # For case insensitive words use CaseIgnoringWordList.
19
+ #
20
+ # Example:
21
+ #
22
+ # # define word arrays
23
+ # RESERVED_WORDS = %w[
24
+ # asm break case continue default do else
25
+ # ...
26
+ # ]
27
+ #
28
+ # PREDEFINED_TYPES = %w[
29
+ # int long short char void
30
+ # ...
31
+ # ]
32
+ #
33
+ # PREDEFINED_CONSTANTS = %w[
34
+ # EOF NULL ...
35
+ # ]
36
+ #
37
+ # # make a WordList
38
+ # IDENT_KIND = WordList.new(:ident).
39
+ # add(RESERVED_WORDS, :reserved).
40
+ # add(PREDEFINED_TYPES, :pre_type).
41
+ # add(PREDEFINED_CONSTANTS, :pre_constant)
42
+ #
43
+ # ...
44
+ #
45
+ # def scan_tokens tokens, options
46
+ # ...
47
+ #
48
+ # elsif scan(/[A-Za-z_][A-Za-z_0-9]*/)
49
+ # # use it
50
+ # kind = IDENT_KIND[match]
51
+ # ...
52
+ class WordList < Hash
53
+
54
+ # Creates a new WordList with +default+ as default value.
4
55
  #
5
- # <b>A Hash subclass designed for mapping word lists to token types.</b>
56
+ # You can activate +caching+ to store the results for every [] request.
6
57
  #
7
- # Copyright (c) 2006-2011 by murphy (Kornelius Kalnbach) <murphy rubychan de>
8
- #
9
- # License:: LGPL / ask the author
10
- # Version:: 2.0 (2011-05-08)
11
- #
12
- # A WordList is a Hash with some additional features.
13
- # It is intended to be used for keyword recognition.
14
- #
15
- # WordList is optimized to be used in Scanners,
16
- # typically to decide whether a given ident is a special token.
17
- #
18
- # For case insensitive words use WordList::CaseIgnoring.
19
- #
20
- # Example:
21
- #
22
- # # define word arrays
23
- # RESERVED_WORDS = %w[
24
- # asm break case continue default do else
25
- # ]
26
- #
27
- # PREDEFINED_TYPES = %w[
28
- # int long short char void
29
- # ]
30
- #
31
- # # make a WordList
32
- # IDENT_KIND = WordList.new(:ident).
33
- # add(RESERVED_WORDS, :reserved).
34
- # add(PREDEFINED_TYPES, :predefined_type)
35
- #
36
- # ...
37
- #
38
- # def scan_tokens tokens, options
39
- # ...
40
- #
41
- # elsif scan(/[A-Za-z_][A-Za-z_0-9]*/)
42
- # # use it
43
- # kind = IDENT_KIND[match]
44
- # ...
45
- class WordList < Hash
46
-
47
- # Create a new WordList with +default+ as default value.
48
- def initialize default = false
49
- super default
58
+ # With caching, methods like +include?+ or +delete+ may no longer behave
59
+ # as you expect. Therefore, it is recommended to use the [] method only.
60
+ def initialize default = false, caching = false, &block
61
+ if block
62
+ raise ArgumentError, 'Can\'t combine block with caching.' if caching
63
+ super(&block)
64
+ else
65
+ if caching
66
+ super() do |h, k|
67
+ h[k] = h.fetch k, default
68
+ end
69
+ else
70
+ super default
71
+ end
50
72
  end
51
-
52
- # Add words to the list and associate them with +value+.
53
- #
54
- # Returns +self+, so you can concat add calls.
55
- def add words, value = true
56
- words.each { |word| self[word] = value }
57
- self
73
+ end
74
+
75
+ # Add words to the list and associate them with +kind+.
76
+ #
77
+ # Returns +self+, so you can concat add calls.
78
+ def add words, kind = true
79
+ words.each do |word|
80
+ self[word] = kind
81
+ end
82
+ self
83
+ end
84
+
85
+ end
86
+
87
+
88
+ # A CaseIgnoringWordList is like a WordList, only that
89
+ # keys are compared case-insensitively.
90
+ #
91
+ # Ignoring the text case is realized by sending the +downcase+ message to
92
+ # all keys.
93
+ #
94
+ # Caching usually makes a CaseIgnoringWordList faster, but it has to be
95
+ # activated explicitely.
96
+ class CaseIgnoringWordList < WordList
97
+
98
+ # Creates a new case-insensitive WordList with +default+ as default value.
99
+ #
100
+ # You can activate caching to store the results for every [] request.
101
+ # This speeds up subsequent lookups for the same word, but also
102
+ # uses memory.
103
+ def initialize default = false, caching = false
104
+ if caching
105
+ super(default, false) do |h, k|
106
+ h[k] = h.fetch k.downcase, default
107
+ end
108
+ else
109
+ super(default, false)
110
+ extend Uncached
58
111
  end
59
-
60
112
  end
61
113
 
62
-
63
- # A CaseIgnoring WordList is like a WordList, only that
64
- # keys are compared case-insensitively (normalizing keys using +downcase+).
65
- class WordList::CaseIgnoring < WordList
66
-
114
+ module Uncached # :nodoc:
67
115
  def [] key
68
- super key.downcase
116
+ super(key.downcase)
69
117
  end
70
-
71
- def []= key, value
72
- super key.downcase, value
118
+ end
119
+
120
+ # Add +words+ to the list and associate them with +kind+.
121
+ def add words, kind = true
122
+ words.each do |word|
123
+ self[word.downcase] = kind
73
124
  end
74
-
125
+ self
75
126
  end
76
-
127
+
128
+ end
129
+
77
130
  end
131
+
132
+ __END__
133
+ # check memory consumption
134
+ END {
135
+ ObjectSpace.each_object(CodeRay::CaseIgnoringWordList) do |wl|
136
+ p wl.inject(0) { |memo, key, value| memo + key.size + 24 }
137
+ end
138
+ }
@@ -1,10 +1,7 @@
1
- # encoding: utf-8
2
- require 'strscan'
3
-
4
1
  module CodeRay
5
2
 
6
- autoload :WordList, 'coderay/helpers/word_list'
7
-
3
+ require 'coderay/helpers/plugin'
4
+
8
5
  # = Scanners
9
6
  #
10
7
  # This module holds the Scanner class and its subclasses.
@@ -19,8 +16,9 @@ module CodeRay
19
16
  module Scanners
20
17
  extend PluginHost
21
18
  plugin_path File.dirname(__FILE__), 'scanners'
22
-
23
-
19
+
20
+ require 'strscan'
21
+
24
22
  # = Scanner
25
23
  #
26
24
  # The base class for all Scanners.
@@ -48,89 +46,61 @@ module CodeRay
48
46
 
49
47
  extend Plugin
50
48
  plugin_host Scanners
51
-
49
+
52
50
  # Raised if a Scanner fails while scanning
53
- ScanError = Class.new StandardError
54
-
51
+ ScanError = Class.new(Exception)
52
+
53
+ require 'coderay/helpers/word_list'
54
+
55
55
  # The default options for all scanner classes.
56
56
  #
57
57
  # Define @default_options for subclasses.
58
58
  DEFAULT_OPTIONS = { }
59
59
 
60
60
  KINDS_NOT_LOC = [:comment, :doctype, :docstring]
61
-
62
- attr_accessor :state
63
-
61
+
64
62
  class << self
65
-
66
- # Normalizes the given code into a string with UNIX newlines, in the
67
- # scanner's internal encoding, with invalid and undefined charachters
68
- # replaced by placeholders. Always returns a new object.
69
- def normalize code
70
- # original = code
71
- code = code.to_s unless code.is_a? ::String
72
- return code if code.empty?
73
-
74
- if code.respond_to? :encoding
75
- code = encode_with_encoding code, self.encoding
76
- else
77
- code = to_unix code
63
+
64
+ def normify code
65
+ code = code.to_s.dup
66
+ # try using UTF-8
67
+ if code.respond_to? :force_encoding
68
+ debug, $DEBUG = $DEBUG, false
69
+ begin
70
+ code.force_encoding 'UTF-8'
71
+ code[/\z/] # raises an ArgumentError when code contains a non-UTF-8 char
72
+ rescue ArgumentError
73
+ code.force_encoding 'binary'
74
+ ensure
75
+ $DEBUG = debug
76
+ end
78
77
  end
79
- # code = code.dup if code.eql? original
78
+ # convert the string to UNIX newline format
79
+ code.gsub!(/\r\n?/, "\n") if code.index ?\r
80
80
  code
81
81
  end
82
82
 
83
- # The typical filename suffix for this scanner's language.
84
- def file_extension extension = lang
85
- @file_extension ||= extension.to_s
86
- end
87
-
88
- # The encoding used internally by this scanner.
89
- def encoding name = 'UTF-8'
90
- @encoding ||= defined?(Encoding.find) && Encoding.find(name)
91
- end
92
-
93
- # The lang of this Scanner class, which is equal to its Plugin ID.
94
- def lang
95
- @plugin_id
96
- end
97
-
98
- protected
99
-
100
- def encode_with_encoding code, target_encoding
101
- if code.encoding == target_encoding
102
- if code.valid_encoding?
103
- return to_unix(code)
104
- else
105
- source_encoding = guess_encoding code
106
- end
83
+ def file_extension extension = nil
84
+ if extension
85
+ @file_extension = extension.to_s
107
86
  else
108
- source_encoding = code.encoding
87
+ @file_extension ||= plugin_id.to_s
109
88
  end
110
- # print "encode_with_encoding from #{source_encoding} to #{target_encoding}"
111
- code.encode target_encoding, source_encoding, :universal_newline => true, :undef => :replace, :invalid => :replace
112
- end
113
-
114
- def to_unix code
115
- code.index(?\r) ? code.gsub(/\r\n?/, "\n") : code
116
89
  end
117
-
118
- def guess_encoding s
119
- #:nocov:
120
- IO.popen("file -b --mime -", "w+") do |file|
121
- file.write s[0, 1024]
122
- file.close_write
123
- begin
124
- Encoding.find file.gets[/charset=([-\w]+)/, 1]
125
- rescue ArgumentError
126
- Encoding::BINARY
127
- end
128
- end
129
- #:nocov:
130
- end
131
-
90
+
132
91
  end
133
-
92
+
93
+ =begin
94
+ ## Excluded for speed reasons; protected seems to make methods slow.
95
+
96
+ # Save the StringScanner methods from being called.
97
+ # This would not be useful for highlighting.
98
+ strscan_public_methods =
99
+ StringScanner.instance_methods -
100
+ StringScanner.ancestors[1].instance_methods
101
+ protected(*strscan_public_methods)
102
+ =end
103
+
134
104
  # Create a new Scanner.
135
105
  #
136
106
  # * +code+ is the input String and is handled by the superclass
@@ -140,66 +110,58 @@ module CodeRay
140
110
  # overwrite default options here.)
141
111
  #
142
112
  # Else, a Tokens object is used.
143
- def initialize code = '', options = {}
144
- if self.class == Scanner
145
- raise NotImplementedError, "I am only the basic Scanner class. I can't scan anything. :( Use my subclasses."
146
- end
113
+ def initialize code='', options = {}
114
+ raise "I am only the basic Scanner class. I can't scan "\
115
+ "anything. :( Use my subclasses." if self.class == Scanner
147
116
 
148
117
  @options = self.class::DEFAULT_OPTIONS.merge options
149
-
150
- super self.class.normalize(code)
151
-
118
+
119
+ super Scanner.normify(code)
120
+
152
121
  @tokens = options[:tokens] || Tokens.new
153
122
  @tokens.scanner = self if @tokens.respond_to? :scanner=
154
-
123
+
155
124
  setup
156
125
  end
157
126
 
158
- # Sets back the scanner. Subclasses should redefine the reset_instance
159
- # method instead of this one.
127
+ # Sets back the scanner. Subclasses are to define the reset_instance
128
+ # method.
160
129
  def reset
161
130
  super
162
131
  reset_instance
163
132
  end
164
-
165
- # Set a new string to be scanned.
133
+
166
134
  def string= code
167
- code = self.class.normalize(code)
135
+ code = Scanner.normify(code)
168
136
  super code
169
137
  reset_instance
170
138
  end
171
-
172
- # the Plugin ID for this scanner
139
+
140
+ # More mnemonic accessor name for the input string.
141
+ alias code string
142
+ alias code= string=
143
+
144
+ # Returns the Plugin ID for this scanner.
173
145
  def lang
174
- self.class.lang
175
- end
176
-
177
- # the default file extension for this scanner
178
- def file_extension
179
- self.class.file_extension
146
+ self.class.plugin_id.to_s
180
147
  end
181
-
182
- # Scan the code and returns all tokens in a Tokens object.
148
+
149
+ # Scans the code and returns all tokens in a Tokens object.
183
150
  def tokenize source = nil, options = {}
184
151
  options = @options.merge(options)
185
152
  @tokens = options[:tokens] || @tokens || Tokens.new
186
153
  @tokens.scanner = self if @tokens.respond_to? :scanner=
187
154
  case source
155
+ when String
156
+ self.string = source
188
157
  when Array
189
- self.string = self.class.normalize(source.join)
158
+ self.string = source.join
190
159
  when nil
191
160
  reset
192
161
  else
193
- self.string = self.class.normalize(source)
194
- end
195
-
196
- begin
197
- scan_tokens @tokens, options
198
- rescue => e
199
- message = "Error in %s#scan_tokens, initial state was: %p" % [self.class, defined?(state) && state]
200
- raise_inspect e.message, @tokens, message, 30, e.backtrace
162
+ raise ArgumentError, 'expected String, Array, or nil'
201
163
  end
202
-
164
+ scan_tokens @tokens, options
203
165
  @cached_tokens = @tokens
204
166
  if source.is_a? Array
205
167
  @tokens.split_into_parts(*source.map { |part| part.size })
@@ -208,51 +170,46 @@ module CodeRay
208
170
  end
209
171
  end
210
172
 
211
- # Cache the result of tokenize.
173
+ # Caches the result of tokenize.
212
174
  def tokens
213
175
  @cached_tokens ||= tokenize
214
176
  end
215
177
 
216
- # Traverse the tokens.
178
+ # Traverses the tokens.
217
179
  def each &block
218
180
  tokens.each(&block)
219
181
  end
220
182
  include Enumerable
221
-
222
- # The current line position of the scanner, starting with 1.
223
- # See also: #column.
183
+
184
+ # The current line position of the scanner.
224
185
  #
225
186
  # Beware, this is implemented inefficiently. It should be used
226
187
  # for debugging only.
227
- def line pos = self.pos
228
- return 1 if pos <= 0
229
- binary_string[0...pos].count("\n") + 1
188
+ def line
189
+ string[0..pos].count("\n") + 1
230
190
  end
231
191
 
232
- # The current column position of the scanner, starting with 1.
233
- # See also: #line.
192
+ # The current column position of the scanner. See #line.
234
193
  def column pos = self.pos
235
- return 1 if pos <= 0
236
- pos - (binary_string.rindex(?\n, pos - 1) || -1)
194
+ return 0 if pos <= 0
195
+ string = string()
196
+ if string.respond_to?(:bytesize) && (defined?(@bin_string) || string.bytesize != string.size)
197
+ @bin_string ||= string.dup.force_encoding('binary')
198
+ string = @bin_string
199
+ end
200
+ pos - (string.rindex(?\n, pos) || 0)
237
201
  end
238
202
 
239
- # The string in binary encoding.
240
- #
241
- # To be used with #pos, which is the index of the byte the scanner
242
- # will scan next.
243
- def binary_string
244
- @binary_string ||=
245
- if string.respond_to?(:bytesize) && string.bytesize != string.size
246
- #:nocov:
247
- string.dup.force_encoding('binary')
248
- #:nocov:
249
- else
250
- string
251
- end
203
+ def marshal_dump # :nodoc:
204
+ @options
252
205
  end
253
206
 
207
+ def marshal_load options # :nodoc:
208
+ @options = options
209
+ end
210
+
254
211
  protected
255
-
212
+
256
213
  # Can be implemented by subclasses to do some initialization
257
214
  # that has to be done once per instance.
258
215
  #
@@ -260,25 +217,26 @@ module CodeRay
260
217
  # scan.
261
218
  def setup # :doc:
262
219
  end
263
-
220
+
264
221
  # This is the central method, and commonly the only one a
265
222
  # subclass implements.
266
223
  #
267
224
  # Subclasses must implement this method; it must return +tokens+
268
225
  # and must only use Tokens#<< for storing scanned tokens!
269
226
  def scan_tokens tokens, options # :doc:
270
- raise NotImplementedError, "#{self.class}#scan_tokens not implemented."
227
+ raise NotImplementedError,
228
+ "#{self.class}#scan_tokens not implemented."
271
229
  end
272
230
 
273
231
  # Resets the scanner.
274
232
  def reset_instance
275
233
  @tokens.clear if @tokens.respond_to?(:clear) && !@options[:keep_tokens]
276
234
  @cached_tokens = nil
277
- @binary_string = nil if defined? @binary_string
235
+ @bin_string = nil if defined? @bin_string
278
236
  end
279
-
237
+
280
238
  # Scanner error with additional status information
281
- def raise_inspect msg, tokens, state = self.state || 'No state given!', ambit = 30, backtrace = caller
239
+ def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
282
240
  raise ScanError, <<-EOE % [
283
241
 
284
242
 
@@ -300,24 +258,16 @@ surrounding code:
300
258
  EOE
301
259
  File.basename(caller[0]),
302
260
  msg,
303
- tokens.respond_to?(:size) ? tokens.size : 0,
304
- tokens.respond_to?(:last) ? tokens.last(10).map { |t| t.inspect }.join("\n") : '',
261
+ tokens.size,
262
+ tokens.last(10).map { |t| t.inspect }.join("\n"),
305
263
  line, column, pos,
306
264
  matched, state, bol?, eos?,
307
- binary_string[pos - ambit, ambit],
308
- binary_string[pos, ambit],
309
- ], backtrace
265
+ string[pos - ambit, ambit],
266
+ string[pos, ambit],
267
+ ]
310
268
  end
311
-
312
- # Shorthand for scan_until(/\z/).
313
- # This method also avoids a JRuby 1.9 mode bug.
314
- def scan_rest
315
- rest = self.rest
316
- terminate
317
- rest
318
- end
319
-
269
+
320
270
  end
321
-
271
+
322
272
  end
323
273
  end