raldred-coderay 0.9.0 → 0.9.339

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. data/lib/README +128 -0
  2. data/lib/coderay.rb +319 -0
  3. data/lib/coderay/duo.rb +85 -0
  4. data/lib/coderay/encoder.rb +187 -0
  5. data/lib/coderay/encoders/_map.rb +9 -0
  6. data/lib/coderay/encoders/count.rb +21 -0
  7. data/lib/coderay/encoders/debug.rb +49 -0
  8. data/lib/coderay/encoders/div.rb +20 -0
  9. data/lib/coderay/encoders/html.rb +306 -0
  10. data/lib/coderay/encoders/html/css.rb +70 -0
  11. data/lib/coderay/encoders/html/numerization.rb +133 -0
  12. data/lib/coderay/encoders/html/output.rb +206 -0
  13. data/lib/coderay/encoders/json.rb +19 -0
  14. data/lib/coderay/encoders/null.rb +26 -0
  15. data/lib/coderay/encoders/page.rb +21 -0
  16. data/lib/coderay/encoders/span.rb +20 -0
  17. data/lib/coderay/encoders/statistic.rb +77 -0
  18. data/lib/coderay/encoders/term.rb +114 -0
  19. data/lib/coderay/encoders/text.rb +32 -0
  20. data/lib/coderay/encoders/tokens.rb +44 -0
  21. data/lib/coderay/encoders/xml.rb +71 -0
  22. data/lib/coderay/encoders/yaml.rb +22 -0
  23. data/lib/coderay/for_redcloth.rb +73 -0
  24. data/lib/coderay/helpers/file_type.rb +226 -0
  25. data/lib/coderay/helpers/gzip_simple.rb +123 -0
  26. data/lib/coderay/helpers/plugin.rb +339 -0
  27. data/lib/coderay/helpers/word_list.rb +124 -0
  28. data/lib/coderay/scanner.rb +271 -0
  29. data/lib/coderay/scanners/_map.rb +21 -0
  30. data/lib/coderay/scanners/c.rb +166 -0
  31. data/lib/coderay/scanners/css.rb +202 -0
  32. data/lib/coderay/scanners/debug.rb +61 -0
  33. data/lib/coderay/scanners/delphi.rb +150 -0
  34. data/lib/coderay/scanners/diff.rb +104 -0
  35. data/lib/coderay/scanners/groovy.rb +271 -0
  36. data/lib/coderay/scanners/html.rb +175 -0
  37. data/lib/coderay/scanners/java.rb +173 -0
  38. data/lib/coderay/scanners/java/builtin_types.rb +419 -0
  39. data/lib/coderay/scanners/java_script.rb +195 -0
  40. data/lib/coderay/scanners/json.rb +107 -0
  41. data/lib/coderay/scanners/nitro_xhtml.rb +132 -0
  42. data/lib/coderay/scanners/php.rb +404 -0
  43. data/lib/coderay/scanners/plaintext.rb +18 -0
  44. data/lib/coderay/scanners/python.rb +232 -0
  45. data/lib/coderay/scanners/rhtml.rb +71 -0
  46. data/lib/coderay/scanners/ruby.rb +386 -0
  47. data/lib/coderay/scanners/ruby/patterns.rb +232 -0
  48. data/lib/coderay/scanners/scheme.rb +142 -0
  49. data/lib/coderay/scanners/sql.rb +162 -0
  50. data/lib/coderay/scanners/xml.rb +17 -0
  51. data/lib/coderay/scanners/yaml.rb +142 -0
  52. data/lib/coderay/style.rb +20 -0
  53. data/lib/coderay/styles/_map.rb +7 -0
  54. data/lib/coderay/styles/cycnus.rb +151 -0
  55. data/lib/coderay/styles/murphy.rb +132 -0
  56. data/lib/coderay/token_classes.rb +86 -0
  57. data/lib/coderay/tokens.rb +387 -0
  58. metadata +59 -1
@@ -0,0 +1,124 @@
1
module CodeRay

  # = WordList
  #
  # <b>A Hash subclass designed for mapping word lists to token types.</b>
  #
  # Copyright (c) 2006 by murphy (Kornelius Kalnbach) <murphy rubychan de>
  #
  # License:: LGPL / ask the author
  # Version:: 1.1 (2006-Oct-19)
  #
  # A WordList is a Hash with some additional features.
  # It is intended to be used for keyword recognition.
  #
  # WordList is highly optimized to be used in Scanners,
  # typically to decide whether a given ident is a special token.
  #
  # For case insensitive words use CaseIgnoringWordList.
  #
  # Example:
  #
  #  # define word arrays
  #  RESERVED_WORDS = %w[
  #    asm break case continue default do else
  #    ...
  #  ]
  #
  #  PREDEFINED_TYPES = %w[
  #    int long short char void
  #    ...
  #  ]
  #
  #  PREDEFINED_CONSTANTS = %w[
  #    EOF NULL ...
  #  ]
  #
  #  # make a WordList
  #  IDENT_KIND = WordList.new(:ident).
  #    add(RESERVED_WORDS, :reserved).
  #    add(PREDEFINED_TYPES, :pre_type).
  #    add(PREDEFINED_CONSTANTS, :pre_constant)
  #
  #  ...
  #
  #  def scan_tokens tokens, options
  #    ...
  #
  #    elsif scan(/[A-Za-z_][A-Za-z_0-9]*/)
  #      # use it
  #      kind = IDENT_KIND[match]
  #      ...
  class WordList < Hash

    # Creates a new WordList with +default+ as default value.
    #
    # You can activate +caching+ to store the results for every [] request.
    # A cached miss is written into the Hash itself, so with caching,
    # methods like +include?+ or +delete+ may no longer behave as you
    # expect. Therefore, it is recommended to use the [] method only.
    #
    # A +block+, if given, becomes the Hash default proc; it cannot be
    # combined with +caching+ (raises ArgumentError).
    def initialize default = false, caching = false, &block
      if block
        raise ArgumentError, 'Can\'t combine block with caching.' if caching
        super(&block)
      else
        if caching
          # Store the looked-up value under the requested key so the
          # default proc runs at most once per distinct key.
          super() do |h, k|
            h[k] = h.fetch k, default
          end
        else
          super default
        end
      end
    end

    # Add words to the list and associate them with +kind+.
    #
    # Returns +self+, so you can concat add calls.
    def add words, kind = true
      words.each do |word|
        self[word] = kind
      end
      self
    end

  end


  # A CaseIgnoringWordList is like a WordList, only that
  # keys are compared case-insensitively.
  #
  # Ignoring the text case is realized by sending the +downcase+ message to
  # all keys. Only +add+ and [] normalize the key; other Hash methods
  # (+key?+, +delete+, []=, ...) see the keys exactly as they are stored.
  #
  # Caching usually makes a CaseIgnoringWordList faster, but it has to be
  # activated explicitly.
  class CaseIgnoringWordList < WordList

    # Creates a new case-insensitive WordList with +default+ as default value.
    #
    # You can activate caching to store the results for every [] request.
    # With caching, a key is downcased only the first time it is requested;
    # the result is then stored under the key's original spelling.
    def initialize default = false, caching = false
      # Remembered so that [] knows whether the default proc already does the
      # downcasing. A plain instance variable (rather than a per-instance
      # singleton method) survives +dup+ and keeps the list Marshal-able.
      @caching = caching
      if caching
        super(default, false) do |h, k|
          h[k] = h.fetch k.downcase, default
        end
      else
        super(default, false)
      end
    end

    # Looks up +key+ ignoring its case.
    def [] key # :nodoc:
      if @caching
        # The default proc installed by initialize handles misses.
        super
      else
        super(key.downcase)
      end
    end

    # Add +words+ to the list and associate them with +kind+.
    #
    # Words are stored downcased. Returns +self+.
    def add words, kind = true
      words.each do |word|
        self[word.downcase] = kind
      end
      self
    end

  end

end
@@ -0,0 +1,271 @@
1
module CodeRay

  require 'coderay/helpers/plugin'

  # = Scanners
  #
  # This module holds the Scanner class and its subclasses.
  # For example, the Ruby scanner is named CodeRay::Scanners::Ruby
  # and can be found in coderay/scanners/ruby.
  #
  # Scanner also provides methods and constants for the register
  # mechanism and the [] method that returns the Scanner class
  # belonging to the given lang.
  #
  # See PluginHost.
  module Scanners
    extend PluginHost
    plugin_path File.dirname(__FILE__), 'scanners'

    require 'strscan'

    # = Scanner
    #
    # The base class for all Scanners.
    #
    # It is a subclass of Ruby's great +StringScanner+, which
    # makes it easy to access the scanning methods inside.
    #
    # It is also +Enumerable+, so you can use it like an Array of
    # Tokens:
    #
    #   require 'coderay'
    #
    #   c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;"
    #
    #   for text, kind in c_scanner
    #     puts text if kind == :operator
    #   end
    #
    #   # prints: (*==)++;
    #
    # OK, this is a very simple example :)
    # You can also use +map+, +any?+, +find+ and even +sort_by+,
    # if you want.
    class Scanner < StringScanner
      extend Plugin
      plugin_host Scanners

      # Raised if a Scanner fails while scanning.
      #
      # Derived from StandardError so that an ordinary +rescue+ can handle
      # it without also trapping interrupts, exit requests and the like.
      ScanError = Class.new(StandardError)

      require 'coderay/helpers/word_list'

      # The default options for all scanner classes.
      #
      # Define @default_options for subclasses.
      DEFAULT_OPTIONS = { :stream => false }

      class << self

        # Returns if the Scanner can be used in streaming mode.
        #
        # Streaming support is mixed into a scanner class with
        # <tt>include Streamable</tt>, so the class is asked whether it
        # includes the module (<tt>is_a?</tt> would inspect the class object
        # itself and never be true for an included module).
        def streamable?
          include? Streamable
        end

        # Converts +code+ into the String the scanner works on.
        #
        # Where Strings carry an encoding, the text is tagged as UTF-8, or as
        # binary if it is not valid UTF-8. Line endings are then converted
        # with String#to_unix.
        #
        # The re-tagging is done on a copy: +force_encoding+ modifies its
        # receiver, which would otherwise change the caller's String and
        # fail for frozen input.
        def normify code
          code = code.to_s
          if code.respond_to? :force_encoding
            code = code.dup
            code.force_encoding 'utf-8'
            code.force_encoding 'binary' unless code.valid_encoding?
          end
          code.to_unix
        end

        # With an argument, sets the file extension for this scanner class.
        # Without one, returns it, falling back to the plugin id.
        def file_extension extension = nil
          if extension
            @file_extension = extension.to_s
          else
            @file_extension ||= plugin_id.to_s
          end
        end

      end

      # Excluded for speed reasons; protected seems to make methods slow.
      #
      #   # Save the StringScanner methods from being called.
      #   # This would not be useful for highlighting.
      #   strscan_public_methods =
      #     StringScanner.instance_methods -
      #     StringScanner.ancestors[1].instance_methods
      #   protected(*strscan_public_methods)

      # Create a new Scanner.
      #
      # * +code+ is the input String and is handled by the superclass
      #   StringScanner.
      # * +options+ is a Hash with Symbols as keys.
      #   It is merged with the default options of the class (you can
      #   overwrite default options here.)
      # * +block+ is the callback for streamed highlighting.
      #
      # If you set :stream to +true+ in the options, the Scanner uses a
      # TokenStream with the +block+ as callback to handle the tokens.
      #
      # Else, a Tokens object is used.
      #
      # A token container passed as <tt>options[:tokens]</tt> is used
      # instead of a newly created one.
      def initialize code='', options = {}, &block
        @options = self.class::DEFAULT_OPTIONS.merge options
        raise "I am only the basic Scanner class. I can't scan "\
          "anything. :( Use my subclasses." if self.class == Scanner

        super Scanner.normify(code)

        @tokens = options[:tokens]
        if @options[:stream]
          warn "warning in CodeRay::Scanner.new: :stream is set, "\
            "but no block was given" unless block_given?
          raise NotStreamableError, self unless kind_of? Streamable
          @tokens ||= TokenStream.new(&block)
        else
          warn "warning in CodeRay::Scanner.new: Block given, "\
            "but :stream is #{@options[:stream]}" if block_given?
          @tokens ||= Tokens.new
        end

        setup
      end

      # Rewinds the scanner and discards per-scan state (see reset_instance).
      def reset
        super
        reset_instance
      end

      # Replaces the input with +code+ (normalized by Scanner.normify) and
      # discards per-scan state.
      def string= code
        code = Scanner.normify(code)
        super code
        reset_instance
      end

      # More mnemonic accessor name for the input string.
      alias code string
      alias code= string=

      # Scans the code and returns all tokens in a Tokens object.
      #
      # * +new_string+, if given, replaces the current input first.
      # * +options+ are merged over the instance options for this scan only.
      #
      # In streaming mode the scanner is rewound (unless new input was just
      # set) and the token stream itself is returned.
      def tokenize new_string=nil, options = {}
        options = @options.merge(options)
        self.string = new_string if new_string
        @cached_tokens =
          if @options[:stream]  # :stream must have been set already
            reset unless new_string
            scan_tokens @tokens, options
            @tokens
          else
            scan_tokens @tokens, options
          end
      end

      # The tokens of the current input; scanned on first use, then cached
      # until the input changes or the scanner is reset.
      def tokens
        @cached_tokens ||= tokenize
      end

      # Whether the scanner is in streaming mode.
      def streaming?
        !!@options[:stream]
      end

      # Traverses the tokens.
      #
      # Raises ArgumentError in streaming mode.
      def each &block
        raise ArgumentError,
          'Cannot traverse TokenStream.' if @options[:stream]
        tokens.each(&block)
      end
      include Enumerable

      # The current line position of the scanner.
      #
      # Counts the newlines in front of the current position; the character
      # at the position itself is not included.
      #
      # Beware, this is implemented inefficiently. It should be used
      # for debugging only.
      def line
        # +pos+ is a byte offset, hence the byte-indexed copy of the input.
        binary_string[0...pos].count("\n") + 1
      end

      # The distance between +pos+ (default: the current position) and the
      # last newline at or before it, or +pos+ itself if there is none.
      # Returns 0 for positions <= 0.
      def column pos = self.pos
        return 0 if pos <= 0
        pos - (binary_string.rindex(?\n, pos) || 0)
      end

      protected

      # Can be implemented by subclasses to do some initialization
      # that has to be done once per instance.
      #
      # Use reset for initialization that has to be done once per
      # scan.
      def setup
      end

      # This is the central method, and commonly the only one a
      # subclass implements.
      #
      # Subclasses must implement this method; it must return +tokens+
      # and must only use Tokens#<< for storing scanned tokens!
      def scan_tokens tokens, options
        raise NotImplementedError,
          "#{self.class}#scan_tokens not implemented."
      end

      # Drops everything that belongs to the previous scan: the collected
      # tokens (unless the :keep_tokens option is set), the cached result
      # and the cached binary copy of the input.
      def reset_instance
        @tokens.clear unless @options[:keep_tokens]
        @cached_tokens = nil
        @bin_string = nil if defined? @bin_string
      end

      # The input in a form that can be indexed with the scanner's byte
      # offsets: the input itself where Strings have no +bytesize+ or hold
      # single-byte characters only, otherwise a cached binary-encoded copy.
      def binary_string
        source = string
        if source.respond_to?(:bytesize) && (defined?(@bin_string) || source.bytesize != source.size)
          @bin_string ||= source.dup.force_encoding(:binary)
          source = @bin_string
        end
        source
      end

      # Scanner error with additional status information
      #
      # Raises a ScanError whose message contains +msg+, the last (up to 10)
      # +tokens+, the scanner position, +state+, and up to +ambit+ bytes of
      # input on either side of the current position.
      def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
        source = binary_string
        # Clamp at the start of the input: a negative start index would
        # make String#[] count from the end of the input instead.
        before_start = [pos - ambit, 0].max
        raise ScanError, <<-EOE % [


***ERROR in %s: %s (after %d tokens)

tokens:
%s

current line: %d column: %d pos: %d
matched: %p state: %p
bol? = %p, eos? = %p

surrounding code:
%p ~~ %p


***ERROR***

        EOE
          File.basename(caller[0]),
          msg,
          tokens.size,
          tokens.last(10).map { |t| t.inspect }.join("\n"),
          line, column, pos,
          matched, state, bol?, eos?,
          source[before_start, pos - before_start],
          source[pos, ambit],
        ]
      end

    end

  end
end
261
+
262
class String
  # Returns the text with DOS ("\r\n") and old Mac ("\r") line endings
  # replaced by Unix newlines.
  #
  # A String without any carriage return is returned as is (the receiver
  # itself, not a copy).
  def to_unix
    return self unless include?("\r")
    gsub(/\r\n?/, "\n")
  end
end
@@ -0,0 +1,21 @@
1
module CodeRay
module Scanners

  # Alternative language names (left) and the scanner id each one stands
  # for (right). +map+ is not defined in this file; presumably it comes
  # from PluginHost, which Scanners extends -- see there for details.
  map(
    :cpp         => :c,
    :ecma        => :java_script,
    :ecmascript  => :java_script,
    :ecma_script => :java_script,
    :irb         => :ruby,
    :javascript  => :java_script,
    :js          => :java_script,
    :nitro       => :nitro_xhtml,
    :pascal      => :delphi,
    :plain       => :plaintext,
    :xhtml       => :html,
    :yml         => :yaml
  )

  # Scanner id handed to the host's +default+ setting (presumably the
  # fallback for unknown languages -- confirm in PluginHost).
  default(:plain)

end
end
@@ -0,0 +1,166 @@
1
module CodeRay
module Scanners

  # Scanner for C source code.
  #
  # Works as a small state machine: <tt>:initial</tt> for ordinary code,
  # <tt>:string</tt> inside a double-quoted string literal, and
  # <tt>:include_expected</tt> right after an <tt>#include</tt> directive.
  class C < Scanner

    include Streamable

    register_for :c
    file_extension 'c'

    RESERVED_WORDS = [
      'asm', 'break', 'case', 'continue', 'default', 'do', 'else',
      'for', 'goto', 'if', 'return', 'switch', 'while',
      'struct', 'union', 'enum', 'typedef',
      'static', 'register', 'auto', 'extern',
      'sizeof',
      'volatile', 'const',  # C89
      'inline', 'restrict', # C99
    ]

    PREDEFINED_TYPES = [
      'int', 'long', 'short', 'char', 'void',
      'signed', 'unsigned', 'float', 'double',
      'bool', 'complex',  # C99
    ]

    PREDEFINED_CONSTANTS = [
      'EOF', 'NULL',
      'true', 'false',  # C99
    ]

    # Maps an identifier to its token kind; unknown words are :ident.
    IDENT_KIND = WordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_TYPES, :pre_type).
      add(PREDEFINED_CONSTANTS, :pre_constant)

    # What may follow a backslash: a simple escape character (including
    # \a and \?), a line continuation, a hex escape or an octal escape.
    ESCAPE = / [abfnrtv\n\\'"?] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
    # Universal character names: \uXXXX and \UXXXXXXXX.
    UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

    # Splits the input into tokens and appends them to +tokens+ as
    # [text, kind] pairs; string literals are wrapped in
    # [:open, :string] / [:close, :string] markers.
    #
    # +options+ is not used by this scanner. Returns +tokens+.
    def scan_tokens tokens, options

      state = :initial

      until eos?

        kind = nil
        match = nil

        case state

        when :initial

          if scan(/ \s+ | \\\n /x)
            kind = :space

          elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            kind = :comment

          elsif match = scan(/ \# \s* if \s* 0 /x)
            # Everything up to the matching directive line (or the end of
            # the input) is dead code and shown as a comment.
            match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
            kind = :comment

          elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x)
            kind = :operator

          elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
            kind = IDENT_KIND[match]
            # A plain identifier followed by a single colon is a label.
            if kind == :ident && check(/:(?!:)/)
              match << scan(/:/)
              kind = :label
            end

          elsif match = scan(/L?"/)
            tokens << [:open, :string]
            if match[0] == ?L
              tokens << ['L', :modifier]
              match = '"'
            end
            state = :string
            kind = :delimiter

          elsif scan(/#\s*(\w*)/)
            kind = :preprocessor  # FIXME multiline preprocs
            state = :include_expected if self[1] == 'include'

          elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
            kind = :char

          elsif scan(/0[xX][0-9A-Fa-f]+/)
            kind = :hex

          # The lookaheads below also reject a following digit. Without
          # that, the regexp engine backtracks to a shorter run of digits
          # and splits a number such as 12.5 or 0129 into several tokens.
          elsif scan(/0[0-7]+(?![\d.eEfF])/)
            kind = :oct

          elsif scan(/\d+(?![\d.eEfF])/)
            kind = :integer

          # Longest forms first; the last alternative takes the digit runs
          # rejected above (e.g. "12f", or "12" in "12e" and "12.").
          elsif scan(/\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?|\d+[fF]?/)
            kind = :float

          else
            getch
            kind = :error

          end

        when :string
          if scan(/[^\\\n"]+/)
            kind = :content
          elsif scan(/"/)
            tokens << ['"', :delimiter]
            tokens << [:close, :string]
            state = :initial
            next
          elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            kind = :char
          elsif scan(/ \\ | $ /x)
            # Unknown escape or end of line inside the literal: close the
            # string and flag the spot as an error.
            tokens << [:close, :string]
            kind = :error
            state = :initial
          else
            raise_inspect("else case \" reached; %p not handled." % peek(1), tokens)
          end

        when :include_expected
          if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
            kind = :include
            state = :initial

          elsif match = scan(/\s+/)
            kind = :space
            # The directive ends with its line.
            state = :initial if match.index ?\n

          else
            getch
            kind = :error

          end

        else
          raise_inspect 'Unknown state', tokens

        end

        # Branches that did not capture the text themselves use the
        # scanner's last match.
        match ||= matched
        if $DEBUG && !kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]

      end

      # Input ended inside a string literal: close it anyway.
      if state == :string
        tokens << [:close, :string]
      end

      tokens
    end

  end

end
end