coderay-beta 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. data/FOLDERS +53 -0
  2. data/LICENSE +504 -0
  3. data/bin/coderay +82 -0
  4. data/bin/coderay_stylesheet +4 -0
  5. data/lib/README +129 -0
  6. data/lib/coderay.rb +320 -0
  7. data/lib/coderay/duo.rb +85 -0
  8. data/lib/coderay/encoder.rb +213 -0
  9. data/lib/coderay/encoders/_map.rb +11 -0
  10. data/lib/coderay/encoders/comment_filter.rb +43 -0
  11. data/lib/coderay/encoders/count.rb +21 -0
  12. data/lib/coderay/encoders/debug.rb +49 -0
  13. data/lib/coderay/encoders/div.rb +19 -0
  14. data/lib/coderay/encoders/filter.rb +75 -0
  15. data/lib/coderay/encoders/html.rb +305 -0
  16. data/lib/coderay/encoders/html/css.rb +70 -0
  17. data/lib/coderay/encoders/html/numerization.rb +133 -0
  18. data/lib/coderay/encoders/html/output.rb +206 -0
  19. data/lib/coderay/encoders/json.rb +69 -0
  20. data/lib/coderay/encoders/lines_of_code.rb +90 -0
  21. data/lib/coderay/encoders/null.rb +26 -0
  22. data/lib/coderay/encoders/page.rb +20 -0
  23. data/lib/coderay/encoders/span.rb +19 -0
  24. data/lib/coderay/encoders/statistic.rb +77 -0
  25. data/lib/coderay/encoders/term.rb +137 -0
  26. data/lib/coderay/encoders/text.rb +32 -0
  27. data/lib/coderay/encoders/token_class_filter.rb +84 -0
  28. data/lib/coderay/encoders/xml.rb +71 -0
  29. data/lib/coderay/encoders/yaml.rb +22 -0
  30. data/lib/coderay/for_redcloth.rb +85 -0
  31. data/lib/coderay/helpers/file_type.rb +240 -0
  32. data/lib/coderay/helpers/gzip_simple.rb +123 -0
  33. data/lib/coderay/helpers/plugin.rb +349 -0
  34. data/lib/coderay/helpers/word_list.rb +138 -0
  35. data/lib/coderay/scanner.rb +284 -0
  36. data/lib/coderay/scanners/_map.rb +23 -0
  37. data/lib/coderay/scanners/c.rb +203 -0
  38. data/lib/coderay/scanners/cpp.rb +228 -0
  39. data/lib/coderay/scanners/css.rb +210 -0
  40. data/lib/coderay/scanners/debug.rb +62 -0
  41. data/lib/coderay/scanners/delphi.rb +150 -0
  42. data/lib/coderay/scanners/diff.rb +105 -0
  43. data/lib/coderay/scanners/groovy.rb +263 -0
  44. data/lib/coderay/scanners/html.rb +182 -0
  45. data/lib/coderay/scanners/java.rb +176 -0
  46. data/lib/coderay/scanners/java/builtin_types.rb +419 -0
  47. data/lib/coderay/scanners/java_script.rb +224 -0
  48. data/lib/coderay/scanners/json.rb +112 -0
  49. data/lib/coderay/scanners/nitro_xhtml.rb +136 -0
  50. data/lib/coderay/scanners/php.rb +526 -0
  51. data/lib/coderay/scanners/plaintext.rb +21 -0
  52. data/lib/coderay/scanners/python.rb +285 -0
  53. data/lib/coderay/scanners/rhtml.rb +74 -0
  54. data/lib/coderay/scanners/ruby.rb +404 -0
  55. data/lib/coderay/scanners/ruby/patterns.rb +238 -0
  56. data/lib/coderay/scanners/scheme.rb +145 -0
  57. data/lib/coderay/scanners/sql.rb +162 -0
  58. data/lib/coderay/scanners/xml.rb +17 -0
  59. data/lib/coderay/scanners/yaml.rb +144 -0
  60. data/lib/coderay/style.rb +20 -0
  61. data/lib/coderay/styles/_map.rb +7 -0
  62. data/lib/coderay/styles/cycnus.rb +151 -0
  63. data/lib/coderay/styles/murphy.rb +132 -0
  64. data/lib/coderay/token_classes.rb +86 -0
  65. data/lib/coderay/tokens.rb +391 -0
  66. data/lib/term/ansicolor.rb +220 -0
  67. metadata +123 -0
@@ -0,0 +1,284 @@
1
+ module CodeRay
2
+
3
+ require 'coderay/helpers/plugin'
4
+
5
+ # = Scanners
6
+ #
7
+ # This module holds the Scanner class and its subclasses.
8
+ # For example, the Ruby scanner is named CodeRay::Scanners::Ruby
9
+ # and can be found in coderay/scanners/ruby.
10
+ #
11
+ # Scanner also provides methods and constants for the register
12
+ # mechanism and the [] method that returns the Scanner class
13
+ # belonging to the given lang.
14
+ #
15
+ # See PluginHost.
16
+ module Scanners
17
+ extend PluginHost
18
+ plugin_path File.dirname(__FILE__), 'scanners'
19
+
20
+ require 'strscan'
21
+
22
+ # = Scanner
23
+ #
24
+ # The base class for all Scanners.
25
+ #
26
+ # It is a subclass of Ruby's great +StringScanner+, which
27
+ # makes it easy to access the scanning methods inside.
28
+ #
29
+ # It is also +Enumerable+, so you can use it like an Array of
30
+ # Tokens:
31
+ #
32
+ # require 'coderay'
33
+ #
34
+ # c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;"
35
+ #
36
+ # for text, kind in c_scanner
37
+ # puts text if kind == :operator
38
+ # end
39
+ #
40
+ # # prints: (*==)++;
41
+ #
42
+ # OK, this is a very simple example :)
43
+ # You can also use +map+, +any?+, +find+ and even +sort_by+,
44
+ # if you want.
45
+ class Scanner < StringScanner
46
+
47
+ extend Plugin
48
+ plugin_host Scanners
49
+
50
# Raised if a Scanner fails while scanning.
#
# Derives from StandardError (not Exception) so that a plain +rescue+
# clause catches scan failures; +rescue ScanError+ and +rescue Exception+
# keep working as before.
ScanError = Class.new(StandardError)
52
+
53
+ require 'coderay/helpers/word_list'
54
+
55
+ # The default options for all scanner classes.
56
+ #
57
+ # Subclasses override these by defining their own DEFAULT_OPTIONS constant.
58
+ DEFAULT_OPTIONS = { :stream => false }
59
+
60
+ KINDS_NOT_LOC = [:comment, :doctype]
61
+
62
+ class << self
63
+
64
# Returns whether the Scanner class can be used in streaming mode,
# i.e. whether it carries the Streamable module.
def streamable?
  # self is the scanner *class* here. is_a? alone only tests the class
  # object itself and therefore misses modules mixed in with +include+;
  # it is kept as a fallback for classes that +extend+ Streamable.
  include?(Streamable) || is_a?(Streamable)
end
68
+
69
# Prepares +code+ for scanning.
#
# * +code+ is converted with to_s.
# * On Rubies with string encodings, the text is tagged as UTF-8, or as
#   binary when its bytes are not valid UTF-8.
# * Line endings are normalized with String#to_unix.
#
# Returns the normalized String. The argument itself is never modified.
def normify code
  code = code.to_s
  if code.respond_to? :force_encoding
    # force_encoding changes its receiver in place; work on a copy so the
    # caller's string keeps its encoding and frozen strings are accepted.
    code = code.dup
    code.force_encoding 'utf-8'
    code.force_encoding 'binary' unless code.valid_encoding?
  end
  code.to_unix
end
81
+
82
# Reads or sets the file extension associated with this scanner class.
#
# With an argument, the extension is stored as a String and returned.
# Without one, the stored value is returned; it defaults to plugin_id.
def file_extension extension = nil
  return @file_extension = extension.to_s if extension
  @file_extension ||= plugin_id.to_s
end
89
+
90
+ end
91
+
92
+ =begin
93
+ ## Excluded for speed reasons; protected seems to make methods slow.
94
+
95
+ # Save the StringScanner methods from being called.
96
+ # This would not be useful for highlighting.
97
+ strscan_public_methods =
98
+ StringScanner.instance_methods -
99
+ StringScanner.ancestors[1].instance_methods
100
+ protected(*strscan_public_methods)
101
+ =end
102
+
103
# Builds a new Scanner instance.
#
# * +code+ is the text to scan; it is passed through Scanner.normify and
#   handed to the StringScanner superclass.
# * +options+ is a Hash with Symbols as keys, merged over the class's
#   DEFAULT_OPTIONS. A token container may be supplied as :tokens.
# * +block+ is the callback for streamed highlighting.
#
# With :stream set, tokens go to a TokenStream built around +block+
# (the scanner must be Streamable); otherwise a Tokens object is used.
def initialize code='', options = {}, &block
  if self.class == Scanner
    raise "I am only the basic Scanner class. I can't scan anything. :( Use my subclasses."
  end

  @options = self.class::DEFAULT_OPTIONS.merge options

  super Scanner.normify(code)

  @tokens = options[:tokens]
  streaming = @options[:stream]
  if streaming
    unless block_given?
      warn "warning in CodeRay::Scanner.new: :stream is set, but no block was given"
    end
    raise NotStreamableError, self unless kind_of? Streamable
    @tokens ||= TokenStream.new(&block)
  else
    if block_given?
      warn "warning in CodeRay::Scanner.new: Block given, but :stream is #{streaming}"
    end
    @tokens ||= Tokens.new
  end
  @tokens.scanner = self

  setup
end
139
+
140
# Resets the underlying scanner via the superclass, then discards the
# per-scan state (see reset_instance).
def reset
  super
  reset_instance
end
144
+
145
# Replaces the input with +code+ (normalized by Scanner.normify first)
# and discards the per-scan state.
def string= code
  super Scanner.normify(code)
  reset_instance
end
150
+
151
+ # More mnemonic accessor name for the input string.
152
+ alias code string
153
+ alias code= string=
154
+
155
# Scans the code and returns the resulting tokens.
#
# * +new_string+, when given, replaces the current input first.
# * +options+ are merged over the instance options for this scan only.
#
# The result is remembered and served by #tokens afterwards.
def tokenize new_string=nil, options = {}
  scan_options = @options.merge(options)
  self.string = new_string if new_string

  unless @options[:stream]
    return @cached_tokens = scan_tokens(@tokens, scan_options)
  end

  # Streaming mode (:stream must have been set already): start over unless
  # the input was just replaced, and hand back the stream itself.
  reset unless new_string
  scan_tokens @tokens, scan_options
  @cached_tokens = @tokens
end
168
+
169
# Returns the tokens of the last scan, running tokenize on first use.
def tokens
  @cached_tokens = tokenize unless @cached_tokens
  @cached_tokens
end
172
+
173
# Whether the scanner is in streaming mode (strictly +true+ or +false+).
def streaming?
  @options[:stream] ? true : false
end
177
+
178
# Traverses the tokens, yielding each one to +block+.
#
# Raises ArgumentError in streaming mode.
def each &block
  if @options[:stream]
    raise ArgumentError, 'Cannot traverse TokenStream.'
  end
  tokens.each(&block)
end
184
+ include Enumerable
185
+
186
# The current line number of the scanner, counting from 1.
#
# Beware, this is implemented inefficiently. It should be used
# for debugging only.
def line
  source = string
  # Only the text *before* the scan pointer counts: with 0..pos, a newline
  # sitting exactly at the pointer bumped the line number one step early.
  # pos is a byte offset, so slice by bytes where the String supports it.
  consumed = source.respond_to?(:byteslice) ? source.byteslice(0, pos) : source[0...pos]
  consumed.count("\n") + 1
end
193
+
194
# The column of +pos+ (default: the current scan position).
#
# Returns 0 for positions at or before the start of the input. On the
# first line the result is +pos+ itself; on later lines it is the
# distance from the preceding newline. This numbering is kept unchanged
# for compatibility.
def column pos = self.pos
  return 0 if pos <= 0
  text = string
  if text.respond_to?(:bytesize) && (defined?(@bin_string) || text.bytesize != text.size)
    # Multi-byte input: index a binary copy (cached until reset_instance
    # clears it). force_encoding needs an encoding name or Encoding
    # object; a Symbol raises TypeError.
    @bin_string ||= text.dup.force_encoding('binary')
    text = @bin_string
  end
  # Start the backwards search at pos - 1 so that a newline sitting at
  # +pos+ is not mistaken for the one that opened the current line.
  pos - (text.rindex(?\n, pos - 1) || 0)
end
203
+
204
# Marshal hook: only the options Hash is serialized.
def marshal_dump
  @options
end
207
+
208
# Marshal hook: restores the options Hash produced by marshal_dump.
def marshal_load options
  @options = options
end
211
+
212
+ protected
213
+
214
# Hook for subclasses: initialization that has to happen once per
# instance. It is called at the end of initialize.
#
# Initialization that has to happen once per scan belongs in reset.
# Does nothing by default.
def setup
end
221
+
222
# The central method, and commonly the only one a subclass implements.
#
# Subclasses must override it; the override must return +tokens+ and
# must only use Tokens#<< for storing scanned tokens!
#
# This base version always raises NotImplementedError.
def scan_tokens tokens, options
  message = "#{self.class}#scan_tokens not implemented."
  raise NotImplementedError, message
end
231
+
232
# Discards per-scan state: the collected tokens (unless the
# :keep_tokens option is set), the cached result of tokenize, and the
# binary copy of the input used by column.
def reset_instance
  keep = @options[:keep_tokens]
  @tokens.clear unless keep
  @cached_tokens = nil
  # Leave @bin_string undefined if column never created it.
  @bin_string = nil if instance_variable_defined?(:@bin_string)
end
237
+
238
# Raises a ScanError carrying additional status information.
#
# * +msg+ is the error description.
# * +tokens+ is the token list built so far; its size and its last ten
#   entries are reported.
# * +state+ is the scanner state to report.
# * +ambit+ is how many characters of input to show on either side of
#   the current position.
def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
  # Clamp the start of the "before" window: a negative index would make
  # String#[] count from the END of the input (or return nil) whenever
  # fewer than +ambit+ characters have been consumed.
  window_start = [pos - ambit, 0].max
  raise ScanError, <<-EOE % [


***ERROR in %s: %s (after %d tokens)

tokens:
%s

current line: %d column: %d pos: %d
matched: %p state: %p
bol? = %p, eos? = %p

surrounding code:
%p ~~ %p


***ERROR***

  EOE
    File.basename(caller[0]),
    msg,
    tokens.size,
    tokens.last(10).map { |t| t.inspect }.join("\n"),
    line, column, pos,
    matched, state, bol?, eos?,
    string[window_start, pos - window_start],
    string[pos, ambit],
  ]
end
269
+
270
+ end
271
+
272
+ end
273
+ end
274
+
275
class String
  # Converts DOS ("\r\n") and old Mac ("\r") line endings to "\n".
  #
  # Returns the receiver itself when it contains no carriage return,
  # and a converted copy otherwise.
  def to_unix
    return self unless index ?\r
    gsub(/\r\n?/, "\n")
  end
end
@@ -0,0 +1,23 @@
1
module CodeRay
module Scanners

  # Alternative language names, each pointing at the scanner it stands for.
  # NOTE(review): +map+ and +default+ are provided outside this file
  # (presumably by PluginHost) - confirm their exact semantics there.
  map(
    :h           => :c,
    :cplusplus   => :cpp,
    :'c++'       => :cpp,
    :ecma        => :java_script,
    :ecmascript  => :java_script,
    :ecma_script => :java_script,
    :irb         => :ruby,
    :javascript  => :java_script,
    :js          => :java_script,
    :nitro       => :nitro_xhtml,
    :pascal      => :delphi,
    :plain       => :plaintext,
    :xhtml       => :html,
    :yml         => :yaml
  )

  default :plain

end
end
@@ -0,0 +1,203 @@
1
+ module CodeRay
2
+ module Scanners
3
+
4
+ class C < Scanner
5
+
6
+ include Streamable
7
+
8
+ register_for :c
9
+ file_extension 'c'
10
+
11
# Keywords, reported as :reserved.
RESERVED_WORDS =
  %w[asm break case continue default do else enum for goto if return] +
  %w[sizeof struct switch typedef union while] +
  %w[restrict]  # added in C99

# Built-in type names, reported as :pre_type.
PREDEFINED_TYPES =
  %w[int long short char signed unsigned float double] +
  %w[bool complex]  # added in C99

# Well-known constants, reported as :pre_constant.
PREDEFINED_CONSTANTS =
  %w[EOF NULL] +
  %w[true false]  # added in C99

# Storage-class and qualifier words, reported as :directive.
DIRECTIVES =
  %w[auto extern register static void] +
  %w[const volatile] +  # added in C89
  %w[inline]            # added in C99
33
+
34
# Maps an identifier to its token kind; words in none of the lists get
# the WordList's constructor argument, :ident.
IDENT_KIND = [
  [RESERVED_WORDS,       :reserved],
  [PREDEFINED_TYPES,     :pre_type],
  [DIRECTIVES,           :directive],
  [PREDEFINED_CONSTANTS, :pre_constant],
].inject(WordList.new(:ident)) { |list, (words, kind)| list.add(words, kind) }

# What may follow a backslash in a character or string literal.
ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
# \uXXXX and \UXXXXXXXX escapes (accepted inside strings only).
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
42
+
43
# Tokenizes C source from the current scan position to the end of input.
#
# * +tokens+ receives [text, kind] pairs via <<. Strings are wrapped in
#   [:open, :string] / [:close, :string] markers.
# * +options+ is not consulted by this scanner.
#
# Returns +tokens+.
#
# The scanner is a small state machine:
# * :initial          - ordinary code
# * :string           - inside a double-quoted string literal
# * :include_expected - right after an #include directive
def scan_tokens tokens, options

  state = :initial
  label_expected = true   # may an identifier followed by ':' be a label?
  case_expected = false   # are we between 'case'/'default' and its ':'?
  label_expected_before_preproc_line = nil
  in_preproc_line = false

  until eos?

    kind = nil
    match = nil

    case state

    when :initial

      if match = scan(/ \s+ | \\\n /x)
        # An unescaped newline ends a preprocessor line and restores the
        # label expectation that was in effect before it.
        if in_preproc_line && match != "\\\n" && match.index(?\n)
          in_preproc_line = false
          label_expected = label_expected_before_preproc_line
        end
        tokens << [match, :space]
        next

      elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
        kind = :comment

      elsif match = scan(/ \# \s* if \s* 0 /x)
        # "#if 0" blocks are shown as comments up to the next
        # elif, else or endif directive (or the end of input).
        match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
        kind = :comment

      elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
        label_expected = match =~ /[;\{\}]/
        if case_expected
          label_expected = true if match == ':'
          case_expected = false
        end
        kind = :operator

      elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
        kind = IDENT_KIND[match]
        if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/)
          kind = :label
          match << matched
        else
          label_expected = false
          if kind == :reserved
            case match
            when 'case', 'default'
              case_expected = true
            end
          end
        end

      elsif scan(/\$/)
        kind = :ident

      elsif match = scan(/L?"/)
        tokens << [:open, :string]
        if match[0] == ?L
          tokens << ['L', :modifier]
          match = '"'
        end
        state = :string
        kind = :delimiter

      elsif scan(/#[ \t]*(\w*)/)
        kind = :preprocessor
        in_preproc_line = true
        label_expected_before_preproc_line = label_expected
        state = :include_expected if self[1] == 'include'

      elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
        label_expected = false
        kind = :char

      elsif scan(/0[xX][0-9A-Fa-f]+/)
        label_expected = false
        kind = :hex

      # The lookaheads below also reject a following digit. Without that,
      # the regexp engine backtracks to a shorter digit run and splits one
      # literal into several tokens ("12.5" -> "1", "2", ".5").
      elsif scan(/0[0-7]+(?![\d.eEfF])/)
        label_expected = false
        kind = :oct

      elsif scan(/\d+(?![\d.eEfF])L?L?/)
        label_expected = false
        kind = :integer

      # Floats are matched whole. The last alternative takes any remaining
      # digit run (e.g. "5f", or the "1" of a malformed "1e") so that a
      # leading digit always yields a token.
      elsif scan(/ (?: \d+\.\d* | \.\d+ ) (?:[eE][+-]?\d+)? [fF]? | \d+[eE][+-]?\d+[fF]? | \d+[fF]? /x)
        label_expected = false
        kind = :float

      else
        getch
        kind = :error

      end

    when :string
      if scan(/[^\\\n"]+/)
        kind = :content
      elsif scan(/"/)
        tokens << ['"', :delimiter]
        tokens << [:close, :string]
        state = :initial
        label_expected = false
        next
      elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
        kind = :char
      elsif scan(/ \\ | $ /x)
        # A stray backslash or an unterminated line closes the string
        # with an :error token.
        tokens << [:close, :string]
        kind = :error
        state = :initial
        label_expected = false
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
      end

    when :include_expected
      if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
        kind = :include
        state = :initial

      elsif match = scan(/\s+/)
        kind = :space
        state = :initial if match.index ?\n

      else
        # Nothing that looks like an include target: rescan as plain code.
        state = :initial
        next

      end

    else
      raise_inspect 'Unknown state', tokens

    end

    match ||= matched
    if $DEBUG and not kind
      raise_inspect 'Error token %p in line %d' %
        [[match, kind], line], tokens
    end
    raise_inspect 'Empty token', tokens unless match

    tokens << [match, kind]

  end

  # Input ended inside a string literal: close it.
  if state == :string
    tokens << [:close, :string]
  end

  tokens
end
199
+
200
+ end
201
+
202
+ end
203
+ end