coderay-beta 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. data/FOLDERS +53 -0
  2. data/LICENSE +504 -0
  3. data/bin/coderay +82 -0
  4. data/bin/coderay_stylesheet +4 -0
  5. data/lib/README +129 -0
  6. data/lib/coderay.rb +320 -0
  7. data/lib/coderay/duo.rb +85 -0
  8. data/lib/coderay/encoder.rb +213 -0
  9. data/lib/coderay/encoders/_map.rb +11 -0
  10. data/lib/coderay/encoders/comment_filter.rb +43 -0
  11. data/lib/coderay/encoders/count.rb +21 -0
  12. data/lib/coderay/encoders/debug.rb +49 -0
  13. data/lib/coderay/encoders/div.rb +19 -0
  14. data/lib/coderay/encoders/filter.rb +75 -0
  15. data/lib/coderay/encoders/html.rb +305 -0
  16. data/lib/coderay/encoders/html/css.rb +70 -0
  17. data/lib/coderay/encoders/html/numerization.rb +133 -0
  18. data/lib/coderay/encoders/html/output.rb +206 -0
  19. data/lib/coderay/encoders/json.rb +69 -0
  20. data/lib/coderay/encoders/lines_of_code.rb +90 -0
  21. data/lib/coderay/encoders/null.rb +26 -0
  22. data/lib/coderay/encoders/page.rb +20 -0
  23. data/lib/coderay/encoders/span.rb +19 -0
  24. data/lib/coderay/encoders/statistic.rb +77 -0
  25. data/lib/coderay/encoders/term.rb +137 -0
  26. data/lib/coderay/encoders/text.rb +32 -0
  27. data/lib/coderay/encoders/token_class_filter.rb +84 -0
  28. data/lib/coderay/encoders/xml.rb +71 -0
  29. data/lib/coderay/encoders/yaml.rb +22 -0
  30. data/lib/coderay/for_redcloth.rb +85 -0
  31. data/lib/coderay/helpers/file_type.rb +240 -0
  32. data/lib/coderay/helpers/gzip_simple.rb +123 -0
  33. data/lib/coderay/helpers/plugin.rb +349 -0
  34. data/lib/coderay/helpers/word_list.rb +138 -0
  35. data/lib/coderay/scanner.rb +284 -0
  36. data/lib/coderay/scanners/_map.rb +23 -0
  37. data/lib/coderay/scanners/c.rb +203 -0
  38. data/lib/coderay/scanners/cpp.rb +228 -0
  39. data/lib/coderay/scanners/css.rb +210 -0
  40. data/lib/coderay/scanners/debug.rb +62 -0
  41. data/lib/coderay/scanners/delphi.rb +150 -0
  42. data/lib/coderay/scanners/diff.rb +105 -0
  43. data/lib/coderay/scanners/groovy.rb +263 -0
  44. data/lib/coderay/scanners/html.rb +182 -0
  45. data/lib/coderay/scanners/java.rb +176 -0
  46. data/lib/coderay/scanners/java/builtin_types.rb +419 -0
  47. data/lib/coderay/scanners/java_script.rb +224 -0
  48. data/lib/coderay/scanners/json.rb +112 -0
  49. data/lib/coderay/scanners/nitro_xhtml.rb +136 -0
  50. data/lib/coderay/scanners/php.rb +526 -0
  51. data/lib/coderay/scanners/plaintext.rb +21 -0
  52. data/lib/coderay/scanners/python.rb +285 -0
  53. data/lib/coderay/scanners/rhtml.rb +74 -0
  54. data/lib/coderay/scanners/ruby.rb +404 -0
  55. data/lib/coderay/scanners/ruby/patterns.rb +238 -0
  56. data/lib/coderay/scanners/scheme.rb +145 -0
  57. data/lib/coderay/scanners/sql.rb +162 -0
  58. data/lib/coderay/scanners/xml.rb +17 -0
  59. data/lib/coderay/scanners/yaml.rb +144 -0
  60. data/lib/coderay/style.rb +20 -0
  61. data/lib/coderay/styles/_map.rb +7 -0
  62. data/lib/coderay/styles/cycnus.rb +151 -0
  63. data/lib/coderay/styles/murphy.rb +132 -0
  64. data/lib/coderay/token_classes.rb +86 -0
  65. data/lib/coderay/tokens.rb +391 -0
  66. data/lib/term/ansicolor.rb +220 -0
  67. metadata +123 -0
@@ -0,0 +1,284 @@
1
+ module CodeRay
2
+
3
+ require 'coderay/helpers/plugin'
4
+
5
+ # = Scanners
6
+ #
7
+ # This module holds the Scanner class and its subclasses.
8
+ # For example, the Ruby scanner is named CodeRay::Scanners::Ruby
9
+ # and can be found in coderay/scanners/ruby.
10
+ #
11
+ # Scanner also provides methods and constants for the register
12
+ # mechanism and the [] method that returns the Scanner class
13
+ # belonging to the given lang.
14
+ #
15
+ # See PluginHost.
16
+ module Scanners
17
+ extend PluginHost
18
+ plugin_path File.dirname(__FILE__), 'scanners'
19
+
20
+ require 'strscan'
21
+
22
+ # = Scanner
23
+ #
24
+ # The base class for all Scanners.
25
+ #
26
+ # It is a subclass of Ruby's great +StringScanner+, which
27
+ # makes it easy to access the scanning methods inside.
28
+ #
29
+ # It is also +Enumerable+, so you can use it like an Array of
30
+ # Tokens:
31
+ #
32
+ # require 'coderay'
33
+ #
34
+ # c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;"
35
+ #
36
+ # for text, kind in c_scanner
37
+ # puts text if kind == :operator
38
+ # end
39
+ #
40
+ # # prints: (*==)++;
41
+ #
42
+ # OK, this is a very simple example :)
43
+ # You can also use +map+, +any?+, +find+ and even +sort_by+,
44
+ # if you want.
45
+ class Scanner < StringScanner
46
+
47
+ extend Plugin
48
+ plugin_host Scanners
49
+
50
# Raised if a Scanner fails while scanning.
#
# Derives from StandardError (not Exception) so that a plain +rescue+
# clause, which only handles StandardError and its subclasses, can catch
# scan failures. Code that rescues ScanError explicitly is unaffected.
ScanError = Class.new(StandardError)
52
+
53
+ require 'coderay/helpers/word_list'
54
+
55
+ # The default options for all scanner classes.
56
+ #
57
+ # Define @default_options for subclasses.
58
+ DEFAULT_OPTIONS = { :stream => false }
59
+
60
+ KINDS_NOT_LOC = [:comment, :doctype]
61
+
62
+ class << self
63
+
64
# Returns whether the Scanner class can be used in streaming mode,
# i.e. whether it mixes in Streamable.
#
# This is a class-level method, so +self+ is the scanner class itself.
# Module#include? inspects the class's mixins; the is_a? check is kept so
# that a class which was extended with Streamable still answers true.
def streamable?
  include?(Streamable) || is_a?(Streamable)
end
68
+
69
# Normalizes +code+ into the form the scanners work on.
#
# * +code+ is converted with +to_s+.
# * On Rubies with encoding support the text is tagged as UTF-8 when its
#   bytes are valid UTF-8, and as binary otherwise.
# * Newlines are normalized through String#to_unix.
#
# The encoding tag is applied to a copy, so the caller's String is never
# modified and frozen input is accepted.
def normify code
  code = code.to_s
  if code.respond_to? :force_encoding
    # to_s returns the receiver itself for Strings; re-tag a duplicate only.
    code = code.dup
    code.force_encoding 'utf-8'
    code.force_encoding 'binary' unless code.valid_encoding?
  end
  code.to_unix
end
81
+
82
# Reads or assigns the file extension of this scanner class.
#
# With an +extension+ argument, its String form is stored and returned.
# Without one, the stored value is returned; if nothing was stored yet,
# the plugin id (as a String) is remembered and returned.
def file_extension extension = nil
  return @file_extension = extension.to_s if extension
  @file_extension ||= plugin_id.to_s
end
89
+
90
+ end
91
+
92
+ =begin
93
+ ## Excluded for speed reasons; protected seems to make methods slow.
94
+
95
+ # Save the StringScanner methods from being called.
96
+ # This would not be useful for highlighting.
97
+ strscan_public_methods =
98
+ StringScanner.instance_methods -
99
+ StringScanner.ancestors[1].instance_methods
100
+ protected(*strscan_public_methods)
101
+ =end
102
+
103
# Builds a new Scanner.
#
# * +code+ is the input; it is normalized with Scanner.normify and then
#   handed to the StringScanner superclass.
# * +options+ is a Hash with Symbol keys, merged over the DEFAULT_OPTIONS
#   of the concrete scanner class. A token container may be supplied
#   under :tokens.
# * +block+ is the callback for streamed highlighting.
#
# When :stream is set, the scanner must be Streamable (otherwise
# NotStreamableError is raised) and tokens go to a TokenStream built
# around +block+. Otherwise tokens are collected in a Tokens object.
#
# Scanner itself cannot be instantiated; only its subclasses can.
def initialize code='', options = {}, &block
  if self.class == Scanner
    raise "I am only the basic Scanner class. I can't scan "\
      "anything. :( Use my subclasses."
  end

  @options = self.class::DEFAULT_OPTIONS.merge options

  super Scanner.normify(code)

  @tokens = options[:tokens]
  if @options[:stream]
    unless block_given?
      warn "warning in CodeRay::Scanner.new: :stream is set, "\
        "but no block was given"
    end
    raise NotStreamableError, self unless kind_of? Streamable
    @tokens ||= TokenStream.new(&block)
  else
    if block_given?
      warn "warning in CodeRay::Scanner.new: Block given, "\
        "but :stream is #{@options[:stream]}"
    end
    @tokens ||= Tokens.new
  end
  @tokens.scanner = self

  setup
end
139
+
140
+ def reset # rewinds the scanner and discards the state of the previous scan
141
+ super # superclass reset (Scanner derives from StringScanner)
142
+ reset_instance # clear tokens (unless :keep_tokens) and cached results
143
+ end
144
+
145
# Replaces the input with +code+ (normalized through Scanner.normify)
# and discards the state that belonged to the previous input.
def string= code
  super Scanner.normify(code)
  reset_instance
end
150
+
151
+ # More mnemonic accessor name for the input string.
152
+ alias code string
153
+ alias code= string=
154
+
155
# Scans the code and returns the resulting tokens.
#
# * +new_string+, when given, replaces the input before scanning.
# * +options+ are merged over the scanner's options for this scan only.
#
# In streaming mode the scanner is rewound first (unless a new string was
# just assigned) and the token container itself is returned. Otherwise the
# return value of scan_tokens is returned. Either way the result is also
# remembered in @cached_tokens.
def tokenize new_string=nil, options = {}
  options = @options.merge(options)
  self.string = new_string if new_string

  unless @options[:stream]
    return @cached_tokens = scan_tokens(@tokens, options)
  end

  # :stream must have been set already
  reset unless new_string
  scan_tokens @tokens, options
  @cached_tokens = @tokens
end
168
+
169
# Returns the tokens of the current input, running tokenize only when no
# cached result is available.
def tokens
  return @cached_tokens if @cached_tokens
  @cached_tokens = tokenize
end
172
+
173
# Tells whether this scanner runs in streaming mode, i.e. whether the
# :stream option is set. Always answers with +true+ or +false+.
def streaming?
  @options[:stream] ? true : false
end
177
+
178
# Yields every token of the scanned code to +block+.
#
# Raises ArgumentError in streaming mode, because a token stream cannot
# be traversed afterwards.
def each &block
  if @options[:stream]
    raise ArgumentError, 'Cannot traverse TokenStream.'
  end
  tokens.each(&block)
end
184
+ include Enumerable
185
+
186
# The line number (starting at 1) of +pos+, which defaults to the current
# position of the scanner.
#
# Beware, this is implemented inefficiently. It should be used
# for debugging only.
def line pos = self.pos
  return 1 if pos <= 0
  code = string
  # Scanner positions are byte offsets, so on encoding-aware Rubies the
  # text is sliced through a binary view to keep offsets and indices aligned.
  code = code.dup.force_encoding('binary') if code.respond_to? :force_encoding
  # Exclusive range: a newline located exactly at +pos+ has not been
  # passed yet and must not be counted.
  code[0...pos].count("\n") + 1
end
193
+
194
# The column (starting at 1, counted in bytes) of +pos+ within its line.
# +pos+ defaults to the current position of the scanner.
#
# The first byte of every line, including the first line, is column 1.
# A newline character counts as the last column of the line it ends.
def column pos = self.pos
  return 1 if pos <= 0
  code = string
  if code.respond_to?(:bytesize) && (defined?(@bin_string) || code.bytesize != code.size)
    # Multibyte text: positions are byte offsets, so search a binary copy.
    # The encoding is named by String; force_encoding rejects a Symbol.
    @bin_string ||= code.dup.force_encoding('binary')
    code = @bin_string
  end
  # Look for the newline strictly before +pos+; -1 stands for "before the
  # start of the text" so that the first line is numbered like the others.
  pos - (code.rindex(?\n, pos - 1) || -1)
end
203
+
204
+ def marshal_dump # Marshal hook: only the options Hash is serialized
205
+ @options
206
+ end
207
+
208
+ def marshal_load options # Marshal hook: restores the options written by marshal_dump
209
+ @options = options
210
+ end
211
+
212
+ protected
213
+
214
+ # Can be implemented by subclasses to do some initialization
215
+ # that has to be done once per instance.
216
+ #
217
+ # Use reset for initialization that has to be done once per
218
+ # scan.
219
+ def setup
220
+ end
221
+
222
# The heart of every scanner, and usually the only method a subclass
# has to provide.
#
# An implementation must return +tokens+ and must store scanned tokens
# exclusively through Tokens#<<. This base version only raises
# NotImplementedError.
def scan_tokens tokens, options
  message = "#{self.class}#scan_tokens not implemented."
  raise NotImplementedError, message
end
231
+
232
+ def reset_instance # drops per-scan state; called from reset and string=
233
+ @tokens.clear unless @options[:keep_tokens] # :keep_tokens keeps earlier tokens in the container
234
+ @cached_tokens = nil # forces the next call of tokens to tokenize again
235
+ @bin_string = nil if defined? @bin_string # binary copy cached by column may belong to the old input
236
+ end
237
+
238
# Raises a ScanError carrying additional status information.
#
# * +msg+ is the error description.
# * +tokens+ are the tokens scanned so far; the last ten are listed.
# * +state+ is the scanner state to report.
# * +ambit+ is the number of characters of surrounding code shown
#   before and after the current position.
def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
  # Clamp the start of the leading context at the beginning of the text:
  # a negative start index would count from the end of the string.
  context_start = [pos - ambit, 0].max
  raise ScanError, <<-EOE % [


***ERROR in %s: %s (after %d tokens)

tokens:
%s

current line: %d column: %d pos: %d
matched: %p state: %p
bol? = %p, eos? = %p

surrounding code:
%p ~~ %p


***ERROR***

  EOE
    File.basename(caller[0]),
    msg,
    tokens.size,
    tokens.last(10).map { |t| t.inspect }.join("\n"),
    line, column, pos,
    matched, state, bol?, eos?,
    string[context_start, pos - context_start],
    string[pos, ambit],
  ]
end
269
+
270
+ end
271
+
272
+ end
273
+ end
274
+
275
class String
  # Newline normalizer: answers the text with every "\r\n" pair and every
  # lone "\r" turned into "\n". A receiver that contains no "\r" at all is
  # returned as it is (the same object, not a copy).
  def to_unix
    return self unless index ?\r
    gsub(/\r\n?/, "\n")
  end
end
@@ -0,0 +1,23 @@
1
module CodeRay
  module Scanners

    # Alternative language names, each paired with the id it is mapped to.
    # NOTE(review): +map+ and +default+ presumably come from PluginHost;
    # their exact semantics are not visible here.
    aliases = {
      :h           => :c,
      :cplusplus   => :cpp,
      :'c++'       => :cpp,
      :ecma        => :java_script,
      :ecmascript  => :java_script,
      :ecma_script => :java_script,
      :irb         => :ruby,
      :javascript  => :java_script,
      :js          => :java_script,
      :nitro       => :nitro_xhtml,
      :pascal      => :delphi,
      :plain       => :plaintext,
      :xhtml       => :html,
      :yml         => :yaml
    }
    map aliases

    default :plain

  end
end
@@ -0,0 +1,203 @@
1
+ module CodeRay
2
+ module Scanners
3
+
4
+ class C < Scanner
5
+
6
+ include Streamable
7
+
8
+ register_for :c
9
+ file_extension 'c'
10
+
11
# Keywords reported as :reserved ('restrict' was added in C99).
RESERVED_WORDS = %w[
  asm break case continue default do
  else enum for goto if return
  sizeof struct switch typedef union while
  restrict
]

# Built-in type names ('bool' and 'complex' were added in C99).
PREDEFINED_TYPES = %w[
  int long short char
  signed unsigned float double
  bool complex
]

# Well-known constants ('true' and 'false' were added in C99).
PREDEFINED_CONSTANTS = %w[
  EOF NULL
  true false
]

# Storage-class and qualifier words ('const' and 'volatile' were added in
# C89, 'inline' in C99).
DIRECTIVES = %w[
  auto extern register static void
  const volatile
  inline
]
33
+
34
+ IDENT_KIND = WordList.new(:ident).
35
+ add(RESERVED_WORDS, :reserved).
36
+ add(PREDEFINED_TYPES, :pre_type).
37
+ add(DIRECTIVES, :directive).
38
+ add(PREDEFINED_CONSTANTS, :pre_constant)
39
+
40
+ ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
41
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
42
+
43
+ def scan_tokens tokens, options # tokenizes the C input; appends [text, kind] pairs to tokens and returns tokens
44
+
45
+ state = :initial # one of :initial, :string, :include_expected
46
+ label_expected = true # true while an identifier followed by ':' may be a label
47
+ case_expected = false # set after 'case'/'default'; the next ':' re-enables labels
48
+ label_expected_before_preproc_line = nil # label_expected as it was when the current '#' line began
49
+ in_preproc_line = false # true from a '#' directive until whitespace holding an unescaped newline
50
+
51
+ until eos?
52
+
53
+ kind = nil
54
+ match = nil
55
+
56
+ case state
57
+
58
+ when :initial
59
+
60
+ if match = scan(/ \s+ | \\\n /x)
61
+ if in_preproc_line && match != "\\\n" && match.index(?\n)
62
+ in_preproc_line = false
63
+ label_expected = label_expected_before_preproc_line
64
+ end
65
+ tokens << [match, :space]
66
+ next # token already stored; skip the common emit at the end of the loop
67
+
68
+ elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
69
+ kind = :comment
70
+
71
+ elsif match = scan(/ \# \s* if \s* 0 /x)
72
+ match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
73
+ kind = :comment # an '#if 0' section is highlighted as a comment
74
+
75
+ elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
76
+ label_expected = match =~ /[;\{\}]/
77
+ if case_expected
78
+ label_expected = true if match == ':'
79
+ case_expected = false
80
+ end
81
+ kind = :operator
82
+
83
+ elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
84
+ kind = IDENT_KIND[match]
85
+ if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/)
86
+ kind = :label
87
+ match << matched # the label token includes its trailing ':'
88
+ else
89
+ label_expected = false
90
+ if kind == :reserved
91
+ case match
92
+ when 'case', 'default'
93
+ case_expected = true
94
+ end
95
+ end
96
+ end
97
+
98
+ elsif scan(/\$/)
99
+ kind = :ident
100
+
101
+ elsif match = scan(/L?"/)
102
+ tokens << [:open, :string] # opens a string token group, closed again by [:close, :string]
103
+ if match[0] == ?L
104
+ tokens << ['L', :modifier]
105
+ match = '"'
106
+ end
107
+ state = :string
108
+ kind = :delimiter
109
+
110
+ elsif scan(/#[ \t]*(\w*)/)
111
+ kind = :preprocessor
112
+ in_preproc_line = true
113
+ label_expected_before_preproc_line = label_expected
114
+ state = :include_expected if self[1] == 'include'
115
+
116
+ elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
117
+ label_expected = false
118
+ kind = :char
119
+
120
+ elsif scan(/0[xX][0-9A-Fa-f]+/)
121
+ label_expected = false
122
+ kind = :hex
123
+
124
+ elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
125
+ label_expected = false
126
+ kind = :oct
127
+
128
+ elsif scan(/(?:\d+)(?![.eEfF])L?L?/)
129
+ label_expected = false
130
+ kind = :integer
131
+
132
+ elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
133
+ label_expected = false
134
+ kind = :float
135
+
136
+ else
137
+ getch # nothing matched: consume one character and report it as :error
138
+ kind = :error
139
+
140
+ end
141
+
142
+ when :string
143
+ if scan(/[^\\\n"]+/)
144
+ kind = :content
145
+ elsif scan(/"/)
146
+ tokens << ['"', :delimiter]
147
+ tokens << [:close, :string]
148
+ state = :initial
149
+ label_expected = false
150
+ next # delimiter and group end already stored
151
+ elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
152
+ kind = :char
153
+ elsif scan(/ \\ | $ /x)
154
+ tokens << [:close, :string]
155
+ kind = :error
156
+ state = :initial
157
+ label_expected = false
158
+ else
159
+ raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
160
+ end
161
+
162
+ when :include_expected
163
+ if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
164
+ kind = :include
165
+ state = :initial
166
+
167
+ elsif match = scan(/\s+/)
168
+ kind = :space
169
+ state = :initial if match.index ?\n
170
+
171
+ else
172
+ state = :initial
173
+ next # no include target here; rescan the same position in the :initial state
174
+
175
+ end
176
+
177
+ else
178
+ raise_inspect 'Unknown state', tokens
179
+
180
+ end
181
+
182
+ match ||= matched # branches that did not capture the text use the last match
183
+ if $DEBUG and not kind
184
+ raise_inspect 'Error token %p in line %d' %
185
+ [[match, kind], line], tokens
186
+ end
187
+ raise_inspect 'Empty token', tokens unless match
188
+
189
+ tokens << [match, kind]
190
+
191
+ end
192
+
193
+ if state == :string # input ended inside a string literal: close the open group
194
+ tokens << [:close, :string]
195
+ end
196
+
197
+ tokens
198
+ end
199
+
200
+ end
201
+
202
+ end
203
+ end