coderay 0.4.3.48

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. data/LICENSE +340 -0
  2. data/README +103 -0
  3. data/demo/demo_count.rb +10 -0
  4. data/demo/demo_css.rb +4 -0
  5. data/demo/demo_div.rb +19 -0
  6. data/demo/demo_dump.rb +15 -0
  7. data/demo/demo_encoder.rb +39 -0
  8. data/demo/demo_global_vars.rb +13 -0
  9. data/demo/demo_global_vars2.rb +28 -0
  10. data/demo/demo_html.rb +394 -0
  11. data/demo/demo_html2.rb +11 -0
  12. data/demo/demo_load_encoder.rb +17 -0
  13. data/demo/demo_more.rb +204 -0
  14. data/demo/demo_scanner.rb +36 -0
  15. data/demo/demo_server.rb +92 -0
  16. data/demo/demo_simple.rb +10 -0
  17. data/demo/demo_stream.rb +25 -0
  18. data/demo/demo_stream2.rb +8 -0
  19. data/demo/demo_tokens.rb +3 -0
  20. data/lib/coderay.rb +284 -0
  21. data/lib/coderay/encoder.rb +151 -0
  22. data/lib/coderay/encoders/count.rb +21 -0
  23. data/lib/coderay/encoders/div.rb +16 -0
  24. data/lib/coderay/encoders/helpers/html_css.rb +155 -0
  25. data/lib/coderay/encoders/helpers/html_helper.rb +68 -0
  26. data/lib/coderay/encoders/helpers/html_output.rb +237 -0
  27. data/lib/coderay/encoders/html.rb +169 -0
  28. data/lib/coderay/encoders/null.rb +20 -0
  29. data/lib/coderay/encoders/span.rb +16 -0
  30. data/lib/coderay/encoders/statistic.rb +74 -0
  31. data/lib/coderay/encoders/text.rb +33 -0
  32. data/lib/coderay/encoders/tokens.rb +44 -0
  33. data/lib/coderay/encoders/yaml.rb +19 -0
  34. data/lib/coderay/helpers/filetype.rb +145 -0
  35. data/lib/coderay/helpers/gzip_simple.rb +123 -0
  36. data/lib/coderay/helpers/plugin.rb +286 -0
  37. data/lib/coderay/helpers/scanner_helper.rb +63 -0
  38. data/lib/coderay/scanner.rb +197 -0
  39. data/lib/coderay/scanners/c.rb +147 -0
  40. data/lib/coderay/scanners/delphi.rb +123 -0
  41. data/lib/coderay/scanners/helpers/ruby_helper.rb +212 -0
  42. data/lib/coderay/scanners/plaintext.rb +13 -0
  43. data/lib/coderay/scanners/ruby.rb +337 -0
  44. data/lib/coderay/tokens.rb +324 -0
  45. metadata +89 -0
@@ -0,0 +1,212 @@
module CodeRay module Scanners

  # Constants and helper classes for the Ruby scanner: keyword lists,
  # the regular expressions for the individual token kinds, and the
  # StringState class that tracks an open string-like literal.
  class Ruby

    RESERVED_WORDS = %w[
      and def end in or unless begin
      defined? ensure module redo super until
      BEGIN break do next rescue then
      when END case else for retry
      while alias class elsif if not return
      undef yield
    ]

    DEF_KEYWORDS = %w[ def ]
    MODULE_KEYWORDS = %w[class module]
    # Looked up with a reserved word to pick the scanner state that follows it.
    # NOTE(review): WordList is defined elsewhere; its constructor argument is
    # presumably the value returned for unlisted words - confirm.
    DEF_NEW_STATE = WordList.new(:initial).
      add(DEF_KEYWORDS, :def_expected).
      add(MODULE_KEYWORDS, :module_expected)

    # Identifiers after which a following "/" is taken as the start of a regexp.
    IDENTS_ALLOWING_REGEXP = %w[
      and or not while until unless if then elsif when sub sub! gsub gsub! scan slice slice! split
    ]
    REGEXP_ALLOWED = WordList.new(false).
      add(IDENTS_ALLOWING_REGEXP, :set)

    PREDEFINED_CONSTANTS = %w[
      nil true false self
      DATA ARGV ARGF __FILE__ __LINE__
    ]

    # Token kind for an identifier-like word.
    IDENT_KIND = WordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_CONSTANTS, :pre_constant)

    # IDENT = /[a-zA-Z_][a-zA-Z_0-9]*/
    IDENT = /[a-z_][\w_]*/i

    METHOD_NAME = / #{IDENT} [?!]? /ox
    METHOD_NAME_EX = /
      #{IDENT}[?!=]?  # common methods: split, foo=, empty?, gsub!
      | \*\*?         # multiplication and power
      | [-+]@?        # plus, minus
      | [\/%&|^`~]    # division, modulo or format strings, &and, |or, ^xor, `system`, tilde
      | \[\]=?        # array getter and setter
      | << | >>       # append or shift left, shift right
      | <=?>? | >=?   # comparison, rocket operator
      | ===?          # simple equality and case equality
    /ox
    INSTANCE_VARIABLE = / @ #{IDENT} /ox
    CLASS_VARIABLE = / @@ #{IDENT} /ox
    OBJECT_VARIABLE = / @@? #{IDENT} /ox
    GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9] | 0[a-zA-Z_0-9]* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
    PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} |#{OBJECT_VARIABLE} /ox
    VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox

    # Token type opened by a quote character; any other quote (or none)
    # yields :string through the hash default.
    QUOTE_TO_TYPE = {
      '`' => :shell,
      '/'=> :regexp,
    }
    QUOTE_TO_TYPE.default = :string

    REGEXP_MODIFIERS = /[mixounse]*/
    # A single regexp metacharacter ("?" used to be listed twice in the class).
    REGEXP_SYMBOLS = /
      [|?*+(){}\[\].^$]
    /x

    DECIMAL = /\d+(?:_\d+)*/  # doesn't recognize 09 as octal error
    OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
    HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
    BINARY = /0b[01]+(?:_[01]+)*/

    EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
    FLOAT_OR_INT = / #{DECIMAL} (?: #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? )? /ox
    FLOAT = / #{DECIMAL} (?: #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? ) /ox
    NUMERIC = / #{OCTAL} | #{HEXADECIMAL} | #{BINARY} | #{FLOAT_OR_INT} /ox

    # A symbol start: ":" followed by a method name, a prefixed variable,
    # or an opening quote of a quoted symbol.
    SYMBOL = /
      :
      (?:
        #{METHOD_NAME_EX}
        | #{PREFIX_VARIABLE}
        | ['"]
      )
    /ox

    # TODO investigate \M, \c and \C escape sequences
    # (?: M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-)? (?: \\ (?: [0-7]{3} | x[0-9A-Fa-f]{2} | . ) )
    # assert_equal(225, ?\M-a)
    # assert_equal(129, ?\M-\C-a)
    # What may follow a backslash (the backslash itself is not part of ESCAPE).
    ESCAPE = /
      [abefnrstv]
      | M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-
      | [0-7]{1,3}
      | x[0-9A-Fa-f]{1,2}
      | .
    /mx

    # Character literal such as ?a or ?\n.
    CHARACTER = /
      \?
      (?:
        [^\s\\]
        | \\ #{ESCAPE}
      )
    /mx

    # NOTE: This is not completely correct, but
    # nobody needs heredoc delimiters ending with \n.
    HEREDOC_OPEN = /
      << (-)?              # $1 = float
      (?:
        ( [A-Za-z_0-9]+ )  # $2 = delim
      |
        ( ["'`] )          # $3 = quote, type
        ( [^\n]*? ) \3     # $4 = delim
      )
    /mx

    # An =begin ... =end block (or =begin up to the end of input).
    RDOC = /
      =begin (?!\S)
      .*?
      (?: \Z | ^=end (?!\S) [^\n]* )
    /mx

    # Everything from __END__ up to the end of input or a line starting
    # with #CODE.
    DATA = /
      __END__$
      .*?
      (?: \Z | (?=^\#CODE) )
    /mx

    RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x

    # %-literal start: group 1 is the type letter (or empty), group 2 the
    # opening delimiter.
    FANCY_START = / % ( [qQwWxsr] | (?![\w\s=]) ) (.) /mox

    # Maps the %-literal type letter to [token type, interpreted].
    FancyStringType = {
      'q' => [:string, false],
      'Q' => [:string, true],
      'r' => [:regexp, true],
      's' => [:symbol, false],
      'x' => [:shell, true],
    }
    # %w and %W are treated exactly like %q and %Q; a bare % like %Q.
    FancyStringType['w'] = FancyStringType['q']
    FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q']

    # State of the scanner while it is inside a string-like literal
    # (string, regexp, symbol, shell command, or heredoc body).
    #
    # Members:
    #   type         token type of the literal (:string, :regexp, ...)
    #   interpreted  truthy if escapes / interpolation are recognized
    #   delim        closing delimiter character (nil for heredocs)
    #   heredoc      false, or :indented / :linestart for heredoc bodies
    #   paren        opening bracket if the literal uses a bracket pair
    #   paren_depth  nesting depth of that bracket pair
    #   pattern      lookahead regexp that stops at the next interesting spot
    class StringState < Struct.new :type, :interpreted, :delim, :heredoc,
      :paren, :paren_depth, :pattern

      # opening bracket => closing bracket
      CLOSING_PAREN = Hash[ *%w[
        ( )
        [ ]
        < >
        { }
      ] ]

      CLOSING_PAREN.values.each { |o| o.freeze }  # debug, if I try to change it with <<
      # closing bracket => opening bracket
      OPENING_PAREN = CLOSING_PAREN.invert

      # Cache of stop patterns for ordinary literals, keyed by
      # [closing delimiter, interpreted]. Built lazily on first lookup.
      STRING_PATTERN = Hash.new { |h, k|
        delim, interpreted = *k
        delim_pattern = Regexp.escape(delim.dup)
        # For bracket pairs the opening bracket must stop the scan too,
        # so that nesting can be counted.
        if starter = OPENING_PAREN[delim]
          delim_pattern << Regexp.escape(starter)
        end

        # NOTE(review): no visible caller passes :regexp_symbols or :words
        # as the interpreted flag, so special_escapes is normally nil.
        special_escapes =
          case interpreted
          when :regexp_symbols
            '| ' + REGEXP_SYMBOLS.source
          when :words
            '| \s'
          end

        h[k] =
          if interpreted and not delim == '#'
            / (?= [#{delim_pattern}\\] | \# [{$@] #{special_escapes} ) /mx
          else
            / (?= [#{delim_pattern}\\] #{special_escapes} ) /mx
          end
      }

      # Cache of stop patterns for heredoc bodies, keyed by
      # [delimiter, interpreted, indented]. The empty group after the
      # delimiter pattern is group 1; it only participates in the match
      # when the terminator line was found.
      HEREDOC_PATTERN = Hash.new { |h, k|
        delim, interpreted, indented = *k
        delim_pattern = Regexp.escape(delim.dup)
        delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x
        h[k] =
          if interpreted
            / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx
          else
            / (?= #{delim_pattern}() | \\ ) /mx
          end
      }

      # kind        - token type of the literal
      # interpreted - whether escapes / interpolation are recognized
      # delim       - the opening delimiter (or the heredoc identifier)
      # heredoc     - false, :indented or :linestart
      def initialize kind, interpreted, delim, heredoc = false
        # Bracket pairs only exist for ordinary literals. A heredoc
        # identifier such as "{" must be matched literally, so it is
        # never swapped for its closing counterpart.
        if not heredoc and paren = CLOSING_PAREN[delim]
          delim, paren = paren, delim
          paren_depth = 1
        end
        if heredoc
          pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ]
          delim = nil
        else
          pattern = STRING_PATTERN[ [delim, interpreted] ]
        end
        super kind, interpreted, delim, heredoc, paren, paren_depth, pattern
      end
    end unless defined? StringState

  end

end end
@@ -0,0 +1,13 @@
module CodeRay
  module Scanners

    # Scanner for plain text. It does no analysis at all: the rest of
    # the input becomes one single token of kind :plain.
    class Plaintext < Scanner

      register_for :plaintext, :plain

      # Appends the remaining input to +tokens+ as one [text, :plain]
      # pair; the value of that append is the return value.
      # +options+ is accepted but not used.
      def scan_tokens(tokens, options)
        remaining_text = scan_until(/\z/)
        tokens << [remaining_text, :plain]
      end

    end

  end
end
@@ -0,0 +1,337 @@
module CodeRay module Scanners

  # This scanner is really complex, since Ruby _is_ a complex language!
  #
  # It tries to highlight 100% of all common code,
  # and 90% of strange codes.
  #
  # It is optimized for HTML highlighting, and is not very useful for
  # parsing or pretty printing.
  #
  # For now, I think it's better than the scanners in VIM or Syntax, or
  # any highlighter I was able to find, except Caleb's RubyLexer.
  #
  # I hope it's also better than the rdoc/irb lexer.
  class Ruby < Scanner

    include Streamable

    register_for :ruby

    require 'coderay/scanners/helpers/ruby_helper'

    DEFAULT_OPTIONS = {
      :parse_regexps => true,
    }

  private
    # Scans the whole input and appends [text, kind] pairs to +tokens+;
    # string-like literals are additionally wrapped in [:open, type] and
    # [:close, type] pairs. Returns +tokens+.
    #
    # The loop is a small state machine. +state+ is :initial,
    # :def_expected, :module_expected, or a StringState while inside a
    # string-like literal. +states+ is a stack used for "#{...}" code
    # embedded in literals; +heredocs+ queues heredoc bodies that start
    # at the next line break.
    def scan_tokens tokens, options
      parse_regexp = false  # options[:parse_regexps]
      saved_tokens = nil
      last_token_dot = false
      fancy_allowed = regexp_allowed = true
      heredocs = nil
      last_state = nil
      state = :initial
      depth = nil
      states = []

      until eos?
        type = :error
        match = nil

        if state.instance_of? StringState
# {{{

          # Everything up to the next delimiter, backslash or
          # interpolation start is plain content.
          match = scan_until(state.pattern) || scan_until(/\z/)
          tokens << [match, :content] unless match.empty?
          break if eos?

          # Group 1 of a heredoc pattern is set when the terminator
          # line was reached.
          if state.heredoc and self[1]
            match = getch + scan_until(/$/)
            tokens << [match, :delimiter]
            tokens << [:close, state.type]
            state = :initial
            next
          end

          case match = getch

          when state.delim
            if state.paren
              state.paren_depth -= 1
              if state.paren_depth > 0
                tokens << [match, :nesting_delimiter]
                next
              end
            end
            tokens << [match, :delimiter]
            if state.type == :regexp and not eos?
              modifiers = scan(/#{REGEXP_MODIFIERS}/ox)
              tokens << [modifiers, :modifier] unless modifiers.empty?
              if parse_regexp
                # This branch is currently disabled (parse_regexp is
                # hard-coded to false above).
                extended = modifiers.index ?x  # not used yet
                # The tokens collected since the regexp was opened are
                # post-processed and re-emitted into the real token list.
                # The buffers must be swapped in this order; otherwise
                # the loop would append to the list it iterates over.
                regexp = tokens
                tokens = saved_tokens
                for text, type in regexp
                  if text.is_a? String
                    case type
                    when :content
                      text.scan(/([^#]+)|(#.*)/) do |plain, comment|
                        if plain
                          tokens << [plain, :content]
                        else
                          tokens << [comment, :comment]
                        end
                      end
                    when :character
                      if text[/\\(?:[swdSWDAzZbB]|\d+)/]
                        tokens << [text, :modifier]
                      else
                        tokens << [text, type]
                      end
                    else
                      tokens << [text, type]
                    end
                  else
                    tokens << [text, type]
                  end
                end
                saved_tokens = nil
              end
            end
            tokens << [:close, state.type]
            fancy_allowed = regexp_allowed = false
            state = :initial

          when '\\'
            if state.interpreted
              if esc = scan(/ #{ESCAPE} /ox)
                tokens << [match + esc, :char]
              else
                tokens << [match, :error]
              end
            else
              case m = getch
              when nil
                # The input ended right after the backslash; there is
                # nothing to append to it.
                tokens << [match, :error]
              when state.delim, '\\'
                tokens << [match + m, :char]
              else
                tokens << [match + m, :content]
              end
            end

          when '#'
            case peek(1)[0]
            when ?{
              # Embedded code: remember the string state and scan
              # normal code until the matching "}".
              states.push [state, depth, heredocs]
              fancy_allowed = regexp_allowed = true
              state = :initial
              depth = 1
              tokens << [match + getch, :escape]
            when ?$, ?@
              tokens << [match, :escape]
              last_state = state  # scan one token as normal code, then return here
              state = :initial
            else
              raise_inspect "else-case # reached; #%p not handled" % peek(1), tokens
            end

          when state.paren
            state.paren_depth += 1
            tokens << [match, :nesting_delimiter]

          when REGEXP_SYMBOLS
            tokens << [match, :function]

          else
            raise_inspect "else-case \" reached; %p not handled, state = %p" % [match, state], tokens

          end
          next
# }}}
        else
# {{{
          if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or
            ( bol? and match = scan(/ #{DATA} | #{RDOC} /ox) )
            fancy_allowed = true
            case m = match[0]
            when ?\s, ?\t, ?\f
              match << scan(/\s*/) unless eos? or heredocs
              type = :space
            when ?\n, ?\\
              type = :space
              regexp_allowed = m == ?\n
              if heredocs
                unscan  # heredoc scanning needs \n at start
                state = heredocs.shift
                tokens << [:open, state.type]
                heredocs = nil if heredocs.empty?
                next
              else
                match << scan(/\s*/) unless eos?
              end
            when ?#, ?=, ?_
              type = :comment
              regexp_allowed = true
            else
              raise_inspect "else-case _ reached, because case %p was not handled" % [matched[0].chr], tokens
            end
            tokens << [match, type]
            next

          elsif state == :initial
            if match = scan(/ \.\.?\.? | [-+*=>;,|&!\(\)\[\]~^]+ | [\{\}] | :: /x)
              if match !~ / [.\)\]\}] \z/x or match =~ /\.\.\.?/
                regexp_allowed = fancy_allowed = :set
              end
              last_token_dot = :set if match == '.' or match == '::'
              type = :operator
              unless states.empty?
                # Inside "#{...}": track brace nesting to find the end
                # of the embedded code.
                case match
                when '{'
                  depth += 1
                when '}'
                  depth -= 1
                  if depth == 0
                    state, depth, heredocs = states.pop
                    type = :escape
                  end
                end
              end

            elsif match = scan(/#{METHOD_NAME}/o)
              if last_token_dot
                type = if match[/^[A-Z]/] then :constant else :ident end
              else
                type = IDENT_KIND[match]
                if type == :ident and match[/^[A-Z]/]
                  type = :constant
                elsif type == :reserved
                  state = DEF_NEW_STATE[match]
                end
              end
              fancy_allowed = regexp_allowed = REGEXP_ALLOWED[match]

            elsif match = scan(/ ['"] /mx)
              tokens << [:open, :string]
              type = :delimiter
              state = StringState.new :string, match != '\'', match  # important for streaming

            elsif match = scan(/#{INSTANCE_VARIABLE}/o)
              type = :instance_variable

            elsif regexp_allowed and match = scan(/ \/ /mx)
              tokens << [:open, :regexp]
              type = :delimiter
              interpreted = true
              state = StringState.new :regexp, interpreted, match
              if parse_regexp
                # Keep the real token list and collect the regexp's
                # tokens in a fresh buffer until it is closed.
                # NOTE(review): an unterminated regexp would leave the
                # buffer in place at the end of input; handle that
                # before enabling parse_regexp.
                saved_tokens = tokens
                tokens = []
              end

            elsif match = scan(/#{NUMERIC}/o)
              type = if match[/#{FLOAT}/o] then :float else :integer end

            elsif fancy_allowed and match = scan(/#{SYMBOL}/o)
              case match[1]
              when ?', ?"
                tokens << [:open, :symbol]
                state = StringState.new :symbol, match[1] == ?", match[1,1]
              end
              type = :symbol

            elsif fancy_allowed and match = scan(/#{HEREDOC_OPEN}/o)
              indented = self[1] == '-'
              quote = self[3]
              delim = self[quote ? 4 : 2]
              type = QUOTE_TO_TYPE[quote]
              tokens << [:open, type]
              tokens << [match, :delimiter]
              match = :close
              # The body starts at the next line break; queue it.
              heredoc = StringState.new type, quote != '\'', delim, (indented ? :indented : :linestart )
              heredocs ||= []  # create heredocs if empty
              heredocs << heredoc

            elsif fancy_allowed and match = scan(/#{FANCY_START}/o)
              type, interpreted = *FancyStringType.fetch(self[1]) do |k|
                raise_inspect 'Unknown fancy string: %%%p' % k, tokens
              end
              tokens << [:open, type]
              state = StringState.new type, interpreted, self[2]
              type = :delimiter

            elsif fancy_allowed and match = scan(/#{CHARACTER}/o)
              type = :integer

            elsif match = scan(/ [\/%<?:] /x)
              regexp_allowed = fancy_allowed = :set
              type = :operator

            elsif match = scan(/`/)
              if last_token_dot
                type = :operator
              else
                tokens << [:open, :shell]
                type = :delimiter
                state = StringState.new :shell, true, match
              end

            elsif match = scan(/#{GLOBAL_VARIABLE}/o)
              type = :global_variable

            elsif match = scan(/#{CLASS_VARIABLE}/o)
              type = :class_variable

            else
              match = getch

            end

          elsif state == :def_expected
            if match = scan(/ (?: #{VARIABLE} (?: ::#{IDENT} )* \. )? #{METHOD_NAME_EX} /ox)
              type = :method
            else
              match = getch
            end
            state = :initial

          elsif state == :module_expected
            if match = scan(/<</)
              type = :operator
            else
              if match = scan(/ (?:#{IDENT}::)* #{IDENT} /ox)
                type = :class
              else
                match = getch
              end
            end
            state = :initial

          end

          # The flags only survive into the next round if the token just
          # scanned set them to :set.
          regexp_allowed = regexp_allowed == :set
          fancy_allowed = fancy_allowed == :set
          last_token_dot = last_token_dot == :set

          if $DEBUG
            raise_inspect 'error token %p in line %d' % [tokens.last, line], tokens if not type or type == :error
          end

          tokens << [match, type]

          # Return into the string after a single "#$var" / "#@var" token.
          if last_state
            state = last_state
            last_state = nil
          end
# }}}
        end
      end

      tokens
    end
  end

end end
# vim:fdm=marker