coderay 0.4.3.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. data/LICENSE +340 -0
  2. data/README +103 -0
  3. data/demo/demo_count.rb +10 -0
  4. data/demo/demo_css.rb +4 -0
  5. data/demo/demo_div.rb +19 -0
  6. data/demo/demo_dump.rb +15 -0
  7. data/demo/demo_encoder.rb +39 -0
  8. data/demo/demo_global_vars.rb +13 -0
  9. data/demo/demo_global_vars2.rb +28 -0
  10. data/demo/demo_html.rb +394 -0
  11. data/demo/demo_html2.rb +11 -0
  12. data/demo/demo_load_encoder.rb +17 -0
  13. data/demo/demo_more.rb +204 -0
  14. data/demo/demo_scanner.rb +36 -0
  15. data/demo/demo_server.rb +92 -0
  16. data/demo/demo_simple.rb +10 -0
  17. data/demo/demo_stream.rb +25 -0
  18. data/demo/demo_stream2.rb +8 -0
  19. data/demo/demo_tokens.rb +3 -0
  20. data/lib/coderay.rb +284 -0
  21. data/lib/coderay/encoder.rb +151 -0
  22. data/lib/coderay/encoders/count.rb +21 -0
  23. data/lib/coderay/encoders/div.rb +16 -0
  24. data/lib/coderay/encoders/helpers/html_css.rb +155 -0
  25. data/lib/coderay/encoders/helpers/html_helper.rb +68 -0
  26. data/lib/coderay/encoders/helpers/html_output.rb +237 -0
  27. data/lib/coderay/encoders/html.rb +169 -0
  28. data/lib/coderay/encoders/null.rb +20 -0
  29. data/lib/coderay/encoders/span.rb +16 -0
  30. data/lib/coderay/encoders/statistic.rb +74 -0
  31. data/lib/coderay/encoders/text.rb +33 -0
  32. data/lib/coderay/encoders/tokens.rb +44 -0
  33. data/lib/coderay/encoders/yaml.rb +19 -0
  34. data/lib/coderay/helpers/filetype.rb +145 -0
  35. data/lib/coderay/helpers/gzip_simple.rb +123 -0
  36. data/lib/coderay/helpers/plugin.rb +286 -0
  37. data/lib/coderay/helpers/scanner_helper.rb +63 -0
  38. data/lib/coderay/scanner.rb +197 -0
  39. data/lib/coderay/scanners/c.rb +147 -0
  40. data/lib/coderay/scanners/delphi.rb +123 -0
  41. data/lib/coderay/scanners/helpers/ruby_helper.rb +212 -0
  42. data/lib/coderay/scanners/plaintext.rb +13 -0
  43. data/lib/coderay/scanners/ruby.rb +337 -0
  44. data/lib/coderay/tokens.rb +324 -0
  45. metadata +89 -0
@@ -0,0 +1,212 @@
1
+ module CodeRay module Scanners
2
+
3
+ class Ruby
4
+
5
+ RESERVED_WORDS = %w[
6
+ and def end in or unless begin
7
+ defined? ensure module redo super until
8
+ BEGIN break do next rescue then
9
+ when END case else for retry
10
+ while alias class elsif if not return
11
+ undef yield
12
+ ]
13
+
14
+ DEF_KEYWORDS = %w[ def ]
15
+ MODULE_KEYWORDS = %w[class module]
16
+ DEF_NEW_STATE = WordList.new(:initial).
17
+ add(DEF_KEYWORDS, :def_expected).
18
+ add(MODULE_KEYWORDS, :module_expected)
19
+
20
+ IDENTS_ALLOWING_REGEXP = %w[
21
+ and or not while until unless if then elsif when sub sub! gsub gsub! scan slice slice! split
22
+ ]
23
+ REGEXP_ALLOWED = WordList.new(false).
24
+ add(IDENTS_ALLOWING_REGEXP, :set)
25
+
26
+ PREDEFINED_CONSTANTS = %w[
27
+ nil true false self
28
+ DATA ARGV ARGF __FILE__ __LINE__
29
+ ]
30
+
31
+ IDENT_KIND = WordList.new(:ident).
32
+ add(RESERVED_WORDS, :reserved).
33
+ add(PREDEFINED_CONSTANTS, :pre_constant)
34
+
35
# Identifier: a letter or underscore followed by word characters.
# The match is case-insensitive, so capitalized names are covered too.
IDENT = /[a-z_]\w*/i

# Method name as it appears in ordinary code: identifier plus optional ? or !.
METHOD_NAME = / #{IDENT} [?!]? /ox

# Extended method name: additionally accepts setters and operator methods.
METHOD_NAME_EX = /
  #{IDENT}[?!=]?  # common methods: split, foo=, empty?, gsub!
  | \*\*?         # multiplication and power
  | [-+]@?        # plus, minus
  | [\/%&|^`~]    # division, modulo or format strings, &and, |or, ^xor, `system`, tilde
  | \[\]=?        # array getter and setter
  | << | >>       # append or shift left, shift right
  | <=?>? | >=?   # comparison, rocket operator
  | ===?          # simple equality and case equality
/ox

INSTANCE_VARIABLE = / @ #{IDENT} /ox
CLASS_VARIABLE = / @@ #{IDENT} /ox
OBJECT_VARIABLE = / @@? #{IDENT} /ox

# Global variables: named globals, numbered match references ($1, $10, ...),
# $0, the punctuation specials and the $-x option variables.
# Numbered references may have several digits, hence [1-9]\d*.
GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0[a-zA-Z_0-9]* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} |#{OBJECT_VARIABLE} /ox
VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox
55
+
56
+ QUOTE_TO_TYPE = {
57
+ '`' => :shell,
58
+ '/'=> :regexp,
59
+ }
60
+ QUOTE_TO_TYPE.default = :string
61
+
62
+ REGEXP_MODIFIERS = /[mixounse]*/
63
+ REGEXP_SYMBOLS = /
64
+ [|?*+?(){}\[\].^$]
65
+ /x
66
+
67
# Integer literals. Ruby accepts both lower- and uppercase radix prefixes
# (0x/0X, 0b/0B, 0o/0O, 0d/0D) and underscores between digit groups.
DECIMAL = /\d+(?:_\d+)*/ # doesn't recognize 09 as octal error
OCTAL = /0[_oO]?[0-7]+(?:_[0-7]+)*/
HEXADECIMAL = /0[xX][0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
BINARY = /0[bB][01]+(?:_[01]+)*/

# Floating point parts.
EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
FLOAT_OR_INT = / #{DECIMAL} (?: #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? )? /ox
FLOAT = / #{DECIMAL} (?: #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? ) /ox

# Any numeric literal. The prefixed forms come first so that the leading
# "0" of e.g. "0x1F" is not consumed as a decimal on its own.
NUMERIC = / #{OCTAL} | #{HEXADECIMAL} | #{BINARY} | 0[dD]#{DECIMAL} | #{FLOAT_OR_INT} /ox
76
+
77
+ SYMBOL = /
78
+ :
79
+ (?:
80
+ #{METHOD_NAME_EX}
81
+ | #{PREFIX_VARIABLE}
82
+ | ['"]
83
+ )
84
+ /ox
85
+
86
+ # TODO investigate \M, \c and \C escape sequences
87
+ # (?: M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-)? (?: \\ (?: [0-7]{3} | x[0-9A-Fa-f]{2} | . ) )
88
+ # assert_equal(225, ?\M-a)
89
+ # assert_equal(129, ?\M-\C-a)
90
+ ESCAPE = /
91
+ [abefnrstv]
92
+ | M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-
93
+ | [0-7]{1,3}
94
+ | x[0-9A-Fa-f]{1,2}
95
+ | .
96
+ /mx
97
+
98
+ CHARACTER = /
99
+ \?
100
+ (?:
101
+ [^\s\\]
102
+ | \\ #{ESCAPE}
103
+ )
104
+ /mx
105
+
106
+ # NOTE: This is not completely correct, but
107
+ # nobody needs heredoc delimiters ending with \n.
108
+ HEREDOC_OPEN = /
109
+ << (-)? # $1 = float
110
+ (?:
111
+ ( [A-Za-z_0-9]+ ) # $2 = delim
112
+ |
113
+ ( ["'`] ) # $3 = quote, type
114
+ ( [^\n]*? ) \3 # $4 = delim
115
+ )
116
+ /mx
117
+
118
+ RDOC = /
119
+ =begin (?!\S)
120
+ .*?
121
+ (?: \Z | ^=end (?!\S) [^\n]* )
122
+ /mx
123
+
124
+ DATA = /
125
+ __END__$
126
+ .*?
127
+ (?: \Z | (?=^\#CODE) )
128
+ /mx
129
+
130
+ RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x
131
+
132
+ FANCY_START = / % ( [qQwWxsr] | (?![\w\s=]) ) (.) /mox
133
+
134
# Maps the type letter of a %-literal (%q, %Q, %r, ...) to a pair of
# [token group type, interpreted?].
FancyStringType = {
  'q' => [:string, false],
  'Q' => [:string, true],
  'r' => [:regexp, true],
  's' => [:symbol, false],
  'x' => [:shell, true],
}
# Word lists are treated like the corresponding plain strings, and a bare
# %(...) literal (empty type letter) behaves like %Q.
FancyStringType['w'] = FancyStringType['q']
FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q']
145
+
146
# Scanner state for a string-like literal that is currently being scanned.
#
# Members:
#   type::         token group kind passed to the constructor (e.g. :string, :regexp)
#   interpreted::  the interpreted flag passed to the constructor
#   delim::        closing delimiter; nil for heredocs
#   heredoc::      false, or the heredoc mode (:indented is the only value checked here)
#   paren::        opening bracket when a bracket pair delimits the literal, else nil
#   paren_depth::  1 when a bracket pair delimits the literal, else nil
#   pattern::      lookahead regexp matching the next position of interest
class StringState < Struct.new :type, :interpreted, :delim, :heredoc,
                               :paren, :paren_depth, :pattern

  # Opening bracket => closing bracket.
  CLOSING_PAREN = Hash[ *%w[
    ( )
    [ ]
    < >
    { }
  ] ]

  CLOSING_PAREN.values.each { |o| o.freeze }  # debug, if I try to change it with <<
  # Closing bracket => opening bracket.
  OPENING_PAREN = CLOSING_PAREN.invert

  # Cache of lookahead patterns for ordinary (non-heredoc) literals,
  # keyed by [closing delimiter, interpreted].
  STRING_PATTERN = Hash.new { |h, k|
    delim, interpreted = *k
    delim_pattern = Regexp.escape(delim.dup)
    if starter = OPENING_PAREN[delim]
      # bracket-delimited literal: also stop at the opening bracket
      delim_pattern << Regexp.escape(starter)
    end

    # Extra stop positions for special modes. None of the callers visible in
    # this file passes :regexp_symbols or :words, so this is normally nil.
    special_escapes =
      case interpreted
      when :regexp_symbols
        '| ' + REGEXP_SYMBOLS.source
      when :words
        '| \s'
      end

    h[k] =
      if interpreted and not delim == '#'
        # also stop before #{, #$ and #@
        / (?= [#{delim_pattern}\\] | \# [{$@] #{special_escapes} ) /mx
      else
        / (?= [#{delim_pattern}\\] #{special_escapes} ) /mx
      end
  }

  # Cache of lookahead patterns for heredocs,
  # keyed by [terminator, interpreted, indented].
  # The empty group after the terminator sets capture 1 only when the
  # terminator line was matched.
  HEREDOC_PATTERN = Hash.new { |h, k|
    delim, interpreted, indented = *k
    delim_pattern = Regexp.escape(delim.dup)
    delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x
    h[k] =
      if interpreted
        / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx
      else
        / (?= #{delim_pattern}() | \\ ) /mx
      end
  }

  # kind::        stored as +type+
  # interpreted:: stored as +interpreted+ and used to select the pattern
  # delim::       opening delimiter, or the terminator text for heredocs
  # heredoc::     false for ordinary literals; otherwise the heredoc mode
  def initialize kind, interpreted, delim, heredoc = false
    paren = paren_depth = nil
    if heredoc
      # A heredoc terminator is plain text. It must not be treated as an
      # opening bracket, otherwise a terminator such as "(" would be
      # searched for as ")".
      pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ]
      delim = nil
    else
      if paren = CLOSING_PAREN[delim]
        # bracket pair: remember the opener, close on its counterpart
        delim, paren = paren, delim
        paren_depth = 1
      end
      pattern = STRING_PATTERN[ [delim, interpreted] ]
    end
    super kind, interpreted, delim, heredoc, paren, paren_depth, pattern
  end
end unless defined? StringState
209
+
210
+ end
211
+
212
+ end end
@@ -0,0 +1,13 @@
1
+ module CodeRay module Scanners
2
+
3
# Scanner for plain text: the whole remaining input becomes a single
# :plain token.
class Plaintext < Scanner

  register_for :plaintext, :plain

  # Appends one [text, :plain] token holding everything up to the end of
  # the input, and returns +tokens+.
  def scan_tokens tokens, options
    remainder = scan_until(/\z/)
    tokens << [remainder, :plain]
  end

end
12
+
13
+ end end
@@ -0,0 +1,337 @@
1
+ module CodeRay module Scanners
2
+
3
+ # This scanner is really complex, since Ruby _is_ a complex language!
4
+ #
5
+ # It tries to highlight 100% of all common code,
6
+ # and 90% of strange code.
7
+ #
8
+ # It is optimized for HTML highlighting, and is not very useful for
9
+ # parsing or pretty printing.
10
+ #
11
+ # For now, I think it's better than the scanners in VIM or Syntax, or
12
+ # any highlighter I was able to find, except Caleb's RubyLexer.
13
+ #
14
+ # I hope it's also better than the rdoc/irb lexer.
15
class Ruby < Scanner

  include Streamable

  register_for :ruby

  require 'coderay/scanners/helpers/ruby_helper'

  # NOTE(review): :parse_regexps is currently ignored; scan_tokens hard-codes
  # parse_regexp to false.
  DEFAULT_OPTIONS = {
    :parse_regexps => true,
  }

private
  # Tokenizes the remaining input and appends the tokens to +tokens+.
  #
  # Tokens are [text, kind] pairs; string-like literals are wrapped in
  # [:open, kind] ... [:close, kind] pairs. Returns +tokens+.
  #
  # +state+ is either a Symbol (:initial, :def_expected, :module_expected)
  # for normal code or a StringState while inside a string-like literal.
  # The flags regexp_allowed / fancy_allowed / last_token_dot are set to :set
  # by the current token and collapsed to true/false after each token, so
  # they describe the previous token only.
  #
  # Internal errors are reported through raise_inspect(message, tokens),
  # presumably provided by Scanner - it is already used by the $DEBUG check
  # at the end of the loop.
  def scan_tokens tokens, options
    parse_regexp = false  # options[:parse_regexps]
    first_bake = saved_tokens = nil
    last_token_dot = false
    fancy_allowed = regexp_allowed = true
    heredocs = nil      # heredocs opened on the current line, not yet scanned
    last_state = nil    # string state to return to after a #$var / #@var escape
    state = :initial
    depth = nil         # brace depth inside the current #{...} interpolation
    states = []         # stack of [state, depth, heredocs] for nested #{...}

    until eos?
      type = :error
      match = nil
      kind = nil

      if state.instance_of? StringState
        # {{{

        # everything up to the next interesting position is plain content
        match = scan_until(state.pattern) || scan_until(/\z/)
        tokens << [match, :content] unless match.empty?
        break if eos?

        # capture 1 is only set when the heredoc terminator line was found
        if state.heredoc and self[1]
          match = getch + scan_until(/$/)
          tokens << [match, :delimiter]
          tokens << [:close, state.type]
          state = :initial
          next
        end

        case match = getch

        when state.delim
          if state.paren
            state.paren_depth -= 1
            if state.paren_depth > 0
              tokens << [match, :nesting_delimiter]
              next
            end
          end
          tokens << [match, :delimiter]
          if state.type == :regexp and not eos?
            modifiers = scan(/#{REGEXP_MODIFIERS}/ox)
            tokens << [modifiers, :modifier] unless modifiers.empty?
            # NOTE(review): this branch is disabled (parse_regexp is always
            # false). It looks unfinished: it iterates over the same array
            # it appends to - verify before enabling.
            if parse_regexp
              extended = modifiers.index ?x
              tokens = saved_tokens
              regexp = tokens
              for text, type in regexp
                if text.is_a? String
                  case type
                  when :content
                    text.scan(/([^#]+)|(#.*)/) do |plain, comment|
                      if plain
                        tokens << [plain, :content]
                      else
                        tokens << [comment, :comment]
                      end
                    end
                  when :character
                    if text[/\\(?:[swdSWDAzZbB]|\d+)/]
                      tokens << [text, :modifier]
                    else
                      tokens << [text, type]
                    end
                  else
                    tokens << [text, type]
                  end
                else
                  tokens << [text, type]
                end
              end
              first_bake = saved_tokens = nil
            end
          end
          tokens << [:close, state.type]
          fancy_allowed = regexp_allowed = false
          state = :initial

        when '\\'
          if state.interpreted
            if esc = scan(/ #{ESCAPE} /ox)
              tokens << [match + esc, :char]
            else
              tokens << [match, :error]
            end
          else
            case m = getch
            when nil
              # input ended right after the backslash (assumes getch returns
              # nil at end of input, as StringScanner#getch does)
              tokens << [match, :error]
            when state.delim, '\\'
              tokens << [match + m, :char]
            else
              tokens << [match + m, :content]
            end
          end

        when '#'
          case peek(1)[0]
          when ?{
            # enter interpolated code; the string state is resumed when the
            # matching closing brace is found
            states.push [state, depth, heredocs]
            fancy_allowed = regexp_allowed = true
            state = :initial
            depth = 1
            tokens << [match + getch, :escape]
          when ?$, ?@
            tokens << [match, :escape]
            last_state = state  # scan one token as normal code, then return here
            state = :initial
          else
            raise_inspect "else-case # reached; #%p not handled" % peek(1), tokens
          end

        when state.paren
          state.paren_depth += 1
          tokens << [match, :nesting_delimiter]

        when REGEXP_SYMBOLS
          tokens << [match, :function]

        else
          raise_inspect "else-case \" reached; %p not handled, state = %p" % [match, state], tokens

        end
        next
        # }}}
      else
        # {{{
        if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or
          ( bol? and match = scan(/ #{DATA} | #{RDOC} /ox) )
          fancy_allowed = true
          case m = match[0]
          when ?\s, ?\t, ?\f
            match << scan(/\s*/) unless eos? or heredocs
            type = :space
          when ?\n, ?\\
            type = :space
            regexp_allowed = m == ?\n
            if heredocs
              unscan  # heredoc scanning needs \n at start
              state = heredocs.shift
              tokens << [:open, state.type]
              heredocs = nil if heredocs.empty?
              next
            else
              match << scan(/\s*/) unless eos?
            end
          when ?#, ?=, ?_
            type = :comment
            regexp_allowed = true
          else
            raise_inspect "else-case _ reached, because case %p was not handled" % [matched[0].chr], tokens
          end
          tokens << [match, type]
          next

        elsif state == :initial
          if match = scan(/ \.\.?\.? | [-+*=>;,|&!\(\)\[\]~^]+ | [\{\}] | :: /x)
            if match !~ / [.\)\]\}] \z/x or match =~ /\.\.\.?/
              regexp_allowed = fancy_allowed = :set
            end
            last_token_dot = :set if match == '.' or match == '::'
            type = :operator
            unless states.empty?
              # inside #{...}: track braces to find the end of the interpolation
              case match
              when '{'
                depth += 1
              when '}'
                depth -= 1
                if depth == 0
                  state, depth, heredocs = states.pop
                  type = :escape
                end
              end
            end

          elsif match = scan(/#{METHOD_NAME}/o)
            if last_token_dot
              type = if match[/^[A-Z]/] then :constant else :ident end
            else
              type = IDENT_KIND[match]
              if type == :ident and match[/^[A-Z]/]
                type = :constant
              elsif type == :reserved
                state = DEF_NEW_STATE[match]
              end
            end
            fancy_allowed = regexp_allowed = REGEXP_ALLOWED[match]

          elsif match = scan(/ ['"] /mx)
            tokens << [:open, :string]
            type = :delimiter
            state = StringState.new :string, match != '\'', match  # important for streaming

          elsif match = scan(/#{INSTANCE_VARIABLE}/o)
            type = :instance_variable

          elsif regexp_allowed and match = scan(/ \/ /mx)
            tokens << [:open, :regexp]
            type = :delimiter
            interpreted = true
            state = StringState.new :regexp, interpreted, match
            if parse_regexp
              tokens = []
              saved_tokens = tokens
            end

          elsif match = scan(/#{NUMERIC}/o)
            # Anchored on purpose: an unanchored search would find "1e5"
            # inside the hex literal "0x1e5" and call it a float.
            type = if match[/\A#{FLOAT}\z/o] then :float else :integer end

          elsif fancy_allowed and match = scan(/#{SYMBOL}/o)
            case match[1]
            when ?', ?"
              tokens << [:open, :symbol]
              state = StringState.new :symbol, match[1] == ?", match[1,1]
            end
            type = :symbol

          elsif fancy_allowed and match = scan(/#{HEREDOC_OPEN}/o)
            indented = self[1] == '-'
            quote = self[3]
            delim = self[quote ? 4 : 2]
            type = QUOTE_TO_TYPE[quote]
            tokens << [:open, type]
            tokens << [match, :delimiter]
            # the generic push at the end of the loop emits [:close, type];
            # the heredoc body is scanned after the next newline
            match = :close
            heredoc = StringState.new type, quote != '\'', delim, (indented ? :indented : :linestart )
            heredocs ||= []  # create heredocs if empty
            heredocs << heredoc

          elsif fancy_allowed and match = scan(/#{FANCY_START}/o)
            type, interpreted = *FancyStringType.fetch(self[1]) do
              raise_inspect 'Unknown fancy string: %%%p' % self[1], tokens
            end
            tokens << [:open, type]
            state = StringState.new type, interpreted, self[2]
            type = :delimiter

          elsif fancy_allowed and match = scan(/#{CHARACTER}/o)
            type = :integer

          elsif match = scan(/ [\/%<?:] /x)
            regexp_allowed = fancy_allowed = :set
            type = :operator

          elsif match = scan(/`/)
            if last_token_dot
              type = :operator
            else
              tokens << [:open, :shell]
              type = :delimiter
              state = StringState.new :shell, true, match
            end

          elsif match = scan(/#{GLOBAL_VARIABLE}/o)
            type = :global_variable

          elsif match = scan(/#{CLASS_VARIABLE}/o)
            type = :class_variable

          else
            # unknown character: emitted with type :error
            match = getch

          end

        elsif state == :def_expected
          if match = scan(/ (?: #{VARIABLE} (?: ::#{IDENT} )* \. )? #{METHOD_NAME_EX} /ox)
            type = :method
          else
            match = getch
          end
          state = :initial

        elsif state == :module_expected
          if match = scan(/<</)
            type = :operator
          else
            if match = scan(/ (?:#{IDENT}::)* #{IDENT} /ox)
              type = :class
            else
              match = getch
            end
          end
          state = :initial

        end

        # :set survives as true for exactly one following token
        regexp_allowed = regexp_allowed == :set
        fancy_allowed = fancy_allowed == :set
        last_token_dot = last_token_dot == :set

        if $DEBUG
          raise_inspect 'error token %p in line %d' % [tokens.last, line], tokens if not type or type == :error
        end

        tokens << [match, type]

        if last_state
          state = last_state
          last_state = nil
        end
        # }}}
      end
    end

    tokens
  end
end
335
+
336
+ end end
337
+ # vim:fdm=marker