coderay 1.0.0.598.pre → 1.0.0.738.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. data/bin/coderay +1 -1
  2. data/lib/coderay.rb +38 -32
  3. data/lib/coderay/duo.rb +1 -54
  4. data/lib/coderay/encoder.rb +31 -33
  5. data/lib/coderay/encoders/_map.rb +4 -2
  6. data/lib/coderay/encoders/comment_filter.rb +0 -61
  7. data/lib/coderay/encoders/count.rb +2 -23
  8. data/lib/coderay/encoders/debug.rb +11 -60
  9. data/lib/coderay/encoders/filter.rb +0 -46
  10. data/lib/coderay/encoders/html.rb +83 -91
  11. data/lib/coderay/encoders/html/css.rb +1 -6
  12. data/lib/coderay/encoders/html/numbering.rb +18 -21
  13. data/lib/coderay/encoders/html/output.rb +10 -52
  14. data/lib/coderay/encoders/json.rb +19 -39
  15. data/lib/coderay/encoders/lines_of_code.rb +7 -52
  16. data/lib/coderay/encoders/null.rb +6 -13
  17. data/lib/coderay/encoders/statistic.rb +30 -93
  18. data/lib/coderay/encoders/terminal.rb +3 -4
  19. data/lib/coderay/encoders/text.rb +1 -23
  20. data/lib/coderay/encoders/token_kind_filter.rb +0 -58
  21. data/lib/coderay/helpers/file_type.rb +119 -240
  22. data/lib/coderay/helpers/gzip.rb +41 -0
  23. data/lib/coderay/helpers/plugin.rb +237 -307
  24. data/lib/coderay/scanner.rb +112 -88
  25. data/lib/coderay/scanners/_map.rb +3 -3
  26. data/lib/coderay/scanners/c.rb +7 -7
  27. data/lib/coderay/scanners/clojure.rb +204 -0
  28. data/lib/coderay/scanners/css.rb +10 -20
  29. data/lib/coderay/scanners/debug.rb +9 -55
  30. data/lib/coderay/scanners/diff.rb +21 -4
  31. data/lib/coderay/scanners/html.rb +65 -18
  32. data/lib/coderay/scanners/java.rb +3 -2
  33. data/lib/coderay/scanners/java_script.rb +3 -3
  34. data/lib/coderay/scanners/json.rb +7 -6
  35. data/lib/coderay/scanners/php.rb +2 -1
  36. data/lib/coderay/scanners/rhtml.rb +6 -2
  37. data/lib/coderay/scanners/ruby.rb +193 -193
  38. data/lib/coderay/scanners/ruby/patterns.rb +15 -82
  39. data/lib/coderay/scanners/ruby/string_state.rb +71 -0
  40. data/lib/coderay/scanners/sql.rb +1 -1
  41. data/lib/coderay/scanners/yaml.rb +4 -2
  42. data/lib/coderay/styles/_map.rb +2 -2
  43. data/lib/coderay/styles/alpha.rb +48 -38
  44. data/lib/coderay/styles/cycnus.rb +2 -1
  45. data/lib/coderay/token_kinds.rb +88 -86
  46. data/lib/coderay/tokens.rb +88 -112
  47. data/test/functional/basic.rb +184 -5
  48. data/test/functional/examples.rb +4 -4
  49. data/test/functional/for_redcloth.rb +3 -2
  50. data/test/functional/suite.rb +7 -6
  51. metadata +11 -24
  52. data/lib/coderay/helpers/gzip_simple.rb +0 -123
  53. data/test/functional/load_plugin_scanner.rb +0 -11
  54. data/test/functional/vhdl.rb +0 -126
  55. data/test/functional/word_list.rb +0 -79
@@ -14,12 +14,11 @@ module Scanners
14
14
  ] # :nodoc:
15
15
 
16
16
  module RE # :nodoc:
17
- NonASCII = /[\x80-\xFF]/
18
17
  Hex = /[0-9a-fA-F]/
19
18
  Unicode = /\\#{Hex}{1,6}(?:\r\n|\s)?/ # differs from standard because it allows uppercase hex too
20
19
  Escape = /#{Unicode}|\\[^\r\n\f0-9a-fA-F]/
21
- NMChar = /[-_a-zA-Z0-9]|#{NonASCII}|#{Escape}/
22
- NMStart = /[_a-zA-Z]|#{NonASCII}|#{Escape}/
20
+ NMChar = /[-_a-zA-Z0-9]|#{Escape}/
21
+ NMStart = /[_a-zA-Z]|#{Escape}/
23
22
  NL = /\r\n|\r|\n|\f/
24
23
  String1 = /"(?:[^\n\r\f\\"]|\\#{NL}|#{Escape})*"?/ # FIXME: buggy regexp
25
24
  String2 = /'(?:[^\n\r\f\\']|\\#{NL}|#{Escape})*'?/ # FIXME: buggy regexp
@@ -55,12 +54,12 @@ module Scanners
55
54
 
56
55
  value_expected = nil
57
56
  states = [:initial]
58
-
57
+
59
58
  until eos?
60
-
59
+
61
60
  if match = scan(/\s+/)
62
61
  encoder.text_token match, :space
63
-
62
+
64
63
  elsif case states.last
65
64
  when :initial, :media
66
65
  if match = scan(/(?>#{RE::Ident})(?!\()|\*/ox)
@@ -102,24 +101,15 @@ module Scanners
102
101
  states[-1] = :media
103
102
  end
104
103
 
105
- when :comment
106
- if match = scan(/(?:[^*\s]|\*(?!\/))+/)
107
- encoder.text_token match, :comment
108
- elsif match = scan(/\*\//)
109
- encoder.text_token match, :comment
110
- states.pop
111
- elsif match = scan(/\s+/)
112
- encoder.text_token match, :space
113
- end
114
-
115
104
  else
105
+ #:nocov:
116
106
  raise_inspect 'Unknown state', encoder
117
-
107
+ #:nocov:
108
+
118
109
  end
119
-
120
- elsif match = scan(/\/\*/)
110
+
111
+ elsif match = scan(/\/\*(?:.*?\*\/|\z)/m)
121
112
  encoder.text_token match, :comment
122
- states.push :comment
123
113
 
124
114
  elsif match = scan(/\{/)
125
115
  value_expected = false
@@ -1,30 +1,29 @@
1
- ($:.unshift '../..'; require 'coderay') unless defined? CodeRay
2
1
  module CodeRay
3
2
  module Scanners
4
-
3
+
5
4
  # = Debug Scanner
6
5
  #
7
6
  # Interprets the output of the Encoders::Debug encoder.
8
7
  class Debug < Scanner
9
-
8
+
10
9
  register_for :debug
11
10
  title 'CodeRay Token Dump Import'
12
11
 
13
12
  protected
14
13
 
15
14
  def scan_tokens encoder, options
16
-
15
+
17
16
  opened_tokens = []
18
-
17
+
19
18
  until eos?
20
-
19
+
21
20
  if match = scan(/\s+/)
22
21
  encoder.text_token match, :space
23
22
 
24
23
  elsif match = scan(/ (\w+) \( ( [^\)\\]* ( \\. [^\)\\]* )* ) \)? /x)
25
24
  kind = self[1].to_sym
26
- match = self[2].gsub(/\\(.)/, '\1')
27
- unless Tokens::AbbreviationForKind.has_key? kind
25
+ match = self[2].gsub(/\\(.)/m, '\1')
26
+ unless TokenKinds.has_key? kind
28
27
  kind = :error
29
28
  match = matched
30
29
  end
@@ -59,53 +58,8 @@ module Scanners
59
58
 
60
59
  encoder
61
60
  end
62
-
63
- end
64
-
65
- end
66
- end
67
-
68
- if $0 == __FILE__
69
- $VERBOSE = true
70
- $: << File.join(File.dirname(__FILE__), '..')
71
- eval DATA.read, nil, $0, __LINE__ + 4
72
- end
73
-
74
- __END__
75
- require 'test/unit'
76
-
77
- class DebugScannerTest < Test::Unit::TestCase
78
-
79
- def test_creation
80
- assert CodeRay::Scanners::Debug < CodeRay::Scanners::Scanner
81
- debug = nil
82
- assert_nothing_raised do
83
- debug = CodeRay.scanner :debug
84
- end
85
- assert_kind_of CodeRay::Scanners::Scanner, debug
86
- end
87
-
88
- TEST_INPUT = <<-'DEBUG'.chomp
89
- integer(10)operator((\\\))string<content(test)>test[
90
-
91
-
92
- method([])]
93
- DEBUG
94
- TEST_OUTPUT = CodeRay::Tokens[
95
- ['10', :integer],
96
- ['(\\)', :operator],
97
- [:begin_group, :string],
98
- ['test', :content],
99
- [:end_group, :string],
100
- [:begin_line, :test],
101
- ["\n\n \t \n", :space],
102
- ["[]", :method],
103
- [:end_line, :test],
104
- ].flatten
105
-
106
- def test_filtering_text_tokens
107
- assert_equal TEST_OUTPUT, CodeRay::Scanners::Debug.new.tokenize(TEST_INPUT)
108
- assert_equal TEST_OUTPUT, CodeRay.scan(TEST_INPUT, :debug)
61
+
109
62
  end
110
63
 
111
64
  end
65
+ end
@@ -81,7 +81,7 @@ module Scanners
81
81
  encoder.begin_group :change
82
82
  end
83
83
  encoder.text_token match[0,2], :change
84
- encoder.text_token match[2...-2], :plain if match.size > 4
84
+ encoder.text_token match[2...-2], :plain
85
85
  encoder.text_token match[-2,2], :change
86
86
  encoder.end_group :change unless line_kind
87
87
  next unless match = scan(/.+/)
@@ -109,8 +109,9 @@ module Scanners
109
109
  if content_scanner.instance_variable_defined?(:@state)
110
110
  content_scanner_entry_state = content_scanner.instance_variable_get(:@state)
111
111
  end
112
- skip(/(.*)(.*?)(.*)\n\+\1(.*)\3$/)
113
- pre, deleted, post = content_scanner.tokenize [self[1], self[2], self[3]], :tokens => Tokens.new
112
+ skip(/(.*)\n\+(.*)$/)
113
+ head, deletion, insertion, tail = diff self[1], self[2]
114
+ pre, deleted, post = content_scanner.tokenize [head, deletion, tail], :tokens => Tokens.new
114
115
  encoder.tokens pre
115
116
  encoder.begin_group :eyecatcher
116
117
  encoder.tokens deleted
@@ -121,7 +122,7 @@ module Scanners
121
122
  encoder.begin_line line_kind = :insert
122
123
  encoder.text_token '+', :insert
123
124
  content_scanner.instance_variable_set(:@state, content_scanner_entry_state || :initial)
124
- pre, inserted, post = content_scanner.tokenize [self[1], self[4], self[3]], :tokens => Tokens.new
125
+ pre, inserted, post = content_scanner.tokenize [head, insertion, tail], :tokens => Tokens.new
125
126
  encoder.tokens pre
126
127
  encoder.begin_group :eyecatcher
127
128
  encoder.tokens inserted
@@ -177,6 +178,22 @@ module Scanners
177
178
  encoder
178
179
  end
179
180
 
181
+ private
182
+
183
+ def diff a, b
184
+ # i will be the index of the leftmost difference from the left.
185
+ i_max = [a.size, b.size].min
186
+ i = 0
187
+ i += 1 while i < i_max && a[i] == b[i]
188
+ # j_min will be the index of the leftmost difference from the right.
189
+ j_min = i - i_max
190
+ # j will be the index of the rightmost difference from the right which
191
+ # does not precede the leftmost one from the left.
192
+ j = -1
193
+ j -= 1 while j >= j_min && a[j] == b[j]
194
+ return a[0...i], a[i..j], b[i..j], (j < -1) ? a[j+1..-1] : ''
195
+ end
196
+
180
197
  end
181
198
 
182
199
  end
@@ -53,10 +53,20 @@ module Scanners
53
53
  @plain_string_content = nil
54
54
  end
55
55
 
56
+ def scan_java_script encoder, code
57
+ if code && !code.empty?
58
+ @java_script_scanner ||= Scanners::JavaScript.new '', :keep_tokens => true
59
+ # encoder.begin_group :inline
60
+ @java_script_scanner.tokenize code, :tokens => encoder
61
+ # encoder.end_group :inline
62
+ end
63
+ end
64
+
56
65
  def scan_tokens encoder, options
57
66
 
58
67
  state = @state
59
68
  plain_string_content = @plain_string_content
69
+ in_tag = in_attribute = nil
60
70
 
61
71
  until eos?
62
72
 
@@ -68,19 +78,39 @@ module Scanners
68
78
  case state
69
79
 
70
80
  when :initial
71
- if match = scan(/<!--.*?-->/m)
81
+ case in_tag
82
+ when 'script'
83
+ if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m)
84
+ code = self[2] || self[4]
85
+ closing = self[3]
86
+ encoder.text_token self[1], :comment
87
+ else
88
+ code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/)
89
+ closing = false
90
+ end
91
+ unless code.empty?
92
+ encoder.begin_group :inline
93
+ scan_java_script encoder, code
94
+ encoder.end_group :inline
95
+ end
96
+ encoder.text_token closing, :comment if closing
97
+ end
98
+ next if eos?
99
+ if match = scan(/<!--(?:.*?-->|.*)/m)
72
100
  encoder.text_token match, :comment
73
- elsif match = scan(/<!DOCTYPE.*?>/m)
101
+ elsif match = scan(/<!DOCTYPE(?:.*?>|.*)/m)
74
102
  encoder.text_token match, :doctype
75
- elsif match = scan(/<\?xml.*?\?>/m)
103
+ elsif match = scan(/<\?xml(?:.*?\?>|.*)/m)
76
104
  encoder.text_token match, :preprocessor
77
- elsif match = scan(/<\?.*?\?>|<%.*?%>/m)
105
+ elsif match = scan(/<\?(?:.*?\?>|.*)|<%(?:.*?%>|.*)/m)
78
106
  encoder.text_token match, :comment
79
- elsif match = scan(/<\/[-\w.:]*>/m)
107
+ elsif match = scan(/<\/[-\w.:]*>?/m)
80
108
  encoder.text_token match, :tag
81
- elsif match = scan(/<[-\w.:]+>?/m)
109
+ in_tag = nil
110
+ elsif match = scan(/<(?:(script)|[-\w.:]+)(>)?/m)
82
111
  encoder.text_token match, :tag
83
- state = :attribute unless match[-1] == ?>
112
+ in_tag = self[1]
113
+ state = :attribute unless self[2]
84
114
  elsif match = scan(/[^<>&]+/)
85
115
  encoder.text_token match, :plain
86
116
  elsif match = scan(/#{ENTITY}/ox)
@@ -92,10 +122,14 @@ module Scanners
92
122
  end
93
123
 
94
124
  when :attribute
95
- if match = scan(/#{TAG_END}/)
125
+ if match = scan(/#{TAG_END}/o)
96
126
  encoder.text_token match, :tag
127
+ in_attribute = nil
97
128
  state = :initial
98
129
  elsif match = scan(/#{ATTR_NAME}/o)
130
+ if match.downcase == 'onclick'
131
+ in_attribute = 'script'
132
+ end
99
133
  encoder.text_token match, :attribute_name
100
134
  state = :attribute_equal
101
135
  else
@@ -106,11 +140,9 @@ module Scanners
106
140
  if match = scan(/=/)
107
141
  encoder.text_token match, :operator
108
142
  state = :attribute_value
109
- elsif match = scan(/#{ATTR_NAME}/o)
110
- encoder.text_token match, :attribute_name
111
- elsif match = scan(/#{TAG_END}/o)
112
- encoder.text_token match, :tag
113
- state = :initial
143
+ elsif scan(/#{ATTR_NAME}/o) || scan(/#{TAG_END}/o)
144
+ state = :attribute
145
+ next
114
146
  else
115
147
  encoder.text_token getch, :error
116
148
  state = :attribute
@@ -121,11 +153,26 @@ module Scanners
121
153
  encoder.text_token match, :attribute_value
122
154
  state = :attribute
123
155
  elsif match = scan(/["']/)
124
- encoder.begin_group :string
125
- state = :attribute_value_string
126
- plain_string_content = PLAIN_STRING_CONTENT[match]
127
- encoder.text_token match, :delimiter
128
- elsif scan(/#{TAG_END}/o)
156
+ if in_attribute == 'script'
157
+ encoder.begin_group :inline
158
+ encoder.text_token match, :inline_delimiter
159
+ if scan(/javascript:\s*/)
160
+ encoder.text_token matched, :comment
161
+ end
162
+ code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
163
+ scan_java_script encoder, code
164
+ match = scan(/["']/)
165
+ encoder.text_token match, :inline_delimiter if match
166
+ encoder.end_group :inline
167
+ state = :attribute
168
+ in_attribute = nil
169
+ else
170
+ encoder.begin_group :string
171
+ state = :attribute_value_string
172
+ plain_string_content = PLAIN_STRING_CONTENT[match]
173
+ encoder.text_token match, :delimiter
174
+ end
175
+ elsif match = scan(/#{TAG_END}/o)
129
176
  encoder.text_token match, :tag
130
177
  state = :initial
131
178
  else
@@ -3,9 +3,10 @@ module Scanners
3
3
 
4
4
  # Scanner for Java.
5
5
  class Java < Scanner
6
-
6
+
7
7
  register_for :java
8
- helper :builtin_types
8
+
9
+ autoload :BuiltinTypes, 'coderay/scanners/java/builtin_types'
9
10
 
10
11
  # http://java.sun.com/docs/books/tutorial/java/nutsandbolts/_keywords.html
11
12
  KEYWORDS = %w[
@@ -48,8 +48,8 @@ module Scanners
48
48
  '/' => /[^\\\/]+/,
49
49
  } # :nodoc:
50
50
  KEY_CHECK_PATTERN = {
51
- "'" => / [^\\']* (?: \\.? [^\\']* )* '? \s* : /x,
52
- '"' => / [^\\"]* (?: \\.? [^\\"]* )* "? \s* : /x,
51
+ "'" => / (?> [^\\']* (?: \\. [^\\']* )* ) ' \s* : /mx,
52
+ '"' => / (?> [^\\"]* (?: \\. [^\\"]* )* ) " \s* : /mx,
53
53
  } # :nodoc:
54
54
 
55
55
  protected
@@ -134,7 +134,7 @@ module Scanners
134
134
  string_delimiter = match
135
135
  encoder.text_token match, :delimiter
136
136
 
137
- elsif value_expected && (match = scan(/\/(?=\S)/))
137
+ elsif value_expected && (match = scan(/\//))
138
138
  encoder.begin_group :regexp
139
139
  state = :regexp
140
140
  string_delimiter = '/'
@@ -13,10 +13,11 @@ module Scanners
13
13
  ] # :nodoc:
14
14
 
15
15
  ESCAPE = / [bfnrt\\"\/] /x # :nodoc:
16
- UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc:
16
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc:
17
17
 
18
18
  protected
19
19
 
20
+ # See http://json.org/ for a definition of the JSON lexic/grammar.
20
21
  def scan_tokens encoder, options
21
22
 
22
23
  state = :initial
@@ -44,14 +45,14 @@ module Scanners
44
45
  when '}', ']' then stack.pop # no error recovery, but works for valid JSON
45
46
  end
46
47
  elsif match = scan(/ true | false | null /x)
47
- encoder.text_token match, :value
48
+ encoder.text_token match, :value
48
49
  elsif match = scan(/ -? (?: 0 | [1-9]\d* ) /x)
49
- kind = :integer
50
50
  if scan(/ \.\d+ (?:[eE][-+]?\d+)? | [eE][-+]? \d+ /x)
51
51
  match << matched
52
- kind = :float
52
+ encoder.text_token match, :float
53
+ else
54
+ encoder.text_token match, :integer
53
55
  end
54
- encoder.text_token match, kind
55
56
  else
56
57
  encoder.text_token getch, :error
57
58
  end
@@ -76,7 +77,7 @@ module Scanners
76
77
  end
77
78
 
78
79
  else
79
- raise_inspect 'Unknown state', encoder
80
+ raise_inspect 'Unknown state: %p' % [state], encoder
80
81
 
81
82
  end
82
83
  end
@@ -10,6 +10,7 @@ module Scanners
10
10
 
11
11
  register_for :php
12
12
  file_extension 'php'
13
+ encoding 'BINARY'
13
14
 
14
15
  KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
15
16
 
@@ -461,7 +462,7 @@ module Scanners
461
462
  states.push :php
462
463
  encoder.text_token match, :delimiter
463
464
  else
464
- encoder.text_token match, :string
465
+ encoder.text_token match, :content
465
466
  end
466
467
  elsif match = scan(/\$\{#{RE::IDENTIFIER}\}/o)
467
468
  encoder.text_token match, :local_variable
@@ -48,12 +48,16 @@ module Scanners
48
48
  @html_scanner.tokenize match, :tokens => encoder
49
49
 
50
50
  elsif match = scan(/#{ERB_RUBY_BLOCK}/o)
51
- start_tag = match[/\A<%[-=]?/]
51
+ start_tag = match[/\A<%[-=#]?/]
52
52
  end_tag = match[/-?%?>?\z/]
53
53
  encoder.begin_group :inline
54
54
  encoder.text_token start_tag, :inline_delimiter
55
55
  code = match[start_tag.size .. -1 - end_tag.size]
56
- @ruby_scanner.tokenize code
56
+ if start_tag[/\A<%#/]
57
+ encoder.text_token code, :comment
58
+ else
59
+ @ruby_scanner.tokenize code
60
+ end unless code.empty?
57
61
  encoder.text_token end_tag, :inline_delimiter unless end_tag.empty?
58
62
  encoder.end_group :inline
59
63