antisamy 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. data/CHANGELOG.rdoc +13 -0
  2. data/LICENSE.txt +20 -20
  3. data/README.rdoc +41 -41
  4. data/lib/antisamy.rb +46 -46
  5. data/lib/antisamy/css/css_filter.rb +187 -187
  6. data/lib/antisamy/css/css_scanner.rb +84 -84
  7. data/lib/antisamy/css/css_validator.rb +128 -128
  8. data/lib/antisamy/csspool/rsac.rb +1 -1
  9. data/lib/antisamy/csspool/rsac/sac.rb +14 -14
  10. data/lib/antisamy/csspool/rsac/sac/conditions.rb +5 -5
  11. data/lib/antisamy/csspool/rsac/sac/conditions/attribute_condition.rb +50 -50
  12. data/lib/antisamy/csspool/rsac/sac/conditions/begin_hyphen_condition.rb +18 -18
  13. data/lib/antisamy/csspool/rsac/sac/conditions/class_condition.rb +18 -18
  14. data/lib/antisamy/csspool/rsac/sac/conditions/combinator_condition.rb +36 -36
  15. data/lib/antisamy/csspool/rsac/sac/conditions/condition.rb +29 -29
  16. data/lib/antisamy/csspool/rsac/sac/conditions/id_condition.rb +23 -23
  17. data/lib/antisamy/csspool/rsac/sac/conditions/one_of_condition.rb +18 -18
  18. data/lib/antisamy/csspool/rsac/sac/conditions/pseudo_class_condition.rb +20 -20
  19. data/lib/antisamy/csspool/rsac/sac/document_handler.rb +66 -66
  20. data/lib/antisamy/csspool/rsac/sac/error_handler.rb +13 -13
  21. data/lib/antisamy/csspool/rsac/sac/generated_parser.rb +1012 -1012
  22. data/lib/antisamy/csspool/rsac/sac/generated_property_parser.rb +9284 -9284
  23. data/lib/antisamy/csspool/rsac/sac/lexeme.rb +27 -27
  24. data/lib/antisamy/csspool/rsac/sac/lexical_unit.rb +201 -201
  25. data/lib/antisamy/csspool/rsac/sac/parse_exception.rb +4 -4
  26. data/lib/antisamy/csspool/rsac/sac/parser.rb +109 -109
  27. data/lib/antisamy/csspool/rsac/sac/property_parser.rb +44 -44
  28. data/lib/antisamy/csspool/rsac/sac/selectors.rb +5 -5
  29. data/lib/antisamy/csspool/rsac/sac/selectors/child_selector.rb +36 -36
  30. data/lib/antisamy/csspool/rsac/sac/selectors/conditional_selector.rb +45 -45
  31. data/lib/antisamy/csspool/rsac/sac/selectors/descendant_selector.rb +36 -36
  32. data/lib/antisamy/csspool/rsac/sac/selectors/element_selector.rb +35 -35
  33. data/lib/antisamy/csspool/rsac/sac/selectors/selector.rb +25 -25
  34. data/lib/antisamy/csspool/rsac/sac/selectors/sibling_selector.rb +35 -35
  35. data/lib/antisamy/csspool/rsac/sac/selectors/simple_selector.rb +21 -21
  36. data/lib/antisamy/csspool/rsac/sac/token.rb +25 -25
  37. data/lib/antisamy/csspool/rsac/sac/tokenizer.rb +185 -185
  38. data/lib/antisamy/csspool/rsac/stylesheet.rb +3 -3
  39. data/lib/antisamy/csspool/rsac/stylesheet/rule.rb +20 -20
  40. data/lib/antisamy/csspool/rsac/stylesheet/stylesheet.rb +76 -76
  41. data/lib/antisamy/html/handler.rb +112 -99
  42. data/lib/antisamy/html/sax_filter.rb +305 -302
  43. data/lib/antisamy/html/scanner.rb +47 -43
  44. data/lib/antisamy/model/attribute.rb +19 -19
  45. data/lib/antisamy/model/css_property.rb +39 -39
  46. data/lib/antisamy/model/tag.rb +31 -31
  47. data/lib/antisamy/policy.rb +577 -545
  48. data/lib/antisamy/scan_results.rb +89 -89
  49. data/spec/antisamy_spec.rb +208 -142
  50. data/spec/spec_helper.rb +12 -12
  51. metadata +79 -81
@@ -1,36 +1,36 @@
1
- module RSAC
2
- module Selectors
3
- class DescendantSelector < SimpleSelector
4
- attr_accessor :ancestor_selector, :simple_selector
5
- alias :ancestor :ancestor_selector
6
- alias :selector :simple_selector
7
-
8
- def initialize(ancestor, selector)
9
- super(:SAC_DESCENDANT_SELECTOR)
10
-
11
- @ancestor_selector = ancestor
12
- @simple_selector = selector
13
- end
14
-
15
- def to_css
16
- "#{ancestor.to_css} #{selector.to_css}"
17
- end
18
-
19
- def to_xpath(prefix=true)
20
- "#{ancestor.to_xpath(prefix)}//#{selector.to_xpath(false)}"
21
- end
22
-
23
- def specificity
24
- ancestor.specificity.zip(selector.specificity).map { |x,y| x + y }
25
- end
26
-
27
- def ==(other)
28
- super && selector == other.selector && ancestor == other.ancestor
29
- end
30
-
31
- def hash
32
- [selector, ancestor].hash
33
- end
34
- end
35
- end
36
- end
1
+ module RSAC
2
+ module Selectors
3
+ class DescendantSelector < SimpleSelector
4
+ attr_accessor :ancestor_selector, :simple_selector
5
+ alias :ancestor :ancestor_selector
6
+ alias :selector :simple_selector
7
+
8
+ def initialize(ancestor, selector)
9
+ super(:SAC_DESCENDANT_SELECTOR)
10
+
11
+ @ancestor_selector = ancestor
12
+ @simple_selector = selector
13
+ end
14
+
15
+ def to_css
16
+ "#{ancestor.to_css} #{selector.to_css}"
17
+ end
18
+
19
+ def to_xpath(prefix=true)
20
+ "#{ancestor.to_xpath(prefix)}//#{selector.to_xpath(false)}"
21
+ end
22
+
23
+ def specificity
24
+ ancestor.specificity.zip(selector.specificity).map { |x,y| x + y }
25
+ end
26
+
27
+ def ==(other)
28
+ super && selector == other.selector && ancestor == other.ancestor
29
+ end
30
+
31
+ def hash
32
+ [selector, ancestor].hash
33
+ end
34
+ end
35
+ end
36
+ end
@@ -1,35 +1,35 @@
1
- module RSAC
2
- module Selectors
3
- class ElementSelector < SimpleSelector
4
- attr_reader :local_name
5
- alias :name :local_name
6
-
7
- def initialize(name)
8
- super(:SAC_ELEMENT_NODE_SELECTOR)
9
- @local_name = name
10
- end
11
-
12
- def to_css
13
- local_name
14
- end
15
-
16
- def to_xpath(prefix=true)
17
- atoms = [local_name]
18
- atoms.unshift("//") if prefix
19
- atoms.join
20
- end
21
-
22
- def specificity
23
- [0, 0, 0, 1]
24
- end
25
-
26
- def ==(other)
27
- super && name == other.name
28
- end
29
-
30
- def hash
31
- name.hash
32
- end
33
- end
34
- end
35
- end
1
+ module RSAC
2
+ module Selectors
3
+ class ElementSelector < SimpleSelector
4
+ attr_reader :local_name
5
+ alias :name :local_name
6
+
7
+ def initialize(name)
8
+ super(:SAC_ELEMENT_NODE_SELECTOR)
9
+ @local_name = name
10
+ end
11
+
12
+ def to_css
13
+ local_name
14
+ end
15
+
16
+ def to_xpath(prefix=true)
17
+ atoms = [local_name]
18
+ atoms.unshift("//") if prefix
19
+ atoms.join
20
+ end
21
+
22
+ def specificity
23
+ [0, 0, 0, 1]
24
+ end
25
+
26
+ def ==(other)
27
+ super && name == other.name
28
+ end
29
+
30
+ def hash
31
+ name.hash
32
+ end
33
+ end
34
+ end
35
+ end
@@ -1,25 +1,25 @@
1
- module RSAC
2
- module Selectors
3
- class Selector
4
-
5
- attr_reader :selector_type
6
-
7
- def initialize(selector_type)
8
- @selector_type = selector_type
9
- end
10
-
11
- def ==(other)
12
- self.class === other && selector_type == other.selector_type
13
- end
14
-
15
- def hash
16
- selector_type.hash
17
- end
18
-
19
- def eql?(other)
20
- self == other
21
- end
22
-
23
- end
24
- end
25
- end
1
+ module RSAC
2
+ module Selectors
3
+ class Selector
4
+
5
+ attr_reader :selector_type
6
+
7
+ def initialize(selector_type)
8
+ @selector_type = selector_type
9
+ end
10
+
11
+ def ==(other)
12
+ self.class === other && selector_type == other.selector_type
13
+ end
14
+
15
+ def hash
16
+ selector_type.hash
17
+ end
18
+
19
+ def eql?(other)
20
+ self == other
21
+ end
22
+
23
+ end
24
+ end
25
+ end
@@ -1,35 +1,35 @@
1
- module RSAC
2
- module Selectors
3
- class SiblingSelector < SimpleSelector
4
- attr_accessor :selector, :sibling_selector
5
- alias :sibling :sibling_selector
6
-
7
- def initialize(selector, sibling)
8
- super(:SAC_DIRECT_ADJACENT_SELECTOR)
9
-
10
- @selector = selector
11
- @sibling_selector = sibling
12
- end
13
-
14
- def to_css
15
- "#{selector.to_css} + #{sibling.to_css}"
16
- end
17
-
18
- def to_xpath(prefix=true)
19
- "#{selector.to_xpath(prefix)}/following-sibling::#{sibling.to_xpath(false)}"
20
- end
21
-
22
- def specificity
23
- selector.specificity.zip(sibling.specificity).map { |x,y| x + y }
24
- end
25
-
26
- def ==(other)
27
- super && selector == other.selector && sibling == other.sibling
28
- end
29
-
30
- def hash
31
- [selector, sibling].hash
32
- end
33
- end
34
- end
35
- end
1
+ module RSAC
2
+ module Selectors
3
+ class SiblingSelector < SimpleSelector
4
+ attr_accessor :selector, :sibling_selector
5
+ alias :sibling :sibling_selector
6
+
7
+ def initialize(selector, sibling)
8
+ super(:SAC_DIRECT_ADJACENT_SELECTOR)
9
+
10
+ @selector = selector
11
+ @sibling_selector = sibling
12
+ end
13
+
14
+ def to_css
15
+ "#{selector.to_css} + #{sibling.to_css}"
16
+ end
17
+
18
+ def to_xpath(prefix=true)
19
+ "#{selector.to_xpath(prefix)}/following-sibling::#{sibling.to_xpath(false)}"
20
+ end
21
+
22
+ def specificity
23
+ selector.specificity.zip(sibling.specificity).map { |x,y| x + y }
24
+ end
25
+
26
+ def ==(other)
27
+ super && selector == other.selector && sibling == other.sibling
28
+ end
29
+
30
+ def hash
31
+ [selector, sibling].hash
32
+ end
33
+ end
34
+ end
35
+ end
@@ -1,21 +1,21 @@
1
- module RSAC
2
- module Selectors
3
- class SimpleSelector < Selector
4
- def initialize(selector_type=:SAC_ANY_NODE_SELECTOR)
5
- super(selector_type)
6
- end
7
-
8
- def to_css
9
- '*'
10
- end
11
-
12
- def to_xpath
13
- "//*"
14
- end
15
-
16
- def specificity
17
- [0, 0, 0, 0]
18
- end
19
- end
20
- end
21
- end
1
+ module RSAC
2
+ module Selectors
3
+ class SimpleSelector < Selector
4
+ def initialize(selector_type=:SAC_ANY_NODE_SELECTOR)
5
+ super(selector_type)
6
+ end
7
+
8
+ def to_css
9
+ '*'
10
+ end
11
+
12
+ def to_xpath
13
+ "//*"
14
+ end
15
+
16
+ def specificity
17
+ [0, 0, 0, 0]
18
+ end
19
+ end
20
+ end
21
+ end
@@ -1,25 +1,25 @@
1
- module RSAC
2
- class Token
3
- attr_reader :name, :value, :position
4
-
5
- def initialize(name, value, position)
6
- @name = name
7
- @value = value
8
- @position = position
9
- end
10
-
11
- def to_racc_token
12
- [name, value]
13
- end
14
- end
15
-
16
- class DelimiterToken < Token
17
- def initialize(value, position)
18
- super(:delim, value, position)
19
- end
20
-
21
- def to_racc_token
22
- [value, value]
23
- end
24
- end
25
- end
1
+ module RSAC
2
+ class Token
3
+ attr_reader :name, :value, :position
4
+
5
+ def initialize(name, value, position)
6
+ @name = name
7
+ @value = value
8
+ @position = position
9
+ end
10
+
11
+ def to_racc_token
12
+ [name, value]
13
+ end
14
+ end
15
+
16
+ class DelimiterToken < Token
17
+ def initialize(value, position)
18
+ super(:delim, value, position)
19
+ end
20
+
21
+ def to_racc_token
22
+ [value, value]
23
+ end
24
+ end
25
+ end
@@ -1,185 +1,185 @@
1
- require "antisamy/csspool/rsac/sac/lexeme"
2
- require "antisamy/csspool/rsac/sac/token"
3
-
4
- module RSAC
5
- class Tokenizer
6
- def initialize(&block)
7
- @lexemes = []
8
- @macros = {}
9
-
10
- # http://www.w3.org/TR/CSS21/syndata.html
11
- macro(:h, /([0-9a-f])/ )
12
- macro(:nonascii, /([\200-\377])/ )
13
- macro(:nl, /(\n|\r\n|\r|\f)/ )
14
- macro(:unicode, /(\\#{m(:h)}{1,6}(\r\n|[ \t\r\n\f])?)/ )
15
- macro(:escape, /(#{m(:unicode)}|\\[^\r\n\f0-9a-f])/ )
16
- macro(:nmstart, /([_a-z]|#{m(:nonascii)}|#{m(:escape)})/ )
17
- macro(:nmchar, /([_a-z0-9-]|#{m(:nonascii)}|#{m(:escape)})/ )
18
- macro(:string1, /(\"([^\n\r\f\\\"]|\\#{m(:nl)}|#{m(:escape)})*\")/ )
19
- macro(:string2, /(\'([^\n\r\f\\']|\\#{m(:nl)}|#{m(:escape)})*\')/ )
20
- macro(:invalid1, /(\"([^\n\r\f\\\"]|\\#{m(:nl)}|#{m(:escape)})*)/ )
21
- macro(:invalid2, /(\'([^\n\r\f\\']|\\#{m(:nl)}|#{m(:escape)})*)/ )
22
- macro(:comment, /(\/\*[^*]*\*+([^\/*][^*]*\*+)*\/)/ )
23
- macro(:ident, /(-?#{m(:nmstart)}#{m(:nmchar)}*)/ )
24
- macro(:name, /(#{m(:nmchar)}+)/ )
25
- macro(:num, /([0-9]+|[0-9]*\.[0-9]+)/ )
26
- macro(:string, /(#{m(:string1)}|#{m(:string2)})/ )
27
- macro(:invalid, /(#{m(:invalid1)}|#{m(:invalid2)})/ )
28
- macro(:url, /(([!#\$%&*-~]|#{m(:nonascii)}|#{m(:escape)})*)/ )
29
- macro(:s, /([ \t\r\n\f]+)/ )
30
- macro(:w, /(#{m(:s)}?)/ )
31
- macro(:A, /(a|\\0{0,4}(41|61)(\r\n|[ \t\r\n\f])?)/ )
32
- macro(:C, /(c|\\0{0,4}(43|63)(\r\n|[ \t\r\n\f])?)/ )
33
- macro(:D, /(d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?)/ )
34
- macro(:E, /(e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?)/ )
35
- macro(:G, /(g|\\0{0,4}(47|67)(\r\n|[ \t\r\n\f])?|\\g)/ )
36
- macro(:H, /(h|\\0{0,4}(48|68)(\r\n|[ \t\r\n\f])?|\\h)/ )
37
- macro(:I, /(i|\\0{0,4}(49|69)(\r\n|[ \t\r\n\f])?|\\i)/ )
38
- macro(:K, /(k|\\0{0,4}(4b|6b)(\r\n|[ \t\r\n\f])?|\\k)/ )
39
- macro(:M, /(m|\\0{0,4}(4d|6d)(\r\n|[ \t\r\n\f])?|\\m)/ )
40
- macro(:N, /(n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n)/ )
41
- macro(:O, /(o|\\0{0,4}(51|71)(\r\n|[ \t\r\n\f])?|\\o)/ )
42
- macro(:P, /(p|\\0{0,4}(50|70)(\r\n|[ \t\r\n\f])?|\\p)/ )
43
- macro(:R, /(r|\\0{0,4}(52|72)(\r\n|[ \t\r\n\f])?|\\r)/ )
44
- macro(:S, /(s|\\0{0,4}(53|73)(\r\n|[ \t\r\n\f])?|\\s)/ )
45
- macro(:T, /(t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t)/ )
46
- macro(:X, /(x|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\x)/ )
47
- macro(:Z, /(z|\\0{0,4}(5a|7a)(\r\n|[ \t\r\n\f])?|\\z)/ )
48
-
49
- #token :COMMENT do |patterns|
50
- # patterns << /\/\*[^*]*\*+([^\/*][^*]*\*+)*\//
51
- # patterns << /#{m(:s)}+\/\*[^*]*\*+([^\/*][^*]*\*+)*\//
52
- #end
53
-
54
- token(:LBRACE, /#{m(:w)}\{/)
55
- token(:PLUS, /#{m(:w)}\+/)
56
- token(:GREATER, /#{m(:w)}>/)
57
- token(:COMMA, /#{m(:w)},/)
58
-
59
- token(:S, /#{m(:s)}/)
60
-
61
- #token :URI do |patterns|
62
- # patterns << /url\(#{m(:w)}#{m(:string)}#{m(:w)}\)/
63
- # patterns << /url\(#{m(:w)}#{m(:url)}#{m(:w)}\)/
64
- #end
65
-
66
- token(:FUNCTION, /#{m(:ident)}\(/)
67
- token(:IDENT, /#{m(:ident)}/)
68
-
69
- token(:CDO, /<!--/)
70
- token(:CDC, /-->/)
71
- token(:INCLUDES, /~=/)
72
- token(:DASHMATCH, /\|=/)
73
- #token(:STRING, /#{m(:string)}/)
74
- token(:INVALID, /#{m(:invalid)}/)
75
- token(:HASH, /##{m(:name)}/)
76
- token(:IMPORT_SYM, /@#{m(:I)}#{m(:M)}#{m(:P)}#{m(:O)}#{m(:R)}#{m(:T)}/)
77
- token(:PAGE_SYM, /@#{m(:P)}#{m(:A)}#{m(:G)}#{m(:E)}/)
78
- token(:MEDIA_SYM, /@#{m(:M)}#{m(:E)}#{m(:D)}#{m(:I)}#{m(:A)}/)
79
- token(:CHARSET_SYM, /@#{m(:C)}#{m(:H)}#{m(:A)}#{m(:R)}#{m(:S)}#{m(:E)}#{m(:T)}/)
80
- token(:IMPORTANT_SYM, /!(#{m(:w)}|#{m(:comment)})*#{m(:I)}#{m(:M)}#{m(:P)}#{m(:O)}#{m(:R)}#{m(:T)}#{m(:A)}#{m(:N)}#{m(:T)}/)
81
- token(:EMS, /#{m(:num)}#{m(:E)}#{m(:M)}/)
82
- token(:EXS, /#{m(:num)}#{m(:E)}#{m(:X)}/)
83
-
84
- token :LENGTH do |patterns|
85
- patterns << /#{m(:num)}#{m(:P)}#{m(:X)}/
86
- patterns << /#{m(:num)}#{m(:C)}#{m(:M)}/
87
- patterns << /#{m(:num)}#{m(:M)}#{m(:M)}/
88
- patterns << /#{m(:num)}#{m(:I)}#{m(:N)}/
89
- patterns << /#{m(:num)}#{m(:P)}#{m(:T)}/
90
- patterns << /#{m(:num)}#{m(:P)}#{m(:C)}/
91
- end
92
-
93
- token :ANGLE do |patterns|
94
- patterns << /#{m(:num)}#{m(:D)}#{m(:E)}#{m(:G)}/
95
- patterns << /#{m(:num)}#{m(:R)}#{m(:A)}#{m(:D)}/
96
- patterns << /#{m(:num)}#{m(:G)}#{m(:R)}#{m(:A)}#{m(:D)}/
97
- end
98
-
99
- token :TIME do |patterns|
100
- patterns << /#{m(:num)}#{m(:M)}#{m(:S)}/
101
- patterns << /#{m(:num)}#{m(:S)}/
102
- end
103
-
104
- token :FREQ do |patterns|
105
- patterns << /#{m(:num)}#{m(:H)}#{m(:Z)}/
106
- patterns << /#{m(:num)}#{m(:K)}#{m(:H)}#{m(:Z)}/
107
- end
108
-
109
- token(:DIMENSION, /#{m(:num)}#{m(:ident)}/)
110
- token(:PERCENTAGE, /#{m(:num)}%/)
111
- token(:NUMBER, /#{m(:num)}/)
112
-
113
-
114
- yield self if block_given?
115
- end
116
-
117
- def tokenize(input_data)
118
- tokens = []
119
- pos = 0
120
-
121
- comment_pattern = /\/\*.*?\*\//m
122
- comments = input_data.scan(comment_pattern)
123
- non_comments = input_data.split(comment_pattern)
124
-
125
- # Handle a small edge case, if our CSS is *only* comments,
126
- # the split, zip, scan trick won't work
127
- if non_comments.length == 0
128
- tokens = comments.map { |x| Token.new(:COMMENT, x, nil) }
129
- else
130
- non_comments.zip(comments).each do |non_comment, comment|
131
- non_comment.split(/url\([^\)]*\)/m).zip(
132
- non_comment.scan(/url\([^\)]*\)/m)
133
- ).each do |non_url, url|
134
- non_url.split(/"[^"]*"|'[^']*'/m).zip(
135
- non_url.scan(/"[^"]*"|'[^']*'/m)
136
- ).each do |non_string, quoted_string|
137
- if non_string.length > 0 && non_string =~ /\A\s*\Z/m
138
- tokens << Token.new(:S, non_string, nil)
139
- else
140
- non_string.split(/[ \t\r\n\f]*(?![{}+>]*)/m).zip(
141
- non_string.scan(/[ \t\r\n\f]*(?![{}+>]*)/m)
142
- ).each do |string, whitespace|
143
- until string.empty?
144
- token = nil
145
- @lexemes.each do |lexeme|
146
- match = lexeme.pattern.match(string)
147
- if match
148
- token = Token.new(lexeme.name, match.to_s, pos)
149
- break
150
- end
151
- end
152
-
153
- token ||= DelimiterToken.new(/^./.match(string).to_s, pos)
154
-
155
- tokens << token
156
- string = string.slice(Range.new(token.value.length, -1))
157
- pos += token.value.length
158
- end
159
- tokens << Token.new(:S, whitespace, nil) if whitespace
160
- end
161
- end
162
- tokens << Token.new(:STRING, quoted_string, nil) if quoted_string
163
- end
164
- tokens << Token.new(:URI, url, nil) if url
165
- end
166
- tokens << Token.new(:COMMENT, comment, nil) if comment
167
- end
168
- end
169
-
170
- tokens
171
- end
172
-
173
- private
174
-
175
- def token(name, pattern=nil, &block)
176
- @lexemes << Lexeme.new(name, pattern, &block)
177
- end
178
-
179
- def macro(name, regex=nil)
180
- regex ? @macros[name] = regex : @macros[name].source
181
- end
182
-
183
- alias :m :macro
184
- end
185
- end
1
+ require "antisamy/csspool/rsac/sac/lexeme"
2
+ require "antisamy/csspool/rsac/sac/token"
3
+
4
+ module RSAC
5
+ class Tokenizer
6
+ def initialize(&block)
7
+ @lexemes = []
8
+ @macros = {}
9
+
10
+ # http://www.w3.org/TR/CSS21/syndata.html
11
+ macro(:h, /([0-9a-f])/ )
12
+ macro(:nonascii, /([\200-\377])/ )
13
+ macro(:nl, /(\n|\r\n|\r|\f)/ )
14
+ macro(:unicode, /(\\#{m(:h)}{1,6}(\r\n|[ \t\r\n\f])?)/ )
15
+ macro(:escape, /(#{m(:unicode)}|\\[^\r\n\f0-9a-f])/ )
16
+ macro(:nmstart, /([_a-z]|#{m(:nonascii)}|#{m(:escape)})/ )
17
+ macro(:nmchar, /([_a-z0-9-]|#{m(:nonascii)}|#{m(:escape)})/ )
18
+ macro(:string1, /(\"([^\n\r\f\\\"]|\\#{m(:nl)}|#{m(:escape)})*\")/ )
19
+ macro(:string2, /(\'([^\n\r\f\\']|\\#{m(:nl)}|#{m(:escape)})*\')/ )
20
+ macro(:invalid1, /(\"([^\n\r\f\\\"]|\\#{m(:nl)}|#{m(:escape)})*)/ )
21
+ macro(:invalid2, /(\'([^\n\r\f\\']|\\#{m(:nl)}|#{m(:escape)})*)/ )
22
+ macro(:comment, /(\/\*[^*]*\*+([^\/*][^*]*\*+)*\/)/ )
23
+ macro(:ident, /(-?#{m(:nmstart)}#{m(:nmchar)}*)/ )
24
+ macro(:name, /(#{m(:nmchar)}+)/ )
25
+ macro(:num, /([0-9]+|[0-9]*\.[0-9]+)/ )
26
+ macro(:string, /(#{m(:string1)}|#{m(:string2)})/ )
27
+ macro(:invalid, /(#{m(:invalid1)}|#{m(:invalid2)})/ )
28
+ macro(:url, /(([!#\$%&*-~]|#{m(:nonascii)}|#{m(:escape)})*)/ )
29
+ macro(:s, /([ \t\r\n\f]+)/ )
30
+ macro(:w, /(#{m(:s)}?)/ )
31
+ macro(:A, /(a|\\0{0,4}(41|61)(\r\n|[ \t\r\n\f])?)/ )
32
+ macro(:C, /(c|\\0{0,4}(43|63)(\r\n|[ \t\r\n\f])?)/ )
33
+ macro(:D, /(d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?)/ )
34
+ macro(:E, /(e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?)/ )
35
+ macro(:G, /(g|\\0{0,4}(47|67)(\r\n|[ \t\r\n\f])?|\\g)/ )
36
+ macro(:H, /(h|\\0{0,4}(48|68)(\r\n|[ \t\r\n\f])?|\\h)/ )
37
+ macro(:I, /(i|\\0{0,4}(49|69)(\r\n|[ \t\r\n\f])?|\\i)/ )
38
+ macro(:K, /(k|\\0{0,4}(4b|6b)(\r\n|[ \t\r\n\f])?|\\k)/ )
39
+ macro(:M, /(m|\\0{0,4}(4d|6d)(\r\n|[ \t\r\n\f])?|\\m)/ )
40
+ macro(:N, /(n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n)/ )
41
+ macro(:O, /(o|\\0{0,4}(51|71)(\r\n|[ \t\r\n\f])?|\\o)/ )
42
+ macro(:P, /(p|\\0{0,4}(50|70)(\r\n|[ \t\r\n\f])?|\\p)/ )
43
+ macro(:R, /(r|\\0{0,4}(52|72)(\r\n|[ \t\r\n\f])?|\\r)/ )
44
+ macro(:S, /(s|\\0{0,4}(53|73)(\r\n|[ \t\r\n\f])?|\\s)/ )
45
+ macro(:T, /(t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t)/ )
46
+ macro(:X, /(x|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\x)/ )
47
+ macro(:Z, /(z|\\0{0,4}(5a|7a)(\r\n|[ \t\r\n\f])?|\\z)/ )
48
+
49
+ #token :COMMENT do |patterns|
50
+ # patterns << /\/\*[^*]*\*+([^\/*][^*]*\*+)*\//
51
+ # patterns << /#{m(:s)}+\/\*[^*]*\*+([^\/*][^*]*\*+)*\//
52
+ #end
53
+
54
+ token(:LBRACE, /#{m(:w)}\{/)
55
+ token(:PLUS, /#{m(:w)}\+/)
56
+ token(:GREATER, /#{m(:w)}>/)
57
+ token(:COMMA, /#{m(:w)},/)
58
+
59
+ token(:S, /#{m(:s)}/)
60
+
61
+ #token :URI do |patterns|
62
+ # patterns << /url\(#{m(:w)}#{m(:string)}#{m(:w)}\)/
63
+ # patterns << /url\(#{m(:w)}#{m(:url)}#{m(:w)}\)/
64
+ #end
65
+
66
+ token(:FUNCTION, /#{m(:ident)}\(/)
67
+ token(:IDENT, /#{m(:ident)}/)
68
+
69
+ token(:CDO, /<!--/)
70
+ token(:CDC, /-->/)
71
+ token(:INCLUDES, /~=/)
72
+ token(:DASHMATCH, /\|=/)
73
+ #token(:STRING, /#{m(:string)}/)
74
+ token(:INVALID, /#{m(:invalid)}/)
75
+ token(:HASH, /##{m(:name)}/)
76
+ token(:IMPORT_SYM, /@#{m(:I)}#{m(:M)}#{m(:P)}#{m(:O)}#{m(:R)}#{m(:T)}/)
77
+ token(:PAGE_SYM, /@#{m(:P)}#{m(:A)}#{m(:G)}#{m(:E)}/)
78
+ token(:MEDIA_SYM, /@#{m(:M)}#{m(:E)}#{m(:D)}#{m(:I)}#{m(:A)}/)
79
+ token(:CHARSET_SYM, /@#{m(:C)}#{m(:H)}#{m(:A)}#{m(:R)}#{m(:S)}#{m(:E)}#{m(:T)}/)
80
+ token(:IMPORTANT_SYM, /!(#{m(:w)}|#{m(:comment)})*#{m(:I)}#{m(:M)}#{m(:P)}#{m(:O)}#{m(:R)}#{m(:T)}#{m(:A)}#{m(:N)}#{m(:T)}/)
81
+ token(:EMS, /#{m(:num)}#{m(:E)}#{m(:M)}/)
82
+ token(:EXS, /#{m(:num)}#{m(:E)}#{m(:X)}/)
83
+
84
+ token :LENGTH do |patterns|
85
+ patterns << /#{m(:num)}#{m(:P)}#{m(:X)}/
86
+ patterns << /#{m(:num)}#{m(:C)}#{m(:M)}/
87
+ patterns << /#{m(:num)}#{m(:M)}#{m(:M)}/
88
+ patterns << /#{m(:num)}#{m(:I)}#{m(:N)}/
89
+ patterns << /#{m(:num)}#{m(:P)}#{m(:T)}/
90
+ patterns << /#{m(:num)}#{m(:P)}#{m(:C)}/
91
+ end
92
+
93
+ token :ANGLE do |patterns|
94
+ patterns << /#{m(:num)}#{m(:D)}#{m(:E)}#{m(:G)}/
95
+ patterns << /#{m(:num)}#{m(:R)}#{m(:A)}#{m(:D)}/
96
+ patterns << /#{m(:num)}#{m(:G)}#{m(:R)}#{m(:A)}#{m(:D)}/
97
+ end
98
+
99
+ token :TIME do |patterns|
100
+ patterns << /#{m(:num)}#{m(:M)}#{m(:S)}/
101
+ patterns << /#{m(:num)}#{m(:S)}/
102
+ end
103
+
104
+ token :FREQ do |patterns|
105
+ patterns << /#{m(:num)}#{m(:H)}#{m(:Z)}/
106
+ patterns << /#{m(:num)}#{m(:K)}#{m(:H)}#{m(:Z)}/
107
+ end
108
+
109
+ token(:DIMENSION, /#{m(:num)}#{m(:ident)}/)
110
+ token(:PERCENTAGE, /#{m(:num)}%/)
111
+ token(:NUMBER, /#{m(:num)}/)
112
+
113
+
114
+ yield self if block_given?
115
+ end
116
+
117
+ def tokenize(input_data)
118
+ tokens = []
119
+ pos = 0
120
+
121
+ comment_pattern = /\/\*.*?\*\//m
122
+ comments = input_data.scan(comment_pattern)
123
+ non_comments = input_data.split(comment_pattern)
124
+
125
+ # Handle a small edge case, if our CSS is *only* comments,
126
+ # the split, zip, scan trick won't work
127
+ if non_comments.length == 0
128
+ tokens = comments.map { |x| Token.new(:COMMENT, x, nil) }
129
+ else
130
+ non_comments.zip(comments).each do |non_comment, comment|
131
+ non_comment.split(/url\([^\)]*\)/m).zip(
132
+ non_comment.scan(/url\([^\)]*\)/m)
133
+ ).each do |non_url, url|
134
+ non_url.split(/"[^"]*"|'[^']*'/m).zip(
135
+ non_url.scan(/"[^"]*"|'[^']*'/m)
136
+ ).each do |non_string, quoted_string|
137
+ if non_string.length > 0 && non_string =~ /\A\s*\Z/m
138
+ tokens << Token.new(:S, non_string, nil)
139
+ else
140
+ non_string.split(/[ \t\r\n\f]*(?![{}+>]*)/m).zip(
141
+ non_string.scan(/[ \t\r\n\f]*(?![{}+>]*)/m)
142
+ ).each do |string, whitespace|
143
+ until string.empty?
144
+ token = nil
145
+ @lexemes.each do |lexeme|
146
+ match = lexeme.pattern.match(string)
147
+ if match
148
+ token = Token.new(lexeme.name, match.to_s, pos)
149
+ break
150
+ end
151
+ end
152
+
153
+ token ||= DelimiterToken.new(/^./.match(string).to_s, pos)
154
+
155
+ tokens << token
156
+ string = string.slice(Range.new(token.value.length, -1))
157
+ pos += token.value.length
158
+ end
159
+ tokens << Token.new(:S, whitespace, nil) if whitespace
160
+ end
161
+ end
162
+ tokens << Token.new(:STRING, quoted_string, nil) if quoted_string
163
+ end
164
+ tokens << Token.new(:URI, url, nil) if url
165
+ end
166
+ tokens << Token.new(:COMMENT, comment, nil) if comment
167
+ end
168
+ end
169
+
170
+ tokens
171
+ end
172
+
173
+ private
174
+
175
+ def token(name, pattern=nil, &block)
176
+ @lexemes << Lexeme.new(name, pattern, &block)
177
+ end
178
+
179
+ def macro(name, regex=nil)
180
+ regex ? @macros[name] = regex : @macros[name].source
181
+ end
182
+
183
+ alias :m :macro
184
+ end
185
+ end