antisamy 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. data/CHANGELOG.rdoc +13 -0
  2. data/LICENSE.txt +20 -20
  3. data/README.rdoc +41 -41
  4. data/lib/antisamy.rb +46 -46
  5. data/lib/antisamy/css/css_filter.rb +187 -187
  6. data/lib/antisamy/css/css_scanner.rb +84 -84
  7. data/lib/antisamy/css/css_validator.rb +128 -128
  8. data/lib/antisamy/csspool/rsac.rb +1 -1
  9. data/lib/antisamy/csspool/rsac/sac.rb +14 -14
  10. data/lib/antisamy/csspool/rsac/sac/conditions.rb +5 -5
  11. data/lib/antisamy/csspool/rsac/sac/conditions/attribute_condition.rb +50 -50
  12. data/lib/antisamy/csspool/rsac/sac/conditions/begin_hyphen_condition.rb +18 -18
  13. data/lib/antisamy/csspool/rsac/sac/conditions/class_condition.rb +18 -18
  14. data/lib/antisamy/csspool/rsac/sac/conditions/combinator_condition.rb +36 -36
  15. data/lib/antisamy/csspool/rsac/sac/conditions/condition.rb +29 -29
  16. data/lib/antisamy/csspool/rsac/sac/conditions/id_condition.rb +23 -23
  17. data/lib/antisamy/csspool/rsac/sac/conditions/one_of_condition.rb +18 -18
  18. data/lib/antisamy/csspool/rsac/sac/conditions/pseudo_class_condition.rb +20 -20
  19. data/lib/antisamy/csspool/rsac/sac/document_handler.rb +66 -66
  20. data/lib/antisamy/csspool/rsac/sac/error_handler.rb +13 -13
  21. data/lib/antisamy/csspool/rsac/sac/generated_parser.rb +1012 -1012
  22. data/lib/antisamy/csspool/rsac/sac/generated_property_parser.rb +9284 -9284
  23. data/lib/antisamy/csspool/rsac/sac/lexeme.rb +27 -27
  24. data/lib/antisamy/csspool/rsac/sac/lexical_unit.rb +201 -201
  25. data/lib/antisamy/csspool/rsac/sac/parse_exception.rb +4 -4
  26. data/lib/antisamy/csspool/rsac/sac/parser.rb +109 -109
  27. data/lib/antisamy/csspool/rsac/sac/property_parser.rb +44 -44
  28. data/lib/antisamy/csspool/rsac/sac/selectors.rb +5 -5
  29. data/lib/antisamy/csspool/rsac/sac/selectors/child_selector.rb +36 -36
  30. data/lib/antisamy/csspool/rsac/sac/selectors/conditional_selector.rb +45 -45
  31. data/lib/antisamy/csspool/rsac/sac/selectors/descendant_selector.rb +36 -36
  32. data/lib/antisamy/csspool/rsac/sac/selectors/element_selector.rb +35 -35
  33. data/lib/antisamy/csspool/rsac/sac/selectors/selector.rb +25 -25
  34. data/lib/antisamy/csspool/rsac/sac/selectors/sibling_selector.rb +35 -35
  35. data/lib/antisamy/csspool/rsac/sac/selectors/simple_selector.rb +21 -21
  36. data/lib/antisamy/csspool/rsac/sac/token.rb +25 -25
  37. data/lib/antisamy/csspool/rsac/sac/tokenizer.rb +185 -185
  38. data/lib/antisamy/csspool/rsac/stylesheet.rb +3 -3
  39. data/lib/antisamy/csspool/rsac/stylesheet/rule.rb +20 -20
  40. data/lib/antisamy/csspool/rsac/stylesheet/stylesheet.rb +76 -76
  41. data/lib/antisamy/html/handler.rb +112 -99
  42. data/lib/antisamy/html/sax_filter.rb +305 -302
  43. data/lib/antisamy/html/scanner.rb +47 -43
  44. data/lib/antisamy/model/attribute.rb +19 -19
  45. data/lib/antisamy/model/css_property.rb +39 -39
  46. data/lib/antisamy/model/tag.rb +31 -31
  47. data/lib/antisamy/policy.rb +577 -545
  48. data/lib/antisamy/scan_results.rb +89 -89
  49. data/spec/antisamy_spec.rb +208 -142
  50. data/spec/spec_helper.rb +12 -12
  51. metadata +79 -81
@@ -1,36 +1,36 @@
1
- module RSAC
2
- module Selectors
3
- class DescendantSelector < SimpleSelector
4
- attr_accessor :ancestor_selector, :simple_selector
5
- alias :ancestor :ancestor_selector
6
- alias :selector :simple_selector
7
-
8
- def initialize(ancestor, selector)
9
- super(:SAC_DESCENDANT_SELECTOR)
10
-
11
- @ancestor_selector = ancestor
12
- @simple_selector = selector
13
- end
14
-
15
- def to_css
16
- "#{ancestor.to_css} #{selector.to_css}"
17
- end
18
-
19
- def to_xpath(prefix=true)
20
- "#{ancestor.to_xpath(prefix)}//#{selector.to_xpath(false)}"
21
- end
22
-
23
- def specificity
24
- ancestor.specificity.zip(selector.specificity).map { |x,y| x + y }
25
- end
26
-
27
- def ==(other)
28
- super && selector == other.selector && ancestor == other.ancestor
29
- end
30
-
31
- def hash
32
- [selector, ancestor].hash
33
- end
34
- end
35
- end
36
- end
1
+ module RSAC
2
+ module Selectors
3
+ class DescendantSelector < SimpleSelector
4
+ attr_accessor :ancestor_selector, :simple_selector
5
+ alias :ancestor :ancestor_selector
6
+ alias :selector :simple_selector
7
+
8
+ def initialize(ancestor, selector)
9
+ super(:SAC_DESCENDANT_SELECTOR)
10
+
11
+ @ancestor_selector = ancestor
12
+ @simple_selector = selector
13
+ end
14
+
15
+ def to_css
16
+ "#{ancestor.to_css} #{selector.to_css}"
17
+ end
18
+
19
+ def to_xpath(prefix=true)
20
+ "#{ancestor.to_xpath(prefix)}//#{selector.to_xpath(false)}"
21
+ end
22
+
23
+ def specificity
24
+ ancestor.specificity.zip(selector.specificity).map { |x,y| x + y }
25
+ end
26
+
27
+ def ==(other)
28
+ super && selector == other.selector && ancestor == other.ancestor
29
+ end
30
+
31
+ def hash
32
+ [selector, ancestor].hash
33
+ end
34
+ end
35
+ end
36
+ end
@@ -1,35 +1,35 @@
1
- module RSAC
2
- module Selectors
3
- class ElementSelector < SimpleSelector
4
- attr_reader :local_name
5
- alias :name :local_name
6
-
7
- def initialize(name)
8
- super(:SAC_ELEMENT_NODE_SELECTOR)
9
- @local_name = name
10
- end
11
-
12
- def to_css
13
- local_name
14
- end
15
-
16
- def to_xpath(prefix=true)
17
- atoms = [local_name]
18
- atoms.unshift("//") if prefix
19
- atoms.join
20
- end
21
-
22
- def specificity
23
- [0, 0, 0, 1]
24
- end
25
-
26
- def ==(other)
27
- super && name == other.name
28
- end
29
-
30
- def hash
31
- name.hash
32
- end
33
- end
34
- end
35
- end
1
+ module RSAC
2
+ module Selectors
3
+ class ElementSelector < SimpleSelector
4
+ attr_reader :local_name
5
+ alias :name :local_name
6
+
7
+ def initialize(name)
8
+ super(:SAC_ELEMENT_NODE_SELECTOR)
9
+ @local_name = name
10
+ end
11
+
12
+ def to_css
13
+ local_name
14
+ end
15
+
16
+ def to_xpath(prefix=true)
17
+ atoms = [local_name]
18
+ atoms.unshift("//") if prefix
19
+ atoms.join
20
+ end
21
+
22
+ def specificity
23
+ [0, 0, 0, 1]
24
+ end
25
+
26
+ def ==(other)
27
+ super && name == other.name
28
+ end
29
+
30
+ def hash
31
+ name.hash
32
+ end
33
+ end
34
+ end
35
+ end
@@ -1,25 +1,25 @@
1
- module RSAC
2
- module Selectors
3
- class Selector
4
-
5
- attr_reader :selector_type
6
-
7
- def initialize(selector_type)
8
- @selector_type = selector_type
9
- end
10
-
11
- def ==(other)
12
- self.class === other && selector_type == other.selector_type
13
- end
14
-
15
- def hash
16
- selector_type.hash
17
- end
18
-
19
- def eql?(other)
20
- self == other
21
- end
22
-
23
- end
24
- end
25
- end
1
+ module RSAC
2
+ module Selectors
3
+ class Selector
4
+
5
+ attr_reader :selector_type
6
+
7
+ def initialize(selector_type)
8
+ @selector_type = selector_type
9
+ end
10
+
11
+ def ==(other)
12
+ self.class === other && selector_type == other.selector_type
13
+ end
14
+
15
+ def hash
16
+ selector_type.hash
17
+ end
18
+
19
+ def eql?(other)
20
+ self == other
21
+ end
22
+
23
+ end
24
+ end
25
+ end
@@ -1,35 +1,35 @@
1
- module RSAC
2
- module Selectors
3
- class SiblingSelector < SimpleSelector
4
- attr_accessor :selector, :sibling_selector
5
- alias :sibling :sibling_selector
6
-
7
- def initialize(selector, sibling)
8
- super(:SAC_DIRECT_ADJACENT_SELECTOR)
9
-
10
- @selector = selector
11
- @sibling_selector = sibling
12
- end
13
-
14
- def to_css
15
- "#{selector.to_css} + #{sibling.to_css}"
16
- end
17
-
18
- def to_xpath(prefix=true)
19
- "#{selector.to_xpath(prefix)}/following-sibling::#{sibling.to_xpath(false)}"
20
- end
21
-
22
- def specificity
23
- selector.specificity.zip(sibling.specificity).map { |x,y| x + y }
24
- end
25
-
26
- def ==(other)
27
- super && selector == other.selector && sibling == other.sibling
28
- end
29
-
30
- def hash
31
- [selector, sibling].hash
32
- end
33
- end
34
- end
35
- end
1
+ module RSAC
2
+ module Selectors
3
+ class SiblingSelector < SimpleSelector
4
+ attr_accessor :selector, :sibling_selector
5
+ alias :sibling :sibling_selector
6
+
7
+ def initialize(selector, sibling)
8
+ super(:SAC_DIRECT_ADJACENT_SELECTOR)
9
+
10
+ @selector = selector
11
+ @sibling_selector = sibling
12
+ end
13
+
14
+ def to_css
15
+ "#{selector.to_css} + #{sibling.to_css}"
16
+ end
17
+
18
+ def to_xpath(prefix=true)
19
+ "#{selector.to_xpath(prefix)}/following-sibling::#{sibling.to_xpath(false)}"
20
+ end
21
+
22
+ def specificity
23
+ selector.specificity.zip(sibling.specificity).map { |x,y| x + y }
24
+ end
25
+
26
+ def ==(other)
27
+ super && selector == other.selector && sibling == other.sibling
28
+ end
29
+
30
+ def hash
31
+ [selector, sibling].hash
32
+ end
33
+ end
34
+ end
35
+ end
@@ -1,21 +1,21 @@
1
- module RSAC
2
- module Selectors
3
- class SimpleSelector < Selector
4
- def initialize(selector_type=:SAC_ANY_NODE_SELECTOR)
5
- super(selector_type)
6
- end
7
-
8
- def to_css
9
- '*'
10
- end
11
-
12
- def to_xpath
13
- "//*"
14
- end
15
-
16
- def specificity
17
- [0, 0, 0, 0]
18
- end
19
- end
20
- end
21
- end
1
+ module RSAC
2
+ module Selectors
3
+ class SimpleSelector < Selector
4
+ def initialize(selector_type=:SAC_ANY_NODE_SELECTOR)
5
+ super(selector_type)
6
+ end
7
+
8
+ def to_css
9
+ '*'
10
+ end
11
+
12
+ def to_xpath
13
+ "//*"
14
+ end
15
+
16
+ def specificity
17
+ [0, 0, 0, 0]
18
+ end
19
+ end
20
+ end
21
+ end
@@ -1,25 +1,25 @@
1
- module RSAC
2
- class Token
3
- attr_reader :name, :value, :position
4
-
5
- def initialize(name, value, position)
6
- @name = name
7
- @value = value
8
- @position = position
9
- end
10
-
11
- def to_racc_token
12
- [name, value]
13
- end
14
- end
15
-
16
- class DelimiterToken < Token
17
- def initialize(value, position)
18
- super(:delim, value, position)
19
- end
20
-
21
- def to_racc_token
22
- [value, value]
23
- end
24
- end
25
- end
1
+ module RSAC
2
+ class Token
3
+ attr_reader :name, :value, :position
4
+
5
+ def initialize(name, value, position)
6
+ @name = name
7
+ @value = value
8
+ @position = position
9
+ end
10
+
11
+ def to_racc_token
12
+ [name, value]
13
+ end
14
+ end
15
+
16
+ class DelimiterToken < Token
17
+ def initialize(value, position)
18
+ super(:delim, value, position)
19
+ end
20
+
21
+ def to_racc_token
22
+ [value, value]
23
+ end
24
+ end
25
+ end
@@ -1,185 +1,185 @@
1
- require "antisamy/csspool/rsac/sac/lexeme"
2
- require "antisamy/csspool/rsac/sac/token"
3
-
4
- module RSAC
5
- class Tokenizer
6
- def initialize(&block)
7
- @lexemes = []
8
- @macros = {}
9
-
10
- # http://www.w3.org/TR/CSS21/syndata.html
11
- macro(:h, /([0-9a-f])/ )
12
- macro(:nonascii, /([\200-\377])/ )
13
- macro(:nl, /(\n|\r\n|\r|\f)/ )
14
- macro(:unicode, /(\\#{m(:h)}{1,6}(\r\n|[ \t\r\n\f])?)/ )
15
- macro(:escape, /(#{m(:unicode)}|\\[^\r\n\f0-9a-f])/ )
16
- macro(:nmstart, /([_a-z]|#{m(:nonascii)}|#{m(:escape)})/ )
17
- macro(:nmchar, /([_a-z0-9-]|#{m(:nonascii)}|#{m(:escape)})/ )
18
- macro(:string1, /(\"([^\n\r\f\\\"]|\\#{m(:nl)}|#{m(:escape)})*\")/ )
19
- macro(:string2, /(\'([^\n\r\f\\']|\\#{m(:nl)}|#{m(:escape)})*\')/ )
20
- macro(:invalid1, /(\"([^\n\r\f\\\"]|\\#{m(:nl)}|#{m(:escape)})*)/ )
21
- macro(:invalid2, /(\'([^\n\r\f\\']|\\#{m(:nl)}|#{m(:escape)})*)/ )
22
- macro(:comment, /(\/\*[^*]*\*+([^\/*][^*]*\*+)*\/)/ )
23
- macro(:ident, /(-?#{m(:nmstart)}#{m(:nmchar)}*)/ )
24
- macro(:name, /(#{m(:nmchar)}+)/ )
25
- macro(:num, /([0-9]+|[0-9]*\.[0-9]+)/ )
26
- macro(:string, /(#{m(:string1)}|#{m(:string2)})/ )
27
- macro(:invalid, /(#{m(:invalid1)}|#{m(:invalid2)})/ )
28
- macro(:url, /(([!#\$%&*-~]|#{m(:nonascii)}|#{m(:escape)})*)/ )
29
- macro(:s, /([ \t\r\n\f]+)/ )
30
- macro(:w, /(#{m(:s)}?)/ )
31
- macro(:A, /(a|\\0{0,4}(41|61)(\r\n|[ \t\r\n\f])?)/ )
32
- macro(:C, /(c|\\0{0,4}(43|63)(\r\n|[ \t\r\n\f])?)/ )
33
- macro(:D, /(d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?)/ )
34
- macro(:E, /(e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?)/ )
35
- macro(:G, /(g|\\0{0,4}(47|67)(\r\n|[ \t\r\n\f])?|\\g)/ )
36
- macro(:H, /(h|\\0{0,4}(48|68)(\r\n|[ \t\r\n\f])?|\\h)/ )
37
- macro(:I, /(i|\\0{0,4}(49|69)(\r\n|[ \t\r\n\f])?|\\i)/ )
38
- macro(:K, /(k|\\0{0,4}(4b|6b)(\r\n|[ \t\r\n\f])?|\\k)/ )
39
- macro(:M, /(m|\\0{0,4}(4d|6d)(\r\n|[ \t\r\n\f])?|\\m)/ )
40
- macro(:N, /(n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n)/ )
41
- macro(:O, /(o|\\0{0,4}(51|71)(\r\n|[ \t\r\n\f])?|\\o)/ )
42
- macro(:P, /(p|\\0{0,4}(50|70)(\r\n|[ \t\r\n\f])?|\\p)/ )
43
- macro(:R, /(r|\\0{0,4}(52|72)(\r\n|[ \t\r\n\f])?|\\r)/ )
44
- macro(:S, /(s|\\0{0,4}(53|73)(\r\n|[ \t\r\n\f])?|\\s)/ )
45
- macro(:T, /(t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t)/ )
46
- macro(:X, /(x|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\x)/ )
47
- macro(:Z, /(z|\\0{0,4}(5a|7a)(\r\n|[ \t\r\n\f])?|\\z)/ )
48
-
49
- #token :COMMENT do |patterns|
50
- # patterns << /\/\*[^*]*\*+([^\/*][^*]*\*+)*\//
51
- # patterns << /#{m(:s)}+\/\*[^*]*\*+([^\/*][^*]*\*+)*\//
52
- #end
53
-
54
- token(:LBRACE, /#{m(:w)}\{/)
55
- token(:PLUS, /#{m(:w)}\+/)
56
- token(:GREATER, /#{m(:w)}>/)
57
- token(:COMMA, /#{m(:w)},/)
58
-
59
- token(:S, /#{m(:s)}/)
60
-
61
- #token :URI do |patterns|
62
- # patterns << /url\(#{m(:w)}#{m(:string)}#{m(:w)}\)/
63
- # patterns << /url\(#{m(:w)}#{m(:url)}#{m(:w)}\)/
64
- #end
65
-
66
- token(:FUNCTION, /#{m(:ident)}\(/)
67
- token(:IDENT, /#{m(:ident)}/)
68
-
69
- token(:CDO, /<!--/)
70
- token(:CDC, /-->/)
71
- token(:INCLUDES, /~=/)
72
- token(:DASHMATCH, /\|=/)
73
- #token(:STRING, /#{m(:string)}/)
74
- token(:INVALID, /#{m(:invalid)}/)
75
- token(:HASH, /##{m(:name)}/)
76
- token(:IMPORT_SYM, /@#{m(:I)}#{m(:M)}#{m(:P)}#{m(:O)}#{m(:R)}#{m(:T)}/)
77
- token(:PAGE_SYM, /@#{m(:P)}#{m(:A)}#{m(:G)}#{m(:E)}/)
78
- token(:MEDIA_SYM, /@#{m(:M)}#{m(:E)}#{m(:D)}#{m(:I)}#{m(:A)}/)
79
- token(:CHARSET_SYM, /@#{m(:C)}#{m(:H)}#{m(:A)}#{m(:R)}#{m(:S)}#{m(:E)}#{m(:T)}/)
80
- token(:IMPORTANT_SYM, /!(#{m(:w)}|#{m(:comment)})*#{m(:I)}#{m(:M)}#{m(:P)}#{m(:O)}#{m(:R)}#{m(:T)}#{m(:A)}#{m(:N)}#{m(:T)}/)
81
- token(:EMS, /#{m(:num)}#{m(:E)}#{m(:M)}/)
82
- token(:EXS, /#{m(:num)}#{m(:E)}#{m(:X)}/)
83
-
84
- token :LENGTH do |patterns|
85
- patterns << /#{m(:num)}#{m(:P)}#{m(:X)}/
86
- patterns << /#{m(:num)}#{m(:C)}#{m(:M)}/
87
- patterns << /#{m(:num)}#{m(:M)}#{m(:M)}/
88
- patterns << /#{m(:num)}#{m(:I)}#{m(:N)}/
89
- patterns << /#{m(:num)}#{m(:P)}#{m(:T)}/
90
- patterns << /#{m(:num)}#{m(:P)}#{m(:C)}/
91
- end
92
-
93
- token :ANGLE do |patterns|
94
- patterns << /#{m(:num)}#{m(:D)}#{m(:E)}#{m(:G)}/
95
- patterns << /#{m(:num)}#{m(:R)}#{m(:A)}#{m(:D)}/
96
- patterns << /#{m(:num)}#{m(:G)}#{m(:R)}#{m(:A)}#{m(:D)}/
97
- end
98
-
99
- token :TIME do |patterns|
100
- patterns << /#{m(:num)}#{m(:M)}#{m(:S)}/
101
- patterns << /#{m(:num)}#{m(:S)}/
102
- end
103
-
104
- token :FREQ do |patterns|
105
- patterns << /#{m(:num)}#{m(:H)}#{m(:Z)}/
106
- patterns << /#{m(:num)}#{m(:K)}#{m(:H)}#{m(:Z)}/
107
- end
108
-
109
- token(:DIMENSION, /#{m(:num)}#{m(:ident)}/)
110
- token(:PERCENTAGE, /#{m(:num)}%/)
111
- token(:NUMBER, /#{m(:num)}/)
112
-
113
-
114
- yield self if block_given?
115
- end
116
-
117
- def tokenize(input_data)
118
- tokens = []
119
- pos = 0
120
-
121
- comment_pattern = /\/\*.*?\*\//m
122
- comments = input_data.scan(comment_pattern)
123
- non_comments = input_data.split(comment_pattern)
124
-
125
- # Handle a small edge case, if our CSS is *only* comments,
126
- # the split, zip, scan trick won't work
127
- if non_comments.length == 0
128
- tokens = comments.map { |x| Token.new(:COMMENT, x, nil) }
129
- else
130
- non_comments.zip(comments).each do |non_comment, comment|
131
- non_comment.split(/url\([^\)]*\)/m).zip(
132
- non_comment.scan(/url\([^\)]*\)/m)
133
- ).each do |non_url, url|
134
- non_url.split(/"[^"]*"|'[^']*'/m).zip(
135
- non_url.scan(/"[^"]*"|'[^']*'/m)
136
- ).each do |non_string, quoted_string|
137
- if non_string.length > 0 && non_string =~ /\A\s*\Z/m
138
- tokens << Token.new(:S, non_string, nil)
139
- else
140
- non_string.split(/[ \t\r\n\f]*(?![{}+>]*)/m).zip(
141
- non_string.scan(/[ \t\r\n\f]*(?![{}+>]*)/m)
142
- ).each do |string, whitespace|
143
- until string.empty?
144
- token = nil
145
- @lexemes.each do |lexeme|
146
- match = lexeme.pattern.match(string)
147
- if match
148
- token = Token.new(lexeme.name, match.to_s, pos)
149
- break
150
- end
151
- end
152
-
153
- token ||= DelimiterToken.new(/^./.match(string).to_s, pos)
154
-
155
- tokens << token
156
- string = string.slice(Range.new(token.value.length, -1))
157
- pos += token.value.length
158
- end
159
- tokens << Token.new(:S, whitespace, nil) if whitespace
160
- end
161
- end
162
- tokens << Token.new(:STRING, quoted_string, nil) if quoted_string
163
- end
164
- tokens << Token.new(:URI, url, nil) if url
165
- end
166
- tokens << Token.new(:COMMENT, comment, nil) if comment
167
- end
168
- end
169
-
170
- tokens
171
- end
172
-
173
- private
174
-
175
- def token(name, pattern=nil, &block)
176
- @lexemes << Lexeme.new(name, pattern, &block)
177
- end
178
-
179
- def macro(name, regex=nil)
180
- regex ? @macros[name] = regex : @macros[name].source
181
- end
182
-
183
- alias :m :macro
184
- end
185
- end
1
+ require "antisamy/csspool/rsac/sac/lexeme"
2
+ require "antisamy/csspool/rsac/sac/token"
3
+
4
+ module RSAC
5
+ class Tokenizer
6
+ def initialize(&block)
7
+ @lexemes = []
8
+ @macros = {}
9
+
10
+ # http://www.w3.org/TR/CSS21/syndata.html
11
+ macro(:h, /([0-9a-f])/ )
12
+ macro(:nonascii, /([\200-\377])/ )
13
+ macro(:nl, /(\n|\r\n|\r|\f)/ )
14
+ macro(:unicode, /(\\#{m(:h)}{1,6}(\r\n|[ \t\r\n\f])?)/ )
15
+ macro(:escape, /(#{m(:unicode)}|\\[^\r\n\f0-9a-f])/ )
16
+ macro(:nmstart, /([_a-z]|#{m(:nonascii)}|#{m(:escape)})/ )
17
+ macro(:nmchar, /([_a-z0-9-]|#{m(:nonascii)}|#{m(:escape)})/ )
18
+ macro(:string1, /(\"([^\n\r\f\\\"]|\\#{m(:nl)}|#{m(:escape)})*\")/ )
19
+ macro(:string2, /(\'([^\n\r\f\\']|\\#{m(:nl)}|#{m(:escape)})*\')/ )
20
+ macro(:invalid1, /(\"([^\n\r\f\\\"]|\\#{m(:nl)}|#{m(:escape)})*)/ )
21
+ macro(:invalid2, /(\'([^\n\r\f\\']|\\#{m(:nl)}|#{m(:escape)})*)/ )
22
+ macro(:comment, /(\/\*[^*]*\*+([^\/*][^*]*\*+)*\/)/ )
23
+ macro(:ident, /(-?#{m(:nmstart)}#{m(:nmchar)}*)/ )
24
+ macro(:name, /(#{m(:nmchar)}+)/ )
25
+ macro(:num, /([0-9]+|[0-9]*\.[0-9]+)/ )
26
+ macro(:string, /(#{m(:string1)}|#{m(:string2)})/ )
27
+ macro(:invalid, /(#{m(:invalid1)}|#{m(:invalid2)})/ )
28
+ macro(:url, /(([!#\$%&*-~]|#{m(:nonascii)}|#{m(:escape)})*)/ )
29
+ macro(:s, /([ \t\r\n\f]+)/ )
30
+ macro(:w, /(#{m(:s)}?)/ )
31
+ macro(:A, /(a|\\0{0,4}(41|61)(\r\n|[ \t\r\n\f])?)/ )
32
+ macro(:C, /(c|\\0{0,4}(43|63)(\r\n|[ \t\r\n\f])?)/ )
33
+ macro(:D, /(d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?)/ )
34
+ macro(:E, /(e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?)/ )
35
+ macro(:G, /(g|\\0{0,4}(47|67)(\r\n|[ \t\r\n\f])?|\\g)/ )
36
+ macro(:H, /(h|\\0{0,4}(48|68)(\r\n|[ \t\r\n\f])?|\\h)/ )
37
+ macro(:I, /(i|\\0{0,4}(49|69)(\r\n|[ \t\r\n\f])?|\\i)/ )
38
+ macro(:K, /(k|\\0{0,4}(4b|6b)(\r\n|[ \t\r\n\f])?|\\k)/ )
39
+ macro(:M, /(m|\\0{0,4}(4d|6d)(\r\n|[ \t\r\n\f])?|\\m)/ )
40
+ macro(:N, /(n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n)/ )
41
+ macro(:O, /(o|\\0{0,4}(51|71)(\r\n|[ \t\r\n\f])?|\\o)/ )
42
+ macro(:P, /(p|\\0{0,4}(50|70)(\r\n|[ \t\r\n\f])?|\\p)/ )
43
+ macro(:R, /(r|\\0{0,4}(52|72)(\r\n|[ \t\r\n\f])?|\\r)/ )
44
+ macro(:S, /(s|\\0{0,4}(53|73)(\r\n|[ \t\r\n\f])?|\\s)/ )
45
+ macro(:T, /(t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t)/ )
46
+ macro(:X, /(x|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\x)/ )
47
+ macro(:Z, /(z|\\0{0,4}(5a|7a)(\r\n|[ \t\r\n\f])?|\\z)/ )
48
+
49
+ #token :COMMENT do |patterns|
50
+ # patterns << /\/\*[^*]*\*+([^\/*][^*]*\*+)*\//
51
+ # patterns << /#{m(:s)}+\/\*[^*]*\*+([^\/*][^*]*\*+)*\//
52
+ #end
53
+
54
+ token(:LBRACE, /#{m(:w)}\{/)
55
+ token(:PLUS, /#{m(:w)}\+/)
56
+ token(:GREATER, /#{m(:w)}>/)
57
+ token(:COMMA, /#{m(:w)},/)
58
+
59
+ token(:S, /#{m(:s)}/)
60
+
61
+ #token :URI do |patterns|
62
+ # patterns << /url\(#{m(:w)}#{m(:string)}#{m(:w)}\)/
63
+ # patterns << /url\(#{m(:w)}#{m(:url)}#{m(:w)}\)/
64
+ #end
65
+
66
+ token(:FUNCTION, /#{m(:ident)}\(/)
67
+ token(:IDENT, /#{m(:ident)}/)
68
+
69
+ token(:CDO, /<!--/)
70
+ token(:CDC, /-->/)
71
+ token(:INCLUDES, /~=/)
72
+ token(:DASHMATCH, /\|=/)
73
+ #token(:STRING, /#{m(:string)}/)
74
+ token(:INVALID, /#{m(:invalid)}/)
75
+ token(:HASH, /##{m(:name)}/)
76
+ token(:IMPORT_SYM, /@#{m(:I)}#{m(:M)}#{m(:P)}#{m(:O)}#{m(:R)}#{m(:T)}/)
77
+ token(:PAGE_SYM, /@#{m(:P)}#{m(:A)}#{m(:G)}#{m(:E)}/)
78
+ token(:MEDIA_SYM, /@#{m(:M)}#{m(:E)}#{m(:D)}#{m(:I)}#{m(:A)}/)
79
+ token(:CHARSET_SYM, /@#{m(:C)}#{m(:H)}#{m(:A)}#{m(:R)}#{m(:S)}#{m(:E)}#{m(:T)}/)
80
+ token(:IMPORTANT_SYM, /!(#{m(:w)}|#{m(:comment)})*#{m(:I)}#{m(:M)}#{m(:P)}#{m(:O)}#{m(:R)}#{m(:T)}#{m(:A)}#{m(:N)}#{m(:T)}/)
81
+ token(:EMS, /#{m(:num)}#{m(:E)}#{m(:M)}/)
82
+ token(:EXS, /#{m(:num)}#{m(:E)}#{m(:X)}/)
83
+
84
+ token :LENGTH do |patterns|
85
+ patterns << /#{m(:num)}#{m(:P)}#{m(:X)}/
86
+ patterns << /#{m(:num)}#{m(:C)}#{m(:M)}/
87
+ patterns << /#{m(:num)}#{m(:M)}#{m(:M)}/
88
+ patterns << /#{m(:num)}#{m(:I)}#{m(:N)}/
89
+ patterns << /#{m(:num)}#{m(:P)}#{m(:T)}/
90
+ patterns << /#{m(:num)}#{m(:P)}#{m(:C)}/
91
+ end
92
+
93
+ token :ANGLE do |patterns|
94
+ patterns << /#{m(:num)}#{m(:D)}#{m(:E)}#{m(:G)}/
95
+ patterns << /#{m(:num)}#{m(:R)}#{m(:A)}#{m(:D)}/
96
+ patterns << /#{m(:num)}#{m(:G)}#{m(:R)}#{m(:A)}#{m(:D)}/
97
+ end
98
+
99
+ token :TIME do |patterns|
100
+ patterns << /#{m(:num)}#{m(:M)}#{m(:S)}/
101
+ patterns << /#{m(:num)}#{m(:S)}/
102
+ end
103
+
104
+ token :FREQ do |patterns|
105
+ patterns << /#{m(:num)}#{m(:H)}#{m(:Z)}/
106
+ patterns << /#{m(:num)}#{m(:K)}#{m(:H)}#{m(:Z)}/
107
+ end
108
+
109
+ token(:DIMENSION, /#{m(:num)}#{m(:ident)}/)
110
+ token(:PERCENTAGE, /#{m(:num)}%/)
111
+ token(:NUMBER, /#{m(:num)}/)
112
+
113
+
114
+ yield self if block_given?
115
+ end
116
+
117
+ def tokenize(input_data)
118
+ tokens = []
119
+ pos = 0
120
+
121
+ comment_pattern = /\/\*.*?\*\//m
122
+ comments = input_data.scan(comment_pattern)
123
+ non_comments = input_data.split(comment_pattern)
124
+
125
+ # Handle a small edge case, if our CSS is *only* comments,
126
+ # the split, zip, scan trick won't work
127
+ if non_comments.length == 0
128
+ tokens = comments.map { |x| Token.new(:COMMENT, x, nil) }
129
+ else
130
+ non_comments.zip(comments).each do |non_comment, comment|
131
+ non_comment.split(/url\([^\)]*\)/m).zip(
132
+ non_comment.scan(/url\([^\)]*\)/m)
133
+ ).each do |non_url, url|
134
+ non_url.split(/"[^"]*"|'[^']*'/m).zip(
135
+ non_url.scan(/"[^"]*"|'[^']*'/m)
136
+ ).each do |non_string, quoted_string|
137
+ if non_string.length > 0 && non_string =~ /\A\s*\Z/m
138
+ tokens << Token.new(:S, non_string, nil)
139
+ else
140
+ non_string.split(/[ \t\r\n\f]*(?![{}+>]*)/m).zip(
141
+ non_string.scan(/[ \t\r\n\f]*(?![{}+>]*)/m)
142
+ ).each do |string, whitespace|
143
+ until string.empty?
144
+ token = nil
145
+ @lexemes.each do |lexeme|
146
+ match = lexeme.pattern.match(string)
147
+ if match
148
+ token = Token.new(lexeme.name, match.to_s, pos)
149
+ break
150
+ end
151
+ end
152
+
153
+ token ||= DelimiterToken.new(/^./.match(string).to_s, pos)
154
+
155
+ tokens << token
156
+ string = string.slice(Range.new(token.value.length, -1))
157
+ pos += token.value.length
158
+ end
159
+ tokens << Token.new(:S, whitespace, nil) if whitespace
160
+ end
161
+ end
162
+ tokens << Token.new(:STRING, quoted_string, nil) if quoted_string
163
+ end
164
+ tokens << Token.new(:URI, url, nil) if url
165
+ end
166
+ tokens << Token.new(:COMMENT, comment, nil) if comment
167
+ end
168
+ end
169
+
170
+ tokens
171
+ end
172
+
173
+ private
174
+
175
+ def token(name, pattern=nil, &block)
176
+ @lexemes << Lexeme.new(name, pattern, &block)
177
+ end
178
+
179
+ def macro(name, regex=nil)
180
+ regex ? @macros[name] = regex : @macros[name].source
181
+ end
182
+
183
+ alias :m :macro
184
+ end
185
+ end