antisamy 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.rdoc +13 -0
- data/LICENSE.txt +20 -20
- data/README.rdoc +41 -41
- data/lib/antisamy.rb +46 -46
- data/lib/antisamy/css/css_filter.rb +187 -187
- data/lib/antisamy/css/css_scanner.rb +84 -84
- data/lib/antisamy/css/css_validator.rb +128 -128
- data/lib/antisamy/csspool/rsac.rb +1 -1
- data/lib/antisamy/csspool/rsac/sac.rb +14 -14
- data/lib/antisamy/csspool/rsac/sac/conditions.rb +5 -5
- data/lib/antisamy/csspool/rsac/sac/conditions/attribute_condition.rb +50 -50
- data/lib/antisamy/csspool/rsac/sac/conditions/begin_hyphen_condition.rb +18 -18
- data/lib/antisamy/csspool/rsac/sac/conditions/class_condition.rb +18 -18
- data/lib/antisamy/csspool/rsac/sac/conditions/combinator_condition.rb +36 -36
- data/lib/antisamy/csspool/rsac/sac/conditions/condition.rb +29 -29
- data/lib/antisamy/csspool/rsac/sac/conditions/id_condition.rb +23 -23
- data/lib/antisamy/csspool/rsac/sac/conditions/one_of_condition.rb +18 -18
- data/lib/antisamy/csspool/rsac/sac/conditions/pseudo_class_condition.rb +20 -20
- data/lib/antisamy/csspool/rsac/sac/document_handler.rb +66 -66
- data/lib/antisamy/csspool/rsac/sac/error_handler.rb +13 -13
- data/lib/antisamy/csspool/rsac/sac/generated_parser.rb +1012 -1012
- data/lib/antisamy/csspool/rsac/sac/generated_property_parser.rb +9284 -9284
- data/lib/antisamy/csspool/rsac/sac/lexeme.rb +27 -27
- data/lib/antisamy/csspool/rsac/sac/lexical_unit.rb +201 -201
- data/lib/antisamy/csspool/rsac/sac/parse_exception.rb +4 -4
- data/lib/antisamy/csspool/rsac/sac/parser.rb +109 -109
- data/lib/antisamy/csspool/rsac/sac/property_parser.rb +44 -44
- data/lib/antisamy/csspool/rsac/sac/selectors.rb +5 -5
- data/lib/antisamy/csspool/rsac/sac/selectors/child_selector.rb +36 -36
- data/lib/antisamy/csspool/rsac/sac/selectors/conditional_selector.rb +45 -45
- data/lib/antisamy/csspool/rsac/sac/selectors/descendant_selector.rb +36 -36
- data/lib/antisamy/csspool/rsac/sac/selectors/element_selector.rb +35 -35
- data/lib/antisamy/csspool/rsac/sac/selectors/selector.rb +25 -25
- data/lib/antisamy/csspool/rsac/sac/selectors/sibling_selector.rb +35 -35
- data/lib/antisamy/csspool/rsac/sac/selectors/simple_selector.rb +21 -21
- data/lib/antisamy/csspool/rsac/sac/token.rb +25 -25
- data/lib/antisamy/csspool/rsac/sac/tokenizer.rb +185 -185
- data/lib/antisamy/csspool/rsac/stylesheet.rb +3 -3
- data/lib/antisamy/csspool/rsac/stylesheet/rule.rb +20 -20
- data/lib/antisamy/csspool/rsac/stylesheet/stylesheet.rb +76 -76
- data/lib/antisamy/html/handler.rb +112 -99
- data/lib/antisamy/html/sax_filter.rb +305 -302
- data/lib/antisamy/html/scanner.rb +47 -43
- data/lib/antisamy/model/attribute.rb +19 -19
- data/lib/antisamy/model/css_property.rb +39 -39
- data/lib/antisamy/model/tag.rb +31 -31
- data/lib/antisamy/policy.rb +577 -545
- data/lib/antisamy/scan_results.rb +89 -89
- data/spec/antisamy_spec.rb +208 -142
- data/spec/spec_helper.rb +12 -12
- metadata +79 -81
@@ -1,36 +1,36 @@
|
|
1
|
-
module RSAC
|
2
|
-
module Selectors
|
3
|
-
class DescendantSelector < SimpleSelector
|
4
|
-
attr_accessor :ancestor_selector, :simple_selector
|
5
|
-
alias :ancestor :ancestor_selector
|
6
|
-
alias :selector :simple_selector
|
7
|
-
|
8
|
-
def initialize(ancestor, selector)
|
9
|
-
super(:SAC_DESCENDANT_SELECTOR)
|
10
|
-
|
11
|
-
@ancestor_selector = ancestor
|
12
|
-
@simple_selector = selector
|
13
|
-
end
|
14
|
-
|
15
|
-
def to_css
|
16
|
-
"#{ancestor.to_css} #{selector.to_css}"
|
17
|
-
end
|
18
|
-
|
19
|
-
def to_xpath(prefix=true)
|
20
|
-
"#{ancestor.to_xpath(prefix)}//#{selector.to_xpath(false)}"
|
21
|
-
end
|
22
|
-
|
23
|
-
def specificity
|
24
|
-
ancestor.specificity.zip(selector.specificity).map { |x,y| x + y }
|
25
|
-
end
|
26
|
-
|
27
|
-
def ==(other)
|
28
|
-
super && selector == other.selector && ancestor == other.ancestor
|
29
|
-
end
|
30
|
-
|
31
|
-
def hash
|
32
|
-
[selector, ancestor].hash
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
1
|
+
module RSAC
  module Selectors
    # Selector pairing an ancestor selector with a selector for one of its
    # descendants (rendered in CSS as the two selectors separated by a space).
    class DescendantSelector < SimpleSelector
      attr_accessor :ancestor_selector, :simple_selector
      alias :ancestor :ancestor_selector
      alias :selector :simple_selector

      # ancestor:: selector for the enclosing element
      # selector:: selector for the descendant element
      def initialize(ancestor, selector)
        super(:SAC_DESCENDANT_SELECTOR)

        @ancestor_selector, @simple_selector = ancestor, selector
      end

      # CSS text: ancestor and descendant separated by a single space.
      def to_css
        outer = ancestor.to_css
        inner = selector.to_css
        "#{outer} #{inner}"
      end

      # XPath text: the ancestor's expression (prefixed or not, as requested)
      # joined by "//" to the descendant's unprefixed expression.
      def to_xpath(prefix=true)
        outer = ancestor.to_xpath(prefix)
        inner = selector.to_xpath(false)
        "#{outer}//#{inner}"
      end

      # Element-wise sum of the two component specificities.
      def specificity
        pairs = ancestor.specificity.zip(selector.specificity)
        pairs.map { |outer, inner| outer + inner }
      end

      # Equal when the superclass comparison passes and both component
      # selectors are equal.
      def ==(other)
        return false unless super

        selector == other.selector && ancestor == other.ancestor
      end

      # Hash derived from both component selectors.
      def hash
        [selector, ancestor].hash
      end
    end
  end
end
|
@@ -1,35 +1,35 @@
|
|
1
|
-
module RSAC
|
2
|
-
module Selectors
|
3
|
-
class ElementSelector < SimpleSelector
|
4
|
-
attr_reader :local_name
|
5
|
-
alias :name :local_name
|
6
|
-
|
7
|
-
def initialize(name)
|
8
|
-
super(:SAC_ELEMENT_NODE_SELECTOR)
|
9
|
-
@local_name = name
|
10
|
-
end
|
11
|
-
|
12
|
-
def to_css
|
13
|
-
local_name
|
14
|
-
end
|
15
|
-
|
16
|
-
def to_xpath(prefix=true)
|
17
|
-
atoms = [local_name]
|
18
|
-
atoms.unshift("//") if prefix
|
19
|
-
atoms.join
|
20
|
-
end
|
21
|
-
|
22
|
-
def specificity
|
23
|
-
[0, 0, 0, 1]
|
24
|
-
end
|
25
|
-
|
26
|
-
def ==(other)
|
27
|
-
super && name == other.name
|
28
|
-
end
|
29
|
-
|
30
|
-
def hash
|
31
|
-
name.hash
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|
1
|
+
module RSAC
  module Selectors
    # Selector that matches elements by their local (tag) name.
    class ElementSelector < SimpleSelector
      attr_reader :local_name
      alias :name :local_name

      # name:: the element name this selector matches
      def initialize(name)
        super(:SAC_ELEMENT_NODE_SELECTOR)
        @local_name = name
      end

      # CSS text is simply the element name.
      def to_css
        local_name
      end

      # XPath text: the element name, preceded by "//" when +prefix+ is truthy.
      def to_xpath(prefix=true)
        parts = prefix ? ["//", local_name] : [local_name]
        parts.join
      end

      # Specificity contributed by a single element name.
      def specificity
        [0, 0, 0, 1]
      end

      # Equal when the superclass comparison passes and the names match.
      def ==(other)
        return false unless super

        name == other.name
      end

      # Hash derived from the element name.
      def hash
        name.hash
      end
    end
  end
end
|
@@ -1,25 +1,25 @@
|
|
1
|
-
module RSAC
|
2
|
-
module Selectors
|
3
|
-
class Selector
|
4
|
-
|
5
|
-
attr_reader :selector_type
|
6
|
-
|
7
|
-
def initialize(selector_type)
|
8
|
-
@selector_type = selector_type
|
9
|
-
end
|
10
|
-
|
11
|
-
def ==(other)
|
12
|
-
self.class === other && selector_type == other.selector_type
|
13
|
-
end
|
14
|
-
|
15
|
-
def hash
|
16
|
-
selector_type.hash
|
17
|
-
end
|
18
|
-
|
19
|
-
def eql?(other)
|
20
|
-
self == other
|
21
|
-
end
|
22
|
-
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
1
|
+
module RSAC
  module Selectors
    # Base class for selectors; carries the selector type and defines
    # type-based equality and hashing.
    class Selector

      attr_reader :selector_type

      # selector_type:: value identifying the kind of selector
      def initialize(selector_type)
        @selector_type = selector_type
      end

      # True when +other+ is an instance of the receiver's class (or of a
      # subclass of it) and has an equal selector type.
      def ==(other)
        return false unless self.class === other

        selector_type == other.selector_type
      end

      # Hash derived from the selector type.
      def hash
        selector_type.hash
      end

      # Delegates to ==, so subclasses that override == get a matching eql?.
      def eql?(other)
        self == other
      end

    end
  end
end
|
@@ -1,35 +1,35 @@
|
|
1
|
-
module RSAC
|
2
|
-
module Selectors
|
3
|
-
class SiblingSelector < SimpleSelector
|
4
|
-
attr_accessor :selector, :sibling_selector
|
5
|
-
alias :sibling :sibling_selector
|
6
|
-
|
7
|
-
def initialize(selector, sibling)
|
8
|
-
super(:SAC_DIRECT_ADJACENT_SELECTOR)
|
9
|
-
|
10
|
-
@selector = selector
|
11
|
-
@sibling_selector = sibling
|
12
|
-
end
|
13
|
-
|
14
|
-
def to_css
|
15
|
-
"#{selector.to_css} + #{sibling.to_css}"
|
16
|
-
end
|
17
|
-
|
18
|
-
def to_xpath(prefix=true)
|
19
|
-
"#{selector.to_xpath(prefix)}/following-sibling::#{sibling.to_xpath(false)}"
|
20
|
-
end
|
21
|
-
|
22
|
-
def specificity
|
23
|
-
selector.specificity.zip(sibling.specificity).map { |x,y| x + y }
|
24
|
-
end
|
25
|
-
|
26
|
-
def ==(other)
|
27
|
-
super && selector == other.selector && sibling == other.sibling
|
28
|
-
end
|
29
|
-
|
30
|
-
def hash
|
31
|
-
[selector, sibling].hash
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|
1
|
+
module RSAC
  module Selectors
    # Selector pairing a selector with a selector for its following sibling
    # (rendered in CSS with the "+" combinator).
    class SiblingSelector < SimpleSelector
      attr_accessor :selector, :sibling_selector
      alias :sibling :sibling_selector

      # selector:: selector for the first element
      # sibling::  selector for the element that follows it
      def initialize(selector, sibling)
        super(:SAC_DIRECT_ADJACENT_SELECTOR)

        @selector, @sibling_selector = selector, sibling
      end

      # CSS text: the two selectors joined by " + ".
      def to_css
        first = selector.to_css
        second = sibling.to_css
        "#{first} + #{second}"
      end

      # XPath text: the first selector's expression (prefixed or not, as
      # requested) followed by the sibling's unprefixed expression on the
      # following-sibling axis.
      def to_xpath(prefix=true)
        first = selector.to_xpath(prefix)
        second = sibling.to_xpath(false)
        "#{first}/following-sibling::#{second}"
      end

      # Element-wise sum of the two component specificities.
      def specificity
        pairs = selector.specificity.zip(sibling.specificity)
        pairs.map { |first, second| first + second }
      end

      # Equal when the superclass comparison passes and both component
      # selectors are equal.
      def ==(other)
        return false unless super

        selector == other.selector && sibling == other.sibling
      end

      # Hash derived from both component selectors.
      def hash
        [selector, sibling].hash
      end
    end
  end
end
|
@@ -1,21 +1,21 @@
|
|
1
|
-
module RSAC
|
2
|
-
module Selectors
|
3
|
-
class SimpleSelector < Selector
|
4
|
-
def initialize(selector_type=:SAC_ANY_NODE_SELECTOR)
|
5
|
-
super(selector_type)
|
6
|
-
end
|
7
|
-
|
8
|
-
def to_css
|
9
|
-
'*'
|
10
|
-
end
|
11
|
-
|
12
|
-
def to_xpath
|
13
|
-
"//*"
|
14
|
-
end
|
15
|
-
|
16
|
-
def specificity
|
17
|
-
[0, 0, 0, 0]
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
1
|
+
module RSAC
  module Selectors
    # Selector that, on its own, renders as the universal selector ("*").
    # Also serves as the superclass of the more specific selector classes.
    class SimpleSelector < Selector
      # selector_type:: selector kind; defaults to :SAC_ANY_NODE_SELECTOR
      def initialize(selector_type=:SAC_ANY_NODE_SELECTOR)
        super(selector_type)
      end

      # CSS text of the universal selector.
      def to_css
        '*'
      end

      # XPath text of the universal selector.
      #
      # prefix:: when truthy (the default) the result starts with "//";
      #          otherwise only the node test "*" is returned.
      #
      # The parameter exists because the combinator selectors call
      # to_xpath(prefix) / to_xpath(false) on their component selectors; a
      # zero-argument method here would raise ArgumentError whenever a plain
      # SimpleSelector is used as one of those components.
      def to_xpath(prefix=true)
        prefix ? "//*" : "*"
      end

      # The universal selector contributes nothing to specificity.
      def specificity
        [0, 0, 0, 0]
      end
    end
  end
end
|
@@ -1,25 +1,25 @@
|
|
1
|
-
module RSAC
|
2
|
-
class Token
|
3
|
-
attr_reader :name, :value, :position
|
4
|
-
|
5
|
-
def initialize(name, value, position)
|
6
|
-
@name = name
|
7
|
-
@value = value
|
8
|
-
@position = position
|
9
|
-
end
|
10
|
-
|
11
|
-
def to_racc_token
|
12
|
-
[name, value]
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
class DelimiterToken < Token
|
17
|
-
def initialize(value, position)
|
18
|
-
super(:delim, value, position)
|
19
|
-
end
|
20
|
-
|
21
|
-
def to_racc_token
|
22
|
-
[value, value]
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
1
|
+
module RSAC
  # A lexical token: a symbolic name, the matched text, and whatever
  # position value the caller supplied.
  class Token
    attr_reader :name, :value, :position

    # name::     token name
    # value::    matched text
    # position:: position recorded by the caller (may be nil)
    def initialize(name, value, position)
      @name, @value, @position = name, value, position
    end

    # Two-element array of the token's name and value.
    def to_racc_token
      [name, value]
    end
  end

  # Token whose name is always :delim and whose pair form uses the value
  # in both slots.
  class DelimiterToken < Token
    # value::    the delimiter text
    # position:: position recorded by the caller (may be nil)
    def initialize(value, position)
      super(:delim, value, position)
    end

    # Two-element array holding the delimiter text twice.
    def to_racc_token
      [value, value]
    end
  end
end
|
@@ -1,185 +1,185 @@
|
|
1
|
-
require "antisamy/csspool/rsac/sac/lexeme"
|
2
|
-
require "antisamy/csspool/rsac/sac/token"
|
3
|
-
|
4
|
-
module RSAC
|
5
|
-
class Tokenizer
|
6
|
-
def initialize(&block)
|
7
|
-
@lexemes = []
|
8
|
-
@macros = {}
|
9
|
-
|
10
|
-
# http://www.w3.org/TR/CSS21/syndata.html
|
11
|
-
macro(:h, /([0-9a-f])/ )
|
12
|
-
macro(:nonascii, /([\200-\377])/ )
|
13
|
-
macro(:nl, /(\n|\r\n|\r|\f)/ )
|
14
|
-
macro(:unicode, /(\\#{m(:h)}{1,6}(\r\n|[ \t\r\n\f])?)/ )
|
15
|
-
macro(:escape, /(#{m(:unicode)}|\\[^\r\n\f0-9a-f])/ )
|
16
|
-
macro(:nmstart, /([_a-z]|#{m(:nonascii)}|#{m(:escape)})/ )
|
17
|
-
macro(:nmchar, /([_a-z0-9-]|#{m(:nonascii)}|#{m(:escape)})/ )
|
18
|
-
macro(:string1, /(\"([^\n\r\f\\\"]|\\#{m(:nl)}|#{m(:escape)})*\")/ )
|
19
|
-
macro(:string2, /(\'([^\n\r\f\\']|\\#{m(:nl)}|#{m(:escape)})*\')/ )
|
20
|
-
macro(:invalid1, /(\"([^\n\r\f\\\"]|\\#{m(:nl)}|#{m(:escape)})*)/ )
|
21
|
-
macro(:invalid2, /(\'([^\n\r\f\\']|\\#{m(:nl)}|#{m(:escape)})*)/ )
|
22
|
-
macro(:comment, /(\/\*[^*]*\*+([^\/*][^*]*\*+)*\/)/ )
|
23
|
-
macro(:ident, /(-?#{m(:nmstart)}#{m(:nmchar)}*)/ )
|
24
|
-
macro(:name, /(#{m(:nmchar)}+)/ )
|
25
|
-
macro(:num, /([0-9]+|[0-9]*\.[0-9]+)/ )
|
26
|
-
macro(:string, /(#{m(:string1)}|#{m(:string2)})/ )
|
27
|
-
macro(:invalid, /(#{m(:invalid1)}|#{m(:invalid2)})/ )
|
28
|
-
macro(:url, /(([!#\$%&*-~]|#{m(:nonascii)}|#{m(:escape)})*)/ )
|
29
|
-
macro(:s, /([ \t\r\n\f]+)/ )
|
30
|
-
macro(:w, /(#{m(:s)}?)/ )
|
31
|
-
macro(:A, /(a|\\0{0,4}(41|61)(\r\n|[ \t\r\n\f])?)/ )
|
32
|
-
macro(:C, /(c|\\0{0,4}(43|63)(\r\n|[ \t\r\n\f])?)/ )
|
33
|
-
macro(:D, /(d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?)/ )
|
34
|
-
macro(:E, /(e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?)/ )
|
35
|
-
macro(:G, /(g|\\0{0,4}(47|67)(\r\n|[ \t\r\n\f])?|\\g)/ )
|
36
|
-
macro(:H, /(h|\\0{0,4}(48|68)(\r\n|[ \t\r\n\f])?|\\h)/ )
|
37
|
-
macro(:I, /(i|\\0{0,4}(49|69)(\r\n|[ \t\r\n\f])?|\\i)/ )
|
38
|
-
macro(:K, /(k|\\0{0,4}(4b|6b)(\r\n|[ \t\r\n\f])?|\\k)/ )
|
39
|
-
macro(:M, /(m|\\0{0,4}(4d|6d)(\r\n|[ \t\r\n\f])?|\\m)/ )
|
40
|
-
macro(:N, /(n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n)/ )
|
41
|
-
macro(:O, /(o|\\0{0,4}(51|71)(\r\n|[ \t\r\n\f])?|\\o)/ )
|
42
|
-
macro(:P, /(p|\\0{0,4}(50|70)(\r\n|[ \t\r\n\f])?|\\p)/ )
|
43
|
-
macro(:R, /(r|\\0{0,4}(52|72)(\r\n|[ \t\r\n\f])?|\\r)/ )
|
44
|
-
macro(:S, /(s|\\0{0,4}(53|73)(\r\n|[ \t\r\n\f])?|\\s)/ )
|
45
|
-
macro(:T, /(t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t)/ )
|
46
|
-
macro(:X, /(x|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\x)/ )
|
47
|
-
macro(:Z, /(z|\\0{0,4}(5a|7a)(\r\n|[ \t\r\n\f])?|\\z)/ )
|
48
|
-
|
49
|
-
#token :COMMENT do |patterns|
|
50
|
-
# patterns << /\/\*[^*]*\*+([^\/*][^*]*\*+)*\//
|
51
|
-
# patterns << /#{m(:s)}+\/\*[^*]*\*+([^\/*][^*]*\*+)*\//
|
52
|
-
#end
|
53
|
-
|
54
|
-
token(:LBRACE, /#{m(:w)}\{/)
|
55
|
-
token(:PLUS, /#{m(:w)}\+/)
|
56
|
-
token(:GREATER, /#{m(:w)}>/)
|
57
|
-
token(:COMMA, /#{m(:w)},/)
|
58
|
-
|
59
|
-
token(:S, /#{m(:s)}/)
|
60
|
-
|
61
|
-
#token :URI do |patterns|
|
62
|
-
# patterns << /url\(#{m(:w)}#{m(:string)}#{m(:w)}\)/
|
63
|
-
# patterns << /url\(#{m(:w)}#{m(:url)}#{m(:w)}\)/
|
64
|
-
#end
|
65
|
-
|
66
|
-
token(:FUNCTION, /#{m(:ident)}\(/)
|
67
|
-
token(:IDENT, /#{m(:ident)}/)
|
68
|
-
|
69
|
-
token(:CDO, /<!--/)
|
70
|
-
token(:CDC, /-->/)
|
71
|
-
token(:INCLUDES, /~=/)
|
72
|
-
token(:DASHMATCH, /\|=/)
|
73
|
-
#token(:STRING, /#{m(:string)}/)
|
74
|
-
token(:INVALID, /#{m(:invalid)}/)
|
75
|
-
token(:HASH, /##{m(:name)}/)
|
76
|
-
token(:IMPORT_SYM, /@#{m(:I)}#{m(:M)}#{m(:P)}#{m(:O)}#{m(:R)}#{m(:T)}/)
|
77
|
-
token(:PAGE_SYM, /@#{m(:P)}#{m(:A)}#{m(:G)}#{m(:E)}/)
|
78
|
-
token(:MEDIA_SYM, /@#{m(:M)}#{m(:E)}#{m(:D)}#{m(:I)}#{m(:A)}/)
|
79
|
-
token(:CHARSET_SYM, /@#{m(:C)}#{m(:H)}#{m(:A)}#{m(:R)}#{m(:S)}#{m(:E)}#{m(:T)}/)
|
80
|
-
token(:IMPORTANT_SYM, /!(#{m(:w)}|#{m(:comment)})*#{m(:I)}#{m(:M)}#{m(:P)}#{m(:O)}#{m(:R)}#{m(:T)}#{m(:A)}#{m(:N)}#{m(:T)}/)
|
81
|
-
token(:EMS, /#{m(:num)}#{m(:E)}#{m(:M)}/)
|
82
|
-
token(:EXS, /#{m(:num)}#{m(:E)}#{m(:X)}/)
|
83
|
-
|
84
|
-
token :LENGTH do |patterns|
|
85
|
-
patterns << /#{m(:num)}#{m(:P)}#{m(:X)}/
|
86
|
-
patterns << /#{m(:num)}#{m(:C)}#{m(:M)}/
|
87
|
-
patterns << /#{m(:num)}#{m(:M)}#{m(:M)}/
|
88
|
-
patterns << /#{m(:num)}#{m(:I)}#{m(:N)}/
|
89
|
-
patterns << /#{m(:num)}#{m(:P)}#{m(:T)}/
|
90
|
-
patterns << /#{m(:num)}#{m(:P)}#{m(:C)}/
|
91
|
-
end
|
92
|
-
|
93
|
-
token :ANGLE do |patterns|
|
94
|
-
patterns << /#{m(:num)}#{m(:D)}#{m(:E)}#{m(:G)}/
|
95
|
-
patterns << /#{m(:num)}#{m(:R)}#{m(:A)}#{m(:D)}/
|
96
|
-
patterns << /#{m(:num)}#{m(:G)}#{m(:R)}#{m(:A)}#{m(:D)}/
|
97
|
-
end
|
98
|
-
|
99
|
-
token :TIME do |patterns|
|
100
|
-
patterns << /#{m(:num)}#{m(:M)}#{m(:S)}/
|
101
|
-
patterns << /#{m(:num)}#{m(:S)}/
|
102
|
-
end
|
103
|
-
|
104
|
-
token :FREQ do |patterns|
|
105
|
-
patterns << /#{m(:num)}#{m(:H)}#{m(:Z)}/
|
106
|
-
patterns << /#{m(:num)}#{m(:K)}#{m(:H)}#{m(:Z)}/
|
107
|
-
end
|
108
|
-
|
109
|
-
token(:DIMENSION, /#{m(:num)}#{m(:ident)}/)
|
110
|
-
token(:PERCENTAGE, /#{m(:num)}%/)
|
111
|
-
token(:NUMBER, /#{m(:num)}/)
|
112
|
-
|
113
|
-
|
114
|
-
yield self if block_given?
|
115
|
-
end
|
116
|
-
|
117
|
-
def tokenize(input_data)
|
118
|
-
tokens = []
|
119
|
-
pos = 0
|
120
|
-
|
121
|
-
comment_pattern = /\/\*.*?\*\//m
|
122
|
-
comments = input_data.scan(comment_pattern)
|
123
|
-
non_comments = input_data.split(comment_pattern)
|
124
|
-
|
125
|
-
# Handle a small edge case, if our CSS is *only* comments,
|
126
|
-
# the split, zip, scan trick won't work
|
127
|
-
if non_comments.length == 0
|
128
|
-
tokens = comments.map { |x| Token.new(:COMMENT, x, nil) }
|
129
|
-
else
|
130
|
-
non_comments.zip(comments).each do |non_comment, comment|
|
131
|
-
non_comment.split(/url\([^\)]*\)/m).zip(
|
132
|
-
non_comment.scan(/url\([^\)]*\)/m)
|
133
|
-
).each do |non_url, url|
|
134
|
-
non_url.split(/"[^"]*"|'[^']*'/m).zip(
|
135
|
-
non_url.scan(/"[^"]*"|'[^']*'/m)
|
136
|
-
).each do |non_string, quoted_string|
|
137
|
-
if non_string.length > 0 && non_string =~ /\A\s*\Z/m
|
138
|
-
tokens << Token.new(:S, non_string, nil)
|
139
|
-
else
|
140
|
-
non_string.split(/[ \t\r\n\f]*(?![{}+>]*)/m).zip(
|
141
|
-
non_string.scan(/[ \t\r\n\f]*(?![{}+>]*)/m)
|
142
|
-
).each do |string, whitespace|
|
143
|
-
until string.empty?
|
144
|
-
token = nil
|
145
|
-
@lexemes.each do |lexeme|
|
146
|
-
match = lexeme.pattern.match(string)
|
147
|
-
if match
|
148
|
-
token = Token.new(lexeme.name, match.to_s, pos)
|
149
|
-
break
|
150
|
-
end
|
151
|
-
end
|
152
|
-
|
153
|
-
token ||= DelimiterToken.new(/^./.match(string).to_s, pos)
|
154
|
-
|
155
|
-
tokens << token
|
156
|
-
string = string.slice(Range.new(token.value.length, -1))
|
157
|
-
pos += token.value.length
|
158
|
-
end
|
159
|
-
tokens << Token.new(:S, whitespace, nil) if whitespace
|
160
|
-
end
|
161
|
-
end
|
162
|
-
tokens << Token.new(:STRING, quoted_string, nil) if quoted_string
|
163
|
-
end
|
164
|
-
tokens << Token.new(:URI, url, nil) if url
|
165
|
-
end
|
166
|
-
tokens << Token.new(:COMMENT, comment, nil) if comment
|
167
|
-
end
|
168
|
-
end
|
169
|
-
|
170
|
-
tokens
|
171
|
-
end
|
172
|
-
|
173
|
-
private
|
174
|
-
|
175
|
-
def token(name, pattern=nil, &block)
|
176
|
-
@lexemes << Lexeme.new(name, pattern, &block)
|
177
|
-
end
|
178
|
-
|
179
|
-
def macro(name, regex=nil)
|
180
|
-
regex ? @macros[name] = regex : @macros[name].source
|
181
|
-
end
|
182
|
-
|
183
|
-
alias :m :macro
|
184
|
-
end
|
185
|
-
end
|
1
|
+
require "antisamy/csspool/rsac/sac/lexeme"
|
2
|
+
require "antisamy/csspool/rsac/sac/token"
|
3
|
+
|
4
|
+
module RSAC
  # Splits CSS text into Token objects.
  #
  # The constructor registers a set of named regular-expression fragments
  # ("macros") and then a list of lexemes built from them; #tokenize uses
  # the lexemes, in registration order, to lex the parts of the input that
  # are not comments, url(...) groups or quoted strings.
  class Tokenizer
    # Builds the macro table and the lexeme list. If a block is given the
    # new tokenizer is yielded to it after the built-in lexemes are set up.
    def initialize(&block)
      @lexemes = []
      @macros = {}

      # http://www.w3.org/TR/CSS21/syndata.html
      macro(:h, /([0-9a-f])/ )
      # Any non-ASCII character. Written as a negated ASCII class rather
      # than [\200-\377] because Ruby 1.9+ rejects raw high-byte escapes in
      # a regexp literal ("invalid multibyte escape"); on byte-oriented
      # Rubies the two forms match the same bytes.
      macro(:nonascii, /([^\x00-\x7F])/ )
      macro(:nl, /(\n|\r\n|\r|\f)/ )
      macro(:unicode, /(\\#{m(:h)}{1,6}(\r\n|[ \t\r\n\f])?)/ )
      macro(:escape, /(#{m(:unicode)}|\\[^\r\n\f0-9a-f])/ )
      macro(:nmstart, /([_a-z]|#{m(:nonascii)}|#{m(:escape)})/ )
      macro(:nmchar, /([_a-z0-9-]|#{m(:nonascii)}|#{m(:escape)})/ )
      macro(:string1, /(\"([^\n\r\f\\\"]|\\#{m(:nl)}|#{m(:escape)})*\")/ )
      macro(:string2, /(\'([^\n\r\f\\']|\\#{m(:nl)}|#{m(:escape)})*\')/ )
      macro(:invalid1, /(\"([^\n\r\f\\\"]|\\#{m(:nl)}|#{m(:escape)})*)/ )
      macro(:invalid2, /(\'([^\n\r\f\\']|\\#{m(:nl)}|#{m(:escape)})*)/ )
      macro(:comment, /(\/\*[^*]*\*+([^\/*][^*]*\*+)*\/)/ )
      macro(:ident, /(-?#{m(:nmstart)}#{m(:nmchar)}*)/ )
      macro(:name, /(#{m(:nmchar)}+)/ )
      macro(:num, /([0-9]+|[0-9]*\.[0-9]+)/ )
      macro(:string, /(#{m(:string1)}|#{m(:string2)})/ )
      macro(:invalid, /(#{m(:invalid1)}|#{m(:invalid2)})/ )
      macro(:url, /(([!#\$%&*-~]|#{m(:nonascii)}|#{m(:escape)})*)/ )
      macro(:s, /([ \t\r\n\f]+)/ )
      macro(:w, /(#{m(:s)}?)/ )

      # Letter macros: the letter itself, its hex escape (upper- or
      # lower-case code point, up to four leading zeros, optional trailing
      # whitespace) and, for most letters, the backslash-letter form.
      macro(:A, /(a|\\0{0,4}(41|61)(\r\n|[ \t\r\n\f])?)/ )
      macro(:C, /(c|\\0{0,4}(43|63)(\r\n|[ \t\r\n\f])?)/ )
      macro(:D, /(d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?)/ )
      macro(:E, /(e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?)/ )
      macro(:G, /(g|\\0{0,4}(47|67)(\r\n|[ \t\r\n\f])?|\\g)/ )
      macro(:H, /(h|\\0{0,4}(48|68)(\r\n|[ \t\r\n\f])?|\\h)/ )
      macro(:I, /(i|\\0{0,4}(49|69)(\r\n|[ \t\r\n\f])?|\\i)/ )
      macro(:K, /(k|\\0{0,4}(4b|6b)(\r\n|[ \t\r\n\f])?|\\k)/ )
      macro(:M, /(m|\\0{0,4}(4d|6d)(\r\n|[ \t\r\n\f])?|\\m)/ )
      macro(:N, /(n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n)/ )
      # "O"/"o" are code points 4f/6f (51/71 would be "Q"/"q").
      macro(:O, /(o|\\0{0,4}(4f|6f)(\r\n|[ \t\r\n\f])?|\\o)/ )
      macro(:P, /(p|\\0{0,4}(50|70)(\r\n|[ \t\r\n\f])?|\\p)/ )
      macro(:R, /(r|\\0{0,4}(52|72)(\r\n|[ \t\r\n\f])?|\\r)/ )
      macro(:S, /(s|\\0{0,4}(53|73)(\r\n|[ \t\r\n\f])?|\\s)/ )
      macro(:T, /(t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t)/ )
      macro(:X, /(x|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\x)/ )
      macro(:Z, /(z|\\0{0,4}(5a|7a)(\r\n|[ \t\r\n\f])?|\\z)/ )

      #token :COMMENT do |patterns|
      #  patterns << /\/\*[^*]*\*+([^\/*][^*]*\*+)*\//
      #  patterns << /#{m(:s)}+\/\*[^*]*\*+([^\/*][^*]*\*+)*\//
      #end

      token(:LBRACE, /#{m(:w)}\{/)
      token(:PLUS, /#{m(:w)}\+/)
      token(:GREATER, /#{m(:w)}>/)
      token(:COMMA, /#{m(:w)},/)

      token(:S, /#{m(:s)}/)

      #token :URI do |patterns|
      #  patterns << /url\(#{m(:w)}#{m(:string)}#{m(:w)}\)/
      #  patterns << /url\(#{m(:w)}#{m(:url)}#{m(:w)}\)/
      #end

      token(:FUNCTION, /#{m(:ident)}\(/)
      token(:IDENT, /#{m(:ident)}/)

      token(:CDO, /<!--/)
      token(:CDC, /-->/)
      token(:INCLUDES, /~=/)
      token(:DASHMATCH, /\|=/)
      #token(:STRING, /#{m(:string)}/)
      token(:INVALID, /#{m(:invalid)}/)
      token(:HASH, /##{m(:name)}/)
      token(:IMPORT_SYM, /@#{m(:I)}#{m(:M)}#{m(:P)}#{m(:O)}#{m(:R)}#{m(:T)}/)
      token(:PAGE_SYM, /@#{m(:P)}#{m(:A)}#{m(:G)}#{m(:E)}/)
      token(:MEDIA_SYM, /@#{m(:M)}#{m(:E)}#{m(:D)}#{m(:I)}#{m(:A)}/)
      token(:CHARSET_SYM, /@#{m(:C)}#{m(:H)}#{m(:A)}#{m(:R)}#{m(:S)}#{m(:E)}#{m(:T)}/)
      token(:IMPORTANT_SYM, /!(#{m(:w)}|#{m(:comment)})*#{m(:I)}#{m(:M)}#{m(:P)}#{m(:O)}#{m(:R)}#{m(:T)}#{m(:A)}#{m(:N)}#{m(:T)}/)
      token(:EMS, /#{m(:num)}#{m(:E)}#{m(:M)}/)
      token(:EXS, /#{m(:num)}#{m(:E)}#{m(:X)}/)

      token :LENGTH do |patterns|
        patterns << /#{m(:num)}#{m(:P)}#{m(:X)}/
        patterns << /#{m(:num)}#{m(:C)}#{m(:M)}/
        patterns << /#{m(:num)}#{m(:M)}#{m(:M)}/
        patterns << /#{m(:num)}#{m(:I)}#{m(:N)}/
        patterns << /#{m(:num)}#{m(:P)}#{m(:T)}/
        patterns << /#{m(:num)}#{m(:P)}#{m(:C)}/
      end

      token :ANGLE do |patterns|
        patterns << /#{m(:num)}#{m(:D)}#{m(:E)}#{m(:G)}/
        patterns << /#{m(:num)}#{m(:R)}#{m(:A)}#{m(:D)}/
        patterns << /#{m(:num)}#{m(:G)}#{m(:R)}#{m(:A)}#{m(:D)}/
      end

      token :TIME do |patterns|
        patterns << /#{m(:num)}#{m(:M)}#{m(:S)}/
        patterns << /#{m(:num)}#{m(:S)}/
      end

      token :FREQ do |patterns|
        patterns << /#{m(:num)}#{m(:H)}#{m(:Z)}/
        patterns << /#{m(:num)}#{m(:K)}#{m(:H)}#{m(:Z)}/
      end

      token(:DIMENSION, /#{m(:num)}#{m(:ident)}/)
      token(:PERCENTAGE, /#{m(:num)}%/)
      token(:NUMBER, /#{m(:num)}/)


      yield self if block_given?
    end

    # Converts +input_data+ (CSS text) into an array of Token objects.
    #
    # The input is peeled in layers: comments first, then url(...) groups,
    # then quoted strings; each of those becomes a single :COMMENT, :URI or
    # :STRING token with a nil position. What remains is lexed with the
    # registered lexemes, falling back to a one-character DelimiterToken
    # when no lexeme matches. Only tokens produced by that lexing loop
    # carry (and advance) the running position.
    def tokenize(input_data)
      tokens = []
      pos = 0

      comment_pattern = /\/\*.*?\*\//m
      comments = input_data.scan(comment_pattern)
      non_comments = input_data.split(comment_pattern)

      # Handle a small edge case, if our CSS is *only* comments,
      # the split, zip, scan trick won't work
      if non_comments.length == 0
        tokens = comments.map { |x| Token.new(:COMMENT, x, nil) }
      else
        # split gives the text between matches and scan gives the matches,
        # so zipping them re-interleaves the input in its original order.
        non_comments.zip(comments).each do |non_comment, comment|
          non_comment.split(/url\([^\)]*\)/m).zip(
            non_comment.scan(/url\([^\)]*\)/m)
          ).each do |non_url, url|
            non_url.split(/"[^"]*"|'[^']*'/m).zip(
              non_url.scan(/"[^"]*"|'[^']*'/m)
            ).each do |non_string, quoted_string|
              if non_string.length > 0 && non_string =~ /\A\s*\Z/m
                # A chunk consisting solely of whitespace is one :S token.
                tokens << Token.new(:S, non_string, nil)
              else
                # NOTE(review): the negative lookahead below wraps a
                # sub-pattern that can match the empty string, so this
                # pattern looks like it can never match; the chunk would
                # then be lexed whole and +whitespace+ would always be nil.
                # Confirm before relying on the whitespace branch.
                non_string.split(/[ \t\r\n\f]*(?![{}+>]*)/m).zip(
                  non_string.scan(/[ \t\r\n\f]*(?![{}+>]*)/m)
                ).each do |string, whitespace|
                  until string.empty?
                    token = nil
                    # First registered lexeme that matches wins.
                    @lexemes.each do |lexeme|
                      match = lexeme.pattern.match(string)
                      if match
                        token = Token.new(lexeme.name, match.to_s, pos)
                        break
                      end
                    end

                    # No lexeme matched: emit the next character as a delimiter.
                    token ||= DelimiterToken.new(/^./.match(string).to_s, pos)

                    tokens << token
                    # Drop the consumed text and advance the position.
                    string = string.slice(Range.new(token.value.length, -1))
                    pos += token.value.length
                  end
                  tokens << Token.new(:S, whitespace, nil) if whitespace
                end
              end
              tokens << Token.new(:STRING, quoted_string, nil) if quoted_string
            end
            tokens << Token.new(:URI, url, nil) if url
          end
          tokens << Token.new(:COMMENT, comment, nil) if comment
        end
      end

      tokens
    end

    private

    # Registers a lexeme called +name+, built either from +pattern+ or from
    # the patterns the block appends to the array it is given.
    def token(name, pattern=nil, &block)
      @lexemes << Lexeme.new(name, pattern, &block)
    end

    # With a regex: stores it under +name+. Without one: returns the source
    # text of the macro previously stored under +name+, for interpolation
    # into other patterns.
    def macro(name, regex=nil)
      regex ? @macros[name] = regex : @macros[name].source
    end

    alias :m :macro
  end
end
|