antisamy 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.rdoc +13 -0
- data/LICENSE.txt +20 -20
- data/README.rdoc +41 -41
- data/lib/antisamy.rb +46 -46
- data/lib/antisamy/css/css_filter.rb +187 -187
- data/lib/antisamy/css/css_scanner.rb +84 -84
- data/lib/antisamy/css/css_validator.rb +128 -128
- data/lib/antisamy/csspool/rsac.rb +1 -1
- data/lib/antisamy/csspool/rsac/sac.rb +14 -14
- data/lib/antisamy/csspool/rsac/sac/conditions.rb +5 -5
- data/lib/antisamy/csspool/rsac/sac/conditions/attribute_condition.rb +50 -50
- data/lib/antisamy/csspool/rsac/sac/conditions/begin_hyphen_condition.rb +18 -18
- data/lib/antisamy/csspool/rsac/sac/conditions/class_condition.rb +18 -18
- data/lib/antisamy/csspool/rsac/sac/conditions/combinator_condition.rb +36 -36
- data/lib/antisamy/csspool/rsac/sac/conditions/condition.rb +29 -29
- data/lib/antisamy/csspool/rsac/sac/conditions/id_condition.rb +23 -23
- data/lib/antisamy/csspool/rsac/sac/conditions/one_of_condition.rb +18 -18
- data/lib/antisamy/csspool/rsac/sac/conditions/pseudo_class_condition.rb +20 -20
- data/lib/antisamy/csspool/rsac/sac/document_handler.rb +66 -66
- data/lib/antisamy/csspool/rsac/sac/error_handler.rb +13 -13
- data/lib/antisamy/csspool/rsac/sac/generated_parser.rb +1012 -1012
- data/lib/antisamy/csspool/rsac/sac/generated_property_parser.rb +9284 -9284
- data/lib/antisamy/csspool/rsac/sac/lexeme.rb +27 -27
- data/lib/antisamy/csspool/rsac/sac/lexical_unit.rb +201 -201
- data/lib/antisamy/csspool/rsac/sac/parse_exception.rb +4 -4
- data/lib/antisamy/csspool/rsac/sac/parser.rb +109 -109
- data/lib/antisamy/csspool/rsac/sac/property_parser.rb +44 -44
- data/lib/antisamy/csspool/rsac/sac/selectors.rb +5 -5
- data/lib/antisamy/csspool/rsac/sac/selectors/child_selector.rb +36 -36
- data/lib/antisamy/csspool/rsac/sac/selectors/conditional_selector.rb +45 -45
- data/lib/antisamy/csspool/rsac/sac/selectors/descendant_selector.rb +36 -36
- data/lib/antisamy/csspool/rsac/sac/selectors/element_selector.rb +35 -35
- data/lib/antisamy/csspool/rsac/sac/selectors/selector.rb +25 -25
- data/lib/antisamy/csspool/rsac/sac/selectors/sibling_selector.rb +35 -35
- data/lib/antisamy/csspool/rsac/sac/selectors/simple_selector.rb +21 -21
- data/lib/antisamy/csspool/rsac/sac/token.rb +25 -25
- data/lib/antisamy/csspool/rsac/sac/tokenizer.rb +185 -185
- data/lib/antisamy/csspool/rsac/stylesheet.rb +3 -3
- data/lib/antisamy/csspool/rsac/stylesheet/rule.rb +20 -20
- data/lib/antisamy/csspool/rsac/stylesheet/stylesheet.rb +76 -76
- data/lib/antisamy/html/handler.rb +112 -99
- data/lib/antisamy/html/sax_filter.rb +305 -302
- data/lib/antisamy/html/scanner.rb +47 -43
- data/lib/antisamy/model/attribute.rb +19 -19
- data/lib/antisamy/model/css_property.rb +39 -39
- data/lib/antisamy/model/tag.rb +31 -31
- data/lib/antisamy/policy.rb +577 -545
- data/lib/antisamy/scan_results.rb +89 -89
- data/spec/antisamy_spec.rb +208 -142
- data/spec/spec_helper.rb +12 -12
- metadata +79 -81
@@ -1,4 +1,4 @@
|
|
1
|
-
module RSAC
|
2
|
-
class ParseException < RuntimeError
|
3
|
-
end
|
4
|
-
end
|
1
|
+
module RSAC
  # Named error class in the RSAC namespace: a plain RuntimeError subclass
  # that adds no behavior of its own.
  class ParseException < RuntimeError; end
end
|
@@ -1,109 +1,109 @@
|
|
1
|
-
require "antisamy/csspool/rsac/sac/document_handler"
|
2
|
-
require "antisamy/csspool/rsac/sac/error_handler"
|
3
|
-
require "antisamy/csspool/rsac/sac/generated_parser"
|
4
|
-
require "antisamy/csspool/rsac/sac/lexical_unit"
|
5
|
-
require "antisamy/csspool/rsac/sac/parse_exception"
|
6
|
-
require "antisamy/csspool/rsac/sac/tokenizer"
|
7
|
-
require "antisamy/csspool/rsac/sac/property_parser"
|
8
|
-
|
9
|
-
module RSAC
|
10
|
-
class Parser < RSAC::GeneratedParser
|
11
|
-
# The version of CSSPool you're using
|
12
|
-
VERSION = '0.2.7'
|
13
|
-
|
14
|
-
TOKENIZER = Tokenizer.new
|
15
|
-
|
16
|
-
attr_accessor :document_handler, :error_handler, :logger
|
17
|
-
|
18
|
-
def initialize(document_handler = StyleSheet.new(self))
|
19
|
-
@error_handler = ErrorHandler.new
|
20
|
-
@document_handler = document_handler
|
21
|
-
@property_parser = PropertyParser.new()
|
22
|
-
@tokenizer = TOKENIZER
|
23
|
-
@logger = nil
|
24
|
-
end
|
25
|
-
|
26
|
-
def parse_style_sheet(string)
|
27
|
-
@yydebug = true
|
28
|
-
@tokens = TOKENIZER.tokenize(string)
|
29
|
-
@position = 0
|
30
|
-
|
31
|
-
self.document_handler.start_document(string)
|
32
|
-
do_parse
|
33
|
-
self.document_handler.end_document(string)
|
34
|
-
self.document_handler
|
35
|
-
end
|
36
|
-
|
37
|
-
alias :parse :parse_style_sheet
|
38
|
-
|
39
|
-
def parse_rule(rule)
|
40
|
-
returner = Class.new(DocumentHandler) {
|
41
|
-
attr_accessor :selector
|
42
|
-
alias :start_selector :selector=
|
43
|
-
}.new
|
44
|
-
old_document_handler = self.document_handler
|
45
|
-
self.document_handler = returner
|
46
|
-
self.parse("#{rule} { }")
|
47
|
-
self.document_handler = old_document_handler
|
48
|
-
returner.selector
|
49
|
-
end
|
50
|
-
|
51
|
-
# Returns the parser version. We return CSS2, but its actually
|
52
|
-
# CSS2.1. No font-face tags. Sorry.
|
53
|
-
def parser_version
|
54
|
-
"http://www.w3.org/TR/REC-CSS2"
|
55
|
-
end
|
56
|
-
|
57
|
-
attr_reader :property_parser
|
58
|
-
attr_reader :tokenizer
|
59
|
-
|
60
|
-
private # Bro.
|
61
|
-
|
62
|
-
# We have to eliminate matching pairs.
|
63
|
-
# http://www.w3.org/TR/CSS21/syndata.html#parsing-errors
|
64
|
-
# See the malformed declarations section
|
65
|
-
def eliminate_pair_matches(error_value)
|
66
|
-
pairs = {}
|
67
|
-
pairs['"'] = '"'
|
68
|
-
pairs["'"] = "'"
|
69
|
-
pairs['{'] = '}'
|
70
|
-
pairs['['] = ']'
|
71
|
-
pairs['('] = ')'
|
72
|
-
|
73
|
-
error_value.to_s.strip!
|
74
|
-
if pairs[error_value]
|
75
|
-
logger.warn("Eliminating pair for: #{error_value}") if logger
|
76
|
-
loop {
|
77
|
-
token = next_token
|
78
|
-
eliminate_pair_matches(token[1])
|
79
|
-
logger.warn("Eliminated token: #{token.join(' ')}") if logger
|
80
|
-
if token[1] == pairs[error_value]
|
81
|
-
@position -= 1
|
82
|
-
@tokens[@position] = Token.new(:S, ' ', nil) # super hack
|
83
|
-
break
|
84
|
-
end
|
85
|
-
}
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
def on_error(error_token_id, error_value, value_stack)
|
90
|
-
if logger
|
91
|
-
logger.error(token_to_str(error_token_id))
|
92
|
-
logger.error("error value: #{error_value}")
|
93
|
-
end
|
94
|
-
eliminate_pair_matches(error_value)
|
95
|
-
end
|
96
|
-
|
97
|
-
def next_token
|
98
|
-
return [false, false] if @position >= @tokens.length
|
99
|
-
|
100
|
-
n_token = @tokens[@position]
|
101
|
-
@position += 1
|
102
|
-
if n_token.name == :COMMENT
|
103
|
-
self.document_handler.comment(n_token.value)
|
104
|
-
return next_token
|
105
|
-
end
|
106
|
-
n_token.to_racc_token
|
107
|
-
end
|
108
|
-
end
|
109
|
-
end
|
1
|
+
require "antisamy/csspool/rsac/sac/document_handler"
|
2
|
+
require "antisamy/csspool/rsac/sac/error_handler"
|
3
|
+
require "antisamy/csspool/rsac/sac/generated_parser"
|
4
|
+
require "antisamy/csspool/rsac/sac/lexical_unit"
|
5
|
+
require "antisamy/csspool/rsac/sac/parse_exception"
|
6
|
+
require "antisamy/csspool/rsac/sac/tokenizer"
|
7
|
+
require "antisamy/csspool/rsac/sac/property_parser"
|
8
|
+
|
9
|
+
module RSAC
  # CSS parser built on the racc-generated RSAC::GeneratedParser. It feeds
  # tokens produced by the shared tokenizer to the grammar and reports
  # document start/end and comments to the current document handler.
  class Parser < RSAC::GeneratedParser
    # The version of CSSPool you're using
    VERSION = '0.2.7'

    # A single tokenizer instance shared by every Parser.
    TOKENIZER = Tokenizer.new

    attr_accessor :document_handler, :error_handler, :logger

    # document_handler:: object that receives the parse callbacks; defaults
    #                    to a new StyleSheet built around this parser.
    def initialize(document_handler = StyleSheet.new(self))
      @error_handler = ErrorHandler.new
      @document_handler = document_handler
      @property_parser = PropertyParser.new()
      @tokenizer = TOKENIZER
      @logger = nil
    end

    # Tokenizes +string+, runs the grammar over the tokens and returns the
    # document handler. The handler is told about the start and the end of
    # the document, each time with the original string.
    def parse_style_sheet(string)
      # NOTE(review): presumably only has an effect when the grammar was
      # generated with racc debugging enabled -- confirm.
      @yydebug = true
      @tokens = TOKENIZER.tokenize(string)
      @position = 0

      self.document_handler.start_document(string)
      do_parse
      self.document_handler.end_document(string)
      self.document_handler
    end

    alias :parse :parse_style_sheet

    # Parses +rule+ as the selector part of an empty rule set ("rule { }")
    # and returns whatever the parser passes to +start_selector+. A
    # throwaway document handler captures that value; the handler that was
    # installed before the call is always put back, even when parsing
    # raises.
    def parse_rule(rule)
      returner = Class.new(DocumentHandler) {
        attr_accessor :selector
        alias :start_selector :selector=
      }.new
      old_document_handler = self.document_handler
      self.document_handler = returner
      begin
        self.parse("#{rule} { }")
      ensure
        # Without this, an exception would leave the temporary handler
        # installed on the parser for every later parse.
        self.document_handler = old_document_handler
      end
      returner.selector
    end

    # Returns the parser version.  We return CSS2, but it's actually
    # CSS2.1.  No font-face tags.  Sorry.
    def parser_version
      "http://www.w3.org/TR/REC-CSS2"
    end

    attr_reader :property_parser
    attr_reader :tokenizer

    private # Bro.

    # We have to eliminate matching pairs.
    # http://www.w3.org/TR/CSS21/syndata.html#parsing-errors
    # See the malformed declarations section
    #
    # When +error_value+ opens a pair (quote, brace, bracket or paren),
    # consumes tokens up to and including the matching closer, recursing for
    # openers met on the way, and then replaces the closer with a whitespace
    # token so the grammar resumes on neutral input. Stops quietly if the
    # input ends before the closer is found.
    def eliminate_pair_matches(error_value)
      pairs = {
        '"' => '"',
        "'" => "'",
        '{' => '}',
        '[' => ']',
        '(' => ')'
      }

      # to_s returns the receiver itself for a String, so this strips the
      # token value in place; the closer comparison below depends on the
      # value having been stripped by the recursive call.
      error_value.to_s.strip!
      closer = pairs[error_value]
      return unless closer

      logger.warn("Eliminating pair for: #{error_value}") if logger
      loop {
        token = next_token
        # next_token answers [false, false] forever once the input is
        # exhausted; an opener that is never closed would otherwise keep
        # this loop spinning indefinitely.
        break unless token[0]

        eliminate_pair_matches(token[1])
        logger.warn("Eliminated token: #{token.join(' ')}") if logger
        if token[1] == closer
          @position -= 1
          @tokens[@position] = Token.new(:S, ' ', nil) # super hack
          break
        end
      }
    end

    # Error hook invoked by the generated parser: logs the offending token
    # when a logger is set, then skips any pair opened by +error_value+.
    def on_error(error_token_id, error_value, value_stack)
      if logger
        logger.error(token_to_str(error_token_id))
        logger.error("error value: #{error_value}")
      end
      eliminate_pair_matches(error_value)
    end

    # Hands the next token to the grammar, or [false, false] when the input
    # is exhausted. Comment tokens never reach the grammar: each one is
    # reported to the document handler and skipped. Iterative on purpose, so
    # a long run of comments cannot exhaust the call stack.
    def next_token
      while @position < @tokens.length
        n_token = @tokens[@position]
        @position += 1
        return n_token.to_racc_token unless n_token.name == :COMMENT

        self.document_handler.comment(n_token.value)
      end
      [false, false]
    end
  end
end
|
@@ -1,44 +1,44 @@
|
|
1
|
-
require "antisamy/csspool/rsac/sac/generated_property_parser"
|
2
|
-
|
3
|
-
module RSAC
|
4
|
-
class PropertyParser < RSAC::GeneratedPropertyParser
|
5
|
-
def initialize
|
6
|
-
@tokens = []
|
7
|
-
@token_table = Racc_arg[10]
|
8
|
-
end
|
9
|
-
|
10
|
-
def parse_tokens(tokens)
|
11
|
-
negate = false # Nasty hack for unary minus
|
12
|
-
@tokens = tokens.find_all { |x| x.name != :S }.map { |token|
|
13
|
-
tok = if @token_table.has_key?(token.value)
|
14
|
-
[token.value, token.value]
|
15
|
-
else
|
16
|
-
if token.name == :delim && !@token_table.has_key?(token.value)
|
17
|
-
negate = true if token.value == '-'
|
18
|
-
nil
|
19
|
-
else
|
20
|
-
token.to_racc_token
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
if negate && tok
|
25
|
-
tok[1] = "-#{tok[1]}"
|
26
|
-
negate = false
|
27
|
-
end
|
28
|
-
tok
|
29
|
-
}.compact
|
30
|
-
|
31
|
-
begin
|
32
|
-
return do_parse
|
33
|
-
rescue ParseError => e
|
34
|
-
return nil
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
private
|
39
|
-
def next_token
|
40
|
-
return [false, false] if @tokens.empty?
|
41
|
-
@tokens.shift
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
1
|
+
require "antisamy/csspool/rsac/sac/generated_property_parser"
|
2
|
+
|
3
|
+
module RSAC
  # Parser for property values, driven by the racc-generated
  # RSAC::GeneratedPropertyParser grammar.
  class PropertyParser < RSAC::GeneratedPropertyParser
    def initialize
      # Element 10 of the generated Racc_arg table; used below to recognise
      # token values the grammar knows literally (presumably racc's token
      # table -- confirm against the generated parser).
      @token_table = Racc_arg[10]
      @tokens = []
    end

    # Converts +tokens+ into the queue consumed by the grammar and runs the
    # parse. Returns the parse result, or nil when the grammar raises
    # ParseError.
    #
    # Whitespace (:S) tokens are dropped. A value found in the token table
    # is passed as a [value, value] pair. Any other :delim token is
    # discarded, except that a '-' is remembered and prefixed to the value
    # of the next token that is kept (unary minus).
    def parse_tokens(tokens)
      pending_minus = false
      significant = tokens.reject { |candidate| candidate.name == :S }

      @tokens = significant.each_with_object([]) do |token, queue|
        value = token.value
        pair =
          if @token_table.key?(value)
            [value, value]
          elsif token.name == :delim
            pending_minus = true if value == '-'
            nil
          else
            token.to_racc_token
          end
        next if pair.nil?

        if pending_minus && pair
          pair[1] = "-#{pair[1]}"
          pending_minus = false
        end
        queue << pair
      end

      begin
        do_parse
      rescue ParseError
        nil
      end
    end

    private

    # Supplies the grammar with the next queued token, or [false, false]
    # once the queue is empty.
    def next_token
      @tokens.empty? ? [false, false] : @tokens.shift
    end
  end
end
|
@@ -1,5 +1,5 @@
|
|
1
|
-
require "antisamy/csspool/rsac/sac/selectors/selector"
|
2
|
-
|
3
|
-
%w(simple child conditional descendant element sibling).each do |type|
|
4
|
-
require "antisamy/csspool/rsac/sac/selectors/#{type}_selector"
|
5
|
-
end
|
1
|
+
require "antisamy/csspool/rsac/sac/selectors/selector"
|
2
|
+
|
3
|
+
%w(simple child conditional descendant element sibling).each do |type|
|
4
|
+
require "antisamy/csspool/rsac/sac/selectors/#{type}_selector"
|
5
|
+
end
|
@@ -1,36 +1,36 @@
|
|
1
|
-
module RSAC
|
2
|
-
module Selectors
|
3
|
-
class ChildSelector < SimpleSelector
|
4
|
-
attr_accessor :ancestor_selector, :simple_selector
|
5
|
-
alias :parent :ancestor_selector
|
6
|
-
alias :selector :simple_selector
|
7
|
-
|
8
|
-
def initialize(parent, selector)
|
9
|
-
super(:SAC_CHILD_SELECTOR)
|
10
|
-
|
11
|
-
@ancestor_selector = parent
|
12
|
-
@simple_selector = selector
|
13
|
-
end
|
14
|
-
|
15
|
-
def to_css
|
16
|
-
"#{parent.to_css} > #{selector.to_css}"
|
17
|
-
end
|
18
|
-
|
19
|
-
def to_xpath(prefix=true)
|
20
|
-
"#{parent.to_xpath(prefix)}/#{selector.to_xpath(false)}"
|
21
|
-
end
|
22
|
-
|
23
|
-
def specificity
|
24
|
-
parent.specificity.zip(selector.specificity).map { |x,y| x + y }
|
25
|
-
end
|
26
|
-
|
27
|
-
def ==(other)
|
28
|
-
super && parent == other.parent && selector == other.selector
|
29
|
-
end
|
30
|
-
|
31
|
-
def hash
|
32
|
-
[parent, selector].hash
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
1
|
+
module RSAC
  module Selectors
    # Selector made of an ancestor selector and a simple selector, written
    # in CSS as "ancestor > simple".
    class ChildSelector < SimpleSelector
      attr_accessor :ancestor_selector, :simple_selector

      # Short reader names used throughout this class.
      alias_method :parent, :ancestor_selector
      alias_method :selector, :simple_selector

      # parent::   selector stored as the ancestor side
      # selector:: selector stored as the simple (right-hand) side
      def initialize(parent, selector)
        super(:SAC_CHILD_SELECTOR)

        @ancestor_selector = parent
        @simple_selector = selector
      end

      # CSS text: both sides joined by " > ".
      def to_css
        left = parent.to_css
        right = selector.to_css
        "#{left} > #{right}"
      end

      # XPath text: both sides joined by "/". +prefix+ is forwarded to the
      # ancestor side only; the simple side is always rendered with false.
      def to_xpath(prefix=true)
        left = parent.to_xpath(prefix)
        right = selector.to_xpath(false)
        "#{left}/#{right}"
      end

      # Element-wise sum of the two sides' specificity lists.
      def specificity
        ours = parent.specificity
        theirs = selector.specificity
        ours.zip(theirs).map { |a, b| a + b }
      end

      # Equal when the superclass comparison holds and both sides match.
      def ==(other)
        base_equal = super
        base_equal && parent == other.parent && selector == other.selector
      end

      # Hash derived from both sides.
      def hash
        [parent, selector].hash
      end
    end
  end
end
|
@@ -1,45 +1,45 @@
|
|
1
|
-
module RSAC
|
2
|
-
module Selectors
|
3
|
-
class ConditionalSelector < SimpleSelector
|
4
|
-
attr_accessor :condition, :simple_selector
|
5
|
-
alias :selector :simple_selector
|
6
|
-
|
7
|
-
def initialize(selector, condition)
|
8
|
-
super(:SAC_CONDITIONAL_SELECTOR)
|
9
|
-
|
10
|
-
@condition = condition
|
11
|
-
@simple_selector = selector
|
12
|
-
end
|
13
|
-
|
14
|
-
def to_css
|
15
|
-
[selector, condition].map { |x|
|
16
|
-
x ? x.to_css : ''
|
17
|
-
}.join('')
|
18
|
-
end
|
19
|
-
|
20
|
-
def to_xpath(prefix=true)
|
21
|
-
atoms = []
|
22
|
-
atoms << "//" if prefix
|
23
|
-
atoms << (selector ? selector.to_xpath(false) : "*")
|
24
|
-
atoms << condition.to_xpath
|
25
|
-
|
26
|
-
atoms.join("")
|
27
|
-
end
|
28
|
-
|
29
|
-
def specificity
|
30
|
-
(selector ? selector.specificity : ([0] * 4)).zip(
|
31
|
-
(condition ? condition.specificity : ([0] * 4))).map { |x,y|
|
32
|
-
x + y
|
33
|
-
}
|
34
|
-
end
|
35
|
-
|
36
|
-
def ==(other)
|
37
|
-
super && condition == other.condition && selector == other.selector
|
38
|
-
end
|
39
|
-
|
40
|
-
def hash
|
41
|
-
[condition, selector].hash
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
1
|
+
module RSAC
  module Selectors
    # Pairs a simple selector with a condition. Either part may be nil;
    # every rendering method below treats a missing part as absent.
    class ConditionalSelector < SimpleSelector
      attr_accessor :condition, :simple_selector
      alias :selector :simple_selector

      # selector::  the simple selector part (may be nil)
      # condition:: the condition part (may be nil)
      def initialize(selector, condition)
        super(:SAC_CONDITIONAL_SELECTOR)

        @condition = condition
        @simple_selector = selector
      end

      # CSS text: the selector's CSS followed by the condition's CSS, with a
      # missing part contributing the empty string.
      def to_css
        [selector, condition].map { |x|
          x ? x.to_css : ''
        }.join('')
      end

      # XPath text: an optional "//" prefix, the selector's XPath ("*" when
      # there is no selector), then the condition's XPath when there is one.
      def to_xpath(prefix=true)
        atoms = []
        atoms << "//" if prefix
        atoms << (selector ? selector.to_xpath(false) : "*")
        # A nil condition is tolerated here just as in to_css and
        # specificity, rather than raising NoMethodError.
        atoms << condition.to_xpath if condition

        atoms.join("")
      end

      # Element-wise sum of the selector's and the condition's specificity;
      # a missing part counts as four zeros.
      def specificity
        (selector ? selector.specificity : ([0] * 4)).zip(
          (condition ? condition.specificity : ([0] * 4))).map { |x,y|
          x + y
        }
      end

      # Equal when the superclass comparison holds and both parts match.
      def ==(other)
        super && condition == other.condition && selector == other.selector
      end

      # Hash derived from both parts.
      def hash
        [condition, selector].hash
      end
    end
  end
end
|