antisamy 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.rdoc +13 -0
- data/LICENSE.txt +20 -20
- data/README.rdoc +41 -41
- data/lib/antisamy.rb +46 -46
- data/lib/antisamy/css/css_filter.rb +187 -187
- data/lib/antisamy/css/css_scanner.rb +84 -84
- data/lib/antisamy/css/css_validator.rb +128 -128
- data/lib/antisamy/csspool/rsac.rb +1 -1
- data/lib/antisamy/csspool/rsac/sac.rb +14 -14
- data/lib/antisamy/csspool/rsac/sac/conditions.rb +5 -5
- data/lib/antisamy/csspool/rsac/sac/conditions/attribute_condition.rb +50 -50
- data/lib/antisamy/csspool/rsac/sac/conditions/begin_hyphen_condition.rb +18 -18
- data/lib/antisamy/csspool/rsac/sac/conditions/class_condition.rb +18 -18
- data/lib/antisamy/csspool/rsac/sac/conditions/combinator_condition.rb +36 -36
- data/lib/antisamy/csspool/rsac/sac/conditions/condition.rb +29 -29
- data/lib/antisamy/csspool/rsac/sac/conditions/id_condition.rb +23 -23
- data/lib/antisamy/csspool/rsac/sac/conditions/one_of_condition.rb +18 -18
- data/lib/antisamy/csspool/rsac/sac/conditions/pseudo_class_condition.rb +20 -20
- data/lib/antisamy/csspool/rsac/sac/document_handler.rb +66 -66
- data/lib/antisamy/csspool/rsac/sac/error_handler.rb +13 -13
- data/lib/antisamy/csspool/rsac/sac/generated_parser.rb +1012 -1012
- data/lib/antisamy/csspool/rsac/sac/generated_property_parser.rb +9284 -9284
- data/lib/antisamy/csspool/rsac/sac/lexeme.rb +27 -27
- data/lib/antisamy/csspool/rsac/sac/lexical_unit.rb +201 -201
- data/lib/antisamy/csspool/rsac/sac/parse_exception.rb +4 -4
- data/lib/antisamy/csspool/rsac/sac/parser.rb +109 -109
- data/lib/antisamy/csspool/rsac/sac/property_parser.rb +44 -44
- data/lib/antisamy/csspool/rsac/sac/selectors.rb +5 -5
- data/lib/antisamy/csspool/rsac/sac/selectors/child_selector.rb +36 -36
- data/lib/antisamy/csspool/rsac/sac/selectors/conditional_selector.rb +45 -45
- data/lib/antisamy/csspool/rsac/sac/selectors/descendant_selector.rb +36 -36
- data/lib/antisamy/csspool/rsac/sac/selectors/element_selector.rb +35 -35
- data/lib/antisamy/csspool/rsac/sac/selectors/selector.rb +25 -25
- data/lib/antisamy/csspool/rsac/sac/selectors/sibling_selector.rb +35 -35
- data/lib/antisamy/csspool/rsac/sac/selectors/simple_selector.rb +21 -21
- data/lib/antisamy/csspool/rsac/sac/token.rb +25 -25
- data/lib/antisamy/csspool/rsac/sac/tokenizer.rb +185 -185
- data/lib/antisamy/csspool/rsac/stylesheet.rb +3 -3
- data/lib/antisamy/csspool/rsac/stylesheet/rule.rb +20 -20
- data/lib/antisamy/csspool/rsac/stylesheet/stylesheet.rb +76 -76
- data/lib/antisamy/html/handler.rb +112 -99
- data/lib/antisamy/html/sax_filter.rb +305 -302
- data/lib/antisamy/html/scanner.rb +47 -43
- data/lib/antisamy/model/attribute.rb +19 -19
- data/lib/antisamy/model/css_property.rb +39 -39
- data/lib/antisamy/model/tag.rb +31 -31
- data/lib/antisamy/policy.rb +577 -545
- data/lib/antisamy/scan_results.rb +89 -89
- data/spec/antisamy_spec.rb +208 -142
- data/spec/spec_helper.rb +12 -12
- metadata +79 -81
@@ -1,3 +1,3 @@
|
|
1
|
-
require "antisamy/csspool/rsac/stylesheet/stylesheet"
|
2
|
-
require "antisamy/csspool/rsac/stylesheet/rule"
|
3
|
-
|
1
|
+
require "antisamy/csspool/rsac/stylesheet/stylesheet"
|
2
|
+
require "antisamy/csspool/rsac/stylesheet/rule"
|
3
|
+
|
@@ -1,20 +1,20 @@
|
|
1
|
-
require 'set'
|
2
|
-
module RSAC
|
3
|
-
class StyleSheet
|
4
|
-
class Rule
|
5
|
-
include Comparable
|
6
|
-
|
7
|
-
attr_accessor :selector, :properties, :index
|
8
|
-
def initialize(selector, index, properties = [])
|
9
|
-
@selector = selector
|
10
|
-
@properties = Set.new(properties)
|
11
|
-
@index = index
|
12
|
-
end
|
13
|
-
|
14
|
-
def <=>(other)
|
15
|
-
comp = selector.specificity <=> other.selector.specificity
|
16
|
-
comp == 0 ? index <=> other.index : comp
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|
1
|
+
require 'set'
|
2
|
+
module RSAC
|
3
|
+
class StyleSheet
|
4
|
+
class Rule
|
5
|
+
include Comparable
|
6
|
+
|
7
|
+
attr_accessor :selector, :properties, :index
|
8
|
+
def initialize(selector, index, properties = [])
|
9
|
+
@selector = selector
|
10
|
+
@properties = Set.new(properties)
|
11
|
+
@index = index
|
12
|
+
end
|
13
|
+
|
14
|
+
def <=>(other)
|
15
|
+
comp = selector.specificity <=> other.selector.specificity
|
16
|
+
comp == 0 ? index <=> other.index : comp
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -1,76 +1,76 @@
|
|
1
|
-
module RSAC
|
2
|
-
class StyleSheet < RSAC::DocumentHandler
|
3
|
-
attr_reader :rules
|
4
|
-
|
5
|
-
def initialize(sac)
|
6
|
-
@sac = sac
|
7
|
-
@rules = []
|
8
|
-
@current_rules = []
|
9
|
-
@selector_index = 0
|
10
|
-
end
|
11
|
-
|
12
|
-
def start_selector(selectors)
|
13
|
-
selectors.each { |selector|
|
14
|
-
@current_rules << Rule.new(selector, @selector_index)
|
15
|
-
}
|
16
|
-
end
|
17
|
-
|
18
|
-
def end_selector(selectors)
|
19
|
-
@rules += @current_rules
|
20
|
-
@current_rules = []
|
21
|
-
@selector_index += 1
|
22
|
-
reduce!
|
23
|
-
end
|
24
|
-
|
25
|
-
def find_rule(rule)
|
26
|
-
rule = self.create_rule(rule) if rule.is_a?(String)
|
27
|
-
rules.find { |x| x.selector == rule.selector }
|
28
|
-
end
|
29
|
-
alias :[] :find_rule
|
30
|
-
|
31
|
-
def create_rule(rule)
|
32
|
-
Rule.new(@sac.parse_rule(rule).first, @selector_index += 1)
|
33
|
-
end
|
34
|
-
|
35
|
-
def property(name, value, important)
|
36
|
-
@current_rules.each { |selector|
|
37
|
-
selector.properties << [name, value, important]
|
38
|
-
}
|
39
|
-
end
|
40
|
-
|
41
|
-
# Get a hash of rules by property
|
42
|
-
def rules_by_property
|
43
|
-
rules_by_property = Hash.new { |h,k| h[k] = [] }
|
44
|
-
@rules.each { |sel|
|
45
|
-
props = sel.properties.to_a.sort_by { |x| x.hash } # HACK?
|
46
|
-
rules_by_property[props] << sel
|
47
|
-
}
|
48
|
-
rules_by_property
|
49
|
-
end
|
50
|
-
|
51
|
-
def to_css
|
52
|
-
rules_by_property.map do |properties, rules|
|
53
|
-
rules.map { |rule| rule.selector.to_css }.sort.join(', ') + " {\n" +
|
54
|
-
properties.map { |key,value,important|
|
55
|
-
# Super annoying. If the property is font-family, its supposed to
|
56
|
-
# be commas
|
57
|
-
join_val = ('font-family' == key) ? ', ' : ' '
|
58
|
-
values = [value].flatten.join(join_val)
|
59
|
-
"#{key}:#{values}#{important ? ' !important' : ''};"
|
60
|
-
}.join("\n") + "\n}"
|
61
|
-
end.sort.join("\n")
|
62
|
-
end
|
63
|
-
|
64
|
-
private
|
65
|
-
# Remove duplicate rules
|
66
|
-
def reduce!
|
67
|
-
unique_rules = {}
|
68
|
-
@rules.each do |rule|
|
69
|
-
(unique_rules[rule.selector] ||= rule).properties += rule.properties
|
70
|
-
end
|
71
|
-
@rules = unique_rules.values
|
72
|
-
self
|
73
|
-
end
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
1
|
+
module RSAC
|
2
|
+
class StyleSheet < RSAC::DocumentHandler
|
3
|
+
attr_reader :rules
|
4
|
+
|
5
|
+
def initialize(sac)
|
6
|
+
@sac = sac
|
7
|
+
@rules = []
|
8
|
+
@current_rules = []
|
9
|
+
@selector_index = 0
|
10
|
+
end
|
11
|
+
|
12
|
+
def start_selector(selectors)
|
13
|
+
selectors.each { |selector|
|
14
|
+
@current_rules << Rule.new(selector, @selector_index)
|
15
|
+
}
|
16
|
+
end
|
17
|
+
|
18
|
+
def end_selector(selectors)
|
19
|
+
@rules += @current_rules
|
20
|
+
@current_rules = []
|
21
|
+
@selector_index += 1
|
22
|
+
reduce!
|
23
|
+
end
|
24
|
+
|
25
|
+
def find_rule(rule)
|
26
|
+
rule = self.create_rule(rule) if rule.is_a?(String)
|
27
|
+
rules.find { |x| x.selector == rule.selector }
|
28
|
+
end
|
29
|
+
alias :[] :find_rule
|
30
|
+
|
31
|
+
def create_rule(rule)
|
32
|
+
Rule.new(@sac.parse_rule(rule).first, @selector_index += 1)
|
33
|
+
end
|
34
|
+
|
35
|
+
def property(name, value, important)
|
36
|
+
@current_rules.each { |selector|
|
37
|
+
selector.properties << [name, value, important]
|
38
|
+
}
|
39
|
+
end
|
40
|
+
|
41
|
+
# Get a hash of rules by property
|
42
|
+
def rules_by_property
|
43
|
+
rules_by_property = Hash.new { |h,k| h[k] = [] }
|
44
|
+
@rules.each { |sel|
|
45
|
+
props = sel.properties.to_a.sort_by { |x| x.hash } # HACK?
|
46
|
+
rules_by_property[props] << sel
|
47
|
+
}
|
48
|
+
rules_by_property
|
49
|
+
end
|
50
|
+
|
51
|
+
def to_css
|
52
|
+
rules_by_property.map do |properties, rules|
|
53
|
+
rules.map { |rule| rule.selector.to_css }.sort.join(', ') + " {\n" +
|
54
|
+
properties.map { |key,value,important|
|
55
|
+
# Super annoying. If the property is font-family, it's supposed to
|
56
|
+
# be commas
|
57
|
+
join_val = ('font-family' == key) ? ', ' : ' '
|
58
|
+
values = [value].flatten.join(join_val)
|
59
|
+
"#{key}:#{values}#{important ? ' !important' : ''};"
|
60
|
+
}.join("\n") + "\n}"
|
61
|
+
end.sort.join("\n")
|
62
|
+
end
|
63
|
+
|
64
|
+
private
|
65
|
+
# Remove duplicate rules
|
66
|
+
def reduce!
|
67
|
+
unique_rules = {}
|
68
|
+
@rules.each do |rule|
|
69
|
+
(unique_rules[rule.selector] ||= rule).properties += rule.properties
|
70
|
+
end
|
71
|
+
@rules = unique_rules.values
|
72
|
+
self
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
@@ -1,99 +1,112 @@
|
|
1
|
-
module AntiSamy
|
2
|
-
|
3
|
-
class Handler
|
4
|
-
|
5
|
-
attr_accessor :errors
|
6
|
-
def initialize(policy,output) #:nodoc:
|
7
|
-
@document = Nokogiri::HTML::DocumentFragment.parse("")
|
8
|
-
@current_node = @document
|
9
|
-
@policy = policy
|
10
|
-
@preserve_whitespace = @policy.directive(Policy::PRESERVE_SPACE)
|
11
|
-
@errors = []
|
12
|
-
@output_encoding = output
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
end
|
1
|
+
module AntiSamy
|
2
|
+
|
3
|
+
class Handler
|
4
|
+
|
5
|
+
attr_accessor :errors
|
6
|
+
def initialize(policy,output,fragment = true) #:nodoc:
|
7
|
+
@document = Nokogiri::HTML::DocumentFragment.parse("")
|
8
|
+
@current_node = @document
|
9
|
+
@policy = policy
|
10
|
+
@preserve_whitespace = @policy.directive(Policy::PRESERVE_SPACE)
|
11
|
+
@errors = []
|
12
|
+
@output_encoding = output
|
13
|
+
@fragment = fragment
|
14
|
+
end
|
15
|
+
|
16
|
+
# HTML entity encode some text
|
17
|
+
def encode_text(text)
|
18
|
+
return "" if text.nil?
|
19
|
+
@document.encode_special_chars(text)
|
20
|
+
end
|
21
|
+
|
22
|
+
# create a cdata section
|
23
|
+
def cdata(text)
|
24
|
+
node = Nokogiri::XML::CDATA.new(@document,text)
|
25
|
+
@current_node.add_child(node)
|
26
|
+
end
|
27
|
+
|
28
|
+
# create a comment
|
29
|
+
def comment(text) #:nodoc:
|
30
|
+
@current_node.add_child(Nokogiri::XML::Comment.new(@document, text))
|
31
|
+
end
|
32
|
+
|
33
|
+
# create a text node
|
34
|
+
def characters(text)
|
35
|
+
node = @current_node.children.last
|
36
|
+
if node and node.text?
|
37
|
+
node.content += text
|
38
|
+
else
|
39
|
+
@current_node.add_child(Nokogiri::XML::Text.new(text, @document))
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
# start an element
|
44
|
+
def start_element(name,attributes)
|
45
|
+
if @fragment
|
46
|
+
if name.eql?("head") or name.eql?("body") or name.eql?("html")
|
47
|
+
return
|
48
|
+
end
|
49
|
+
end
|
50
|
+
elem = Nokogiri::XML::Element.new(name, @document)
|
51
|
+
attributes.each do |attrib_pair|
|
52
|
+
elem[attrib_pair.first] = attrib_pair.last
|
53
|
+
end
|
54
|
+
# Special param tag hacking, as libxml/nokogiri doesn't generate an end tag
|
55
|
+
# for param tags it seems
|
56
|
+
if name.eql?("param")
|
57
|
+
inner_html = "<param"
|
58
|
+
attributes.each do |attrib_pair|
|
59
|
+
inner_html<< " #{attrib_pair.first}=\"#{attrib_pair.last}\""
|
60
|
+
end
|
61
|
+
inner_html << "/>"
|
62
|
+
# we create a fake cdata node, add it *and* don't move our parent yet
|
63
|
+
elem = Nokogiri::XML::CDATA.new(@document,inner_html)
|
64
|
+
@current_node.add_child(elem)
|
65
|
+
return
|
66
|
+
end
|
67
|
+
@current_node = @current_node.add_child(elem)
|
68
|
+
end
|
69
|
+
|
70
|
+
#end an element
|
71
|
+
def end_element(name)
|
72
|
+
if @current_node.nil? or !@current_node.name.eql?(name)
|
73
|
+
return
|
74
|
+
end
|
75
|
+
if @current_node.children.empty?
|
76
|
+
if @policy.allow_empty?(@current_node.name)
|
77
|
+
@current_node = @current_node.parent if @current_node.parent
|
78
|
+
else
|
79
|
+
tnode = @current_node
|
80
|
+
@current_node = @current_node.parent if @current_node.parent
|
81
|
+
tnode.remove
|
82
|
+
end
|
83
|
+
else
|
84
|
+
@current_node = @current_node.parent if @current_node.parent
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
# format the output applying any policy rules
|
89
|
+
def document
|
90
|
+
# check some directives
|
91
|
+
indent = 0
|
92
|
+
options = Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS
|
93
|
+
if @policy.directive(Policy::FORMAT_OUTPUT)
|
94
|
+
options |= Nokogiri::XML::Node::SaveOptions::FORMAT
|
95
|
+
indent = 2
|
96
|
+
end
|
97
|
+
if @policy.directive(Policy::OMIT_DOC_TYPE) || @policy.directive(Policy::OMIT_XML_DECL)
|
98
|
+
options |= Nokogiri::XML::Node::SaveOptions::NO_DECLARATION
|
99
|
+
end
|
100
|
+
|
101
|
+
clean = ""
|
102
|
+
if @policy.directive(Policy::USE_XHTML)
|
103
|
+
options |= Nokogiri::XML::Node::SaveOptions::AS_XHTML
|
104
|
+
clean = @document.to_xhtml(:encoding => @output_encoding, :indent=>indent,:save_with=>options)
|
105
|
+
else
|
106
|
+
clean = @document.to_html(:encoding => @output_encoding, :indent=>indent,:save_with=>options)
|
107
|
+
end
|
108
|
+
return clean
|
109
|
+
end
|
110
|
+
|
111
|
+
end
|
112
|
+
end
|
@@ -1,302 +1,305 @@
|
|
1
|
-
module AntiSamy
|
2
|
-
# Quick and Dirty Stack class
|
3
|
-
class Stack
|
4
|
-
def initialize
|
5
|
-
@stack = []
|
6
|
-
end
|
7
|
-
# push an emement ont he stack
|
8
|
-
def push(v)
|
9
|
-
@stack.push v
|
10
|
-
end
|
11
|
-
# pop an element off the stack
|
12
|
-
def pop
|
13
|
-
@stack.pop
|
14
|
-
end
|
15
|
-
# size of stack
|
16
|
-
def size
|
17
|
-
@stack.size
|
18
|
-
end
|
19
|
-
# is the stack empty
|
20
|
-
def empty?
|
21
|
-
@stack.empty?
|
22
|
-
end
|
23
|
-
# peek to see what next element is
|
24
|
-
def peek?(v)
|
25
|
-
return false if @stack.empty?
|
26
|
-
return @stack.last.eql?(v)
|
27
|
-
end
|
28
|
-
|
29
|
-
def peek
|
30
|
-
@stack.last
|
31
|
-
end
|
32
|
-
|
33
|
-
end
|
34
|
-
|
35
|
-
class SaxFilter < Nokogiri::XML::SAX::Document
|
36
|
-
def initialize(policy,handler,param_tag)
|
37
|
-
@policy = policy
|
38
|
-
@handler = handler
|
39
|
-
@stack = Stack.new
|
40
|
-
@css_content = nil
|
41
|
-
@css_attributes = nil
|
42
|
-
@css_scanner = CssScanner.new(policy)
|
43
|
-
@param_tag = param_tag
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
if
|
60
|
-
|
61
|
-
|
62
|
-
text.gsub!(%r{
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
m
|
76
|
-
m[
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
@
|
122
|
-
|
123
|
-
|
124
|
-
#
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
elsif tag.action.eql?(Policy::
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
@handler.errors <<
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
@handler.errors << ScanMessage.new(ScanMessage::
|
193
|
-
|
194
|
-
elsif !valid
|
195
|
-
@handler.errors << ScanMessage.new(ScanMessage::
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
if
|
215
|
-
valid_attributes
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
@stack.push(:
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
@handler.
|
275
|
-
@handler.
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
@
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
end
|
1
|
+
module AntiSamy
|
2
|
+
# Quick and Dirty Stack class
|
3
|
+
class Stack
|
4
|
+
def initialize
|
5
|
+
@stack = []
|
6
|
+
end
|
7
|
+
# push an element on the stack
|
8
|
+
def push(v)
|
9
|
+
@stack.push v
|
10
|
+
end
|
11
|
+
# pop an element off the stack
|
12
|
+
def pop
|
13
|
+
@stack.pop
|
14
|
+
end
|
15
|
+
# size of stack
|
16
|
+
def size
|
17
|
+
@stack.size
|
18
|
+
end
|
19
|
+
# is the stack empty
|
20
|
+
def empty?
|
21
|
+
@stack.empty?
|
22
|
+
end
|
23
|
+
# peek to see what next element is
|
24
|
+
def peek?(v)
|
25
|
+
return false if @stack.empty?
|
26
|
+
return @stack.last.eql?(v)
|
27
|
+
end
|
28
|
+
|
29
|
+
def peek
|
30
|
+
@stack.last
|
31
|
+
end
|
32
|
+
|
33
|
+
end
|
34
|
+
|
35
|
+
class SaxFilter < Nokogiri::XML::SAX::Document
|
36
|
+
def initialize(policy,handler,param_tag,fragment = true)
|
37
|
+
@policy = policy
|
38
|
+
@handler = handler
|
39
|
+
@stack = Stack.new
|
40
|
+
@css_content = nil
|
41
|
+
@css_attributes = nil
|
42
|
+
@css_scanner = CssScanner.new(policy)
|
43
|
+
@param_tag = param_tag
|
44
|
+
@fragment = fragment
|
45
|
+
end
|
46
|
+
|
47
|
+
def error(text)
|
48
|
+
end
|
49
|
+
|
50
|
+
def warning(text)
|
51
|
+
end
|
52
|
+
|
53
|
+
# Always create a HTML document unless the DECL was set beforehand
|
54
|
+
def start_document
|
55
|
+
end
|
56
|
+
|
57
|
+
# Add a comment block
|
58
|
+
def comment(text)
|
59
|
+
return if text.nil?
|
60
|
+
if @policy.directive(Policy::PRESERVE_COMMENTS) =~ /true/i
|
61
|
+
# Strip out conditional directives
|
62
|
+
text.gsub!(%r{<!?!\[(?:end)?if*\]}ixm,"")
|
63
|
+
text.gsub!(%r{\[(?:if).*\]>},"")
|
64
|
+
@handler.comment(text)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def convert_array(x)
|
69
|
+
if x and x.first.is_a?(Array)
|
70
|
+
return x
|
71
|
+
end
|
72
|
+
i = 0
|
73
|
+
h = []
|
74
|
+
while i < x.size
|
75
|
+
m = []
|
76
|
+
m[0] = x[i]
|
77
|
+
m[1] = x[i+1]
|
78
|
+
h << m
|
79
|
+
i += 2
|
80
|
+
end
|
81
|
+
h
|
82
|
+
end
|
83
|
+
|
84
|
+
def fetch_attribute(array,key)
|
85
|
+
array.each do |pair|
|
86
|
+
if pair.first.eql?(key)
|
87
|
+
return pair.last
|
88
|
+
end
|
89
|
+
end
|
90
|
+
nil
|
91
|
+
end
|
92
|
+
|
93
|
+
# Start an element,
|
94
|
+
def start_element(name, attributes = [])
|
95
|
+
attributes = convert_array(attributes)
|
96
|
+
o_attributes = attributes.dup
|
97
|
+
tag = @policy.tag(name)
|
98
|
+
masquerade = false
|
99
|
+
embed_name = nil
|
100
|
+
embed_value = nil
|
101
|
+
# Handle validate param tag as an embed tag
|
102
|
+
if tag.nil? && @policy.directive(Policy::VALIDATE_P_AS_E) && name.eql?("param")
|
103
|
+
embed = @param_tag
|
104
|
+
if @policy.tag("embed")
|
105
|
+
embed = @policy.tag("embed")
|
106
|
+
end
|
107
|
+
if embed and embed.action == Policy::ACTION_VALIDATE
|
108
|
+
tag = embed
|
109
|
+
masquerade = true
|
110
|
+
embed_name = fetch_attribute(attributes,"name")
|
111
|
+
embed_value = fetch_attribute(attributes,"value")
|
112
|
+
attributes = [ [embed_name,embed_value] ]
|
113
|
+
end
|
114
|
+
end
|
115
|
+
valid_attributes = []
|
116
|
+
if @stack.peek?(:css) or @stack.peek?(:remove)
|
117
|
+
# We are in remove mode to remove this tag as well as any child style elements if css mode
|
118
|
+
@stack.push(:remove)
|
119
|
+
elsif (tag.nil? && @policy.directive(Policy::ON_UNKNOWN_TAG).eql?("encode")) or (!tag.nil? && tag.action.eql?(Policy::ACTION_ENCODE)) or @policy.encode?(name.downcase)
|
120
|
+
tmp = "<#{name}>"
|
121
|
+
@handler.characters(tmp)
|
122
|
+
@stack.push(:filter)
|
123
|
+
elsif tag.nil?
|
124
|
+
# We ignore missing HTML and BODY tags since we are fragment parsing, but the
|
125
|
+
# Nokogiri HTML::SAX parser injects HTML/BODY if they are missing
|
126
|
+
if @fragment
|
127
|
+
unless name.eql?("html") or name.eql?("body")
|
128
|
+
@handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_NOT_IN_POLICY,name)
|
129
|
+
end
|
130
|
+
# Nokogiri workaround for a style tag being auto-inserted into head
|
131
|
+
end
|
132
|
+
if name.eql?("head") && @fragment
|
133
|
+
@stack.push(:remove)
|
134
|
+
else
|
135
|
+
@stack.push(:filter)
|
136
|
+
end
|
137
|
+
elsif tag.action.eql?(Policy::ACTION_FILTER)
|
138
|
+
@handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_FILTERED,name)
|
139
|
+
@stack.push(:filter)
|
140
|
+
elsif tag.action.eql?(Policy::ACTION_VALIDATE)
|
141
|
+
# Handle validation
|
142
|
+
remove_tag = false
|
143
|
+
filter_tag = false
|
144
|
+
is_style = name.include?("style")
|
145
|
+
if is_style
|
146
|
+
@stack.push(:css)
|
147
|
+
@css_content = ''
|
148
|
+
@css_attributes = []
|
149
|
+
else
|
150
|
+
# Validate attributes
|
151
|
+
attributes.each do |pair|
|
152
|
+
a_name = pair.first
|
153
|
+
a_value = pair.last
|
154
|
+
attrib = tag.attribute(a_name.downcase)
|
155
|
+
if attrib.nil?
|
156
|
+
attrib = @policy.global(a_name.downcase)
|
157
|
+
end
|
158
|
+
# check if the attribute is a style
|
159
|
+
if a_name.eql?("style")
|
160
|
+
# Handle Style tags
|
161
|
+
begin
|
162
|
+
results = @css_scanner.scan_inline(a_value,name,@policy.max_input)
|
163
|
+
unless result.clean_html.empty?
|
164
|
+
valid_attributes << [a_name,results.clean_html]
|
165
|
+
end
|
166
|
+
@handler.errors << results.messages
|
167
|
+
@handler.errors.flatten!
|
168
|
+
rescue Exception => e
|
169
|
+
@handler.errors << ScanMessage.new(ScanMessage::ERROR_CSS_ATTRIBUTE_MALFORMED,name,@handler.encode_text(a_value))
|
170
|
+
end
|
171
|
+
elsif !attrib.nil? # Attribute is not nil lets check it
|
172
|
+
valid = false
|
173
|
+
attrib.values.each do |av|
|
174
|
+
if av.eql?(a_value)
|
175
|
+
valid_attributes << [a_name,a_value]
|
176
|
+
valid = true
|
177
|
+
break
|
178
|
+
end
|
179
|
+
end
|
180
|
+
unless valid
|
181
|
+
attrib.expressions.each do |ae|
|
182
|
+
mc = ae.match(a_value)
|
183
|
+
if mc and mc.to_s == a_value
|
184
|
+
valid_attributes << [a_name,a_value]
|
185
|
+
valid = true
|
186
|
+
break
|
187
|
+
end
|
188
|
+
end
|
189
|
+
end
|
190
|
+
# we check the matches
|
191
|
+
if !valid && attrib.action.eql?(Attribute::ACTION_REMOVE_TAG)
|
192
|
+
@handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_INVALID_REMOVED,tag.name,@handler.encode_text(a_name),@handler.encode_text(a_value))
|
193
|
+
remove_tag = true
|
194
|
+
elsif !valid && attrib.action.eql?(Attribute::ACTION_FILTER_TAG)
|
195
|
+
@handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_CAUSE_FILTER,tag.name,@handler.encode_text(a_name),@handler.encode_text(a_value))
|
196
|
+
filter_tag = true
|
197
|
+
elsif !valid
|
198
|
+
@handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_INVALID,tag.name,@handler.encode_text(a_name),@handler.encode_text(a_value))
|
199
|
+
end
|
200
|
+
|
201
|
+
else # attribute was null
|
202
|
+
@handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_NOT_IN_POLICY,tag.name,a_name,@handler.encode_text(a_value))
|
203
|
+
if masquerade
|
204
|
+
filter_tag = true
|
205
|
+
end
|
206
|
+
end
|
207
|
+
end # end attribute loop
|
208
|
+
end
|
209
|
+
if remove_tag
|
210
|
+
@stack.push(:remove)
|
211
|
+
elsif filter_tag
|
212
|
+
@stack.push(:filter)
|
213
|
+
else
|
214
|
+
if name.eql?("a") and @policy.directive(Policy::ANCHORS_NOFOLLOW)
|
215
|
+
valid_attributes << ["rel","nofollow"]
|
216
|
+
end
|
217
|
+
if masquerade
|
218
|
+
valid_attributes = []
|
219
|
+
valid_attributes << ["name",embed_name]
|
220
|
+
valid_attributes << ["value",embed_value]
|
221
|
+
end
|
222
|
+
@stack.push(:keep) unless @stack.peek?(:css)
|
223
|
+
end
|
224
|
+
# End validation action
|
225
|
+
elsif tag.action.eql?(Policy::ACTION_TRUNCATE)
|
226
|
+
@stack.push(:truncate)
|
227
|
+
else
|
228
|
+
@handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_DISALLOWED,name)
|
229
|
+
@stack.push(:remove)
|
230
|
+
end
|
231
|
+
# We now know whether to keep or truncate this tag
|
232
|
+
if @stack.peek?(:truncate)
|
233
|
+
@handler.start_element(name,[])
|
234
|
+
elsif @stack.peek?(:keep)
|
235
|
+
@handler.start_element(name,valid_attributes)
|
236
|
+
end
|
237
|
+
end
|
238
|
+
|
239
|
+
def start_element_namespace(name,attrs=[],prefix = nil, uri = nil, ns = nil)
|
240
|
+
start_element(name,attrs)
|
241
|
+
end
|
242
|
+
|
243
|
+
def end_element_namespace(name,prefix,uri)
|
244
|
+
end_element(name)
|
245
|
+
end
|
246
|
+
|
247
|
+
# Add character data to the current tag
|
248
|
+
def characters(text)
|
249
|
+
unless text =~ /\S/ # skip whitespace
|
250
|
+
return unless @policy.directive(Policy::PRESERVE_SPACE)
|
251
|
+
end
|
252
|
+
if @stack.peek?(:css)
|
253
|
+
@css_content << text
|
254
|
+
elsif !@stack.peek?(:remove)
|
255
|
+
@handler.characters(text)
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
259
|
+
# End an element; the top of the stack decides whether the tag is dropped, CSS-scanned, or passed on to the handler
|
260
|
+
def end_element(name)
|
261
|
+
if @stack.peek?(:remove)
|
262
|
+
@stack.pop
|
263
|
+
elsif @stack.peek?(:filter)
|
264
|
+
@stack.pop
|
265
|
+
elsif @stack.peek?(:css)
|
266
|
+
@stack.pop
|
267
|
+
# Do css stuff here
|
268
|
+
begin
|
269
|
+
results = @css_scanner.scan_sheet(@css_content,@policy.max_input)
|
270
|
+
@handler.errors << results.messages
|
271
|
+
@handler.errors.flatten!
|
272
|
+
unless results.clean_html.nil? or results.clean_html.empty?
|
273
|
+
@handler.start_element(name,@css_attributes)
|
274
|
+
@handler.characters results.clean_html
|
275
|
+
@handler.end_element(name)
|
276
|
+
else
|
277
|
+
@handler.start_element(name,@css_attributes)
|
278
|
+
@handler.characters "/* */"
|
279
|
+
@handler.end_element(name)
|
280
|
+
end
|
281
|
+
rescue Exception => e
|
282
|
+
puts e
|
283
|
+
@handler.errors << ScanMessage.new(ScanMessage::ERROR_CSS_TAG_MALFORMED,name,@handler.encode_text(@css_content))
|
284
|
+
ensure
|
285
|
+
@css_content = nil
|
286
|
+
@css_attributes = nil
|
287
|
+
end
|
288
|
+
else
|
289
|
+
@stack.pop
|
290
|
+
@handler.end_element(name)
|
291
|
+
end
|
292
|
+
end
|
293
|
+
|
294
|
+
# Add a cdata block
|
295
|
+
def cdata_block(text)
|
296
|
+
if @stack.peek?(:css)
|
297
|
+
@css_content << text
|
298
|
+
elsif !@stack.peek?(:remove)
|
299
|
+
@handler.characters(text)
|
300
|
+
else
|
301
|
+
@handler.cdata(@handler.encode_text(text)) unless @stack.peek == :remove
|
302
|
+
end
|
303
|
+
end
|
304
|
+
end
|
305
|
+
end
|