antisamy 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51)
  1. data/CHANGELOG.rdoc +13 -0
  2. data/LICENSE.txt +20 -20
  3. data/README.rdoc +41 -41
  4. data/lib/antisamy.rb +46 -46
  5. data/lib/antisamy/css/css_filter.rb +187 -187
  6. data/lib/antisamy/css/css_scanner.rb +84 -84
  7. data/lib/antisamy/css/css_validator.rb +128 -128
  8. data/lib/antisamy/csspool/rsac.rb +1 -1
  9. data/lib/antisamy/csspool/rsac/sac.rb +14 -14
  10. data/lib/antisamy/csspool/rsac/sac/conditions.rb +5 -5
  11. data/lib/antisamy/csspool/rsac/sac/conditions/attribute_condition.rb +50 -50
  12. data/lib/antisamy/csspool/rsac/sac/conditions/begin_hyphen_condition.rb +18 -18
  13. data/lib/antisamy/csspool/rsac/sac/conditions/class_condition.rb +18 -18
  14. data/lib/antisamy/csspool/rsac/sac/conditions/combinator_condition.rb +36 -36
  15. data/lib/antisamy/csspool/rsac/sac/conditions/condition.rb +29 -29
  16. data/lib/antisamy/csspool/rsac/sac/conditions/id_condition.rb +23 -23
  17. data/lib/antisamy/csspool/rsac/sac/conditions/one_of_condition.rb +18 -18
  18. data/lib/antisamy/csspool/rsac/sac/conditions/pseudo_class_condition.rb +20 -20
  19. data/lib/antisamy/csspool/rsac/sac/document_handler.rb +66 -66
  20. data/lib/antisamy/csspool/rsac/sac/error_handler.rb +13 -13
  21. data/lib/antisamy/csspool/rsac/sac/generated_parser.rb +1012 -1012
  22. data/lib/antisamy/csspool/rsac/sac/generated_property_parser.rb +9284 -9284
  23. data/lib/antisamy/csspool/rsac/sac/lexeme.rb +27 -27
  24. data/lib/antisamy/csspool/rsac/sac/lexical_unit.rb +201 -201
  25. data/lib/antisamy/csspool/rsac/sac/parse_exception.rb +4 -4
  26. data/lib/antisamy/csspool/rsac/sac/parser.rb +109 -109
  27. data/lib/antisamy/csspool/rsac/sac/property_parser.rb +44 -44
  28. data/lib/antisamy/csspool/rsac/sac/selectors.rb +5 -5
  29. data/lib/antisamy/csspool/rsac/sac/selectors/child_selector.rb +36 -36
  30. data/lib/antisamy/csspool/rsac/sac/selectors/conditional_selector.rb +45 -45
  31. data/lib/antisamy/csspool/rsac/sac/selectors/descendant_selector.rb +36 -36
  32. data/lib/antisamy/csspool/rsac/sac/selectors/element_selector.rb +35 -35
  33. data/lib/antisamy/csspool/rsac/sac/selectors/selector.rb +25 -25
  34. data/lib/antisamy/csspool/rsac/sac/selectors/sibling_selector.rb +35 -35
  35. data/lib/antisamy/csspool/rsac/sac/selectors/simple_selector.rb +21 -21
  36. data/lib/antisamy/csspool/rsac/sac/token.rb +25 -25
  37. data/lib/antisamy/csspool/rsac/sac/tokenizer.rb +185 -185
  38. data/lib/antisamy/csspool/rsac/stylesheet.rb +3 -3
  39. data/lib/antisamy/csspool/rsac/stylesheet/rule.rb +20 -20
  40. data/lib/antisamy/csspool/rsac/stylesheet/stylesheet.rb +76 -76
  41. data/lib/antisamy/html/handler.rb +112 -99
  42. data/lib/antisamy/html/sax_filter.rb +305 -302
  43. data/lib/antisamy/html/scanner.rb +47 -43
  44. data/lib/antisamy/model/attribute.rb +19 -19
  45. data/lib/antisamy/model/css_property.rb +39 -39
  46. data/lib/antisamy/model/tag.rb +31 -31
  47. data/lib/antisamy/policy.rb +577 -545
  48. data/lib/antisamy/scan_results.rb +89 -89
  49. data/spec/antisamy_spec.rb +208 -142
  50. data/spec/spec_helper.rb +12 -12
  51. metadata +79 -81
@@ -1,3 +1,3 @@
1
- require "antisamy/csspool/rsac/stylesheet/stylesheet"
2
- require "antisamy/csspool/rsac/stylesheet/rule"
3
-
1
+ require "antisamy/csspool/rsac/stylesheet/stylesheet"
2
+ require "antisamy/csspool/rsac/stylesheet/rule"
3
+
@@ -1,20 +1,20 @@
1
- require 'set'
2
- module RSAC
3
- class StyleSheet
4
- class Rule
5
- include Comparable
6
-
7
- attr_accessor :selector, :properties, :index
8
- def initialize(selector, index, properties = [])
9
- @selector = selector
10
- @properties = Set.new(properties)
11
- @index = index
12
- end
13
-
14
- def <=>(other)
15
- comp = selector.specificity <=> other.selector.specificity
16
- comp == 0 ? index <=> other.index : comp
17
- end
18
- end
19
- end
20
- end
1
+ require 'set'
2
+ module RSAC
3
+ class StyleSheet
4
+ class Rule
5
+ include Comparable
6
+
7
+ attr_accessor :selector, :properties, :index
8
+ def initialize(selector, index, properties = [])
9
+ @selector = selector
10
+ @properties = Set.new(properties)
11
+ @index = index
12
+ end
13
+
14
+ def <=>(other)
15
+ comp = selector.specificity <=> other.selector.specificity
16
+ comp == 0 ? index <=> other.index : comp
17
+ end
18
+ end
19
+ end
20
+ end
@@ -1,76 +1,76 @@
1
- module RSAC
2
- class StyleSheet < RSAC::DocumentHandler
3
- attr_reader :rules
4
-
5
- def initialize(sac)
6
- @sac = sac
7
- @rules = []
8
- @current_rules = []
9
- @selector_index = 0
10
- end
11
-
12
- def start_selector(selectors)
13
- selectors.each { |selector|
14
- @current_rules << Rule.new(selector, @selector_index)
15
- }
16
- end
17
-
18
- def end_selector(selectors)
19
- @rules += @current_rules
20
- @current_rules = []
21
- @selector_index += 1
22
- reduce!
23
- end
24
-
25
- def find_rule(rule)
26
- rule = self.create_rule(rule) if rule.is_a?(String)
27
- rules.find { |x| x.selector == rule.selector }
28
- end
29
- alias :[] :find_rule
30
-
31
- def create_rule(rule)
32
- Rule.new(@sac.parse_rule(rule).first, @selector_index += 1)
33
- end
34
-
35
- def property(name, value, important)
36
- @current_rules.each { |selector|
37
- selector.properties << [name, value, important]
38
- }
39
- end
40
-
41
- # Get a hash of rules by property
42
- def rules_by_property
43
- rules_by_property = Hash.new { |h,k| h[k] = [] }
44
- @rules.each { |sel|
45
- props = sel.properties.to_a.sort_by { |x| x.hash } # HACK?
46
- rules_by_property[props] << sel
47
- }
48
- rules_by_property
49
- end
50
-
51
- def to_css
52
- rules_by_property.map do |properties, rules|
53
- rules.map { |rule| rule.selector.to_css }.sort.join(', ') + " {\n" +
54
- properties.map { |key,value,important|
55
- # Super annoying. If the property is font-family, it's supposed to
56
- # be commas
57
- join_val = ('font-family' == key) ? ', ' : ' '
58
- values = [value].flatten.join(join_val)
59
- "#{key}:#{values}#{important ? ' !important' : ''};"
60
- }.join("\n") + "\n}"
61
- end.sort.join("\n")
62
- end
63
-
64
- private
65
- # Remove duplicate rules
66
- def reduce!
67
- unique_rules = {}
68
- @rules.each do |rule|
69
- (unique_rules[rule.selector] ||= rule).properties += rule.properties
70
- end
71
- @rules = unique_rules.values
72
- self
73
- end
74
- end
75
- end
76
-
1
+ module RSAC
2
+ class StyleSheet < RSAC::DocumentHandler
3
+ attr_reader :rules
4
+
5
+ def initialize(sac)
6
+ @sac = sac
7
+ @rules = []
8
+ @current_rules = []
9
+ @selector_index = 0
10
+ end
11
+
12
+ def start_selector(selectors)
13
+ selectors.each { |selector|
14
+ @current_rules << Rule.new(selector, @selector_index)
15
+ }
16
+ end
17
+
18
+ def end_selector(selectors)
19
+ @rules += @current_rules
20
+ @current_rules = []
21
+ @selector_index += 1
22
+ reduce!
23
+ end
24
+
25
+ def find_rule(rule)
26
+ rule = self.create_rule(rule) if rule.is_a?(String)
27
+ rules.find { |x| x.selector == rule.selector }
28
+ end
29
+ alias :[] :find_rule
30
+
31
+ def create_rule(rule)
32
+ Rule.new(@sac.parse_rule(rule).first, @selector_index += 1)
33
+ end
34
+
35
+ def property(name, value, important)
36
+ @current_rules.each { |selector|
37
+ selector.properties << [name, value, important]
38
+ }
39
+ end
40
+
41
+ # Get a hash of rules by property
42
+ def rules_by_property
43
+ rules_by_property = Hash.new { |h,k| h[k] = [] }
44
+ @rules.each { |sel|
45
+ props = sel.properties.to_a.sort_by { |x| x.hash } # HACK?
46
+ rules_by_property[props] << sel
47
+ }
48
+ rules_by_property
49
+ end
50
+
51
+ def to_css
52
+ rules_by_property.map do |properties, rules|
53
+ rules.map { |rule| rule.selector.to_css }.sort.join(', ') + " {\n" +
54
+ properties.map { |key,value,important|
55
+ # Super annoying. If the property is font-family, it's supposed to
56
+ # be commas
57
+ join_val = ('font-family' == key) ? ', ' : ' '
58
+ values = [value].flatten.join(join_val)
59
+ "#{key}:#{values}#{important ? ' !important' : ''};"
60
+ }.join("\n") + "\n}"
61
+ end.sort.join("\n")
62
+ end
63
+
64
+ private
65
+ # Remove duplicate rules
66
+ def reduce!
67
+ unique_rules = {}
68
+ @rules.each do |rule|
69
+ (unique_rules[rule.selector] ||= rule).properties += rule.properties
70
+ end
71
+ @rules = unique_rules.values
72
+ self
73
+ end
74
+ end
75
+ end
76
+
@@ -1,99 +1,112 @@
1
- module AntiSamy
2
-
3
- class Handler
4
-
5
- attr_accessor :errors
6
- def initialize(policy,output) #:nodoc:
7
- @document = Nokogiri::HTML::DocumentFragment.parse("")
8
- @current_node = @document
9
- @policy = policy
10
- @preserve_whitespace = @policy.directive(Policy::PRESERVE_SPACE)
11
- @errors = []
12
- @output_encoding = output
13
- end
14
-
15
- # HTML entity encode some text
16
- def encode_text(text)
17
- return "" if text.nil?
18
- @document.encode_special_chars(text)
19
- end
20
-
21
- # create a cdata section
22
- def cdata(text)
23
- node = Nokogiri::XML::CDATA.new(@document,text)
24
- @current_node.add_child(node)
25
- end
26
-
27
- # create a comment
28
- def comment(text) #:nodoc:
29
- @current_node.add_child(Nokogiri::XML::Comment.new(@document, text))
30
- end
31
-
32
- # create a text node
33
- def characters(text)
34
- node = @current_node.children.last
35
- if node and node.text?
36
- node.content += text
37
- else
38
- @current_node.add_child(Nokogiri::XML::Text.new(text, @document))
39
- end
40
- end
41
-
42
- # start an element
43
- def start_element(name,attributes)
44
- if name.eql?("head") or name.eql?("body") or name.eql?("html")
45
- return
46
- end
47
- elem = Nokogiri::XML::Element.new(name, @document)
48
- attributes.each do |attrib_pair|
49
- elem[attrib_pair.first] = attrib_pair.last
50
- end
51
- # Special param tag hacking, as libxml/nokogiri doesn't generate an end tag
52
- # for param tags it seems
53
- if name.eql?("param")
54
- inner_html = "<param"
55
- attributes.each do |attrib_pair|
56
- inner_html<< " #{attrib_pair.first}=\"#{attrib_pair.last}\""
57
- end
58
- inner_html << "/>"
59
- # we create a fake cdata node, add it *and* don't move our parent yet
60
- elem = Nokogiri::XML::CDATA.new(@document,inner_html)
61
- @current_node.add_child(elem)
62
- return
63
- end
64
- @current_node = @current_node.add_child(elem)
65
- end
66
-
67
- #end an element
68
- def end_element(name)
69
- if @current_node.nil? or !@current_node.name.eql?(name)
70
- return
71
- end
72
- @current_node = @current_node.parent if @current_node.parent
73
- end
74
-
75
- # format the output applying any policy rules
76
- def document
77
- # check some directives
78
- indent = 0
79
- options = Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS
80
- if @policy.directive(Policy::FORMAT_OUTPUT)
81
- options |= Nokogiri::XML::Node::SaveOptions::FORMAT
82
- indent = 2
83
- end
84
- if @policy.directive(Policy::OMIT_DOC_TYPE) || @policy.directive(Policy::OMIT_XML_DECL)
85
- options |= Nokogiri::XML::Node::SaveOptions::NO_DECLARATION
86
- end
87
-
88
- clean = ""
89
- if @policy.directive(Policy::USE_XHTML)
90
- options |= Nokogiri::XML::Node::SaveOptions::AS_XHTML
91
- clean = @document.to_xhtml(:encoding => @output_encoding, :indent=>indent,:save_with=>options)
92
- else
93
- clean = @document.to_html(:encoding => @output_encoding, :indent=>indent,:save_with=>options)
94
- end
95
- return clean
96
- end
97
-
98
- end
99
- end
1
+ module AntiSamy
2
+
3
+ class Handler
4
+
5
+ attr_accessor :errors
6
+ def initialize(policy,output,fragment = true) #:nodoc:
7
+ @document = Nokogiri::HTML::DocumentFragment.parse("")
8
+ @current_node = @document
9
+ @policy = policy
10
+ @preserve_whitespace = @policy.directive(Policy::PRESERVE_SPACE)
11
+ @errors = []
12
+ @output_encoding = output
13
+ @fragment = fragment
14
+ end
15
+
16
+ # HTML entity encode some text
17
+ def encode_text(text)
18
+ return "" if text.nil?
19
+ @document.encode_special_chars(text)
20
+ end
21
+
22
+ # create a cdata section
23
+ def cdata(text)
24
+ node = Nokogiri::XML::CDATA.new(@document,text)
25
+ @current_node.add_child(node)
26
+ end
27
+
28
+ # create a comment
29
+ def comment(text) #:nodoc:
30
+ @current_node.add_child(Nokogiri::XML::Comment.new(@document, text))
31
+ end
32
+
33
+ # create a text node
34
+ def characters(text)
35
+ node = @current_node.children.last
36
+ if node and node.text?
37
+ node.content += text
38
+ else
39
+ @current_node.add_child(Nokogiri::XML::Text.new(text, @document))
40
+ end
41
+ end
42
+
43
+ # start an element
44
+ def start_element(name,attributes)
45
+ if @fragment
46
+ if name.eql?("head") or name.eql?("body") or name.eql?("html")
47
+ return
48
+ end
49
+ end
50
+ elem = Nokogiri::XML::Element.new(name, @document)
51
+ attributes.each do |attrib_pair|
52
+ elem[attrib_pair.first] = attrib_pair.last
53
+ end
54
+ # Special param tag hacking, as libxml/nokogiri doesn't generate an end tag
55
+ # for param tags it seems
56
+ if name.eql?("param")
57
+ inner_html = "<param"
58
+ attributes.each do |attrib_pair|
59
+ inner_html<< " #{attrib_pair.first}=\"#{attrib_pair.last}\""
60
+ end
61
+ inner_html << "/>"
62
+ # we create a fake cdata node, add it *and* don't move our parent yet
63
+ elem = Nokogiri::XML::CDATA.new(@document,inner_html)
64
+ @current_node.add_child(elem)
65
+ return
66
+ end
67
+ @current_node = @current_node.add_child(elem)
68
+ end
69
+
70
+ #end an element
71
+ def end_element(name)
72
+ if @current_node.nil? or !@current_node.name.eql?(name)
73
+ return
74
+ end
75
+ if @current_node.children.empty?
76
+ if @policy.allow_empty?(@current_node.name)
77
+ @current_node = @current_node.parent if @current_node.parent
78
+ else
79
+ tnode = @current_node
80
+ @current_node = @current_node.parent if @current_node.parent
81
+ tnode.remove
82
+ end
83
+ else
84
+ @current_node = @current_node.parent if @current_node.parent
85
+ end
86
+ end
87
+
88
+ # format the output applying any policy rules
89
+ def document
90
+ # check some directives
91
+ indent = 0
92
+ options = Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS
93
+ if @policy.directive(Policy::FORMAT_OUTPUT)
94
+ options |= Nokogiri::XML::Node::SaveOptions::FORMAT
95
+ indent = 2
96
+ end
97
+ if @policy.directive(Policy::OMIT_DOC_TYPE) || @policy.directive(Policy::OMIT_XML_DECL)
98
+ options |= Nokogiri::XML::Node::SaveOptions::NO_DECLARATION
99
+ end
100
+
101
+ clean = ""
102
+ if @policy.directive(Policy::USE_XHTML)
103
+ options |= Nokogiri::XML::Node::SaveOptions::AS_XHTML
104
+ clean = @document.to_xhtml(:encoding => @output_encoding, :indent=>indent,:save_with=>options)
105
+ else
106
+ clean = @document.to_html(:encoding => @output_encoding, :indent=>indent,:save_with=>options)
107
+ end
108
+ return clean
109
+ end
110
+
111
+ end
112
+ end
@@ -1,302 +1,305 @@
1
- module AntiSamy
2
- # Quick and Dirty Stack class
3
- class Stack
4
- def initialize
5
- @stack = []
6
- end
7
- # push an element on the stack
8
- def push(v)
9
- @stack.push v
10
- end
11
- # pop an element off the stack
12
- def pop
13
- @stack.pop
14
- end
15
- # size of stack
16
- def size
17
- @stack.size
18
- end
19
- # is the stack empty
20
- def empty?
21
- @stack.empty?
22
- end
23
- # peek to see what next element is
24
- def peek?(v)
25
- return false if @stack.empty?
26
- return @stack.last.eql?(v)
27
- end
28
-
29
- def peek
30
- @stack.last
31
- end
32
-
33
- end
34
-
35
- class SaxFilter < Nokogiri::XML::SAX::Document
36
- def initialize(policy,handler,param_tag)
37
- @policy = policy
38
- @handler = handler
39
- @stack = Stack.new
40
- @css_content = nil
41
- @css_attributes = nil
42
- @css_scanner = CssScanner.new(policy)
43
- @param_tag = param_tag
44
- end
45
-
46
- def error(text)
47
- end
48
-
49
- def warning(text)
50
- end
51
-
52
- # Always create a HTML document unless the DECL was set beforehand
53
- def start_document
54
- end
55
-
56
- # Add a comment block
57
- def comment(text)
58
- return if text.nil?
59
- if @policy.directive(Policy::PRESERVE_COMMENTS) =~ /true/i
60
- # Strip out conditional directives
61
- text.gsub!(%r{<!?!\[(?:end)?if*\]}ixm,"")
62
- text.gsub!(%r{\[(?:if).*\]>},"")
63
- @handler.comment(text)
64
- end
65
- end
66
-
67
- def convert_array(x)
68
- if x and x.first.is_a?(Array)
69
- return x
70
- end
71
- i = 0
72
- h = []
73
- while i < x.size
74
- m = []
75
- m[0] = x[i]
76
- m[1] = x[i+1]
77
- h << m
78
- i += 2
79
- end
80
- h
81
- end
82
-
83
- def fetch_attribute(array,key)
84
- array.each do |pair|
85
- if pair.first.eql?(key)
86
- return pair.last
87
- end
88
- end
89
- nil
90
- end
91
-
92
- # Start an element,
93
- def start_element(name, attributes = [])
94
- attributes = convert_array(attributes)
95
- o_attributes = attributes.dup
96
- tag = @policy.tag(name)
97
- masquerade = false
98
- embed_name = nil
99
- embed_value = nil
100
- # Handle validate param tag as an embed tag
101
- if tag.nil? && @policy.directive(Policy::VALIDATE_P_AS_E) && name.eql?("param")
102
- embed = @param_tag
103
- if @policy.tag("embed")
104
- embed = @policy.tag("embed")
105
- end
106
- if embed and embed.action == Policy::ACTION_VALIDATE
107
- tag = embed
108
- masquerade = true
109
- embed_name = fetch_attribute(attributes,"name")
110
- embed_value = fetch_attribute(attributes,"value")
111
- attributes = [ [embed_name,embed_value] ]
112
- end
113
- end
114
- valid_attributes = []
115
- if @stack.peek?(:css) or @stack.peek?(:remove)
116
- # We are in remove mode to remove this tag as well as any child style elements if css mode
117
- @stack.push(:remove)
118
- elsif (tag.nil? && @policy.directive(Policy::ON_UNKNOWN_TAG).eql?("encode")) or (!tag.nil? && tag.action.eql?(Policy::ACTION_ENCODE)) or @policy.encode?(name.downcase)
119
- tmp = "<#{name}>"
120
- @handler.characters(tmp)
121
- @stack.push(:filter)
122
- elsif tag.nil?
123
- # We ignore missing HTML and BODY tags since we are fragment parsing, but the
124
- # Nokogiri HTML::SAX parser injects HTML/BODY if they are missing
125
- unless name.eql?("html") or name.eql?("body")
126
- @handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_NOT_IN_POLICY,name)
127
- end
128
- # Nokogiri workaround for a style tag being auto-inserted into head
129
- if name.eql?("head")
130
- @stack.push(:remove)
131
- else
132
- @stack.push(:filter)
133
- end
134
- elsif tag.action.eql?(Policy::ACTION_FILTER)
135
- @handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_FILTERED,name)
136
- @stack.push(:filter)
137
- elsif tag.action.eql?(Policy::ACTION_VALIDATE)
138
- # Handle validation
139
- remove_tag = false
140
- filter_tag = false
141
- is_style = name.include?("style")
142
- if is_style
143
- @stack.push(:css)
144
- @css_content = ''
145
- @css_attributes = []
146
- else
147
- # Validate attributes
148
- attributes.each do |pair|
149
- a_name = pair.first
150
- a_value = pair.last
151
- attrib = tag.attribute(a_name.downcase)
152
- if attrib.nil?
153
- attrib = @policy.global(a_name.downcase)
154
- end
155
- # check if the attribute is a style
156
- if a_name.eql?("style")
157
- # Handle Style tags
158
- begin
159
- results = @css_scanner.scan_inline(a_value,name,@policy.max_input)
160
- unless result.clean_html.empty?
161
- valid_attributes << [a_name,results.clean_html]
162
- end
163
- @handler.errors << results.messages
164
- @handler.errors.flatten!
165
- rescue Exception => e
166
- @handler.errors << ScanMessage.new(ScanMessage::ERROR_CSS_ATTRIBUTE_MALFORMED,name,@handler.encode_text(a_value))
167
- end
168
- elsif !attrib.nil? # Attribute is not nil lets check it
169
- valid = false
170
- attrib.values.each do |av|
171
- if av.eql?(a_value)
172
- valid_attributes << [a_name,a_value]
173
- valid = true
174
- break
175
- end
176
- end
177
- unless valid
178
- attrib.expressions.each do |ae|
179
- mc = ae.match(a_value)
180
- if mc and mc.to_s == a_value
181
- valid_attributes << [a_name,a_value]
182
- valid = true
183
- break
184
- end
185
- end
186
- end
187
- # we check the matches
188
- if !valid && attrib.action.eql?(Attribute::ACTION_REMOVE_TAG)
189
- @handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_INVALID_REMOVED,tag.name,@handler.encode_text(a_name),@handler.encode_text(a_value))
190
- remove_tag = true
191
- elsif !valid && attrib.action.eql?(Attribute::ACTION_FILTER_TAG)
192
- @handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_CAUSE_FILTER,tag.name,@handler.encode_text(a_name),@handler.encode_text(a_value))
193
- filter_tag = true
194
- elsif !valid
195
- @handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_INVALID,tag.name,@handler.encode_text(a_name),@handler.encode_text(a_value))
196
- end
197
-
198
- else # attribute was null
199
- @handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_NOT_IN_POLICY,tag.name,a_name,@handler.encode_text(a_value))
200
- if masquerade
201
- filter_tag = true
202
- end
203
- end
204
- end # end attirubte loop
205
- end
206
- if remove_tag
207
- @stack.push(:remove)
208
- elsif filter_tag
209
- @stack.push(:filter)
210
- else
211
- if name.eql?("a") and @policy.directive(Policy::ANCHROS_NOFOLLOW)
212
- valid_attributes << ["rel","nofollow"]
213
- end
214
- if masquerade
215
- valid_attributes = []
216
- valid_attributes << ["name",embed_name]
217
- valid_attributes << ["value",embed_value]
218
- end
219
- @stack.push(:keep) unless @stack.peek?(:css)
220
- end
221
- # End validation action
222
- elsif tag.action.eql?(Policy::ACTION_TRUNCATE)
223
- @stack.push(:truncate)
224
- else
225
- @handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_DISALLOWED,name)
226
- @stack.push(:remove)
227
- end
228
- # We now know whether to keep or truncate this tag
229
- if @stack.peek?(:truncate)
230
- @handler.start_element(name,[])
231
- elsif @stack.peek?(:keep)
232
- @handler.start_element(name,valid_attributes)
233
- end
234
- end
235
-
236
- def start_element_namespace(name,attrs=[],prefix = nil, uri = nil, ns = nil)
237
- start_element(name,attrs)
238
- end
239
-
240
- def end_element_namespace(name,prefix,uri)
241
- end_element(name)
242
- end
243
-
244
- # Add character data to the current tag
245
- def characters(text)
246
- unless text =~ /\S/ # skip whitespace
247
- return unless @policy.directive(Policy::PRESERVE_SPACE)
248
- end
249
- if @stack.peek?(:css)
250
- @css_content << text
251
- elsif !@stack.peek?(:remove)
252
- @handler.characters(text)
253
- end
254
- end
255
-
256
- # End an element, will raise an error on a loose tag
257
- def end_element(name)
258
- if @stack.peek?(:remove)
259
- @stack.pop
260
- elsif @stack.peek?(:filter)
261
- @stack.pop
262
- elsif @stack.peek?(:css)
263
- @stack.pop
264
- # Do css stuff here
265
- begin
266
- results = @css_scanner.scan_sheet(@css_content,@policy.max_input)
267
- @handler.errors << results.messages
268
- @handler.errors.flatten!
269
- unless results.clean_html.nil? or results.clean_html.empty?
270
- @handler.start_element(name,@css_attributes)
271
- @handler.characters results.clean_html
272
- @handler.end_element(name)
273
- else
274
- @handler.start_element(name,@css_attributes)
275
- @handler.characters "/* */"
276
- @handler.end_element(name)
277
- end
278
- rescue Exception => e
279
- puts e
280
- @handler.errors << ScanMessage.new(ScanMessage::ERROR_CSS_TAG_MALFORMED,name,@handler.encode_text(@css_content))
281
- ensure
282
- @css_content = nil
283
- @css_attributes = nil
284
- end
285
- else
286
- @stack.pop
287
- @handler.end_element(name)
288
- end
289
- end
290
-
291
- # Add a cdata block
292
- def cdata_block(text)
293
- if @stack.peek?(:css)
294
- @css_content << text
295
- elsif !@stack.peek?(:remove)
296
- @handler.characters(text)
297
- else
298
- @handler.cdata(@handler.encode_text(text)) unless @stack.peek == :remove
299
- end
300
- end
301
- end
302
- end
1
+ module AntiSamy
2
+ # Quick and Dirty Stack class
3
+ class Stack
4
+ def initialize
5
+ @stack = []
6
+ end
7
+ # push an element on the stack
8
+ def push(v)
9
+ @stack.push v
10
+ end
11
+ # pop an element off the stack
12
+ def pop
13
+ @stack.pop
14
+ end
15
+ # size of stack
16
+ def size
17
+ @stack.size
18
+ end
19
+ # is the stack empty
20
+ def empty?
21
+ @stack.empty?
22
+ end
23
+ # peek to see what next element is
24
+ def peek?(v)
25
+ return false if @stack.empty?
26
+ return @stack.last.eql?(v)
27
+ end
28
+
29
+ def peek
30
+ @stack.last
31
+ end
32
+
33
+ end
34
+
35
+ class SaxFilter < Nokogiri::XML::SAX::Document
36
+ def initialize(policy,handler,param_tag,fragment = true)
37
+ @policy = policy
38
+ @handler = handler
39
+ @stack = Stack.new
40
+ @css_content = nil
41
+ @css_attributes = nil
42
+ @css_scanner = CssScanner.new(policy)
43
+ @param_tag = param_tag
44
+ @fragment = fragment
45
+ end
46
+
47
+ def error(text)
48
+ end
49
+
50
+ def warning(text)
51
+ end
52
+
53
+ # Always create a HTML document unless the DECL was set beforehand
54
+ def start_document
55
+ end
56
+
57
+ # Add a comment block
58
+ def comment(text)
59
+ return if text.nil?
60
+ if @policy.directive(Policy::PRESERVE_COMMENTS) =~ /true/i
61
+ # Strip out conditional directives
62
+ text.gsub!(%r{<!?!\[(?:end)?if*\]}ixm,"")
63
+ text.gsub!(%r{\[(?:if).*\]>},"")
64
+ @handler.comment(text)
65
+ end
66
+ end
67
+
68
+ def convert_array(x)
69
+ if x and x.first.is_a?(Array)
70
+ return x
71
+ end
72
+ i = 0
73
+ h = []
74
+ while i < x.size
75
+ m = []
76
+ m[0] = x[i]
77
+ m[1] = x[i+1]
78
+ h << m
79
+ i += 2
80
+ end
81
+ h
82
+ end
83
+
84
+ def fetch_attribute(array,key)
85
+ array.each do |pair|
86
+ if pair.first.eql?(key)
87
+ return pair.last
88
+ end
89
+ end
90
+ nil
91
+ end
92
+
93
+ # Start an element,
94
+ def start_element(name, attributes = [])
95
+ attributes = convert_array(attributes)
96
+ o_attributes = attributes.dup
97
+ tag = @policy.tag(name)
98
+ masquerade = false
99
+ embed_name = nil
100
+ embed_value = nil
101
+ # Handle validate param tag as an embed tag
102
+ if tag.nil? && @policy.directive(Policy::VALIDATE_P_AS_E) && name.eql?("param")
103
+ embed = @param_tag
104
+ if @policy.tag("embed")
105
+ embed = @policy.tag("embed")
106
+ end
107
+ if embed and embed.action == Policy::ACTION_VALIDATE
108
+ tag = embed
109
+ masquerade = true
110
+ embed_name = fetch_attribute(attributes,"name")
111
+ embed_value = fetch_attribute(attributes,"value")
112
+ attributes = [ [embed_name,embed_value] ]
113
+ end
114
+ end
115
+ valid_attributes = []
116
+ if @stack.peek?(:css) or @stack.peek?(:remove)
117
+ # We are in remove mode to remove this tag as well as any child style elements if css mode
118
+ @stack.push(:remove)
119
+ elsif (tag.nil? && @policy.directive(Policy::ON_UNKNOWN_TAG).eql?("encode")) or (!tag.nil? && tag.action.eql?(Policy::ACTION_ENCODE)) or @policy.encode?(name.downcase)
120
+ tmp = "<#{name}>"
121
+ @handler.characters(tmp)
122
+ @stack.push(:filter)
123
+ elsif tag.nil?
124
+ # We ignore missing HTML and BODY tags since we are fragment parsing, but the
125
+ # Nokogiri HTML::SAX parser injects HTML/BODY if they are missing
126
+ if @fragment
127
+ unless name.eql?("html") or name.eql?("body")
128
+ @handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_NOT_IN_POLICY,name)
129
+ end
130
+ # Nokogiri workaround for a style tag being auto-inserted into head
131
+ end
132
+ if name.eql?("head") && @fragment
133
+ @stack.push(:remove)
134
+ else
135
+ @stack.push(:filter)
136
+ end
137
+ elsif tag.action.eql?(Policy::ACTION_FILTER)
138
+ @handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_FILTERED,name)
139
+ @stack.push(:filter)
140
+ elsif tag.action.eql?(Policy::ACTION_VALIDATE)
141
+ # Handle validation
142
+ remove_tag = false
143
+ filter_tag = false
144
+ is_style = name.include?("style")
145
+ if is_style
146
+ @stack.push(:css)
147
+ @css_content = ''
148
+ @css_attributes = []
149
+ else
150
+ # Validate attributes
151
+ attributes.each do |pair|
152
+ a_name = pair.first
153
+ a_value = pair.last
154
+ attrib = tag.attribute(a_name.downcase)
155
+ if attrib.nil?
156
+ attrib = @policy.global(a_name.downcase)
157
+ end
158
+ # check if the attribute is a style
159
+ if a_name.eql?("style")
160
+ # Handle Style tags
161
+ begin
162
+ results = @css_scanner.scan_inline(a_value,name,@policy.max_input)
163
+ unless result.clean_html.empty?
164
+ valid_attributes << [a_name,results.clean_html]
165
+ end
166
+ @handler.errors << results.messages
167
+ @handler.errors.flatten!
168
+ rescue Exception => e
169
+ @handler.errors << ScanMessage.new(ScanMessage::ERROR_CSS_ATTRIBUTE_MALFORMED,name,@handler.encode_text(a_value))
170
+ end
171
+ elsif !attrib.nil? # Attribute is not nil lets check it
172
+ valid = false
173
+ attrib.values.each do |av|
174
+ if av.eql?(a_value)
175
+ valid_attributes << [a_name,a_value]
176
+ valid = true
177
+ break
178
+ end
179
+ end
180
+ unless valid
181
+ attrib.expressions.each do |ae|
182
+ mc = ae.match(a_value)
183
+ if mc and mc.to_s == a_value
184
+ valid_attributes << [a_name,a_value]
185
+ valid = true
186
+ break
187
+ end
188
+ end
189
+ end
190
+ # we check the matches
191
+ if !valid && attrib.action.eql?(Attribute::ACTION_REMOVE_TAG)
192
+ @handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_INVALID_REMOVED,tag.name,@handler.encode_text(a_name),@handler.encode_text(a_value))
193
+ remove_tag = true
194
+ elsif !valid && attrib.action.eql?(Attribute::ACTION_FILTER_TAG)
195
+ @handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_CAUSE_FILTER,tag.name,@handler.encode_text(a_name),@handler.encode_text(a_value))
196
+ filter_tag = true
197
+ elsif !valid
198
+ @handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_INVALID,tag.name,@handler.encode_text(a_name),@handler.encode_text(a_value))
199
+ end
200
+
201
+ else # attribute was null
202
+ @handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_NOT_IN_POLICY,tag.name,a_name,@handler.encode_text(a_value))
203
+ if masquerade
204
+ filter_tag = true
205
+ end
206
+ end
207
+ end # end attirubte loop
208
+ end
209
+ if remove_tag
210
+ @stack.push(:remove)
211
+ elsif filter_tag
212
+ @stack.push(:filter)
213
+ else
214
+ if name.eql?("a") and @policy.directive(Policy::ANCHORS_NOFOLLOW)
215
+ valid_attributes << ["rel","nofollow"]
216
+ end
217
+ if masquerade
218
+ valid_attributes = []
219
+ valid_attributes << ["name",embed_name]
220
+ valid_attributes << ["value",embed_value]
221
+ end
222
+ @stack.push(:keep) unless @stack.peek?(:css)
223
+ end
224
+ # End validation action
225
+ elsif tag.action.eql?(Policy::ACTION_TRUNCATE)
226
+ @stack.push(:truncate)
227
+ else
228
+ @handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_DISALLOWED,name)
229
+ @stack.push(:remove)
230
+ end
231
+ # We now know whether to keep or truncate this tag
232
+ if @stack.peek?(:truncate)
233
+ @handler.start_element(name,[])
234
+ elsif @stack.peek?(:keep)
235
+ @handler.start_element(name,valid_attributes)
236
+ end
237
+ end
238
+
239
+ def start_element_namespace(name,attrs=[],prefix = nil, uri = nil, ns = nil)
240
+ start_element(name,attrs)
241
+ end
242
+
243
+ def end_element_namespace(name,prefix,uri)
244
+ end_element(name)
245
+ end
246
+
247
+ # Add character data to the current tag
248
+ def characters(text)
249
+ unless text =~ /\S/ # skip whitespace
250
+ return unless @policy.directive(Policy::PRESERVE_SPACE)
251
+ end
252
+ if @stack.peek?(:css)
253
+ @css_content << text
254
+ elsif !@stack.peek?(:remove)
255
+ @handler.characters(text)
256
+ end
257
+ end
258
+
259
+ # End an element, will raise an error on a loose tag
260
+ def end_element(name)
261
+ if @stack.peek?(:remove)
262
+ @stack.pop
263
+ elsif @stack.peek?(:filter)
264
+ @stack.pop
265
+ elsif @stack.peek?(:css)
266
+ @stack.pop
267
+ # Do css stuff here
268
+ begin
269
+ results = @css_scanner.scan_sheet(@css_content,@policy.max_input)
270
+ @handler.errors << results.messages
271
+ @handler.errors.flatten!
272
+ unless results.clean_html.nil? or results.clean_html.empty?
273
+ @handler.start_element(name,@css_attributes)
274
+ @handler.characters results.clean_html
275
+ @handler.end_element(name)
276
+ else
277
+ @handler.start_element(name,@css_attributes)
278
+ @handler.characters "/* */"
279
+ @handler.end_element(name)
280
+ end
281
+ rescue Exception => e
282
+ puts e
283
+ @handler.errors << ScanMessage.new(ScanMessage::ERROR_CSS_TAG_MALFORMED,name,@handler.encode_text(@css_content))
284
+ ensure
285
+ @css_content = nil
286
+ @css_attributes = nil
287
+ end
288
+ else
289
+ @stack.pop
290
+ @handler.end_element(name)
291
+ end
292
+ end
293
+
294
+ # Add a cdata block
295
+ def cdata_block(text)
296
+ if @stack.peek?(:css)
297
+ @css_content << text
298
+ elsif !@stack.peek?(:remove)
299
+ @handler.characters(text)
300
+ else
301
+ @handler.cdata(@handler.encode_text(text)) unless @stack.peek == :remove
302
+ end
303
+ end
304
+ end
305
+ end