antisamy 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. data/CHANGELOG.rdoc +13 -0
  2. data/LICENSE.txt +20 -20
  3. data/README.rdoc +41 -41
  4. data/lib/antisamy.rb +46 -46
  5. data/lib/antisamy/css/css_filter.rb +187 -187
  6. data/lib/antisamy/css/css_scanner.rb +84 -84
  7. data/lib/antisamy/css/css_validator.rb +128 -128
  8. data/lib/antisamy/csspool/rsac.rb +1 -1
  9. data/lib/antisamy/csspool/rsac/sac.rb +14 -14
  10. data/lib/antisamy/csspool/rsac/sac/conditions.rb +5 -5
  11. data/lib/antisamy/csspool/rsac/sac/conditions/attribute_condition.rb +50 -50
  12. data/lib/antisamy/csspool/rsac/sac/conditions/begin_hyphen_condition.rb +18 -18
  13. data/lib/antisamy/csspool/rsac/sac/conditions/class_condition.rb +18 -18
  14. data/lib/antisamy/csspool/rsac/sac/conditions/combinator_condition.rb +36 -36
  15. data/lib/antisamy/csspool/rsac/sac/conditions/condition.rb +29 -29
  16. data/lib/antisamy/csspool/rsac/sac/conditions/id_condition.rb +23 -23
  17. data/lib/antisamy/csspool/rsac/sac/conditions/one_of_condition.rb +18 -18
  18. data/lib/antisamy/csspool/rsac/sac/conditions/pseudo_class_condition.rb +20 -20
  19. data/lib/antisamy/csspool/rsac/sac/document_handler.rb +66 -66
  20. data/lib/antisamy/csspool/rsac/sac/error_handler.rb +13 -13
  21. data/lib/antisamy/csspool/rsac/sac/generated_parser.rb +1012 -1012
  22. data/lib/antisamy/csspool/rsac/sac/generated_property_parser.rb +9284 -9284
  23. data/lib/antisamy/csspool/rsac/sac/lexeme.rb +27 -27
  24. data/lib/antisamy/csspool/rsac/sac/lexical_unit.rb +201 -201
  25. data/lib/antisamy/csspool/rsac/sac/parse_exception.rb +4 -4
  26. data/lib/antisamy/csspool/rsac/sac/parser.rb +109 -109
  27. data/lib/antisamy/csspool/rsac/sac/property_parser.rb +44 -44
  28. data/lib/antisamy/csspool/rsac/sac/selectors.rb +5 -5
  29. data/lib/antisamy/csspool/rsac/sac/selectors/child_selector.rb +36 -36
  30. data/lib/antisamy/csspool/rsac/sac/selectors/conditional_selector.rb +45 -45
  31. data/lib/antisamy/csspool/rsac/sac/selectors/descendant_selector.rb +36 -36
  32. data/lib/antisamy/csspool/rsac/sac/selectors/element_selector.rb +35 -35
  33. data/lib/antisamy/csspool/rsac/sac/selectors/selector.rb +25 -25
  34. data/lib/antisamy/csspool/rsac/sac/selectors/sibling_selector.rb +35 -35
  35. data/lib/antisamy/csspool/rsac/sac/selectors/simple_selector.rb +21 -21
  36. data/lib/antisamy/csspool/rsac/sac/token.rb +25 -25
  37. data/lib/antisamy/csspool/rsac/sac/tokenizer.rb +185 -185
  38. data/lib/antisamy/csspool/rsac/stylesheet.rb +3 -3
  39. data/lib/antisamy/csspool/rsac/stylesheet/rule.rb +20 -20
  40. data/lib/antisamy/csspool/rsac/stylesheet/stylesheet.rb +76 -76
  41. data/lib/antisamy/html/handler.rb +112 -99
  42. data/lib/antisamy/html/sax_filter.rb +305 -302
  43. data/lib/antisamy/html/scanner.rb +47 -43
  44. data/lib/antisamy/model/attribute.rb +19 -19
  45. data/lib/antisamy/model/css_property.rb +39 -39
  46. data/lib/antisamy/model/tag.rb +31 -31
  47. data/lib/antisamy/policy.rb +577 -545
  48. data/lib/antisamy/scan_results.rb +89 -89
  49. data/spec/antisamy_spec.rb +208 -142
  50. data/spec/spec_helper.rb +12 -12
  51. metadata +79 -81
@@ -1,3 +1,3 @@
1
- require "antisamy/csspool/rsac/stylesheet/stylesheet"
2
- require "antisamy/csspool/rsac/stylesheet/rule"
3
-
1
+ require "antisamy/csspool/rsac/stylesheet/stylesheet"
2
+ require "antisamy/csspool/rsac/stylesheet/rule"
3
+
@@ -1,20 +1,20 @@
1
- require 'set'
2
- module RSAC
3
- class StyleSheet
4
- class Rule
5
- include Comparable
6
-
7
- attr_accessor :selector, :properties, :index
8
- def initialize(selector, index, properties = [])
9
- @selector = selector
10
- @properties = Set.new(properties)
11
- @index = index
12
- end
13
-
14
- def <=>(other)
15
- comp = selector.specificity <=> other.selector.specificity
16
- comp == 0 ? index <=> other.index : comp
17
- end
18
- end
19
- end
20
- end
1
+ require 'set'
2
+ module RSAC
3
+ class StyleSheet
4
+ class Rule
5
+ include Comparable
6
+
7
+ attr_accessor :selector, :properties, :index
8
+ def initialize(selector, index, properties = [])
9
+ @selector = selector
10
+ @properties = Set.new(properties)
11
+ @index = index
12
+ end
13
+
14
+ def <=>(other)
15
+ comp = selector.specificity <=> other.selector.specificity
16
+ comp == 0 ? index <=> other.index : comp
17
+ end
18
+ end
19
+ end
20
+ end
@@ -1,76 +1,76 @@
1
- module RSAC
2
- class StyleSheet < RSAC::DocumentHandler
3
- attr_reader :rules
4
-
5
- def initialize(sac)
6
- @sac = sac
7
- @rules = []
8
- @current_rules = []
9
- @selector_index = 0
10
- end
11
-
12
- def start_selector(selectors)
13
- selectors.each { |selector|
14
- @current_rules << Rule.new(selector, @selector_index)
15
- }
16
- end
17
-
18
- def end_selector(selectors)
19
- @rules += @current_rules
20
- @current_rules = []
21
- @selector_index += 1
22
- reduce!
23
- end
24
-
25
- def find_rule(rule)
26
- rule = self.create_rule(rule) if rule.is_a?(String)
27
- rules.find { |x| x.selector == rule.selector }
28
- end
29
- alias :[] :find_rule
30
-
31
- def create_rule(rule)
32
- Rule.new(@sac.parse_rule(rule).first, @selector_index += 1)
33
- end
34
-
35
- def property(name, value, important)
36
- @current_rules.each { |selector|
37
- selector.properties << [name, value, important]
38
- }
39
- end
40
-
41
- # Get a hash of rules by property
42
- def rules_by_property
43
- rules_by_property = Hash.new { |h,k| h[k] = [] }
44
- @rules.each { |sel|
45
- props = sel.properties.to_a.sort_by { |x| x.hash } # HACK?
46
- rules_by_property[props] << sel
47
- }
48
- rules_by_property
49
- end
50
-
51
- def to_css
52
- rules_by_property.map do |properties, rules|
53
- rules.map { |rule| rule.selector.to_css }.sort.join(', ') + " {\n" +
54
- properties.map { |key,value,important|
55
- # Super annoying. If the property is font-family, its supposed to
56
- # be commas
57
- join_val = ('font-family' == key) ? ', ' : ' '
58
- values = [value].flatten.join(join_val)
59
- "#{key}:#{values}#{important ? ' !important' : ''};"
60
- }.join("\n") + "\n}"
61
- end.sort.join("\n")
62
- end
63
-
64
- private
65
- # Remove duplicate rules
66
- def reduce!
67
- unique_rules = {}
68
- @rules.each do |rule|
69
- (unique_rules[rule.selector] ||= rule).properties += rule.properties
70
- end
71
- @rules = unique_rules.values
72
- self
73
- end
74
- end
75
- end
76
-
1
+ module RSAC
2
+ class StyleSheet < RSAC::DocumentHandler
3
+ attr_reader :rules
4
+
5
+ def initialize(sac)
6
+ @sac = sac
7
+ @rules = []
8
+ @current_rules = []
9
+ @selector_index = 0
10
+ end
11
+
12
+ def start_selector(selectors)
13
+ selectors.each { |selector|
14
+ @current_rules << Rule.new(selector, @selector_index)
15
+ }
16
+ end
17
+
18
+ def end_selector(selectors)
19
+ @rules += @current_rules
20
+ @current_rules = []
21
+ @selector_index += 1
22
+ reduce!
23
+ end
24
+
25
+ def find_rule(rule)
26
+ rule = self.create_rule(rule) if rule.is_a?(String)
27
+ rules.find { |x| x.selector == rule.selector }
28
+ end
29
+ alias :[] :find_rule
30
+
31
+ def create_rule(rule)
32
+ Rule.new(@sac.parse_rule(rule).first, @selector_index += 1)
33
+ end
34
+
35
+ def property(name, value, important)
36
+ @current_rules.each { |selector|
37
+ selector.properties << [name, value, important]
38
+ }
39
+ end
40
+
41
+ # Get a hash of rules by property
42
+ def rules_by_property
43
+ rules_by_property = Hash.new { |h,k| h[k] = [] }
44
+ @rules.each { |sel|
45
+ props = sel.properties.to_a.sort_by { |x| x.hash } # HACK?
46
+ rules_by_property[props] << sel
47
+ }
48
+ rules_by_property
49
+ end
50
+
51
+ def to_css
52
+ rules_by_property.map do |properties, rules|
53
+ rules.map { |rule| rule.selector.to_css }.sort.join(', ') + " {\n" +
54
+ properties.map { |key,value,important|
55
+ # Super annoying. If the property is font-family, its supposed to
56
+ # be commas
57
+ join_val = ('font-family' == key) ? ', ' : ' '
58
+ values = [value].flatten.join(join_val)
59
+ "#{key}:#{values}#{important ? ' !important' : ''};"
60
+ }.join("\n") + "\n}"
61
+ end.sort.join("\n")
62
+ end
63
+
64
+ private
65
+ # Remove duplicate rules
66
+ def reduce!
67
+ unique_rules = {}
68
+ @rules.each do |rule|
69
+ (unique_rules[rule.selector] ||= rule).properties += rule.properties
70
+ end
71
+ @rules = unique_rules.values
72
+ self
73
+ end
74
+ end
75
+ end
76
+
@@ -1,99 +1,112 @@
1
- module AntiSamy
2
-
3
- class Handler
4
-
5
- attr_accessor :errors
6
- def initialize(policy,output) #:nodoc:
7
- @document = Nokogiri::HTML::DocumentFragment.parse("")
8
- @current_node = @document
9
- @policy = policy
10
- @preserve_whitespace = @policy.directive(Policy::PRESERVE_SPACE)
11
- @errors = []
12
- @output_encoding = output
13
- end
14
-
15
- # HTML entity encode some text
16
- def encode_text(text)
17
- return "" if text.nil?
18
- @document.encode_special_chars(text)
19
- end
20
-
21
- # create a cdata section
22
- def cdata(text)
23
- node = Nokogiri::XML::CDATA.new(@document,text)
24
- @current_node.add_child(node)
25
- end
26
-
27
- # create a comment
28
- def comment(text) #:nodoc:
29
- @current_node.add_child(Nokogiri::XML::Comment.new(@document, text))
30
- end
31
-
32
- # create a text node
33
- def characters(text)
34
- node = @current_node.children.last
35
- if node and node.text?
36
- node.content += text
37
- else
38
- @current_node.add_child(Nokogiri::XML::Text.new(text, @document))
39
- end
40
- end
41
-
42
- # start an element
43
- def start_element(name,attributes)
44
- if name.eql?("head") or name.eql?("body") or name.eql?("html")
45
- return
46
- end
47
- elem = Nokogiri::XML::Element.new(name, @document)
48
- attributes.each do |attrib_pair|
49
- elem[attrib_pair.first] = attrib_pair.last
50
- end
51
- # Special param tag hacking, as libxml/nokogiri doesnt generate an end tag
52
- # for param tags it seems
53
- if name.eql?("param")
54
- inner_html = "<param"
55
- attributes.each do |attrib_pair|
56
- inner_html<< " #{attrib_pair.first}=\"#{attrib_pair.last}\""
57
- end
58
- inner_html << "/>"
59
- # we create a fake cdata node, add it *and* dont move our parent yet
60
- elem = Nokogiri::XML::CDATA.new(@document,inner_html)
61
- @current_node.add_child(elem)
62
- return
63
- end
64
- @current_node = @current_node.add_child(elem)
65
- end
66
-
67
- #end an element
68
- def end_element(name)
69
- if @current_node.nil? or !@current_node.name.eql?(name)
70
- return
71
- end
72
- @current_node = @current_node.parent if @current_node.parent
73
- end
74
-
75
- # format the output applying any policy rules
76
- def document
77
- # check some directives
78
- indent = 0
79
- options = Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS
80
- if @policy.directive(Policy::FORMAT_OUTPUT)
81
- options |= Nokogiri::XML::Node::SaveOptions::FORMAT
82
- indent = 2
83
- end
84
- if @policy.directive(Policy::OMIT_DOC_TYPE) || @policy.directive(Policy::OMIT_XML_DECL)
85
- options |= Nokogiri::XML::Node::SaveOptions::NO_DECLARATION
86
- end
87
-
88
- clean = ""
89
- if @policy.directive(Policy::USE_XHTML)
90
- options |= Nokogiri::XML::Node::SaveOptions::AS_XHTML
91
- clean = @document.to_xhtml(:encoding => @output_encoding, :indent=>indent,:save_with=>options)
92
- else
93
- clean = @document.to_html(:encoding => @output_encoding, :indent=>indent,:save_with=>options)
94
- end
95
- return clean
96
- end
97
-
98
- end
99
- end
1
+ module AntiSamy
2
+
3
+ class Handler
4
+
5
+ attr_accessor :errors
6
+ def initialize(policy,output,fragment = true) #:nodoc:
7
+ @document = Nokogiri::HTML::DocumentFragment.parse("")
8
+ @current_node = @document
9
+ @policy = policy
10
+ @preserve_whitespace = @policy.directive(Policy::PRESERVE_SPACE)
11
+ @errors = []
12
+ @output_encoding = output
13
+ @fragment = fragment
14
+ end
15
+
16
+ # HTML entity encode some text
17
+ def encode_text(text)
18
+ return "" if text.nil?
19
+ @document.encode_special_chars(text)
20
+ end
21
+
22
+ # create a cdata section
23
+ def cdata(text)
24
+ node = Nokogiri::XML::CDATA.new(@document,text)
25
+ @current_node.add_child(node)
26
+ end
27
+
28
+ # create a comment
29
+ def comment(text) #:nodoc:
30
+ @current_node.add_child(Nokogiri::XML::Comment.new(@document, text))
31
+ end
32
+
33
+ # create a text node
34
+ def characters(text)
35
+ node = @current_node.children.last
36
+ if node and node.text?
37
+ node.content += text
38
+ else
39
+ @current_node.add_child(Nokogiri::XML::Text.new(text, @document))
40
+ end
41
+ end
42
+
43
+ # start an element
44
+ def start_element(name,attributes)
45
+ if @fragment
46
+ if name.eql?("head") or name.eql?("body") or name.eql?("html")
47
+ return
48
+ end
49
+ end
50
+ elem = Nokogiri::XML::Element.new(name, @document)
51
+ attributes.each do |attrib_pair|
52
+ elem[attrib_pair.first] = attrib_pair.last
53
+ end
54
+ # Special param tag hacking, as libxml/nokogiri doesnt generate an end tag
55
+ # for param tags it seems
56
+ if name.eql?("param")
57
+ inner_html = "<param"
58
+ attributes.each do |attrib_pair|
59
+ inner_html<< " #{attrib_pair.first}=\"#{attrib_pair.last}\""
60
+ end
61
+ inner_html << "/>"
62
+ # we create a fake cdata node, add it *and* dont move our parent yet
63
+ elem = Nokogiri::XML::CDATA.new(@document,inner_html)
64
+ @current_node.add_child(elem)
65
+ return
66
+ end
67
+ @current_node = @current_node.add_child(elem)
68
+ end
69
+
70
+ #end an element
71
+ def end_element(name)
72
+ if @current_node.nil? or !@current_node.name.eql?(name)
73
+ return
74
+ end
75
+ if @current_node.children.empty?
76
+ if @policy.allow_empty?(@current_node.name)
77
+ @current_node = @current_node.parent if @current_node.parent
78
+ else
79
+ tnode = @current_node
80
+ @current_node = @current_node.parent if @current_node.parent
81
+ tnode.remove
82
+ end
83
+ else
84
+ @current_node = @current_node.parent if @current_node.parent
85
+ end
86
+ end
87
+
88
+ # format the output applying any policy rules
89
+ def document
90
+ # check some directives
91
+ indent = 0
92
+ options = Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS
93
+ if @policy.directive(Policy::FORMAT_OUTPUT)
94
+ options |= Nokogiri::XML::Node::SaveOptions::FORMAT
95
+ indent = 2
96
+ end
97
+ if @policy.directive(Policy::OMIT_DOC_TYPE) || @policy.directive(Policy::OMIT_XML_DECL)
98
+ options |= Nokogiri::XML::Node::SaveOptions::NO_DECLARATION
99
+ end
100
+
101
+ clean = ""
102
+ if @policy.directive(Policy::USE_XHTML)
103
+ options |= Nokogiri::XML::Node::SaveOptions::AS_XHTML
104
+ clean = @document.to_xhtml(:encoding => @output_encoding, :indent=>indent,:save_with=>options)
105
+ else
106
+ clean = @document.to_html(:encoding => @output_encoding, :indent=>indent,:save_with=>options)
107
+ end
108
+ return clean
109
+ end
110
+
111
+ end
112
+ end
@@ -1,302 +1,305 @@
1
- module AntiSamy
2
- # Quick and Dirty Stack class
3
- class Stack
4
- def initialize
5
- @stack = []
6
- end
7
- # push an emement ont he stack
8
- def push(v)
9
- @stack.push v
10
- end
11
- # pop an element off the stack
12
- def pop
13
- @stack.pop
14
- end
15
- # size of stack
16
- def size
17
- @stack.size
18
- end
19
- # is the stack empty
20
- def empty?
21
- @stack.empty?
22
- end
23
- # peek to see what next element is
24
- def peek?(v)
25
- return false if @stack.empty?
26
- return @stack.last.eql?(v)
27
- end
28
-
29
- def peek
30
- @stack.last
31
- end
32
-
33
- end
34
-
35
- class SaxFilter < Nokogiri::XML::SAX::Document
36
- def initialize(policy,handler,param_tag)
37
- @policy = policy
38
- @handler = handler
39
- @stack = Stack.new
40
- @css_content = nil
41
- @css_attributes = nil
42
- @css_scanner = CssScanner.new(policy)
43
- @param_tag = param_tag
44
- end
45
-
46
- def error(text)
47
- end
48
-
49
- def warning(text)
50
- end
51
-
52
- # Always create a HTML document unless the DECL was set beforehand
53
- def start_document
54
- end
55
-
56
- # Add a comment block
57
- def comment(text)
58
- return if text.nil?
59
- if @policy.directive(Policy::PRESERVE_COMMENTS) =~ /true/i
60
- # Strip out conditional directives
61
- text.gsub!(%r{<!?!\[(?:end)?if*\]}ixm,"")
62
- text.gsub!(%r{\[(?:if).*\]>},"")
63
- @handler.comment(text)
64
- end
65
- end
66
-
67
- def convert_array(x)
68
- if x and x.first.is_a?(Array)
69
- return x
70
- end
71
- i = 0
72
- h = []
73
- while i < x.size
74
- m = []
75
- m[0] = x[i]
76
- m[1] = x[i+1]
77
- h << m
78
- i += 2
79
- end
80
- h
81
- end
82
-
83
- def fetch_attribute(array,key)
84
- array.each do |pair|
85
- if pair.first.eql?(key)
86
- return pair.last
87
- end
88
- end
89
- nil
90
- end
91
-
92
- # Start an element,
93
- def start_element(name, attributes = [])
94
- attributes = convert_array(attributes)
95
- o_attributes = attributes.dup
96
- tag = @policy.tag(name)
97
- masquerade = false
98
- embed_name = nil
99
- embed_value = nil
100
- # Handle validate param tag as an embed tag
101
- if tag.nil? && @policy.directive(Policy::VALIDATE_P_AS_E) && name.eql?("param")
102
- embed = @param_tag
103
- if @policy.tag("embed")
104
- embed = @policy.tag("embed")
105
- end
106
- if embed and embed.action == Policy::ACTION_VALIDATE
107
- tag = embed
108
- masquerade = true
109
- embed_name = fetch_attribute(attributes,"name")
110
- embed_value = fetch_attribute(attributes,"value")
111
- attributes = [ [embed_name,embed_value] ]
112
- end
113
- end
114
- valid_attributes = []
115
- if @stack.peek?(:css) or @stack.peek?(:remove)
116
- # We are in remove mode to remove this tag as well as any child style elements if css mode
117
- @stack.push(:remove)
118
- elsif (tag.nil? && @policy.directive(Policy::ON_UNKNOWN_TAG).eql?("encode")) or (!tag.nil? && tag.action.eql?(Policy::ACTION_ENCODE)) or @policy.encode?(name.downcase)
119
- tmp = "<#{name}>"
120
- @handler.characters(tmp)
121
- @stack.push(:filter)
122
- elsif tag.nil?
123
- # We ignore missing HTML and BODY tags since we are fragment parsing, but the
124
- # Nokogiri HTML::SAX parser injects HTML/BODY if they are missing
125
- unless name.eql?("html") or name.eql?("body")
126
- @handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_NOT_IN_POLICY,name)
127
- end
128
- # Nokogiri work around for a style tag being auto inserted inot head
129
- if name.eql?("head")
130
- @stack.push(:remove)
131
- else
132
- @stack.push(:filter)
133
- end
134
- elsif tag.action.eql?(Policy::ACTION_FILTER)
135
- @handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_FILTERED,name)
136
- @stack.push(:filter)
137
- elsif tag.action.eql?(Policy::ACTION_VALIDATE)
138
- # Handle validation
139
- remove_tag = false
140
- filter_tag = false
141
- is_style = name.include?("style")
142
- if is_style
143
- @stack.push(:css)
144
- @css_content = ''
145
- @css_attributes = []
146
- else
147
- # Validate attributes
148
- attributes.each do |pair|
149
- a_name = pair.first
150
- a_value = pair.last
151
- attrib = tag.attribute(a_name.downcase)
152
- if attrib.nil?
153
- attrib = @policy.global(a_name.downcase)
154
- end
155
- # check if the attribute is a style
156
- if a_name.eql?("style")
157
- # Handle Style tags
158
- begin
159
- results = @css_scanner.scan_inline(a_value,name,@policy.max_input)
160
- unless result.clean_html.empty?
161
- valid_attributes << [a_name,results.clean_html]
162
- end
163
- @handler.errors << results.messages
164
- @handler.errors.flatten!
165
- rescue Exception => e
166
- @handler.errors << ScanMessage.new(ScanMessage::ERROR_CSS_ATTRIBUTE_MALFORMED,name,@handler.encode_text(a_value))
167
- end
168
- elsif !attrib.nil? # Attribute is not nil lets check it
169
- valid = false
170
- attrib.values.each do |av|
171
- if av.eql?(a_value)
172
- valid_attributes << [a_name,a_value]
173
- valid = true
174
- break
175
- end
176
- end
177
- unless valid
178
- attrib.expressions.each do |ae|
179
- mc = ae.match(a_value)
180
- if mc and mc.to_s == a_value
181
- valid_attributes << [a_name,a_value]
182
- valid = true
183
- break
184
- end
185
- end
186
- end
187
- # we check the matches
188
- if !valid && attrib.action.eql?(Attribute::ACTION_REMOVE_TAG)
189
- @handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_INVALID_REMOVED,tag.name,@handler.encode_text(a_name),@handler.encode_text(a_value))
190
- remove_tag = true
191
- elsif !valid && attrib.action.eql?(Attribute::ACTION_FILTER_TAG)
192
- @handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_CAUSE_FILTER,tag.name,@handler.encode_text(a_name),@handler.encode_text(a_value))
193
- filter_tag = true
194
- elsif !valid
195
- @handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_INVALID,tag.name,@handler.encode_text(a_name),@handler.encode_text(a_value))
196
- end
197
-
198
- else # attribute was null
199
- @handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_NOT_IN_POLICY,tag.name,a_name,@handler.encode_text(a_value))
200
- if masquerade
201
- filter_tag = true
202
- end
203
- end
204
- end # end attirubte loop
205
- end
206
- if remove_tag
207
- @stack.push(:remove)
208
- elsif filter_tag
209
- @stack.push(:filter)
210
- else
211
- if name.eql?("a") and @policy.directive(Policy::ANCHROS_NOFOLLOW)
212
- valid_attributes << ["rel","nofollow"]
213
- end
214
- if masquerade
215
- valid_attributes = []
216
- valid_attributes << ["name",embed_name]
217
- valid_attributes << ["value",embed_value]
218
- end
219
- @stack.push(:keep) unless @stack.peek?(:css)
220
- end
221
- # End validation action
222
- elsif tag.action.eql?(Policy::ACTION_TRUNCATE)
223
- @stack.push(:truncate)
224
- else
225
- @handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_DISALLOWED,name)
226
- @stack.push(:remove)
227
- end
228
- # We now know wether to keep or truncat this tag
229
- if @stack.peek?(:truncate)
230
- @handler.start_element(name,[])
231
- elsif @stack.peek?(:keep)
232
- @handler.start_element(name,valid_attributes)
233
- end
234
- end
235
-
236
- def start_element_namespace(name,attrs=[],prefix = nil, uri = nil, ns = nil)
237
- start_element(name,attrs)
238
- end
239
-
240
- def end_element_namespace(name,prefix,uri)
241
- end_element(name)
242
- end
243
-
244
- # Add character data to the current tag
245
- def characters(text)
246
- unless text =~ /\S/ # skip whitespace
247
- return unless @policy.directive(Policy::PRESERVE_SPACE)
248
- end
249
- if @stack.peek?(:css)
250
- @css_content << text
251
- elsif !@stack.peek?(:remove)
252
- @handler.characters(text)
253
- end
254
- end
255
-
256
- # End an elements, will raise an error on a loose tag
257
- def end_element(name)
258
- if @stack.peek?(:remove)
259
- @stack.pop
260
- elsif @stack.peek?(:filter)
261
- @stack.pop
262
- elsif @stack.peek?(:css)
263
- @stack.pop
264
- # Do css stuff here
265
- begin
266
- results = @css_scanner.scan_sheet(@css_content,@policy.max_input)
267
- @handler.errors << results.messages
268
- @handler.errors.flatten!
269
- unless results.clean_html.nil? or results.clean_html.empty?
270
- @handler.start_element(name,@css_attributes)
271
- @handler.characters results.clean_html
272
- @handler.end_element(name)
273
- else
274
- @handler.start_element(name,@css_attributes)
275
- @handler.characters "/* */"
276
- @handler.end_element(name)
277
- end
278
- rescue Exception => e
279
- puts e
280
- @handler.errors << ScanMessage.new(ScanMessage::ERROR_CSS_TAG_MALFORMED,name,@handler.encode_text(@css_content))
281
- ensure
282
- @css_content = nil
283
- @css_attributes = nil
284
- end
285
- else
286
- @stack.pop
287
- @handler.end_element(name)
288
- end
289
- end
290
-
291
- # Add cdata a cdata block
292
- def cdata_block(text)
293
- if @stack.peek?(:css)
294
- @css_content << text
295
- elsif !@stack.peek?(:remove)
296
- @handler.characters(text)
297
- else
298
- @handler.cdata(@handler.encode_text(text)) unless @stack.peek == :remove
299
- end
300
- end
301
- end
302
- end
1
+ module AntiSamy
2
+ # Quick and Dirty Stack class
3
+ class Stack
4
+ def initialize
5
+ @stack = []
6
+ end
7
+ # push an emement ont he stack
8
+ def push(v)
9
+ @stack.push v
10
+ end
11
+ # pop an element off the stack
12
+ def pop
13
+ @stack.pop
14
+ end
15
+ # size of stack
16
+ def size
17
+ @stack.size
18
+ end
19
+ # is the stack empty
20
+ def empty?
21
+ @stack.empty?
22
+ end
23
+ # peek to see what next element is
24
+ def peek?(v)
25
+ return false if @stack.empty?
26
+ return @stack.last.eql?(v)
27
+ end
28
+
29
+ def peek
30
+ @stack.last
31
+ end
32
+
33
+ end
34
+
35
+ class SaxFilter < Nokogiri::XML::SAX::Document
36
+ def initialize(policy,handler,param_tag,fragment = true)
37
+ @policy = policy
38
+ @handler = handler
39
+ @stack = Stack.new
40
+ @css_content = nil
41
+ @css_attributes = nil
42
+ @css_scanner = CssScanner.new(policy)
43
+ @param_tag = param_tag
44
+ @fragment = fragment
45
+ end
46
+
47
+ def error(text)
48
+ end
49
+
50
+ def warning(text)
51
+ end
52
+
53
+ # Always create a HTML document unless the DECL was set beforehand
54
+ def start_document
55
+ end
56
+
57
+ # Add a comment block
58
+ def comment(text)
59
+ return if text.nil?
60
+ if @policy.directive(Policy::PRESERVE_COMMENTS) =~ /true/i
61
+ # Strip out conditional directives
62
+ text.gsub!(%r{<!?!\[(?:end)?if*\]}ixm,"")
63
+ text.gsub!(%r{\[(?:if).*\]>},"")
64
+ @handler.comment(text)
65
+ end
66
+ end
67
+
68
+ def convert_array(x)
69
+ if x and x.first.is_a?(Array)
70
+ return x
71
+ end
72
+ i = 0
73
+ h = []
74
+ while i < x.size
75
+ m = []
76
+ m[0] = x[i]
77
+ m[1] = x[i+1]
78
+ h << m
79
+ i += 2
80
+ end
81
+ h
82
+ end
83
+
84
+ def fetch_attribute(array,key)
85
+ array.each do |pair|
86
+ if pair.first.eql?(key)
87
+ return pair.last
88
+ end
89
+ end
90
+ nil
91
+ end
92
+
93
+ # Start an element,
94
+ def start_element(name, attributes = [])
95
+ attributes = convert_array(attributes)
96
+ o_attributes = attributes.dup
97
+ tag = @policy.tag(name)
98
+ masquerade = false
99
+ embed_name = nil
100
+ embed_value = nil
101
+ # Handle validate param tag as an embed tag
102
+ if tag.nil? && @policy.directive(Policy::VALIDATE_P_AS_E) && name.eql?("param")
103
+ embed = @param_tag
104
+ if @policy.tag("embed")
105
+ embed = @policy.tag("embed")
106
+ end
107
+ if embed and embed.action == Policy::ACTION_VALIDATE
108
+ tag = embed
109
+ masquerade = true
110
+ embed_name = fetch_attribute(attributes,"name")
111
+ embed_value = fetch_attribute(attributes,"value")
112
+ attributes = [ [embed_name,embed_value] ]
113
+ end
114
+ end
115
+ valid_attributes = []
116
+ if @stack.peek?(:css) or @stack.peek?(:remove)
117
+ # We are in remove mode to remove this tag as well as any child style elements if css mode
118
+ @stack.push(:remove)
119
+ elsif (tag.nil? && @policy.directive(Policy::ON_UNKNOWN_TAG).eql?("encode")) or (!tag.nil? && tag.action.eql?(Policy::ACTION_ENCODE)) or @policy.encode?(name.downcase)
120
+ tmp = "<#{name}>"
121
+ @handler.characters(tmp)
122
+ @stack.push(:filter)
123
+ elsif tag.nil?
124
+ # We ignore missing HTML and BODY tags since we are fragment parsing, but the
125
+ # Nokogiri HTML::SAX parser injects HTML/BODY if they are missing
126
+ if @fragment
127
+ unless name.eql?("html") or name.eql?("body")
128
+ @handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_NOT_IN_POLICY,name)
129
+ end
130
+ # Nokogiri work around for a style tag being auto inserted inot head
131
+ end
132
+ if name.eql?("head") && @fragment
133
+ @stack.push(:remove)
134
+ else
135
+ @stack.push(:filter)
136
+ end
137
+ elsif tag.action.eql?(Policy::ACTION_FILTER)
138
+ @handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_FILTERED,name)
139
+ @stack.push(:filter)
140
+ elsif tag.action.eql?(Policy::ACTION_VALIDATE)
141
+ # Handle validation
142
+ remove_tag = false
143
+ filter_tag = false
144
+ is_style = name.include?("style")
145
+ if is_style
146
+ @stack.push(:css)
147
+ @css_content = ''
148
+ @css_attributes = []
149
+ else
150
+ # Validate attributes
151
+ attributes.each do |pair|
152
+ a_name = pair.first
153
+ a_value = pair.last
154
+ attrib = tag.attribute(a_name.downcase)
155
+ if attrib.nil?
156
+ attrib = @policy.global(a_name.downcase)
157
+ end
158
+ # check if the attribute is a style
159
+ if a_name.eql?("style")
160
+ # Handle Style tags
161
+ begin
162
+ results = @css_scanner.scan_inline(a_value,name,@policy.max_input)
163
+ unless result.clean_html.empty?
164
+ valid_attributes << [a_name,results.clean_html]
165
+ end
166
+ @handler.errors << results.messages
167
+ @handler.errors.flatten!
168
+ rescue Exception => e
169
+ @handler.errors << ScanMessage.new(ScanMessage::ERROR_CSS_ATTRIBUTE_MALFORMED,name,@handler.encode_text(a_value))
170
+ end
171
+ elsif !attrib.nil? # Attribute is not nil lets check it
172
+ valid = false
173
+ attrib.values.each do |av|
174
+ if av.eql?(a_value)
175
+ valid_attributes << [a_name,a_value]
176
+ valid = true
177
+ break
178
+ end
179
+ end
180
+ unless valid
181
+ attrib.expressions.each do |ae|
182
+ mc = ae.match(a_value)
183
+ if mc and mc.to_s == a_value
184
+ valid_attributes << [a_name,a_value]
185
+ valid = true
186
+ break
187
+ end
188
+ end
189
+ end
190
+ # we check the matches
191
+ if !valid && attrib.action.eql?(Attribute::ACTION_REMOVE_TAG)
192
+ @handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_INVALID_REMOVED,tag.name,@handler.encode_text(a_name),@handler.encode_text(a_value))
193
+ remove_tag = true
194
+ elsif !valid && attrib.action.eql?(Attribute::ACTION_FILTER_TAG)
195
+ @handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_CAUSE_FILTER,tag.name,@handler.encode_text(a_name),@handler.encode_text(a_value))
196
+ filter_tag = true
197
+ elsif !valid
198
+ @handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_INVALID,tag.name,@handler.encode_text(a_name),@handler.encode_text(a_value))
199
+ end
200
+
201
+ else # attribute was null
202
+ @handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_NOT_IN_POLICY,tag.name,a_name,@handler.encode_text(a_value))
203
+ if masquerade
204
+ filter_tag = true
205
+ end
206
+ end
207
+ end # end attirubte loop
208
+ end
209
+ if remove_tag
210
+ @stack.push(:remove)
211
+ elsif filter_tag
212
+ @stack.push(:filter)
213
+ else
214
+ if name.eql?("a") and @policy.directive(Policy::ANCHORS_NOFOLLOW)
215
+ valid_attributes << ["rel","nofollow"]
216
+ end
217
+ if masquerade
218
+ valid_attributes = []
219
+ valid_attributes << ["name",embed_name]
220
+ valid_attributes << ["value",embed_value]
221
+ end
222
+ @stack.push(:keep) unless @stack.peek?(:css)
223
+ end
224
+ # End validation action
225
+ elsif tag.action.eql?(Policy::ACTION_TRUNCATE)
226
+ @stack.push(:truncate)
227
+ else
228
+ @handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_DISALLOWED,name)
229
+ @stack.push(:remove)
230
+ end
231
+ # We now know wether to keep or truncat this tag
232
+ if @stack.peek?(:truncate)
233
+ @handler.start_element(name,[])
234
+ elsif @stack.peek?(:keep)
235
+ @handler.start_element(name,valid_attributes)
236
+ end
237
+ end
238
+
239
+ def start_element_namespace(name,attrs=[],prefix = nil, uri = nil, ns = nil)
240
+ start_element(name,attrs)
241
+ end
242
+
243
+ def end_element_namespace(name,prefix,uri)
244
+ end_element(name)
245
+ end
246
+
247
+ # Add character data to the current tag
248
+ def characters(text)
249
+ unless text =~ /\S/ # skip whitespace
250
+ return unless @policy.directive(Policy::PRESERVE_SPACE)
251
+ end
252
+ if @stack.peek?(:css)
253
+ @css_content << text
254
+ elsif !@stack.peek?(:remove)
255
+ @handler.characters(text)
256
+ end
257
+ end
258
+
259
+ # End an elements, will raise an error on a loose tag
260
+ def end_element(name)
261
+ if @stack.peek?(:remove)
262
+ @stack.pop
263
+ elsif @stack.peek?(:filter)
264
+ @stack.pop
265
+ elsif @stack.peek?(:css)
266
+ @stack.pop
267
+ # Do css stuff here
268
+ begin
269
+ results = @css_scanner.scan_sheet(@css_content,@policy.max_input)
270
+ @handler.errors << results.messages
271
+ @handler.errors.flatten!
272
+ unless results.clean_html.nil? or results.clean_html.empty?
273
+ @handler.start_element(name,@css_attributes)
274
+ @handler.characters results.clean_html
275
+ @handler.end_element(name)
276
+ else
277
+ @handler.start_element(name,@css_attributes)
278
+ @handler.characters "/* */"
279
+ @handler.end_element(name)
280
+ end
281
+ rescue Exception => e
282
+ puts e
283
+ @handler.errors << ScanMessage.new(ScanMessage::ERROR_CSS_TAG_MALFORMED,name,@handler.encode_text(@css_content))
284
+ ensure
285
+ @css_content = nil
286
+ @css_attributes = nil
287
+ end
288
+ else
289
+ @stack.pop
290
+ @handler.end_element(name)
291
+ end
292
+ end
293
+
294
+ # Add cdata a cdata block
295
+ def cdata_block(text)
296
+ if @stack.peek?(:css)
297
+ @css_content << text
298
+ elsif !@stack.peek?(:remove)
299
+ @handler.characters(text)
300
+ else
301
+ @handler.cdata(@handler.encode_text(text)) unless @stack.peek == :remove
302
+ end
303
+ end
304
+ end
305
+ end