antisamy 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Sal Scotto
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,36 @@
1
+ = Antisamy
2
+
3
+ This project is a port of the Java AntiSamy project to the Ruby runtime. It is intended to provide a library for developers to add protection to their web applications from malicious
4
+ user-supplied HTML and CSS. Please check out the AntiSamy project over at OWASP[http://www.owasp.org/index.php/Category:OWASP_AntiSamy_Project].
5
+
6
+ == TODO
7
+
8
+ * Add CSS scrubbing support
9
+
10
+ == Synopsis
11
+
12
+ require 'antisamy'
13
+ policy = AntiSamy.policy('antisamy.xml')
14
+ tainted_html = 'User supplied markup'
15
+ scan_results = AntiSamy.scan(tainted_html,policy)
16
+ clean_html = scan_results.clean_html
17
+
18
+ == Example Policies
19
+
20
+ Please check policy-examples[https://github.com/washu/antisamy-ruby/tree/master/policy-examples] for sample policy files.
21
+
22
+ == Contributing to antisamy
23
+
24
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
25
+ * Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it
26
+ * Fork the project
27
+ * Start a feature/bugfix branch
28
+ * Commit and push until you are happy with your contribution
29
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
30
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
31
+
32
+ == Copyright
33
+
34
+ Copyright (c) 2011 Sal Scotto. See LICENSE.txt for
35
+ further details.
36
+
@@ -0,0 +1,42 @@
1
+ require 'nokogiri'
2
+ require 'antisamy/model/attribute'
3
+ require 'antisamy/model/tag'
4
+ require 'antisamy/model/css_property'
5
+ require 'antisamy/policy'
6
+ require 'antisamy/scan_results'
7
+ require 'antisamy/html/handler'
8
+ require 'antisamy/html/sax_filter'
9
+ require 'antisamy/html/scanner'
10
+
11
# Top-level entry points of the library: configure the encodings, load a
# policy and scan markup against it.
module AntiSamy
  class << self

    # Set the encoding handed to the scanner as the input encoding.
    # While unset (or nil), #scan falls back to Scanner::DEFAULT_ENCODE.
    def input_encoding=(encoding)
      @@input_encoding = encoding
    end

    # Set the encoding handed to the scanner as the output encoding.
    # While unset (or nil), #scan falls back to Scanner::DEFAULT_ENCODE.
    def output_encoding=(encoding)
      @@output_encoding = encoding
    end

    # Scan the input using the provided policy and return whatever
    # Scanner#scan returns.
    # Will raise an exception if there is some form of scanning error.
    def scan(input, policy)
      scanner = Scanner.new(policy)
      @@input_encoding ||= Scanner::DEFAULT_ENCODE
      @@output_encoding ||= Scanner::DEFAULT_ENCODE
      # Both encodings live in class variables. Passing @output_encoding (an
      # instance variable that is never assigned) would always hand nil to the
      # scanner and silently ignore AntiSamy.output_encoding=.
      scanner.scan(input, @@input_encoding, @@output_encoding)
    end

    # Create a policy out of the provided file.
    # Will use a string or any IO object that can be read.
    # Will raise an exception if the policy fails to validate.
    def policy(policy_file)
      Policy.new(policy_file)
    end

  end
end
@@ -0,0 +1,95 @@
1
module AntiSamy

  # Receives the already-filtered element, text, comment and CDATA events and
  # assembles them into a Nokogiri HTML document fragment, which #document
  # serializes according to the policy directives.
  class Handler

    # Scan messages collected while the input is processed.
    attr_accessor :errors

    # policy:: policy object queried for output directives
    # output:: encoding name passed to the serializer in #document
    def initialize(policy, output) #:nodoc:
      @document = Nokogiri::HTML::DocumentFragment.parse("")
      @current_node = @document
      @policy = policy
      @preserve_whitespace = @policy.directive(Policy::PRESERVE_SPACE)
      @errors = []
      @output_encoding = output
    end

    # HTML entity encode some text
    def encode_text(text)
      @document.encode_special_chars(text)
    end

    # Append a CDATA section holding +text+ to the current node
    def cdata(text)
      node = Nokogiri::XML::CDATA.new(@document, text)
      @current_node.add_child(node)
    end

    # Append a comment node to the current node
    def comment(text) #:nodoc:
      @current_node.add_child(Nokogiri::XML::Comment.new(@document, text))
    end

    # Append text to the current node. Consecutive calls are merged into the
    # trailing text node instead of creating a new node per call.
    def characters(text)
      node = @current_node.children.last
      if node and node.text?
        node.content += text
      else
        @current_node.add_child(Nokogiri::XML::Text.new(text, @document))
      end
    end

    # Start an element and make it the current node.
    # +attributes+ is a list of [name, value] pairs.
    def start_element(name, attributes)
      # Special param tag hacking, as libxml/nokogiri doesn't seem to generate
      # an end tag for param tags: the tag is written as literal markup inside
      # a fake CDATA node, and the current node is *not* moved.
      if name.eql?("param")
        @current_node.add_child(Nokogiri::XML::CDATA.new(@document, param_markup(attributes)))
        return
      end
      elem = Nokogiri::XML::Element.new(name, @document)
      attributes.each do |attrib_pair|
        elem[attrib_pair.first] = attrib_pair.last
      end
      @current_node = @current_node.add_child(elem)
    end

    # End an element: move back to the parent, but only when +name+ matches
    # the current node (so the end of a param tag, which never became the
    # current node, is ignored).
    def end_element(name)
      if @current_node.nil? or !@current_node.name.eql?(name)
        return
      end
      @current_node = @current_node.parent if @current_node.parent
    end

    # Serialize the collected fragment, applying the output directives of the
    # policy (formatting, declaration omission, XHTML vs HTML).
    # NOTE(review): the directives are tested for plain truthiness here while
    # SaxFilter matches them against /true/i - confirm what Policy#directive
    # returns for a directive that is set to "false".
    def document
      indent = 0
      options = Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS
      if @policy.directive(Policy::FORMAT_OUTPUT)
        options |= Nokogiri::XML::Node::SaveOptions::FORMAT
        indent = 2
      end
      if @policy.directive(Policy::OMIT_DOC_TYPE) || @policy.directive(Policy::OMIT_XML_DECL)
        options |= Nokogiri::XML::Node::SaveOptions::NO_DECLARATION
      end

      if @policy.directive(Policy::USE_XHTML)
        options |= Nokogiri::XML::Node::SaveOptions::AS_XHTML
        @document.to_xhtml(:encoding => @output_encoding, :indent => indent, :save_with => options)
      else
        @document.to_html(:encoding => @output_encoding, :indent => indent, :save_with => options)
      end
    end

    private

    # Build the literal <param .../> markup for a list of [name, value] pairs.
    def param_markup(attributes)
      rendered = attributes.map do |attrib_pair|
        " #{attrib_pair.first}=\"#{escape_attribute(attrib_pair.last)}\""
      end
      "<param#{rendered.join}/>"
    end

    # Entity-encode an attribute value for use inside double quotes. The
    # markup built by #param_markup is emitted through a CDATA node, so nothing
    # downstream escapes it; without this a value containing a quote or an
    # angle bracket could break out of the attribute.
    def escape_attribute(value)
      encode_text(value.to_s).gsub('"', "&quot;")
    end

  end
end
@@ -0,0 +1,286 @@
1
+ module AntiSamy
2
# Minimal last-in/first-out stack backed by an Array.
class Stack
  def initialize
    @elements = []
  end

  # Put +v+ on top of the stack.
  def push(v)
    @elements << v
  end

  # Remove the top element and return it (nil when the stack is empty).
  def pop
    @elements.pop
  end

  # Number of elements currently held.
  def size
    @elements.size
  end

  # True when nothing is on the stack.
  def empty?
    @elements.empty?
  end

  # True when the stack is non-empty and its top element is eql? to +v+.
  def peek?(v)
    !@elements.empty? && @elements.last.eql?(v)
  end

  # The top element without removing it (nil when the stack is empty).
  def peek
    @elements.last
  end

end
34
+
35
+ class SaxFilter < Nokogiri::XML::SAX::Document
36
# policy::    policy consulted for tags, attributes and directives
# handler::   receiver of the events that survive filtering
# param_tag:: tag used for param validation when the policy has no embed tag
def initialize(policy, handler, param_tag)
  @policy, @handler, @param_tag = policy, handler, param_tag
  # One entry per open element, recording how that element is being handled.
  @stack = Stack.new
  # CSS state; filled in while a style element is open.
  @css_content = @css_attributes = @css_scanner = nil
end
45
+
46
# Parser error callback. Errors are deliberately ignored; the disabled line
# below shows how to print them while debugging.
def error(text)
  #puts "SAX Error #{text}"
end
49
+
50
# Parser warning callback: prints the warning text to standard output.
def warning(text)
  $stdout.puts("SAX Warning #{text}")
end
53
+
54
# Start-of-document callback. Intentionally empty: nothing is emitted when
# parsing begins.
def start_document
end
57
+
58
# Add a comment block.
# The comment is forwarded to the handler only when the PRESERVE_COMMENTS
# directive matches /true/i; conditional-comment directives are stripped
# first. A nil text is ignored.
def comment(text)
  return if text.nil?
  return unless @policy.directive(Policy::PRESERVE_COMMENTS) =~ /true/i

  # Work on a copy: the in-place gsub! used before modified the string owned
  # by the caller and raised on a frozen string.
  cleaned = text.gsub(%r{<!?!\[(?:end)?if*\]}ixm, "").gsub(%r{\[(?:if).*\]>}, "")
  @handler.comment(cleaned)
end
68
+
69
# Normalize an attribute list to an array of [name, value] pairs.
#
# A list whose first element is already an Array is returned unchanged.
# A flat list (name, value, name, value, ...) is grouped two by two; a
# trailing name without a value is paired with nil. nil yields an empty list
# instead of raising.
def convert_array(x)
  return [] if x.nil?
  return x if x.first.is_a?(Array)

  # Destructuring into |key, value| pads a short final slice with nil.
  x.each_slice(2).map { |key, value| [key, value] }
end
84
+
85
# Look up +key+ in a list of [name, value] pairs and return the value of the
# first pair whose name is eql? to it, or nil when no pair matches.
def fetch_attribute(array, key)
  match = array.find { |entry| entry.first.eql?(key) }
  match ? match.last : nil
end
93
+
94
# Start an element.
#
# Looks the tag up in the policy, decides how the element is treated and
# pushes exactly one marker for it on the stack:
#
# :remove::   element is dropped; #characters and #cdata_block drop text
#             while it is on top of the stack
# :filter::   element is dropped but its text is still forwarded
# :css::      style element; its text is collected in @css_content
# :truncate:: element is forwarded without any attributes
# :keep::     element is forwarded with the attributes that passed validation
#
# name::       tag name as reported by the parser
# attributes:: flat or paired attribute list (see #convert_array)
def start_element(name, attributes = [])
  attributes = convert_array(attributes)
  tag = @policy.tag(name)
  masquerade = false
  embed_name = nil
  embed_value = nil
  # Handle validate param tag as an embed tag: the name/value pair of the
  # param is checked as if it were an attribute of the embed tag.
  if tag.nil? && @policy.directive(Policy::VALIDATE_P_AS_E) && name.eql?("param")
    embed = @policy.tag("embed") || @param_tag
    if embed && embed.action == Policy::ACTION_VALIDATE
      tag = embed
      masquerade = true
      embed_name = fetch_attribute(attributes, "name")
      embed_value = fetch_attribute(attributes, "value")
      attributes = [[embed_name, embed_value]]
    end
  end
  valid_attributes = []
  if @stack.peek?(:css) || @stack.peek?(:remove)
    # We are in remove mode to remove this tag as well as any child style elements if css mode
    @stack.push(:remove)
  elsif (tag.nil? && @policy.directive(Policy::ON_UNKNOWN_TAG).eql?("encode")) ||
        (!tag.nil? && tag.action.eql?(Policy::ACTION_ENCODE)) ||
        @policy.encode?(name.downcase)
    # Emit the tag as text instead of as markup
    @handler.characters("<#{name}>")
    @stack.push(:filter)
  elsif tag.nil?
    @handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_NOT_IN_POLICY, name)
    @stack.push(:filter)
  elsif tag.action.eql?(Policy::ACTION_FILTER)
    @handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_FILTERED, name)
    @stack.push(:filter)
  elsif tag.action.eql?(Policy::ACTION_VALIDATE)
    # Handle validation
    if name.include?("style")
      # Only :css is pushed for a style element. Falling through to the
      # keep/filter/remove decision below would push a second marker for the
      # same element, leaving the stack unbalanced after #end_element and
      # forwarding the unscanned style sheet to the handler.
      @stack.push(:css)
      @css_content = ''
      @css_attributes = []
    else
      # Validate attributes
      remove_tag = false
      filter_tag = false
      attributes.each do |pair|
        a_name = pair.first
        a_value = pair.last
        # Names and values may be nil (an attribute without a value, or a
        # param tag lacking name/value); use string forms for the lookups.
        name_text = a_name.to_s
        value_text = a_value.to_s
        attrib = tag.attribute(name_text.downcase) || @policy.global(name_text.downcase)
        # check if the attribute is a style
        if a_name.eql?("style")
          # Handle Style tags (CSS scrubbing is not implemented yet, so the
          # attribute is dropped)
          # begin
          #   results = @css_scanner.scan_inline(a_value,name,@policy.max_input)
          #   valid_attributes << [a_name,results.clean_html]
          #   @handler.errors << results.errors
          #   @handler.errors.flatten!
          # rescue Exception => e
          #   @handler.errors << ScanMessage.new(ScanMessage::ERROR_CSS_ATTRIBUTE_MALFORMED,name,@handler.encode_text(value))
          # end
        elsif !attrib.nil? # Attribute is not nil lets check it
          lowered = value_text.downcase
          # A value is accepted when it equals one of the literal values or
          # its lower-cased form matches one of the expressions.
          valid = attrib.values.any? { |av| av.eql?(a_value) } ||
                  attrib.expressions.any? { |ae| lowered =~ ae }
          if valid
            valid_attributes << [a_name, a_value]
          elsif attrib.action.eql?(Attribute::ACTION_REMOVE_TAG)
            @handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_INVALID_REMOVED, tag.name, @handler.encode_text(name_text), @handler.encode_text(value_text))
            remove_tag = true
          elsif attrib.action.eql?(Attribute::ACTION_FILTER_TAG)
            @handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_CAUSE_FILTER, tag.name, @handler.encode_text(name_text), @handler.encode_text(value_text))
            filter_tag = true
          else
            @handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_INVALID, tag.name, @handler.encode_text(name_text), @handler.encode_text(value_text))
          end
        else # attribute was null
          @handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_NOT_IN_POLICY, tag.name, a_name, @handler.encode_text(value_text))
          # An unknown name/value pair on a param checked as embed filters the tag
          filter_tag = true if masquerade
        end
      end # end attribute loop
      if remove_tag
        @stack.push(:remove)
      elsif filter_tag
        @stack.push(:filter)
      else
        if name.eql?("a") && @policy.directive(Policy::ANCHROS_NOFOLLOW) =~ /true/i
          valid_attributes << ["rel", "nofollow"]
        end
        if masquerade
          # The pair was validated as an embed attribute; emit it as the
          # original name/value attributes of the param tag.
          valid_attributes = []
          valid_attributes << ["name", embed_name]
          valid_attributes << ["value", embed_value]
        end
        @stack.push(:keep)
      end
    end
    # End validation action
  elsif tag.action.eql?(Policy::ACTION_TRUNCATE)
    @stack.push(:truncate)
  else
    @handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_DISALLOWED, name)
    @stack.push(:remove)
  end
  # We now know whether to keep or truncate this tag
  if @stack.peek?(:truncate)
    @handler.start_element(name, [])
  elsif @stack.peek?(:keep)
    @handler.start_element(name, valid_attributes)
  end
end
225
+
226
# Namespace-aware start callback: prefix, uri and ns are ignored and the
# event is handled exactly like #start_element.
def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = nil)
  start_element(name, attrs)
end
229
+
230
# Namespace-aware end callback: prefix and uri are ignored and the event is
# handled exactly like #end_element.
def end_element_namespace(name, prefix, uri)
  end_element(name)
end
233
+
234
# Add character data to the current tag.
# Text without any non-whitespace character is skipped unless the
# PRESERVE_SPACE directive matches /true/i. Inside a style element the text
# is appended to the collected CSS; inside a removed element it is dropped;
# otherwise it is forwarded to the handler.
def characters(text)
  blank = !(text =~ /\S/)
  return if blank && !(@policy.directive(Policy::PRESERVE_SPACE) =~ /true/i)

  if @stack.peek?(:css)
    @css_content << text
  else
    @handler.characters(text) unless @stack.peek?(:remove)
  end
end
245
+
246
# End an element: pop the marker pushed for it by #start_element. The end tag
# is forwarded to the handler only when that marker is not :remove, :filter
# or :css.
def end_element(name)
  suppressed = [:remove, :filter, :css].any? { |marker| @stack.peek?(marker) }
  popped = @stack.pop
  # For a :css marker this is where the collected style sheet would be
  # scanned and emitted; that part is not enabled yet:
  # begin
  #   results = @css_scanner.scan_tyle_sheet(@css_content,@policy.max_input)
  #   @handler.errors << results.errors
  #   @handler.errors.flatten!
  #   unless results.clean_html.nil? or results.clean_html.empty?
  #     @handler.start_element(element,css_attributes)
  #     @handler.characters results.clean_html
  #     @handler.end_element(element)
  #   end
  # rescue Exception => e
  #   @handler.errors << ScanMessage.new(ScanMessage::ERROR_CSS_TAG_MALFORMED,name,@handler.encode_text(@css_content))
  # ensure
  #   @css_content = nil
  #   @css_attributes = nil
  return popped if suppressed

  @handler.end_element(name)
end
274
+
275
# Handle a CDATA block. Inside a style element the text is appended to the
# collected CSS; inside a removed element it is dropped; otherwise it is
# forwarded to the handler as ordinary character data.
# (The handler's cdata method is never reached from here: the only remaining
# case is a removed element, for which nothing is emitted.)
def cdata_block(text)
  return @css_content << text if @stack.peek?(:css)

  @handler.characters(text) unless @stack.peek?(:remove)
end
285
+ end
286
+ end