antisamy 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE.txt +20 -0
- data/README.rdoc +36 -0
- data/lib/antisamy.rb +42 -0
- data/lib/antisamy/html/handler.rb +95 -0
- data/lib/antisamy/html/sax_filter.rb +286 -0
- data/lib/antisamy/html/scanner.rb +85 -0
- data/lib/antisamy/model/attribute.rb +19 -0
- data/lib/antisamy/model/css_property.rb +39 -0
- data/lib/antisamy/model/tag.rb +31 -0
- data/lib/antisamy/policy.rb +540 -0
- data/lib/antisamy/scan_results.rb +21 -0
- data/spec/antisamy_spec.rb +28 -0
- data/spec/spec_helper.rb +12 -0
- metadata +160 -0
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2011 Sal Scotto
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
= Antisamy
|
2
|
+
|
3
|
+
This project is a port of the Java AntiSamy project to the Ruby runtime. It's intended to provide a library for developers to add protection to their web applications from malicious
|
4
|
+
user-supplied HTML and CSS. Please check out the AntiSamy project over at OWASP[http://www.owasp.org/index.php/Category:OWASP_AntiSamy_Project].
|
5
|
+
|
6
|
+
== TODO
|
7
|
+
|
8
|
+
* Add CSS scrubbing support
|
9
|
+
|
10
|
+
== Synopsis
|
11
|
+
|
12
|
+
require 'antisamy'
|
13
|
+
policy = AntiSamy.policy('antisamy.xml')
|
14
|
+
tainted_html = 'User supplied markup'
|
15
|
+
scan_results = AntiSamy.scan(tainted_html,policy)
|
16
|
+
clean_html = scan_results.clean_html
|
17
|
+
|
18
|
+
== Example Policies
|
19
|
+
|
20
|
+
Please check policy-examples[https://github.com/washu/antisamy-ruby/tree/master/policy-examples] for sample policy files.
|
21
|
+
|
22
|
+
== Contributing to antisamy
|
23
|
+
|
24
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
|
25
|
+
* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it
|
26
|
+
* Fork the project
|
27
|
+
* Start a feature/bugfix branch
|
28
|
+
* Commit and push until you are happy with your contribution
|
29
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
30
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
|
31
|
+
|
32
|
+
== Copyright
|
33
|
+
|
34
|
+
Copyright (c) 2011 Sal Scotto. See LICENSE.txt for
|
35
|
+
further details.
|
36
|
+
|
data/lib/antisamy.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'antisamy/model/attribute'
|
3
|
+
require 'antisamy/model/tag'
|
4
|
+
require 'antisamy/model/css_property'
|
5
|
+
require 'antisamy/policy'
|
6
|
+
require 'antisamy/scan_results'
|
7
|
+
require 'antisamy/html/handler'
|
8
|
+
require 'antisamy/html/sax_filter'
|
9
|
+
require 'antisamy/html/scanner'
|
10
|
+
|
11
|
+
# Top-level entry points of the library: configure encodings, build a policy
# and scan tainted markup with it.
module AntiSamy
  class << self

    # Set the encoding handed to the scanner for the input markup.
    # When never set, #scan falls back to Scanner::DEFAULT_ENCODE.
    def input_encoding=(encoding)
      @@input_encoding = encoding
    end

    # Set the encoding handed to the scanner for the cleaned output.
    # When never set, #scan falls back to Scanner::DEFAULT_ENCODE.
    def output_encoding=(encoding)
      @@output_encoding = encoding
    end

    # Scan +input+ using the provided +policy+.
    #
    # Returns whatever Scanner#scan returns (the README uses it as a scan
    # result exposing +clean_html+). Any error raised by the scanner
    # propagates to the caller.
    def scan(input, policy)
      scanner = Scanner.new(policy)
      @@input_encoding ||= Scanner::DEFAULT_ENCODE
      @@output_encoding ||= Scanner::DEFAULT_ENCODE
      # Both encodings are class variables. The previous code passed the
      # never-assigned instance variable @output_encoding (always nil) here,
      # so the configured/default output encoding was silently ignored.
      scanner.scan(input, @@input_encoding, @@output_encoding)
    end

    # Create a Policy from +policy_file+.
    # The argument is passed straight to Policy.new; per the original notes it
    # may be a String or a readable IO, and Policy.new raises when the policy
    # fails to validate (Policy itself is defined in another file).
    def policy(policy_file)
      Policy.new(policy_file)
    end

  end
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
module AntiSamy
|
2
|
+
|
3
|
+
# Receives the events that SaxFilter decides to keep, builds a Nokogiri
# document fragment out of them and serializes that fragment on request.
class Handler

  # List of scan messages; SaxFilter appends to it while filtering.
  attr_accessor :errors

  # policy:: object answering +directive(key)+
  # output:: encoding passed to the serializer in #document
  def initialize(policy, output) #:nodoc:
    @document = Nokogiri::HTML::DocumentFragment.parse("")
    @current_node = @document
    @policy = policy
    # Stored for parity with the original code; nothing in this class reads it.
    @preserve_whitespace = @policy.directive(Policy::PRESERVE_SPACE)
    @errors = []
    @output_encoding = output
  end

  # HTML entity encode some text
  def encode_text(text)
    @document.encode_special_chars(text)
  end

  # Append a CDATA section holding +text+ to the current node
  def cdata(text)
    node = Nokogiri::XML::CDATA.new(@document, text)
    @current_node.add_child(node)
  end

  # Append a comment node to the current node
  def comment(text) #:nodoc:
    @current_node.add_child(Nokogiri::XML::Comment.new(@document, text))
  end

  # Append +text+ to the current node, merging it into the last child when
  # that child is already a text node.
  def characters(text)
    node = @current_node.children.last
    if node && node.text?
      node.content += text
    else
      @current_node.add_child(Nokogiri::XML::Text.new(text, @document))
    end
  end

  # Open an element with the given name and attribute pairs
  # (each pair is [name, value]) and make it the current node.
  def start_element(name, attributes)
    # Special param tag hacking, as libxml/nokogiri doesnt generate an end tag
    # for param tags it seems: the tag is written as literal markup inside a
    # CDATA node, and the current node is *not* moved.
    if name.eql?("param")
      @current_node.add_child(Nokogiri::XML::CDATA.new(@document, param_markup(attributes)))
      return
    end
    elem = Nokogiri::XML::Element.new(name, @document)
    attributes.each do |attrib_pair|
      elem[attrib_pair.first] = attrib_pair.last
    end
    @current_node = @current_node.add_child(elem)
  end

  # Close the current element. Ignored when +name+ does not match the element
  # that is currently open.
  def end_element(name)
    return if @current_node.nil? || !@current_node.name.eql?(name)
    @current_node = @current_node.parent if @current_node.parent
  end

  # Serialize the collected fragment, applying the output directives of the
  # policy.
  #
  # NOTE(review): the directives are tested for plain truthiness here, while
  # SaxFilter matches some directives against /true/i. If Policy#directive
  # returns strings, a "false" value would count as enabled - confirm against
  # Policy.
  def document
    indent = 0
    options = Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS
    if @policy.directive(Policy::FORMAT_OUTPUT)
      options |= Nokogiri::XML::Node::SaveOptions::FORMAT
      indent = 2
    end
    if @policy.directive(Policy::OMIT_DOC_TYPE) || @policy.directive(Policy::OMIT_XML_DECL)
      options |= Nokogiri::XML::Node::SaveOptions::NO_DECLARATION
    end

    if @policy.directive(Policy::USE_XHTML)
      options |= Nokogiri::XML::Node::SaveOptions::AS_XHTML
      @document.to_xhtml(:encoding => @output_encoding, :indent => indent, :save_with => options)
    else
      @document.to_html(:encoding => @output_encoding, :indent => indent, :save_with => options)
    end
  end

  private

  # Build the literal <param .../> markup for the CDATA workaround.
  # Values are entity-encoded because this string is emitted as markup;
  # an unescaped quote or angle bracket would let a value break out of
  # its attribute.
  def param_markup(attributes)
    rendered = attributes.map do |attrib_pair|
      " #{attrib_pair.first}=\"#{escape_attribute_value(attrib_pair.last)}\""
    end
    "<param" + rendered.join + "/>"
  end

  # Entity-encode an attribute value; the extra gsub guarantees that no raw
  # double quote survives whatever encode_text leaves untouched.
  def escape_attribute_value(value)
    encode_text(value.to_s).gsub('"', '&quot;')
  end

end
|
95
|
+
end
|
@@ -0,0 +1,286 @@
|
|
1
|
+
module AntiSamy
|
2
|
+
# Minimal LIFO container backed by an Array.
class Stack
  def initialize
    @items = []
  end

  # Put +v+ on top of the stack
  def push(v)
    @items.push(v)
  end

  # Remove and return the top element (nil when the stack is empty)
  def pop
    @items.pop
  end

  # Number of elements currently held
  def size
    @items.size
  end

  # True when nothing is on the stack
  def empty?
    @items.empty?
  end

  # True when the stack is non-empty and its top element is eql? to +v+
  def peek?(v)
    !@items.empty? && @items.last.eql?(v)
  end

  # Top element without removing it (nil when the stack is empty)
  def peek
    @items.last
  end
end
|
34
|
+
|
35
|
+
class SaxFilter < Nokogiri::XML::SAX::Document
|
36
|
+
# Set up the filter state.
# policy::    consulted for tags, attributes and directives
# handler::   receives the events that survive filtering
# param_tag:: fallback tag used when a param is validated as an embed and the
#             policy has no embed tag of its own
def initialize(policy,handler,param_tag)
  @policy, @handler, @param_tag = policy, handler, param_tag
  # one state symbol is pushed per opened element
  @stack = Stack.new
  # CSS bookkeeping; nothing is buffered until a style element is opened
  @css_content = @css_attributes = @css_scanner = nil
end
|
45
|
+
|
46
|
+
# SAX error callback. The message is currently discarded; the only statement
# the original body held was a commented-out debug print.
def error(text)
  nil
end
|
49
|
+
|
50
|
+
# SAX warning callback. Reports the message through Kernel#warn (standard
# error) so that a library call never writes into the host application's
# standard output.
def warning(text)
  warn "SAX Warning #{text}"
end
|
53
|
+
|
54
|
+
# SAX start-of-document callback. Nothing needs to happen here: the output
# fragment is not created by this method, so the body is intentionally empty.
def start_document
  nil
end
|
57
|
+
|
58
|
+
# Forward a comment to the handler when the PRESERVE_COMMENTS directive
# matches /true/i; otherwise the comment is dropped. Conditional-comment
# directives are stripped from the text before it is forwarded.
#
# The stripping works on a copy: the string handed in by the parser is never
# modified (the earlier in-place gsub! changed the caller's object and raised
# on frozen input).
def comment(text)
  return if text.nil?
  return unless @policy.directive(Policy::PRESERVE_COMMENTS) =~ /true/i
  # Strip out conditional directives
  # NOTE(review): "if*" in the first pattern means "i" followed by any number
  # of "f"; left exactly as it was - confirm the intended pattern.
  stripped = text.gsub(%r{<!?!\[(?:end)?if*\]}ixm,"").gsub(%r{\[(?:if).*\]>},"")
  @handler.comment(stripped)
end
|
68
|
+
|
69
|
+
# Normalize an attribute list to an array of [name, value] pairs.
#
# x:: nil, an array that already holds pairs (returned unchanged), or a flat
#     [name, value, name, value, ...] list
#
# A flat list of odd length gets nil as the value of its last name. nil input
# yields an empty list (it used to raise NoMethodError).
def convert_array(x)
  return [] if x.nil?
  return x if x.first.is_a?(Array)
  x.each_slice(2).map { |name, value| [name, value] }
end
|
84
|
+
|
85
|
+
# Look up +key+ in a list of [name, value] pairs.
# Returns the value of the first pair whose name is eql? to +key+, or nil
# when no pair matches.
def fetch_attribute(array,key)
  match = array.find { |entry| entry.first.eql?(key) }
  match.nil? ? nil : match.last
end
|
93
|
+
|
94
|
+
# Start an element.
#
# Looks the tag up in the policy, decides what happens to it and pushes
# exactly one state symbol on the stack for it:
#
# :remove::   element and content are dropped (also used for everything
#             nested inside a :css or :remove element)
# :filter::   the tag itself is dropped, character data still passes
# :css::      a validated style element; its text is buffered in @css_content
# :truncate:: the tag is forwarded to the handler without attributes
# :keep::     the tag is forwarded with the attributes that passed validation
#
# name::       tag name as reported by the parser
# attributes:: pairs or a flat name/value list (normalized by convert_array)
def start_element(name, attributes = [])
  attributes = convert_array(attributes)
  tag = @policy.tag(name)
  masquerade = false
  embed_name = nil
  embed_value = nil
  # Handle validate param tag as an embed tag
  if tag.nil? && @policy.directive(Policy::VALIDATE_P_AS_E) && name.eql?("param")
    embed = @policy.tag("embed") || @param_tag
    if embed && embed.action == Policy::ACTION_VALIDATE
      tag = embed
      masquerade = true
      embed_name = fetch_attribute(attributes,"name")
      embed_value = fetch_attribute(attributes,"value")
      # the param's name/value is validated as if it were one embed attribute
      attributes = [ [embed_name,embed_value] ]
    end
  end
  valid_attributes = []
  if @stack.peek?(:css) || @stack.peek?(:remove)
    # We are in remove mode to remove this tag as well as any child style elements if css mode
    @stack.push(:remove)
  elsif (tag.nil? && @policy.directive(Policy::ON_UNKNOWN_TAG).eql?("encode")) ||
        (!tag.nil? && tag.action.eql?(Policy::ACTION_ENCODE)) ||
        @policy.encode?(name.downcase)
    # the tag is handed over as character data instead of as an element
    @handler.characters("<#{name}>")
    @stack.push(:filter)
  elsif tag.nil?
    @handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_NOT_IN_POLICY,name)
    @stack.push(:filter)
  elsif tag.action.eql?(Policy::ACTION_FILTER)
    @handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_FILTERED,name)
    @stack.push(:filter)
  elsif tag.action.eql?(Policy::ACTION_VALIDATE)
    if name.include?("style")
      # Only :css is pushed here. The old code went on to push :keep as well,
      # which forwarded the style element with its unscanned content and left
      # a stale :css on the stack that removed every following element.
      @stack.push(:css)
      @css_content = ''
      @css_attributes = []
    else
      valid_attributes, remove_tag, filter_tag = validate_attributes(tag, attributes, masquerade)
      if remove_tag
        @stack.push(:remove)
      elsif filter_tag
        @stack.push(:filter)
      else
        if name.eql?("a") && @policy.directive(Policy::ANCHROS_NOFOLLOW) =~ /true/i
          valid_attributes << ["rel","nofollow"]
        end
        if masquerade
          # a param validated as embed is emitted with its own name/value pair
          valid_attributes = [ ["name",embed_name], ["value",embed_value] ]
        end
        @stack.push(:keep)
      end
    end
  elsif tag.action.eql?(Policy::ACTION_TRUNCATE)
    @stack.push(:truncate)
  else
    @handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_DISALLOWED,name)
    @stack.push(:remove)
  end
  # We now know whether to keep or truncate this tag
  if @stack.peek?(:truncate)
    @handler.start_element(name,[])
  elsif @stack.peek?(:keep)
    @handler.start_element(name,valid_attributes)
  end
end

# Check every attribute pair against the tag's attribute definition, falling
# back to the policy's global attributes.
# Returns [kept_pairs, remove_tag, filter_tag].
def validate_attributes(tag, attributes, masquerade)
  kept = []
  remove_tag = false
  filter_tag = false
  attributes.each do |pair|
    a_name = pair.first
    # nil happens for the padding convert_array adds to odd-length lists and
    # for a missing param name/value; treat it as an empty string
    a_value = pair.last.to_s
    lookup = a_name.to_s.downcase
    attrib = tag.attribute(lookup)
    attrib = @policy.global(lookup) if attrib.nil?
    if lookup.eql?("style")
      # Inline CSS is not scanned yet (see the README TODO), so the attribute
      # is dropped. The check is case-insensitive, like the policy lookup.
      next
    end
    if attrib.nil?
      @handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_NOT_IN_POLICY,tag.name,a_name,@handler.encode_text(a_value))
      filter_tag = true if masquerade
      next
    end
    if attribute_value_allowed?(attrib, a_value)
      kept << [a_name,a_value]
    elsif attrib.action.eql?(Attribute::ACTION_REMOVE_TAG)
      @handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_INVALID_REMOVED,tag.name,@handler.encode_text(a_name.to_s),@handler.encode_text(a_value))
      remove_tag = true
    elsif attrib.action.eql?(Attribute::ACTION_FILTER_TAG)
      @handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_CAUSE_FILTER,tag.name,@handler.encode_text(a_name.to_s),@handler.encode_text(a_value))
      filter_tag = true
    else
      @handler.errors << ScanMessage.new(ScanMessage::ERROR_ATTRIBUTE_INVALID,tag.name,@handler.encode_text(a_name.to_s),@handler.encode_text(a_value))
    end
  end
  [kept, remove_tag, filter_tag]
end

# True when +value+ equals one of the literal values of +attrib+ or its
# lower-cased form matches one of the attribute's expressions.
def attribute_value_allowed?(attrib, value)
  return true if attrib.values.any? { |allowed| allowed.eql?(value) }
  folded = value.downcase
  attrib.expressions.any? { |pattern| folded =~ pattern }
end

private :validate_attributes, :attribute_value_allowed?
|
225
|
+
|
226
|
+
# Namespace-aware start callback; delegates to start_element. The prefix, uri
# and ns arguments are not used.
#
# Attribute objects that expose +localname+ and +value+ (the form Nokogiri
# uses for this callback) are converted to [name, value] pairs first, because
# convert_array would otherwise treat them as a flat name/value list. Other
# entries are passed through unchanged, and nil +attrs+ counts as no
# attributes.
def start_element_namespace(name,attrs=[],prefix = nil, uri = nil, ns = nil)
  pairs = (attrs || []).map do |attr|
    attr.respond_to?(:localname) ? [attr.localname, attr.value] : attr
  end
  start_element(name,pairs)
end
|
229
|
+
|
230
|
+
# Namespace-aware end callback. Only +name+ matters: the call is handed to
# end_element, +prefix+ and +uri+ are ignored.
def end_element_namespace(name,prefix,uri)
  end_element(name)
end
|
233
|
+
|
234
|
+
# Route character data according to the state on top of the stack.
#
# Text without any non-whitespace character is dropped unless the
# PRESERVE_SPACE directive matches /true/i. Inside a :css element the text is
# appended to the CSS buffer, inside a :remove element it is discarded, and in
# every other state it is passed on to the handler.
def characters(text)
  blank = (text =~ /\S/).nil?
  return if blank && !(@policy.directive(Policy::PRESERVE_SPACE) =~ /true/i)
  if @stack.peek?(:css)
    @css_content << text
  else
    @handler.characters(text) unless @stack.peek?(:remove)
  end
end
|
245
|
+
|
246
|
+
# End an element: pop the state that start_element pushed for it.
#
# Elements in the :remove, :filter or :css state never reached the handler, so
# only the state is popped. In every other case (including an empty stack) the
# end tag is also forwarded to the handler.
#
# TODO: when a :css element ends, @css_content still has to be scanned and
# re-emitted; the CSS scanner is not implemented yet, so buffered style
# content is simply dropped.
def end_element(name)
  discarded = [:remove, :filter, :css].any? { |state| @stack.peek?(state) }
  popped = @stack.pop
  discarded ? popped : @handler.end_element(name)
end
|
274
|
+
|
275
|
+
# Handle a CDATA block. Its content is treated like character data: appended
# to the CSS buffer inside a :css element, discarded inside a :remove element,
# and passed to the handler as plain text in every other state.
#
# The former trailing else-branch (handler.cdata) could never run: it was only
# reached when the top of the stack was :remove and was then guarded by
# "unless top == :remove". It has been dropped; behaviour is unchanged.
def cdata_block(text)
  if @stack.peek?(:css)
    @css_content << text
  elsif !@stack.peek?(:remove)
    @handler.characters(text)
  end
end
|
285
|
+
end
|
286
|
+
end
|