whitewash 2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,8 @@
# Shared setup for the spec suite: loads RSpec, makes the in-tree library
# loadable, and redirects Whitewash::PATH to the data directory of this
# source tree.
require 'rspec'

# Directory containing this helper; both paths below are resolved against it.
spec_dir = File.dirname(__FILE__)

# Put the sibling lib/ directory first on the load path before requiring the
# library, so `require 'whitewash'` finds it there.
$LOAD_PATH.unshift(File.expand_path("../lib", spec_dir))
require 'whitewash'

# Replace Whitewash::PATH with a single entry pointing at ../data/whitewash.
# The constant is removed first so the reassignment does not trigger an
# "already initialized constant" warning.
# NOTE(review): presumably PATH is the list of directories searched for
# whitelist files -- confirm against lib/whitewash.rb.
Whitewash.send(:remove_const, :PATH)
Whitewash.const_set(:PATH, [File.expand_path("../data/whitewash", spec_dir)])
@@ -0,0 +1,99 @@
# Behavioural specs for Whitewash: loading the default whitelist and
# sanitizing HTML fragments with Whitewash#sanitize.
#
# The helper is resolved relative to this file (not the current working
# directory) so the suite can be started from any directory.
require File.expand_path('spec_helper', File.dirname(__FILE__))

describe Whitewash do
  # A fresh sanitizer for every example.
  let(:filter) { Whitewash.new }

  it "loads default whitelist" do
    whitelist = Whitewash.default_whitelist
    whitelist.should be_a_kind_of(Hash)
    whitelist.should include('_css')
  end

  it "drops <html> and <body> elements" do
    html = '<html><head></head><body><p>test</p></body>'
    filter.sanitize(html).should == '<p>test</p>'
  end

  it "understands fragments with multiple root elements" do
    html = '<p>foo</p><p>bar</p>'
    filter.sanitize(html).should == '<p>foo</p><p>bar</p>'
  end

  it "removes <script/> element" do
    html = '<p>foo <script type="text/javascript" src="test.js">bar</script> buzz</p>'
    # The element itself is dropped; its text content is expected to survive
    # wrapped in a CDATA section.
    filter.sanitize(html).should == '<p>foo <![CDATA[bar]]> buzz</p>'
  end

  it "removes onclick attribute" do
    # The attribute name must be spelled "onclick" so that the example really
    # exercises removal of the event handler named in its description.
    html = '<p>foo <span onclick="test()">bar</span> buzz</p>'
    filter.sanitize(html).should == '<p>foo <span>bar</span> buzz</p>'
  end

  it "removes background CSS property" do
    html = '<p>foo <span style="background: url(//test/t.js)">bar</span> buzz</p>'
    filter.sanitize(html).should == '<p>foo <span>bar</span> buzz</p>'
  end

  it "rewrites HTML when supplied with a block" do
    html = '<p>foo <img src="in.jpg"/> buzz</p>'
    # The block receives an object exposing #name and #[]=; here it rewrites
    # the src attribute of <img> elements.
    rewritten = filter.sanitize(html) do |node|
      node['src'] = 'out.jpg' if node.name == 'img'
    end
    rewritten.should == '<p>foo <img src="out.jpg" /> buzz</p>'
  end

  it "fixes up invalid markup" do
    html = '<p>foo <strong><em>bar</strong></em> buzz</p>'
    filter.sanitize(html).should == '<p>foo <strong><em>bar</em></strong> buzz</p>'
  end

  # Attack vectors taken from http://ha.ckers.org/xss.html

  it "catches javascript: in img/src" do
    html = %q{<IMG SRC=JaVaScRiPt:alert('XSS')>}
    filter.sanitize(html).should == %q{<img />}
  end

  it "handles strings with null in the middle" do
    # NOTE(review): inside %q{} the sequence \0 is a literal backslash
    # followed by "0", not a NUL byte. If a real NUL was intended, the input
    # needs an interpolating literal; left as-is until the expected output
    # for a real NUL is confirmed.
    html = %q{<IMG SRC=java\0script:alert("XSS")>}
    filter.sanitize(html).should == %q{<img />}
  end

  it "handles extra open brackets" do
    html = %q{<<SCRIPT>alert("XSS");//<</SCRIPT>}
    filter.sanitize(html).should == '<p>alert("XSS");//</p>'
  end

  it "removes remote stylesheet link" do
    html = %q{<P><STYLE>@import'http://ha.ckers.org/xss.css';</STYLE></P>}
    filter.sanitize(html).should == '<p></p>'
  end

  it "removes XML data island with CDATA obfuscation" do
    html = %{<XML ID=I><X><C><![CDATA[<IMG SRC="javas]]><![CDATA[cript:alert('XSS');">]]> </C></X></xml><SPAN DATASRC=#I DATAFLD=C DATAFORMATAS=HTML></SPAN>}
    filter.sanitize(html).should == ']]&gt; <span></span>'
  end
end
@@ -0,0 +1,18 @@
# Gem specification for whitewash 2.0.
#
# The packaged file list is taken from `git ls-files`, so the gem must be
# built from inside a git checkout of the project.
Gem::Specification.new do |spec|
  spec.name        = 'whitewash'
  spec.version     = '2.0'
  spec.author      = 'Dmitry Borodaenko'
  spec.email       = 'angdraug@debian.org'
  spec.homepage    = 'https://github.com/angdraug/whitewash'
  spec.summary     = 'Whitelist-based HTML filter for Ruby'
  # `<<-` keeps the leading whitespace of every content line, so the text is
  # written flush-left to keep the published description unindented.
  spec.description = <<-EOF
This module allows Ruby programs to clean up any HTML document or
fragment coming from an untrusted source and to remove all dangerous
constructs that could be used for cross-site scripting or request
forgery.
  EOF
  spec.files       = `git ls-files`.split("\n")
  # SPDX identifier for "GPL version 3 or later"; the free-form 'GPL3+' is
  # not a recognised identifier and triggers a RubyGems validation warning.
  spec.license     = 'GPL-3.0-or-later'
  # Dependencies are intentionally left undeclared (kept commented out).
  # NOTE(review): the specs require 'rspec', and nokogiri is presumably the
  # runtime parser -- confirm whether these should be declared.
  # spec.add_dependency('nokogiri')
  # spec.add_development_dependency('rspec')
end
metadata ADDED
@@ -0,0 +1,78 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: whitewash
3
+ version: !ruby/object:Gem::Version
4
+ hash: 3
5
+ prerelease:
6
+ segments:
7
+ - 2
8
+ - 0
9
+ version: "2.0"
10
+ platform: ruby
11
+ authors:
12
+ - Dmitry Borodaenko
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2012-02-05 00:00:00 Z
18
+ dependencies: []
19
+
20
+ description: |
21
+ This module allows Ruby programs to clean up any HTML document or
22
+ fragment coming from an untrusted source and to remove all dangerous
23
+ constructs that could be used for cross-site scripting or request
24
+ forgery.
25
+
26
+ email: angdraug@debian.org
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files: []
32
+
33
+ files:
34
+ - COPYING
35
+ - ChangeLog.mtn
36
+ - README.rdoc
37
+ - data/whitewash/html5_whitelist.yaml
38
+ - data/whitewash/whitelist.yaml
39
+ - lib/whitewash.rb
40
+ - setup.rb
41
+ - spec/spec_helper.rb
42
+ - spec/whitewash_spec.rb
43
+ - whitewash.gemspec
44
+ homepage: https://github.com/angdraug/whitewash
45
+ licenses:
46
+ - GPL3+
47
+ post_install_message:
48
+ rdoc_options: []
49
+
50
+ require_paths:
51
+ - lib
52
+ required_ruby_version: !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ hash: 3
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ hash: 3
67
+ segments:
68
+ - 0
69
+ version: "0"
70
+ requirements: []
71
+
72
+ rubyforge_project:
73
+ rubygems_version: 1.8.15
74
+ signing_key:
75
+ specification_version: 3
76
+ summary: Whitelist-based HTML filter for Ruby
77
+ test_files: []
78
+