sanitize-url 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -5
- data/.gitignore +21 -21
- data/LICENSE +20 -20
- data/Rakefile +53 -53
- data/VERSION +1 -1
- data/lib/sanitize-url.rb +4 -3
- data/sanitize-url.gemspec +57 -55
- data/spec/char_codes_spec.rb +32 -0
- data/spec/sanitize_url_spec.rb +169 -169
- data/spec/spec_helper.rb +6 -6
- data/test.rb +4 -16
- metadata +22 -9
data/.document
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
README.rdoc
|
2
|
-
lib/**/*.rb
|
3
|
-
bin/*
|
4
|
-
features/**/*.feature
|
5
|
-
LICENSE
|
1
|
+
README.rdoc
|
2
|
+
lib/**/*.rb
|
3
|
+
bin/*
|
4
|
+
features/**/*.feature
|
5
|
+
LICENSE
|
data/.gitignore
CHANGED
@@ -1,21 +1,21 @@
|
|
1
|
-
## MAC OS
|
2
|
-
.DS_Store
|
3
|
-
|
4
|
-
## TEXTMATE
|
5
|
-
*.tmproj
|
6
|
-
tmtags
|
7
|
-
|
8
|
-
## EMACS
|
9
|
-
*~
|
10
|
-
\#*
|
11
|
-
.\#*
|
12
|
-
|
13
|
-
## VIM
|
14
|
-
*.swp
|
15
|
-
|
16
|
-
## PROJECT::GENERAL
|
17
|
-
coverage
|
18
|
-
rdoc
|
19
|
-
pkg
|
20
|
-
|
21
|
-
## PROJECT::SPECIFIC
|
1
|
+
## MAC OS
|
2
|
+
.DS_Store
|
3
|
+
|
4
|
+
## TEXTMATE
|
5
|
+
*.tmproj
|
6
|
+
tmtags
|
7
|
+
|
8
|
+
## EMACS
|
9
|
+
*~
|
10
|
+
\#*
|
11
|
+
.\#*
|
12
|
+
|
13
|
+
## VIM
|
14
|
+
*.swp
|
15
|
+
|
16
|
+
## PROJECT::GENERAL
|
17
|
+
coverage
|
18
|
+
rdoc
|
19
|
+
pkg
|
20
|
+
|
21
|
+
## PROJECT::SPECIFIC
|
data/LICENSE
CHANGED
@@ -1,20 +1,20 @@
|
|
1
|
-
Copyright (c) 2009 Jarrett Colby
|
2
|
-
|
3
|
-
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
-
a copy of this software and associated documentation files (the
|
5
|
-
"Software"), to deal in the Software without restriction, including
|
6
|
-
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
-
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
-
permit persons to whom the Software is furnished to do so, subject to
|
9
|
-
the following conditions:
|
10
|
-
|
11
|
-
The above copyright notice and this permission notice shall be
|
12
|
-
included in all copies or substantial portions of the Software.
|
13
|
-
|
14
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
-
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
-
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
-
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
-
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
-
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
-
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
1
|
+
Copyright (c) 2009 Jarrett Colby
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
CHANGED
@@ -1,53 +1,53 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
require 'rake'
|
3
|
-
|
4
|
-
begin
|
5
|
-
require 'jeweler'
|
6
|
-
Jeweler::Tasks.new do |gem|
|
7
|
-
gem.name = "sanitize-url"
|
8
|
-
gem.summary = %Q{Sanitizes untrusted URLs}
|
9
|
-
gem.description = %Q{This gem provides a module called SanitizeUrl, which you can mix-in anywhere you like. It provides a single method: sanitize_url, which accepts a URL and returns one with JavaScript removed. It also prepends the http:// scheme if no valid scheme is found.}
|
10
|
-
gem.email = "jarrett@uchicago.edu"
|
11
|
-
gem.homepage = "http://github.com/jarrett/sanitize-url"
|
12
|
-
gem.authors = ["jarrett"]
|
13
|
-
gem.add_development_dependency "rspec", ">= 1.3.0"
|
14
|
-
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
15
|
-
end
|
16
|
-
Jeweler::GemcutterTasks.new
|
17
|
-
rescue LoadError
|
18
|
-
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
19
|
-
end
|
20
|
-
|
21
|
-
require 'rake/testtask'
|
22
|
-
Rake::TestTask.new(:test) do |test|
|
23
|
-
test.libs << 'lib' << 'test'
|
24
|
-
test.pattern = 'test/**/test_*.rb'
|
25
|
-
test.verbose = true
|
26
|
-
end
|
27
|
-
|
28
|
-
begin
|
29
|
-
require 'rcov/rcovtask'
|
30
|
-
Rcov::RcovTask.new do |test|
|
31
|
-
test.libs << 'test'
|
32
|
-
test.pattern = 'test/**/test_*.rb'
|
33
|
-
test.verbose = true
|
34
|
-
end
|
35
|
-
rescue LoadError
|
36
|
-
task :rcov do
|
37
|
-
abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
task :test => :check_dependencies
|
42
|
-
|
43
|
-
task :default => :test
|
44
|
-
|
45
|
-
require 'rake/rdoctask'
|
46
|
-
Rake::RDocTask.new do |rdoc|
|
47
|
-
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
48
|
-
|
49
|
-
rdoc.rdoc_dir = 'rdoc'
|
50
|
-
rdoc.title = "sanitize-url #{version}"
|
51
|
-
rdoc.rdoc_files.include('README*')
|
52
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
53
|
-
end
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
begin
|
5
|
+
require 'jeweler'
|
6
|
+
Jeweler::Tasks.new do |gem|
|
7
|
+
gem.name = "sanitize-url"
|
8
|
+
gem.summary = %Q{Sanitizes untrusted URLs}
|
9
|
+
gem.description = %Q{This gem provides a module called SanitizeUrl, which you can mix-in anywhere you like. It provides a single method: sanitize_url, which accepts a URL and returns one with JavaScript removed. It also prepends the http:// scheme if no valid scheme is found.}
|
10
|
+
gem.email = "jarrett@uchicago.edu"
|
11
|
+
gem.homepage = "http://github.com/jarrett/sanitize-url"
|
12
|
+
gem.authors = ["jarrett"]
|
13
|
+
gem.add_development_dependency "rspec", ">= 1.3.0"
|
14
|
+
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
15
|
+
end
|
16
|
+
Jeweler::GemcutterTasks.new
|
17
|
+
rescue LoadError
|
18
|
+
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
19
|
+
end
|
20
|
+
|
21
|
+
require 'rake/testtask'
|
22
|
+
Rake::TestTask.new(:test) do |test|
|
23
|
+
test.libs << 'lib' << 'test'
|
24
|
+
test.pattern = 'test/**/test_*.rb'
|
25
|
+
test.verbose = true
|
26
|
+
end
|
27
|
+
|
28
|
+
begin
|
29
|
+
require 'rcov/rcovtask'
|
30
|
+
Rcov::RcovTask.new do |test|
|
31
|
+
test.libs << 'test'
|
32
|
+
test.pattern = 'test/**/test_*.rb'
|
33
|
+
test.verbose = true
|
34
|
+
end
|
35
|
+
rescue LoadError
|
36
|
+
task :rcov do
|
37
|
+
abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
task :test => :check_dependencies
|
42
|
+
|
43
|
+
task :default => :test
|
44
|
+
|
45
|
+
require 'rake/rdoctask'
|
46
|
+
Rake::RDocTask.new do |rdoc|
|
47
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
48
|
+
|
49
|
+
rdoc.rdoc_dir = 'rdoc'
|
50
|
+
rdoc.title = "sanitize-url #{version}"
|
51
|
+
rdoc.rdoc_files.include('README*')
|
52
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
53
|
+
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.3
|
1
|
+
0.1.4
|
data/lib/sanitize-url.rb
CHANGED
@@ -4,11 +4,11 @@ module SanitizeUrl
|
|
4
4
|
ALPHANUMERIC_CHAR_CODES = (48..57).to_a + (65..90).to_a + (97..122).to_a
|
5
5
|
|
6
6
|
VALID_OPAQUE_SPECIAL_CHARS = ['!', '*', "'", '(', ')', ';', ':', '@', '&', '=', '+', '$', ',', '/', '?', '%', '#', '[', ']', '-', '_', '.', '~']
|
7
|
-
VALID_OPAQUE_SPECIAL_CHAR_CODES = VALID_OPAQUE_SPECIAL_CHARS.collect { |c| c[0] }
|
7
|
+
VALID_OPAQUE_SPECIAL_CHAR_CODES = VALID_OPAQUE_SPECIAL_CHARS.collect { |c| c[0].is_a?(String) ? c.ord : c[0] }
|
8
8
|
VALID_OPAQUE_CHAR_CODES = ALPHANUMERIC_CHAR_CODES + VALID_OPAQUE_SPECIAL_CHAR_CODES
|
9
9
|
|
10
10
|
VALID_SCHEME_SPECIAL_CHARS = ['+', '.', '-']
|
11
|
-
VALID_SCHEME_SPECIAL_CHAR_CODES = VALID_SCHEME_SPECIAL_CHARS.collect { |c| c[0] }
|
11
|
+
VALID_SCHEME_SPECIAL_CHAR_CODES = VALID_SCHEME_SPECIAL_CHARS.collect { |c| c[0].is_a?(String) ? c.ord : c[0] }
|
12
12
|
VALID_SCHEME_CHAR_CODES = ALPHANUMERIC_CHAR_CODES + VALID_SCHEME_SPECIAL_CHAR_CODES
|
13
13
|
|
14
14
|
HTTP_STYLE_SCHEMES = ['http', 'https', 'ftp', 'ftps', 'svn', 'svn+ssh', 'git'] # Common schemes whose format should be "scheme://" instead of "scheme:"
|
@@ -94,7 +94,8 @@ module SanitizeUrl
|
|
94
94
|
def self.char_or_url_encoded(code) #:nodoc:
|
95
95
|
if url_encode?(code)
|
96
96
|
utf_8_str = ([code.to_i].pack('U'))
|
97
|
-
|
97
|
+
length = utf_8_str.respond_to?(:bytes) ? utf_8_str.bytes.to_a.length : utf_8_str.length
|
98
|
+
'%' + utf_8_str.unpack('H2' * length).join('%').upcase
|
98
99
|
else
|
99
100
|
code.chr
|
100
101
|
end
|
data/sanitize-url.gemspec
CHANGED
@@ -1,55 +1,57 @@
|
|
1
|
-
# Generated by jeweler
|
2
|
-
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
-
# Instead, edit Jeweler::Tasks in
|
4
|
-
# -*- encoding: utf-8 -*-
|
5
|
-
|
6
|
-
Gem::Specification.new do |s|
|
7
|
-
s.name = %q{sanitize-url}
|
8
|
-
s.version = "0.1.3"
|
9
|
-
|
10
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
-
s.authors = ["jarrett"]
|
12
|
-
s.date = %q{2010-
|
13
|
-
s.description = %q{This gem provides a module called SanitizeUrl, which you can mix-in anywhere you like. It provides a single method: sanitize_url, which accepts a URL and returns one with JavaScript removed. It also prepends the http:// scheme if no valid scheme is found.}
|
14
|
-
s.email = %q{jarrett@uchicago.edu}
|
15
|
-
s.extra_rdoc_files = [
|
16
|
-
"LICENSE",
|
17
|
-
"README.markdown"
|
18
|
-
]
|
19
|
-
s.files = [
|
20
|
-
".document",
|
21
|
-
".gitignore",
|
22
|
-
"LICENSE",
|
23
|
-
"README.markdown",
|
24
|
-
"Rakefile",
|
25
|
-
"VERSION",
|
26
|
-
"lib/sanitize-url.rb",
|
27
|
-
"sanitize-url.gemspec",
|
28
|
-
"spec/sanitize_url_spec.rb",
|
29
|
-
"spec/spec_helper.rb",
|
30
|
-
"test.rb"
|
31
|
-
|
32
|
-
|
33
|
-
s.
|
34
|
-
s.
|
35
|
-
s.
|
36
|
-
s.
|
37
|
-
s.
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
s.
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{sanitize-url}
|
8
|
+
s.version = "0.1.4"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["jarrett"]
|
12
|
+
s.date = %q{2010-03-21}
|
13
|
+
s.description = %q{This gem provides a module called SanitizeUrl, which you can mix-in anywhere you like. It provides a single method: sanitize_url, which accepts a URL and returns one with JavaScript removed. It also prepends the http:// scheme if no valid scheme is found.}
|
14
|
+
s.email = %q{jarrett@uchicago.edu}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.markdown"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
".gitignore",
|
22
|
+
"LICENSE",
|
23
|
+
"README.markdown",
|
24
|
+
"Rakefile",
|
25
|
+
"VERSION",
|
26
|
+
"lib/sanitize-url.rb",
|
27
|
+
"sanitize-url.gemspec",
|
28
|
+
"spec/char_codes_spec.rb",
|
29
|
+
"spec/sanitize_url_spec.rb",
|
30
|
+
"spec/spec_helper.rb",
|
31
|
+
"test.rb"
|
32
|
+
]
|
33
|
+
s.homepage = %q{http://github.com/jarrett/sanitize-url}
|
34
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
35
|
+
s.require_paths = ["lib"]
|
36
|
+
s.rubygems_version = %q{1.3.6}
|
37
|
+
s.summary = %q{Sanitizes untrusted URLs}
|
38
|
+
s.test_files = [
|
39
|
+
"spec/char_codes_spec.rb",
|
40
|
+
"spec/sanitize_url_spec.rb",
|
41
|
+
"spec/spec_helper.rb"
|
42
|
+
]
|
43
|
+
|
44
|
+
if s.respond_to? :specification_version then
|
45
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
46
|
+
s.specification_version = 3
|
47
|
+
|
48
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
49
|
+
s.add_development_dependency(%q<rspec>, [">= 1.3.0"])
|
50
|
+
else
|
51
|
+
s.add_dependency(%q<rspec>, [">= 1.3.0"])
|
52
|
+
end
|
53
|
+
else
|
54
|
+
s.add_dependency(%q<rspec>, [">= 1.3.0"])
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
describe 'Char codes' do
|
4
|
+
it 'counts a number as being in the range 48-57' do
|
5
|
+
(0..9).each do |num|
|
6
|
+
c = num.to_s
|
7
|
+
code = c[0].is_a?(String) ? c.ord : c[0]
|
8
|
+
code.should == 48 + num
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'counts an uppercase letter as being in the range 65-90' do
|
13
|
+
('A'..'Z').each_with_index do |c, offset|
|
14
|
+
code = c[0].is_a?(String) ? c.ord : c[0]
|
15
|
+
code.should == 65 + offset
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'counts a lowercase letter as being in the range 97-122' do
|
20
|
+
('a'..'z').each_with_index do |c, offset|
|
21
|
+
code = c[0].is_a?(String) ? c.ord : c[0]
|
22
|
+
code.should == 97 + offset
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
['!', '*', "'", '(', ')', ';', ':', '@', '&', '=', '+', '$', ',', '/', '?', '%', '#', '[', ']', '-', '_', '.', '~'].each do |c|
|
27
|
+
it "counts #{c} as included in VALID_OPAQUE_CHAR_CODES" do
|
28
|
+
code = c[0].is_a?(String) ? c.ord : c[0]
|
29
|
+
SanitizeUrl::VALID_OPAQUE_CHAR_CODES.should include(code)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
data/spec/sanitize_url_spec.rb
CHANGED
@@ -1,169 +1,169 @@
|
|
1
|
-
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
-
|
3
|
-
describe SanitizeUrl do
|
4
|
-
include SanitizeUrl
|
5
|
-
|
6
|
-
describe '#sanitize_url' do
|
7
|
-
it 'replaces JavaScript URLs with options[:replace_evil_with]' do
|
8
|
-
urls = [
|
9
|
-
'javascript:alert("1");',
|
10
|
-
'javascript//:alert("2");',
|
11
|
-
'javascript://alert("3");',
|
12
|
-
'javascript/:/alert("4");',
|
13
|
-
'j a v a script:alert("5");',
|
14
|
-
' javascript:alert("6");',
|
15
|
-
'JavaScript:alert("7");',
|
16
|
-
"java\nscript:alert(\"8\");",
|
17
|
-
"java\rscript:alert(\"9\");"
|
18
|
-
].each do |evil_url|
|
19
|
-
sanitize_url(evil_url, :replace_evil_with => 'replaced').should == 'replaced'
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
it 'replaces data: URLs with options[:replace_evil_with]' do
|
24
|
-
urls = [
|
25
|
-
'data:text/html;base64,PHNjcmlwdD5hbGVydCgnMScpPC9zY3JpcHQ+',
|
26
|
-
'data://text/html;base64,PHNjcmlwdD5hbGVydCgnMicpPC9zY3JpcHQ+',
|
27
|
-
'data//:text/html;base64,PHNjcmlwdD5hbGVydCgnMycpPC9zY3JpcHQ+',
|
28
|
-
'data/:/text/html;base64,PHNjcmlwdD5hbGVydCgnNCcpPC9zY3JpcHQ+',
|
29
|
-
' data:text/html;base64,PHNjcmlwdD5hbGVydCgnNScpPC9zY3JpcHQ+',
|
30
|
-
'da ta:text/html;base64,PHNjcmlwdD5hbGVydCgnNicpPC9zY3JpcHQ+',
|
31
|
-
'Data:text/html;base64,PHNjcmlwdD5hbGVydCgnNycpPC9zY3JpcHQ+',
|
32
|
-
"da\nta:text/html;base64,PHNjcmlwdD5hbGVydCgnOCcpPC9zY3JpcHQ+",
|
33
|
-
"da\rta:text/html;base64,PHNjcmlwdD5hbGVydCgnOScpPC9zY3JpcHQ+",
|
34
|
-
].each do |evil_url|
|
35
|
-
sanitize_url(evil_url, :replace_evil_with => 'replaced').should == 'replaced'
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
context 'with :schemes whitelist' do
|
40
|
-
it 'kills anything not on the list' do
|
41
|
-
[
|
42
|
-
'https://example.com',
|
43
|
-
'https:example.com',
|
44
|
-
'ftp://example.com',
|
45
|
-
'ftp:example.com',
|
46
|
-
'data://example.com',
|
47
|
-
'data:example.com',
|
48
|
-
'javascript://example.com',
|
49
|
-
'javascript:example.com',
|
50
|
-
].each do |evil_url|
|
51
|
-
sanitize_url(evil_url, :schemes => ['http'], :replace_evil_with => 'replaced')
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
it 'allows anything on the list' do
|
56
|
-
[
|
57
|
-
'http://example.com',
|
58
|
-
'https://example.com'
|
59
|
-
].each do |good_url|
|
60
|
-
sanitize_url(good_url, :schemes => ['http', 'https']).should == good_url
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
it 'works with schemes given as symbols' do
|
65
|
-
sanitize_url('ftp://example.com', :schemes => [:http, :https], :replace_evil_with => 'replaced').should == 'replaced'
|
66
|
-
sanitize_url('ftp://example.com', :schemes => [:http, :https, :ftp]).should == 'ftp://example.com'
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
it 'prepends http:// if no scheme is given' do
|
71
|
-
sanitize_url('www.example.com').should == 'http://www.example.com'
|
72
|
-
end
|
73
|
-
|
74
|
-
it 'replaces evil URLs that are encoded with Unicode numerical character references' do
|
75
|
-
[
|
76
|
-
'javascript:alert('1')',
|
77
|
-
'javascript:alert('2')'
|
78
|
-
].each do |evil_url|
|
79
|
-
sanitize_url(evil_url, :replace_evil_with => 'replaced').should == 'replaced'
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
it 'replaces evil URLs that are URL-encoded (hex with %)' do
|
84
|
-
sanitize_url('%6A%61%76%61%73%63%72%69%70%74%3A%61%6C%65%72%74%28%22%58%53%53%22%29', :replace_evil_with => 'replaced').should == 'replaced'
|
85
|
-
end
|
86
|
-
|
87
|
-
it 'does not try to fix broken schemes after the start of the string' do
|
88
|
-
sanitize_url('http://example.com/http/foo').should == 'http://example.com/http/foo'
|
89
|
-
end
|
90
|
-
|
91
|
-
it 'does not prepend an extra http:// if a valid scheme is given' do
|
92
|
-
sanitize_url('http://www.example.com').should == 'http://www.example.com'
|
93
|
-
sanitize_url('https://www.example.com').should == 'https://www.example.com'
|
94
|
-
sanitize_url('ftp://www.example.com').should == 'ftp://www.example.com'
|
95
|
-
end
|
96
|
-
|
97
|
-
it 'dereferences URL-encoded characters in the scheme' do
|
98
|
-
sanitize_url('h%74tp://example.com').should == 'http://example.com'
|
99
|
-
end
|
100
|
-
|
101
|
-
it 'dereferences decimal numeric character references in the scheme' do
|
102
|
-
sanitize_url('http://example.com').should == 'http://example.com'
|
103
|
-
end
|
104
|
-
|
105
|
-
it 'dereferences hex numeric character references in the scheme' do
|
106
|
-
sanitize_url('http://example.com').should == 'http://example.com'
|
107
|
-
end
|
108
|
-
|
109
|
-
it 'retains URL-encoded characters in the opaque portion' do
|
110
|
-
sanitize_url('http://someone%40gmail.com:password@example.com').should == 'http://someone%40gmail.com:password@example.com'
|
111
|
-
end
|
112
|
-
|
113
|
-
it 'URL-encodes code points outside ASCII' do
|
114
|
-
# Percent-encoding should be in UTF-8 (RFC 3986).
|
115
|
-
# http://en.wikipedia.org/wiki/Percent-encoding#Current_standard
|
116
|
-
sanitize_url('http://&#1044;').should == 'http://%D0%94'
|
117
|
-
sanitize_url('http://&#x0414;').should == 'http://%D0%94'
|
118
|
-
sanitize_url("http://\xD0\x94").should == 'http://%D0%94' # UTF-8 version of the same.
|
119
|
-
end
|
120
|
-
|
121
|
-
it 'replaces URLs without the opaque portion' do
|
122
|
-
sanitize_url('http://', :replace_evil_with => 'replaced').should == 'replaced'
|
123
|
-
sanitize_url('mailto:', :replace_evil_with => 'replaced').should == 'replaced'
|
124
|
-
end
|
125
|
-
|
126
|
-
it 'adds the two slashes for known schemes that require it' do
|
127
|
-
sanitize_url('http:example.com').should == 'http://example.com'
|
128
|
-
sanitize_url('ftp:example.com').should == 'ftp://example.com'
|
129
|
-
sanitize_url('svn+ssh:example.com').should == 'svn+ssh://example.com'
|
130
|
-
end
|
131
|
-
|
132
|
-
it 'does not add slashes for schemes that do not require it' do
|
133
|
-
sanitize_url('mailto:someone@example.com').should == 'mailto:someone@example.com'
|
134
|
-
end
|
135
|
-
|
136
|
-
it 'strips invalid characters from the scheme and then evaluates the scheme according to the normal rules' do
|
137
|
-
sanitize_url("ht\xD0\x94tp://example.com").should == 'http://example.com'
|
138
|
-
sanitize_url('htt$p://example.com').should == 'http://example.com'
|
139
|
-
sanitize_url('j%avascript:alert("XSS")', :replace_evil_with => 'replaced').should == 'replaced'
|
140
|
-
end
|
141
|
-
end
|
142
|
-
|
143
|
-
|
144
|
-
describe '.dereference_numerics' do
|
145
|
-
it 'decodes short-form decimal UTF-8 character references with a semicolon' do
|
146
|
-
SanitizeUrl.dereference_numerics('&#106;').should == 'j'
|
147
|
-
end
|
148
|
-
|
149
|
-
it 'decodes short-form decimal UTF-8 character references without a semicolon' do
|
150
|
-
SanitizeUrl.dereference_numerics('&#106').should == 'j'
|
151
|
-
end
|
152
|
-
|
153
|
-
it 'decodes long-form decimal UTF-8 character references with a semicolon' do
|
154
|
-
SanitizeUrl.dereference_numerics('&#0000106;').should == 'j'
|
155
|
-
end
|
156
|
-
|
157
|
-
it 'decodes long-form decimal UTF-8 character references without a semicolon' do
|
158
|
-
SanitizeUrl.dereference_numerics('&#0000106').should == 'j'
|
159
|
-
end
|
160
|
-
|
161
|
-
it 'decodes hex UTF-8 character references with a semicolon' do
|
162
|
-
SanitizeUrl.dereference_numerics('&#x6A;').should == 'j'
|
163
|
-
end
|
164
|
-
|
165
|
-
it 'decodes hex UTF-8 character references without a semicolon' do
|
166
|
-
SanitizeUrl.dereference_numerics('&#x6A').should == 'j'
|
167
|
-
end
|
168
|
-
end
|
169
|
-
end
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
describe SanitizeUrl do
|
4
|
+
include SanitizeUrl
|
5
|
+
|
6
|
+
describe '#sanitize_url' do
|
7
|
+
it 'replaces JavaScript URLs with options[:replace_evil_with]' do
|
8
|
+
urls = [
|
9
|
+
'javascript:alert("1");',
|
10
|
+
'javascript//:alert("2");',
|
11
|
+
'javascript://alert("3");',
|
12
|
+
'javascript/:/alert("4");',
|
13
|
+
'j a v a script:alert("5");',
|
14
|
+
' javascript:alert("6");',
|
15
|
+
'JavaScript:alert("7");',
|
16
|
+
"java\nscript:alert(\"8\");",
|
17
|
+
"java\rscript:alert(\"9\");"
|
18
|
+
].each do |evil_url|
|
19
|
+
sanitize_url(evil_url, :replace_evil_with => 'replaced').should == 'replaced'
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'replaces data: URLs with options[:replace_evil_with]' do
|
24
|
+
urls = [
|
25
|
+
'data:text/html;base64,PHNjcmlwdD5hbGVydCgnMScpPC9zY3JpcHQ+',
|
26
|
+
'data://text/html;base64,PHNjcmlwdD5hbGVydCgnMicpPC9zY3JpcHQ+',
|
27
|
+
'data//:text/html;base64,PHNjcmlwdD5hbGVydCgnMycpPC9zY3JpcHQ+',
|
28
|
+
'data/:/text/html;base64,PHNjcmlwdD5hbGVydCgnNCcpPC9zY3JpcHQ+',
|
29
|
+
' data:text/html;base64,PHNjcmlwdD5hbGVydCgnNScpPC9zY3JpcHQ+',
|
30
|
+
'da ta:text/html;base64,PHNjcmlwdD5hbGVydCgnNicpPC9zY3JpcHQ+',
|
31
|
+
'Data:text/html;base64,PHNjcmlwdD5hbGVydCgnNycpPC9zY3JpcHQ+',
|
32
|
+
"da\nta:text/html;base64,PHNjcmlwdD5hbGVydCgnOCcpPC9zY3JpcHQ+",
|
33
|
+
"da\rta:text/html;base64,PHNjcmlwdD5hbGVydCgnOScpPC9zY3JpcHQ+",
|
34
|
+
].each do |evil_url|
|
35
|
+
sanitize_url(evil_url, :replace_evil_with => 'replaced').should == 'replaced'
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
context 'with :schemes whitelist' do
|
40
|
+
it 'kills anything not on the list' do
|
41
|
+
[
|
42
|
+
'https://example.com',
|
43
|
+
'https:example.com',
|
44
|
+
'ftp://example.com',
|
45
|
+
'ftp:example.com',
|
46
|
+
'data://example.com',
|
47
|
+
'data:example.com',
|
48
|
+
'javascript://example.com',
|
49
|
+
'javascript:example.com',
|
50
|
+
].each do |evil_url|
|
51
|
+
sanitize_url(evil_url, :schemes => ['http'], :replace_evil_with => 'replaced')
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
it 'allows anything on the list' do
|
56
|
+
[
|
57
|
+
'http://example.com',
|
58
|
+
'https://example.com'
|
59
|
+
].each do |good_url|
|
60
|
+
sanitize_url(good_url, :schemes => ['http', 'https']).should == good_url
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
it 'works with schemes given as symbols' do
|
65
|
+
sanitize_url('ftp://example.com', :schemes => [:http, :https], :replace_evil_with => 'replaced').should == 'replaced'
|
66
|
+
sanitize_url('ftp://example.com', :schemes => [:http, :https, :ftp]).should == 'ftp://example.com'
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
it 'prepends http:// if no scheme is given' do
|
71
|
+
sanitize_url('www.example.com').should == 'http://www.example.com'
|
72
|
+
end
|
73
|
+
|
74
|
+
it 'replaces evil URLs that are encoded with Unicode numerical character references' do
|
75
|
+
[
|
76
|
+
'javascript:alert('1')',
|
77
|
+
'javascript:alert('2')'
|
78
|
+
].each do |evil_url|
|
79
|
+
sanitize_url(evil_url, :replace_evil_with => 'replaced').should == 'replaced'
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
it 'replaces evil URLs that are URL-encoded (hex with %)' do
|
84
|
+
sanitize_url('%6A%61%76%61%73%63%72%69%70%74%3A%61%6C%65%72%74%28%22%58%53%53%22%29', :replace_evil_with => 'replaced').should == 'replaced'
|
85
|
+
end
|
86
|
+
|
87
|
+
it 'does not try to fix broken schemes after the start of the string' do
|
88
|
+
sanitize_url('http://example.com/http/foo').should == 'http://example.com/http/foo'
|
89
|
+
end
|
90
|
+
|
91
|
+
it 'does not prepend an extra http:// if a valid scheme is given' do
|
92
|
+
sanitize_url('http://www.example.com').should == 'http://www.example.com'
|
93
|
+
sanitize_url('https://www.example.com').should == 'https://www.example.com'
|
94
|
+
sanitize_url('ftp://www.example.com').should == 'ftp://www.example.com'
|
95
|
+
end
|
96
|
+
|
97
|
+
it 'dereferences URL-encoded characters in the scheme' do
|
98
|
+
sanitize_url('h%74tp://example.com').should == 'http://example.com'
|
99
|
+
end
|
100
|
+
|
101
|
+
it 'dereferences decimal numeric character references in the scheme' do
|
102
|
+
sanitize_url('http://example.com').should == 'http://example.com'
|
103
|
+
end
|
104
|
+
|
105
|
+
it 'dereferences hex numeric character references in the scheme' do
|
106
|
+
sanitize_url('http://example.com').should == 'http://example.com'
|
107
|
+
end
|
108
|
+
|
109
|
+
it 'retains URL-encoded characters in the opaque portion' do
|
110
|
+
sanitize_url('http://someone%40gmail.com:password@example.com').should == 'http://someone%40gmail.com:password@example.com'
|
111
|
+
end
|
112
|
+
|
113
|
+
it 'URL-encodes code points outside ASCII' do
|
114
|
+
# Percent-encoding should be in UTF-8 (RFC 3986).
|
115
|
+
# http://en.wikipedia.org/wiki/Percent-encoding#Current_standard
|
116
|
+
sanitize_url('http://&#1044;').should == 'http://%D0%94'
|
117
|
+
sanitize_url('http://&#x0414;').should == 'http://%D0%94'
|
118
|
+
sanitize_url("http://\xD0\x94").should == 'http://%D0%94' # UTF-8 version of the same.
|
119
|
+
end
|
120
|
+
|
121
|
+
it 'replaces URLs without the opaque portion' do
|
122
|
+
sanitize_url('http://', :replace_evil_with => 'replaced').should == 'replaced'
|
123
|
+
sanitize_url('mailto:', :replace_evil_with => 'replaced').should == 'replaced'
|
124
|
+
end
|
125
|
+
|
126
|
+
it 'adds the two slashes for known schemes that require it' do
|
127
|
+
sanitize_url('http:example.com').should == 'http://example.com'
|
128
|
+
sanitize_url('ftp:example.com').should == 'ftp://example.com'
|
129
|
+
sanitize_url('svn+ssh:example.com').should == 'svn+ssh://example.com'
|
130
|
+
end
|
131
|
+
|
132
|
+
it 'does not add slashes for schemes that do not require it' do
|
133
|
+
sanitize_url('mailto:someone@example.com').should == 'mailto:someone@example.com'
|
134
|
+
end
|
135
|
+
|
136
|
+
it 'strips invalid characters from the scheme and then evaluates the scheme according to the normal rules' do
|
137
|
+
sanitize_url("ht\xD0\x94tp://example.com").should == 'http://example.com'
|
138
|
+
sanitize_url('htt$p://example.com').should == 'http://example.com'
|
139
|
+
sanitize_url('j%avascript:alert("XSS")', :replace_evil_with => 'replaced').should == 'replaced'
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
|
144
|
+
describe '.dereference_numerics' do
|
145
|
+
it 'decodes short-form decimal UTF-8 character references with a semicolon' do
|
146
|
+
SanitizeUrl.dereference_numerics('&#106;').should == 'j'
|
147
|
+
end
|
148
|
+
|
149
|
+
it 'decodes short-form decimal UTF-8 character references without a semicolon' do
|
150
|
+
SanitizeUrl.dereference_numerics('&#106').should == 'j'
|
151
|
+
end
|
152
|
+
|
153
|
+
it 'decodes long-form decimal UTF-8 character references with a semicolon' do
|
154
|
+
SanitizeUrl.dereference_numerics('&#0000106;').should == 'j'
|
155
|
+
end
|
156
|
+
|
157
|
+
it 'decodes long-form decimal UTF-8 character references without a semicolon' do
|
158
|
+
SanitizeUrl.dereference_numerics('&#0000106').should == 'j'
|
159
|
+
end
|
160
|
+
|
161
|
+
it 'decodes hex UTF-8 character references with a semicolon' do
|
162
|
+
SanitizeUrl.dereference_numerics('&#x6A;').should == 'j'
|
163
|
+
end
|
164
|
+
|
165
|
+
it 'decodes hex UTF-8 character references without a semicolon' do
|
166
|
+
SanitizeUrl.dereference_numerics('&#x6A').should == 'j'
|
167
|
+
end
|
168
|
+
end
|
169
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
require 'test/unit'
|
3
|
-
require 'spec'
|
4
|
-
|
5
|
-
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
6
|
-
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
1
|
+
require 'rubygems'
|
2
|
+
require 'test/unit'
|
3
|
+
require 'spec'
|
4
|
+
|
5
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
6
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
7
7
|
require 'sanitize-url'
|
data/test.rb
CHANGED
@@ -1,16 +1,4 @@
|
|
1
|
-
#
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
# '%' + utf_8_str.unpack('H2' * utf_8_str.length).join('%').upcase
|
6
|
-
#end
|
7
|
-
|
8
|
-
hex_code_point = 'A9'
|
9
|
-
decimal_code_point = '169'
|
10
|
-
hex_utf_8_bytes = '%C2%A9'
|
11
|
-
|
12
|
-
#puts 'Expected: ' + hex_utf_8_bytes
|
13
|
-
#puts 'Actual: ' + decimal_code_point_to_url_encoded(decimal_code_point)
|
14
|
-
|
15
|
-
evil = 'javascript:alert("XSS")'
|
16
|
-
puts evil.unpack('H2' * evil.length).join('%').upcase
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
puts 'Д'
|
4
|
+
puts 'Д'.unpack('H2' * 2).inspect
|
metadata
CHANGED
@@ -1,7 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sanitize-url
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 1
|
8
|
+
- 4
|
9
|
+
version: 0.1.4
|
5
10
|
platform: ruby
|
6
11
|
authors:
|
7
12
|
- jarrett
|
@@ -9,19 +14,23 @@ autorequire:
|
|
9
14
|
bindir: bin
|
10
15
|
cert_chain: []
|
11
16
|
|
12
|
-
date: 2010-
|
17
|
+
date: 2010-03-21 00:00:00 -05:00
|
13
18
|
default_executable:
|
14
19
|
dependencies:
|
15
20
|
- !ruby/object:Gem::Dependency
|
16
21
|
name: rspec
|
17
|
-
|
18
|
-
|
19
|
-
version_requirements: !ruby/object:Gem::Requirement
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
20
24
|
requirements:
|
21
25
|
- - ">="
|
22
26
|
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 1
|
29
|
+
- 3
|
30
|
+
- 0
|
23
31
|
version: 1.3.0
|
24
|
-
|
32
|
+
type: :development
|
33
|
+
version_requirements: *id001
|
25
34
|
description: "This gem provides a module called SanitizeUrl, which you can mix-in anywhere you like. It provides a single method: sanitize_url, which accepts a URL and returns one with JavaScript removed. It also prepends the http:// scheme if no valid scheme is found."
|
26
35
|
email: jarrett@uchicago.edu
|
27
36
|
executables: []
|
@@ -40,6 +49,7 @@ files:
|
|
40
49
|
- VERSION
|
41
50
|
- lib/sanitize-url.rb
|
42
51
|
- sanitize-url.gemspec
|
52
|
+
- spec/char_codes_spec.rb
|
43
53
|
- spec/sanitize_url_spec.rb
|
44
54
|
- spec/spec_helper.rb
|
45
55
|
- test.rb
|
@@ -56,21 +66,24 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
56
66
|
requirements:
|
57
67
|
- - ">="
|
58
68
|
- !ruby/object:Gem::Version
|
69
|
+
segments:
|
70
|
+
- 0
|
59
71
|
version: "0"
|
60
|
-
version:
|
61
72
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
73
|
requirements:
|
63
74
|
- - ">="
|
64
75
|
- !ruby/object:Gem::Version
|
76
|
+
segments:
|
77
|
+
- 0
|
65
78
|
version: "0"
|
66
|
-
version:
|
67
79
|
requirements: []
|
68
80
|
|
69
81
|
rubyforge_project:
|
70
|
-
rubygems_version: 1.3.
|
82
|
+
rubygems_version: 1.3.6
|
71
83
|
signing_key:
|
72
84
|
specification_version: 3
|
73
85
|
summary: Sanitizes untrusted URLs
|
74
86
|
test_files:
|
87
|
+
- spec/char_codes_spec.rb
|
75
88
|
- spec/sanitize_url_spec.rb
|
76
89
|
- spec/spec_helper.rb
|