sanitize-url 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
data/.document CHANGED
@@ -1,5 +1,5 @@
1
- README.rdoc
2
- lib/**/*.rb
3
- bin/*
4
- features/**/*.feature
5
- LICENSE
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore CHANGED
@@ -1,21 +1,21 @@
1
- ## MAC OS
2
- .DS_Store
3
-
4
- ## TEXTMATE
5
- *.tmproj
6
- tmtags
7
-
8
- ## EMACS
9
- *~
10
- \#*
11
- .\#*
12
-
13
- ## VIM
14
- *.swp
15
-
16
- ## PROJECT::GENERAL
17
- coverage
18
- rdoc
19
- pkg
20
-
21
- ## PROJECT::SPECIFIC
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE CHANGED
@@ -1,20 +1,20 @@
1
- Copyright (c) 2009 Jarrett Colby
2
-
3
- Permission is hereby granted, free of charge, to any person obtaining
4
- a copy of this software and associated documentation files (the
5
- "Software"), to deal in the Software without restriction, including
6
- without limitation the rights to use, copy, modify, merge, publish,
7
- distribute, sublicense, and/or sell copies of the Software, and to
8
- permit persons to whom the Software is furnished to do so, subject to
9
- the following conditions:
10
-
11
- The above copyright notice and this permission notice shall be
12
- included in all copies or substantial portions of the Software.
13
-
14
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1
+ Copyright (c) 2009 Jarrett Colby
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile CHANGED
@@ -1,53 +1,53 @@
1
- require 'rubygems'
2
- require 'rake'
3
-
4
- begin
5
- require 'jeweler'
6
- Jeweler::Tasks.new do |gem|
7
- gem.name = "sanitize-url"
8
- gem.summary = %Q{Sanitizes untrusted URLs}
9
- gem.description = %Q{This gem provides a module called SanitizeUrl, which you can mix-in anywhere you like. It provides a single method: sanitize_url, which accepts a URL and returns one with JavaScript removed. It also prepends the http:// scheme if no valid scheme is found.}
10
- gem.email = "jarrett@uchicago.edu"
11
- gem.homepage = "http://github.com/jarrett/sanitize-url"
12
- gem.authors = ["jarrett"]
13
- gem.add_development_dependency "rspec", ">= 1.3.0"
14
- # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
15
- end
16
- Jeweler::GemcutterTasks.new
17
- rescue LoadError
18
- puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
19
- end
20
-
21
- require 'rake/testtask'
22
- Rake::TestTask.new(:test) do |test|
23
- test.libs << 'lib' << 'test'
24
- test.pattern = 'test/**/test_*.rb'
25
- test.verbose = true
26
- end
27
-
28
- begin
29
- require 'rcov/rcovtask'
30
- Rcov::RcovTask.new do |test|
31
- test.libs << 'test'
32
- test.pattern = 'test/**/test_*.rb'
33
- test.verbose = true
34
- end
35
- rescue LoadError
36
- task :rcov do
37
- abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
38
- end
39
- end
40
-
41
- task :test => :check_dependencies
42
-
43
- task :default => :test
44
-
45
- require 'rake/rdoctask'
46
- Rake::RDocTask.new do |rdoc|
47
- version = File.exist?('VERSION') ? File.read('VERSION') : ""
48
-
49
- rdoc.rdoc_dir = 'rdoc'
50
- rdoc.title = "sanitize-url #{version}"
51
- rdoc.rdoc_files.include('README*')
52
- rdoc.rdoc_files.include('lib/**/*.rb')
53
- end
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "sanitize-url"
8
+ gem.summary = %Q{Sanitizes untrusted URLs}
9
+ gem.description = %Q{This gem provides a module called SanitizeUrl, which you can mix-in anywhere you like. It provides a single method: sanitize_url, which accepts a URL and returns one with JavaScript removed. It also prepends the http:// scheme if no valid scheme is found.}
10
+ gem.email = "jarrett@uchicago.edu"
11
+ gem.homepage = "http://github.com/jarrett/sanitize-url"
12
+ gem.authors = ["jarrett"]
13
+ gem.add_development_dependency "rspec", ">= 1.3.0"
14
+ # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
15
+ end
16
+ Jeweler::GemcutterTasks.new
17
+ rescue LoadError
18
+ puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
19
+ end
20
+
21
+ require 'rake/testtask'
22
+ Rake::TestTask.new(:test) do |test|
23
+ test.libs << 'lib' << 'test'
24
+ test.pattern = 'test/**/test_*.rb'
25
+ test.verbose = true
26
+ end
27
+
28
+ begin
29
+ require 'rcov/rcovtask'
30
+ Rcov::RcovTask.new do |test|
31
+ test.libs << 'test'
32
+ test.pattern = 'test/**/test_*.rb'
33
+ test.verbose = true
34
+ end
35
+ rescue LoadError
36
+ task :rcov do
37
+ abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
38
+ end
39
+ end
40
+
41
+ task :test => :check_dependencies
42
+
43
+ task :default => :test
44
+
45
+ require 'rake/rdoctask'
46
+ Rake::RDocTask.new do |rdoc|
47
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
48
+
49
+ rdoc.rdoc_dir = 'rdoc'
50
+ rdoc.title = "sanitize-url #{version}"
51
+ rdoc.rdoc_files.include('README*')
52
+ rdoc.rdoc_files.include('lib/**/*.rb')
53
+ end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.3
1
+ 0.1.4
@@ -4,11 +4,11 @@ module SanitizeUrl
4
4
  ALPHANUMERIC_CHAR_CODES = (48..57).to_a + (65..90).to_a + (97..122).to_a
5
5
 
6
6
  VALID_OPAQUE_SPECIAL_CHARS = ['!', '*', "'", '(', ')', ';', ':', '@', '&', '=', '+', '$', ',', '/', '?', '%', '#', '[', ']', '-', '_', '.', '~']
7
- VALID_OPAQUE_SPECIAL_CHAR_CODES = VALID_OPAQUE_SPECIAL_CHARS.collect { |c| c[0] }
7
+ VALID_OPAQUE_SPECIAL_CHAR_CODES = VALID_OPAQUE_SPECIAL_CHARS.collect { |c| c[0].is_a?(String) ? c.ord : c[0] }
8
8
  VALID_OPAQUE_CHAR_CODES = ALPHANUMERIC_CHAR_CODES + VALID_OPAQUE_SPECIAL_CHAR_CODES
9
9
 
10
10
  VALID_SCHEME_SPECIAL_CHARS = ['+', '.', '-']
11
- VALID_SCHEME_SPECIAL_CHAR_CODES = VALID_SCHEME_SPECIAL_CHARS.collect { |c| c[0] }
11
+ VALID_SCHEME_SPECIAL_CHAR_CODES = VALID_SCHEME_SPECIAL_CHARS.collect { |c| c[0].is_a?(String) ? c.ord : c[0] }
12
12
  VALID_SCHEME_CHAR_CODES = ALPHANUMERIC_CHAR_CODES + VALID_SCHEME_SPECIAL_CHAR_CODES
13
13
 
14
14
  HTTP_STYLE_SCHEMES = ['http', 'https', 'ftp', 'ftps', 'svn', 'svn+ssh', 'git'] # Common schemes whose format should be "scheme://" instead of "scheme:"
@@ -94,7 +94,8 @@ module SanitizeUrl
94
94
  def self.char_or_url_encoded(code) #:nodoc:
95
95
  if url_encode?(code)
96
96
  utf_8_str = ([code.to_i].pack('U'))
97
- '%' + utf_8_str.unpack('H2' * utf_8_str.length).join('%').upcase
97
+ length = utf_8_str.respond_to?(:bytes) ? utf_8_str.bytes.to_a.length : utf_8_str.length
98
+ '%' + utf_8_str.unpack('H2' * length).join('%').upcase
98
99
  else
99
100
  code.chr
100
101
  end
@@ -1,55 +1,57 @@
1
- # Generated by jeweler
2
- # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in rakefile, and run the gemspec command
4
- # -*- encoding: utf-8 -*-
5
-
6
- Gem::Specification.new do |s|
7
- s.name = %q{sanitize-url}
8
- s.version = "0.1.3"
9
-
10
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
- s.authors = ["jarrett"]
12
- s.date = %q{2010-02-25}
13
- s.description = %q{This gem provides a module called SanitizeUrl, which you can mix-in anywhere you like. It provides a single method: sanitize_url, which accepts a URL and returns one with JavaScript removed. It also prepends the http:// scheme if no valid scheme is found.}
14
- s.email = %q{jarrett@uchicago.edu}
15
- s.extra_rdoc_files = [
16
- "LICENSE",
17
- "README.markdown"
18
- ]
19
- s.files = [
20
- ".document",
21
- ".gitignore",
22
- "LICENSE",
23
- "README.markdown",
24
- "Rakefile",
25
- "VERSION",
26
- "lib/sanitize-url.rb",
27
- "sanitize-url.gemspec",
28
- "spec/sanitize_url_spec.rb",
29
- "spec/spec_helper.rb",
30
- "test.rb"
31
- ]
32
- s.homepage = %q{http://github.com/jarrett/sanitize-url}
33
- s.rdoc_options = ["--charset=UTF-8"]
34
- s.require_paths = ["lib"]
35
- s.rubygems_version = %q{1.3.5}
36
- s.summary = %q{Sanitizes untrusted URLs}
37
- s.test_files = [
38
- "spec/sanitize_url_spec.rb",
39
- "spec/spec_helper.rb"
40
- ]
41
-
42
- if s.respond_to? :specification_version then
43
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
44
- s.specification_version = 3
45
-
46
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
47
- s.add_development_dependency(%q<rspec>, [">= 1.3.0"])
48
- else
49
- s.add_dependency(%q<rspec>, [">= 1.3.0"])
50
- end
51
- else
52
- s.add_dependency(%q<rspec>, [">= 1.3.0"])
53
- end
54
- end
55
-
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{sanitize-url}
8
+ s.version = "0.1.4"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["jarrett"]
12
+ s.date = %q{2010-03-21}
13
+ s.description = %q{This gem provides a module called SanitizeUrl, which you can mix-in anywhere you like. It provides a single method: sanitize_url, which accepts a URL and returns one with JavaScript removed. It also prepends the http:// scheme if no valid scheme is found.}
14
+ s.email = %q{jarrett@uchicago.edu}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.markdown"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".gitignore",
22
+ "LICENSE",
23
+ "README.markdown",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "lib/sanitize-url.rb",
27
+ "sanitize-url.gemspec",
28
+ "spec/char_codes_spec.rb",
29
+ "spec/sanitize_url_spec.rb",
30
+ "spec/spec_helper.rb",
31
+ "test.rb"
32
+ ]
33
+ s.homepage = %q{http://github.com/jarrett/sanitize-url}
34
+ s.rdoc_options = ["--charset=UTF-8"]
35
+ s.require_paths = ["lib"]
36
+ s.rubygems_version = %q{1.3.6}
37
+ s.summary = %q{Sanitizes untrusted URLs}
38
+ s.test_files = [
39
+ "spec/char_codes_spec.rb",
40
+ "spec/sanitize_url_spec.rb",
41
+ "spec/spec_helper.rb"
42
+ ]
43
+
44
+ if s.respond_to? :specification_version then
45
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
46
+ s.specification_version = 3
47
+
48
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
49
+ s.add_development_dependency(%q<rspec>, [">= 1.3.0"])
50
+ else
51
+ s.add_dependency(%q<rspec>, [">= 1.3.0"])
52
+ end
53
+ else
54
+ s.add_dependency(%q<rspec>, [">= 1.3.0"])
55
+ end
56
+ end
57
+
@@ -0,0 +1,32 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe 'Char codes' do
4
+ it 'counts a number as being in the range 48-57' do
5
+ (0..9).each do |num|
6
+ c = num.to_s
7
+ code = c[0].is_a?(String) ? c.ord : c[0]
8
+ code.should == 48 + num
9
+ end
10
+ end
11
+
12
+ it 'counts an uppercase letter as being in the range 65-90' do
13
+ ('A'..'Z').each_with_index do |c, offset|
14
+ code = c[0].is_a?(String) ? c.ord : c[0]
15
+ code.should == 65 + offset
16
+ end
17
+ end
18
+
19
+ it 'counts a lowercase letter as being in the range 97-122' do
20
+ ('a'..'z').each_with_index do |c, offset|
21
+ code = c[0].is_a?(String) ? c.ord : c[0]
22
+ code.should == 97 + offset
23
+ end
24
+ end
25
+
26
+ ['!', '*', "'", '(', ')', ';', ':', '@', '&', '=', '+', '$', ',', '/', '?', '%', '#', '[', ']', '-', '_', '.', '~'].each do |c|
27
+ it "counts #{c} as included in VALID_OPAQUE_CHAR_CODES" do
28
+ code = c[0].is_a?(String) ? c.ord : c[0]
29
+ SanitizeUrl::VALID_OPAQUE_CHAR_CODES.should include(code)
30
+ end
31
+ end
32
+ end
@@ -1,169 +1,169 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
-
3
- describe SanitizeUrl do
4
- include SanitizeUrl
5
-
6
- describe '#sanitize_url' do
7
- it 'replaces JavaScript URLs with options[:replace_evil_with]' do
8
- urls = [
9
- 'javascript:alert("1");',
10
- 'javascript//:alert("2");',
11
- 'javascript://alert("3");',
12
- 'javascript/:/alert("4");',
13
- 'j a v a script:alert("5");',
14
- ' javascript:alert("6");',
15
- 'JavaScript:alert("7");',
16
- "java\nscript:alert(\"8\");",
17
- "java\rscript:alert(\"9\");"
18
- ].each do |evil_url|
19
- sanitize_url(evil_url, :replace_evil_with => 'replaced').should == 'replaced'
20
- end
21
- end
22
-
23
- it 'replaces data: URLs with options[:replace_evil_with]' do
24
- urls = [
25
- 'data:text/html;base64,PHNjcmlwdD5hbGVydCgnMScpPC9zY3JpcHQ+',
26
- 'data://text/html;base64,PHNjcmlwdD5hbGVydCgnMicpPC9zY3JpcHQ+',
27
- 'data//:text/html;base64,PHNjcmlwdD5hbGVydCgnMycpPC9zY3JpcHQ+',
28
- 'data/:/text/html;base64,PHNjcmlwdD5hbGVydCgnNCcpPC9zY3JpcHQ+',
29
- ' data:text/html;base64,PHNjcmlwdD5hbGVydCgnNScpPC9zY3JpcHQ+',
30
- 'da ta:text/html;base64,PHNjcmlwdD5hbGVydCgnNicpPC9zY3JpcHQ+',
31
- 'Data:text/html;base64,PHNjcmlwdD5hbGVydCgnNycpPC9zY3JpcHQ+',
32
- "da\nta:text/html;base64,PHNjcmlwdD5hbGVydCgnOCcpPC9zY3JpcHQ+",
33
- "da\rta:text/html;base64,PHNjcmlwdD5hbGVydCgnOScpPC9zY3JpcHQ+",
34
- ].each do |evil_url|
35
- sanitize_url(evil_url, :replace_evil_with => 'replaced').should == 'replaced'
36
- end
37
- end
38
-
39
- context 'with :schemes whitelist' do
40
- it 'kills anything not on the list' do
41
- [
42
- 'https://example.com',
43
- 'https:example.com',
44
- 'ftp://example.com',
45
- 'ftp:example.com',
46
- 'data://example.com',
47
- 'data:example.com',
48
- 'javascript://example.com',
49
- 'javascript:example.com',
50
- ].each do |evil_url|
51
- sanitize_url(evil_url, :schemes => ['http'], :replace_evil_with => 'replaced')
52
- end
53
- end
54
-
55
- it 'allows anything on the list' do
56
- [
57
- 'http://example.com',
58
- 'https://example.com'
59
- ].each do |good_url|
60
- sanitize_url(good_url, :schemes => ['http', 'https']).should == good_url
61
- end
62
- end
63
-
64
- it 'works with schemes given as symbols' do
65
- sanitize_url('ftp://example.com', :schemes => [:http, :https], :replace_evil_with => 'replaced').should == 'replaced'
66
- sanitize_url('ftp://example.com', :schemes => [:http, :https, :ftp]).should == 'ftp://example.com'
67
- end
68
- end
69
-
70
- it 'prepends http:// if no scheme is given' do
71
- sanitize_url('www.example.com').should == 'http://www.example.com'
72
- end
73
-
74
- it 'replaces evil URLs that are encoded with Unicode numerical character references' do
75
- [
76
- '&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#49;&#39;&#41;',
77
- '&#x6A;&#x61;&#x76;&#x61;&#x73;&#x63;&#x72;&#x69;&#x70;&#x74;&#x3A;&#x61;&#x6C;&#x65;&#x72;&#x74;&#x28;&#x27;&#x32;&#x27;&#x29;'
78
- ].each do |evil_url|
79
- sanitize_url(evil_url, :replace_evil_with => 'replaced').should == 'replaced'
80
- end
81
- end
82
-
83
- it 'replaces evil URLs that are URL-encoded (hex with %)' do
84
- sanitize_url('%6A%61%76%61%73%63%72%69%70%74%3A%61%6C%65%72%74%28%22%58%53%53%22%29', :replace_evil_with => 'replaced').should == 'replaced'
85
- end
86
-
87
- it 'does not try to fix broken schemes after the start of the string' do
88
- sanitize_url('http://example.com/http/foo').should == 'http://example.com/http/foo'
89
- end
90
-
91
- it 'does not prepend an extra http:// if a valid scheme is given' do
92
- sanitize_url('http://www.example.com').should == 'http://www.example.com'
93
- sanitize_url('https://www.example.com').should == 'https://www.example.com'
94
- sanitize_url('ftp://www.example.com').should == 'ftp://www.example.com'
95
- end
96
-
97
- it 'dereferences URL-encoded characters in the scheme' do
98
- sanitize_url('h%74tp://example.com').should == 'http://example.com'
99
- end
100
-
101
- it 'dereferences decimal numeric character references in the scheme' do
102
- sanitize_url('h&#116;tp://example.com').should == 'http://example.com'
103
- end
104
-
105
- it 'dereferences hex numeric character references in the scheme' do
106
- sanitize_url('h&#x74;tp://example.com').should == 'http://example.com'
107
- end
108
-
109
- it 'retains URL-encoded characters in the opaque portion' do
110
- sanitize_url('http://someone%40gmail.com:password@example.com').should == 'http://someone%40gmail.com:password@example.com'
111
- end
112
-
113
- it 'URL-encodes code points outside ASCII' do
114
- # Percent-encoding should be in UTF-8 (RFC 3986).
115
- # http://en.wikipedia.org/wiki/Percent-encoding#Current_standard
116
- sanitize_url('http://&#1044;').should == 'http://%D0%94'
117
- sanitize_url('http://&#x0414;').should == 'http://%D0%94'
118
- sanitize_url("http://\xD0\x94").should == 'http://%D0%94' # UTF-8 version of the same.
119
- end
120
-
121
- it 'replaces URLs without the opaque portion' do
122
- sanitize_url('http://', :replace_evil_with => 'replaced').should == 'replaced'
123
- sanitize_url('mailto:', :replace_evil_with => 'replaced').should == 'replaced'
124
- end
125
-
126
- it 'adds the two slashes for known schemes that require it' do
127
- sanitize_url('http:example.com').should == 'http://example.com'
128
- sanitize_url('ftp:example.com').should == 'ftp://example.com'
129
- sanitize_url('svn+ssh:example.com').should == 'svn+ssh://example.com'
130
- end
131
-
132
- it 'does not add slashes for schemes that do not require it' do
133
- sanitize_url('mailto:someone@example.com').should == 'mailto:someone@example.com'
134
- end
135
-
136
- it 'strips invalid characters from the scheme and then evaluates the scheme according to the normal rules' do
137
- sanitize_url("ht\xD0\x94tp://example.com").should == 'http://example.com'
138
- sanitize_url('htt$p://example.com').should == 'http://example.com'
139
- sanitize_url('j%avascript:alert("XSS")', :replace_evil_with => 'replaced').should == 'replaced'
140
- end
141
- end
142
-
143
-
144
- describe '.dereference_numerics' do
145
- it 'decodes short-form decimal UTF-8 character references with a semicolon' do
146
- SanitizeUrl.dereference_numerics('&#106;').should == 'j'
147
- end
148
-
149
- it 'decodes short-form decimal UTF-8 character references without a semicolon' do
150
- SanitizeUrl.dereference_numerics('&#106').should == 'j'
151
- end
152
-
153
- it 'decodes long-form decimal UTF-8 character references with a semicolon' do
154
- SanitizeUrl.dereference_numerics('&#0000106;').should == 'j'
155
- end
156
-
157
- it 'decodes long-form decimal UTF-8 character references without a semicolon' do
158
- SanitizeUrl.dereference_numerics('&#0000106').should == 'j'
159
- end
160
-
161
- it 'decodes hex UTF-8 character references with a semicolon' do
162
- SanitizeUrl.dereference_numerics('&#x6A;').should == 'j'
163
- end
164
-
165
- it 'decodes hex UTF-8 character references without a semicolon' do
166
- SanitizeUrl.dereference_numerics('&#x6A').should == 'j'
167
- end
168
- end
169
- end
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe SanitizeUrl do
4
+ include SanitizeUrl
5
+
6
+ describe '#sanitize_url' do
7
+ it 'replaces JavaScript URLs with options[:replace_evil_with]' do
8
+ urls = [
9
+ 'javascript:alert("1");',
10
+ 'javascript//:alert("2");',
11
+ 'javascript://alert("3");',
12
+ 'javascript/:/alert("4");',
13
+ 'j a v a script:alert("5");',
14
+ ' javascript:alert("6");',
15
+ 'JavaScript:alert("7");',
16
+ "java\nscript:alert(\"8\");",
17
+ "java\rscript:alert(\"9\");"
18
+ ].each do |evil_url|
19
+ sanitize_url(evil_url, :replace_evil_with => 'replaced').should == 'replaced'
20
+ end
21
+ end
22
+
23
+ it 'replaces data: URLs with options[:replace_evil_with]' do
24
+ urls = [
25
+ 'data:text/html;base64,PHNjcmlwdD5hbGVydCgnMScpPC9zY3JpcHQ+',
26
+ 'data://text/html;base64,PHNjcmlwdD5hbGVydCgnMicpPC9zY3JpcHQ+',
27
+ 'data//:text/html;base64,PHNjcmlwdD5hbGVydCgnMycpPC9zY3JpcHQ+',
28
+ 'data/:/text/html;base64,PHNjcmlwdD5hbGVydCgnNCcpPC9zY3JpcHQ+',
29
+ ' data:text/html;base64,PHNjcmlwdD5hbGVydCgnNScpPC9zY3JpcHQ+',
30
+ 'da ta:text/html;base64,PHNjcmlwdD5hbGVydCgnNicpPC9zY3JpcHQ+',
31
+ 'Data:text/html;base64,PHNjcmlwdD5hbGVydCgnNycpPC9zY3JpcHQ+',
32
+ "da\nta:text/html;base64,PHNjcmlwdD5hbGVydCgnOCcpPC9zY3JpcHQ+",
33
+ "da\rta:text/html;base64,PHNjcmlwdD5hbGVydCgnOScpPC9zY3JpcHQ+",
34
+ ].each do |evil_url|
35
+ sanitize_url(evil_url, :replace_evil_with => 'replaced').should == 'replaced'
36
+ end
37
+ end
38
+
39
+ context 'with :schemes whitelist' do
40
+ it 'kills anything not on the list' do
41
+ [
42
+ 'https://example.com',
43
+ 'https:example.com',
44
+ 'ftp://example.com',
45
+ 'ftp:example.com',
46
+ 'data://example.com',
47
+ 'data:example.com',
48
+ 'javascript://example.com',
49
+ 'javascript:example.com',
50
+ ].each do |evil_url|
51
+ sanitize_url(evil_url, :schemes => ['http'], :replace_evil_with => 'replaced')
52
+ end
53
+ end
54
+
55
+ it 'allows anything on the list' do
56
+ [
57
+ 'http://example.com',
58
+ 'https://example.com'
59
+ ].each do |good_url|
60
+ sanitize_url(good_url, :schemes => ['http', 'https']).should == good_url
61
+ end
62
+ end
63
+
64
+ it 'works with schemes given as symbols' do
65
+ sanitize_url('ftp://example.com', :schemes => [:http, :https], :replace_evil_with => 'replaced').should == 'replaced'
66
+ sanitize_url('ftp://example.com', :schemes => [:http, :https, :ftp]).should == 'ftp://example.com'
67
+ end
68
+ end
69
+
70
+ it 'prepends http:// if no scheme is given' do
71
+ sanitize_url('www.example.com').should == 'http://www.example.com'
72
+ end
73
+
74
+ it 'replaces evil URLs that are encoded with Unicode numerical character references' do
75
+ [
76
+ '&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#49;&#39;&#41;',
77
+ '&#x6A;&#x61;&#x76;&#x61;&#x73;&#x63;&#x72;&#x69;&#x70;&#x74;&#x3A;&#x61;&#x6C;&#x65;&#x72;&#x74;&#x28;&#x27;&#x32;&#x27;&#x29;'
78
+ ].each do |evil_url|
79
+ sanitize_url(evil_url, :replace_evil_with => 'replaced').should == 'replaced'
80
+ end
81
+ end
82
+
83
+ it 'replaces evil URLs that are URL-encoded (hex with %)' do
84
+ sanitize_url('%6A%61%76%61%73%63%72%69%70%74%3A%61%6C%65%72%74%28%22%58%53%53%22%29', :replace_evil_with => 'replaced').should == 'replaced'
85
+ end
86
+
87
+ it 'does not try to fix broken schemes after the start of the string' do
88
+ sanitize_url('http://example.com/http/foo').should == 'http://example.com/http/foo'
89
+ end
90
+
91
+ it 'does not prepend an extra http:// if a valid scheme is given' do
92
+ sanitize_url('http://www.example.com').should == 'http://www.example.com'
93
+ sanitize_url('https://www.example.com').should == 'https://www.example.com'
94
+ sanitize_url('ftp://www.example.com').should == 'ftp://www.example.com'
95
+ end
96
+
97
+ it 'dereferences URL-encoded characters in the scheme' do
98
+ sanitize_url('h%74tp://example.com').should == 'http://example.com'
99
+ end
100
+
101
+ it 'dereferences decimal numeric character references in the scheme' do
102
+ sanitize_url('h&#116;tp://example.com').should == 'http://example.com'
103
+ end
104
+
105
+ it 'dereferences hex numeric character references in the scheme' do
106
+ sanitize_url('h&#x74;tp://example.com').should == 'http://example.com'
107
+ end
108
+
109
+ it 'retains URL-encoded characters in the opaque portion' do
110
+ sanitize_url('http://someone%40gmail.com:password@example.com').should == 'http://someone%40gmail.com:password@example.com'
111
+ end
112
+
113
+ it 'URL-encodes code points outside ASCII' do
114
+ # Percent-encoding should be in UTF-8 (RFC 3986).
115
+ # http://en.wikipedia.org/wiki/Percent-encoding#Current_standard
116
+ sanitize_url('http://&#1044;').should == 'http://%D0%94'
117
+ sanitize_url('http://&#x0414;').should == 'http://%D0%94'
118
+ sanitize_url("http://\xD0\x94").should == 'http://%D0%94' # UTF-8 version of the same.
119
+ end
120
+
121
+ it 'replaces URLs without the opaque portion' do
122
+ sanitize_url('http://', :replace_evil_with => 'replaced').should == 'replaced'
123
+ sanitize_url('mailto:', :replace_evil_with => 'replaced').should == 'replaced'
124
+ end
125
+
126
+ it 'adds the two slashes for known schemes that require it' do
127
+ sanitize_url('http:example.com').should == 'http://example.com'
128
+ sanitize_url('ftp:example.com').should == 'ftp://example.com'
129
+ sanitize_url('svn+ssh:example.com').should == 'svn+ssh://example.com'
130
+ end
131
+
132
+ it 'does not add slashes for schemes that do not require it' do
133
+ sanitize_url('mailto:someone@example.com').should == 'mailto:someone@example.com'
134
+ end
135
+
136
+ it 'strips invalid characters from the scheme and then evaluates the scheme according to the normal rules' do
137
+ sanitize_url("ht\xD0\x94tp://example.com").should == 'http://example.com'
138
+ sanitize_url('htt$p://example.com').should == 'http://example.com'
139
+ sanitize_url('j%avascript:alert("XSS")', :replace_evil_with => 'replaced').should == 'replaced'
140
+ end
141
+ end
142
+
143
+
144
+ describe '.dereference_numerics' do
145
+ it 'decodes short-form decimal UTF-8 character references with a semicolon' do
146
+ SanitizeUrl.dereference_numerics('&#106;').should == 'j'
147
+ end
148
+
149
+ it 'decodes short-form decimal UTF-8 character references without a semicolon' do
150
+ SanitizeUrl.dereference_numerics('&#106').should == 'j'
151
+ end
152
+
153
+ it 'decodes long-form decimal UTF-8 character references with a semicolon' do
154
+ SanitizeUrl.dereference_numerics('&#0000106;').should == 'j'
155
+ end
156
+
157
+ it 'decodes long-form decimal UTF-8 character references without a semicolon' do
158
+ SanitizeUrl.dereference_numerics('&#0000106').should == 'j'
159
+ end
160
+
161
+ it 'decodes hex UTF-8 character references with a semicolon' do
162
+ SanitizeUrl.dereference_numerics('&#x6A;').should == 'j'
163
+ end
164
+
165
+ it 'decodes hex UTF-8 character references without a semicolon' do
166
+ SanitizeUrl.dereference_numerics('&#x6A').should == 'j'
167
+ end
168
+ end
169
+ end
@@ -1,7 +1,7 @@
1
- require 'rubygems'
2
- require 'test/unit'
3
- require 'spec'
4
-
5
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
- $LOAD_PATH.unshift(File.dirname(__FILE__))
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'spec'
4
+
5
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
7
7
  require 'sanitize-url'
data/test.rb CHANGED
@@ -1,16 +1,4 @@
1
- # Copyright sign
2
-
3
- #def decimal_code_point_to_url_encoded(code_point)
4
- # utf_8_str = ([code_point.to_i].pack('U'))
5
- # '%' + utf_8_str.unpack('H2' * utf_8_str.length).join('%').upcase
6
- #end
7
-
8
- hex_code_point = 'A9'
9
- decimal_code_point = '169'
10
- hex_utf_8_bytes = '%C2%A9'
11
-
12
- #puts 'Expected: ' + hex_utf_8_bytes
13
- #puts 'Actual: ' + decimal_code_point_to_url_encoded(decimal_code_point)
14
-
15
- evil = 'javascript:alert("XSS")'
16
- puts evil.unpack('H2' * evil.length).join('%').upcase
1
+ # encoding: UTF-8
2
+
3
+ puts 'Д'
4
+ puts 'Д'.unpack('H2' * 2).inspect
metadata CHANGED
@@ -1,7 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sanitize-url
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 1
8
+ - 4
9
+ version: 0.1.4
5
10
  platform: ruby
6
11
  authors:
7
12
  - jarrett
@@ -9,19 +14,23 @@ autorequire:
9
14
  bindir: bin
10
15
  cert_chain: []
11
16
 
12
- date: 2010-02-25 00:00:00 -06:00
17
+ date: 2010-03-21 00:00:00 -05:00
13
18
  default_executable:
14
19
  dependencies:
15
20
  - !ruby/object:Gem::Dependency
16
21
  name: rspec
17
- type: :development
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
20
24
  requirements:
21
25
  - - ">="
22
26
  - !ruby/object:Gem::Version
27
+ segments:
28
+ - 1
29
+ - 3
30
+ - 0
23
31
  version: 1.3.0
24
- version:
32
+ type: :development
33
+ version_requirements: *id001
25
34
  description: "This gem provides a module called SanitizeUrl, which you can mix-in anywhere you like. It provides a single method: sanitize_url, which accepts a URL and returns one with JavaScript removed. It also prepends the http:// scheme if no valid scheme is found."
26
35
  email: jarrett@uchicago.edu
27
36
  executables: []
@@ -40,6 +49,7 @@ files:
40
49
  - VERSION
41
50
  - lib/sanitize-url.rb
42
51
  - sanitize-url.gemspec
52
+ - spec/char_codes_spec.rb
43
53
  - spec/sanitize_url_spec.rb
44
54
  - spec/spec_helper.rb
45
55
  - test.rb
@@ -56,21 +66,24 @@ required_ruby_version: !ruby/object:Gem::Requirement
56
66
  requirements:
57
67
  - - ">="
58
68
  - !ruby/object:Gem::Version
69
+ segments:
70
+ - 0
59
71
  version: "0"
60
- version:
61
72
  required_rubygems_version: !ruby/object:Gem::Requirement
62
73
  requirements:
63
74
  - - ">="
64
75
  - !ruby/object:Gem::Version
76
+ segments:
77
+ - 0
65
78
  version: "0"
66
- version:
67
79
  requirements: []
68
80
 
69
81
  rubyforge_project:
70
- rubygems_version: 1.3.5
82
+ rubygems_version: 1.3.6
71
83
  signing_key:
72
84
  specification_version: 3
73
85
  summary: Sanitizes untrusted URLs
74
86
  test_files:
87
+ - spec/char_codes_spec.rb
75
88
  - spec/sanitize_url_spec.rb
76
89
  - spec/spec_helper.rb