sanitize-url 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document CHANGED
@@ -1,5 +1,5 @@
1
- README.rdoc
2
- lib/**/*.rb
3
- bin/*
4
- features/**/*.feature
5
- LICENSE
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore CHANGED
@@ -1,21 +1,21 @@
1
- ## MAC OS
2
- .DS_Store
3
-
4
- ## TEXTMATE
5
- *.tmproj
6
- tmtags
7
-
8
- ## EMACS
9
- *~
10
- \#*
11
- .\#*
12
-
13
- ## VIM
14
- *.swp
15
-
16
- ## PROJECT::GENERAL
17
- coverage
18
- rdoc
19
- pkg
20
-
21
- ## PROJECT::SPECIFIC
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE CHANGED
@@ -1,20 +1,20 @@
1
- Copyright (c) 2009 Jarrett Colby
2
-
3
- Permission is hereby granted, free of charge, to any person obtaining
4
- a copy of this software and associated documentation files (the
5
- "Software"), to deal in the Software without restriction, including
6
- without limitation the rights to use, copy, modify, merge, publish,
7
- distribute, sublicense, and/or sell copies of the Software, and to
8
- permit persons to whom the Software is furnished to do so, subject to
9
- the following conditions:
10
-
11
- The above copyright notice and this permission notice shall be
12
- included in all copies or substantial portions of the Software.
13
-
14
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1
+ Copyright (c) 2009 Jarrett Colby
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile CHANGED
@@ -1,53 +1,53 @@
1
- require 'rubygems'
2
- require 'rake'
3
-
4
- begin
5
- require 'jeweler'
6
- Jeweler::Tasks.new do |gem|
7
- gem.name = "sanitize-url"
8
- gem.summary = %Q{Sanitizes untrusted URLs}
9
- gem.description = %Q{This gem provides a module called SanitizeUrl, which you can mix-in anywhere you like. It provides a single method: sanitize_url, which accepts a URL and returns one with JavaScript removed. It also prepends the http:// scheme if no valid scheme is found.}
10
- gem.email = "jarrett@uchicago.edu"
11
- gem.homepage = "http://github.com/jarrett/sanitize-url"
12
- gem.authors = ["jarrett"]
13
- gem.add_development_dependency "rspec", ">= 1.3.0"
14
- # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
15
- end
16
- Jeweler::GemcutterTasks.new
17
- rescue LoadError
18
- puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
19
- end
20
-
21
- require 'rake/testtask'
22
- Rake::TestTask.new(:test) do |test|
23
- test.libs << 'lib' << 'test'
24
- test.pattern = 'test/**/test_*.rb'
25
- test.verbose = true
26
- end
27
-
28
- begin
29
- require 'rcov/rcovtask'
30
- Rcov::RcovTask.new do |test|
31
- test.libs << 'test'
32
- test.pattern = 'test/**/test_*.rb'
33
- test.verbose = true
34
- end
35
- rescue LoadError
36
- task :rcov do
37
- abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
38
- end
39
- end
40
-
41
- task :test => :check_dependencies
42
-
43
- task :default => :test
44
-
45
- require 'rake/rdoctask'
46
- Rake::RDocTask.new do |rdoc|
47
- version = File.exist?('VERSION') ? File.read('VERSION') : ""
48
-
49
- rdoc.rdoc_dir = 'rdoc'
50
- rdoc.title = "sanitize-url #{version}"
51
- rdoc.rdoc_files.include('README*')
52
- rdoc.rdoc_files.include('lib/**/*.rb')
53
- end
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "sanitize-url"
8
+ gem.summary = %Q{Sanitizes untrusted URLs}
9
+ gem.description = %Q{This gem provides a module called SanitizeUrl, which you can mix-in anywhere you like. It provides a single method: sanitize_url, which accepts a URL and returns one with JavaScript removed. It also prepends the http:// scheme if no valid scheme is found.}
10
+ gem.email = "jarrett@uchicago.edu"
11
+ gem.homepage = "http://github.com/jarrett/sanitize-url"
12
+ gem.authors = ["jarrett"]
13
+ gem.add_development_dependency "rspec", ">= 1.3.0"
14
+ # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
15
+ end
16
+ Jeweler::GemcutterTasks.new
17
+ rescue LoadError
18
+ puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
19
+ end
20
+
21
+ require 'rake/testtask'
22
+ Rake::TestTask.new(:test) do |test|
23
+ test.libs << 'lib' << 'test'
24
+ test.pattern = 'test/**/test_*.rb'
25
+ test.verbose = true
26
+ end
27
+
28
+ begin
29
+ require 'rcov/rcovtask'
30
+ Rcov::RcovTask.new do |test|
31
+ test.libs << 'test'
32
+ test.pattern = 'test/**/test_*.rb'
33
+ test.verbose = true
34
+ end
35
+ rescue LoadError
36
+ task :rcov do
37
+ abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
38
+ end
39
+ end
40
+
41
+ task :test => :check_dependencies
42
+
43
+ task :default => :test
44
+
45
+ require 'rake/rdoctask'
46
+ Rake::RDocTask.new do |rdoc|
47
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
48
+
49
+ rdoc.rdoc_dir = 'rdoc'
50
+ rdoc.title = "sanitize-url #{version}"
51
+ rdoc.rdoc_files.include('README*')
52
+ rdoc.rdoc_files.include('lib/**/*.rb')
53
+ end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.3
1
+ 0.1.4
data/lib/sanitize-url.rb CHANGED
@@ -4,11 +4,11 @@ module SanitizeUrl
4
4
  ALPHANUMERIC_CHAR_CODES = (48..57).to_a + (65..90).to_a + (97..122).to_a
5
5
 
6
6
  VALID_OPAQUE_SPECIAL_CHARS = ['!', '*', "'", '(', ')', ';', ':', '@', '&', '=', '+', '$', ',', '/', '?', '%', '#', '[', ']', '-', '_', '.', '~']
7
- VALID_OPAQUE_SPECIAL_CHAR_CODES = VALID_OPAQUE_SPECIAL_CHARS.collect { |c| c[0] }
7
+ VALID_OPAQUE_SPECIAL_CHAR_CODES = VALID_OPAQUE_SPECIAL_CHARS.collect { |c| c[0].is_a?(String) ? c.ord : c[0] }
8
8
  VALID_OPAQUE_CHAR_CODES = ALPHANUMERIC_CHAR_CODES + VALID_OPAQUE_SPECIAL_CHAR_CODES
9
9
 
10
10
  VALID_SCHEME_SPECIAL_CHARS = ['+', '.', '-']
11
- VALID_SCHEME_SPECIAL_CHAR_CODES = VALID_SCHEME_SPECIAL_CHARS.collect { |c| c[0] }
11
+ VALID_SCHEME_SPECIAL_CHAR_CODES = VALID_SCHEME_SPECIAL_CHARS.collect { |c| c[0].is_a?(String) ? c.ord : c[0] }
12
12
  VALID_SCHEME_CHAR_CODES = ALPHANUMERIC_CHAR_CODES + VALID_SCHEME_SPECIAL_CHAR_CODES
13
13
 
14
14
  HTTP_STYLE_SCHEMES = ['http', 'https', 'ftp', 'ftps', 'svn', 'svn+ssh', 'git'] # Common schemes whose format should be "scheme://" instead of "scheme:"
@@ -94,7 +94,8 @@ module SanitizeUrl
94
94
  def self.char_or_url_encoded(code) #:nodoc:
95
95
  if url_encode?(code)
96
96
  utf_8_str = ([code.to_i].pack('U'))
97
- '%' + utf_8_str.unpack('H2' * utf_8_str.length).join('%').upcase
97
+ length = utf_8_str.respond_to?(:bytes) ? utf_8_str.bytes.to_a.length : utf_8_str.length
98
+ '%' + utf_8_str.unpack('H2' * length).join('%').upcase
98
99
  else
99
100
  code.chr
100
101
  end
data/sanitize-url.gemspec CHANGED
@@ -1,55 +1,57 @@
1
- # Generated by jeweler
2
- # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in rakefile, and run the gemspec command
4
- # -*- encoding: utf-8 -*-
5
-
6
- Gem::Specification.new do |s|
7
- s.name = %q{sanitize-url}
8
- s.version = "0.1.3"
9
-
10
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
- s.authors = ["jarrett"]
12
- s.date = %q{2010-02-25}
13
- s.description = %q{This gem provides a module called SanitizeUrl, which you can mix-in anywhere you like. It provides a single method: sanitize_url, which accepts a URL and returns one with JavaScript removed. It also prepends the http:// scheme if no valid scheme is found.}
14
- s.email = %q{jarrett@uchicago.edu}
15
- s.extra_rdoc_files = [
16
- "LICENSE",
17
- "README.markdown"
18
- ]
19
- s.files = [
20
- ".document",
21
- ".gitignore",
22
- "LICENSE",
23
- "README.markdown",
24
- "Rakefile",
25
- "VERSION",
26
- "lib/sanitize-url.rb",
27
- "sanitize-url.gemspec",
28
- "spec/sanitize_url_spec.rb",
29
- "spec/spec_helper.rb",
30
- "test.rb"
31
- ]
32
- s.homepage = %q{http://github.com/jarrett/sanitize-url}
33
- s.rdoc_options = ["--charset=UTF-8"]
34
- s.require_paths = ["lib"]
35
- s.rubygems_version = %q{1.3.5}
36
- s.summary = %q{Sanitizes untrusted URLs}
37
- s.test_files = [
38
- "spec/sanitize_url_spec.rb",
39
- "spec/spec_helper.rb"
40
- ]
41
-
42
- if s.respond_to? :specification_version then
43
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
44
- s.specification_version = 3
45
-
46
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
47
- s.add_development_dependency(%q<rspec>, [">= 1.3.0"])
48
- else
49
- s.add_dependency(%q<rspec>, [">= 1.3.0"])
50
- end
51
- else
52
- s.add_dependency(%q<rspec>, [">= 1.3.0"])
53
- end
54
- end
55
-
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{sanitize-url}
8
+ s.version = "0.1.4"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["jarrett"]
12
+ s.date = %q{2010-03-21}
13
+ s.description = %q{This gem provides a module called SanitizeUrl, which you can mix-in anywhere you like. It provides a single method: sanitize_url, which accepts a URL and returns one with JavaScript removed. It also prepends the http:// scheme if no valid scheme is found.}
14
+ s.email = %q{jarrett@uchicago.edu}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.markdown"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".gitignore",
22
+ "LICENSE",
23
+ "README.markdown",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "lib/sanitize-url.rb",
27
+ "sanitize-url.gemspec",
28
+ "spec/char_codes_spec.rb",
29
+ "spec/sanitize_url_spec.rb",
30
+ "spec/spec_helper.rb",
31
+ "test.rb"
32
+ ]
33
+ s.homepage = %q{http://github.com/jarrett/sanitize-url}
34
+ s.rdoc_options = ["--charset=UTF-8"]
35
+ s.require_paths = ["lib"]
36
+ s.rubygems_version = %q{1.3.6}
37
+ s.summary = %q{Sanitizes untrusted URLs}
38
+ s.test_files = [
39
+ "spec/char_codes_spec.rb",
40
+ "spec/sanitize_url_spec.rb",
41
+ "spec/spec_helper.rb"
42
+ ]
43
+
44
+ if s.respond_to? :specification_version then
45
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
46
+ s.specification_version = 3
47
+
48
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
49
+ s.add_development_dependency(%q<rspec>, [">= 1.3.0"])
50
+ else
51
+ s.add_dependency(%q<rspec>, [">= 1.3.0"])
52
+ end
53
+ else
54
+ s.add_dependency(%q<rspec>, [">= 1.3.0"])
55
+ end
56
+ end
57
+
data/spec/char_codes_spec.rb ADDED
@@ -0,0 +1,32 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe 'Char codes' do
4
+ it 'counts a number as being in the range 48-57' do
5
+ (0..9).each do |num|
6
+ c = num.to_s
7
+ code = c[0].is_a?(String) ? c.ord : c[0]
8
+ code.should == 48 + num
9
+ end
10
+ end
11
+
12
+ it 'counts an uppercase letter as being in the range 65-90' do
13
+ ('A'..'Z').each_with_index do |c, offset|
14
+ code = c[0].is_a?(String) ? c.ord : c[0]
15
+ code.should == 65 + offset
16
+ end
17
+ end
18
+
19
+ it 'counts a lowercase letter as being in the range 97-122' do
20
+ ('a'..'z').each_with_index do |c, offset|
21
+ code = c[0].is_a?(String) ? c.ord : c[0]
22
+ code.should == 97 + offset
23
+ end
24
+ end
25
+
26
+ ['!', '*', "'", '(', ')', ';', ':', '@', '&', '=', '+', '$', ',', '/', '?', '%', '#', '[', ']', '-', '_', '.', '~'].each do |c|
27
+ it "counts #{c} as included in VALID_OPAQUE_CHAR_CODES" do
28
+ code = c[0].is_a?(String) ? c.ord : c[0]
29
+ SanitizeUrl::VALID_OPAQUE_CHAR_CODES.should include(code)
30
+ end
31
+ end
32
+ end
data/spec/sanitize_url_spec.rb CHANGED
@@ -1,169 +1,169 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
-
3
- describe SanitizeUrl do
4
- include SanitizeUrl
5
-
6
- describe '#sanitize_url' do
7
- it 'replaces JavaScript URLs with options[:replace_evil_with]' do
8
- urls = [
9
- 'javascript:alert("1");',
10
- 'javascript//:alert("2");',
11
- 'javascript://alert("3");',
12
- 'javascript/:/alert("4");',
13
- 'j a v a script:alert("5");',
14
- ' javascript:alert("6");',
15
- 'JavaScript:alert("7");',
16
- "java\nscript:alert(\"8\");",
17
- "java\rscript:alert(\"9\");"
18
- ].each do |evil_url|
19
- sanitize_url(evil_url, :replace_evil_with => 'replaced').should == 'replaced'
20
- end
21
- end
22
-
23
- it 'replaces data: URLs with options[:replace_evil_with]' do
24
- urls = [
25
- 'data:text/html;base64,PHNjcmlwdD5hbGVydCgnMScpPC9zY3JpcHQ+',
26
- 'data://text/html;base64,PHNjcmlwdD5hbGVydCgnMicpPC9zY3JpcHQ+',
27
- 'data//:text/html;base64,PHNjcmlwdD5hbGVydCgnMycpPC9zY3JpcHQ+',
28
- 'data/:/text/html;base64,PHNjcmlwdD5hbGVydCgnNCcpPC9zY3JpcHQ+',
29
- ' data:text/html;base64,PHNjcmlwdD5hbGVydCgnNScpPC9zY3JpcHQ+',
30
- 'da ta:text/html;base64,PHNjcmlwdD5hbGVydCgnNicpPC9zY3JpcHQ+',
31
- 'Data:text/html;base64,PHNjcmlwdD5hbGVydCgnNycpPC9zY3JpcHQ+',
32
- "da\nta:text/html;base64,PHNjcmlwdD5hbGVydCgnOCcpPC9zY3JpcHQ+",
33
- "da\rta:text/html;base64,PHNjcmlwdD5hbGVydCgnOScpPC9zY3JpcHQ+",
34
- ].each do |evil_url|
35
- sanitize_url(evil_url, :replace_evil_with => 'replaced').should == 'replaced'
36
- end
37
- end
38
-
39
- context 'with :schemes whitelist' do
40
- it 'kills anything not on the list' do
41
- [
42
- 'https://example.com',
43
- 'https:example.com',
44
- 'ftp://example.com',
45
- 'ftp:example.com',
46
- 'data://example.com',
47
- 'data:example.com',
48
- 'javascript://example.com',
49
- 'javascript:example.com',
50
- ].each do |evil_url|
51
- sanitize_url(evil_url, :schemes => ['http'], :replace_evil_with => 'replaced')
52
- end
53
- end
54
-
55
- it 'allows anything on the list' do
56
- [
57
- 'http://example.com',
58
- 'https://example.com'
59
- ].each do |good_url|
60
- sanitize_url(good_url, :schemes => ['http', 'https']).should == good_url
61
- end
62
- end
63
-
64
- it 'works with schemes given as symbols' do
65
- sanitize_url('ftp://example.com', :schemes => [:http, :https], :replace_evil_with => 'replaced').should == 'replaced'
66
- sanitize_url('ftp://example.com', :schemes => [:http, :https, :ftp]).should == 'ftp://example.com'
67
- end
68
- end
69
-
70
- it 'prepends http:// if no scheme is given' do
71
- sanitize_url('www.example.com').should == 'http://www.example.com'
72
- end
73
-
74
- it 'replaces evil URLs that are encoded with Unicode numerical character references' do
75
- [
76
- '&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#49;&#39;&#41;',
77
- '&#x6A;&#x61;&#x76;&#x61;&#x73;&#x63;&#x72;&#x69;&#x70;&#x74;&#x3A;&#x61;&#x6C;&#x65;&#x72;&#x74;&#x28;&#x27;&#x32;&#x27;&#x29;'
78
- ].each do |evil_url|
79
- sanitize_url(evil_url, :replace_evil_with => 'replaced').should == 'replaced'
80
- end
81
- end
82
-
83
- it 'replaces evil URLs that are URL-encoded (hex with %)' do
84
- sanitize_url('%6A%61%76%61%73%63%72%69%70%74%3A%61%6C%65%72%74%28%22%58%53%53%22%29', :replace_evil_with => 'replaced').should == 'replaced'
85
- end
86
-
87
- it 'does not try to fix broken schemes after the start of the string' do
88
- sanitize_url('http://example.com/http/foo').should == 'http://example.com/http/foo'
89
- end
90
-
91
- it 'does not prepend an extra http:// if a valid scheme is given' do
92
- sanitize_url('http://www.example.com').should == 'http://www.example.com'
93
- sanitize_url('https://www.example.com').should == 'https://www.example.com'
94
- sanitize_url('ftp://www.example.com').should == 'ftp://www.example.com'
95
- end
96
-
97
- it 'dereferences URL-encoded characters in the scheme' do
98
- sanitize_url('h%74tp://example.com').should == 'http://example.com'
99
- end
100
-
101
- it 'dereferences decimal numeric character references in the scheme' do
102
- sanitize_url('h&#116;tp://example.com').should == 'http://example.com'
103
- end
104
-
105
- it 'dereferences hex numeric character references in the scheme' do
106
- sanitize_url('h&#x74;tp://example.com').should == 'http://example.com'
107
- end
108
-
109
- it 'retains URL-encoded characters in the opaque portion' do
110
- sanitize_url('http://someone%40gmail.com:password@example.com').should == 'http://someone%40gmail.com:password@example.com'
111
- end
112
-
113
- it 'URL-encodes code points outside ASCII' do
114
- # Percent-encoding should be in UTF-8 (RFC 3986).
115
- # http://en.wikipedia.org/wiki/Percent-encoding#Current_standard
116
- sanitize_url('http://&#1044;').should == 'http://%D0%94'
117
- sanitize_url('http://&#x0414;').should == 'http://%D0%94'
118
- sanitize_url("http://\xD0\x94").should == 'http://%D0%94' # UTF-8 version of the same.
119
- end
120
-
121
- it 'replaces URLs without the opaque portion' do
122
- sanitize_url('http://', :replace_evil_with => 'replaced').should == 'replaced'
123
- sanitize_url('mailto:', :replace_evil_with => 'replaced').should == 'replaced'
124
- end
125
-
126
- it 'adds the two slashes for known schemes that require it' do
127
- sanitize_url('http:example.com').should == 'http://example.com'
128
- sanitize_url('ftp:example.com').should == 'ftp://example.com'
129
- sanitize_url('svn+ssh:example.com').should == 'svn+ssh://example.com'
130
- end
131
-
132
- it 'does not add slashes for schemes that do not require it' do
133
- sanitize_url('mailto:someone@example.com').should == 'mailto:someone@example.com'
134
- end
135
-
136
- it 'strips invalid characters from the scheme and then evaluates the scheme according to the normal rules' do
137
- sanitize_url("ht\xD0\x94tp://example.com").should == 'http://example.com'
138
- sanitize_url('htt$p://example.com').should == 'http://example.com'
139
- sanitize_url('j%avascript:alert("XSS")', :replace_evil_with => 'replaced').should == 'replaced'
140
- end
141
- end
142
-
143
-
144
- describe '.dereference_numerics' do
145
- it 'decodes short-form decimal UTF-8 character references with a semicolon' do
146
- SanitizeUrl.dereference_numerics('&#106;').should == 'j'
147
- end
148
-
149
- it 'decodes short-form decimal UTF-8 character references without a semicolon' do
150
- SanitizeUrl.dereference_numerics('&#106').should == 'j'
151
- end
152
-
153
- it 'decodes long-form decimal UTF-8 character references with a semicolon' do
154
- SanitizeUrl.dereference_numerics('&#0000106;').should == 'j'
155
- end
156
-
157
- it 'decodes long-form decimal UTF-8 character references without a semicolon' do
158
- SanitizeUrl.dereference_numerics('&#0000106').should == 'j'
159
- end
160
-
161
- it 'decodes hex UTF-8 character references with a semicolon' do
162
- SanitizeUrl.dereference_numerics('&#x6A;').should == 'j'
163
- end
164
-
165
- it 'decodes hex UTF-8 character references without a semicolon' do
166
- SanitizeUrl.dereference_numerics('&#x6A').should == 'j'
167
- end
168
- end
169
- end
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe SanitizeUrl do
4
+ include SanitizeUrl
5
+
6
+ describe '#sanitize_url' do
7
+ it 'replaces JavaScript URLs with options[:replace_evil_with]' do
8
+ urls = [
9
+ 'javascript:alert("1");',
10
+ 'javascript//:alert("2");',
11
+ 'javascript://alert("3");',
12
+ 'javascript/:/alert("4");',
13
+ 'j a v a script:alert("5");',
14
+ ' javascript:alert("6");',
15
+ 'JavaScript:alert("7");',
16
+ "java\nscript:alert(\"8\");",
17
+ "java\rscript:alert(\"9\");"
18
+ ].each do |evil_url|
19
+ sanitize_url(evil_url, :replace_evil_with => 'replaced').should == 'replaced'
20
+ end
21
+ end
22
+
23
+ it 'replaces data: URLs with options[:replace_evil_with]' do
24
+ urls = [
25
+ 'data:text/html;base64,PHNjcmlwdD5hbGVydCgnMScpPC9zY3JpcHQ+',
26
+ 'data://text/html;base64,PHNjcmlwdD5hbGVydCgnMicpPC9zY3JpcHQ+',
27
+ 'data//:text/html;base64,PHNjcmlwdD5hbGVydCgnMycpPC9zY3JpcHQ+',
28
+ 'data/:/text/html;base64,PHNjcmlwdD5hbGVydCgnNCcpPC9zY3JpcHQ+',
29
+ ' data:text/html;base64,PHNjcmlwdD5hbGVydCgnNScpPC9zY3JpcHQ+',
30
+ 'da ta:text/html;base64,PHNjcmlwdD5hbGVydCgnNicpPC9zY3JpcHQ+',
31
+ 'Data:text/html;base64,PHNjcmlwdD5hbGVydCgnNycpPC9zY3JpcHQ+',
32
+ "da\nta:text/html;base64,PHNjcmlwdD5hbGVydCgnOCcpPC9zY3JpcHQ+",
33
+ "da\rta:text/html;base64,PHNjcmlwdD5hbGVydCgnOScpPC9zY3JpcHQ+",
34
+ ].each do |evil_url|
35
+ sanitize_url(evil_url, :replace_evil_with => 'replaced').should == 'replaced'
36
+ end
37
+ end
38
+
39
+ context 'with :schemes whitelist' do
40
+ it 'kills anything not on the list' do
41
+ [
42
+ 'https://example.com',
43
+ 'https:example.com',
44
+ 'ftp://example.com',
45
+ 'ftp:example.com',
46
+ 'data://example.com',
47
+ 'data:example.com',
48
+ 'javascript://example.com',
49
+ 'javascript:example.com',
50
+ ].each do |evil_url|
51
+ sanitize_url(evil_url, :schemes => ['http'], :replace_evil_with => 'replaced')
52
+ end
53
+ end
54
+
55
+ it 'allows anything on the list' do
56
+ [
57
+ 'http://example.com',
58
+ 'https://example.com'
59
+ ].each do |good_url|
60
+ sanitize_url(good_url, :schemes => ['http', 'https']).should == good_url
61
+ end
62
+ end
63
+
64
+ it 'works with schemes given as symbols' do
65
+ sanitize_url('ftp://example.com', :schemes => [:http, :https], :replace_evil_with => 'replaced').should == 'replaced'
66
+ sanitize_url('ftp://example.com', :schemes => [:http, :https, :ftp]).should == 'ftp://example.com'
67
+ end
68
+ end
69
+
70
+ it 'prepends http:// if no scheme is given' do
71
+ sanitize_url('www.example.com').should == 'http://www.example.com'
72
+ end
73
+
74
+ it 'replaces evil URLs that are encoded with Unicode numerical character references' do
75
+ [
76
+ '&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#49;&#39;&#41;',
77
+ '&#x6A;&#x61;&#x76;&#x61;&#x73;&#x63;&#x72;&#x69;&#x70;&#x74;&#x3A;&#x61;&#x6C;&#x65;&#x72;&#x74;&#x28;&#x27;&#x32;&#x27;&#x29;'
78
+ ].each do |evil_url|
79
+ sanitize_url(evil_url, :replace_evil_with => 'replaced').should == 'replaced'
80
+ end
81
+ end
82
+
83
+ it 'replaces evil URLs that are URL-encoded (hex with %)' do
84
+ sanitize_url('%6A%61%76%61%73%63%72%69%70%74%3A%61%6C%65%72%74%28%22%58%53%53%22%29', :replace_evil_with => 'replaced').should == 'replaced'
85
+ end
86
+
87
+ it 'does not try to fix broken schemes after the start of the string' do
88
+ sanitize_url('http://example.com/http/foo').should == 'http://example.com/http/foo'
89
+ end
90
+
91
+ it 'does not prepend an extra http:// if a valid scheme is given' do
92
+ sanitize_url('http://www.example.com').should == 'http://www.example.com'
93
+ sanitize_url('https://www.example.com').should == 'https://www.example.com'
94
+ sanitize_url('ftp://www.example.com').should == 'ftp://www.example.com'
95
+ end
96
+
97
+ it 'dereferences URL-encoded characters in the scheme' do
98
+ sanitize_url('h%74tp://example.com').should == 'http://example.com'
99
+ end
100
+
101
+ it 'dereferences decimal numeric character references in the scheme' do
102
+ sanitize_url('h&#116;tp://example.com').should == 'http://example.com'
103
+ end
104
+
105
+ it 'dereferences hex numeric character references in the scheme' do
106
+ sanitize_url('h&#x74;tp://example.com').should == 'http://example.com'
107
+ end
108
+
109
+ it 'retains URL-encoded characters in the opaque portion' do
110
+ sanitize_url('http://someone%40gmail.com:password@example.com').should == 'http://someone%40gmail.com:password@example.com'
111
+ end
112
+
113
+ it 'URL-encodes code points outside ASCII' do
114
+ # Percent-encoding should be in UTF-8 (RFC 3986).
115
+ # http://en.wikipedia.org/wiki/Percent-encoding#Current_standard
116
+ sanitize_url('http://&#1044;').should == 'http://%D0%94'
117
+ sanitize_url('http://&#x0414;').should == 'http://%D0%94'
118
+ sanitize_url("http://\xD0\x94").should == 'http://%D0%94' # UTF-8 version of the same.
119
+ end
120
+
121
+ it 'replaces URLs without the opaque portion' do
122
+ sanitize_url('http://', :replace_evil_with => 'replaced').should == 'replaced'
123
+ sanitize_url('mailto:', :replace_evil_with => 'replaced').should == 'replaced'
124
+ end
125
+
126
+ it 'adds the two slashes for known schemes that require it' do
127
+ sanitize_url('http:example.com').should == 'http://example.com'
128
+ sanitize_url('ftp:example.com').should == 'ftp://example.com'
129
+ sanitize_url('svn+ssh:example.com').should == 'svn+ssh://example.com'
130
+ end
131
+
132
+ it 'does not add slashes for schemes that do not require it' do
133
+ sanitize_url('mailto:someone@example.com').should == 'mailto:someone@example.com'
134
+ end
135
+
136
+ it 'strips invalid characters from the scheme and then evaluates the scheme according to the normal rules' do
137
+ sanitize_url("ht\xD0\x94tp://example.com").should == 'http://example.com'
138
+ sanitize_url('htt$p://example.com').should == 'http://example.com'
139
+ sanitize_url('j%avascript:alert("XSS")', :replace_evil_with => 'replaced').should == 'replaced'
140
+ end
141
+ end
142
+
143
+
144
+ describe '.dereference_numerics' do
145
+ it 'decodes short-form decimal UTF-8 character references with a semicolon' do
146
+ SanitizeUrl.dereference_numerics('&#106;').should == 'j'
147
+ end
148
+
149
+ it 'decodes short-form decimal UTF-8 character references without a semicolon' do
150
+ SanitizeUrl.dereference_numerics('&#106').should == 'j'
151
+ end
152
+
153
+ it 'decodes long-form decimal UTF-8 character references with a semicolon' do
154
+ SanitizeUrl.dereference_numerics('&#0000106;').should == 'j'
155
+ end
156
+
157
+ it 'decodes long-form decimal UTF-8 character references without a semicolon' do
158
+ SanitizeUrl.dereference_numerics('&#0000106').should == 'j'
159
+ end
160
+
161
+ it 'decodes hex UTF-8 character references with a semicolon' do
162
+ SanitizeUrl.dereference_numerics('&#x6A;').should == 'j'
163
+ end
164
+
165
+ it 'decodes hex UTF-8 character references without a semicolon' do
166
+ SanitizeUrl.dereference_numerics('&#x6A').should == 'j'
167
+ end
168
+ end
169
+ end
data/spec/spec_helper.rb CHANGED
@@ -1,7 +1,7 @@
1
- require 'rubygems'
2
- require 'test/unit'
3
- require 'spec'
4
-
5
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
- $LOAD_PATH.unshift(File.dirname(__FILE__))
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'spec'
4
+
5
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
7
7
  require 'sanitize-url'
data/test.rb CHANGED
@@ -1,16 +1,4 @@
1
- # Copyright sign
2
-
3
- #def decimal_code_point_to_url_encoded(code_point)
4
- # utf_8_str = ([code_point.to_i].pack('U'))
5
- # '%' + utf_8_str.unpack('H2' * utf_8_str.length).join('%').upcase
6
- #end
7
-
8
- hex_code_point = 'A9'
9
- decimal_code_point = '169'
10
- hex_utf_8_bytes = '%C2%A9'
11
-
12
- #puts 'Expected: ' + hex_utf_8_bytes
13
- #puts 'Actual: ' + decimal_code_point_to_url_encoded(decimal_code_point)
14
-
15
- evil = 'javascript:alert("XSS")'
16
- puts evil.unpack('H2' * evil.length).join('%').upcase
1
+ # encoding: UTF-8
2
+
3
+ puts 'Д'
4
+ puts 'Д'.unpack('H2' * 2).inspect
metadata CHANGED
@@ -1,7 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sanitize-url
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 1
8
+ - 4
9
+ version: 0.1.4
5
10
  platform: ruby
6
11
  authors:
7
12
  - jarrett
@@ -9,19 +14,23 @@ autorequire:
9
14
  bindir: bin
10
15
  cert_chain: []
11
16
 
12
- date: 2010-02-25 00:00:00 -06:00
17
+ date: 2010-03-21 00:00:00 -05:00
13
18
  default_executable:
14
19
  dependencies:
15
20
  - !ruby/object:Gem::Dependency
16
21
  name: rspec
17
- type: :development
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
20
24
  requirements:
21
25
  - - ">="
22
26
  - !ruby/object:Gem::Version
27
+ segments:
28
+ - 1
29
+ - 3
30
+ - 0
23
31
  version: 1.3.0
24
- version:
32
+ type: :development
33
+ version_requirements: *id001
25
34
  description: "This gem provides a module called SanitizeUrl, which you can mix-in anywhere you like. It provides a single method: sanitize_url, which accepts a URL and returns one with JavaScript removed. It also prepends the http:// scheme if no valid scheme is found."
26
35
  email: jarrett@uchicago.edu
27
36
  executables: []
@@ -40,6 +49,7 @@ files:
40
49
  - VERSION
41
50
  - lib/sanitize-url.rb
42
51
  - sanitize-url.gemspec
52
+ - spec/char_codes_spec.rb
43
53
  - spec/sanitize_url_spec.rb
44
54
  - spec/spec_helper.rb
45
55
  - test.rb
@@ -56,21 +66,24 @@ required_ruby_version: !ruby/object:Gem::Requirement
56
66
  requirements:
57
67
  - - ">="
58
68
  - !ruby/object:Gem::Version
69
+ segments:
70
+ - 0
59
71
  version: "0"
60
- version:
61
72
  required_rubygems_version: !ruby/object:Gem::Requirement
62
73
  requirements:
63
74
  - - ">="
64
75
  - !ruby/object:Gem::Version
76
+ segments:
77
+ - 0
65
78
  version: "0"
66
- version:
67
79
  requirements: []
68
80
 
69
81
  rubyforge_project:
70
- rubygems_version: 1.3.5
82
+ rubygems_version: 1.3.6
71
83
  signing_key:
72
84
  specification_version: 3
73
85
  summary: Sanitizes untrusted URLs
74
86
  test_files:
87
+ - spec/char_codes_spec.rb
75
88
  - spec/sanitize_url_spec.rb
76
89
  - spec/spec_helper.rb