sanitize 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY +8 -0
- data/LICENSE +1 -1
- data/{README → README.rdoc} +10 -3
- data/lib/sanitize.rb +23 -11
- data/lib/sanitize/config.rb +3 -2
- data/lib/sanitize/config/basic.rb +5 -4
- data/lib/sanitize/config/relaxed.rb +6 -5
- data/lib/sanitize/config/restricted.rb +1 -1
- data/lib/sanitize/monkeypatch/hpricot.rb +33 -0
- metadata +14 -3
data/HISTORY
CHANGED
@@ -1,5 +1,13 @@
|
|
1
1
|
Sanitize History
|
2
2
|
================================================================================
|
3
3
|
|
4
|
+
Version 1.0.1 (2009-01-01)
|
5
|
+
* You can now specify :relative in a protocol config array to allow attributes
|
6
|
+
containing relative URLs with no protocol. The Basic and Relaxed configs
|
7
|
+
have been updated to allow relative URLs.
|
8
|
+
* Added a workaround for an Hpricot bug that causes HTML entities for
|
9
|
+
non-ASCII characters to be replaced by question marks, and all other
|
10
|
+
entities to be destructively decoded.
|
11
|
+
|
4
12
|
Version 1.0.0 (2008-12-25)
|
5
13
|
* First release.
|
data/LICENSE
CHANGED
data/{README → README.rdoc}
RENAMED
@@ -15,8 +15,8 @@ or maliciously-formed HTML. When in doubt, Sanitize always errs on the side of
|
|
15
15
|
caution.
|
16
16
|
|
17
17
|
*Author*:: Ryan Grove (mailto:ryan@wonko.com)
|
18
|
-
*Version*:: 1.0.
|
19
|
-
*Copyright*:: Copyright (c)
|
18
|
+
*Version*:: 1.0.1 (2009-01-01)
|
19
|
+
*Copyright*:: Copyright (c) 2009 Ryan Grove. All rights reserved.
|
20
20
|
*License*:: MIT License (http://opensource.org/licenses/mit-license.php)
|
21
21
|
*Website*:: http://github.com/rgrove/sanitize
|
22
22
|
|
@@ -120,9 +120,16 @@ protocol at all), it will be removed.
|
|
120
120
|
'img' => {'src' => ['http', 'https']}
|
121
121
|
}
|
122
122
|
|
123
|
+
If you'd like to allow the use of relative URLs which don't have a protocol,
|
124
|
+
include the special value <code>:relative</code> in the protocol array:
|
125
|
+
|
126
|
+
:protocols => {
|
127
|
+
'a' => {'href' => ['http', 'https', :relative]}
|
128
|
+
}
|
129
|
+
|
123
130
|
== License
|
124
131
|
|
125
|
-
Copyright (c)
|
132
|
+
Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
|
126
133
|
|
127
134
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
128
135
|
this software and associated documentation files (the 'Software'), to deal in
|
data/lib/sanitize.rb
CHANGED
@@ -1,16 +1,16 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright (c)
|
3
|
-
#
|
2
|
+
# Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
|
3
|
+
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
5
|
# of this software and associated documentation files (the 'Software'), to deal
|
6
6
|
# in the Software without restriction, including without limitation the rights
|
7
7
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
8
|
# copies of the Software, and to permit persons to whom the Software is
|
9
9
|
# furnished to do so, subject to the following conditions:
|
10
|
-
#
|
10
|
+
#
|
11
11
|
# The above copyright notice and this permission notice shall be included in all
|
12
12
|
# copies or substantial portions of the Software.
|
13
|
-
#
|
13
|
+
#
|
14
14
|
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
15
15
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
16
16
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
@@ -25,13 +25,17 @@ $:.unshift(File.dirname(File.expand_path(__FILE__)))
|
|
25
25
|
$:.uniq!
|
26
26
|
|
27
27
|
require 'rubygems'
|
28
|
-
|
28
|
+
|
29
|
+
gem 'hpricot', '~> 0.6'
|
30
|
+
gem 'htmlentities', '~> 4.0.0'
|
29
31
|
|
30
32
|
require 'hpricot'
|
33
|
+
require 'htmlentities'
|
31
34
|
require 'sanitize/config'
|
32
35
|
require 'sanitize/config/restricted'
|
33
36
|
require 'sanitize/config/basic'
|
34
37
|
require 'sanitize/config/relaxed'
|
38
|
+
require 'sanitize/monkeypatch/hpricot'
|
35
39
|
|
36
40
|
class Sanitize
|
37
41
|
#--
|
@@ -100,8 +104,13 @@ class Sanitize
|
|
100
104
|
protocol = @config[:protocols][name]
|
101
105
|
|
102
106
|
node.raw_attributes.delete_if do |key, value|
|
103
|
-
protocol.has_key?(key)
|
104
|
-
|
107
|
+
next false unless protocol.has_key?(key)
|
108
|
+
|
109
|
+
if value.downcase =~ /^([^:]+)(?:\:|&#0*58;|&#x0*3a;)/
|
110
|
+
!protocol[key].include?($1.downcase)
|
111
|
+
else
|
112
|
+
!protocol[key].include?(:relative)
|
113
|
+
end
|
105
114
|
end
|
106
115
|
end
|
107
116
|
else
|
@@ -117,12 +126,15 @@ class Sanitize
|
|
117
126
|
end
|
118
127
|
end
|
119
128
|
|
120
|
-
# Make one last pass through the fragment and
|
121
|
-
#
|
122
|
-
# maliciously-malformed nested tags
|
129
|
+
# Make one last pass through the fragment and encode all special HTML chars
|
130
|
+
# and non-ASCII chars as entities. This eliminates certain types of
|
131
|
+
# maliciously-malformed nested tags and also compensates for Hpricot's
|
132
|
+
# burning desire to decode all entities.
|
133
|
+
coder = HTMLEntities.new
|
134
|
+
|
123
135
|
fragment.traverse_element do |node|
|
124
136
|
if node.text?
|
125
|
-
node.swap(node.inner_text
|
137
|
+
node.swap(coder.encode(node.inner_text, :named))
|
126
138
|
end
|
127
139
|
end
|
128
140
|
|
data/lib/sanitize/config.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright (c)
|
2
|
+
# Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
5
|
# of this software and associated documentation files (the 'Software'), to deal
|
@@ -41,7 +41,8 @@ class Sanitize
|
|
41
41
|
:add_attributes => {},
|
42
42
|
|
43
43
|
# URL handling protocols to allow in specific attributes. By default, no
|
44
|
-
# protocols are allowed.
|
44
|
+
# protocols are allowed. Use :relative in place of a protocol if you want
|
45
|
+
# to allow relative URLs sans protocol.
|
45
46
|
:protocols => {}
|
46
47
|
}
|
47
48
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright (c)
|
2
|
+
# Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
5
|
# of this software and associated documentation files (the 'Software'), to deal
|
@@ -39,9 +39,10 @@ class Sanitize
|
|
39
39
|
},
|
40
40
|
|
41
41
|
:protocols => {
|
42
|
-
'a' => {'href' => ['ftp', 'http', 'https', 'mailto'
|
43
|
-
|
44
|
-
'
|
42
|
+
'a' => {'href' => ['ftp', 'http', 'https', 'mailto',
|
43
|
+
:relative]},
|
44
|
+
'blockquote' => {'cite' => ['http', 'https', :relative]},
|
45
|
+
'q' => {'cite' => ['http', 'https', :relative]}
|
45
46
|
}
|
46
47
|
}
|
47
48
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright (c)
|
2
|
+
# Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
5
|
# of this software and associated documentation files (the 'Software'), to deal
|
@@ -45,10 +45,11 @@ class Sanitize
|
|
45
45
|
},
|
46
46
|
|
47
47
|
:protocols => {
|
48
|
-
'a' => {'href' => ['ftp', 'http', 'https', 'mailto'
|
49
|
-
|
50
|
-
'
|
51
|
-
'
|
48
|
+
'a' => {'href' => ['ftp', 'http', 'https', 'mailto',
|
49
|
+
:relative]},
|
50
|
+
'blockquote' => {'cite' => ['http', 'https', :relative]},
|
51
|
+
'img' => {'src' => ['http', 'https', :relative]},
|
52
|
+
'q' => {'cite' => ['http', 'https', :relative]}
|
52
53
|
}
|
53
54
|
}
|
54
55
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright (c)
|
2
|
+
# Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
5
|
# of this software and associated documentation files (the 'Software'), to deal
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
|
+
# of this software and associated documentation files (the 'Software'), to deal
|
6
|
+
# in the Software without restriction, including without limitation the rights
|
7
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
# copies of the Software, and to permit persons to whom the Software is
|
9
|
+
# furnished to do so, subject to the following conditions:
|
10
|
+
#
|
11
|
+
# The above copyright notice and this permission notice shall be included in all
|
12
|
+
# copies or substantial portions of the Software.
|
13
|
+
#
|
14
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
15
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
16
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
17
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
18
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
19
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
20
|
+
# SOFTWARE.
|
21
|
+
#++
|
22
|
+
|
23
|
+
module Hpricot
|
24
|
+
|
25
|
+
# Monkeypatch to fix an Hpricot bug that causes HTML entities to be decoded
|
26
|
+
# incorrectly.
|
27
|
+
def self.uxs(str)
|
28
|
+
str.to_s.
|
29
|
+
gsub(/&(\w+);/) { [Hpricot::NamedCharacters[$1] || ??].pack("U*") }.
|
30
|
+
gsub(/\&\#(\d+);/) { [$1.to_i].pack("U*") }
|
31
|
+
end
|
32
|
+
|
33
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sanitize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Grove
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2009-01-01 00:00:00 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -22,6 +22,16 @@ dependencies:
|
|
22
22
|
- !ruby/object:Gem::Version
|
23
23
|
version: "0.6"
|
24
24
|
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: htmlentities
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 4.0.0
|
34
|
+
version:
|
25
35
|
description:
|
26
36
|
email: ryan@wonko.com
|
27
37
|
executables: []
|
@@ -33,12 +43,13 @@ extra_rdoc_files: []
|
|
33
43
|
files:
|
34
44
|
- HISTORY
|
35
45
|
- LICENSE
|
36
|
-
- README
|
46
|
+
- README.rdoc
|
37
47
|
- lib/sanitize.rb
|
38
48
|
- lib/sanitize/config.rb
|
39
49
|
- lib/sanitize/config/basic.rb
|
40
50
|
- lib/sanitize/config/relaxed.rb
|
41
51
|
- lib/sanitize/config/restricted.rb
|
52
|
+
- lib/sanitize/monkeypatch/hpricot.rb
|
42
53
|
has_rdoc: false
|
43
54
|
homepage: http://github.com/rgrove/sanitize/
|
44
55
|
post_install_message:
|