sanitize 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/HISTORY +8 -0
- data/LICENSE +1 -1
- data/{README → README.rdoc} +10 -3
- data/lib/sanitize.rb +23 -11
- data/lib/sanitize/config.rb +3 -2
- data/lib/sanitize/config/basic.rb +5 -4
- data/lib/sanitize/config/relaxed.rb +6 -5
- data/lib/sanitize/config/restricted.rb +1 -1
- data/lib/sanitize/monkeypatch/hpricot.rb +33 -0
- metadata +14 -3
data/HISTORY
CHANGED
@@ -1,5 +1,13 @@
|
|
1
1
|
Sanitize History
|
2
2
|
================================================================================
|
3
3
|
|
4
|
+
Version 1.0.1 (2009-01-01)
|
5
|
+
* You can now specify :relative in a protocol config array to allow attributes
|
6
|
+
containing relative URLs with no protocol. The Basic and Relaxed configs
|
7
|
+
have been updated to allow relative URLs.
|
8
|
+
* Added a workaround for an Hpricot bug that causes HTML entities for
|
9
|
+
non-ASCII characters to be replaced by question marks, and all other
|
10
|
+
entities to be destructively decoded.
|
11
|
+
|
4
12
|
Version 1.0.0 (2008-12-25)
|
5
13
|
* First release.
|
data/LICENSE
CHANGED
data/{README → README.rdoc}
RENAMED
@@ -15,8 +15,8 @@ or maliciously-formed HTML. When in doubt, Sanitize always errs on the side of
|
|
15
15
|
caution.
|
16
16
|
|
17
17
|
*Author*:: Ryan Grove (mailto:ryan@wonko.com)
|
18
|
-
*Version*:: 1.0.
|
19
|
-
*Copyright*:: Copyright (c)
|
18
|
+
*Version*:: 1.0.1 (2009-01-01)
|
19
|
+
*Copyright*:: Copyright (c) 2009 Ryan Grove. All rights reserved.
|
20
20
|
*License*:: MIT License (http://opensource.org/licenses/mit-license.php)
|
21
21
|
*Website*:: http://github.com/rgrove/sanitize
|
22
22
|
|
@@ -120,9 +120,16 @@ protocol at all), it will be removed.
|
|
120
120
|
'img' => {'src' => ['http', 'https']}
|
121
121
|
}
|
122
122
|
|
123
|
+
If you'd like to allow the use of relative URLs which don't have a protocol,
|
124
|
+
include the special value <code>:relative</code> in the protocol array:
|
125
|
+
|
126
|
+
:protocols => {
|
127
|
+
'a' => {'href' => ['http', 'https', :relative]}
|
128
|
+
}
|
129
|
+
|
123
130
|
== License
|
124
131
|
|
125
|
-
Copyright (c)
|
132
|
+
Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
|
126
133
|
|
127
134
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
128
135
|
this software and associated documentation files (the 'Software'), to deal in
|
data/lib/sanitize.rb
CHANGED
@@ -1,16 +1,16 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright (c)
|
3
|
-
#
|
2
|
+
# Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
|
3
|
+
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
5
|
# of this software and associated documentation files (the 'Software'), to deal
|
6
6
|
# in the Software without restriction, including without limitation the rights
|
7
7
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
8
|
# copies of the Software, and to permit persons to whom the Software is
|
9
9
|
# furnished to do so, subject to the following conditions:
|
10
|
-
#
|
10
|
+
#
|
11
11
|
# The above copyright notice and this permission notice shall be included in all
|
12
12
|
# copies or substantial portions of the Software.
|
13
|
-
#
|
13
|
+
#
|
14
14
|
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
15
15
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
16
16
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
@@ -25,13 +25,17 @@ $:.unshift(File.dirname(File.expand_path(__FILE__)))
|
|
25
25
|
$:.uniq!
|
26
26
|
|
27
27
|
require 'rubygems'
|
28
|
-
|
28
|
+
|
29
|
+
gem 'hpricot', '~> 0.6'
|
30
|
+
gem 'htmlentities', '~> 4.0.0'
|
29
31
|
|
30
32
|
require 'hpricot'
|
33
|
+
require 'htmlentities'
|
31
34
|
require 'sanitize/config'
|
32
35
|
require 'sanitize/config/restricted'
|
33
36
|
require 'sanitize/config/basic'
|
34
37
|
require 'sanitize/config/relaxed'
|
38
|
+
require 'sanitize/monkeypatch/hpricot'
|
35
39
|
|
36
40
|
class Sanitize
|
37
41
|
#--
|
@@ -100,8 +104,13 @@ class Sanitize
|
|
100
104
|
protocol = @config[:protocols][name]
|
101
105
|
|
102
106
|
node.raw_attributes.delete_if do |key, value|
|
103
|
-
protocol.has_key?(key)
|
104
|
-
|
107
|
+
next false unless protocol.has_key?(key)
|
108
|
+
|
109
|
+
if value.downcase =~ /^([^:]+)(?:\:|&#0*58;|&#x0*3a;)/
|
110
|
+
!protocol[key].include?($1.downcase)
|
111
|
+
else
|
112
|
+
!protocol[key].include?(:relative)
|
113
|
+
end
|
105
114
|
end
|
106
115
|
end
|
107
116
|
else
|
@@ -117,12 +126,15 @@ class Sanitize
|
|
117
126
|
end
|
118
127
|
end
|
119
128
|
|
120
|
-
# Make one last pass through the fragment and
|
121
|
-
#
|
122
|
-
# maliciously-malformed nested tags
|
129
|
+
# Make one last pass through the fragment and encode all special HTML chars
|
130
|
+
# and non-ASCII chars as entities. This eliminates certain types of
|
131
|
+
# maliciously-malformed nested tags and also compensates for Hpricot's
|
132
|
+
# burning desire to decode all entities.
|
133
|
+
coder = HTMLEntities.new
|
134
|
+
|
123
135
|
fragment.traverse_element do |node|
|
124
136
|
if node.text?
|
125
|
-
node.swap(node.inner_text
|
137
|
+
node.swap(coder.encode(node.inner_text, :named))
|
126
138
|
end
|
127
139
|
end
|
128
140
|
|
data/lib/sanitize/config.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright (c)
|
2
|
+
# Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
5
|
# of this software and associated documentation files (the 'Software'), to deal
|
@@ -41,7 +41,8 @@ class Sanitize
|
|
41
41
|
:add_attributes => {},
|
42
42
|
|
43
43
|
# URL handling protocols to allow in specific attributes. By default, no
|
44
|
-
# protocols are allowed.
|
44
|
+
# protocols are allowed. Use :relative in place of a protocol if you want
|
45
|
+
# to allow relative URLs sans protocol.
|
45
46
|
:protocols => {}
|
46
47
|
}
|
47
48
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright (c)
|
2
|
+
# Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
5
|
# of this software and associated documentation files (the 'Software'), to deal
|
@@ -39,9 +39,10 @@ class Sanitize
|
|
39
39
|
},
|
40
40
|
|
41
41
|
:protocols => {
|
42
|
-
'a' => {'href' => ['ftp', 'http', 'https', 'mailto'
|
43
|
-
|
44
|
-
'
|
42
|
+
'a' => {'href' => ['ftp', 'http', 'https', 'mailto',
|
43
|
+
:relative]},
|
44
|
+
'blockquote' => {'cite' => ['http', 'https', :relative]},
|
45
|
+
'q' => {'cite' => ['http', 'https', :relative]}
|
45
46
|
}
|
46
47
|
}
|
47
48
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright (c)
|
2
|
+
# Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
5
|
# of this software and associated documentation files (the 'Software'), to deal
|
@@ -45,10 +45,11 @@ class Sanitize
|
|
45
45
|
},
|
46
46
|
|
47
47
|
:protocols => {
|
48
|
-
'a' => {'href' => ['ftp', 'http', 'https', 'mailto'
|
49
|
-
|
50
|
-
'
|
51
|
-
'
|
48
|
+
'a' => {'href' => ['ftp', 'http', 'https', 'mailto',
|
49
|
+
:relative]},
|
50
|
+
'blockquote' => {'cite' => ['http', 'https', :relative]},
|
51
|
+
'img' => {'src' => ['http', 'https', :relative]},
|
52
|
+
'q' => {'cite' => ['http', 'https', :relative]}
|
52
53
|
}
|
53
54
|
}
|
54
55
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright (c)
|
2
|
+
# Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
|
3
3
|
#
|
4
4
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
5
|
# of this software and associated documentation files (the 'Software'), to deal
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
|
+
# of this software and associated documentation files (the 'Software'), to deal
|
6
|
+
# in the Software without restriction, including without limitation the rights
|
7
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
# copies of the Software, and to permit persons to whom the Software is
|
9
|
+
# furnished to do so, subject to the following conditions:
|
10
|
+
#
|
11
|
+
# The above copyright notice and this permission notice shall be included in all
|
12
|
+
# copies or substantial portions of the Software.
|
13
|
+
#
|
14
|
+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
15
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
16
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
17
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
18
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
19
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
20
|
+
# SOFTWARE.
|
21
|
+
#++
|
22
|
+
|
23
|
+
module Hpricot
|
24
|
+
|
25
|
+
# Monkeypatch to fix an Hpricot bug that causes HTML entities to be decoded
|
26
|
+
# incorrectly.
|
27
|
+
def self.uxs(str)
|
28
|
+
str.to_s.
|
29
|
+
gsub(/&(\w+);/) { [Hpricot::NamedCharacters[$1] || ??].pack("U*") }.
|
30
|
+
gsub(/\&\#(\d+);/) { [$1.to_i].pack("U*") }
|
31
|
+
end
|
32
|
+
|
33
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sanitize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Grove
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2009-01-01 00:00:00 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -22,6 +22,16 @@ dependencies:
|
|
22
22
|
- !ruby/object:Gem::Version
|
23
23
|
version: "0.6"
|
24
24
|
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: htmlentities
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 4.0.0
|
34
|
+
version:
|
25
35
|
description:
|
26
36
|
email: ryan@wonko.com
|
27
37
|
executables: []
|
@@ -33,12 +43,13 @@ extra_rdoc_files: []
|
|
33
43
|
files:
|
34
44
|
- HISTORY
|
35
45
|
- LICENSE
|
36
|
-
- README
|
46
|
+
- README.rdoc
|
37
47
|
- lib/sanitize.rb
|
38
48
|
- lib/sanitize/config.rb
|
39
49
|
- lib/sanitize/config/basic.rb
|
40
50
|
- lib/sanitize/config/relaxed.rb
|
41
51
|
- lib/sanitize/config/restricted.rb
|
52
|
+
- lib/sanitize/monkeypatch/hpricot.rb
|
42
53
|
has_rdoc: false
|
43
54
|
homepage: http://github.com/rgrove/sanitize/
|
44
55
|
post_install_message:
|