sanitize 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/HISTORY CHANGED
@@ -1,5 +1,13 @@
1
1
  Sanitize History
2
2
  ================================================================================
3
3
 
4
+ Version 1.0.1 (2009-01-01)
5
+ * You can now specify :relative in a protocol config array to allow attributes
6
+ containing relative URLs with no protocol. The Basic and Relaxed configs
7
+ have been updated to allow relative URLs.
8
+ * Added a workaround for an Hpricot bug that causes HTML entities for
9
+ non-ASCII characters to be replaced by question marks, and all other
10
+ entities to be destructively decoded.
11
+
4
12
  Version 1.0.0 (2008-12-25)
5
13
  * First release.
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2008 Ryan Grove <ryan@wonko.com>
1
+ Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining a copy of
4
4
  this software and associated documentation files (the 'Software'), to deal in
@@ -15,8 +15,8 @@ or maliciously-formed HTML. When in doubt, Sanitize always errs on the side of
15
15
  caution.
16
16
 
17
17
  *Author*:: Ryan Grove (mailto:ryan@wonko.com)
18
- *Version*:: 1.0.0 (2008-12-25)
19
- *Copyright*:: Copyright (c) 2008 Ryan Grove. All rights reserved.
18
+ *Version*:: 1.0.1 (2009-01-01)
19
+ *Copyright*:: Copyright (c) 2009 Ryan Grove. All rights reserved.
20
20
  *License*:: MIT License (http://opensource.org/licenses/mit-license.php)
21
21
  *Website*:: http://github.com/rgrove/sanitize
22
22
 
@@ -120,9 +120,16 @@ protocol at all), it will be removed.
120
120
  'img' => {'src' => ['http', 'https']}
121
121
  }
122
122
 
123
+ If you'd like to allow the use of relative URLs which don't have a protocol,
124
+ include the special value <code>:relative</code> in the protocol array:
125
+
126
+ :protocols => {
127
+ 'a' => {'href' => ['http', 'https', :relative]}
128
+ }
129
+
123
130
  == License
124
131
 
125
- Copyright (c) 2008 Ryan Grove <ryan@wonko.com>
132
+ Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
126
133
 
127
134
  Permission is hereby granted, free of charge, to any person obtaining a copy of
128
135
  this software and associated documentation files (the 'Software'), to deal in
data/lib/sanitize.rb CHANGED
@@ -1,16 +1,16 @@
1
1
  #--
2
- # Copyright (c) 2008 Ryan Grove <ryan@wonko.com>
3
- #
2
+ # Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
3
+ #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining a copy
5
5
  # of this software and associated documentation files (the 'Software'), to deal
6
6
  # in the Software without restriction, including without limitation the rights
7
7
  # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
8
  # copies of the Software, and to permit persons to whom the Software is
9
9
  # furnished to do so, subject to the following conditions:
10
- #
10
+ #
11
11
  # The above copyright notice and this permission notice shall be included in all
12
12
  # copies or substantial portions of the Software.
13
- #
13
+ #
14
14
  # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
15
  # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
16
  # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -25,13 +25,17 @@ $:.unshift(File.dirname(File.expand_path(__FILE__)))
25
25
  $:.uniq!
26
26
 
27
27
  require 'rubygems'
28
- gem 'hpricot', '~> 0.6'
28
+
29
+ gem 'hpricot', '~> 0.6'
30
+ gem 'htmlentities', '~> 4.0.0'
29
31
 
30
32
  require 'hpricot'
33
+ require 'htmlentities'
31
34
  require 'sanitize/config'
32
35
  require 'sanitize/config/restricted'
33
36
  require 'sanitize/config/basic'
34
37
  require 'sanitize/config/relaxed'
38
+ require 'sanitize/monkeypatch/hpricot'
35
39
 
36
40
  class Sanitize
37
41
  #--
@@ -100,8 +104,13 @@ class Sanitize
100
104
  protocol = @config[:protocols][name]
101
105
 
102
106
  node.raw_attributes.delete_if do |key, value|
103
- protocol.has_key?(key) && (!(value.downcase =~ /^([^:]+):/) ||
104
- !protocol[key].include?($1.downcase))
107
+ next false unless protocol.has_key?(key)
108
+
109
+ if value.downcase =~ /^([^:]+)(?:\:|&#0*58;|&#x0*3a;)/
110
+ !protocol[key].include?($1.downcase)
111
+ else
112
+ !protocol[key].include?(:relative)
113
+ end
105
114
  end
106
115
  end
107
116
  else
@@ -117,12 +126,15 @@ class Sanitize
117
126
  end
118
127
  end
119
128
 
120
- # Make one last pass through the fragment and replace angle brackets with
121
- # entities in all text nodes. This helps eliminate certain types of
122
- # maliciously-malformed nested tags.
129
+ # Make one last pass through the fragment and encode all special HTML chars
130
+ # and non-ASCII chars as entities. This eliminates certain types of
131
+ # maliciously-malformed nested tags and also compensates for Hpricot's
132
+ # burning desire to decode all entities.
133
+ coder = HTMLEntities.new
134
+
123
135
  fragment.traverse_element do |node|
124
136
  if node.text?
125
- node.swap(node.inner_text.gsub('<', '&lt;').gsub('>', '&gt;'))
137
+ node.swap(coder.encode(node.inner_text, :named))
126
138
  end
127
139
  end
128
140
 
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2008 Ryan Grove <ryan@wonko.com>
2
+ # Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining a copy
5
5
  # of this software and associated documentation files (the 'Software'), to deal
@@ -41,7 +41,8 @@ class Sanitize
41
41
  :add_attributes => {},
42
42
 
43
43
  # URL handling protocols to allow in specific attributes. By default, no
44
- # protocols are allowed.
44
+ # protocols are allowed. Use :relative in place of a protocol if you want
45
+ # to allow relative URLs sans protocol.
45
46
  :protocols => {}
46
47
  }
47
48
  end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2008 Ryan Grove <ryan@wonko.com>
2
+ # Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining a copy
5
5
  # of this software and associated documentation files (the 'Software'), to deal
@@ -39,9 +39,10 @@ class Sanitize
39
39
  },
40
40
 
41
41
  :protocols => {
42
- 'a' => {'href' => ['ftp', 'http', 'https', 'mailto']},
43
- 'blockquote' => {'cite' => ['http', 'https']},
44
- 'q' => {'cite' => ['http', 'https']}
42
+ 'a' => {'href' => ['ftp', 'http', 'https', 'mailto',
43
+ :relative]},
44
+ 'blockquote' => {'cite' => ['http', 'https', :relative]},
45
+ 'q' => {'cite' => ['http', 'https', :relative]}
45
46
  }
46
47
  }
47
48
  end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2008 Ryan Grove <ryan@wonko.com>
2
+ # Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining a copy
5
5
  # of this software and associated documentation files (the 'Software'), to deal
@@ -45,10 +45,11 @@ class Sanitize
45
45
  },
46
46
 
47
47
  :protocols => {
48
- 'a' => {'href' => ['ftp', 'http', 'https', 'mailto']},
49
- 'blockquote' => {'cite' => ['http', 'https']},
50
- 'img' => {'src' => ['http', 'https']},
51
- 'q' => {'cite' => ['http', 'https']}
48
+ 'a' => {'href' => ['ftp', 'http', 'https', 'mailto',
49
+ :relative]},
50
+ 'blockquote' => {'cite' => ['http', 'https', :relative]},
51
+ 'img' => {'src' => ['http', 'https', :relative]},
52
+ 'q' => {'cite' => ['http', 'https', :relative]}
52
53
  }
53
54
  }
54
55
  end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2008 Ryan Grove <ryan@wonko.com>
2
+ # Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining a copy
5
5
  # of this software and associated documentation files (the 'Software'), to deal
@@ -0,0 +1,33 @@
1
+ #--
2
+ # Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
5
+ # of this software and associated documentation files (the 'Software'), to deal
6
+ # in the Software without restriction, including without limitation the rights
7
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ # copies of the Software, and to permit persons to whom the Software is
9
+ # furnished to do so, subject to the following conditions:
10
+ #
11
+ # The above copyright notice and this permission notice shall be included in all
12
+ # copies or substantial portions of the Software.
13
+ #
14
+ # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20
+ # SOFTWARE.
21
+ #++
22
+
23
+ module Hpricot
24
+
25
+ # Monkeypatch of Hpricot.uxs to fix an Hpricot bug that causes HTML entities to be decoded incorrectly. Named entities (&name;) are looked up in Hpricot::NamedCharacters and fall back to "?" when the name is not listed; decimal entities (&#NNN;) are converted via to_i and packed as a UTF-8 character. Hexadecimal entities (&#xNN;) match neither pattern and are left as-is.
26
+ # NOTE(review): ?? is the character literal for "?"; on Ruby 1.9+ it is a String rather than an Integer, which pack("U*") presumably rejects -- confirm the supported Ruby versions.
27
+ def self.uxs(str)
28
+ str.to_s.
29
+ gsub(/&(\w+);/) { [Hpricot::NamedCharacters[$1] || ??].pack("U*") }.
30
+ gsub(/\&\#(\d+);/) { [$1.to_i].pack("U*") }
31
+ end
32
+
33
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sanitize
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Grove
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-12-24 00:00:00 -08:00
12
+ date: 2009-01-01 00:00:00 -08:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -22,6 +22,16 @@ dependencies:
22
22
  - !ruby/object:Gem::Version
23
23
  version: "0.6"
24
24
  version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: htmlentities
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 4.0.0
34
+ version:
25
35
  description:
26
36
  email: ryan@wonko.com
27
37
  executables: []
@@ -33,12 +43,13 @@ extra_rdoc_files: []
33
43
  files:
34
44
  - HISTORY
35
45
  - LICENSE
36
- - README
46
+ - README.rdoc
37
47
  - lib/sanitize.rb
38
48
  - lib/sanitize/config.rb
39
49
  - lib/sanitize/config/basic.rb
40
50
  - lib/sanitize/config/relaxed.rb
41
51
  - lib/sanitize/config/restricted.rb
52
+ - lib/sanitize/monkeypatch/hpricot.rb
42
53
  has_rdoc: false
43
54
  homepage: http://github.com/rgrove/sanitize/
44
55
  post_install_message: