sanitize 1.0.0 → 1.0.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
data/HISTORY CHANGED
@@ -1,5 +1,13 @@
1
1
  Sanitize History
2
2
  ================================================================================
3
3
 
4
+ Version 1.0.1 (2009-01-01)
5
+ * You can now specify :relative in a protocol config array to allow attributes
6
+ containing relative URLs with no protocol. The Basic and Relaxed configs
7
+ have been updated to allow relative URLs.
8
+ * Added a workaround for an Hpricot bug that causes HTML entities for
9
+ non-ASCII characters to be replaced by question marks, and all other
10
+ entities to be destructively decoded.
11
+
4
12
  Version 1.0.0 (2008-12-25)
5
13
  * First release.
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2008 Ryan Grove <ryan@wonko.com>
1
+ Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining a copy of
4
4
  this software and associated documentation files (the 'Software'), to deal in
@@ -15,8 +15,8 @@ or maliciously-formed HTML. When in doubt, Sanitize always errs on the side of
15
15
  caution.
16
16
 
17
17
  *Author*:: Ryan Grove (mailto:ryan@wonko.com)
18
- *Version*:: 1.0.0 (2008-12-25)
19
- *Copyright*:: Copyright (c) 2008 Ryan Grove. All rights reserved.
18
+ *Version*:: 1.0.1 (2009-01-01)
19
+ *Copyright*:: Copyright (c) 2009 Ryan Grove. All rights reserved.
20
20
  *License*:: MIT License (http://opensource.org/licenses/mit-license.php)
21
21
  *Website*:: http://github.com/rgrove/sanitize
22
22
 
@@ -120,9 +120,16 @@ protocol at all), it will be removed.
120
120
  'img' => {'src' => ['http', 'https']}
121
121
  }
122
122
 
123
+ If you'd like to allow the use of relative URLs which don't have a protocol,
124
+ include the special value <code>:relative</code> in the protocol array:
125
+
126
+ :protocols => {
127
+ 'a' => {'href' => ['http', 'https', :relative]}
128
+ }
129
+
123
130
  == License
124
131
 
125
- Copyright (c) 2008 Ryan Grove <ryan@wonko.com>
132
+ Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
126
133
 
127
134
  Permission is hereby granted, free of charge, to any person obtaining a copy of
128
135
  this software and associated documentation files (the 'Software'), to deal in
data/lib/sanitize.rb CHANGED
@@ -1,16 +1,16 @@
1
1
  #--
2
- # Copyright (c) 2008 Ryan Grove <ryan@wonko.com>
3
- #
2
+ # Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
3
+ #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining a copy
5
5
  # of this software and associated documentation files (the 'Software'), to deal
6
6
  # in the Software without restriction, including without limitation the rights
7
7
  # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
8
  # copies of the Software, and to permit persons to whom the Software is
9
9
  # furnished to do so, subject to the following conditions:
10
- #
10
+ #
11
11
  # The above copyright notice and this permission notice shall be included in all
12
12
  # copies or substantial portions of the Software.
13
- #
13
+ #
14
14
  # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
15
  # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
16
  # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -25,13 +25,17 @@ $:.unshift(File.dirname(File.expand_path(__FILE__)))
25
25
  $:.uniq!
26
26
 
27
27
  require 'rubygems'
28
- gem 'hpricot', '~> 0.6'
28
+
29
+ gem 'hpricot', '~> 0.6'
30
+ gem 'htmlentities', '~> 4.0.0'
29
31
 
30
32
  require 'hpricot'
33
+ require 'htmlentities'
31
34
  require 'sanitize/config'
32
35
  require 'sanitize/config/restricted'
33
36
  require 'sanitize/config/basic'
34
37
  require 'sanitize/config/relaxed'
38
+ require 'sanitize/monkeypatch/hpricot'
35
39
 
36
40
  class Sanitize
37
41
  #--
@@ -100,8 +104,13 @@ class Sanitize
100
104
  protocol = @config[:protocols][name]
101
105
 
102
106
  node.raw_attributes.delete_if do |key, value|
103
- protocol.has_key?(key) && (!(value.downcase =~ /^([^:]+):/) ||
104
- !protocol[key].include?($1.downcase))
107
+ next false unless protocol.has_key?(key)
108
+
109
+ if value.downcase =~ /^([^:]+)(?:\:|&#0*58;|&#x0*3a;)/
110
+ !protocol[key].include?($1.downcase)
111
+ else
112
+ !protocol[key].include?(:relative)
113
+ end
105
114
  end
106
115
  end
107
116
  else
@@ -117,12 +126,15 @@ class Sanitize
117
126
  end
118
127
  end
119
128
 
120
- # Make one last pass through the fragment and replace angle brackets with
121
- # entities in all text nodes. This helps eliminate certain types of
122
- # maliciously-malformed nested tags.
129
+ # Make one last pass through the fragment and encode all special HTML chars
130
+ # and non-ASCII chars as entities. This eliminates certain types of
131
+ # maliciously-malformed nested tags and also compensates for Hpricot's
132
+ # burning desire to decode all entities.
133
+ coder = HTMLEntities.new
134
+
123
135
  fragment.traverse_element do |node|
124
136
  if node.text?
125
- node.swap(node.inner_text.gsub('<', '&lt;').gsub('>', '&gt;'))
137
+ node.swap(coder.encode(node.inner_text, :named))
126
138
  end
127
139
  end
128
140
 
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2008 Ryan Grove <ryan@wonko.com>
2
+ # Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining a copy
5
5
  # of this software and associated documentation files (the 'Software'), to deal
@@ -41,7 +41,8 @@ class Sanitize
41
41
  :add_attributes => {},
42
42
 
43
43
  # URL handling protocols to allow in specific attributes. By default, no
44
- # protocols are allowed.
44
+ # protocols are allowed. Use :relative in place of a protocol if you want
45
+ # to allow relative URLs sans protocol.
45
46
  :protocols => {}
46
47
  }
47
48
  end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2008 Ryan Grove <ryan@wonko.com>
2
+ # Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining a copy
5
5
  # of this software and associated documentation files (the 'Software'), to deal
@@ -39,9 +39,10 @@ class Sanitize
39
39
  },
40
40
 
41
41
  :protocols => {
42
- 'a' => {'href' => ['ftp', 'http', 'https', 'mailto']},
43
- 'blockquote' => {'cite' => ['http', 'https']},
44
- 'q' => {'cite' => ['http', 'https']}
42
+ 'a' => {'href' => ['ftp', 'http', 'https', 'mailto',
43
+ :relative]},
44
+ 'blockquote' => {'cite' => ['http', 'https', :relative]},
45
+ 'q' => {'cite' => ['http', 'https', :relative]}
45
46
  }
46
47
  }
47
48
  end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2008 Ryan Grove <ryan@wonko.com>
2
+ # Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining a copy
5
5
  # of this software and associated documentation files (the 'Software'), to deal
@@ -45,10 +45,11 @@ class Sanitize
45
45
  },
46
46
 
47
47
  :protocols => {
48
- 'a' => {'href' => ['ftp', 'http', 'https', 'mailto']},
49
- 'blockquote' => {'cite' => ['http', 'https']},
50
- 'img' => {'src' => ['http', 'https']},
51
- 'q' => {'cite' => ['http', 'https']}
48
+ 'a' => {'href' => ['ftp', 'http', 'https', 'mailto',
49
+ :relative]},
50
+ 'blockquote' => {'cite' => ['http', 'https', :relative]},
51
+ 'img' => {'src' => ['http', 'https', :relative]},
52
+ 'q' => {'cite' => ['http', 'https', :relative]}
52
53
  }
53
54
  }
54
55
  end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2008 Ryan Grove <ryan@wonko.com>
2
+ # Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining a copy
5
5
  # of this software and associated documentation files (the 'Software'), to deal
@@ -0,0 +1,33 @@
1
+ #--
2
+ # Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
5
+ # of this software and associated documentation files (the 'Software'), to deal
6
+ # in the Software without restriction, including without limitation the rights
7
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ # copies of the Software, and to permit persons to whom the Software is
9
+ # furnished to do so, subject to the following conditions:
10
+ #
11
+ # The above copyright notice and this permission notice shall be included in all
12
+ # copies or substantial portions of the Software.
13
+ #
14
+ # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20
+ # SOFTWARE.
21
+ #++
22
+
23
+ module Hpricot
24
+
25
+ # Monkeypatch to fix an Hpricot bug that causes HTML entities to be decoded
26
+ # incorrectly.
27
+ def self.uxs(str)
28
+ str.to_s.
29
+ gsub(/&(\w+);/) { [Hpricot::NamedCharacters[$1] || ??].pack("U*") }.
30
+ gsub(/\&\#(\d+);/) { [$1.to_i].pack("U*") }
31
+ end
32
+
33
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sanitize
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Grove
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-12-24 00:00:00 -08:00
12
+ date: 2009-01-01 00:00:00 -08:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -22,6 +22,16 @@ dependencies:
22
22
  - !ruby/object:Gem::Version
23
23
  version: "0.6"
24
24
  version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: htmlentities
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 4.0.0
34
+ version:
25
35
  description:
26
36
  email: ryan@wonko.com
27
37
  executables: []
@@ -33,12 +43,13 @@ extra_rdoc_files: []
33
43
  files:
34
44
  - HISTORY
35
45
  - LICENSE
36
- - README
46
+ - README.rdoc
37
47
  - lib/sanitize.rb
38
48
  - lib/sanitize/config.rb
39
49
  - lib/sanitize/config/basic.rb
40
50
  - lib/sanitize/config/relaxed.rb
41
51
  - lib/sanitize/config/restricted.rb
52
+ - lib/sanitize/monkeypatch/hpricot.rb
42
53
  has_rdoc: false
43
54
  homepage: http://github.com/rgrove/sanitize/
44
55
  post_install_message: