rgrove-sanitize 1.0.8.3 → 1.0.8.4

Sign up to get free protection for your applications and to get access to all the features.
data/HISTORY CHANGED
@@ -1,8 +1,10 @@
1
1
  Sanitize History
2
2
  ================================================================================
3
3
 
4
- Version 1.0.8.2 (git)
4
+ Version 1.0.8.4 (git)
5
5
  * Migrated from Hpricot to Nokogiri. Requires libxml2 >= 2.7.2 [Adam Hooper]
6
+ * Added an :output config setting to allow the output format to be specified.
7
+ Supported formats are :xhtml (the default) and :html (which outputs HTML4).
6
8
  * Changed protocol regex to ensure Sanitize doesn't kill URLs with colons in
7
9
  path segments. [Peter Cooper]
8
10
 
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
  #--
2
3
  # Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
3
4
  #
@@ -29,18 +30,6 @@ require 'sanitize/config/relaxed'
29
30
 
30
31
  class Sanitize
31
32
 
32
- # Characters that should be replaced with entities in text nodes.
33
- ENTITY_MAP = {
34
- '<' => '&lt;',
35
- '>' => '&gt;',
36
- '"' => '&quot;',
37
- "'" => '&#39;'
38
- }
39
-
40
- # Matches an unencoded ampersand that is not part of a valid character entity
41
- # reference.
42
- REGEX_AMPERSAND = /&(?!(?:[a-z]+[0-9]{0,2}|#[0-9]+|#x[0-9a-f]+);)/i
43
-
44
33
  # Matches an attribute value that could be treated by a browser as a URL
45
34
  # with a protocol prefix, such as "http:" or "javascript:". Any string of zero
46
35
  # or more characters followed by a colon is considered a match, even if the
@@ -124,7 +113,24 @@ class Sanitize
124
113
  end
125
114
  end
126
115
 
127
- result = fragment.to_xhtml(:encoding => 'UTF-8', :indent => 0).gsub(/>\n/, '>')
116
+ if @config[:output] == :xhtml
117
+ output_method = fragment.method(:to_xhtml)
118
+ elsif @config[:output] == :html
119
+ output_method = fragment.method(:to_html)
120
+ else
121
+ raise Error, "unsupported output format: #{@config[:output]}"
122
+ end
123
+
124
+ if RUBY_VERSION >= '1.9'
125
+ # Nokogiri 1.3.3 (and possibly earlier versions) always returns a US-ASCII
126
+ # string no matter what we ask for. This will be fixed in 1.4.0, but for
127
+ # now we have to hack around it to prevent errors.
128
+ result = output_method.call(:encoding => 'utf-8', :indent => 0).force_encoding('utf-8')
129
+ result.gsub!(">\n", '>')
130
+ else
131
+ result = output_method.call(:encoding => 'utf-8', :indent => 0).gsub(">\n", '>')
132
+ end
133
+
128
134
  return result == html ? nil : html[0, html.length] = result
129
135
  end
130
136
 
@@ -146,18 +152,6 @@ class Sanitize
146
152
  sanitize = Sanitize.new(config)
147
153
  sanitize.clean!(html)
148
154
  end
149
-
150
- # Encodes special HTML characters (<, >, ", ', and &) in _html_ as entity
151
- # references and returns the encoded string.
152
- def encode_html(html)
153
- str = html.dup
154
-
155
- # Encode special chars.
156
- ENTITY_MAP.each {|char, entity| str.gsub!(char, entity) }
157
-
158
- # Convert unencoded ampersands to entity references.
159
- str.gsub(REGEX_AMPERSAND, '&amp;')
160
- end
161
155
  end
162
156
 
163
157
  end
@@ -28,17 +28,21 @@ class Sanitize
28
28
  # comments.
29
29
  :allow_comments => false,
30
30
 
31
- # HTML elements to allow. By default, no elements are allowed (which means
32
- # that all HTML will be stripped).
33
- :elements => [],
31
+ # HTML attributes to add to specific elements. By default, no attributes
32
+ # are added.
33
+ :add_attributes => {},
34
34
 
35
35
  # HTML attributes to allow in specific elements. By default, no attributes
36
36
  # are allowed.
37
37
  :attributes => {},
38
38
 
39
- # HTML attributes to add to specific elements. By default, no attributes
40
- # are added.
41
- :add_attributes => {},
39
+ # HTML elements to allow. By default, no elements are allowed (which means
40
+ # that all HTML will be stripped).
41
+ :elements => [],
42
+
43
+ # Output format. Supported formats are :html and :xhtml (which is the
44
+ # default).
45
+ :output => :xhtml,
42
46
 
43
47
  # URL handling protocols to allow in specific attributes. By default, no
44
48
  # protocols are allowed. Use :relative in place of a protocol if you want
@@ -1,3 +1,3 @@
1
1
  class Sanitize
2
- VERSION = '1.0.8.3'
2
+ VERSION = '1.0.8.4'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rgrove-sanitize
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.8.3
4
+ version: 1.0.8.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Grove
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-09-04 00:00:00 -07:00
12
+ date: 2009-09-17 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -62,6 +62,7 @@ files:
62
62
  - lib/sanitize.rb
63
63
  has_rdoc: false
64
64
  homepage: http://github.com/rgrove/sanitize/
65
+ licenses:
65
66
  post_install_message:
66
67
  rdoc_options: []
67
68
 
@@ -82,7 +83,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
82
83
  requirements: []
83
84
 
84
85
  rubyforge_project:
85
- rubygems_version: 1.2.0
86
+ rubygems_version: 1.3.5
86
87
  signing_key:
87
88
  specification_version: 3
88
89
  summary: Whitelist-based HTML sanitizer.